peppy-sage 0.1.3__pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
peppy_sage/__init__.py ADDED
@@ -0,0 +1,55 @@
1
+ # peppy_sage/__init__.py
2
+
3
+ # Import the compiled PyO3 extension module
4
+ # (the same name as your PyO3 crate, built by maturin)
5
+ from ._peppy_sage import (
6
+ PyPeptide,
7
+ PyIndexedDatabase,
8
+ PyProcessedSpectrum,
9
+ PyScorer,
10
+ PyTolerance,
11
+ PyPrecursor,
12
+ PyKind,
13
+ )
14
+
15
+ # Define the _rust alias here for internal use by other modules.
16
+ # This makes it accessible via 'from . import _rust' in other package files.
17
# Bundle the binding classes as attributes of a throwaway class so that the
# sibling modules can reach all of them through one name
# (``from . import _rust``).
_rust = type(
    "RustBindings",
    (object,),
    dict(
        PyPeptide=PyPeptide,
        PyIndexedDatabase=PyIndexedDatabase,
        PyProcessedSpectrum=PyProcessedSpectrum,
        PyScorer=PyScorer,
        PyTolerance=PyTolerance,
        PyPrecursor=PyPrecursor,
        PyKind=PyKind,
    ),
)
26
+
27
+ # Import the high-level Python wrappers
28
+ from .core import Peptide, Spectrum, Precursor
29
+ from .indexing import IndexedDatabase
30
+ from .scoring import Scorer
31
+
32
from importlib.metadata import PackageNotFoundError, version

# Names exported by ``from peppy_sage import *``.
__all__ = [
    # Raw binding classes from the compiled extension
    "PyPeptide",
    "PyIndexedDatabase",
    "PyProcessedSpectrum",
    "PyScorer",
    "PyTolerance",
    "PyPrecursor",
    "PyKind",
    # Python wrapper classes
    "Peptide",
    "IndexedDatabase",
    "Spectrum",
    "Precursor",
    "Scorer",
]

# Read the version from the installed distribution's metadata.
try:
    __version__ = version("peppy_sage")
except PackageNotFoundError:
    # No installed distribution found (e.g. running from a source tree):
    # fall back to a placeholder version.
    __version__ = "0.0.0"
peppy_sage/core.py ADDED
@@ -0,0 +1,234 @@
1
+ from typing import List, Optional, Tuple, Union
2
+ import numpy as np
3
+ from pyteomics import mass
4
+ from . import _rust
5
+
6
# Proton mass; added once per charge when a neutral monoisotopic mass is
# converted to m/z, i.e. (mass + PROTON_MASS * z) / z.
PROTON_MASS = 1.0072764
7
+
8
+
9
class Precursor:
    """
    Thin Python facade over the Rust-side PyPrecursor (MS1 parent ion).

    The binding object is kept on ``_inner``; every public attribute is read
    straight from it.
    """

    def __init__(
        self,
        mz: float,
        charge: int,
        isolation_window: Union[_rust.PyTolerance, Tuple[float, float]] = (0.5, 0.5),
    ):
        """
        Args:
            mz: The m/z of the precursor ion.
            charge: The precursor charge state.
            isolation_window: A ready-made PyTolerance, or a (low, high) tuple
                in Da that is converted with ``PyTolerance.Da``.
        """
        # Only tuples are converted; any other value reaches the binding as-is.
        window = (
            _rust.PyTolerance.Da(*isolation_window)
            if isinstance(isolation_window, tuple)
            else isolation_window
        )
        self._inner = _rust.PyPrecursor(mz=mz, charge=charge, isolation_window=window)

    @property
    def mz(self) -> float:
        """m/z as stored on the wrapped PyPrecursor."""
        return self._inner.mz

    @property
    def charge(self) -> int:
        """Charge state as stored on the wrapped PyPrecursor."""
        return self._inner.charge

    @property
    def isolation_window(self) -> _rust.PyTolerance:
        """Isolation window as stored on the wrapped PyPrecursor."""
        return self._inner.isolation_window

    def __repr__(self) -> str:
        return f"<Precursor mz={self.mz:.4f}, z={self.charge}>"
49
+
50
+
51
class Spectrum:
    """
    High-level Python wrapper around Rust's PyProcessedSpectrum.

    The binding object is stored on ``_inner``; the properties below read
    from it on every access.
    """

    def __init__(
        self,
        id: str,
        file_id: int,
        scan_start_time: float,
        mz_array: Union[List[float], np.ndarray],
        intensity_array: Union[List[float], np.ndarray],
        precursors: List["Precursor"],
        total_ion_current: Optional[float] = None,
    ):
        """
        Create a spectrum from arrays of m/z and intensities.

        Args:
            id: Spectrum identifier (e.g. 'Scan_1234')
            file_id: Numeric file index (if multiple files are loaded)
            scan_start_time: Retention time (in minutes)
            mz_array: List or numpy array of fragment m/z values
            intensity_array: List or numpy array of corresponding intensities
            precursors: List of Precursor objects
            total_ion_current: Optional total ion current; computed
                automatically (sum of intensities) if None

        Raises:
            ValueError: If the two arrays do not have the same shape.
        """
        mz_array = np.asarray(mz_array, dtype=float)
        intensity_array = np.asarray(intensity_array, dtype=float)

        if mz_array.shape != intensity_array.shape:
            raise ValueError("mz_array and intensity_array must be the same length.")

        if total_ion_current is None:
            # Summed in float64, before the float32 down-cast below.
            total_ion_current = float(np.sum(intensity_array))

        # The binding is fed C-contiguous float32 arrays.
        mz_array = np.asarray(mz_array, dtype=np.float32, order="C")
        intensity_array = np.asarray(intensity_array, dtype=np.float32, order="C")

        self._inner = _rust.PyProcessedSpectrum(
            id,
            file_id,
            scan_start_time,
            mz_array,
            intensity_array,
            [p._inner for p in precursors],
            total_ion_current,
        )

    # -------------------------------------------------------------------------
    # Properties
    # -------------------------------------------------------------------------
    @property
    def id(self) -> str:
        """Spectrum identifier stored on the wrapped object."""
        return self._inner.id

    @property
    def file_id(self) -> int:
        """File index stored on the wrapped object."""
        return self._inner.file_id

    @property
    def scan_start_time(self) -> float:
        """Scan start time stored on the wrapped object."""
        return self._inner.scan_start_time

    @property
    def precursors(self) -> List["Precursor"]:
        """Return a list of precursor ions (fresh wrappers on every access)."""
        return [Precursor(p.mz, p.charge, p.isolation_window) for p in self._inner.precursors]

    @property
    def peaks(self) -> List[Tuple[float, float]]:
        """Return list of (m/z, intensity) tuples."""
        return [(mz, intensity) for (mz, intensity) in self._inner.peaks]

    @property
    def total_ion_current(self) -> float:
        """Total ion current stored on the wrapped object."""
        return self._inner.total_ion_current

    # -------------------------------------------------------------------------
    # Utilities
    # -------------------------------------------------------------------------
    def sort_peaks(self, inplace: bool = True) -> "Spectrum":
        """
        Sort peaks by m/z value.

        Args:
            inplace: When True, replace this object's wrapped spectrum with
                the sorted one and return ``self``; otherwise return a new
                Spectrum and leave this one untouched.

        Returns:
            The spectrum holding the sorted peaks.
        """
        ordered = sorted(self.peaks, key=lambda peak: peak[0])
        # Comprehensions instead of ``zip(*ordered)``: a spectrum without any
        # peaks then yields two empty lists instead of failing to unpack.
        mz_sorted = [mz for mz, _ in ordered]
        intensity_sorted = [intensity for _, intensity in ordered]

        # Both modes go through __init__, so the binding always receives the
        # same float32 arrays as on first construction.
        rebuilt = Spectrum(
            id=self.id,
            file_id=self._inner.file_id,
            scan_start_time=self.scan_start_time,
            mz_array=mz_sorted,
            intensity_array=intensity_sorted,
            precursors=self.precursors,
            total_ion_current=self.total_ion_current,
        )
        if not inplace:
            return rebuilt

        self._inner = rebuilt._inner
        return self

    def __repr__(self):
        n_peaks = len(self._inner.peaks)
        # Count on the wrapped object directly; no need to build wrappers.
        n_precursors = len(self._inner.precursors)
        return f"<Spectrum id={self.id!r}, peaks={n_peaks}, precursors={n_precursors}>"
159
+
160
+
161
class Peptide:
    """
    High-level Python wrapper for a Rust PyPeptide.
    Handles sequence, modifications, and mass calculation.

    Attributes set by ``__init__``: ``sequence``, ``mods``,
    ``monoisotopic_mass`` and ``_inner`` (the wrapped PyPeptide).
    """

    def __init__(self, sequence: str, mods: Optional[List[float]] = None):  # TODO handle n-term and c-term
        """
        Parameters
        ----------
        sequence : str
            Peptide amino acid sequence (e.g., "PEPTIDEK")
        mods : List[float], optional
            Array of modification masses (length of sequence + 2).
            The first and last entries are passed to the Rust constructor
            separately from the per-residue ones (presumably the N- and
            C-terminal modifications -- confirm against the Rust signature).
            Default is zeros (no mods).

        Raises
        ------
        ValueError
            If ``mods`` is given but its length is not ``len(sequence) + 2``.
        """
        self.sequence = sequence

        if mods is None:
            mods = [0.0] * (len(sequence) + 2)
        elif len(mods) != (len(sequence) + 2):
            raise ValueError(
                f"Modification array must have length sequence ({len(sequence)}) + 2."
            )
        else:
            # Private copy: later edits to the caller's list must not drift
            # away from what was handed to Rust.
            mods = list(mods)

        self.mods = mods

        # --- Calculate precursor mass using pyteomics ---
        self.monoisotopic_mass = mass.calculate_mass(sequence=sequence) + sum(mods)

        # --- Create the Rust-side PyPeptide object ---
        self._inner = _rust.PyPeptide(sequence, self.monoisotopic_mass, mods[1:-1], mods[0], mods[-1])

    @classmethod
    def from_rust(cls, rust_peptide: "_rust.PyPeptide") -> "Peptide":
        """Construct a Python Peptide from a Rust PyPeptide object."""
        # NOTE(review): __init__ requires len(sequence) + 2 entries; confirm
        # that ``modifications`` on the Rust side includes the two terminal
        # slots, otherwise this raises ValueError.
        mods = list(rust_peptide.modifications)
        return cls(rust_peptide.sequence, mods)

    def theoretical_mz(self, z: int = 1) -> float:
        """
        Return the theoretical m/z of this peptide at charge ``z``.

        This is the instance-level computation that used to be defined under
        the name ``calculate_theoretical_mz`` and was unreachable because the
        static method of the same name replaced it in the class body.
        """
        return (self.monoisotopic_mass + (PROTON_MASS * z)) / z

    @staticmethod
    def calculate_theoretical_mz(seq, mods, z) -> float:
        """
        Compute the theoretical m/z for a peptide.

        Parameters
        ----------
        seq : str
            Peptide amino acid sequence
        mods : iterable of float
            Modification masses; their sum is added to the sequence mass
        z : int
            Charge state

        Returns
        -------
        float
            Theoretical m/z
        """
        monoisotopic_mass = mass.calculate_mass(sequence=seq)
        monoisotopic_mass += sum(mods)

        return (monoisotopic_mass + PROTON_MASS * z) / z

    def __repr__(self):
        mods_str = ", ".join(f"{m:.2f}" for m in self.mods)
        return (
            f"<Peptide seq='{self.sequence}' "
            f"mass={self.monoisotopic_mass:.4f} "
            f"mods=[{mods_str}]>"
        )
peppy_sage/indexing.py ADDED
@@ -0,0 +1,124 @@
1
+ # peppy_sage/indexing.py
2
+
3
+ from typing import List, Optional, Tuple, Union
4
+ from .core import Peptide
5
+ from . import _rust
6
+
7
# Ion-series letter -> matching PyKind entry of the Rust bindings.
# TODO: other ion types (a, c, x, z) are not mapped yet.
ION_KIND_MAP = dict(
    b=_rust.PyKind.B,
    y=_rust.PyKind.Y,
)
15
+
16
def _to_rust_peptide(x: Union["Peptide", "_rust.PyPeptide", str]) -> "_rust.PyPeptide":
    """
    Coerce ``x`` into a Rust PyPeptide.

    Args:
        x: A PyPeptide (returned unchanged), a Peptide wrapper (its wrapped
            object is returned), or a plain sequence string (an unmodified
            Peptide is built from it first).

    Raises:
        TypeError: If ``x`` is none of the accepted types.
    """
    if isinstance(x, _rust.PyPeptide):
        return x
    if isinstance(x, Peptide):
        # The wrapper keeps the raw binding object on ``_inner``.
        return x._inner
    if isinstance(x, str):
        # Go through the wrapper so the mass and the (empty) modification
        # arguments are supplied the same way as for every other peptide,
        # instead of calling PyPeptide with the sequence alone.
        return Peptide(x)._inner
    raise TypeError(
        f"Expected Peptide | PyPeptide | str, got {type(x).__name__}"
    )
27
+
28
class IndexedDatabase:
    """
    Pythonic wrapper around the Rust-backed PyIndexedDatabase.

    Example:
    >>> db = IndexedDatabase.from_peptides(
    ...     peptides=my_peptides,
    ...     ion_kinds=["b", "y"],
    ...     bucket_size=50,
    ...     generate_decoys=True,
    ...     decoy_tag="DECOY_"
    ... )
    >>> print(len(db.peptides))
    >>> print(db.fragment_count)
    """

    def __init__(self, _inner: "_rust.PyIndexedDatabase"):
        """Wrap an already-built PyIndexedDatabase."""
        self._inner = _inner

    # -------------------------------------------------------------------------
    # Constructors
    # -------------------------------------------------------------------------
    @classmethod
    def from_peptides(
        cls,
        peptides: List[Union["Peptide", "_rust.PyPeptide", str]],
        ion_kinds: List[str],
        bucket_size: int = 8192,
        min_ion_index: int = 1,
        generate_decoys: bool = False,
        decoy_tag: str = "rev_",
        peptide_min_mass: float = 500,
        peptide_max_mass: float = 5000.0,
    ) -> "IndexedDatabase":
        """
        Build an indexed database from a list of peptides and configuration.

        Args:
            peptides: Peptide wrappers, raw PyPeptide objects, or sequence
                strings; each is converted with ``_to_rust_peptide``.
            ion_kinds: List of ion types to include, e.g. ["b", "y"]
                (matched case-insensitively against ``ION_KIND_MAP``).
            bucket_size: Number of fragments per mass bucket.
            min_ion_index: Minimum ion index to include.
            generate_decoys: Whether to generate decoy peptides.
            decoy_tag: Prefix used to label decoy peptides.
            peptide_min_mass: Minimum peptide monoisotopic mass to include.
            peptide_max_mass: Maximum peptide monoisotopic mass to include.

        Raises:
            ValueError: If an entry of ``ion_kinds`` is not a key of
                ``ION_KIND_MAP``.
        """
        ion_enum_list = []
        for kind in ion_kinds:
            # Keep the try narrow: only the lookup may signal an unknown kind.
            try:
                make_kind = ION_KIND_MAP[kind.lower()]
            except KeyError as err:
                raise ValueError(
                    f"Invalid ion kind: {err.args[0]}. Must be one of {list(ION_KIND_MAP)}"
                ) from err
            ion_enum_list.append(make_kind())

        rust_peptides = [_to_rust_peptide(p) for p in peptides]

        inner = _rust.PyIndexedDatabase.from_peptides_and_config(
            peptides=rust_peptides,
            bucket_size=bucket_size,
            ion_kinds=ion_enum_list,
            min_ion_index=min_ion_index,
            generate_decoys=generate_decoys,
            decoy_tag=decoy_tag,
            peptide_min_mass=peptide_min_mass,
            peptide_max_mass=peptide_max_mass,
        )

        return cls(inner)

    # -------------------------------------------------------------------------
    # Properties
    # -------------------------------------------------------------------------
    @property
    def peptides(self) -> List["_rust.PyPeptide"]:
        """Return the list of peptides stored in this indexed database."""
        return self._inner.peptides

    @property
    def fragment_count(self) -> int:
        """Return the total number of fragments indexed."""
        return self._inner.fragment_count()

    # -------------------------------------------------------------------------
    # Debug / Inspection
    # -------------------------------------------------------------------------
    def debug_fragment_summary(self) -> List[Tuple[float, int]]:
        """
        Return a summary of (fragment_mz, peptide_index) tuples for debugging.
        """
        return self._inner.debug_fragment_summary()

    def summary(self, limit: Optional[int] = 10) -> None:
        """
        Print a short summary of fragments for inspection.

        Args:
            limit: Maximum number of example fragments to print; ``None``
                prints all of them.
        """
        fragments = self.debug_fragment_summary()
        print(f"Indexed fragments: {self.fragment_count}")
        print("Example fragments:")
        for mz, pep_idx in fragments[:limit]:
            print(f"  - m/z={mz:.4f}, peptide_index={pep_idx}")
peppy_sage/scoring.py ADDED
@@ -0,0 +1,60 @@
1
+ from . import _rust
2
+ from . import Peptide
3
+
4
class Scorer:
    """
    High-level scoring interface for peppy_sage.
    Wraps the underlying Rust-based PyScorer for convenience.
    """

    def __init__(
        self,
        precursor_tol_da: tuple[float, float] = (-1.0, 1.0),
        fragment_tol_ppm: tuple[float, float] = (-5.0, 5.0),
        wide_window: bool = False,
        chimera: bool = True,
        report_psms: int = 10,
        min_isotope_err: int = -1,
        max_isotope_err: int = 3,
        min_precursor_charge: int = 1,
        max_precursor_charge: int = 4,
        min_matched_peaks: int = 0,
        annotate_matches: bool = True,
        max_fragment_charge: int = 1,
    ):
        """
        Initialize the high-level scoring object.

        ``precursor_tol_da`` and ``fragment_tol_ppm`` are unpacked into
        ``PyTolerance.Da`` and ``PyTolerance.Ppm`` respectively; every other
        argument is forwarded positionally, unchanged, to ``PyScorer``.
        """
        precursor_tol = _rust.PyTolerance.Da(*precursor_tol_da)
        fragment_tol = _rust.PyTolerance.Ppm(*fragment_tol_ppm)

        self._scorer = _rust.PyScorer(
            precursor_tol,
            fragment_tol,
            wide_window,
            chimera,
            report_psms,
            min_isotope_err,
            max_isotope_err,
            min_precursor_charge,
            max_precursor_charge,
            min_matched_peaks,
            annotate_matches,
            max_fragment_charge,
        )

    def score(self, db, spectrum):
        """
        Score a single spectrum (or a list of spectra) against the given database.
        Returns a list of features (PSMs).

        A list or tuple of spectra is delegated to ``score_many``; anything
        else is treated as one spectrum wrapper exposing ``_inner``.
        """
        if isinstance(spectrum, (list, tuple)):
            # A sequence has no ``_inner`` of its own; score its elements.
            return self.score_many(db, spectrum)

        return self._scorer.score_spectra(db._inner, spectrum._inner)

    def score_many(self, db, spectra):
        """
        Score many spectra against the given database.

        Returns the result of the Rust ``score_many_spectra`` call as-is;
        no conversion to Python ``Peptide`` objects is performed here.
        """
        rust_spectra = [s._inner for s in spectra]
        return self._scorer.score_many_spectra(db._inner, rust_spectra)
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: peppy_sage
3
+ Version: 0.1.3
4
+ Classifier: Programming Language :: Python :: 3
5
+ Classifier: Programming Language :: Rust
6
+ Classifier: Operating System :: OS Independent
7
+ Summary: Hybrid Rust/Python peptide scoring library
8
+ Author: Daniel Geiszler
9
+ License: MIT
10
+ Requires-Python: >=3.11
@@ -0,0 +1,8 @@
1
+ peppy_sage-0.1.3.dist-info/METADATA,sha256=p4xp97UvxhRJwPh9SXbeVbcA5P3Ci4J5QNvXDvs82wc,303
2
+ peppy_sage-0.1.3.dist-info/WHEEL,sha256=58FYlvu5vg1OXOKVWfxLInpqINB3aeZOnVlG23zDrM0,155
3
+ peppy_sage/__init__.py,sha256=u5XtfUF6wqNAGMMfohjNAT7yQSFU5sgbQYVr_W1MTYI,1367
4
+ peppy_sage/_peppy_sage.pypy39-pp73-s390x-linux-gnu.so,sha256=tjxxQJ_rQSYK_Lmc4ycbCtRXdgNkTO-YjbPp8vRniwA,1407848
5
+ peppy_sage/core.py,sha256=ko0Mop95BY5hjXt6qll3G7AUhPMYAnGOSZ_p6tzqjD8,7549
6
+ peppy_sage/indexing.py,sha256=4jV_kcx9bp8R-Ql18i9mPv4XIX1vFGrr84S9r2y8wXQ,4517
7
+ peppy_sage/scoring.py,sha256=NmSiNSV12L4quuAAXS-TLIuiLUXRmA7qE5Z8ArNZZbU,1904
8
+ peppy_sage-0.1.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.10.1)
3
+ Root-Is-Purelib: false
4
+ Tag: pp39-pypy39_pp73-manylinux_2_17_s390x
5
+ Tag: pp39-pypy39_pp73-manylinux2014_s390x