imspy-predictors 0.5.0 (imspy_predictors-0.5.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. imspy_predictors/__init__.py +280 -0
  2. imspy_predictors/ccs/__init__.py +32 -0
  3. imspy_predictors/ccs/predictors.py +768 -0
  4. imspy_predictors/ccs/utility.py +84 -0
  5. imspy_predictors/data_utils.py +589 -0
  6. imspy_predictors/hashing.py +255 -0
  7. imspy_predictors/intensity/__init__.py +41 -0
  8. imspy_predictors/intensity/predictors.py +882 -0
  9. imspy_predictors/intensity/utility.py +458 -0
  10. imspy_predictors/ionization/__init__.py +25 -0
  11. imspy_predictors/ionization/predictors.py +518 -0
  12. imspy_predictors/koina_models/__init__.py +92 -0
  13. imspy_predictors/koina_models/access_models.py +371 -0
  14. imspy_predictors/koina_models/input_filters.py +488 -0
  15. imspy_predictors/lazy_imports.py +126 -0
  16. imspy_predictors/losses.py +419 -0
  17. imspy_predictors/mixture.py +350 -0
  18. imspy_predictors/models/__init__.py +57 -0
  19. imspy_predictors/models/heads.py +561 -0
  20. imspy_predictors/models/transformer.py +317 -0
  21. imspy_predictors/models/unified.py +608 -0
  22. imspy_predictors/pretrained/__init__.py +0 -0
  23. imspy_predictors/pretrained/ccs/test_metrics.json +7 -0
  24. imspy_predictors/pretrained/charge/test_metrics.json +5 -0
  25. imspy_predictors/pretrained/hub.py +161 -0
  26. imspy_predictors/pretrained/rt/test_metrics.json +7 -0
  27. imspy_predictors/pretrained/tokenizer-ptm.json +1 -0
  28. imspy_predictors/pretrained/unimod-vocab.json +1055 -0
  29. imspy_predictors/rt/__init__.py +21 -0
  30. imspy_predictors/rt/predictors.py +540 -0
  31. imspy_predictors/training.py +1271 -0
  32. imspy_predictors/utilities/__init__.py +29 -0
  33. imspy_predictors/utilities/hf_tokenizers.py +87 -0
  34. imspy_predictors/utilities/simple_tokenizer.py +312 -0
  35. imspy_predictors/utilities/tokenizers.py +232 -0
  36. imspy_predictors/utility.py +328 -0
  37. imspy_predictors-0.5.0.dist-info/METADATA +110 -0
  38. imspy_predictors-0.5.0.dist-info/RECORD +39 -0
  39. imspy_predictors-0.5.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,280 @@
1
+ """
2
+ imspy_predictors - ML-based predictors for CCS, retention time, and fragment intensity in mass spectrometry.
3
+
4
+ This package provides machine learning models for predicting peptide properties:
5
+ - CCS (Collision Cross Section) / Ion Mobility
6
+ - Retention Time
7
+ - Fragment Intensity (via Koina/Prosit)
8
+ - Charge State / Ionization
9
+
10
+ All models use PyTorch as the deep learning backend.
11
+
12
+ Requires imspy-core for core data structures and the Rust tokenizer; predictors whose import fails are exported as None instead of raising.
13
+
14
+ Optional dependencies:
15
+ - koina: For remote model access via Koina servers (pip install imspy-predictors[koina])
16
+ - imspy-search: For sagepy-based PSM predictions
17
+ - imspy-simulation: For simulation utilities
18
+ """
19
+
20
+ __version__ = "0.5.0"
21
+
22
+ # Availability flags: each starts False and is set to True below once its guarded import block succeeds
23
+ _IMSPY_CORE_AVAILABLE = False
24
+ _TORCH_AVAILABLE = False
25
+
26
+ # Core utility functions (no external deps)
27
+ from imspy_predictors.utility import (
28
+ get_model_path,
29
+ load_tokenizer_from_resources,
30
+ InMemoryCheckpoint,
31
+ get_device,
32
+ count_parameters,
33
+ save_model_checkpoint,
34
+ load_model_checkpoint,
35
+ )
36
+
37
+ # Hashing utilities (PyTorch only, no imspy_core)
38
+ from imspy_predictors.hashing import (
39
+ CosimHasher,
40
+ TimsHasher,
41
+ SpectralHasher,
42
+ )
43
+
44
+ # Mixture models (PyTorch only, no imspy_core)
45
+ from imspy_predictors.mixture import (
46
+ GaussianMixtureModel,
47
+ )
48
+
49
+ # CCS / Ion Mobility predictors (requires imspy_core); on ImportError every name is set to None. Only this block sets _IMSPY_CORE_AVAILABLE.
50
+ try:
51
+ from imspy_predictors.ccs import (
52
+ PeptideIonMobilityApex,
53
+ DeepPeptideIonMobilityApex,
54
+ SquareRootProjectionLayer,
55
+ load_deep_ccs_predictor,
56
+ get_sqrt_slopes_and_intercepts,
57
+ predict_inverse_ion_mobility_with_koina,
58
+ )
59
+ _IMSPY_CORE_AVAILABLE = True
60
+ except ImportError:
61
+ PeptideIonMobilityApex = None
62
+ DeepPeptideIonMobilityApex = None
63
+ SquareRootProjectionLayer = None
64
+ load_deep_ccs_predictor = None
65
+ get_sqrt_slopes_and_intercepts = None
66
+ predict_inverse_ion_mobility_with_koina = None
67
+
68
+ # Retention time predictors (requires imspy_core)
69
+ try:
70
+ from imspy_predictors.rt import (
71
+ PeptideChromatographyApex,
72
+ DeepChromatographyApex,
73
+ load_deep_retention_time_predictor,
74
+ predict_retention_time_with_koina,
75
+ linear_map,
76
+ )
77
+ except ImportError:
78
+ PeptideChromatographyApex = None
79
+ DeepChromatographyApex = None
80
+ load_deep_retention_time_predictor = None
81
+ predict_retention_time_with_koina = None
82
+ linear_map = None
83
+
84
+ # Fragment intensity predictors (requires imspy_core)
85
+ try:
86
+ from imspy_predictors.intensity import (
87
+ IonIntensityPredictor,
88
+ Prosit2023TimsTofWrapper,
89
+ get_collision_energy_calibration_factor,
90
+ remove_unimod_annotation,
91
+ predict_fragment_intensities_with_koina,
92
+ post_process_predicted_fragment_spectra,
93
+ get_prosit_intensity_flat_labels,
94
+ )
95
+ except ImportError:
96
+ IonIntensityPredictor = None
97
+ Prosit2023TimsTofWrapper = None
98
+ get_collision_energy_calibration_factor = None
99
+ remove_unimod_annotation = None
100
+ predict_fragment_intensities_with_koina = None
101
+ post_process_predicted_fragment_spectra = None
102
+ get_prosit_intensity_flat_labels = None
103
+
104
+ # Charge state / ionization predictors (requires imspy_core)
105
+ try:
106
+ from imspy_predictors.ionization import (
107
+ PeptideChargeStateDistribution,
108
+ BinomialChargeStateDistributionModel,
109
+ DeepChargeStateDistribution,
110
+ load_deep_charge_state_predictor,
111
+ charge_state_distribution_from_sequence_rust,
112
+ charge_state_distributions_from_sequences_rust,
113
+ predict_peptide_flyability_with_koina,
114
+ )
115
+ except ImportError:
116
+ PeptideChargeStateDistribution = None
117
+ BinomialChargeStateDistributionModel = None
118
+ DeepChargeStateDistribution = None
119
+ load_deep_charge_state_predictor = None
120
+ charge_state_distribution_from_sequence_rust = None
121
+ charge_state_distributions_from_sequences_rust = None
122
+ predict_peptide_flyability_with_koina = None
123
+
124
+ # Tokenizers (requires imspy_core/Rust bindings)
125
+ try:
126
+ from imspy_predictors.utilities import ProformaTokenizer
127
+ except ImportError:
128
+ ProformaTokenizer = None
129
+
130
+ # HFProformaTokenizer requires transformers (optional)
131
+ try:
132
+ from imspy_predictors.utilities import HFProformaTokenizer
133
+ except (ImportError, TypeError):
134
+ HFProformaTokenizer = None
135
+
136
+ # New PyTorch models, data utilities, and training helpers (optional - requires torch); if any of these imports fails, all names are set to None
137
+ try:
138
+ from imspy_predictors.models import (
139
+ PeptideTransformer,
140
+ PeptideTransformerConfig,
141
+ UnifiedPeptideModel,
142
+ TaskLoss,
143
+ CCSHead,
144
+ RTHead,
145
+ ChargeHead,
146
+ IntensityHead,
147
+ INSTRUMENT_TYPES,
148
+ INSTRUMENT_TO_ID,
149
+ get_instrument_id,
150
+ )
151
+ from imspy_predictors.data_utils import (
152
+ PeptideDataset,
153
+ HuggingFaceDatasetWrapper,
154
+ create_dataloader,
155
+ collate_peptide_batch,
156
+ load_ionmob_dataset,
157
+ load_prospect_rt_dataset,
158
+ load_prospect_charge_dataset,
159
+ load_prospect_ms2_dataset,
160
+ load_timstof_ms2_dataset,
161
+ )
162
+ from imspy_predictors.training import (
163
+ Trainer,
164
+ TrainingConfig,
165
+ EarlyStopping,
166
+ MetricTracker,
167
+ train_ccs_model,
168
+ train_rt_model,
169
+ train_intensity_model,
170
+ )
171
+ _TORCH_AVAILABLE = True
172
+ except ImportError:
173
+ PeptideTransformer = None
174
+ PeptideTransformerConfig = None
175
+ UnifiedPeptideModel = None
176
+ TaskLoss = None
177
+ CCSHead = None
178
+ RTHead = None
179
+ ChargeHead = None
180
+ IntensityHead = None
181
+ INSTRUMENT_TYPES = None
182
+ INSTRUMENT_TO_ID = None
183
+ get_instrument_id = None
184
+ PeptideDataset = None
185
+ HuggingFaceDatasetWrapper = None
186
+ create_dataloader = None
187
+ collate_peptide_batch = None
188
+ load_ionmob_dataset = None
189
+ load_prospect_rt_dataset = None
190
+ load_prospect_charge_dataset = None
191
+ load_prospect_ms2_dataset = None
192
+ load_timstof_ms2_dataset = None
193
+ Trainer = None
194
+ TrainingConfig = None
195
+ EarlyStopping = None
196
+ MetricTracker = None
197
+ train_ccs_model = None
198
+ train_rt_model = None
199
+ train_intensity_model = None
200
+
201
+ __all__ = [
202
+ # Version
203
+ '__version__',
204
+ # Utility
205
+ 'get_model_path',
206
+ 'load_tokenizer_from_resources',
207
+ 'InMemoryCheckpoint',
208
+ 'get_device',
209
+ 'count_parameters',
210
+ 'save_model_checkpoint',
211
+ 'load_model_checkpoint',
212
+ # Hashing
213
+ 'CosimHasher',
214
+ 'TimsHasher',
215
+ 'SpectralHasher',
216
+ # Mixture
217
+ 'GaussianMixtureModel',
218
+ # CCS
219
+ 'PeptideIonMobilityApex',
220
+ 'DeepPeptideIonMobilityApex',
221
+ 'SquareRootProjectionLayer',
222
+ 'load_deep_ccs_predictor',
223
+ 'get_sqrt_slopes_and_intercepts',
224
+ 'predict_inverse_ion_mobility_with_koina',
225
+ # RT
226
+ 'PeptideChromatographyApex',
227
+ 'DeepChromatographyApex',
228
+ 'load_deep_retention_time_predictor',
229
+ 'predict_retention_time_with_koina',
230
+ 'linear_map',
231
+ # Intensity
232
+ 'IonIntensityPredictor',
233
+ 'Prosit2023TimsTofWrapper',
234
+ 'get_collision_energy_calibration_factor',
235
+ 'remove_unimod_annotation',
236
+ 'predict_fragment_intensities_with_koina',
237
+ 'post_process_predicted_fragment_spectra',
238
+ 'get_prosit_intensity_flat_labels',
239
+ # Ionization
240
+ 'PeptideChargeStateDistribution',
241
+ 'BinomialChargeStateDistributionModel',
242
+ 'DeepChargeStateDistribution',
243
+ 'load_deep_charge_state_predictor',
244
+ 'charge_state_distribution_from_sequence_rust',
245
+ 'charge_state_distributions_from_sequences_rust',
246
+ 'predict_peptide_flyability_with_koina',
247
+ # Tokenizers
248
+ 'ProformaTokenizer',
249
+ 'HFProformaTokenizer',
250
+ # PyTorch models (new unified architecture)
251
+ 'PeptideTransformer',
252
+ 'PeptideTransformerConfig',
253
+ 'UnifiedPeptideModel',
254
+ 'TaskLoss',
255
+ 'CCSHead',
256
+ 'RTHead',
257
+ 'ChargeHead',
258
+ 'IntensityHead',
259
+ 'INSTRUMENT_TYPES',
260
+ 'INSTRUMENT_TO_ID',
261
+ 'get_instrument_id',
262
+ # Data utilities
263
+ 'PeptideDataset',
264
+ 'HuggingFaceDatasetWrapper',
265
+ 'create_dataloader',
266
+ 'collate_peptide_batch',
267
+ 'load_ionmob_dataset',
268
+ 'load_prospect_rt_dataset',
269
+ 'load_prospect_charge_dataset',
270
+ 'load_prospect_ms2_dataset',
271
+ 'load_timstof_ms2_dataset',
272
+ # Training utilities
273
+ 'Trainer',
274
+ 'TrainingConfig',
275
+ 'EarlyStopping',
276
+ 'MetricTracker',
277
+ 'train_ccs_model',
278
+ 'train_rt_model',
279
+ 'train_intensity_model',
280
+ ]
@@ -0,0 +1,32 @@
1
+ """CCS (Collision Cross Section) prediction module."""
2
+
3
+ from imspy_predictors.ccs.predictors import (
4
+ PeptideIonMobilityApex,
5
+ DeepPeptideIonMobilityApex,
6
+ SquareRootProjectionLayer,
7
+ load_deep_ccs_predictor,
8
+ get_sqrt_slopes_and_intercepts,
9
+ predict_inverse_ion_mobility_with_koina,
10
+ )
11
+
12
+ from imspy_predictors.ccs.utility import (
13
+ load_tokenizer_from_resources as load_ccs_tokenizer,
14
+ token_list_from_sequence,
15
+ tokenize_and_pad,
16
+ )
17
+
18
+ __all__ = [
19
+ # Predictors
20
+ 'PeptideIonMobilityApex',
21
+ 'DeepPeptideIonMobilityApex',
22
+ 'SquareRootProjectionLayer',
23
+ # Loaders
24
+ 'load_deep_ccs_predictor',
25
+ 'load_ccs_tokenizer',
26
+ # Utilities
27
+ 'get_sqrt_slopes_and_intercepts',
28
+ 'token_list_from_sequence',
29
+ 'tokenize_and_pad',
30
+ # Koina
31
+ 'predict_inverse_ion_mobility_with_koina',
32
+ ]