imspy-predictors 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imspy_predictors/__init__.py +280 -0
- imspy_predictors/ccs/__init__.py +32 -0
- imspy_predictors/ccs/predictors.py +768 -0
- imspy_predictors/ccs/utility.py +84 -0
- imspy_predictors/data_utils.py +589 -0
- imspy_predictors/hashing.py +255 -0
- imspy_predictors/intensity/__init__.py +41 -0
- imspy_predictors/intensity/predictors.py +882 -0
- imspy_predictors/intensity/utility.py +458 -0
- imspy_predictors/ionization/__init__.py +25 -0
- imspy_predictors/ionization/predictors.py +518 -0
- imspy_predictors/koina_models/__init__.py +92 -0
- imspy_predictors/koina_models/access_models.py +371 -0
- imspy_predictors/koina_models/input_filters.py +488 -0
- imspy_predictors/lazy_imports.py +126 -0
- imspy_predictors/losses.py +419 -0
- imspy_predictors/mixture.py +350 -0
- imspy_predictors/models/__init__.py +57 -0
- imspy_predictors/models/heads.py +561 -0
- imspy_predictors/models/transformer.py +317 -0
- imspy_predictors/models/unified.py +608 -0
- imspy_predictors/pretrained/__init__.py +0 -0
- imspy_predictors/pretrained/ccs/test_metrics.json +7 -0
- imspy_predictors/pretrained/charge/test_metrics.json +5 -0
- imspy_predictors/pretrained/hub.py +161 -0
- imspy_predictors/pretrained/rt/test_metrics.json +7 -0
- imspy_predictors/pretrained/tokenizer-ptm.json +1 -0
- imspy_predictors/pretrained/unimod-vocab.json +1055 -0
- imspy_predictors/rt/__init__.py +21 -0
- imspy_predictors/rt/predictors.py +540 -0
- imspy_predictors/training.py +1271 -0
- imspy_predictors/utilities/__init__.py +29 -0
- imspy_predictors/utilities/hf_tokenizers.py +87 -0
- imspy_predictors/utilities/simple_tokenizer.py +312 -0
- imspy_predictors/utilities/tokenizers.py +232 -0
- imspy_predictors/utility.py +328 -0
- imspy_predictors-0.5.0.dist-info/METADATA +110 -0
- imspy_predictors-0.5.0.dist-info/RECORD +39 -0
- imspy_predictors-0.5.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""
|
|
2
|
+
imspy_predictors - ML-based predictors for CCS, retention time, fragment intensity, and charge state in mass spectrometry.
|
|
3
|
+
|
|
4
|
+
This package provides machine learning models for predicting peptide properties:
|
|
5
|
+
- CCS (Collision Cross Section) / Ion Mobility
|
|
6
|
+
- Retention Time
|
|
7
|
+
- Fragment Intensity (via Koina/Prosit)
|
|
8
|
+
- Charge State / Ionization
|
|
9
|
+
|
|
10
|
+
All models use PyTorch as the deep learning backend.
|
|
11
|
+
|
|
12
|
+
Requires imspy-core for core data structures and Rust tokenizer.
|
|
13
|
+
|
|
14
|
+
Optional dependencies:
|
|
15
|
+
- koina: For remote model access via Koina servers (pip install imspy-predictors[koina])
|
|
16
|
+
- imspy-search: For sagepy-based PSM predictions
|
|
17
|
+
- imspy-simulation: For simulation utilities
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
__version__ = "0.5.0"
|
|
21
|
+
|
|
22
|
+
# Availability flags; each stays False unless its guarded import below succeeds
|
|
23
|
+
_IMSPY_CORE_AVAILABLE = False
|
|
24
|
+
_TORCH_AVAILABLE = False
|
|
25
|
+
|
|
26
|
+
# Core utility functions (no external deps)
|
|
27
|
+
from imspy_predictors.utility import (
|
|
28
|
+
get_model_path,
|
|
29
|
+
load_tokenizer_from_resources,
|
|
30
|
+
InMemoryCheckpoint,
|
|
31
|
+
get_device,
|
|
32
|
+
count_parameters,
|
|
33
|
+
save_model_checkpoint,
|
|
34
|
+
load_model_checkpoint,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Hashing utilities (PyTorch only, no imspy_core)
|
|
38
|
+
from imspy_predictors.hashing import (
|
|
39
|
+
CosimHasher,
|
|
40
|
+
TimsHasher,
|
|
41
|
+
SpectralHasher,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# Mixture models (PyTorch only, no imspy_core)
|
|
45
|
+
from imspy_predictors.mixture import (
|
|
46
|
+
GaussianMixtureModel,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# CCS / Ion Mobility predictors (requires imspy_core)
|
|
50
|
+
try:
|
|
51
|
+
from imspy_predictors.ccs import (
|
|
52
|
+
PeptideIonMobilityApex,
|
|
53
|
+
DeepPeptideIonMobilityApex,
|
|
54
|
+
SquareRootProjectionLayer,
|
|
55
|
+
load_deep_ccs_predictor,
|
|
56
|
+
get_sqrt_slopes_and_intercepts,
|
|
57
|
+
predict_inverse_ion_mobility_with_koina,
|
|
58
|
+
)
|
|
59
|
+
_IMSPY_CORE_AVAILABLE = True
|
|
60
|
+
except ImportError:
|
|
61
|
+
PeptideIonMobilityApex = None
|
|
62
|
+
DeepPeptideIonMobilityApex = None
|
|
63
|
+
SquareRootProjectionLayer = None
|
|
64
|
+
load_deep_ccs_predictor = None
|
|
65
|
+
get_sqrt_slopes_and_intercepts = None
|
|
66
|
+
predict_inverse_ion_mobility_with_koina = None
|
|
67
|
+
|
|
68
|
+
# Retention time predictors (requires imspy_core)
|
|
69
|
+
try:
|
|
70
|
+
from imspy_predictors.rt import (
|
|
71
|
+
PeptideChromatographyApex,
|
|
72
|
+
DeepChromatographyApex,
|
|
73
|
+
load_deep_retention_time_predictor,
|
|
74
|
+
predict_retention_time_with_koina,
|
|
75
|
+
linear_map,
|
|
76
|
+
)
|
|
77
|
+
except ImportError:
|
|
78
|
+
PeptideChromatographyApex = None
|
|
79
|
+
DeepChromatographyApex = None
|
|
80
|
+
load_deep_retention_time_predictor = None
|
|
81
|
+
predict_retention_time_with_koina = None
|
|
82
|
+
linear_map = None
|
|
83
|
+
|
|
84
|
+
# Fragment intensity predictors (requires imspy_core)
|
|
85
|
+
try:
|
|
86
|
+
from imspy_predictors.intensity import (
|
|
87
|
+
IonIntensityPredictor,
|
|
88
|
+
Prosit2023TimsTofWrapper,
|
|
89
|
+
get_collision_energy_calibration_factor,
|
|
90
|
+
remove_unimod_annotation,
|
|
91
|
+
predict_fragment_intensities_with_koina,
|
|
92
|
+
post_process_predicted_fragment_spectra,
|
|
93
|
+
get_prosit_intensity_flat_labels,
|
|
94
|
+
)
|
|
95
|
+
except ImportError:
|
|
96
|
+
IonIntensityPredictor = None
|
|
97
|
+
Prosit2023TimsTofWrapper = None
|
|
98
|
+
get_collision_energy_calibration_factor = None
|
|
99
|
+
remove_unimod_annotation = None
|
|
100
|
+
predict_fragment_intensities_with_koina = None
|
|
101
|
+
post_process_predicted_fragment_spectra = None
|
|
102
|
+
get_prosit_intensity_flat_labels = None
|
|
103
|
+
|
|
104
|
+
# Charge state / ionization predictors (requires imspy_core)
|
|
105
|
+
try:
|
|
106
|
+
from imspy_predictors.ionization import (
|
|
107
|
+
PeptideChargeStateDistribution,
|
|
108
|
+
BinomialChargeStateDistributionModel,
|
|
109
|
+
DeepChargeStateDistribution,
|
|
110
|
+
load_deep_charge_state_predictor,
|
|
111
|
+
charge_state_distribution_from_sequence_rust,
|
|
112
|
+
charge_state_distributions_from_sequences_rust,
|
|
113
|
+
predict_peptide_flyability_with_koina,
|
|
114
|
+
)
|
|
115
|
+
except ImportError:
|
|
116
|
+
PeptideChargeStateDistribution = None
|
|
117
|
+
BinomialChargeStateDistributionModel = None
|
|
118
|
+
DeepChargeStateDistribution = None
|
|
119
|
+
load_deep_charge_state_predictor = None
|
|
120
|
+
charge_state_distribution_from_sequence_rust = None
|
|
121
|
+
charge_state_distributions_from_sequences_rust = None
|
|
122
|
+
predict_peptide_flyability_with_koina = None
|
|
123
|
+
|
|
124
|
+
# Tokenizers (requires imspy_core/Rust bindings)
|
|
125
|
+
try:
|
|
126
|
+
from imspy_predictors.utilities import ProformaTokenizer
|
|
127
|
+
except ImportError:
|
|
128
|
+
ProformaTokenizer = None
|
|
129
|
+
|
|
130
|
+
# HFProformaTokenizer requires transformers (optional)
|
|
131
|
+
try:
|
|
132
|
+
from imspy_predictors.utilities import HFProformaTokenizer
|
|
133
|
+
except (ImportError, TypeError):
|
|
134
|
+
HFProformaTokenizer = None
|
|
135
|
+
|
|
136
|
+
# Unified PyTorch models, dataset utilities, and training helpers (optional - requires torch)
|
|
137
|
+
try:
|
|
138
|
+
from imspy_predictors.models import (
|
|
139
|
+
PeptideTransformer,
|
|
140
|
+
PeptideTransformerConfig,
|
|
141
|
+
UnifiedPeptideModel,
|
|
142
|
+
TaskLoss,
|
|
143
|
+
CCSHead,
|
|
144
|
+
RTHead,
|
|
145
|
+
ChargeHead,
|
|
146
|
+
IntensityHead,
|
|
147
|
+
INSTRUMENT_TYPES,
|
|
148
|
+
INSTRUMENT_TO_ID,
|
|
149
|
+
get_instrument_id,
|
|
150
|
+
)
|
|
151
|
+
from imspy_predictors.data_utils import (
|
|
152
|
+
PeptideDataset,
|
|
153
|
+
HuggingFaceDatasetWrapper,
|
|
154
|
+
create_dataloader,
|
|
155
|
+
collate_peptide_batch,
|
|
156
|
+
load_ionmob_dataset,
|
|
157
|
+
load_prospect_rt_dataset,
|
|
158
|
+
load_prospect_charge_dataset,
|
|
159
|
+
load_prospect_ms2_dataset,
|
|
160
|
+
load_timstof_ms2_dataset,
|
|
161
|
+
)
|
|
162
|
+
from imspy_predictors.training import (
|
|
163
|
+
Trainer,
|
|
164
|
+
TrainingConfig,
|
|
165
|
+
EarlyStopping,
|
|
166
|
+
MetricTracker,
|
|
167
|
+
train_ccs_model,
|
|
168
|
+
train_rt_model,
|
|
169
|
+
train_intensity_model,
|
|
170
|
+
)
|
|
171
|
+
_TORCH_AVAILABLE = True
|
|
172
|
+
except ImportError:
|
|
173
|
+
PeptideTransformer = None
|
|
174
|
+
PeptideTransformerConfig = None
|
|
175
|
+
UnifiedPeptideModel = None
|
|
176
|
+
TaskLoss = None
|
|
177
|
+
CCSHead = None
|
|
178
|
+
RTHead = None
|
|
179
|
+
ChargeHead = None
|
|
180
|
+
IntensityHead = None
|
|
181
|
+
INSTRUMENT_TYPES = None
|
|
182
|
+
INSTRUMENT_TO_ID = None
|
|
183
|
+
get_instrument_id = None
|
|
184
|
+
PeptideDataset = None
|
|
185
|
+
HuggingFaceDatasetWrapper = None
|
|
186
|
+
create_dataloader = None
|
|
187
|
+
collate_peptide_batch = None
|
|
188
|
+
load_ionmob_dataset = None
|
|
189
|
+
load_prospect_rt_dataset = None
|
|
190
|
+
load_prospect_charge_dataset = None
|
|
191
|
+
load_prospect_ms2_dataset = None
|
|
192
|
+
load_timstof_ms2_dataset = None
|
|
193
|
+
Trainer = None
|
|
194
|
+
TrainingConfig = None
|
|
195
|
+
EarlyStopping = None
|
|
196
|
+
MetricTracker = None
|
|
197
|
+
train_ccs_model = None
|
|
198
|
+
train_rt_model = None
|
|
199
|
+
train_intensity_model = None
|
|
200
|
+
|
|
201
|
+
__all__ = [
|
|
202
|
+
# Version
|
|
203
|
+
'__version__',
|
|
204
|
+
# Utility
|
|
205
|
+
'get_model_path',
|
|
206
|
+
'load_tokenizer_from_resources',
|
|
207
|
+
'InMemoryCheckpoint',
|
|
208
|
+
'get_device',
|
|
209
|
+
'count_parameters',
|
|
210
|
+
'save_model_checkpoint',
|
|
211
|
+
'load_model_checkpoint',
|
|
212
|
+
# Hashing
|
|
213
|
+
'CosimHasher',
|
|
214
|
+
'TimsHasher',
|
|
215
|
+
'SpectralHasher',
|
|
216
|
+
# Mixture
|
|
217
|
+
'GaussianMixtureModel',
|
|
218
|
+
# CCS
|
|
219
|
+
'PeptideIonMobilityApex',
|
|
220
|
+
'DeepPeptideIonMobilityApex',
|
|
221
|
+
'SquareRootProjectionLayer',
|
|
222
|
+
'load_deep_ccs_predictor',
|
|
223
|
+
'get_sqrt_slopes_and_intercepts',
|
|
224
|
+
'predict_inverse_ion_mobility_with_koina',
|
|
225
|
+
# RT
|
|
226
|
+
'PeptideChromatographyApex',
|
|
227
|
+
'DeepChromatographyApex',
|
|
228
|
+
'load_deep_retention_time_predictor',
|
|
229
|
+
'predict_retention_time_with_koina',
|
|
230
|
+
'linear_map',
|
|
231
|
+
# Intensity
|
|
232
|
+
'IonIntensityPredictor',
|
|
233
|
+
'Prosit2023TimsTofWrapper',
|
|
234
|
+
'get_collision_energy_calibration_factor',
|
|
235
|
+
'remove_unimod_annotation',
|
|
236
|
+
'predict_fragment_intensities_with_koina',
|
|
237
|
+
'post_process_predicted_fragment_spectra',
|
|
238
|
+
'get_prosit_intensity_flat_labels',
|
|
239
|
+
# Ionization
|
|
240
|
+
'PeptideChargeStateDistribution',
|
|
241
|
+
'BinomialChargeStateDistributionModel',
|
|
242
|
+
'DeepChargeStateDistribution',
|
|
243
|
+
'load_deep_charge_state_predictor',
|
|
244
|
+
'charge_state_distribution_from_sequence_rust',
|
|
245
|
+
'charge_state_distributions_from_sequences_rust',
|
|
246
|
+
'predict_peptide_flyability_with_koina',
|
|
247
|
+
# Tokenizers
|
|
248
|
+
'ProformaTokenizer',
|
|
249
|
+
'HFProformaTokenizer',
|
|
250
|
+
# PyTorch models (new unified architecture)
|
|
251
|
+
'PeptideTransformer',
|
|
252
|
+
'PeptideTransformerConfig',
|
|
253
|
+
'UnifiedPeptideModel',
|
|
254
|
+
'TaskLoss',
|
|
255
|
+
'CCSHead',
|
|
256
|
+
'RTHead',
|
|
257
|
+
'ChargeHead',
|
|
258
|
+
'IntensityHead',
|
|
259
|
+
'INSTRUMENT_TYPES',
|
|
260
|
+
'INSTRUMENT_TO_ID',
|
|
261
|
+
'get_instrument_id',
|
|
262
|
+
# Data utilities
|
|
263
|
+
'PeptideDataset',
|
|
264
|
+
'HuggingFaceDatasetWrapper',
|
|
265
|
+
'create_dataloader',
|
|
266
|
+
'collate_peptide_batch',
|
|
267
|
+
'load_ionmob_dataset',
|
|
268
|
+
'load_prospect_rt_dataset',
|
|
269
|
+
'load_prospect_charge_dataset',
|
|
270
|
+
'load_prospect_ms2_dataset',
|
|
271
|
+
'load_timstof_ms2_dataset',
|
|
272
|
+
# Training utilities
|
|
273
|
+
'Trainer',
|
|
274
|
+
'TrainingConfig',
|
|
275
|
+
'EarlyStopping',
|
|
276
|
+
'MetricTracker',
|
|
277
|
+
'train_ccs_model',
|
|
278
|
+
'train_rt_model',
|
|
279
|
+
'train_intensity_model',
|
|
280
|
+
]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""CCS (Collision Cross Section) prediction module."""
|
|
2
|
+
|
|
3
|
+
from imspy_predictors.ccs.predictors import (
|
|
4
|
+
PeptideIonMobilityApex,
|
|
5
|
+
DeepPeptideIonMobilityApex,
|
|
6
|
+
SquareRootProjectionLayer,
|
|
7
|
+
load_deep_ccs_predictor,
|
|
8
|
+
get_sqrt_slopes_and_intercepts,
|
|
9
|
+
predict_inverse_ion_mobility_with_koina,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from imspy_predictors.ccs.utility import (
|
|
13
|
+
load_tokenizer_from_resources as load_ccs_tokenizer,
|
|
14
|
+
token_list_from_sequence,
|
|
15
|
+
tokenize_and_pad,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
# Predictors
|
|
20
|
+
'PeptideIonMobilityApex',
|
|
21
|
+
'DeepPeptideIonMobilityApex',
|
|
22
|
+
'SquareRootProjectionLayer',
|
|
23
|
+
# Loaders
|
|
24
|
+
'load_deep_ccs_predictor',
|
|
25
|
+
'load_ccs_tokenizer',
|
|
26
|
+
# Utilities
|
|
27
|
+
'get_sqrt_slopes_and_intercepts',
|
|
28
|
+
'token_list_from_sequence',
|
|
29
|
+
'tokenize_and_pad',
|
|
30
|
+
# Koina
|
|
31
|
+
'predict_inverse_ion_mobility_with_koina',
|
|
32
|
+
]
|