alphabase 1.2.2__tar.gz → 1.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {alphabase-1.2.2/alphabase.egg-info → alphabase-1.2.4}/PKG-INFO +23 -3
- {alphabase-1.2.2 → alphabase-1.2.4}/README.md +14 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/__init__.py +1 -1
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/__pycache__/__init__.cpython-39.pyc +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/cli.py +1 -1
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/_const.py +6 -11
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/aa.py +57 -56
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/atom.py +71 -80
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/amino_acid.yaml +2 -2
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/common_constants.yaml +2 -2
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/protease.yaml +1 -1
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/psm_reader.yaml +11 -11
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/quant_reader_config.yaml +123 -123
- alphabase-1.2.4/alphabase/constants/element.py +10 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/isotope.py +90 -82
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/modification.py +170 -162
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/gui.py +1 -1
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/io/hdf.py +30 -61
- alphabase-1.2.4/alphabase/io/tempmmap.py +245 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/fragment.py +556 -508
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/mass_calc.py +72 -82
- alphabase-1.2.4/alphabase/peptide/mobility.py +107 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/precursor.py +264 -253
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/fasta.py +478 -508
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/inference.py +5 -3
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/lcp_digest.py +28 -18
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/protein_level_decoy.py +32 -38
- alphabase-1.2.4/alphabase/psm_reader/__init__.py +58 -0
- alphabase-1.2.4/alphabase/psm_reader/alphapept_reader.py +112 -0
- alphabase-1.2.4/alphabase/psm_reader/dia_psm_reader.py +191 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/psm_reader/maxquant_reader.py +110 -107
- alphabase-1.2.4/alphabase/psm_reader/msfragger_reader.py +188 -0
- alphabase-1.2.4/alphabase/psm_reader/pfind_reader.py +149 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/psm_reader/psm_reader.py +171 -207
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/psm_reader/sage_reader.py +84 -71
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/quantification/quant_reader/config_dict_loader.py +57 -32
- alphabase-1.2.4/alphabase/quantification/quant_reader/longformat_reader.py +253 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/quantification/quant_reader/plexdia_reformatter.py +24 -14
- alphabase-1.2.4/alphabase/quantification/quant_reader/quant_reader_manager.py +74 -0
- alphabase-1.2.4/alphabase/quantification/quant_reader/quantreader_utils.py +37 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/quantification/quant_reader/table_reformatter.py +82 -48
- alphabase-1.2.4/alphabase/quantification/quant_reader/wideformat_reader.py +29 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/base.py +263 -291
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/decoy.py +74 -80
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/flat.py +201 -147
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/reader.py +130 -130
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/translate.py +194 -170
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/validate.py +43 -42
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/utils.py +18 -15
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/yaml_utils.py +3 -1
- {alphabase-1.2.2 → alphabase-1.2.4/alphabase.egg-info}/PKG-INFO +23 -3
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/SOURCES.txt +1 -15
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/requires.txt +8 -2
- alphabase-1.2.4/pyproject.toml +6 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/setup.py +4 -9
- alphabase-1.2.2/alphabase/_modidx.py +0 -266
- alphabase-1.2.2/alphabase/constants/element.py +0 -1
- alphabase-1.2.2/alphabase/io/psm_reader/__init__.py +0 -2
- alphabase-1.2.2/alphabase/io/psm_reader/alphapept_reader.py +0 -1
- alphabase-1.2.2/alphabase/io/psm_reader/dia_psm_reader.py +0 -1
- alphabase-1.2.2/alphabase/io/psm_reader/dia_search_reader.py +0 -2
- alphabase-1.2.2/alphabase/io/psm_reader/maxquant_reader.py +0 -2
- alphabase-1.2.2/alphabase/io/psm_reader/msfragger_reader.py +0 -1
- alphabase-1.2.2/alphabase/io/psm_reader/pfind_reader.py +0 -1
- alphabase-1.2.2/alphabase/io/psm_reader/psm_reader.py +0 -1
- alphabase-1.2.2/alphabase/io/tempmmap.py +0 -154
- alphabase-1.2.2/alphabase/peptide/mobility.py +0 -109
- alphabase-1.2.2/alphabase/psm_reader/__init__.py +0 -18
- alphabase-1.2.2/alphabase/psm_reader/alphapept_reader.py +0 -108
- alphabase-1.2.2/alphabase/psm_reader/dia_psm_reader.py +0 -208
- alphabase-1.2.2/alphabase/psm_reader/msfragger_reader.py +0 -183
- alphabase-1.2.2/alphabase/psm_reader/pfind_reader.py +0 -148
- alphabase-1.2.2/alphabase/quantification/quant_reader/longformat_reader.py +0 -171
- alphabase-1.2.2/alphabase/quantification/quant_reader/quant_reader_manager.py +0 -48
- alphabase-1.2.2/alphabase/quantification/quant_reader/quantreader_utils.py +0 -35
- alphabase-1.2.2/alphabase/quantification/quant_reader/wideformat_reader.py +0 -22
- alphabase-1.2.2/alphabase/scoring/fdr.py +0 -161
- alphabase-1.2.2/alphabase/scoring/feature_extraction_base.py +0 -61
- alphabase-1.2.2/alphabase/scoring/ml_scoring.py +0 -374
- alphabase-1.2.2/alphabase/spectral_library/__init__.py +0 -0
- alphabase-1.2.2/alphabase/statistics/__init__.py +0 -0
- alphabase-1.2.2/alphabase/statistics/regression.py +0 -360
- {alphabase-1.2.2 → alphabase-1.2.4}/LICENSE +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/LICENSE.txt +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/MANIFEST.in +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/__init__.py +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/__emass_element.yaml +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/__used_mod.yaml +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/contaminants.fasta +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/modification.tsv +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/nist_element.yaml +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/io/__init__.py +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/__init__.py +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/__init__.py +0 -0
- {alphabase-1.2.2/alphabase/scoring → alphabase-1.2.4/alphabase/spectral_library}/__init__.py +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/dependency_links.txt +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/entry_points.txt +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/top_level.txt +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/setup.cfg +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/tests/test_cli.py +0 -0
- {alphabase-1.2.2 → alphabase-1.2.4}/tests/test_gui.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: alphabase
|
|
3
|
-
Version: 1.2.
|
|
3
|
+
Version: 1.2.4
|
|
4
4
|
Summary: An infrastructure Python package of the AlphaX ecosystem
|
|
5
5
|
Home-page: https://github.com/MannLabs/alphabase
|
|
6
6
|
Author: Mann Labs
|
|
@@ -37,6 +37,8 @@ Requires-Dist: regex
|
|
|
37
37
|
Requires-Dist: dask
|
|
38
38
|
Requires-Dist: dask_expr
|
|
39
39
|
Requires-Dist: pyahocorasick
|
|
40
|
+
Requires-Dist: pyteomics
|
|
41
|
+
Requires-Dist: lxml
|
|
40
42
|
Requires-Dist: pywin32; sys_platform == "win32"
|
|
41
43
|
Provides-Extra: development-stable
|
|
42
44
|
Requires-Dist: jupyter; extra == "development-stable"
|
|
@@ -44,7 +46,6 @@ Requires-Dist: twine; extra == "development-stable"
|
|
|
44
46
|
Requires-Dist: bumpversion; extra == "development-stable"
|
|
45
47
|
Requires-Dist: pipdeptree; extra == "development-stable"
|
|
46
48
|
Requires-Dist: ipykernel; extra == "development-stable"
|
|
47
|
-
Requires-Dist: nbdev; extra == "development-stable"
|
|
48
49
|
Requires-Dist: pyteomics; extra == "development-stable"
|
|
49
50
|
Requires-Dist: scikit-learn; extra == "development-stable"
|
|
50
51
|
Requires-Dist: matplotlib; extra == "development-stable"
|
|
@@ -69,13 +70,14 @@ Requires-Dist: regex; extra == "development-stable"
|
|
|
69
70
|
Requires-Dist: pydivsufsort; extra == "development-stable"
|
|
70
71
|
Requires-Dist: pyahocorasick; extra == "development-stable"
|
|
71
72
|
Requires-Dist: pytest; extra == "development-stable"
|
|
73
|
+
Requires-Dist: pre-commit==3.7.0; extra == "development-stable"
|
|
74
|
+
Requires-Dist: nbmake==1.5.3; extra == "development-stable"
|
|
72
75
|
Provides-Extra: development
|
|
73
76
|
Requires-Dist: jupyter; extra == "development"
|
|
74
77
|
Requires-Dist: twine; extra == "development"
|
|
75
78
|
Requires-Dist: bumpversion; extra == "development"
|
|
76
79
|
Requires-Dist: pipdeptree; extra == "development"
|
|
77
80
|
Requires-Dist: ipykernel; extra == "development"
|
|
78
|
-
Requires-Dist: nbdev; extra == "development"
|
|
79
81
|
Requires-Dist: pyteomics; extra == "development"
|
|
80
82
|
Requires-Dist: scikit-learn; extra == "development"
|
|
81
83
|
Requires-Dist: matplotlib; extra == "development"
|
|
@@ -100,6 +102,8 @@ Requires-Dist: regex; extra == "development"
|
|
|
100
102
|
Requires-Dist: pydivsufsort; extra == "development"
|
|
101
103
|
Requires-Dist: pyahocorasick; extra == "development"
|
|
102
104
|
Requires-Dist: pytest; extra == "development"
|
|
105
|
+
Requires-Dist: pre-commit; extra == "development"
|
|
106
|
+
Requires-Dist: nbmake; extra == "development"
|
|
103
107
|
Provides-Extra: stable
|
|
104
108
|
Requires-Dist: numba; extra == "stable"
|
|
105
109
|
Requires-Dist: numpy; extra == "stable"
|
|
@@ -116,6 +120,8 @@ Requires-Dist: regex; extra == "stable"
|
|
|
116
120
|
Requires-Dist: dask; extra == "stable"
|
|
117
121
|
Requires-Dist: dask_expr; extra == "stable"
|
|
118
122
|
Requires-Dist: pyahocorasick; extra == "stable"
|
|
123
|
+
Requires-Dist: pyteomics; extra == "stable"
|
|
124
|
+
Requires-Dist: lxml; extra == "stable"
|
|
119
125
|
|
|
120
126
|
# AlphaBase
|
|
121
127
|
|
|
@@ -315,6 +321,20 @@ For an even more interactive participation, check out the
|
|
|
315
321
|
[discussions](https://github.com/MannLabs/alphabase/discussions) and the
|
|
316
322
|
[the Contributors License Agreement](misc/CLA.md).
|
|
317
323
|
|
|
324
|
+
### Notes for developers
|
|
325
|
+
#### pre-commit hooks
|
|
326
|
+
It is highly recommended to use the provided pre-commit hooks, as the CI pipeline enforces all checks therein to
|
|
327
|
+
pass in order to merge a branch.
|
|
328
|
+
|
|
329
|
+
The hooks need to be installed once by
|
|
330
|
+
```bash
|
|
331
|
+
pre-commit install
|
|
332
|
+
```
|
|
333
|
+
You can run the checks yourself using:
|
|
334
|
+
```bash
|
|
335
|
+
pre-commit run --all-files
|
|
336
|
+
```
|
|
337
|
+
|
|
318
338
|
------------------------------------------------------------------------
|
|
319
339
|
|
|
320
340
|
## Changelog
|
|
@@ -196,6 +196,20 @@ For an even more interactive participation, check out the
|
|
|
196
196
|
[discussions](https://github.com/MannLabs/alphabase/discussions) and the
|
|
197
197
|
[the Contributors License Agreement](misc/CLA.md).
|
|
198
198
|
|
|
199
|
+
### Notes for developers
|
|
200
|
+
#### pre-commit hooks
|
|
201
|
+
It is highly recommended to use the provided pre-commit hooks, as the CI pipeline enforces all checks therein to
|
|
202
|
+
pass in order to merge a branch.
|
|
203
|
+
|
|
204
|
+
The hooks need to be installed once by
|
|
205
|
+
```bash
|
|
206
|
+
pre-commit install
|
|
207
|
+
```
|
|
208
|
+
You can run the checks yourself using:
|
|
209
|
+
```bash
|
|
210
|
+
pre-commit run --all-files
|
|
211
|
+
```
|
|
212
|
+
|
|
199
213
|
------------------------------------------------------------------------
|
|
200
214
|
|
|
201
215
|
## Changelog
|
|
Binary file
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
def run(*args, **kwargs):
|
|
2
|
-
pass
|
|
2
|
+
pass
|
|
@@ -3,21 +3,16 @@ import numpy as np
|
|
|
3
3
|
|
|
4
4
|
from alphabase.yaml_utils import load_yaml
|
|
5
5
|
|
|
6
|
-
CONST_FILE_FOLDER = os.path.join(
|
|
7
|
-
os.path.dirname(__file__),
|
|
8
|
-
"const_files"
|
|
9
|
-
)
|
|
6
|
+
CONST_FILE_FOLDER = os.path.join(os.path.dirname(__file__), "const_files")
|
|
10
7
|
|
|
11
|
-
common_const_dict:dict = load_yaml(
|
|
8
|
+
common_const_dict: dict = load_yaml(
|
|
12
9
|
os.path.join(CONST_FILE_FOLDER, "common_constants.yaml")
|
|
13
10
|
)
|
|
14
11
|
|
|
15
|
-
# Only applied in peak and fragment dataframes to save RAM.
|
|
12
|
+
# Only applied in peak and fragment dataframes to save RAM.
|
|
16
13
|
# Using float32 still keeps 0.1 ppm precision in any value range.
|
|
17
14
|
# Default float dtype is "float64" for value calculation and other scenarios.
|
|
18
|
-
PEAK_MZ_DTYPE:np.dtype = np.dtype(
|
|
19
|
-
|
|
20
|
-
).type
|
|
21
|
-
PEAK_INTENSITY_DTYPE:np.dtype = np.dtype(
|
|
15
|
+
PEAK_MZ_DTYPE: np.dtype = np.dtype(common_const_dict["PEAK_MZ_DTYPE"]).type
|
|
16
|
+
PEAK_INTENSITY_DTYPE: np.dtype = np.dtype(
|
|
22
17
|
common_const_dict["PEAK_INTENSITY_DTYPE"]
|
|
23
|
-
).type
|
|
18
|
+
).type
|
|
@@ -5,30 +5,30 @@ import typing
|
|
|
5
5
|
|
|
6
6
|
from alphabase.yaml_utils import load_yaml
|
|
7
7
|
|
|
8
|
-
from alphabase.constants.
|
|
9
|
-
calc_mass_from_formula,
|
|
10
|
-
MASS_H2O,
|
|
11
|
-
|
|
8
|
+
from alphabase.constants.atom import (
|
|
9
|
+
calc_mass_from_formula,
|
|
10
|
+
MASS_H2O,
|
|
11
|
+
parse_formula,
|
|
12
|
+
reset_elements,
|
|
12
13
|
)
|
|
13
14
|
|
|
14
15
|
from alphabase.constants._const import CONST_FILE_FOLDER
|
|
15
16
|
|
|
16
17
|
# We use all 128 ASCII codes to represent amino acids for flexible extensions in the future.
|
|
17
|
-
# The amino acid masses are stored in 128-length array :py:data:`AA_ASCII_MASS`.
|
|
18
|
+
# The amino acid masses are stored in 128-length array :py:data:`AA_ASCII_MASS`.
|
|
18
19
|
# If an ASCII code is not in `AA_Formula`, the mass will be set as a large value to disable MS search.
|
|
19
|
-
AA_Formula:dict = load_yaml(
|
|
20
|
-
os.path.join(CONST_FILE_FOLDER, 'amino_acid.yaml')
|
|
21
|
-
)
|
|
20
|
+
AA_Formula: dict = load_yaml(os.path.join(CONST_FILE_FOLDER, "amino_acid.yaml"))
|
|
22
21
|
#: AA mass array with ASCII code, mass of 'A' is AA_ASCII_MASS[ord('A')]
|
|
23
|
-
AA_ASCII_MASS:np.ndarray = np.ones(128)*1e8
|
|
22
|
+
AA_ASCII_MASS: np.ndarray = np.ones(128) * 1e8
|
|
24
23
|
|
|
25
24
|
#: 128-len AA dataframe
|
|
26
|
-
AA_DF:pd.DataFrame = pd.DataFrame()
|
|
25
|
+
AA_DF: pd.DataFrame = pd.DataFrame()
|
|
27
26
|
|
|
28
27
|
# AA formula to formula dict of dict. For example: {'K': {'C': n, 'O': m, ...}}
|
|
29
|
-
AA_Composition:dict = {}
|
|
28
|
+
AA_Composition: dict = {}
|
|
29
|
+
|
|
30
30
|
|
|
31
|
-
def replace_atoms(atom_replace_dict:typing.Dict):
|
|
31
|
+
def replace_atoms(atom_replace_dict: typing.Dict):
|
|
32
32
|
for aa, formula in list(AA_Formula.items()):
|
|
33
33
|
atom_comp = dict(parse_formula(formula))
|
|
34
34
|
for atom_from, atom_to in atom_replace_dict.items():
|
|
@@ -37,58 +37,66 @@ def replace_atoms(atom_replace_dict:typing.Dict):
|
|
|
37
37
|
del atom_comp[atom_from]
|
|
38
38
|
AA_Formula[aa] = "".join([f"{atom}({n})" for atom, n in atom_comp.items()])
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
|
|
41
|
+
def reset_AA_mass() -> np.ndarray:
|
|
41
42
|
"""AA mass in np.array with shape (128,)"""
|
|
42
43
|
global AA_ASCII_MASS
|
|
43
44
|
for aa, chem in AA_Formula.items():
|
|
44
45
|
AA_ASCII_MASS[ord(aa)] = calc_mass_from_formula(chem)
|
|
45
46
|
return AA_ASCII_MASS
|
|
47
|
+
|
|
48
|
+
|
|
46
49
|
reset_AA_mass()
|
|
47
50
|
|
|
51
|
+
|
|
48
52
|
def reset_AA_df():
|
|
49
53
|
global AA_DF
|
|
50
54
|
AA_DF = pd.DataFrame()
|
|
51
|
-
AA_DF[
|
|
52
|
-
AA_DF[
|
|
55
|
+
AA_DF["aa"] = [chr(aa) for aa in range(len(AA_ASCII_MASS))]
|
|
56
|
+
AA_DF["formula"] = [""] * len(AA_ASCII_MASS)
|
|
53
57
|
aa_idxes = []
|
|
54
58
|
formulas = []
|
|
55
59
|
for aa, formula in AA_Formula.items():
|
|
56
60
|
aa_idxes.append(ord(aa))
|
|
57
61
|
formulas.append(formula)
|
|
58
|
-
AA_DF.loc[aa_idxes,
|
|
59
|
-
AA_DF[
|
|
62
|
+
AA_DF.loc[aa_idxes, "formula"] = formulas
|
|
63
|
+
AA_DF["mass"] = AA_ASCII_MASS
|
|
60
64
|
return AA_DF
|
|
65
|
+
|
|
66
|
+
|
|
61
67
|
reset_AA_df()
|
|
62
68
|
|
|
69
|
+
|
|
63
70
|
def reset_AA_Composition():
|
|
64
71
|
global AA_Composition
|
|
65
72
|
AA_Composition = {}
|
|
66
73
|
for aa, formula, mass in AA_DF.values:
|
|
67
|
-
AA_Composition[aa] = dict(
|
|
68
|
-
parse_formula(formula)
|
|
69
|
-
)
|
|
74
|
+
AA_Composition[aa] = dict(parse_formula(formula))
|
|
70
75
|
return AA_Composition
|
|
76
|
+
|
|
77
|
+
|
|
71
78
|
reset_AA_Composition()
|
|
72
79
|
|
|
73
|
-
|
|
80
|
+
|
|
81
|
+
def reset_AA_atoms(atom_replace_dict: typing.Dict = {}):
|
|
74
82
|
reset_elements()
|
|
75
83
|
replace_atoms(atom_replace_dict)
|
|
76
84
|
reset_AA_mass()
|
|
77
85
|
reset_AA_df()
|
|
78
86
|
reset_AA_Composition()
|
|
79
87
|
|
|
80
|
-
|
|
88
|
+
|
|
89
|
+
def update_an_AA(aa: str, formula: str):
|
|
81
90
|
aa_idx = ord(aa)
|
|
82
|
-
AA_DF.loc[aa_idx,
|
|
91
|
+
AA_DF.loc[aa_idx, "formula"] = formula
|
|
83
92
|
AA_ASCII_MASS[aa_idx] = calc_mass_from_formula(formula)
|
|
84
|
-
AA_DF.loc[aa_idx,
|
|
93
|
+
AA_DF.loc[aa_idx, "mass"] = AA_ASCII_MASS[aa_idx]
|
|
85
94
|
AA_Formula[aa] = formula
|
|
86
95
|
AA_Composition[aa] = dict(parse_formula(formula))
|
|
87
96
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
'''
|
|
97
|
+
|
|
98
|
+
def calc_AA_masses(sequence: str) -> np.ndarray:
|
|
99
|
+
"""
|
|
92
100
|
Parameters
|
|
93
101
|
----------
|
|
94
102
|
sequence : str
|
|
@@ -98,13 +106,12 @@ def calc_AA_masses(
|
|
|
98
106
|
-------
|
|
99
107
|
np.ndarray
|
|
100
108
|
Masses of each amino acid.
|
|
101
|
-
|
|
102
|
-
return AA_ASCII_MASS[np.array(sequence,
|
|
109
|
+
"""
|
|
110
|
+
return AA_ASCII_MASS[np.array(sequence, "c").view(np.int8)]
|
|
103
111
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
'''
|
|
112
|
+
|
|
113
|
+
def calc_AA_masses_for_same_len_seqs(sequence_array: np.ndarray) -> np.ndarray:
|
|
114
|
+
"""
|
|
108
115
|
Calculate AA masses for the array of same-len AA sequences.
|
|
109
116
|
|
|
110
117
|
Parameters
|
|
@@ -121,17 +128,16 @@ def calc_AA_masses_for_same_len_seqs(
|
|
|
121
128
|
-------
|
|
122
129
|
ValueError
|
|
123
130
|
If sequences are not with the same length.
|
|
124
|
-
|
|
131
|
+
"""
|
|
125
132
|
return AA_ASCII_MASS[
|
|
126
|
-
# we use np.int32 here because unicode str
|
|
133
|
+
# we use np.int32 here because unicode str
|
|
127
134
|
# uses 4 bytes for a char.
|
|
128
|
-
np.array(sequence_array).view(np.int32)
|
|
135
|
+
np.array(sequence_array).view(np.int32)
|
|
129
136
|
].reshape(len(sequence_array), -1)
|
|
130
137
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
'''
|
|
138
|
+
|
|
139
|
+
def calc_sequence_masses_for_same_len_seqs(sequence_array: np.ndarray) -> np.ndarray:
|
|
140
|
+
"""
|
|
135
141
|
Calculate sequence masses for the array of same-len AA sequences.
|
|
136
142
|
|
|
137
143
|
Parameters
|
|
@@ -143,34 +149,29 @@ def calc_sequence_masses_for_same_len_seqs(
|
|
|
143
149
|
-------
|
|
144
150
|
np.ndarray
|
|
145
151
|
1-D (array_size,) array of sequence masses (sum of AA masses plus one H2O).
|
|
146
|
-
|
|
152
|
+
|
|
147
153
|
Raises
|
|
148
154
|
-------
|
|
149
155
|
ValueError
|
|
150
156
|
If sequences are not with the same length.
|
|
151
|
-
|
|
152
|
-
return np.sum(
|
|
153
|
-
calc_AA_masses_for_same_len_seqs(sequence_array),
|
|
154
|
-
axis=1
|
|
155
|
-
)+MASS_H2O
|
|
157
|
+
"""
|
|
158
|
+
return np.sum(calc_AA_masses_for_same_len_seqs(sequence_array), axis=1) + MASS_H2O
|
|
156
159
|
|
|
157
160
|
|
|
158
|
-
def calc_AA_masses_for_var_len_seqs(
|
|
159
|
-
|
|
160
|
-
)->np.ndarray:
|
|
161
|
-
'''
|
|
161
|
+
def calc_AA_masses_for_var_len_seqs(sequence_array: np.ndarray) -> np.ndarray:
|
|
162
|
+
"""
|
|
162
163
|
We recommend to use `calc_AA_masses_for_same_len_seqs` as it is much faster.
|
|
163
164
|
|
|
164
165
|
Parameters
|
|
165
166
|
----------
|
|
166
167
|
sequence_array : np.ndarray
|
|
167
168
|
Sequences with variable lengths.
|
|
168
|
-
|
|
169
|
+
|
|
169
170
|
Returns
|
|
170
171
|
-------
|
|
171
172
|
np.ndarray
|
|
172
173
|
1D array of masses, zero values are padded to fill the max length.
|
|
173
|
-
|
|
174
|
-
return AA_ASCII_MASS[
|
|
175
|
-
|
|
176
|
-
|
|
174
|
+
"""
|
|
175
|
+
return AA_ASCII_MASS[np.array(sequence_array).view(np.int32)].reshape(
|
|
176
|
+
len(sequence_array), -1
|
|
177
|
+
)
|
|
@@ -5,26 +5,22 @@ import typing
|
|
|
5
5
|
|
|
6
6
|
from alphabase.yaml_utils import load_yaml
|
|
7
7
|
|
|
8
|
-
from alphabase.constants._const import
|
|
9
|
-
CONST_FILE_FOLDER,
|
|
10
|
-
common_const_dict
|
|
11
|
-
)
|
|
8
|
+
from alphabase.constants._const import CONST_FILE_FOLDER, common_const_dict
|
|
12
9
|
|
|
13
|
-
MASS_PROTON:float = common_const_dict[
|
|
14
|
-
MASS_ISOTOPE:float = common_const_dict[
|
|
10
|
+
MASS_PROTON: float = common_const_dict["MASS_PROTON"]
|
|
11
|
+
MASS_ISOTOPE: float = common_const_dict["MASS_ISOTOPE"]
|
|
15
12
|
|
|
16
|
-
MAX_ISOTOPE_LEN:int = common_const_dict[
|
|
17
|
-
EMPTY_DIST:np.ndarray = np.zeros(MAX_ISOTOPE_LEN)
|
|
13
|
+
MAX_ISOTOPE_LEN: int = common_const_dict["MAX_ISOTOPE_LEN"]
|
|
14
|
+
EMPTY_DIST: np.ndarray = np.zeros(MAX_ISOTOPE_LEN)
|
|
18
15
|
EMPTY_DIST[0] = 1
|
|
19
16
|
|
|
17
|
+
|
|
20
18
|
@numba.njit
|
|
21
|
-
def truncate_isotope(
|
|
22
|
-
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
this function truncates the distribution by top
|
|
27
|
-
`MAX_ISOTOPE_LEN` neighbors those contain the monoisotopic
|
|
19
|
+
def truncate_isotope(isotopes: np.ndarray, mono_idx: int) -> tuple:
|
|
20
|
+
"""
|
|
21
|
+
For a given isotope distribution (intensity patterns),
|
|
22
|
+
this function truncates the distribution by top
|
|
23
|
+
`MAX_ISOTOPE_LEN` neighbors those contain the monoisotopic
|
|
28
24
|
peak pointed by `mono_idx`.
|
|
29
25
|
|
|
30
26
|
Parameters
|
|
@@ -36,34 +32,39 @@ def truncate_isotope(
|
|
|
36
32
|
mono_idx : int
|
|
37
33
|
|
|
38
34
|
Monoisotopic peak position (index) in the isotope patterns
|
|
39
|
-
|
|
35
|
+
|
|
40
36
|
Returns
|
|
41
37
|
-------
|
|
42
38
|
int
|
|
43
|
-
|
|
39
|
+
|
|
44
40
|
the new position of `mono_idx`
|
|
45
41
|
|
|
46
42
|
int
|
|
47
|
-
|
|
43
|
+
|
|
48
44
|
the start position of the truncated isotopes
|
|
49
45
|
|
|
50
46
|
int
|
|
51
|
-
|
|
47
|
+
|
|
52
48
|
the end position of the truncated isotopes
|
|
53
|
-
|
|
49
|
+
"""
|
|
54
50
|
trunc_start = mono_idx - 1
|
|
55
51
|
trunc_end = mono_idx + 1
|
|
56
|
-
while
|
|
52
|
+
while (
|
|
53
|
+
trunc_start >= 0
|
|
54
|
+
and trunc_end < len(isotopes)
|
|
55
|
+
and (trunc_end - trunc_start - 1) < MAX_ISOTOPE_LEN
|
|
56
|
+
):
|
|
57
57
|
if isotopes[trunc_end] >= isotopes[trunc_start]:
|
|
58
58
|
trunc_end += 1
|
|
59
59
|
else:
|
|
60
60
|
trunc_start -= 1
|
|
61
|
-
if trunc_end-trunc_start-1 < MAX_ISOTOPE_LEN:
|
|
61
|
+
if trunc_end - trunc_start - 1 < MAX_ISOTOPE_LEN:
|
|
62
62
|
if trunc_start == -1:
|
|
63
63
|
trunc_end = MAX_ISOTOPE_LEN
|
|
64
64
|
elif trunc_end == len(isotopes):
|
|
65
|
-
trunc_start = len(isotopes)-MAX_ISOTOPE_LEN-1
|
|
66
|
-
return mono_idx-trunc_start-1, trunc_start+1, trunc_end
|
|
65
|
+
trunc_start = len(isotopes) - MAX_ISOTOPE_LEN - 1
|
|
66
|
+
return mono_idx - trunc_start - 1, trunc_start + 1, trunc_end
|
|
67
|
+
|
|
67
68
|
|
|
68
69
|
#: chemical element information in dict defined by `nist_element.yaml`
|
|
69
70
|
CHEM_INFO_DICT = {}
|
|
@@ -72,25 +73,24 @@ CHEM_INFO_DICT = {}
|
|
|
72
73
|
CHEM_MONO_MASS = {}
|
|
73
74
|
|
|
74
75
|
#: {element: np.ndarray of abundance distribution}
|
|
75
|
-
CHEM_ISOTOPE_DIST:numba.typed.Dict = numba.typed.Dict.empty(
|
|
76
|
-
key_type=numba.types.unicode_type,
|
|
77
|
-
value_type=numba.types.float64[:]
|
|
76
|
+
CHEM_ISOTOPE_DIST: numba.typed.Dict = numba.typed.Dict.empty(
|
|
77
|
+
key_type=numba.types.unicode_type, value_type=numba.types.float64[:]
|
|
78
78
|
)
|
|
79
79
|
|
|
80
80
|
#: {element: int (mono position)}
|
|
81
|
-
CHEM_MONO_IDX:numba.typed.Dict = numba.typed.Dict.empty(
|
|
82
|
-
key_type=numba.types.unicode_type,
|
|
83
|
-
value_type=numba.types.int64
|
|
81
|
+
CHEM_MONO_IDX: numba.typed.Dict = numba.typed.Dict.empty(
|
|
82
|
+
key_type=numba.types.unicode_type, value_type=numba.types.int64
|
|
84
83
|
)
|
|
85
84
|
|
|
86
|
-
MASS_H:int = None
|
|
87
|
-
MASS_C:int = None
|
|
88
|
-
MASS_O:int = None
|
|
89
|
-
MASS_N:int = None
|
|
90
|
-
MASS_H2O:int = None
|
|
91
|
-
MASS_NH3:int = None
|
|
85
|
+
MASS_H: int = None
|
|
86
|
+
MASS_C: int = None
|
|
87
|
+
MASS_O: int = None
|
|
88
|
+
MASS_N: int = None
|
|
89
|
+
MASS_H2O: int = None # raise errors if the value is not reset
|
|
90
|
+
MASS_NH3: int = None
|
|
91
|
+
|
|
92
92
|
|
|
93
|
-
def update_atom_infos(new_atom_info:typing.Dict):
|
|
93
|
+
def update_atom_infos(new_atom_info: typing.Dict):
|
|
94
94
|
"""
|
|
95
95
|
Args:
|
|
96
96
|
new_atom_info (Dict): Example, replacing N with 15N
|
|
@@ -104,14 +104,14 @@ def update_atom_infos(new_atom_info:typing.Dict):
|
|
|
104
104
|
|
|
105
105
|
reset_elements()
|
|
106
106
|
|
|
107
|
-
def reset_elements():
|
|
108
107
|
|
|
108
|
+
def reset_elements():
|
|
109
109
|
global MASS_C, MASS_H, MASS_O, MASS_N
|
|
110
110
|
global MASS_H2O, MASS_NH3
|
|
111
111
|
|
|
112
112
|
for elem, items in CHEM_INFO_DICT.items():
|
|
113
|
-
isotopes = np.array(items[
|
|
114
|
-
masses = np.array(items[
|
|
113
|
+
isotopes = np.array(items["abundance"])
|
|
114
|
+
masses = np.array(items["mass"])
|
|
115
115
|
_sort_idx = np.argsort(masses)
|
|
116
116
|
masses = masses[_sort_idx]
|
|
117
117
|
isotopes = isotopes[_sort_idx]
|
|
@@ -139,18 +139,19 @@ def reset_elements():
|
|
|
139
139
|
|
|
140
140
|
CHEM_ISOTOPE_DIST[elem] = _isos[start:end]
|
|
141
141
|
CHEM_MONO_IDX[elem] = _mono_idx
|
|
142
|
-
|
|
143
|
-
MASS_C = CHEM_MONO_MASS[
|
|
144
|
-
MASS_H = CHEM_MONO_MASS[
|
|
145
|
-
MASS_N = CHEM_MONO_MASS[
|
|
146
|
-
MASS_O = CHEM_MONO_MASS[
|
|
147
|
-
MASS_H2O = CHEM_MONO_MASS[
|
|
148
|
-
MASS_NH3 = CHEM_MONO_MASS[
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
''
|
|
142
|
+
|
|
143
|
+
MASS_C = CHEM_MONO_MASS["C"]
|
|
144
|
+
MASS_H = CHEM_MONO_MASS["H"]
|
|
145
|
+
MASS_N = CHEM_MONO_MASS["N"]
|
|
146
|
+
MASS_O = CHEM_MONO_MASS["O"]
|
|
147
|
+
MASS_H2O = CHEM_MONO_MASS["H"] * 2 + CHEM_MONO_MASS["O"]
|
|
148
|
+
MASS_NH3 = CHEM_MONO_MASS["H"] * 3 + CHEM_MONO_MASS["N"]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def load_elem_yaml(yaml_file: str):
|
|
152
|
+
"""Load built-in or user-defined element yaml file. Default yaml is:
|
|
153
|
+
os.path.join(CONST_FILE_FOLDER, 'nist_element.yaml')
|
|
154
|
+
"""
|
|
154
155
|
global CHEM_INFO_DICT
|
|
155
156
|
global CHEM_MONO_MASS
|
|
156
157
|
global CHEM_ISOTOPE_DIST
|
|
@@ -160,52 +161,42 @@ def load_elem_yaml(yaml_file:str):
|
|
|
160
161
|
|
|
161
162
|
CHEM_MONO_MASS = {}
|
|
162
163
|
CHEM_ISOTOPE_DIST = numba.typed.Dict.empty(
|
|
163
|
-
key_type=numba.types.unicode_type,
|
|
164
|
-
value_type=numba.types.float64[:]
|
|
164
|
+
key_type=numba.types.unicode_type, value_type=numba.types.float64[:]
|
|
165
165
|
)
|
|
166
|
-
|
|
166
|
+
|
|
167
167
|
CHEM_MONO_IDX = numba.typed.Dict.empty(
|
|
168
|
-
key_type=numba.types.unicode_type,
|
|
169
|
-
value_type=numba.types.int64
|
|
168
|
+
key_type=numba.types.unicode_type, value_type=numba.types.int64
|
|
170
169
|
)
|
|
171
170
|
|
|
172
171
|
reset_elements()
|
|
173
172
|
|
|
174
|
-
load_elem_yaml(
|
|
175
|
-
os.path.join(CONST_FILE_FOLDER,
|
|
176
|
-
'nist_element.yaml'
|
|
177
|
-
)
|
|
178
|
-
)
|
|
179
173
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
174
|
+
load_elem_yaml(os.path.join(CONST_FILE_FOLDER, "nist_element.yaml"))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def parse_formula(formula: str) -> list:
|
|
178
|
+
"""
|
|
179
|
+
Given a formula (str, e.g. `H(1)C(2)O(3)`),
|
|
185
180
|
it generates `[('H', 1), ('C', 2), ('O', 3)]`
|
|
186
|
-
|
|
187
|
-
if not formula:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
]
|
|
181
|
+
"""
|
|
182
|
+
if not formula:
|
|
183
|
+
return []
|
|
184
|
+
items = [item.split("(") for item in formula.strip(")").split(")")]
|
|
191
185
|
return [(elem, int(n)) for elem, n in items]
|
|
192
186
|
|
|
193
187
|
|
|
194
|
-
def calc_mass_from_formula(formula:str):
|
|
195
|
-
|
|
188
|
+
def calc_mass_from_formula(formula: str):
|
|
189
|
+
"""
|
|
196
190
|
Calculates the mass of the formula
|
|
197
191
|
|
|
198
192
|
Parameters
|
|
199
193
|
----------
|
|
200
194
|
formula : str
|
|
201
195
|
e.g. `H(1)C(2)O(3)`
|
|
202
|
-
|
|
196
|
+
|
|
203
197
|
Returns
|
|
204
198
|
-------
|
|
205
199
|
float
|
|
206
200
|
mass of the formula
|
|
207
|
-
|
|
208
|
-
return np.sum([
|
|
209
|
-
CHEM_MONO_MASS[elem]*n
|
|
210
|
-
for elem, n in parse_formula(formula)
|
|
211
|
-
])
|
|
201
|
+
"""
|
|
202
|
+
return np.sum([CHEM_MONO_MASS[elem] * n for elem, n in parse_formula(formula)])
|
|
@@ -30,11 +30,11 @@ X: 'C(1000000)'
|
|
|
30
30
|
Y: 'C(9)H(9)N(1)O(2)S(0)'
|
|
31
31
|
Z: 'C(1000000)'
|
|
32
32
|
# Any other ASCII chars could be the placeholders for future usage.
|
|
33
|
-
# For example:
|
|
33
|
+
# For example:
|
|
34
34
|
# phospho site-specific search (only lower case 'sty' can be modified)
|
|
35
35
|
# s is S
|
|
36
36
|
s: 'C(3)H(5)N(1)O(2)S(0)'
|
|
37
37
|
# t is T
|
|
38
38
|
t: 'C(4)H(8)N(1)O(5)P(1)'
|
|
39
39
|
# y is Y
|
|
40
|
-
y: 'C(9)H(9)N(1)O(2)S(0)'
|
|
40
|
+
y: 'C(9)H(9)N(1)O(2)S(0)'
|
|
@@ -2,7 +2,7 @@ MASS_PROTON: 1.007276467 #https://physics.nist.gov/cgi-bin/cuu/Value?arp|search_
|
|
|
2
2
|
MASS_ISOTOPE: 1.0033
|
|
3
3
|
MAX_ISOTOPE_LEN: 10
|
|
4
4
|
MOBILITY:
|
|
5
|
-
# 1059.62245 is the estimated constant coef in
|
|
5
|
+
# 1059.62245 is the estimated constant coef in
|
|
6
6
|
# Mason-Schamp equation of Bruker.
|
|
7
7
|
CCS_IM_COEF: 1059.62245
|
|
8
8
|
# 28 is the mass of N(2), the default gas in IM bruker
|
|
@@ -10,4 +10,4 @@ MOBILITY:
|
|
|
10
10
|
|
|
11
11
|
# Only applied in peak/fragment dataframes to save RAM
|
|
12
12
|
PEAK_MZ_DTYPE: float32
|
|
13
|
-
PEAK_INTENSITY_DTYPE: float32
|
|
13
|
+
PEAK_INTENSITY_DTYPE: float32
|