alphabase 1.2.2__tar.gz → 1.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. {alphabase-1.2.2/alphabase.egg-info → alphabase-1.2.4}/PKG-INFO +23 -3
  2. {alphabase-1.2.2 → alphabase-1.2.4}/README.md +14 -0
  3. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/__init__.py +1 -1
  4. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/__pycache__/__init__.cpython-39.pyc +0 -0
  5. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/cli.py +1 -1
  6. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/_const.py +6 -11
  7. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/aa.py +57 -56
  8. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/atom.py +71 -80
  9. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/amino_acid.yaml +2 -2
  10. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/common_constants.yaml +2 -2
  11. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/protease.yaml +1 -1
  12. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/psm_reader.yaml +11 -11
  13. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/quant_reader_config.yaml +123 -123
  14. alphabase-1.2.4/alphabase/constants/element.py +10 -0
  15. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/isotope.py +90 -82
  16. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/modification.py +170 -162
  17. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/gui.py +1 -1
  18. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/io/hdf.py +30 -61
  19. alphabase-1.2.4/alphabase/io/tempmmap.py +245 -0
  20. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/fragment.py +556 -508
  21. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/mass_calc.py +72 -82
  22. alphabase-1.2.4/alphabase/peptide/mobility.py +107 -0
  23. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/precursor.py +264 -253
  24. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/fasta.py +478 -508
  25. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/inference.py +5 -3
  26. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/lcp_digest.py +28 -18
  27. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/protein_level_decoy.py +32 -38
  28. alphabase-1.2.4/alphabase/psm_reader/__init__.py +58 -0
  29. alphabase-1.2.4/alphabase/psm_reader/alphapept_reader.py +112 -0
  30. alphabase-1.2.4/alphabase/psm_reader/dia_psm_reader.py +191 -0
  31. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/psm_reader/maxquant_reader.py +110 -107
  32. alphabase-1.2.4/alphabase/psm_reader/msfragger_reader.py +188 -0
  33. alphabase-1.2.4/alphabase/psm_reader/pfind_reader.py +149 -0
  34. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/psm_reader/psm_reader.py +171 -207
  35. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/psm_reader/sage_reader.py +84 -71
  36. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/quantification/quant_reader/config_dict_loader.py +57 -32
  37. alphabase-1.2.4/alphabase/quantification/quant_reader/longformat_reader.py +253 -0
  38. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/quantification/quant_reader/plexdia_reformatter.py +24 -14
  39. alphabase-1.2.4/alphabase/quantification/quant_reader/quant_reader_manager.py +74 -0
  40. alphabase-1.2.4/alphabase/quantification/quant_reader/quantreader_utils.py +37 -0
  41. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/quantification/quant_reader/table_reformatter.py +82 -48
  42. alphabase-1.2.4/alphabase/quantification/quant_reader/wideformat_reader.py +29 -0
  43. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/base.py +263 -291
  44. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/decoy.py +74 -80
  45. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/flat.py +201 -147
  46. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/reader.py +130 -130
  47. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/translate.py +194 -170
  48. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/spectral_library/validate.py +43 -42
  49. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/utils.py +18 -15
  50. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/yaml_utils.py +3 -1
  51. {alphabase-1.2.2 → alphabase-1.2.4/alphabase.egg-info}/PKG-INFO +23 -3
  52. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/SOURCES.txt +1 -15
  53. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/requires.txt +8 -2
  54. alphabase-1.2.4/pyproject.toml +6 -0
  55. {alphabase-1.2.2 → alphabase-1.2.4}/setup.py +4 -9
  56. alphabase-1.2.2/alphabase/_modidx.py +0 -266
  57. alphabase-1.2.2/alphabase/constants/element.py +0 -1
  58. alphabase-1.2.2/alphabase/io/psm_reader/__init__.py +0 -2
  59. alphabase-1.2.2/alphabase/io/psm_reader/alphapept_reader.py +0 -1
  60. alphabase-1.2.2/alphabase/io/psm_reader/dia_psm_reader.py +0 -1
  61. alphabase-1.2.2/alphabase/io/psm_reader/dia_search_reader.py +0 -2
  62. alphabase-1.2.2/alphabase/io/psm_reader/maxquant_reader.py +0 -2
  63. alphabase-1.2.2/alphabase/io/psm_reader/msfragger_reader.py +0 -1
  64. alphabase-1.2.2/alphabase/io/psm_reader/pfind_reader.py +0 -1
  65. alphabase-1.2.2/alphabase/io/psm_reader/psm_reader.py +0 -1
  66. alphabase-1.2.2/alphabase/io/tempmmap.py +0 -154
  67. alphabase-1.2.2/alphabase/peptide/mobility.py +0 -109
  68. alphabase-1.2.2/alphabase/psm_reader/__init__.py +0 -18
  69. alphabase-1.2.2/alphabase/psm_reader/alphapept_reader.py +0 -108
  70. alphabase-1.2.2/alphabase/psm_reader/dia_psm_reader.py +0 -208
  71. alphabase-1.2.2/alphabase/psm_reader/msfragger_reader.py +0 -183
  72. alphabase-1.2.2/alphabase/psm_reader/pfind_reader.py +0 -148
  73. alphabase-1.2.2/alphabase/quantification/quant_reader/longformat_reader.py +0 -171
  74. alphabase-1.2.2/alphabase/quantification/quant_reader/quant_reader_manager.py +0 -48
  75. alphabase-1.2.2/alphabase/quantification/quant_reader/quantreader_utils.py +0 -35
  76. alphabase-1.2.2/alphabase/quantification/quant_reader/wideformat_reader.py +0 -22
  77. alphabase-1.2.2/alphabase/scoring/fdr.py +0 -161
  78. alphabase-1.2.2/alphabase/scoring/feature_extraction_base.py +0 -61
  79. alphabase-1.2.2/alphabase/scoring/ml_scoring.py +0 -374
  80. alphabase-1.2.2/alphabase/spectral_library/__init__.py +0 -0
  81. alphabase-1.2.2/alphabase/statistics/__init__.py +0 -0
  82. alphabase-1.2.2/alphabase/statistics/regression.py +0 -360
  83. {alphabase-1.2.2 → alphabase-1.2.4}/LICENSE +0 -0
  84. {alphabase-1.2.2 → alphabase-1.2.4}/LICENSE.txt +0 -0
  85. {alphabase-1.2.2 → alphabase-1.2.4}/MANIFEST.in +0 -0
  86. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/__init__.py +0 -0
  87. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/__emass_element.yaml +0 -0
  88. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/__used_mod.yaml +0 -0
  89. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/contaminants.fasta +0 -0
  90. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/modification.tsv +0 -0
  91. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/constants/const_files/nist_element.yaml +0 -0
  92. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/io/__init__.py +0 -0
  93. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/peptide/__init__.py +0 -0
  94. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase/protein/__init__.py +0 -0
  95. {alphabase-1.2.2/alphabase/scoring → alphabase-1.2.4/alphabase/spectral_library}/__init__.py +0 -0
  96. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/dependency_links.txt +0 -0
  97. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/entry_points.txt +0 -0
  98. {alphabase-1.2.2 → alphabase-1.2.4}/alphabase.egg-info/top_level.txt +0 -0
  99. {alphabase-1.2.2 → alphabase-1.2.4}/setup.cfg +0 -0
  100. {alphabase-1.2.2 → alphabase-1.2.4}/tests/test_cli.py +0 -0
  101. {alphabase-1.2.2 → alphabase-1.2.4}/tests/test_gui.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: alphabase
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: An infrastructure Python package of the AlphaX ecosystem
5
5
  Home-page: https://github.com/MannLabs/alphabase
6
6
  Author: Mann Labs
@@ -37,6 +37,8 @@ Requires-Dist: regex
37
37
  Requires-Dist: dask
38
38
  Requires-Dist: dask_expr
39
39
  Requires-Dist: pyahocorasick
40
+ Requires-Dist: pyteomics
41
+ Requires-Dist: lxml
40
42
  Requires-Dist: pywin32; sys_platform == "win32"
41
43
  Provides-Extra: development-stable
42
44
  Requires-Dist: jupyter; extra == "development-stable"
@@ -44,7 +46,6 @@ Requires-Dist: twine; extra == "development-stable"
44
46
  Requires-Dist: bumpversion; extra == "development-stable"
45
47
  Requires-Dist: pipdeptree; extra == "development-stable"
46
48
  Requires-Dist: ipykernel; extra == "development-stable"
47
- Requires-Dist: nbdev; extra == "development-stable"
48
49
  Requires-Dist: pyteomics; extra == "development-stable"
49
50
  Requires-Dist: scikit-learn; extra == "development-stable"
50
51
  Requires-Dist: matplotlib; extra == "development-stable"
@@ -69,13 +70,14 @@ Requires-Dist: regex; extra == "development-stable"
69
70
  Requires-Dist: pydivsufsort; extra == "development-stable"
70
71
  Requires-Dist: pyahocorasick; extra == "development-stable"
71
72
  Requires-Dist: pytest; extra == "development-stable"
73
+ Requires-Dist: pre-commit==3.7.0; extra == "development-stable"
74
+ Requires-Dist: nbmake==1.5.3; extra == "development-stable"
72
75
  Provides-Extra: development
73
76
  Requires-Dist: jupyter; extra == "development"
74
77
  Requires-Dist: twine; extra == "development"
75
78
  Requires-Dist: bumpversion; extra == "development"
76
79
  Requires-Dist: pipdeptree; extra == "development"
77
80
  Requires-Dist: ipykernel; extra == "development"
78
- Requires-Dist: nbdev; extra == "development"
79
81
  Requires-Dist: pyteomics; extra == "development"
80
82
  Requires-Dist: scikit-learn; extra == "development"
81
83
  Requires-Dist: matplotlib; extra == "development"
@@ -100,6 +102,8 @@ Requires-Dist: regex; extra == "development"
100
102
  Requires-Dist: pydivsufsort; extra == "development"
101
103
  Requires-Dist: pyahocorasick; extra == "development"
102
104
  Requires-Dist: pytest; extra == "development"
105
+ Requires-Dist: pre-commit; extra == "development"
106
+ Requires-Dist: nbmake; extra == "development"
103
107
  Provides-Extra: stable
104
108
  Requires-Dist: numba; extra == "stable"
105
109
  Requires-Dist: numpy; extra == "stable"
@@ -116,6 +120,8 @@ Requires-Dist: regex; extra == "stable"
116
120
  Requires-Dist: dask; extra == "stable"
117
121
  Requires-Dist: dask_expr; extra == "stable"
118
122
  Requires-Dist: pyahocorasick; extra == "stable"
123
+ Requires-Dist: pyteomics; extra == "stable"
124
+ Requires-Dist: lxml; extra == "stable"
119
125
 
120
126
  # AlphaBase
121
127
 
@@ -315,6 +321,20 @@ For an even more interactive participation, check out the
315
321
  [discussions](https://github.com/MannLabs/alphabase/discussions) and the
316
322
  [Contributors License Agreement](misc/CLA.md).
317
323
 
324
+ ### Notes for developers
325
+ #### pre-commit hooks
326
+ It is highly recommended to use the provided pre-commit hooks, as the CI pipeline requires all of their checks to
327
+ pass before a branch can be merged.
328
+
329
+ The hooks need to be installed once by
330
+ ```bash
331
+ pre-commit install
332
+ ```
333
+ You can run the checks yourself using:
334
+ ```bash
335
+ pre-commit run --all-files
336
+ ```
337
+
318
338
  ------------------------------------------------------------------------
319
339
 
320
340
  ## Changelog
@@ -196,6 +196,20 @@ For an even more interactive participation, check out the
196
196
  [discussions](https://github.com/MannLabs/alphabase/discussions) and the
197
197
  [Contributors License Agreement](misc/CLA.md).
198
198
 
199
+ ### Notes for developers
200
+ #### pre-commit hooks
201
+ It is highly recommended to use the provided pre-commit hooks, as the CI pipeline requires all of their checks to
202
+ pass before a branch can be merged.
203
+
204
+ The hooks need to be installed once by
205
+ ```bash
206
+ pre-commit install
207
+ ```
208
+ You can run the checks yourself using:
209
+ ```bash
210
+ pre-commit run --all-files
211
+ ```
212
+
199
213
  ------------------------------------------------------------------------
200
214
 
201
215
  ## Changelog
@@ -2,7 +2,7 @@
2
2
 
3
3
 
4
4
  __project__ = "alphabase"
5
- __version__ = "1.2.2"
5
+ __version__ = "1.2.4"
6
6
  __license__ = "Apache"
7
7
  __description__ = "An infrastructure Python package of the AlphaX ecosystem"
8
8
  __author__ = "Mann Labs"
@@ -1,2 +1,2 @@
1
1
  def run(*args, **kwargs):
2
- pass
2
+ pass
@@ -3,21 +3,16 @@ import numpy as np
3
3
 
4
4
  from alphabase.yaml_utils import load_yaml
5
5
 
6
- CONST_FILE_FOLDER = os.path.join(
7
- os.path.dirname(__file__),
8
- "const_files"
9
- )
6
+ CONST_FILE_FOLDER = os.path.join(os.path.dirname(__file__), "const_files")
10
7
 
11
- common_const_dict:dict = load_yaml(
8
+ common_const_dict: dict = load_yaml(
12
9
  os.path.join(CONST_FILE_FOLDER, "common_constants.yaml")
13
10
  )
14
11
 
15
- # Only applied in peak and fragment dataframes to save RAM.
12
+ # Only applied in peak and fragment dataframes to save RAM.
16
13
  # Using float32 still keeps 0.1 ppm precision in any value range.
17
14
  # Default float dtype is "float64" for value calculation and other scenarios.
18
- PEAK_MZ_DTYPE:np.dtype = np.dtype(
19
- common_const_dict["PEAK_MZ_DTYPE"]
20
- ).type
21
- PEAK_INTENSITY_DTYPE:np.dtype = np.dtype(
15
+ PEAK_MZ_DTYPE: np.dtype = np.dtype(common_const_dict["PEAK_MZ_DTYPE"]).type
16
+ PEAK_INTENSITY_DTYPE: np.dtype = np.dtype(
22
17
  common_const_dict["PEAK_INTENSITY_DTYPE"]
23
- ).type
18
+ ).type
@@ -5,30 +5,30 @@ import typing
5
5
 
6
6
  from alphabase.yaml_utils import load_yaml
7
7
 
8
- from alphabase.constants.element import (
9
- calc_mass_from_formula,
10
- MASS_H2O, parse_formula,
11
- reset_elements
8
+ from alphabase.constants.atom import (
9
+ calc_mass_from_formula,
10
+ MASS_H2O,
11
+ parse_formula,
12
+ reset_elements,
12
13
  )
13
14
 
14
15
  from alphabase.constants._const import CONST_FILE_FOLDER
15
16
 
16
17
  # We use all 128 ASCII codes to represent amino acids for flexible extensions in the future.
17
- # The amino acid masses are stored in 128-length array :py:data:`AA_ASCII_MASS`.
18
+ # The amino acid masses are stored in 128-length array :py:data:`AA_ASCII_MASS`.
18
19
  # If an ASCII code is not in `AA_Formula`, the mass will be set as a large value to disable MS search.
19
- AA_Formula:dict = load_yaml(
20
- os.path.join(CONST_FILE_FOLDER, 'amino_acid.yaml')
21
- )
20
+ AA_Formula: dict = load_yaml(os.path.join(CONST_FILE_FOLDER, "amino_acid.yaml"))
22
21
  #: AA mass array with ASCII code, mass of 'A' is AA_ASCII_MASS[ord('A')]
23
- AA_ASCII_MASS:np.ndarray = np.ones(128)*1e8
22
+ AA_ASCII_MASS: np.ndarray = np.ones(128) * 1e8
24
23
 
25
24
  #: 128-len AA dataframe
26
- AA_DF:pd.DataFrame = pd.DataFrame()
25
+ AA_DF: pd.DataFrame = pd.DataFrame()
27
26
 
28
27
  # AA formula to formula dict of dict. For example: {'K': {'C': n, 'O': m, ...}}
29
- AA_Composition:dict = {}
28
+ AA_Composition: dict = {}
29
+
30
30
 
31
- def replace_atoms(atom_replace_dict:typing.Dict):
31
+ def replace_atoms(atom_replace_dict: typing.Dict):
32
32
  for aa, formula in list(AA_Formula.items()):
33
33
  atom_comp = dict(parse_formula(formula))
34
34
  for atom_from, atom_to in atom_replace_dict.items():
@@ -37,58 +37,66 @@ def replace_atoms(atom_replace_dict:typing.Dict):
37
37
  del atom_comp[atom_from]
38
38
  AA_Formula[aa] = "".join([f"{atom}({n})" for atom, n in atom_comp.items()])
39
39
 
40
- def reset_AA_mass()->np.ndarray:
40
+
41
+ def reset_AA_mass() -> np.ndarray:
41
42
  """AA mass in np.array with shape (128,)"""
42
43
  global AA_ASCII_MASS
43
44
  for aa, chem in AA_Formula.items():
44
45
  AA_ASCII_MASS[ord(aa)] = calc_mass_from_formula(chem)
45
46
  return AA_ASCII_MASS
47
+
48
+
46
49
  reset_AA_mass()
47
50
 
51
+
48
52
  def reset_AA_df():
49
53
  global AA_DF
50
54
  AA_DF = pd.DataFrame()
51
- AA_DF['aa'] = [chr(aa) for aa in range(len(AA_ASCII_MASS))]
52
- AA_DF['formula'] = ['']*len(AA_ASCII_MASS)
55
+ AA_DF["aa"] = [chr(aa) for aa in range(len(AA_ASCII_MASS))]
56
+ AA_DF["formula"] = [""] * len(AA_ASCII_MASS)
53
57
  aa_idxes = []
54
58
  formulas = []
55
59
  for aa, formula in AA_Formula.items():
56
60
  aa_idxes.append(ord(aa))
57
61
  formulas.append(formula)
58
- AA_DF.loc[aa_idxes, 'formula'] = formulas
59
- AA_DF['mass'] = AA_ASCII_MASS
62
+ AA_DF.loc[aa_idxes, "formula"] = formulas
63
+ AA_DF["mass"] = AA_ASCII_MASS
60
64
  return AA_DF
65
+
66
+
61
67
  reset_AA_df()
62
68
 
69
+
63
70
  def reset_AA_Composition():
64
71
  global AA_Composition
65
72
  AA_Composition = {}
66
73
  for aa, formula, mass in AA_DF.values:
67
- AA_Composition[aa] = dict(
68
- parse_formula(formula)
69
- )
74
+ AA_Composition[aa] = dict(parse_formula(formula))
70
75
  return AA_Composition
76
+
77
+
71
78
  reset_AA_Composition()
72
79
 
73
- def reset_AA_atoms(atom_replace_dict:typing.Dict = {}):
80
+
81
+ def reset_AA_atoms(atom_replace_dict: typing.Dict = {}):
74
82
  reset_elements()
75
83
  replace_atoms(atom_replace_dict)
76
84
  reset_AA_mass()
77
85
  reset_AA_df()
78
86
  reset_AA_Composition()
79
87
 
80
- def update_an_AA(aa:str, formula:str):
88
+
89
+ def update_an_AA(aa: str, formula: str):
81
90
  aa_idx = ord(aa)
82
- AA_DF.loc[aa_idx,'formula'] = formula
91
+ AA_DF.loc[aa_idx, "formula"] = formula
83
92
  AA_ASCII_MASS[aa_idx] = calc_mass_from_formula(formula)
84
- AA_DF.loc[aa_idx,'mass'] = AA_ASCII_MASS[aa_idx]
93
+ AA_DF.loc[aa_idx, "mass"] = AA_ASCII_MASS[aa_idx]
85
94
  AA_Formula[aa] = formula
86
95
  AA_Composition[aa] = dict(parse_formula(formula))
87
96
 
88
- def calc_AA_masses(
89
- sequence: str
90
- )->np.ndarray:
91
- '''
97
+
98
+ def calc_AA_masses(sequence: str) -> np.ndarray:
99
+ """
92
100
  Parameters
93
101
  ----------
94
102
  sequence : str
@@ -98,13 +106,12 @@ def calc_AA_masses(
98
106
  -------
99
107
  np.ndarray
100
108
  Masses of each amino acid.
101
- '''
102
- return AA_ASCII_MASS[np.array(sequence,'c').view(np.int8)]
109
+ """
110
+ return AA_ASCII_MASS[np.array(sequence, "c").view(np.int8)]
103
111
 
104
- def calc_AA_masses_for_same_len_seqs(
105
- sequence_array: np.ndarray
106
- )->np.ndarray:
107
- '''
112
+
113
+ def calc_AA_masses_for_same_len_seqs(sequence_array: np.ndarray) -> np.ndarray:
114
+ """
108
115
  Calculate AA masses for the array of same-len AA sequences.
109
116
 
110
117
  Parameters
@@ -121,17 +128,16 @@ def calc_AA_masses_for_same_len_seqs(
121
128
  -------
122
129
  ValueError
123
130
  If sequences are not with the same length.
124
- '''
131
+ """
125
132
  return AA_ASCII_MASS[
126
- # we use np.int32 here because unicode str
133
+ # we use np.int32 here because unicode str
127
134
  # uses 4 bytes for a char.
128
- np.array(sequence_array).view(np.int32)
135
+ np.array(sequence_array).view(np.int32)
129
136
  ].reshape(len(sequence_array), -1)
130
137
 
131
- def calc_sequence_masses_for_same_len_seqs(
132
- sequence_array: np.ndarray
133
- )->np.ndarray:
134
- '''
138
+
139
+ def calc_sequence_masses_for_same_len_seqs(sequence_array: np.ndarray) -> np.ndarray:
140
+ """
135
141
  Calculate sequence masses for the array of same-len AA sequences.
136
142
 
137
143
  Parameters
@@ -143,34 +149,29 @@ def calc_sequence_masses_for_same_len_seqs(
143
149
  -------
144
150
  np.ndarray
145
151
  1-D array of sequence masses with shape (array_size,).
146
-
152
+
147
153
  Raises
148
154
  -------
149
155
  ValueError
150
156
  If sequences are not with the same length.
151
- '''
152
- return np.sum(
153
- calc_AA_masses_for_same_len_seqs(sequence_array),
154
- axis=1
155
- )+MASS_H2O
157
+ """
158
+ return np.sum(calc_AA_masses_for_same_len_seqs(sequence_array), axis=1) + MASS_H2O
156
159
 
157
160
 
158
- def calc_AA_masses_for_var_len_seqs(
159
- sequence_array: np.ndarray
160
- )->np.ndarray:
161
- '''
161
+ def calc_AA_masses_for_var_len_seqs(sequence_array: np.ndarray) -> np.ndarray:
162
+ """
162
163
  We recommend to use `calc_AA_masses_for_same_len_seqs` as it is much faster.
163
164
 
164
165
  Parameters
165
166
  ----------
166
167
  sequence_array : np.ndarray
167
168
  Sequences with variable lengths.
168
-
169
+
169
170
  Returns
170
171
  -------
171
172
  np.ndarray
172
173
  2D array of masses, zero values are padded to fill the max length.
173
- '''
174
- return AA_ASCII_MASS[
175
- np.array(sequence_array).view(np.int32)
176
- ].reshape(len(sequence_array), -1)
174
+ """
175
+ return AA_ASCII_MASS[np.array(sequence_array).view(np.int32)].reshape(
176
+ len(sequence_array), -1
177
+ )
@@ -5,26 +5,22 @@ import typing
5
5
 
6
6
  from alphabase.yaml_utils import load_yaml
7
7
 
8
- from alphabase.constants._const import (
9
- CONST_FILE_FOLDER,
10
- common_const_dict
11
- )
8
+ from alphabase.constants._const import CONST_FILE_FOLDER, common_const_dict
12
9
 
13
- MASS_PROTON:float = common_const_dict['MASS_PROTON']
14
- MASS_ISOTOPE:float = common_const_dict['MASS_ISOTOPE']
10
+ MASS_PROTON: float = common_const_dict["MASS_PROTON"]
11
+ MASS_ISOTOPE: float = common_const_dict["MASS_ISOTOPE"]
15
12
 
16
- MAX_ISOTOPE_LEN:int = common_const_dict['MAX_ISOTOPE_LEN']
17
- EMPTY_DIST:np.ndarray = np.zeros(MAX_ISOTOPE_LEN)
13
+ MAX_ISOTOPE_LEN: int = common_const_dict["MAX_ISOTOPE_LEN"]
14
+ EMPTY_DIST: np.ndarray = np.zeros(MAX_ISOTOPE_LEN)
18
15
  EMPTY_DIST[0] = 1
19
16
 
17
+
20
18
  @numba.njit
21
- def truncate_isotope(
22
- isotopes: np.ndarray, mono_idx: int
23
- )->tuple:
24
- '''
25
- For a given isotope distribution (intensity patterns),
26
- this function truncates the distribution by top
27
- `MAX_ISOTOPE_LEN` neighbors those contain the monoisotopic
19
+ def truncate_isotope(isotopes: np.ndarray, mono_idx: int) -> tuple:
20
+ """
21
+ For a given isotope distribution (intensity patterns),
22
+ this function truncates the distribution by top
23
+ `MAX_ISOTOPE_LEN` neighbors those contain the monoisotopic
28
24
  peak pointed by `mono_idx`.
29
25
 
30
26
  Parameters
@@ -36,34 +32,39 @@ def truncate_isotope(
36
32
  mono_idx : int
37
33
 
38
34
  Monoisotopic peak position (index) in the isotope patterns
39
-
35
+
40
36
  Returns
41
37
  -------
42
38
  int
43
-
39
+
44
40
  the new position of `mono_idx`
45
41
 
46
42
  int
47
-
43
+
48
44
  the start position of the truncated isotopes
49
45
 
50
46
  int
51
-
47
+
52
48
  the end position of the truncated isotopes
53
- '''
49
+ """
54
50
  trunc_start = mono_idx - 1
55
51
  trunc_end = mono_idx + 1
56
- while trunc_start >= 0 and trunc_end < len(isotopes) and (trunc_end-trunc_start-1)<MAX_ISOTOPE_LEN:
52
+ while (
53
+ trunc_start >= 0
54
+ and trunc_end < len(isotopes)
55
+ and (trunc_end - trunc_start - 1) < MAX_ISOTOPE_LEN
56
+ ):
57
57
  if isotopes[trunc_end] >= isotopes[trunc_start]:
58
58
  trunc_end += 1
59
59
  else:
60
60
  trunc_start -= 1
61
- if trunc_end-trunc_start-1 < MAX_ISOTOPE_LEN:
61
+ if trunc_end - trunc_start - 1 < MAX_ISOTOPE_LEN:
62
62
  if trunc_start == -1:
63
63
  trunc_end = MAX_ISOTOPE_LEN
64
64
  elif trunc_end == len(isotopes):
65
- trunc_start = len(isotopes)-MAX_ISOTOPE_LEN-1
66
- return mono_idx-trunc_start-1, trunc_start+1, trunc_end
65
+ trunc_start = len(isotopes) - MAX_ISOTOPE_LEN - 1
66
+ return mono_idx - trunc_start - 1, trunc_start + 1, trunc_end
67
+
67
68
 
68
69
  #: chemical element information in dict defined by `nist_element.yaml`
69
70
  CHEM_INFO_DICT = {}
@@ -72,25 +73,24 @@ CHEM_INFO_DICT = {}
72
73
  CHEM_MONO_MASS = {}
73
74
 
74
75
  #: {element: np.ndarray of abundance distribution}
75
- CHEM_ISOTOPE_DIST:numba.typed.Dict = numba.typed.Dict.empty(
76
- key_type=numba.types.unicode_type,
77
- value_type=numba.types.float64[:]
76
+ CHEM_ISOTOPE_DIST: numba.typed.Dict = numba.typed.Dict.empty(
77
+ key_type=numba.types.unicode_type, value_type=numba.types.float64[:]
78
78
  )
79
79
 
80
80
  #: {element: int (mono position)}
81
- CHEM_MONO_IDX:numba.typed.Dict = numba.typed.Dict.empty(
82
- key_type=numba.types.unicode_type,
83
- value_type=numba.types.int64
81
+ CHEM_MONO_IDX: numba.typed.Dict = numba.typed.Dict.empty(
82
+ key_type=numba.types.unicode_type, value_type=numba.types.int64
84
83
  )
85
84
 
86
- MASS_H:int = None
87
- MASS_C:int = None
88
- MASS_O:int = None
89
- MASS_N:int = None
90
- MASS_H2O:int = None #raise errors if the value is not reset
91
- MASS_NH3:int = None
85
+ MASS_H: int = None
86
+ MASS_C: int = None
87
+ MASS_O: int = None
88
+ MASS_N: int = None
89
+ MASS_H2O: int = None # raise errors if the value is not reset
90
+ MASS_NH3: int = None
91
+
92
92
 
93
- def update_atom_infos(new_atom_info:typing.Dict):
93
+ def update_atom_infos(new_atom_info: typing.Dict):
94
94
  """
95
95
  Args:
96
96
  new_atom_info (Dict): Example, replacing N with 15N
@@ -104,14 +104,14 @@ def update_atom_infos(new_atom_info:typing.Dict):
104
104
 
105
105
  reset_elements()
106
106
 
107
- def reset_elements():
108
107
 
108
+ def reset_elements():
109
109
  global MASS_C, MASS_H, MASS_O, MASS_N
110
110
  global MASS_H2O, MASS_NH3
111
111
 
112
112
  for elem, items in CHEM_INFO_DICT.items():
113
- isotopes = np.array(items['abundance'])
114
- masses = np.array(items['mass'])
113
+ isotopes = np.array(items["abundance"])
114
+ masses = np.array(items["mass"])
115
115
  _sort_idx = np.argsort(masses)
116
116
  masses = masses[_sort_idx]
117
117
  isotopes = isotopes[_sort_idx]
@@ -139,18 +139,19 @@ def reset_elements():
139
139
 
140
140
  CHEM_ISOTOPE_DIST[elem] = _isos[start:end]
141
141
  CHEM_MONO_IDX[elem] = _mono_idx
142
-
143
- MASS_C = CHEM_MONO_MASS['C']
144
- MASS_H = CHEM_MONO_MASS['H']
145
- MASS_N = CHEM_MONO_MASS['N']
146
- MASS_O = CHEM_MONO_MASS['O']
147
- MASS_H2O = CHEM_MONO_MASS['H']*2 + CHEM_MONO_MASS['O']
148
- MASS_NH3 = CHEM_MONO_MASS['H']*3 + CHEM_MONO_MASS['N']
149
-
150
- def load_elem_yaml(yaml_file:str):
151
- '''Load built-in or user-defined element yaml file. Default yaml is:
152
- os.path.join(_base_dir, 'nist_element.yaml')
153
- '''
142
+
143
+ MASS_C = CHEM_MONO_MASS["C"]
144
+ MASS_H = CHEM_MONO_MASS["H"]
145
+ MASS_N = CHEM_MONO_MASS["N"]
146
+ MASS_O = CHEM_MONO_MASS["O"]
147
+ MASS_H2O = CHEM_MONO_MASS["H"] * 2 + CHEM_MONO_MASS["O"]
148
+ MASS_NH3 = CHEM_MONO_MASS["H"] * 3 + CHEM_MONO_MASS["N"]
149
+
150
+
151
+ def load_elem_yaml(yaml_file: str):
152
+ """Load built-in or user-defined element yaml file. Default yaml is:
153
+ os.path.join(_base_dir, 'nist_element.yaml')
154
+ """
154
155
  global CHEM_INFO_DICT
155
156
  global CHEM_MONO_MASS
156
157
  global CHEM_ISOTOPE_DIST
@@ -160,52 +161,42 @@ def load_elem_yaml(yaml_file:str):
160
161
 
161
162
  CHEM_MONO_MASS = {}
162
163
  CHEM_ISOTOPE_DIST = numba.typed.Dict.empty(
163
- key_type=numba.types.unicode_type,
164
- value_type=numba.types.float64[:]
164
+ key_type=numba.types.unicode_type, value_type=numba.types.float64[:]
165
165
  )
166
-
166
+
167
167
  CHEM_MONO_IDX = numba.typed.Dict.empty(
168
- key_type=numba.types.unicode_type,
169
- value_type=numba.types.int64
168
+ key_type=numba.types.unicode_type, value_type=numba.types.int64
170
169
  )
171
170
 
172
171
  reset_elements()
173
172
 
174
- load_elem_yaml(
175
- os.path.join(CONST_FILE_FOLDER,
176
- 'nist_element.yaml'
177
- )
178
- )
179
173
 
180
- def parse_formula(
181
- formula:str
182
- )->list:
183
- '''
184
- Given a formula (str, e.g. `H(1)C(2)O(3)`),
174
+ load_elem_yaml(os.path.join(CONST_FILE_FOLDER, "nist_element.yaml"))
175
+
176
+
177
+ def parse_formula(formula: str) -> list:
178
+ """
179
+ Given a formula (str, e.g. `H(1)C(2)O(3)`),
185
180
  it generates `[('H', 1), ('C', 2), ('O', 3)]`
186
- '''
187
- if not formula: return []
188
- items = [item.split('(') for item in
189
- formula.strip(')').split(')')
190
- ]
181
+ """
182
+ if not formula:
183
+ return []
184
+ items = [item.split("(") for item in formula.strip(")").split(")")]
191
185
  return [(elem, int(n)) for elem, n in items]
192
186
 
193
187
 
194
- def calc_mass_from_formula(formula:str):
195
- '''
188
+ def calc_mass_from_formula(formula: str):
189
+ """
196
190
  Calculates the mass of the formula
197
191
 
198
192
  Parameters
199
193
  ----------
200
194
  formula : str
201
195
  e.g. `H(1)C(2)O(3)`
202
-
196
+
203
197
  Returns
204
198
  -------
205
199
  float
206
200
  mass of the formula
207
- '''
208
- return np.sum([
209
- CHEM_MONO_MASS[elem]*n
210
- for elem, n in parse_formula(formula)
211
- ])
201
+ """
202
+ return np.sum([CHEM_MONO_MASS[elem] * n for elem, n in parse_formula(formula)])
@@ -30,11 +30,11 @@ X: 'C(1000000)'
30
30
  Y: 'C(9)H(9)N(1)O(2)S(0)'
31
31
  Z: 'C(1000000)'
32
32
  # Any other ASCII chars could be the placeholders for future usage.
33
- # For example:
33
+ # For example:
34
34
  # phospho site-specific search (only lower case 'sty' can be modified)
35
35
  # s is S
36
36
  s: 'C(3)H(5)N(1)O(2)S(0)'
37
37
  # t is T
38
38
  t: 'C(4)H(8)N(1)O(5)P(1)'
39
39
  # y is Y
40
- y: 'C(9)H(9)N(1)O(2)S(0)'
40
+ y: 'C(9)H(9)N(1)O(2)S(0)'
@@ -2,7 +2,7 @@ MASS_PROTON: 1.007276467 #https://physics.nist.gov/cgi-bin/cuu/Value?arp|search_
2
2
  MASS_ISOTOPE: 1.0033
3
3
  MAX_ISOTOPE_LEN: 10
4
4
  MOBILITY:
5
- # 1059.62245 is the estimated constant coef in
5
+ # 1059.62245 is the estimated constant coef in
6
6
  # Mason-Schamp equation of Bruker.
7
7
  CCS_IM_COEF: 1059.62245
8
8
  # 28 is the mass of N(2), the default gas in IM bruker
@@ -10,4 +10,4 @@ MOBILITY:
10
10
 
11
11
  # Only applied in peak/fragment dataframes to save RAM
12
12
  PEAK_MZ_DTYPE: float32
13
- PEAK_INTENSITY_DTYPE: float32
13
+ PEAK_INTENSITY_DTYPE: float32
@@ -40,4 +40,4 @@ trypsin_not_p: '([KR](?=[^P]))'
40
40
  trypsin: '([KR])'
41
41
  trypsin/p: '([KR])'
42
42
  non-specific: '()'
43
- no-cleave: '_'
43
+ no-cleave: '_'