isotope-pattern-lib 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,33 @@
1
+ from importlib import resources
2
+ from typing import List
3
+
4
+ from isotope_pattern_lib.core import isotope_pattern
5
+ from isotope_pattern_lib.core.formula_parser import MolecularFormulaParser
6
+ from isotope_pattern_lib.types.settings import Settings
7
+ from isotope_pattern_lib.types.types import IsotopeFormula
8
+
9
+
10
def _default_settings() -> Settings:
    """Load the default settings that ship with the package.

    The settings are read from ``resources/config.yaml`` inside the
    ``isotope_pattern_lib`` package.

    Returns:
        The ``Settings`` parsed from the bundled configuration file.
    """
    # Descend one path segment at a time: passing several segments to a single
    # joinpath() call is not supported by every Traversable implementation on
    # Python 3.9 (zipfile.Path.joinpath accepted exactly one argument before 3.10).
    config = (
        resources.files('isotope_pattern_lib')
        .joinpath('resources')
        .joinpath('config.yaml')
    )
    # parse_from_file() expects a filesystem path, which as_file() provides.
    with resources.as_file(config) as config_path:
        return Settings.parse_from_file(path=str(config_path))


# Module-level parser used by compute_isotope_pattern(); set_parser() rebinds it.
parser = MolecularFormulaParser(settings=_default_settings())
18
+
19
+
20
def set_parser(config_path: str):
    """Rebind the module-level parser to one built from a custom configuration.

    Args:
        config_path: Path to the YAML file that is handed to
            ``Settings.parse_from_file``.
    """
    global parser
    parser = MolecularFormulaParser(
        settings=Settings.parse_from_file(path=config_path)
    )
25
+
26
+
27
def compute_isotope_pattern(formula_string: str) -> List[IsotopeFormula]:
    """Compute the isotope pattern of a molecular formula given as a string.

    Args:
        formula_string: Molecular formula; it is parsed with the module-level
            ``parser``.

    Returns:
        The isotope formulas computed for the parsed formula, in ascending
        order of their ``mass``.
    """
    parsed_formula = parser.parse(raw_string=formula_string)

    # Materialize the generator first, then order the result in place.
    isotope_formulas = list(isotope_pattern.compute_isotope_pattern(formula=parsed_formula))
    isotope_formulas.sort(key=lambda item: item.mass)
    return isotope_formulas
File without changes
@@ -0,0 +1,30 @@
1
+ import re
2
+
3
+ from isotope_pattern_lib.types.settings import Settings
4
+ from isotope_pattern_lib.types.types import MolecularFormula
5
+
6
+
7
class MolecularFormulaParser:
    """Parser that turns a molecular formula string into a ``MolecularFormula``.

    A formula is read as a sequence of element tokens. Each token is an
    uppercase letter, an optional lowercase letter and an optional decimal
    count; a missing count means 1.
    """

    # Group 1 is the element symbol, group 2 the (possibly empty) count.
    pattern = re.compile(r'([A-Z][a-z]?)(\d*)')

    def __init__(self, settings: Settings):
        """Index the elements of ``settings`` by name for lookup while parsing."""
        self.valid_elements = {element.name: element for element in settings.elements}

    def parse(self, raw_string: str) -> MolecularFormula:
        """Parse ``raw_string`` into a ``MolecularFormula``.

        Counts of an element that occurs several times (e.g. the two ``H``
        tokens of ``C2H5OH``) are added up. Whitespace between tokens is
        ignored.

        Args:
            raw_string: The molecular formula, e.g. ``C2H5OH``.

        Returns:
            A ``MolecularFormula`` named ``raw_string`` that maps each element
            to its total count.

        Raises:
            ValueError: If a token names an element that is not among the
                configured elements, or if the string contains non-whitespace
                text that does not belong to any element token.
        """
        element_counts = dict()
        position = 0
        for match in MolecularFormulaParser.pattern.finditer(raw_string):
            # Text between two tokens would otherwise be dropped silently.
            self._reject_unparsed_text(raw_string, position, match.start())
            position = match.end()

            symbol, digits = match.groups()
            try:
                element = self.valid_elements[symbol]
            except KeyError:
                raise ValueError(f"'{symbol}' was not found among provided element list") from None

            count = int(digits) if digits else 1
            element_counts[element] = element_counts.get(element, 0) + count

        # Also check whatever follows the last token.
        self._reject_unparsed_text(raw_string, position, len(raw_string))

        return MolecularFormula(name=raw_string, elements=element_counts)

    @staticmethod
    def _reject_unparsed_text(raw_string: str, start: int, end: int) -> None:
        """Raise ``ValueError`` if ``raw_string[start:end]`` holds anything but whitespace."""
        skipped = raw_string[start:end]
        if skipped.strip():
            raise ValueError(f"'{skipped}' in '{raw_string}' is not a valid element token")
@@ -0,0 +1,51 @@
1
+ import itertools
2
+ from typing import List, Any, Generator
3
+
4
+ from scipy.stats import multinomial
5
+
6
+ from isotope_pattern_lib.types.types import (
7
+ Isotope,
8
+ IsotopeFormula,
9
+ MolecularFormula
10
+ )
11
+ from isotope_pattern_lib.utils import utils
12
+
13
+
14
def compute_isotope_pattern(formula: MolecularFormula) -> Generator[IsotopeFormula, Any, None]:
    """Lazily yield every isotope composition of a molecular formula.

    The per-element distributions are combined as a Cartesian product. Each
    yielded ``IsotopeFormula`` joins one distribution entry per element: the
    names are concatenated, the isotope counts are merged and the
    probabilities are multiplied.

    Args:
        formula: The molecular formula whose ``elements`` map each element to
            its count.

    Yields:
        One ``IsotopeFormula`` per combination of per-element entries.
    """
    per_element_distributions = [
        compute_isotope_distributions(element.isotopes, count)
        for element, count in formula.elements.items()
    ]

    for parts in itertools.product(*per_element_distributions):
        # Multiply step by step, starting from 1.0, one factor per element.
        probability = 1.0
        for part in parts:
            probability *= part.probability

        yield IsotopeFormula(
            name="".join(part.name for part in parts),
            isotopes={
                isotope: isotope_count
                for part in parts
                for isotope, isotope_count in part.isotopes.items()
            },
            probability=probability,
        )
35
+
36
+
37
def compute_isotope_distributions(isotopes: List[Isotope], element_count: int) -> List[IsotopeFormula]:
    """Enumerate the isotope compositions of ``element_count`` atoms of one element.

    Every way of distributing ``element_count`` atoms over ``isotopes`` becomes
    one ``IsotopeFormula``. Its probability is the multinomial probability mass
    of that distribution, with the isotope abundances as event probabilities.

    Args:
        isotopes: The isotopes of the element.
        element_count: Number of atoms of the element.

    Returns:
        One ``IsotopeFormula`` per distribution of the atoms over the isotopes.
    """
    abundances = [isotope.abundance for isotope in isotopes]
    distribution = multinomial(n=element_count, p=abundances)

    def build_formula(counts):
        # Pair every isotope with the number of atoms assigned to it.
        count_by_isotope = dict(zip(isotopes, counts))
        label = "".join(
            f"{isotope.name}[{count}]" for isotope, count in count_by_isotope.items()
        )
        return IsotopeFormula(
            name=label,
            isotopes=count_by_isotope,
            probability=distribution.pmf(counts),
        )

    return [
        build_formula(counts)
        for counts in utils.generate_arrays_with_preserved_sum(
            total_sum=element_count,
            size=len(isotopes),
        )
    ]
@@ -0,0 +1,33 @@
1
+ - name: H
2
+ description: hydrogen
3
+ isotopes:
4
+ - name: H1
5
+ mass: 1.0078250
6
+ abundance: 1.000
7
+ - name: C
8
+ description: carbon
9
+ isotopes:
10
+ - name: C12
11
+ mass: 12.0000000
12
+ abundance: 0.989
13
+ - name: C13
14
+ mass: 13.0033548
15
+ abundance: 0.011
16
+ - name: O
17
+ description: oxygen
18
+ isotopes:
19
+ - name: O16
20
+ mass: 15.9949146
21
+ abundance: 0.998
22
+ - name: O18
23
+ mass: 17.9991596
24
+ abundance: 0.002
25
+ - name: N
26
+ description: nitrogen
27
+ isotopes:
28
+ - name: N14
29
+ mass: 14.0030740
30
+ abundance: 0.996
31
+ - name: N15
32
+ mass: 15.0001089
33
+ abundance: 0.004
File without changes
@@ -0,0 +1,52 @@
1
+ from typing import (
2
+ FrozenSet,
3
+ List
4
+ )
5
+
6
+ import yaml
7
+
8
+ from isotope_pattern_lib.types.types import (
9
+ Element,
10
+ Isotope
11
+ )
12
+
13
+
14
class Settings:
    """Container for the chemical elements, with their isotopes, known to the library."""

    def __init__(self, elements: FrozenSet[Element]):
        """Store the given set of elements."""
        self.elements = elements

    @classmethod
    def parse_from_file(cls, path: str) -> "Settings":
        """Build the settings from a YAML file.

        The file is expected to hold a list of entries, each with a ``name``
        and a list of ``isotopes``. Other keys of an entry are ignored. An
        empty file results in settings without any elements.

        Args:
            path: Path to the YAML file.

        Returns:
            The settings that hold one ``Element`` per distinct entry.
        """
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale-dependent default encoding.
        with open(path, 'r', encoding='utf-8') as file:
            raw_elements = yaml.safe_load(file)

        # safe_load() returns None for an empty document; treat it as "no elements"
        # rather than failing on the iteration below.
        if raw_elements is None:
            raw_elements = []

        elements = {
            Element(
                name=raw_element['name'],
                isotopes=cls.retrieve_isotopes(raw_isotopes=raw_element['isotopes']),
            )
            for raw_element in raw_elements
        }

        return cls(elements=frozenset(elements))

    @staticmethod
    def retrieve_isotopes(raw_isotopes: List[dict]) -> List[Isotope]:
        """Convert raw isotope mappings into ``Isotope`` instances.

        Args:
            raw_isotopes: Mappings with the keys ``name``, ``mass`` and
                ``abundance``.

        Returns:
            The isotopes, in the order of ``raw_isotopes``.
        """
        return [
            Isotope(
                name=raw_isotope['name'],
                mass=raw_isotope['mass'],
                abundance=raw_isotope['abundance'],
            )
            for raw_isotope in raw_isotopes
        ]
@@ -0,0 +1,109 @@
1
+ from typing import (
2
+ Dict,
3
+ List
4
+ )
5
+
6
+
7
class Isotope:
    """An isotope described by its name, mass and abundance.

    Two isotopes are equal when all three attributes are equal; the hash is
    derived from the same three attributes.
    """

    def __init__(self, name: str, mass: float, abundance: float):
        self.name = name
        self.mass = mass
        self.abundance = abundance

    def __repr__(self):
        # Mass and abundance are shown rounded to three decimals.
        mass = round(self.mass, 3)
        abundance = round(self.abundance, 3)
        return f'{self.__class__.__name__}(name={self.name}, mass={mass}, abundance={abundance})'

    def __eq__(self, other):
        if not isinstance(other, Isotope):
            return False
        return (
            self.name == other.name
            and self.mass == other.mass
            and self.abundance == other.abundance
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.name, self.mass, self.abundance))
33
+
34
+
35
class Element:
    """A chemical element described by its name and its isotopes.

    Equality and hashing use the name and the *set* of isotopes, so the order
    of the isotopes does not matter for comparisons.
    """

    def __init__(self, name: str, isotopes: List[Isotope]):
        self.name = name
        self.isotopes = isotopes
        # Order-independent, hashable snapshot used by __eq__ and __hash__.
        self._isotopes = frozenset(isotopes)

    def __repr__(self):
        joined_names = ",".join(isotope.name for isotope in self.isotopes)
        return f'{self.__class__.__name__}(name={self.name}, isotopes={joined_names})'

    def __eq__(self, other):
        if not isinstance(other, Element):
            return False
        return self.name == other.name and self._isotopes == other._isotopes

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.name, self._isotopes))
58
+
59
+
60
class MolecularFormula:
    """A molecular formula: a name plus a mapping from elements to their counts.

    Two formulas are equal when both the name and the element counts are equal.
    """

    def __init__(self, name: str, elements: Dict[Element, int]):
        self.name = name
        self.elements = elements

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'{class_name}(name={self.name})'

    def __eq__(self, other):
        if not isinstance(other, MolecularFormula):
            return False
        return self.name == other.name and self.elements == other.elements

    def __ne__(self, other):
        return not self.__eq__(other)
78
+
79
+
80
class IsotopeFormula:
    """An isotope composition with its probability and total mass.

    ``mass`` is computed once, on construction, from the isotope counts.
    Equality compares the name, the isotope counts and the probability.
    """

    def __init__(self, name: str, isotopes: Dict[Isotope, int], probability: float):
        self.name = name
        self.isotopes = isotopes
        self.probability = probability
        # Needs self.isotopes, so it has to come after that assignment.
        self.mass = self.compute_mass()

    def __repr__(self):
        # Mass and probability are shown rounded to three decimals.
        mass = round(self.mass, 3)
        probability = round(self.probability, 3)
        return f'{self.__class__.__name__}(name={self.name}, mass={mass}, probability={probability})'

    def compute_mass(self) -> float:
        """Return the sum of ``isotope.mass * count`` over all isotopes."""
        total = 0.0
        for isotope in self.isotopes:
            total += isotope.mass * self.isotopes[isotope]
        return total

    def __eq__(self, other):
        if not isinstance(other, IsotopeFormula):
            return False
        return (
            self.name == other.name
            and self.isotopes == other.isotopes
            and self.probability == other.probability
        )

    def __ne__(self, other):
        return not self.__eq__(other)
File without changes
@@ -0,0 +1,10 @@
1
from typing import Iterator, List
2
+
3
+
4
def generate_arrays_with_preserved_sum(total_sum: int, size: int) -> Iterator[List[int]]:
    """Lazily yield every list of ``size`` integers from ``0..total_sum`` that adds up to ``total_sum``.

    The lists are produced in ascending order of their leading entries, e.g.
    ``[0, 2]``, ``[1, 1]``, ``[2, 0]`` for ``total_sum=2`` and ``size=2``.

    Args:
        total_sum: The value every yielded list sums to.
        size: The length of every yielded list; must be at least 1.

    Yields:
        Lists of length ``size`` whose entries sum to ``total_sum``.

    Raises:
        ValueError: If ``size`` is smaller than 1. As this is a generator, the
            error is raised when iteration starts, not when the function is
            called.
    """
    # Without this guard a size below 1 never reaches the base case and the
    # recursion only stops with a RecursionError.
    if size < 1:
        raise ValueError(f"size must be at least 1, got {size}")

    if size == 1:
        # A single slot has to take the whole remaining sum.
        yield [total_sum]
        return

    for head in range(total_sum + 1):
        for tail in generate_arrays_with_preserved_sum(total_sum=total_sum - head, size=size - 1):
            yield [head] + tail
@@ -0,0 +1,161 @@
1
+ Metadata-Version: 2.4
2
+ Name: isotope-pattern-lib
3
+ Version: 1.0.0
4
+ Summary: A library to compute the isotope pattern of a given molecular formula
5
+ Project-URL: Homepage, https://github.com/ksmirn0v/isotope-pattern-lib
6
+ Author-email: Kirill S Smirnov <kirill.smirnov.mail@gmail.com>
7
+ Keywords: isotope pattern,mass spectrometry,molecular formula
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Python: >=3.9
15
+ Requires-Dist: pyyaml>=6.0
16
+ Requires-Dist: scipy>=1.11
17
+ Description-Content-Type: text/markdown
18
+
19
+ # isotope-pattern-lib #
20
+
21
+ The library can be used to compute the isotope pattern
22
+ of a given molecular formula.
23
+
24
+ ## Requirements ##
25
+
26
+ `python >= 3.9`
27
+
28
+ ## Development ##
29
+
30
+ The project uses [`uv`](https://docs.astral.sh/uv/) as its package manager.
31
+
32
+ ```
33
+ uv sync --group test --group dev
34
+ ```
35
+
36
+ Run the test suite (unit and integration tests are run as separate `pytest`
37
+ invocations, so that mutated global state in one suite does not leak into the
38
+ other):
39
+
40
+ ```
41
+ uv run pytest tests/unit
42
+ uv run pytest tests/integration
43
+ ```
44
+
45
+ Build a distributable wheel/sdist:
46
+
47
+ ```
48
+ uv build
49
+ ```
50
+
51
+ ## Releasing ##
52
+
53
+ Every pull request title into `master` must contain exactly one of
54
+ `[MAJOR]`, `[MINOR]`, `[PATCH]`. This determines the version bump that will
55
+ be applied on merge, relative to the latest `X.Y.Z` git tag:
56
+
57
+ - `[MAJOR]` bumps the first number, resetting the rest to zero. If no tag
58
+ exists yet, this creates the first release, `1.0.0`.
59
+ - `[MINOR]` bumps the second number, resetting the patch number to zero.
60
+ - `[PATCH]` bumps the third number.
61
+
62
+ `[MINOR]`/`[PATCH]` require an existing tag to bump from — a PR using either
63
+ of them is invalid until a `[MAJOR]` release has been made at least once.
64
+
65
+ On every PR, CI computes this future version and publishes a `.devN` build
66
+ of it to [test.pypi.org](https://test.pypi.org) for end-to-end verification.
67
+ On merge to `master`, CI creates the git tag and publishes that exact
68
+ version to [pypi.org](https://pypi.org).
69
+
70
+ ## Usage ##
71
+
72
+ Although any of the classes/functions can be used independently,
73
+ the library has the main endpoints, located in
74
+ `isotope_pattern_lib/api.py`:
75
+ ```
76
+ from isotope_pattern_lib import api
77
+ ```
78
+ ---
79
+ ```
80
+ api.set_parser(config_path: str)
81
+ ```
82
+ The call sets the parameters for parsing molecular formulas.
83
+ These parameters should be listed as elements with their corresponding isotopes
84
+ in a `yaml` file that is located in `config_path`.
85
+ The structure of the file will be described later.
86
+
87
+ The call is optional. If it is never made, a default parser is used.
88
+
89
+ ---
90
+ ```
91
+ api.compute_isotope_pattern(formula_string: str)
92
+ ```
93
+ The call computes the isotope pattern of a given molecular formula, represented
94
+ as a string `formula_string` (_e.g._ `C2H5OH`).
95
+ The output is a list of `IsotopeFormula` instances, each containing
96
+ the information on the constituent isotopes and the probability of the
97
+ associated compositions.
98
+
99
+ ## YAML file structure ##
100
+
101
+ The YAML file defines how molecular formulas are parsed.
102
+ It consists of a list of elements with the definition of the corresponding isotopes.
103
+ Each entry has the following form:
104
+ ```
105
+ - name: string
106
+ description: string
107
+ isotopes:
108
+ - name: string
109
+ mass: float
110
+ abundance: float
111
+ - ...
112
+ - ...
113
+ ```
114
+ **Comments**:
115
+
116
+ - The field `name` is arbitrary, but the raw strings that are used in the
117
+ `compute_isotope_pattern(formula_string: str)` call should not contain
118
+ any element names that are not part of the `name` fields, specified in the
119
+ `yaml` file.
120
+ - Each element can contain as many isotopes as desired.
121
+ - The field `abundance` should define a number less than or equal to `1.0`.
122
+ In addition, the abundances of the different isotopes of an element
122
+ should not sum to more than `1.0`.
124
+
125
+ ## Default YAML file structure ##
126
+
127
+ ```
128
+ - name: H
129
+ description: hydrogen
130
+ isotopes:
131
+ - name: H1
132
+ mass: 1.0078250
133
+ abundance: 1.000
134
+ - name: C
135
+ description: carbon
136
+ isotopes:
137
+ - name: C12
138
+ mass: 12.0000000
139
+ abundance: 0.989
140
+ - name: C13
141
+ mass: 13.0033548
142
+ abundance: 0.011
143
+ - name: O
144
+ description: oxygen
145
+ isotopes:
146
+ - name: O16
147
+ mass: 15.9949146
148
+ abundance: 0.998
149
+ - name: O18
150
+ mass: 17.9991596
151
+ abundance: 0.002
152
+ - name: N
153
+ description: nitrogen
154
+ isotopes:
155
+ - name: N14
156
+ mass: 14.0030740
157
+ abundance: 0.996
158
+ - name: N15
159
+ mass: 15.0001089
160
+ abundance: 0.004
161
+ ```
@@ -0,0 +1,14 @@
1
+ isotope_pattern_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ isotope_pattern_lib/api.py,sha256=SNdkspDwZK3P_ZX40OSBRi-TJg4qspAsZBEXf5ckEMo,1047
3
+ isotope_pattern_lib/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ isotope_pattern_lib/core/formula_parser.py,sha256=kG56O6ZzrR-20KfMHo2u7lkXzDk5eZQ5NpIapvPJBfI,1058
5
+ isotope_pattern_lib/core/isotope_pattern.py,sha256=-R2PS4t7blvu58ESsQZ1VcvQa96YTEayPWoSafgUNgg,1684
6
+ isotope_pattern_lib/resources/config.yaml,sha256=2ZUVMEvMnU709F0bpVLRZvxPknu8e3LkWOSaT9vvYCc,612
7
+ isotope_pattern_lib/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ isotope_pattern_lib/types/settings.py,sha256=gzMeds5pnBotEoWDLW-WGDpxDTV1yuzniguOmfzD5DE,1175
9
+ isotope_pattern_lib/types/types.py,sha256=9W3k6pb91Eto8UqCx8cgYyTP4Mo03Qhvdfi1UgnIbwQ,3313
10
+ isotope_pattern_lib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ isotope_pattern_lib/utils/utils.py,sha256=1YeChOgDV8LFDQYLFGHP88t90KSVc5cjFxpen7OyOc0,332
12
+ isotope_pattern_lib-1.0.0.dist-info/METADATA,sha256=f-HkzTtN2GOialXNihqWBju7CkCvM9nLeNJVxWovE3M,4624
13
+ isotope_pattern_lib-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
14
+ isotope_pattern_lib-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any