PyPI - isotope-pattern-lib - Versions diffs - 1.0.0__py3-none-any.whl - Mend

isotope-pattern-lib 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

isotope_pattern_lib/__init__.py +0 -0
isotope_pattern_lib/api.py +33 -0
isotope_pattern_lib/core/__init__.py +0 -0
isotope_pattern_lib/core/formula_parser.py +30 -0
isotope_pattern_lib/core/isotope_pattern.py +51 -0
isotope_pattern_lib/resources/config.yaml +33 -0
isotope_pattern_lib/types/__init__.py +0 -0
isotope_pattern_lib/types/settings.py +52 -0
isotope_pattern_lib/types/types.py +109 -0
isotope_pattern_lib/utils/__init__.py +0 -0
isotope_pattern_lib/utils/utils.py +10 -0
isotope_pattern_lib-1.0.0.dist-info/METADATA +161 -0
isotope_pattern_lib-1.0.0.dist-info/RECORD +14 -0
isotope_pattern_lib-1.0.0.dist-info/WHEEL +4 -0

isotope_pattern_lib/__init__.py ADDED Viewed

File without changes

isotope_pattern_lib/api.py ADDED Viewed

@@ -0,0 +1,33 @@
+from importlib import resources
+from typing import List
+from isotope_pattern_lib.core import isotope_pattern
+from isotope_pattern_lib.core.formula_parser import MolecularFormulaParser
+from isotope_pattern_lib.types.settings import Settings
+from isotope_pattern_lib.types.types import IsotopeFormula
def _default_settings() -> Settings:
    """Load the element/isotope settings bundled with the package.

    Returns:
        Settings parsed from ``resources/config.yaml`` inside the
        ``isotope_pattern_lib`` package.
    """
    # Chain single-segment joins with ``/``: multi-argument ``joinpath`` is
    # only guaranteed for every Traversable from Python 3.11 on, while the
    # package metadata declares support for Python >= 3.9.
    config = resources.files('isotope_pattern_lib') / 'resources' / 'config.yaml'
    # ``as_file`` yields a concrete filesystem path for the resource, which
    # is what ``Settings.parse_from_file`` expects.
    with resources.as_file(config) as config_path:
        return Settings.parse_from_file(path=str(config_path))


# Module-level parser used by ``compute_isotope_pattern``; it is built from
# the bundled configuration at import time and can be replaced via ``set_parser``.
parser = MolecularFormulaParser(settings=_default_settings())
def set_parser(config_path: str):
    """Replace the module-level parser with one configured from a YAML file.

    Args:
        config_path: Path of the YAML file that lists the elements and their
            isotopes; it is read through ``Settings.parse_from_file``.
    """
    global parser
    # The settings are parsed before the global is rebound, so a failure
    # while reading the file leaves the previous parser in place.
    parser = MolecularFormulaParser(
        settings=Settings.parse_from_file(path=config_path)
    )
def compute_isotope_pattern(formula_string: str) -> List[IsotopeFormula]:
    """Compute the isotope pattern of a molecular formula given as a string.

    Args:
        formula_string: Formula text handed to the module-level ``parser``.

    Returns:
        Every isotope composition produced by
        ``isotope_pattern.compute_isotope_pattern``, ordered by ascending mass.
    """
    parsed_formula = parser.parse(raw_string=formula_string)
    ordered = list(isotope_pattern.compute_isotope_pattern(formula=parsed_formula))
    ordered.sort(key=lambda isotope_formula: isotope_formula.mass)
    return ordered

isotope_pattern_lib/core/__init__.py ADDED Viewed

File without changes

isotope_pattern_lib/core/formula_parser.py ADDED Viewed

@@ -0,0 +1,30 @@
+import re
+from isotope_pattern_lib.types.settings import Settings
+from isotope_pattern_lib.types.types import MolecularFormula
class MolecularFormulaParser:
    """Parse molecular formula strings such as ``C2H5OH`` into element counts.

    Only element names present in the supplied settings are accepted.
    """

    # One token: an element symbol (an uppercase letter optionally followed by
    # a single lowercase letter) and an optional decimal count.
    pattern = re.compile(r'([A-Z][a-z]?)(\d*)')

    def __init__(self, settings: Settings):
        """Index the configured elements by name for lookup while parsing.

        Args:
            settings: Object whose ``elements`` attribute is iterated; each
                element is stored under its ``name``.
        """
        self.valid_elements = {element.name: element for element in settings.elements}

    def parse(self, raw_string: str) -> MolecularFormula:
        """Convert ``raw_string`` into a ``MolecularFormula``.

        A symbol without a number counts once and repeated symbols are
        summed, so ``C2H5OH`` contains six ``H``.

        Args:
            raw_string: Formula text made only of ``<Symbol><count>`` tokens.

        Returns:
            A ``MolecularFormula`` named ``raw_string`` that maps each
            configured element to its total count.

        Raises:
            ValueError: If a symbol is not among the configured elements, or
                if any part of ``raw_string`` is not a symbol/count token
                (lowercase symbols, brackets, whitespace, leading digits).
        """
        element_counts = {}
        consumed = 0
        for token in MolecularFormulaParser.pattern.finditer(raw_string):
            # A gap between consecutive tokens is text the grammar does not
            # cover; silently skipping it would yield a wrong formula.
            if token.start() != consumed:
                raise ValueError(
                    f"'{raw_string[consumed:token.start()]}' could not be parsed "
                    f"in formula '{raw_string}'"
                )
            consumed = token.end()
            symbol, digits = token.groups()
            if symbol not in self.valid_elements:
                raise ValueError(f"'{symbol}' was not found among provided element list")
            element = self.valid_elements[symbol]
            count = int(digits) if digits else 1
            element_counts[element] = element_counts.get(element, 0) + count
        # Trailing text after the last token is rejected for the same reason.
        if consumed != len(raw_string):
            raise ValueError(
                f"'{raw_string[consumed:]}' could not be parsed "
                f"in formula '{raw_string}'"
            )
        return MolecularFormula(name=raw_string, elements=element_counts)

isotope_pattern_lib/core/isotope_pattern.py ADDED Viewed

@@ -0,0 +1,51 @@
+import itertools
+from typing import List, Any, Generator
+from scipy.stats import multinomial
+from isotope_pattern_lib.types.types import (
+    Isotope,
+    IsotopeFormula,
+    MolecularFormula
+)
+from isotope_pattern_lib.utils import utils
def compute_isotope_pattern(formula: MolecularFormula) -> Generator[IsotopeFormula, Any, None]:
    """Lazily yield every isotope composition of ``formula``.

    For each element the per-element isotope distributions are computed
    first; the Cartesian product of those distributions then gives the
    compositions of the whole molecule.

    Args:
        formula: Molecular formula whose ``elements`` maps each element to
            its count.

    Yields:
        One ``IsotopeFormula`` per combination, in ``itertools.product``
        order (not sorted). Its name is the concatenation of the per-element
        names, its isotope counts are the merged per-element counts and its
        probability is the product of the per-element probabilities.
    """
    per_element_options = [
        compute_isotope_distributions(element.isotopes, count)
        for element, count in formula.elements.items()
    ]
    for selection in itertools.product(*per_element_options):
        merged_counts = {}
        joint_probability = 1.0
        for partial in selection:
            merged_counts.update(partial.isotopes)
            joint_probability *= partial.probability
        yield IsotopeFormula(
            name="".join(partial.name for partial in selection),
            isotopes=merged_counts,
            probability=joint_probability,
        )
def compute_isotope_distributions(isotopes: List[Isotope], element_count: int) -> List[IsotopeFormula]:
    """Enumerate all ways to distribute ``element_count`` atoms over ``isotopes``.

    Args:
        isotopes: Isotopes of one element; their ``abundance`` values are
            used as the category probabilities of a multinomial distribution.
        element_count: Number of atoms of the element in the molecule.

    Returns:
        One ``IsotopeFormula`` per count vector summing to ``element_count``.
        Each is named ``<isotope>[<count>]...`` and carries the multinomial
        probability of that count vector.
    """
    # NOTE(review): scipy documents ``p`` as summing to 1 - confirm how
    # abundance lists that sum to less than 1 are meant to be handled.
    abundances = [isotope.abundance for isotope in isotopes]
    distribution = multinomial(n=element_count, p=abundances)
    arrangements = utils.generate_arrays_with_preserved_sum(
        total_sum=element_count,
        size=len(isotopes),
    )
    isotope_formulas = []
    for arrangement in arrangements:
        isotope_counts = dict(zip(isotopes, arrangement))
        label = "".join(
            [f"{isotope.name}[{count}]" for isotope, count in isotope_counts.items()]
        )
        isotope_formulas.append(
            IsotopeFormula(
                name=label,
                isotopes=isotope_counts,
                probability=distribution.pmf(arrangement),
            )
        )
    return isotope_formulas

isotope_pattern_lib/resources/config.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+- name: H
+  description: hydrogen
+  isotopes:
+    - name: H1
+      mass: 1.0078250
+      abundance: 1.000
+- name: C
+  description: carbon
+  isotopes:
+    - name: C12
+      mass: 12.0000000
+      abundance: 0.989
+    - name: C13
+      mass: 13.0033548
+      abundance: 0.011
+- name: O
+  description: oxygen
+  isotopes:
+    - name: O16
+      mass: 15.9949146
+      abundance: 0.998
+    - name: O18
+      mass: 17.9991596
+      abundance: 0.002
+- name: N
+  description: nitrogen
+  isotopes:
+    - name: N14
+      mass: 14.0030740
+      abundance: 0.996
+    - name: N15
+      mass: 15.0001089
+      abundance: 0.004

isotope_pattern_lib/types/__init__.py ADDED Viewed

File without changes

isotope_pattern_lib/types/settings.py ADDED Viewed

@@ -0,0 +1,52 @@
+from typing import (
+    FrozenSet,
+    List
+)
+import yaml
+from isotope_pattern_lib.types.types import (
+    Element,
+    Isotope
+)
class Settings:
    """Container for the set of elements that formulas may be built from."""

    def __init__(self, elements: FrozenSet[Element]):
        self.elements = elements

    @classmethod
    def parse_from_file(cls, path: str):
        """Build settings from a YAML file listing elements and their isotopes.

        Args:
            path: Location of a YAML document whose top level is a list of
                mappings with the keys ``name`` and ``isotopes``. Other keys
                (such as ``description``) are not read.

        Returns:
            An instance of ``cls`` holding a frozenset of ``Element`` objects.
            An empty document produces settings without any elements.

        Raises:
            KeyError: If an element or isotope entry lacks a required key.
        """
        # Open in binary mode so that PyYAML performs its own encoding
        # detection instead of depending on the platform's locale encoding.
        with open(path, 'rb') as file:
            # ``safe_load`` returns None for an empty document.
            raw_elements = yaml.safe_load(file) or []
        elements = {
            Element(
                name=raw_element['name'],
                isotopes=cls.retrieve_isotopes(raw_isotopes=raw_element['isotopes']),
            )
            for raw_element in raw_elements
        }
        return cls(elements=frozenset(elements))

    @staticmethod
    def retrieve_isotopes(raw_isotopes: List[dict]) -> List[Isotope]:
        """Convert raw isotope mappings into ``Isotope`` objects.

        Args:
            raw_isotopes: Mappings with the keys ``name``, ``mass`` and
                ``abundance``.

        Returns:
            The isotopes in the order in which they were listed.
        """
        return [
            Isotope(
                name=raw_isotope['name'],
                mass=raw_isotope['mass'],
                abundance=raw_isotope['abundance'],
            )
            for raw_isotope in raw_isotopes
        ]

isotope_pattern_lib/types/types.py ADDED Viewed

@@ -0,0 +1,109 @@
+from typing import (
+    Dict,
+    List
+)
class Isotope:
    """A single isotope described by its name, mass and abundance.

    Instances compare and hash by ``(name, mass, abundance)``, so they can be
    used as dictionary keys and set members.
    """

    def __init__(self, name: str, mass: float, abundance: float):
        self.name = name
        self.mass = mass
        self.abundance = abundance

    def __repr__(self):
        # Mass and abundance are rounded for display only.
        return (
            f'{self.__class__.__name__}'
            f'(name={self.name}, mass={round(self.mass, 3)}, abundance={round(self.abundance, 3)})'
        )

    def __eq__(self, other):
        # NotImplemented (rather than False) lets Python try the other
        # operand's comparison before falling back to "not equal".
        # ``!=`` is derived from this method automatically.
        if not isinstance(other, Isotope):
            return NotImplemented
        return (
            self.name == other.name
            and self.mass == other.mass
            and self.abundance == other.abundance
        )

    def __hash__(self):
        return hash((self.name, self.mass, self.abundance))
class Element:
    """A chemical element with a name and a list of isotopes.

    Equality and hashing use the name together with the *set* of isotopes
    given at construction, so isotope order and duplicates do not matter.
    """

    def __init__(self, name: str, isotopes: List[Isotope]):
        self.name = name
        self.isotopes = isotopes
        # Hashable snapshot used by __eq__/__hash__; it is taken once here and
        # does not follow later changes to ``self.isotopes``.
        self._isotopes = frozenset(isotopes)

    def __repr__(self):
        isotope_names = [isotope.name for isotope in self.isotopes]
        return f'{self.__class__.__name__}(name={self.name}, isotopes={",".join(isotope_names)})'

    def __eq__(self, other):
        # NotImplemented (rather than False) lets Python try the other
        # operand's comparison before falling back to "not equal".
        # ``!=`` is derived from this method automatically.
        if not isinstance(other, Element):
            return NotImplemented
        return self.name == other.name and self._isotopes == other._isotopes

    def __hash__(self):
        return hash((self.name, self._isotopes))
class MolecularFormula:
    """A molecular formula: its textual name and a count for each element.

    Two formulas are equal when both the name and the element counts match.
    """

    # Defining __eq__ already makes instances unhashable; stating it
    # explicitly documents that this is intended (``elements`` is a dict).
    __hash__ = None

    def __init__(self, name: str, elements: Dict[Element, int]):
        self.name = name
        self.elements = elements

    def __repr__(self):
        return f'{self.__class__.__name__}(name={self.name})'

    def __eq__(self, other):
        # NotImplemented (rather than False) lets Python try the other
        # operand's comparison before falling back to "not equal".
        # ``!=`` is derived from this method automatically.
        if not isinstance(other, MolecularFormula):
            return NotImplemented
        return self.name == other.name and self.elements == other.elements
class IsotopeFormula:
    """One isotope composition together with its probability and mass.

    ``mass`` is computed once, at construction, from the isotope counts.
    Equality compares name, isotope counts and probability (exactly); the
    derived mass is not compared separately.
    """

    # Defining __eq__ already makes instances unhashable; stating it
    # explicitly documents that this is intended (``isotopes`` is a dict).
    __hash__ = None

    def __init__(self, name: str, isotopes: Dict[Isotope, int], probability: float):
        self.name = name
        self.isotopes = isotopes
        self.probability = probability
        self.mass = self.compute_mass()

    def __repr__(self):
        # Mass and probability are rounded for display only.
        return (
            f'{self.__class__.__name__}'
            f'(name={self.name}, mass={round(self.mass, 3)}, probability={round(self.probability, 3)})'
        )

    def compute_mass(self) -> float:
        """Return the sum of ``isotope.mass * count`` over all isotopes."""
        mass = 0.0
        for isotope, count in self.isotopes.items():
            mass += isotope.mass * count
        return mass

    def __eq__(self, other):
        # NotImplemented (rather than False) lets Python try the other
        # operand's comparison before falling back to "not equal".
        # ``!=`` is derived from this method automatically.
        if not isinstance(other, IsotopeFormula):
            return NotImplemented
        return (
            self.name == other.name
            and self.isotopes == other.isotopes
            and self.probability == other.probability
        )

isotope_pattern_lib/utils/__init__.py ADDED Viewed

File without changes

isotope_pattern_lib/utils/utils.py ADDED Viewed

@@ -0,0 +1,10 @@
from typing import Iterator, List
def generate_arrays_with_preserved_sum(total_sum: int, size: int) -> Iterator[List[int]]:
    """Iterate over all lists of ``size`` non-negative integers summing to ``total_sum``.

    The lists are produced lazily, ordered by ascending first entry, then by
    ascending second entry, and so on; e.g. ``total_sum=2, size=2`` gives
    ``[0, 2]``, ``[1, 1]``, ``[2, 0]``.

    Args:
        total_sum: Value every produced list adds up to; must be >= 0.
        size: Length of every produced list; must be >= 1.

    Returns:
        An iterator over the lists.

    Raises:
        ValueError: If ``size`` is smaller than 1 (the recursion would never
            reach its base case) or ``total_sum`` is negative.
    """
    # Validate here, outside the generator, so that bad arguments fail at the
    # call site instead of on the first iteration.
    if size < 1:
        raise ValueError(f"size must be at least 1, got {size}")
    if total_sum < 0:
        raise ValueError(f"total_sum must be non-negative, got {total_sum}")
    return _iter_arrays_with_preserved_sum(total_sum, size)


def _iter_arrays_with_preserved_sum(total_sum, size):
    """Recursive generator behind ``generate_arrays_with_preserved_sum``."""
    if size == 1:
        # A single slot has to take everything that is left.
        yield [total_sum]
        return
    for first in range(total_sum + 1):
        for rest in _iter_arrays_with_preserved_sum(total_sum - first, size - 1):
            yield [first] + rest

isotope_pattern_lib-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,161 @@
+Metadata-Version: 2.4
+Name: isotope-pattern-lib
+Version: 1.0.0
+Summary: A library to compute the isotope pattern of a given molecular formula
+Project-URL: Homepage, https://github.com/ksmirn0v/isotope-pattern-lib
+Author-email: Kirill S Smirnov <kirill.smirnov.mail@gmail.com>
+Keywords: isotope pattern,mass spectrometry,molecular formula
+Classifier: Development Status :: 3 - Alpha
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.9
+Requires-Dist: pyyaml>=6.0
+Requires-Dist: scipy>=1.11
+Description-Content-Type: text/markdown
+# isotope-pattern-lib #
+The library can be used to compute the isotope pattern
+of a given molecular formula.
+## Requirements ##
+`python >= 3.9`
+## Development ##
+The project uses [`uv`](https://docs.astral.sh/uv/) as its package manager.
+```
+uv sync --group test --group dev
+```
+Run the test suite (unit and integration tests are run as separate `pytest`
+invocations, so that mutated global state in one suite does not leak into the
+other):
+```
+uv run pytest tests/unit
+uv run pytest tests/integration
+```
+Build a distributable wheel/sdist:
+```
+uv build
+```
+## Releasing ##
+Every pull request title into `master` must contain exactly one of
+`[MAJOR]`, `[MINOR]`, `[PATCH]`. This determines the version bump that will
+be applied on merge, relative to the latest `X.Y.Z` git tag:
+- `[MAJOR]` bumps the first number, resetting the rest to zero. If no tag
+  exists yet, this creates the first release, `1.0.0`.
+- `[MINOR]` bumps the second number, resetting the patch number to zero.
+- `[PATCH]` bumps the third number.
+`[MINOR]`/`[PATCH]` require an existing tag to bump from — a PR using either
+of them is invalid until a `[MAJOR]` release has been made at least once.
+On every PR, CI computes this future version and publishes a `.devN` build
+of it to [test.pypi.org](https://test.pypi.org) for end-to-end verification.
+On merge to `master`, CI creates the git tag and publishes that exact
+version to [pypi.org](https://pypi.org).
+## Usage ##
+Although every class and function can be used on its own,
+the main entry points of the library are located in
+`isotope_pattern_lib/api.py`:
+```
+from isotope_pattern_lib import api
+```
+---
+```
+api.set_parser(config_path: str)
+```
+The call sets the parameters for parsing molecular formulas.
+These parameters must be listed as elements with their corresponding isotopes
+in a `yaml` file located at `config_path`.
+The structure of the file is described below.
+The call is optional: if it is never made, a default parser is used.
+---
+```
+api.compute_isotope_pattern(formula_string: str)
+```
+The call computes the isotope pattern of a given molecular formula, represented
+as a string `formula_string` (_e.g._ `C2H5OH`).
+The output is a list of `IsotopeFormula` instances sorted by mass, each containing
+the information on the constituent isotopes and the probability of the
+associated composition.
+## YAML file structure ##
+The YAML file defines how molecular formulas are parsed.
+It consists of a list of elements together with the definitions of their isotopes.
+Each entry has the following form:
+```
+- name: string
+  description: string
+  isotopes:
+    - name: string
+      mass: float
+      abundance: float
+    - ...
+- ...
+```
+**Comments**:
+- The field `name` of an element is the symbol that is looked up in formula
+strings. The parser recognizes a symbol as one uppercase letter optionally
+followed by one lowercase letter, so element names should follow this form.
+The raw strings passed to the `compute_isotope_pattern(formula_string: str)`
+call must not contain any element names that are absent from the `name`
+fields specified in the `yaml` file.
+- Each element can contain as many isotopes as desired.
+- The field `abundance` should be a number less than or equal to `1.0`,
+and the abundances of all isotopes of one element should not exceed `1.0` in sum.
+## Default YAML file structure ##
+```
+- name: H
+  description: hydrogen
+  isotopes:
+    - name: H1
+      mass: 1.0078250
+      abundance: 1.000
+- name: C
+  description: carbon
+  isotopes:
+    - name: C12
+      mass: 12.0000000
+      abundance: 0.989
+    - name: C13
+      mass: 13.0033548
+      abundance: 0.011
+- name: O
+  description: oxygen
+  isotopes:
+    - name: O16
+      mass: 15.9949146
+      abundance: 0.998
+    - name: O18
+      mass: 17.9991596
+      abundance: 0.002
+- name: N
+  description: nitrogen
+  isotopes:
+    - name: N14
+      mass: 14.0030740
+      abundance: 0.996
+    - name: N15
+      mass: 15.0001089
+      abundance: 0.004
+```

isotope_pattern_lib-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+isotope_pattern_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+isotope_pattern_lib/api.py,sha256=SNdkspDwZK3P_ZX40OSBRi-TJg4qspAsZBEXf5ckEMo,1047
+isotope_pattern_lib/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+isotope_pattern_lib/core/formula_parser.py,sha256=kG56O6ZzrR-20KfMHo2u7lkXzDk5eZQ5NpIapvPJBfI,1058
+isotope_pattern_lib/core/isotope_pattern.py,sha256=-R2PS4t7blvu58ESsQZ1VcvQa96YTEayPWoSafgUNgg,1684
+isotope_pattern_lib/resources/config.yaml,sha256=2ZUVMEvMnU709F0bpVLRZvxPknu8e3LkWOSaT9vvYCc,612
+isotope_pattern_lib/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+isotope_pattern_lib/types/settings.py,sha256=gzMeds5pnBotEoWDLW-WGDpxDTV1yuzniguOmfzD5DE,1175
+isotope_pattern_lib/types/types.py,sha256=9W3k6pb91Eto8UqCx8cgYyTP4Mo03Qhvdfi1UgnIbwQ,3313
+isotope_pattern_lib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+isotope_pattern_lib/utils/utils.py,sha256=1YeChOgDV8LFDQYLFGHP88t90KSVc5cjFxpen7OyOc0,332
+isotope_pattern_lib-1.0.0.dist-info/METADATA,sha256=f-HkzTtN2GOialXNihqWBju7CkCvM9nLeNJVxWovE3M,4624
+isotope_pattern_lib-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+isotope_pattern_lib-1.0.0.dist-info/RECORD,,

isotope_pattern_lib-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any