imspy_core-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
+ """
+ Utility module for imspy_core.
+
+ Contains general-purpose utilities for sequences and mathematical functions.
+ """
+
+ from .utilities import (
+     re_index_indices, normal_pdf, gaussian, exp_distribution, exp_gaussian,
+     linear_map, NormalDistribution, ExponentialGaussianDistribution, TokenSequence,
+     is_unimod_start, is_unimod_end, tokenize_proforma_sequence,
+     get_aa_num_proforma_sequence, tokenizer_to_json, tokenizer_from_json
+ )
+ from .sequence import tokenize_unimod_sequence, remove_unimod_annotation
+
+ __all__ = [
+     're_index_indices', 'tokenize_unimod_sequence', 'remove_unimod_annotation', 'linear_map',
+     'normal_pdf', 'gaussian', 'exp_distribution', 'exp_gaussian',
+     'NormalDistribution', 'ExponentialGaussianDistribution', 'TokenSequence',
+     'is_unimod_start', 'is_unimod_end', 'tokenize_proforma_sequence',
+     'get_aa_num_proforma_sequence', 'tokenizer_to_json', 'tokenizer_from_json'
+ ]
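Everything listed in `__all__` is re-exported at the subpackage root. A minimal sketch of the resulting import surface (assuming the wheel above is installed):

```python
# Both names resolve to the same function object: the subpackage root
# re-exports everything listed in __all__ above.
import imspy_core.utility as util
from imspy_core.utility.utilities import linear_map

assert util.linear_map is linear_map
```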
@@ -0,0 +1,38 @@
+ import re
+ from typing import List
+
+
+ def remove_unimod_annotation(sequence: str) -> str:
+     """
+     Remove UNIMOD annotations from a peptide sequence.
+
+     Args:
+         sequence: A peptide sequence with UNIMOD annotations (e.g., "PEPTM[UNIMOD:35]IDE").
+
+     Returns:
+         The peptide sequence without UNIMOD annotations (e.g., "PEPTMIDE").
+     """
+     pattern = r'\[UNIMOD:\d+\]'
+     return re.sub(pattern, '', sequence)
+
+
+ def tokenize_unimod_sequence(unimod_sequence: str) -> List[str]:
+     """
+     Tokenize a sequence of modified amino acids.
+
+     Args:
+         unimod_sequence: A string representing the sequence of amino acids with modifications.
+
+     Returns:
+         A list of tokenized amino acids.
+     """
+     token_pattern = r'[A-Z](?:\[UNIMOD:\d+\])?'
+
+     # Special case: a leading [UNIMOD:1] (an N-terminal modification)
+     # is fused into the <START> token.
+     if unimod_sequence.startswith("[UNIMOD:1]"):
+         special_token = "<START>[UNIMOD:1]"
+         rest_of_string = unimod_sequence[len("[UNIMOD:1]"):]
+         other_tokens = re.findall(token_pattern, rest_of_string)
+         return [special_token] + other_tokens + ['<END>']
+     else:
+         tokens = re.findall(token_pattern, unimod_sequence)
+         return ['<START>'] + tokens + ['<END>']
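A quick illustration of the two helpers above (assuming the wheel is installed; the expected outputs follow directly from the regexes):

```python
from imspy_core.utility import remove_unimod_annotation, tokenize_unimod_sequence

seq = "[UNIMOD:1]PEPTM[UNIMOD:35]IDE"

# The leading [UNIMOD:1] is folded into the <START> token.
print(tokenize_unimod_sequence(seq))
# ['<START>[UNIMOD:1]', 'P', 'E', 'P', 'T', 'M[UNIMOD:35]', 'I', 'D', 'E', '<END>']

print(remove_unimod_annotation(seq))  # 'PEPTMIDE'
```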
@@ -0,0 +1,278 @@
+ import io
+ import json
+ import math
+ import numba
+ import numpy as np
+ from typing import List, Optional, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import tensorflow as tf
+
+ from numpy.typing import ArrayLike
+
+
+ @numba.jit(nopython=True)
+ def normal_pdf(x: ArrayLike, mass: float, s: float = 0.001,
+                inv_sqrt_2pi: float = 0.3989422804014327, normalize: bool = False):
+     """
+     Density of a normal distribution centered at `mass`.
+
+     Args:
+         x: Value(s) at which to evaluate the density.
+         mass: Mean of the distribution.
+         s: Standard deviation of the distribution.
+         inv_sqrt_2pi: Precomputed constant 1 / sqrt(2 * pi).
+         normalize: If True, return the unnormalized Gaussian kernel
+             (peak value 1) instead of the density.
+     """
+     a = (x - mass) / s
+     if normalize:
+         return np.exp(-0.5 * np.power(a, 2))
+     else:
+         return inv_sqrt_2pi / s * np.exp(-0.5 * np.power(a, 2))
+
+
+ @numba.jit(nopython=True)
+ def gaussian(x, μ: float = 0, σ: float = 1):
+     """
+     Gaussian probability density function.
+     :param x: Value(s) at which to evaluate the density.
+     :param μ: Mean.
+     :param σ: Standard deviation.
+     :return: Density at x.
+     """
+     A = 1 / np.sqrt(2 * np.pi * np.power(σ, 2))
+     B = np.exp(-(np.power(x - μ, 2) / (2 * np.power(σ, 2))))  # exponent is -(x-μ)²/(2σ²)
+
+     return A * B
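The exponent above is easy to get wrong, so a sanity check against `scipy.stats.norm` is cheap to keep around (scipy is already among this wheel's requirements); a minimal sketch:

```python
import numpy as np
from scipy.stats import norm

from imspy_core.utility import gaussian

x = np.linspace(-3.0, 3.0, 7)
# Both sides evaluate the standard normal density N(0, 1).
assert np.allclose(gaussian(x, 0.0, 1.0), norm.pdf(x, loc=0.0, scale=1.0))
```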
+
+
+ @numba.jit(nopython=True)
+ def exp_distribution(x, λ: float = 1):
+     """
+     Exponential distribution density.
+     :param x: Value at which to evaluate the density.
+     :param λ: Rate parameter.
+     :return: Density at x (0 for x <= 0).
+     """
+     if x > 0:
+         return λ * np.exp(-λ * x)
+     return 0.0
+
+
+ @numba.jit(nopython=True)
+ def exp_gaussian(x, μ: float = -3, σ: float = 1, λ: float = .25):
+     """
+     Exponentially modified Gaussian (EMG) density: a Gaussian convolved
+     with an exponential decay.
+     :param x: Value at which to evaluate the density.
+     :param μ: Mean of the Gaussian component.
+     :param σ: Standard deviation of the Gaussian component.
+     :param λ: Rate of the exponential component.
+     :return: Density at x.
+     """
+     A = λ / 2 * np.exp(λ / 2 * (2 * μ + λ * np.power(σ, 2) - 2 * x))
+     B = math.erfc((μ + λ * np.power(σ, 2) - x) / (np.sqrt(2) * σ))
+     return A * B
+
+
+ @numba.jit(nopython=True)
+ def linear_map(value, old_min, old_max, new_min=0.0, new_max=60.0):
+     """
+     Linear mapping from one domain to another.
+
+     Args:
+         value: The value to map.
+         old_min: Minimum of the original range.
+         old_max: Maximum of the original range.
+         new_min: Minimum of the target range (default 0.0).
+         new_max: Maximum of the target range (default 60.0).
+
+     Returns:
+         The mapped value in the new range.
+     """
+     scale = (new_max - new_min) / (old_max - old_min)
+     offset = new_min - old_min * scale
+     return value * scale + offset
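The mapping is the usual affine rescale, `value * scale + offset` with `scale = (new_max - new_min) / (old_max - old_min)`. For example:

```python
from imspy_core.utility import linear_map

# Map 25 from [0, 100] into the default [0, 60] target range:
# scale = 60 / 100 = 0.6, offset = 0, so the result is 15.0.
print(linear_map(25.0, 0.0, 100.0))  # 15.0
```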
+
+
+ class NormalDistribution:
+     """Callable wrapper around `gaussian` with fixed μ and σ."""
+
+     def __init__(self, μ: float, σ: float):
+         self.μ = μ
+         self.σ = σ
+
+     def __call__(self, x):
+         return gaussian(x, self.μ, self.σ)
+
+
+ class ExponentialGaussianDistribution:
+     """Callable wrapper around `exp_gaussian` with fixed μ, σ and λ."""
+
+     def __init__(self, μ: float = -3, σ: float = 1, λ: float = .25):
+         self.μ = μ
+         self.σ = σ
+         self.λ = λ
+
+     def __call__(self, x):
+         return exp_gaussian(x, self.μ, self.σ, self.λ)
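The two wrappers turn the densities into simple callables. One caveat worth noting: `gaussian` accepts arrays, while `exp_gaussian` relies on the scalar `math.erfc`, so the EMG wrapper is evaluated point by point in this sketch:

```python
import numpy as np

from imspy_core.utility import NormalDistribution, ExponentialGaussianDistribution

x = np.linspace(-10.0, 10.0, 5)

peak = NormalDistribution(μ=0.0, σ=1.5)
tailed = ExponentialGaussianDistribution(μ=-3.0, σ=1.0, λ=0.25)

print(peak(x))                             # symmetric profile, vectorized
print(np.array([tailed(xi) for xi in x]))  # right-tailed profile, per point
```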
+
+
+ def _from_jsons(jsons: str):
+     return json.loads(jsons)
+
+
+ class TokenSequence:
+     """A tokenized sequence that can be serialized to and from JSON."""
+
+     def __init__(self, sequence_tokenized: Optional[List[str]] = None, jsons: Optional[str] = None):
+         if jsons is not None:
+             self.sequence_tokenized = _from_jsons(jsons)
+             self._jsons = jsons
+         else:
+             self.sequence_tokenized = sequence_tokenized
+             self._jsons = self._to_jsons()
+
+     def _to_jsons(self):
+         return json.dumps(self.sequence_tokenized)
+
+     @property
+     def jsons(self):
+         return self._jsons
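`TokenSequence` round-trips through its `jsons` property; a minimal sketch:

```python
from imspy_core.utility import TokenSequence

ts = TokenSequence(sequence_tokenized=['<START>', 'P', 'E', 'P', '<END>'])
restored = TokenSequence(jsons=ts.jsons)

assert restored.sequence_tokenized == ts.sequence_tokenized
```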
+
+
+ def is_unimod_start(char: str):
+     """
+     Test whether char opens a UNIMOD bracket.
+
+     :param char: Character of a ProForma formatted aa sequence
+     :type char: str
+     :return: Whether char is the start of a UNIMOD bracket
+     :rtype: bool
+     """
+     return char in ("(", "[", "{")
+
+
+ def is_unimod_end(char: str):
+     """
+     Test whether char closes a UNIMOD bracket.
+
+     :param char: Character of a ProForma formatted aa sequence
+     :type char: str
+     :return: Whether char is the end of a UNIMOD bracket
+     :rtype: bool
+     """
+     return char in (")", "]", "}")
+
+
+ def tokenize_proforma_sequence(sequence: str):
+     """
+     Tokenize a ProForma formatted sequence string.
+
+     :param sequence: Sequence string (ProForma formatted)
+     :type sequence: str
+     :return: List of tokens
+     :rtype: List
+     """
+     sequence = sequence.upper().replace("(", "[").replace(")", "]")
+     token_list = ["<START>"]
+     in_unimod_bracket = False
+     tmp_token = ""
+
+     for aa in sequence:
+         if is_unimod_start(aa):
+             in_unimod_bracket = True
+         if in_unimod_bracket:
+             if is_unimod_end(aa):
+                 in_unimod_bracket = False
+             tmp_token += aa
+             continue
+         # Outside a bracket: flush the previous token and start a new one.
+         if tmp_token != "":
+             token_list.append(tmp_token)
+             tmp_token = ""
+         tmp_token += aa
+
+     if tmp_token != "":
+         token_list.append(tmp_token)
+
+     # Fuse a leading [UNIMOD:1] (N-terminal modification) into the <START> token.
+     if len(token_list) > 1:
+         if token_list[1].find("UNIMOD:1") != -1:
+             token_list[1] = "<START>" + token_list[1]
+             token_list = token_list[1:]
+     token_list.append("<END>")
+
+     return token_list
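Note the normalization step at the top of the function: parentheses are rewritten to square brackets before tokenizing, so both ProForma bracket styles yield the same tokens. For example:

```python
from imspy_core.utility import tokenize_proforma_sequence

# Parentheses are normalized to square brackets before tokenizing.
print(tokenize_proforma_sequence("PEPTS(UNIMOD:21)IDE"))
# ['<START>', 'P', 'E', 'P', 'T', 'S[UNIMOD:21]', 'I', 'D', 'E', '<END>']
```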
+
+
+ def get_aa_num_proforma_sequence(sequence: str):
+     """
+     Get the number of amino acids in a sequence.
+
+     :param sequence: ProForma formatted aa sequence
+     :type sequence: str
+     :return: Number of amino acids
+     :rtype: int
+     """
+     num_aa = 0
+     inside_bracket = False
+
+     for aa in sequence:
+         if is_unimod_start(aa):
+             inside_bracket = True
+         if inside_bracket:
+             if is_unimod_end(aa):
+                 inside_bracket = False
+             continue
+         num_aa += 1
+     return num_aa
+
+
+ def re_index_indices(ids):
+     """Re-index indices, i.e. replace gaps in indices with consecutive numbers.
+
+     Can be used, e.g., to re-index frame IDs from precursors for visualization.
+
+     Args:
+         ids: Indices, possibly with gaps.
+
+     Returns:
+         Zero-based indices where each value is replaced by its rank among
+         the sorted unique values, preserving input positions.
+     """
+     _, inverse = np.unique(ids, return_inverse=True)
+     return inverse
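Under the hood this is just `np.unique(..., return_inverse=True)`: gaps between IDs collapse to consecutive ranks. For example:

```python
import numpy as np

from imspy_core.utility import re_index_indices

frame_ids = np.array([17, 17, 42, 101, 42])
print(re_index_indices(frame_ids))  # [0 0 1 2 1]
```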
+
+
+ def tokenizer_to_json(tokenizer: "tf.keras.preprocessing.text.Tokenizer", path: str):
+     """
+     Save a fitted Keras tokenizer to JSON for later use.
+
+     Note:
+         Requires tensorflow to be installed.
+
+     :param tokenizer: fitted Keras tokenizer to save
+     :param path: path to save the JSON to
+     """
+     tokenizer_json = tokenizer.to_json()
+     with io.open(path, 'w', encoding='utf-8') as f:
+         f.write(json.dumps(tokenizer_json, ensure_ascii=False))
+
+
+ def tokenizer_from_json(path: str):
+     """
+     Load a pre-fitted tokenizer from a JSON file.
+
+     Note:
+         Requires tensorflow to be installed.
+
+     :param path: path to the tokenizer JSON file
+     :return: a Keras tokenizer loaded from JSON
+     """
+     try:
+         import tensorflow as tf
+     except ImportError:
+         raise ImportError(
+             "tokenizer_from_json requires tensorflow. "
+             "Install it with: pip install tensorflow"
+         )
+
+     with open(path) as f:
+         data = json.load(f)
+     return tf.keras.preprocessing.text.tokenizer_from_json(data)
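A save/load round trip for the two helpers above; a hedged sketch, assuming TensorFlow is installed separately (it is deliberately not a dependency of this wheel):

```python
from tensorflow.keras.preprocessing.text import Tokenizer

from imspy_core.utility import tokenizer_to_json, tokenizer_from_json

tokenizer = Tokenizer(char_level=True)  # character-level fits peptide strings
tokenizer.fit_on_texts(["PEPTIDE", "PEPTIDEK"])

tokenizer_to_json(tokenizer, "tokenizer.json")
restored = tokenizer_from_json("tokenizer.json")

assert restored.word_index == tokenizer.word_index
```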
@@ -0,0 +1,70 @@
+ Metadata-Version: 2.4
+ Name: imspy-core
+ Version: 0.4.0
+ Summary: Core data structures and utilities for processing timsTOF ion mobility spectrometry data.
+ License-Expression: MIT
+ Author: theGreatHerrLebert
+ Author-email: davidteschner@googlemail.com
+ Requires-Python: >=3.11,<3.14
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Dist: imspy-connector (>=0.3.0)
+ Requires-Dist: mendeleev (>=0.7.0)
+ Requires-Dist: numba (>=0.53)
+ Requires-Dist: numpy (>=1.21)
+ Requires-Dist: opentims-bruker-bridge (>=1.1.0)
+ Requires-Dist: pandas (>=2.1)
+ Requires-Dist: pyarrow (>=13.0)
+ Requires-Dist: scipy (>=1.7.1)
+ Requires-Dist: tabulate (>=0.9.0)
+ Requires-Dist: toml (>=0.10.2)
+ Requires-Dist: tqdm (>=4.66)
+ Requires-Dist: zstd (>=1.5.6.1)
+ Description-Content-Type: text/markdown
+
+ # imspy-core
+
+ Core data structures and utilities for processing timsTOF ion mobility spectrometry data.
+
+ ## Installation
+
+ ```bash
+ pip install imspy-core
+ ```
+
+ ## Features
+
+ - **Data Structures**: MzSpectrum, TimsSpectrum, PeptideSequence, and more
+ - **Chemistry Utilities**: Elements, amino acids, UNIMOD modifications, CCS/mobility conversions
+ - **TimsTOF Readers**: Read DDA and DIA datasets from Bruker timsTOF instruments
+ - **Low Dependencies**: Only essential packages (numpy, pandas, scipy, numba)
+
+ ## Quick Start
+
+ ```python
+ from imspy_core.timstof import TimsDatasetDDA
+ from imspy_core.data import PeptideSequence
+
+ # Read a DDA dataset
+ dataset = TimsDatasetDDA("/path/to/data.d")
+ frame = dataset.get_tims_frame(1)
+ print(frame)
+
+ # Work with peptides
+ peptide = PeptideSequence("PEPTIDEK")
+ print(f"Mass: {peptide.mono_isotopic_mass}")
+ ```
+
+ ## Related Packages
+
+ - **imspy-predictors**: ML-based predictions (CCS, RT, intensity) - requires TensorFlow
+ - **imspy-search**: Database search functionality - requires sagepy, mokapot
+ - **imspy-simulation**: Simulation tools for timsTOF data
+ - **imspy-vis**: Visualization tools - requires Plotly, Matplotlib
+
+ ## License
+
+ MIT License - see LICENSE file for details.
+
@@ -0,0 +1,28 @@
+ imspy_core/__init__.py,sha256=JMx3P_qFJ3lPO61DtdpXs7I9Yz5c2Yysr9_j0l83IFE,2280
+ imspy_core/chemistry/__init__.py,sha256=HhCH2I_rHpv8rRgBBbhpRPAvOygchJN8NPhj86HEhQo,1464
+ imspy_core/chemistry/amino_acids.py,sha256=B2JdsY2DA2_gp_C79h2lxeu4Xuv4pUrA_PxDxXXQbDU,234
+ imspy_core/chemistry/constants.py,sha256=5zDApZnEqm8QJ_SVeRS2C91xnUleafpJA8RX3QZ202c,394
+ imspy_core/chemistry/elements.py,sha256=Fixh7eaEWexOPkMwGl8VsFqj9FfJpEhkeKDZJ-_crSw,277
+ imspy_core/chemistry/mobility.py,sha256=_xCVq1qH8dd9a2UqhEutKYNwkPmC0fTPIDjpJ_mcofM,2875
+ imspy_core/chemistry/sum_formula.py,sha256=OpRye_5yYXt5tqgVguf-38rHCCysZBDtmC4WCh3fGlE,991
+ imspy_core/chemistry/unimod.py,sha256=-eq5yKcNsbsAdpk-BdBdhUk1_XkTXzAPDh14gD9l2GA,162
+ imspy_core/chemistry/utility.py,sha256=gINZ8k8BymGhts5aRQ4SPG9pWRkvc6XFecqSEEuTFfQ,1252
+ imspy_core/core/__init__.py,sha256=opTLNd3KW9UBYvk0hD9T3fRSry2MuGh2wKxBBXJF6Dg,168
+ imspy_core/core/base.py,sha256=NrkKhw3FeK_An0Im3legHaD9sOwOZ_QSJT5zRqcYR2c,867
+ imspy_core/data/__init__.py,sha256=7FQKFn4H3SqBmJMZBniSIrewDE1-byGNr1CUcHW41JE,532
+ imspy_core/data/peptide.py,sha256=Fd4wt1A9yvdEvhoxuS03KH6KXPxTSm4pvpaOg3PcXL0,20252
+ imspy_core/data/spectrum.py,sha256=9eMKcbhQuQiitQXjOrHgJGnj0163jHtal4q20lz9Ivc,18789
+ imspy_core/timstof/__init__.py,sha256=BcTi9vvpwmqxzIvkqYzNCmHjhMhxBXo6351IsbRHjmo,852
+ imspy_core/timstof/collision.py,sha256=RUab0N_GimlkEvTU7oINycPXJo8qoNngUWQwEAiTtiI,1097
+ imspy_core/timstof/data.py,sha256=19BrElNm0YE1G5TL9C_Pq55u9Zs8M2JSQT0hqey_UR4,14564
+ imspy_core/timstof/dda.py,sha256=gV-Gz6DtsuGmfw70iBxHPo-F46TsdbLOrLsuucphzWI,13695
+ imspy_core/timstof/dia.py,sha256=BDz_3xPFs8uXK50VDwwtm_h4SijEwY0x4e5TS00wK1w,4809
+ imspy_core/timstof/frame.py,sha256=4EROm-dvwFmhQDONdWTa_Khlv_BTpNmmRIK8qNYA6Eg,20051
+ imspy_core/timstof/quadrupole.py,sha256=_9eeBNUyHAyB7rT5uJm9iwX79C4QZCiJL1FoRtdWmvU,7918
+ imspy_core/timstof/slice.py,sha256=ctq42lGd570xkdUQskKzk-ge8ZqNj5wMkE4wiLxorQU,18897
+ imspy_core/utility/__init__.py,sha256=1k_X2Vsqqu8RoZbSszm9CaW7ing1RSu-rCmOX2JbJ5Y,917
+ imspy_core/utility/sequence.py,sha256=SmvI3fGISdt4m0Z7v_jVR56WACRudRIrRWi_O1UP6Cc,1222
+ imspy_core/utility/utilities.py,sha256=n9dNyG5QRHZq4URdZalmpYvKRzb5BynzdrGz3uQcugM,6800
+ imspy_core-0.4.0.dist-info/METADATA,sha256=DIJn2FNz_KaKKAoBjsSrRz8XH1LnfrWH4ddLvIAmoeo,2133
+ imspy_core-0.4.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
+ imspy_core-0.4.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: poetry-core 2.3.1
+ Root-Is-Purelib: true
+ Tag: py3-none-any