PyPI - pyTMHMM-binaries - Versions diffs - 1.3.6__cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - Mend

pyTMHMM-binaries 1.3.6__cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

pyTMHMM/TMHMM2.0.model +991 -0
pyTMHMM/__init__.py +3 -0
pyTMHMM/api.py +42 -0
pyTMHMM/cli.py +92 -0
pyTMHMM/hmm.c +14983 -0
pyTMHMM/hmm.cpython-314t-aarch64-linux-gnu.so +0 -0
pyTMHMM/hmm.pyx +164 -0
pyTMHMM/model.py +178 -0
pyTMHMM/utils.py +59 -0
pytmhmm_binaries-1.3.6.dist-info/METADATA +44 -0
pytmhmm_binaries-1.3.6.dist-info/RECORD +15 -0
pytmhmm_binaries-1.3.6.dist-info/WHEEL +7 -0
pytmhmm_binaries-1.3.6.dist-info/entry_points.txt +2 -0
pytmhmm_binaries-1.3.6.dist-info/licenses/LICENSE.md +21 -0
pytmhmm_binaries-1.3.6.dist-info/top_level.txt +1 -0

pyTMHMM/hmm.cpython-314t-aarch64-linux-gnu.so ADDED Viewed

Binary file

pyTMHMM/hmm.pyx ADDED Viewed

@@ -0,0 +1,164 @@
+import numpy as np
+cimport numpy as np
+cimport cython
+# Element dtype used for the floating-point tables allocated in this module.
+DTYPE = np.double
+# C-level counterpart of DTYPE, used in the typed ndarray declarations below.
+ctypedef np.double_t DTYPE_t
+@cython.boundscheck(False)
+def viterbi(sequence,
+            np.ndarray[DTYPE_t, ndim=1] initial,
+            np.ndarray[DTYPE_t, ndim=2] transitions,
+            np.ndarray[DTYPE_t, ndim=2] emissions,
+            char_map, label_map, name_map):
+    """viterbi
+    Compute the most probable path through the model given the sequence.
+    This function implements Viterbi's algorithm in log-space.
+    :param sequence str: a string over the alphabet specified by the model;
+                         it is upper-cased before lookup and must be
+                         non-empty (the first character is read directly).
+    :param initial: initial state probabilities, indexed by state.
+    :param transitions: transition probabilities, indexed [from, to].
+    :param emissions: emission probabilities, indexed [state, character].
+    :param char_map: maps a sequence character to its column in `emissions`.
+    :param label_map: maps a state index to the label written to the path.
+    :param name_map: unused by this function.
+    :rtype: tuple(matrix, optimal_path)
+    :return: a tuple consisting of the two-row rolling score table (only the
+             rows for the last two positions are kept) and the optimal path
+             as a string of labels.
+    """
+    # NOTE: this changes numpy's process-wide error settings; the logs below
+    # produce -inf for zero probabilities on purpose.
+    np.seterr(divide='ignore')
+    sequence = sequence.upper()
+    cdef int no_observations = len(sequence)
+    cdef int no_states = len(initial)
+    cdef float neginf = -np.inf
+    # work in log space
+    initial = np.log(initial)
+    transitions = np.log(transitions)
+    emissions = np.log(emissions)
+    # M holds scores for the previous and current position only (row i % 2).
+    cdef np.ndarray[DTYPE_t, ndim=2] M = np.zeros([2, no_states],dtype=DTYPE)
+    # P[i, j] is the best predecessor of state j at position i; the dtype
+    # matches the np.intp_t buffer declaration.
+    cdef np.ndarray[np.intp_t, ndim=2] P = np.zeros([no_observations, no_states], dtype=np.intp)
+    cdef unsigned int i, j, k, max_state, next_state, observation
+    cdef double max_state_prob, prob
+    observation = char_map[sequence[0]]
+    for i in range(no_states):
+        M[0, i] = initial[i] + emissions[i, observation]
+    for i in range(1, no_observations):
+        observation = char_map[sequence[i]]
+        for j in range(no_states):
+            max_state = 0
+            max_state_prob = neginf
+            for k in range(no_states):
+                prob = M[(i - 1) % 2, k] + transitions[k, j]
+                if prob > max_state_prob:
+                    max_state, max_state_prob = k, prob
+            M[i % 2, j] = max_state_prob + emissions[j, observation]
+            P[i, j] = max_state
+    # TODO: figure out why stuff doesn't work when using cython without turning
+    #       the range generator into a list first.
+    # TODO: stuff crashes if one uses reversed(range(no_observations)), why?
+    # NOTE(review): `i` is declared unsigned int, which is presumably why a
+    # plain descending range with a stop of -1 misbehaves; confirm before
+    # removing the list() wrapper.
+    backtracked = []
+    # The last position written is no_observations - 1, so its scores live in
+    # row (no_observations - 1) % 2; start the backtrack from that row.
+    next_state = np.argmax(M[(no_observations - 1) % 2], axis=0)
+    for i in list(range(no_observations - 1, -1, -1)):
+        backtracked.append(label_map[next_state])
+        next_state = P[i, next_state]
+    return M, ''.join(reversed(backtracked))
+@cython.boundscheck(False)
+def forward(sequence,
+            np.ndarray[DTYPE_t, ndim=1] initial,
+            np.ndarray[DTYPE_t, ndim=2] transitions,
+            np.ndarray[DTYPE_t, ndim=2] emissions,
+            char_map, label_map, name_map):
+    """forward
+    Compute the probability distribution of states after observing the sequence.
+    This function implements the scaled Forward algorithm.
+    :param sequence str: a string over the alphabet specified by the model;
+                         it is upper-cased before lookup and its first
+                         character is read directly, so it must be non-empty.
+    :param initial: initial state probabilities, indexed by state.
+    :param transitions: transition probabilities, indexed [from, to].
+    :param emissions: emission probabilities, indexed [state, character].
+    :param char_map: maps a sequence character to its column in `emissions`.
+    :param label_map: unused by this function.
+    :param name_map: unused by this function.
+    :rtype: tuple(matrix, constants)
+    :return: the scaled dynamic programming table and the constants used to
+             normalize it.
+    """
+    sequence = sequence.upper()
+    cdef int no_observations = len(sequence)
+    cdef int no_states = len(initial)
+    # M has one row per sequence position and one column per state.
+    cdef np.ndarray[DTYPE_t, ndim=2] M = \
+        np.zeros([no_observations, no_states], dtype=DTYPE)
+    # constants[i] is the row sum of M[i] before that row is normalized.
+    cdef np.ndarray[DTYPE_t, ndim=1] constants = \
+        np.zeros(no_observations, dtype=DTYPE)
+    cdef unsigned int i, j, k, observation
+    # `prob` is declared but not used in this function.
+    cdef double prob, state_sum
+    observation = char_map[sequence[0]]
+    for i in range(no_states):
+        M[0, i] = initial[i] * emissions[i, observation]
+    # Scale the first row so that it sums to one.
+    constants[0] = np.sum(M[0])
+    M[0] = M[0] / constants[0]
+    for i in range(1, no_observations):
+        observation = char_map[sequence[i]]
+        for j in range(no_states):
+            state_sum = 0.0
+            for k in range(no_states):
+                state_sum += M[(i - 1), k] * transitions[k, j]
+            M[i, j] = state_sum * emissions[j, observation]
+        # Scale each subsequent row the same way as the first one.
+        constants[i] = np.sum(M[i])
+        M[i] = M[i] / constants[i]
+    return M, constants
+@cython.boundscheck(False)
+def backward(sequence,
+             constants,
+             np.ndarray[DTYPE_t, ndim=1] initial,
+             np.ndarray[DTYPE_t, ndim=2] transitions,
+             np.ndarray[DTYPE_t, ndim=2] emissions,
+             char_map, label_map, name_map):
+    """backward
+    Compute the probability of being in some state and generating the rest of
+    the sequence.
+    This function implements the scaled backward algorithm.
+    :param sequence str: a string over the alphabet specified by the model;
+                         it is upper-cased before lookup.
+    :param constants np.ndarray: an array of the constants used to normalize
+                                 the forward table.
+    :param initial: only its length is used here, as the number of states.
+    :param transitions: transition probabilities, indexed [from, to].
+    :param emissions: emission probabilities, indexed [state, character].
+    :param char_map: maps a sequence character to its column in `emissions`.
+    :param label_map: unused by this function.
+    :param name_map: unused by this function.
+    :rtype: np.ndarray
+    :return: the scaled backward table.
+    """
+    sequence = sequence.upper()
+    cdef int no_observations = len(sequence)
+    cdef int no_states = len(initial)
+    # M has one row per sequence position and one column per state.
+    cdef np.ndarray[DTYPE_t, ndim=2] M = \
+        np.zeros([no_observations, no_states], dtype=DTYPE)
+    cdef unsigned int i, j, k, observation
+    # `prob` is declared but not used in this function.
+    cdef double prob, state_sum
+    # Every state in the last row gets the same value, scaled by the last
+    # forward constant.
+    M[no_observations - 1] = 1.0 / constants[no_observations - 1]
+    # NOTE(review): `i` is unsigned while this range counts down to a stop of
+    # -1; confirm the generated loop terminates as intended.
+    for i in range(no_observations-2, -1, -1):
+        observation = char_map[sequence[i]]
+        for j in range(no_states):
+            state_sum = 0.0
+            for k in range(no_states):
+                state_sum += M[(i + 1), k] * transitions[j, k]
+            M[i, j] = state_sum * emissions[j, observation]
+        # Apply the scaling constant of the same position to this row.
+        M[i] = M[i] / constants[i]
+    return M

pyTMHMM/model.py ADDED Viewed

@@ -0,0 +1,178 @@
+import collections
+import re
+import os
+import numpy as np
+def _tokenize(contents):
+    return re.findall(r'([A-Za-z0-9\.\-_]+|[:;\{\}])', contents)
+def _strip_comments(file_like):
+    with open(file_like, 'r') as f:
+        lines = f.readlines()
+    return ''.join(filter(lambda l: not l.startswith('#'), lines))
+def _parse_list(tokens):
+    parsed_list = []
+    while True:
+        token = tokens.popleft()
+        if token == ';':
+            tokens.appendleft(token)
+            return tokens, parsed_list
+        parsed_list.append(token)
+def _parse_map(tokens):
+    parsed_map = collections.OrderedDict()
+    while True:
+        token = tokens.popleft()
+        if token == ';':
+            tokens.appendleft(token)
+            return tokens, parsed_map
+        next_token = tokens.popleft()
+        # Fallback if the map was actually a list
+        if next_token != ':':
+            tokens.appendleft(next_token)
+            tokens.appendleft(token)
+            return tokens, None
+        value = tokens.popleft()
+        parsed_map[token] = float(value)
+def _parse_state(tokens):
+    state_name = tokens.popleft()
+    tokens.popleft() # "{"
+    parsed_state = {}
+    while True:
+        token = tokens.popleft()
+        if token == '}':
+            return tokens, (state_name, parsed_state)
+        if token in ('trans', 'only'):
+            tokens, value = _parse_map(tokens)
+            if value is None:
+                tokens, value = _parse_list(tokens)
+        elif token in ('type', 'end'):
+            value = int(tokens.popleft())
+        else:
+            value = tokens.popleft()
+        parsed_state[token] = value
+        tokens.popleft() # ";"
+def _parse_header(tokens):
+    tokens.popleft() # "header"
+    tokens.popleft() # "{"
+    header = {}
+    while True:
+        token = tokens.popleft()
+        if token == '}':
+            break
+        header[token] = tokens.popleft()
+        tokens.popleft() # ";"
+    return tokens, header
+def _normalize_states(states):
+    """Normalize states by inheriting parameters explicitly.
+    The TMHMM file format allows parameters to be tied to the parameters of
+    some other state. This basically means that a state inherits the parameters
+    from another state.
+    The normalization performed by this function consists of copying the
+    specified parameters from the parent state to the inheriting state such
+    that all states explicitly specify their transition and emission
+    probabilities.
+    """
+    for name, state in states.items():
+        # inherit parent's transition probabilities, but only for
+        # the states specified for this state.
+        if 'tied_trans' in state:
+            parent_state = states[state['tied_trans']]
+            to_states = state['trans']
+            states[name]['trans'] = dict(zip(state['trans'],
+                                         parent_state['trans'].values()))
+        # inherit parent's emission probabilities
+        if 'tied_letter' in state:
+            parent_state = state['tied_letter']
+            states[name]['only'] = dict(states[parent_state]['only'])
+    return states
+def _to_matrix_form(alphabet, states):
+    """
+    Convert a model to matrix form.
+    """
+    # pull out initial probabilities
+    begin = dict(states['begin'])
+    del states['begin']
+    # build state -> index mapping
+    state_map = {v: k for k, v in enumerate(states)}
+    # build character -> index mapping
+    char_map = {v: k for k, v in enumerate(alphabet)}
+    no_states = len(states)
+    initial = np.zeros(shape=(no_states,))
+    transitions = np.zeros(shape=(no_states, no_states))
+    emissions = np.zeros(shape=(no_states, len(alphabet)))
+    label_map = {}
+    name_map = dict(enumerate(states))
+    # initial probabilities
+    for state_name, trans_prob in begin['trans'].items():
+        this_state_idx = state_map[state_name]
+        initial[this_state_idx] = trans_prob
+    for state_name, state in states.items():
+        this_state_idx = state_map[state_name]
+        # label map
+        if 'label' in state:
+            label_map[this_state_idx] = state['label']
+        # transition probabilities
+        for other_state_name, trans_prob in state['trans'].items():
+            other_state_idx = state_map[other_state_name]
+            transitions[this_state_idx, other_state_idx] = trans_prob
+        # emission probabilities
+        for character, emission_prob in state['only'].items():
+            this_character_idx = char_map[character]
+            emissions[this_state_idx, this_character_idx] = emission_prob
+    return initial, transitions, emissions, char_map, label_map, name_map
+def parse(file_like):
+    """
+    Parse a model in the TMHMM 2.0 format.
+    :param file_like: a file-like object to read and parse.
+    :return: a model
+    """
+    contents = _strip_comments(file_like)
+    tokens = collections.deque(_tokenize(contents))
+    tokens, header = _parse_header(tokens)
+    states = {}
+    while tokens:
+        tokens, (name, state) = _parse_state(tokens)
+        states[name] = state
+    assert not tokens, "list of tokens not consumed completely"
+    return header, _to_matrix_form(header['alphabet'],
+                                   _normalize_states(states))

pyTMHMM/utils.py ADDED Viewed

@@ -0,0 +1,59 @@
+from collections import namedtuple
+# Record type for one FASTA entry, with fields (id, description, sequence).
+FastaEntry = namedtuple('FastaEntry', 'id, description, sequence')
+def load_posterior_file(fileobj):
+    # skip header
+    fileobj.readline()
+    res = []
+    for line in fileobj:
+        res.append(map(float, line.split()))
+    return zip(*res)
+def dump_posterior_file(fileobj, posterior):
+    print('inside', 'membrane', 'outside', file=fileobj)
+    for i in range(posterior.shape[0]):
+        line = '{} {} {}'.format(
+            posterior[i, 0], posterior[i, 1], posterior[i, 2])
+        print(line, file=fileobj)
+def load_fasta_file(fileobj):
+    """load_fasta_file
+    Returns a list of `(id, description, sequence)` tuples. The `id` and
+    `description` is extracted from the header line. The `id` is the part of
+    the header line before the first whitespace character. The `description`
+    is everything coming after the first whitespace character and not all
+    FASTA headers have descriptions.
+    """
+    entries = []
+    header = ''
+    sequence = ''
+    def append_entry(header, sequence):
+        arr = header.split(None, 1)
+        if len(arr) == 1:
+            arr.append("")
+        entries.append(FastaEntry(arr[0], arr[1], sequence))
+    for line in fileobj:
+        if line.startswith(">"):
+            # Beginning of file
+            if header == '':
+                header = line[1:].strip()
+            # Middle of file
+            else:
+                append_entry(header, sequence)
+                sequence = ''
+                header = line[1:].strip()
+        else:
+            sequence += line.strip()
+    # End of file
+    if header != '' and sequence != '':
+        append_entry(header, sequence)
+    return entries

pytmhmm_binaries-1.3.6.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,44 @@
+Metadata-Version: 2.4
+Name: pyTMHMM-binaries
+Version: 1.3.6
+Summary: A transmembrane helix finder.
+Author-email: Brian Osborne <bosborne@alum.mit.edu>
+License: MIT License
+Project-URL: Homepage, https://github.com/bosborne/pyTMHMM/
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE.md
+Requires-Dist: numpy>=1.24
+Provides-Extra: plotting
+Requires-Dist: matplotlib; extra == "plotting"
+Provides-Extra: build
+Requires-Dist: numpy>=1.24; extra == "build"
+Requires-Dist: cython>=0.29; extra == "build"
+Provides-Extra: test
+Requires-Dist: numpy>=1.24; extra == "test"
+Dynamic: license-file
+## pytmhmm_binaries
+This is a fork of pyTMHMM <https://github.com/bosborne/pyTMHMM>.
+What changed: minor updates for newer Python versions and newer Python packaging.
+To build manylinux/musllinux binaries for x86_64 and aarch64 (arm):
+```bash
+CIBW_ARCHS_LINUX="x86_64 aarch64" uvx cibuildwheel --platform linux
+```
+publishing to pypi:
+```bash
+# binary distribution
+uv publish wheelhouse/*.whl
+# source distribution
+uv publish
+```

pytmhmm_binaries-1.3.6.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+pyTMHMM/TMHMM2.0.model,sha256=qlcway9gAO4wUYXYxYYDx_ZGPstByY0W0d_YAwL_qb4,22262
+pyTMHMM/__init__.py,sha256=_9ywcsMUHKWo_LoDpVGJFXJe6s4Np30IIjCr4_Tsl8g,55
+pyTMHMM/api.py,sha256=arOVvela8S3L4IbGloqSeafgByi2muUVZX7ByXV_RrI,1296
+pyTMHMM/cli.py,sha256=DECs21Os6Jr0kUBSPM6c77WI5nHE6KzEAM0KmtZIZT4,3028
+pyTMHMM/hmm.c,sha256=LLpcYkM4JVGDqjitoQisedzmhD4P2cNLXFK-ZOsKFMs,639862
+pyTMHMM/hmm.cpython-314t-aarch64-linux-gnu.so,sha256=2p2o7luxgnLNbBSd_iEmDnTU0gBYNuB3xu4YBaAF4QA,1159592
+pyTMHMM/hmm.pyx,sha256=ykrGytUMWuIiHQi4HcYMQWfQUgD0MzM_4AppowAPMHo,5479
+pyTMHMM/model.py,sha256=rcnFZOu5rFCENvMiKwYnba49RfSQ3eetFW2xWgyKkw0,5263
+pyTMHMM/utils.py,sha256=OPFgTCCwhEgTFgFPbfQstFgCvgUPz6Qcv-lgPAFGIBg,1702
+pytmhmm_binaries-1.3.6.dist-info/METADATA,sha256=Ya5vTNmLeEIWopmc2HjEjSrVwurT89Y4LuK1o4E6Dj4,1219
+pytmhmm_binaries-1.3.6.dist-info/WHEEL,sha256=exfk16KSmR3o56VDCkqK-bqklnUFXmoh0YHfHtLBcFY,197
+pytmhmm_binaries-1.3.6.dist-info/entry_points.txt,sha256=JX9DDmDMynmwMbsu0SJzW6wSzV_G3_algtUxzQEWNA8,44
+pytmhmm_binaries-1.3.6.dist-info/top_level.txt,sha256=NcLw-C2BK7A2vae9K2CVg9Mwpa25ZUK0tEBnMcEiadY,8
+pytmhmm_binaries-1.3.6.dist-info/RECORD,,
+pytmhmm_binaries-1.3.6.dist-info/licenses/LICENSE.md,sha256=UTkYxrj06GGYGUe6DNBLPYtVcXD5FBk9-fSH_xV6bg8,1080

pytmhmm_binaries-1.3.6.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,7 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.10.2)
+Root-Is-Purelib: false
+Tag: cp314-cp314t-manylinux_2_17_aarch64
+Tag: cp314-cp314t-manylinux2014_aarch64
+Tag: cp314-cp314t-manylinux_2_28_aarch64

pytmhmm_binaries-1.3.6.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ pyTMHMM = pyTMHMM.cli:cli

pytmhmm_binaries-1.3.6.dist-info/licenses/LICENSE.md ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2021 Brian Osborne
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

pytmhmm_binaries-1.3.6.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ pyTMHMM