pyTMHMM-binaries 1.3.6__cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyTMHMM/hmm.pyx ADDED
@@ -0,0 +1,164 @@
1
+ import numpy as np
2
+
3
+ cimport numpy as np
4
+ cimport cython
5
+
6
+ DTYPE = np.double
7
+ ctypedef np.double_t DTYPE_t
8
+
9
+
10
@cython.boundscheck(False)
def viterbi(sequence,
            np.ndarray[DTYPE_t, ndim=1] initial,
            np.ndarray[DTYPE_t, ndim=2] transitions,
            np.ndarray[DTYPE_t, ndim=2] emissions,
            char_map, label_map, name_map):
    """viterbi
    Compute the most probable path through the model given the sequence.

    This function implements Viterbi's algorithm in log-space. Only the
    scores of the current and the previous position are kept (two rows that
    are reused alternately); the back-pointers for every position are kept
    in full so that the path can be reconstructed.

    :param sequence str: a string over the alphabet specified by the model.
        It is upper-cased before use.
    :param initial: initial probability of each state.
    :param transitions: ``transitions[k, j]`` is the probability of going
        from state ``k`` to state ``j``.
    :param emissions: ``emissions[j, c]`` is the probability of state ``j``
        emitting the character whose index is ``c``.
    :param char_map: mapping from character to column index in ``emissions``.
    :param label_map: mapping from state index to the label that is written
        to the returned path.
    :param name_map: not used by this function.
    :rtype: tuple(matrix, optimal_path)
    :return: a tuple consisting of the ``2 x no_states`` rolling score
             buffer (log-space) and the optimal path as a string of labels.
    :raises IndexError: if ``sequence`` is empty.
    :raises KeyError: if a character is missing from ``char_map`` or a state
        index is missing from ``label_map``.
    """
    # log(0) == -inf is an expected value for impossible transitions and
    # emissions. Note that this changes numpy's global error handling and is
    # not restored afterwards.
    np.seterr(divide='ignore')
    sequence = sequence.upper()

    cdef int no_observations = len(sequence)
    cdef int no_states = len(initial)

    cdef double neginf = -np.inf

    # work in log space
    initial = np.log(initial)
    transitions = np.log(transitions)
    emissions = np.log(emissions)

    # M holds the scores of the previous and the current position only; the
    # row used for position i is i % 2.
    cdef np.ndarray[DTYPE_t, ndim=2] M = np.zeros([2, no_states], dtype=DTYPE)
    # P[i, j] is the best predecessor of state j at position i. The dtype
    # has to be np.intp to match the declared buffer type on every platform.
    cdef np.ndarray[np.intp_t, ndim=2] P = np.zeros(
        [no_observations, no_states], dtype=np.intp)
    cdef unsigned int i, j, k, max_state, next_state, observation
    cdef double max_state_prob, prob

    observation = char_map[sequence[0]]
    for i in range(no_states):
        M[0, i] = initial[i] + emissions[i, observation]

    for i in range(1, no_observations):
        observation = char_map[sequence[i]]
        for j in range(no_states):
            max_state = 0
            max_state_prob = neginf
            for k in range(no_states):
                prob = M[(i - 1) % 2, k] + transitions[k, j]
                # strict comparison: on ties the lowest state index wins
                if prob > max_state_prob:
                    max_state, max_state_prob = k, prob
            M[i % 2, j] = max_state_prob + emissions[j, observation]
            P[i, j] = max_state

    # TODO: figure out why stuff doesn't work when using cython without turning
    # the range generator into a list first.
    # TODO: stuff crashes if one uses reversed(range(no_observations)), why?

    backtracked = []
    # The scores of the final position live in row (no_observations - 1) % 2;
    # that is the row the main loop wrote last (or row 0 for a sequence of
    # length one).
    next_state = np.argmax(M[(no_observations - 1) % 2,], axis=0)
    for i in list(range(no_observations - 1, -1, -1)):
        backtracked.append(label_map[next_state])
        next_state = P[i, next_state]

    return M, ''.join(reversed(backtracked))
71
+
72
+
73
@cython.boundscheck(False)
def forward(sequence,
            np.ndarray[DTYPE_t, ndim=1] initial,
            np.ndarray[DTYPE_t, ndim=2] transitions,
            np.ndarray[DTYPE_t, ndim=2] emissions,
            char_map, label_map, name_map):
    """forward
    Compute the probability distribution of states after observing the sequence.

    This function implements the scaled Forward algorithm: after each
    position is filled in, its row is divided by the sum of the row, and that
    sum is stored as the scaling constant of the position.

    :param sequence str: a string over the alphabet specified by the model.
        It is upper-cased before use.
    :param initial: initial probability of each state.
    :param transitions: ``transitions[k, j]`` is the probability of going
        from state ``k`` to state ``j``.
    :param emissions: ``emissions[j, c]`` is the probability of state ``j``
        emitting the character whose index is ``c``.
    :param char_map: mapping from character to column index in ``emissions``.
    :param label_map: not used by this function.
    :param name_map: not used by this function.
    :rtype: tuple(matrix, constants)
    :return: the scaled dynamic programming table
             (``no_observations x no_states``) and the constants used to
             normalize it (one per observation).
    """

    sequence = sequence.upper()

    cdef int no_observations = len(sequence)
    cdef int no_states = len(initial)

    cdef np.ndarray[DTYPE_t, ndim=2] M = np.zeros(
        [no_observations, no_states], dtype=DTYPE)
    cdef np.ndarray[DTYPE_t, ndim=1] constants = np.zeros(
        no_observations, dtype=DTYPE)

    cdef unsigned int pos, target, source, symbol
    cdef double state_sum

    # first position: initial distribution times emission
    symbol = char_map[sequence[0]]
    for pos in range(no_states):
        M[0, pos] = initial[pos] * emissions[pos, symbol]
    constants[0] = np.sum(M[0])
    M[0] = M[0] / constants[0]

    # remaining positions: propagate the previous (already scaled) row
    for pos in range(1, no_observations):
        symbol = char_map[sequence[pos]]
        for target in range(no_states):
            state_sum = 0.0
            for source in range(no_states):
                state_sum += M[(pos - 1), source] * transitions[source, target]
            M[pos, target] = state_sum * emissions[target, symbol]
        constants[pos] = np.sum(M[pos])
        M[pos] = M[pos] / constants[pos]

    return M, constants
120
+
121
+
122
@cython.boundscheck(False)
def backward(sequence,
             constants,
             np.ndarray[DTYPE_t, ndim=1] initial,
             np.ndarray[DTYPE_t, ndim=2] transitions,
             np.ndarray[DTYPE_t, ndim=2] emissions,
             char_map, label_map, name_map):
    """backward
    Compute the probability of being in some state and generating the rest of
    the sequence.

    This function implements the scaled backward algorithm. The table is
    filled from the last position towards the first, and every row is divided
    by the constant of its position.

    NOTE(review): every row except the last one is multiplied by the emission
    of the character at its *own* position, while the last row is set to
    ``1 / constants[-1]`` without an emission factor. Confirm against the
    code that combines this table with the forward table that this is the
    intended scaling.

    :param sequence str: a string over the alphabet specified by the model.
        It is upper-cased before use.
    :param constants np.ndarray: an array of the constants used to normalize
        the forward table.
    :param initial: only its length (the number of states) is used.
    :param transitions: ``transitions[j, k]`` is the probability of going
        from state ``j`` to state ``k``.
    :param emissions: ``emissions[j, c]`` is the probability of state ``j``
        emitting the character whose index is ``c``.
    :param char_map: mapping from character to column index in ``emissions``.
    :param label_map: not used by this function.
    :param name_map: not used by this function.
    :rtype: np.ndarray
    :return: the scaled backward table (``no_observations x no_states``).
    """

    sequence = sequence.upper()

    cdef int no_observations = len(sequence)
    cdef int no_states = len(initial)

    cdef np.ndarray[DTYPE_t, ndim=2] M = np.zeros(
        [no_observations, no_states], dtype=DTYPE)

    cdef unsigned int pos, source, target, symbol
    cdef double state_sum

    # last position: every state gets the same scaled value
    M[no_observations - 1] = 1.0 / constants[no_observations - 1]

    # walk from the second-to-last position down to the first one
    for pos in range(no_observations-2, -1, -1):
        symbol = char_map[sequence[pos]]
        for source in range(no_states):
            state_sum = 0.0
            for target in range(no_states):
                state_sum += M[(pos + 1), target] * transitions[source, target]
            M[pos, source] = state_sum * emissions[source, symbol]
        M[pos] = M[pos] / constants[pos]

    return M
pyTMHMM/model.py ADDED
@@ -0,0 +1,178 @@
1
+ import collections
2
+ import re
3
+ import os
4
+
5
+ import numpy as np
6
+
7
+
8
+ def _tokenize(contents):
9
+ return re.findall(r'([A-Za-z0-9\.\-_]+|[:;\{\}])', contents)
10
+
11
+
12
+ def _strip_comments(file_like):
13
+ with open(file_like, 'r') as f:
14
+ lines = f.readlines()
15
+ return ''.join(filter(lambda l: not l.startswith('#'), lines))
16
+
17
+
18
+ def _parse_list(tokens):
19
+ parsed_list = []
20
+ while True:
21
+ token = tokens.popleft()
22
+ if token == ';':
23
+ tokens.appendleft(token)
24
+ return tokens, parsed_list
25
+ parsed_list.append(token)
26
+
27
+
28
+ def _parse_map(tokens):
29
+ parsed_map = collections.OrderedDict()
30
+ while True:
31
+ token = tokens.popleft()
32
+ if token == ';':
33
+ tokens.appendleft(token)
34
+ return tokens, parsed_map
35
+ next_token = tokens.popleft()
36
+
37
+ # Fallback if the map was actually a list
38
+ if next_token != ':':
39
+ tokens.appendleft(next_token)
40
+ tokens.appendleft(token)
41
+ return tokens, None
42
+
43
+ value = tokens.popleft()
44
+ parsed_map[token] = float(value)
45
+
46
+
47
+
48
+ def _parse_state(tokens):
49
+ state_name = tokens.popleft()
50
+ tokens.popleft() # "{"
51
+
52
+ parsed_state = {}
53
+ while True:
54
+ token = tokens.popleft()
55
+ if token == '}':
56
+ return tokens, (state_name, parsed_state)
57
+ if token in ('trans', 'only'):
58
+ tokens, value = _parse_map(tokens)
59
+ if value is None:
60
+ tokens, value = _parse_list(tokens)
61
+ elif token in ('type', 'end'):
62
+ value = int(tokens.popleft())
63
+ else:
64
+ value = tokens.popleft()
65
+ parsed_state[token] = value
66
+ tokens.popleft() # ";"
67
+
68
+
69
+ def _parse_header(tokens):
70
+ tokens.popleft() # "header"
71
+ tokens.popleft() # "{"
72
+
73
+ header = {}
74
+ while True:
75
+ token = tokens.popleft()
76
+ if token == '}':
77
+ break
78
+ header[token] = tokens.popleft()
79
+ tokens.popleft() # ";"
80
+ return tokens, header
81
+
82
+
83
+ def _normalize_states(states):
84
+ """Normalize states by inheriting parameters explicitly.
85
+
86
+ The TMHMM file format allows parameters to be tied to the parameters of
87
+ some other state. This basically means that a state inherits the parameters
88
+ from another state.
89
+
90
+ The normalization performed by this function consists of copying the
91
+ specified parameters from the parent state to the inheriting state such
92
+ that all states explicitly specify their transition and emission
93
+ probabilities.
94
+ """
95
+ for name, state in states.items():
96
+ # inherit parent's transition probabilities, but only for
97
+ # the states specified for this state.
98
+ if 'tied_trans' in state:
99
+ parent_state = states[state['tied_trans']]
100
+ to_states = state['trans']
101
+ states[name]['trans'] = dict(zip(state['trans'],
102
+ parent_state['trans'].values()))
103
+
104
+ # inherit parent's emission probabilities
105
+ if 'tied_letter' in state:
106
+ parent_state = state['tied_letter']
107
+ states[name]['only'] = dict(states[parent_state]['only'])
108
+ return states
109
+
110
+
111
+ def _to_matrix_form(alphabet, states):
112
+ """
113
+ Convert a model to matrix form.
114
+ """
115
+ # pull out initial probabilities
116
+ begin = dict(states['begin'])
117
+ del states['begin']
118
+
119
+ # build state -> index mapping
120
+ state_map = {v: k for k, v in enumerate(states)}
121
+ # build character -> index mapping
122
+ char_map = {v: k for k, v in enumerate(alphabet)}
123
+
124
+ no_states = len(states)
125
+
126
+ initial = np.zeros(shape=(no_states,))
127
+ transitions = np.zeros(shape=(no_states, no_states))
128
+ emissions = np.zeros(shape=(no_states, len(alphabet)))
129
+
130
+ label_map = {}
131
+ name_map = dict(enumerate(states))
132
+
133
+ # initial probabilities
134
+ for state_name, trans_prob in begin['trans'].items():
135
+ this_state_idx = state_map[state_name]
136
+ initial[this_state_idx] = trans_prob
137
+
138
+ for state_name, state in states.items():
139
+ this_state_idx = state_map[state_name]
140
+
141
+ # label map
142
+ if 'label' in state:
143
+ label_map[this_state_idx] = state['label']
144
+
145
+ # transition probabilities
146
+ for other_state_name, trans_prob in state['trans'].items():
147
+ other_state_idx = state_map[other_state_name]
148
+ transitions[this_state_idx, other_state_idx] = trans_prob
149
+
150
+ # emission probabilities
151
+ for character, emission_prob in state['only'].items():
152
+ this_character_idx = char_map[character]
153
+ emissions[this_state_idx, this_character_idx] = emission_prob
154
+
155
+ return initial, transitions, emissions, char_map, label_map, name_map
156
+
157
+
158
def parse(file_like):
    """
    Parse a model in the TMHMM 2.0 format.

    The input is stripped of comment lines, tokenized, and read as one
    header block followed by state blocks until no tokens are left. Tied
    parameters are then made explicit and the states are converted to
    matrix form.

    :param file_like: the model source, handed unchanged to
        ``_strip_comments``; a filesystem path is accepted.
    :return: a model as a tuple ``(header, matrices)`` where ``header`` is
        the dict of header fields and ``matrices`` is the tuple returned by
        ``_to_matrix_form``.
    """
    tokens = collections.deque(_tokenize(_strip_comments(file_like)))

    tokens, header = _parse_header(tokens)

    states = {}
    while tokens:
        tokens, parsed = _parse_state(tokens)
        state_name, state_body = parsed
        states[state_name] = state_body

    assert not tokens, "list of tokens not consumed completely"
    # the header's one-token alphabet string is iterated character by character
    alphabet = header['alphabet']
    normalized = _normalize_states(states)
    return header, _to_matrix_form(alphabet, normalized)
178
+
pyTMHMM/utils.py ADDED
@@ -0,0 +1,59 @@
1
+ from collections import namedtuple
2
+
3
+
4
# One FASTA record: identifier, free-text description and sequence.
FastaEntry = namedtuple('FastaEntry', ['id', 'description', 'sequence'])
5
+
6
+
7
def load_posterior_file(fileobj):
    """Read a posterior table as written by ``dump_posterior_file``.

    The first line is treated as a header and skipped. Every following
    non-blank line is split on whitespace and converted to floats.

    :param fileobj: an open text file object.
    :return: a list with one tuple per *column* of the file, i.e. the rows
        are transposed. The list is empty if there are no data lines. If the
        rows differ in length the columns are cut to the shortest row.
    :raises ValueError: if a field cannot be converted to ``float``.
    """
    # skip header
    fileobj.readline()
    rows = []
    for line in fileobj:
        fields = line.split()
        # A blank line would otherwise count as a zero-length row and make
        # the transposition below return nothing at all.
        if fields:
            rows.append([float(field) for field in fields])
    # Materialize the result: a bare zip() can only be iterated once and
    # cannot be indexed.
    return list(zip(*rows))
14
+
15
+
16
def dump_posterior_file(fileobj, posterior):
    """Write a posterior table as whitespace-separated text.

    A header line ``inside membrane outside`` is written first, followed by
    one line per row of ``posterior`` holding the row's first three columns.

    :param fileobj: an open, writable text file object.
    :param posterior: a two-dimensional array with at least three columns;
        it must support ``.shape`` and ``posterior[row, column]`` indexing.
    """
    print('inside', 'membrane', 'outside', file=fileobj)
    row_count = posterior.shape[0]
    for row in range(row_count):
        inside, membrane, outside = (posterior[row, col] for col in range(3))
        print('{} {} {}'.format(inside, membrane, outside), file=fileobj)
22
+
23
+
24
def load_fasta_file(fileobj):
    """load_fasta_file

    Returns a list of `(id, description, sequence)` tuples. The `id` and
    `description` is extracted from the header line. The `id` is the part of
    the header line before the first whitespace character. The `description`
    is everything coming after the first whitespace character and not all
    FASTA headers have descriptions.

    Details:

    * Lines before the first header line do not belong to any record and are
      ignored.
    * Each header line starts a new record, even if the header is empty; in
      that case both `id` and `description` are empty strings.
    * Sequence lines are stripped of surrounding whitespace and concatenated.
    * A record without any sequence is kept when another header follows it,
      but dropped when it is the last record of the file.

    :param fileobj: an iterable of text lines, e.g. an open file object.
    """
    entries = []
    header = None  # None until the first header line has been seen
    chunks = []    # stripped sequence lines of the current record

    def append_entry(header, sequence):
        parts = header.split(None, 1)
        entry_id = parts[0] if parts else ""
        description = parts[1] if len(parts) > 1 else ""
        entries.append(FastaEntry(entry_id, description, sequence))

    for line in fileobj:
        if line.startswith(">"):
            # Middle of file: the previous record is complete
            if header is not None:
                append_entry(header, ''.join(chunks))
            header = line[1:].strip()
            chunks = []
        elif header is not None:
            chunks.append(line.strip())
    # End of file
    if header is not None:
        sequence = ''.join(chunks)
        if sequence != '':
            append_entry(header, sequence)
    return entries
59
+
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyTMHMM-binaries
3
+ Version: 1.3.6
4
+ Summary: A transmembrane helix finder.
5
+ Author-email: Brian Osborne <bosborne@alum.mit.edu>
6
+ License: MIT License
7
+ Project-URL: Homepage, https://github.com/bosborne/pyTMHMM/
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE.md
14
+ Requires-Dist: numpy>=1.24
15
+ Provides-Extra: plotting
16
+ Requires-Dist: matplotlib; extra == "plotting"
17
+ Provides-Extra: build
18
+ Requires-Dist: numpy>=1.24; extra == "build"
19
+ Requires-Dist: cython>=0.29; extra == "build"
20
+ Provides-Extra: test
21
+ Requires-Dist: numpy>=1.24; extra == "test"
22
+ Dynamic: license-file
23
+
24
+ ## pytmhmm_binaries
25
+
26
+ This is a fork of pyTMHMM <https://github.com/bosborne/pyTMHMM>.
27
+
28
+ Things that were changed: minor updates for newer Python versions / newer Python packaging.
29
+
30
+ To build manylinux/musllinux binaries for x86 and arm:
31
+
32
+ ```bash
33
+ CIBW_ARCHS_LINUX="x86_64 aarch64" uvx cibuildwheel --platform linux
34
+ ```
35
+
36
+ Publishing to PyPI:
37
+
38
+ ```bash
39
+ # binary distribution
40
+ uv publish wheelhouse/*.whl
41
+ # source distribution
42
+ uv publish
43
+ ```
44
+
@@ -0,0 +1,15 @@
1
+ pyTMHMM/TMHMM2.0.model,sha256=qlcway9gAO4wUYXYxYYDx_ZGPstByY0W0d_YAwL_qb4,22262
2
+ pyTMHMM/__init__.py,sha256=_9ywcsMUHKWo_LoDpVGJFXJe6s4Np30IIjCr4_Tsl8g,55
3
+ pyTMHMM/api.py,sha256=arOVvela8S3L4IbGloqSeafgByi2muUVZX7ByXV_RrI,1296
4
+ pyTMHMM/cli.py,sha256=DECs21Os6Jr0kUBSPM6c77WI5nHE6KzEAM0KmtZIZT4,3028
5
+ pyTMHMM/hmm.c,sha256=LLpcYkM4JVGDqjitoQisedzmhD4P2cNLXFK-ZOsKFMs,639862
6
+ pyTMHMM/hmm.cpython-314t-aarch64-linux-gnu.so,sha256=2p2o7luxgnLNbBSd_iEmDnTU0gBYNuB3xu4YBaAF4QA,1159592
7
+ pyTMHMM/hmm.pyx,sha256=ykrGytUMWuIiHQi4HcYMQWfQUgD0MzM_4AppowAPMHo,5479
8
+ pyTMHMM/model.py,sha256=rcnFZOu5rFCENvMiKwYnba49RfSQ3eetFW2xWgyKkw0,5263
9
+ pyTMHMM/utils.py,sha256=OPFgTCCwhEgTFgFPbfQstFgCvgUPz6Qcv-lgPAFGIBg,1702
10
+ pytmhmm_binaries-1.3.6.dist-info/METADATA,sha256=Ya5vTNmLeEIWopmc2HjEjSrVwurT89Y4LuK1o4E6Dj4,1219
11
+ pytmhmm_binaries-1.3.6.dist-info/WHEEL,sha256=exfk16KSmR3o56VDCkqK-bqklnUFXmoh0YHfHtLBcFY,197
12
+ pytmhmm_binaries-1.3.6.dist-info/entry_points.txt,sha256=JX9DDmDMynmwMbsu0SJzW6wSzV_G3_algtUxzQEWNA8,44
13
+ pytmhmm_binaries-1.3.6.dist-info/top_level.txt,sha256=NcLw-C2BK7A2vae9K2CVg9Mwpa25ZUK0tEBnMcEiadY,8
14
+ pytmhmm_binaries-1.3.6.dist-info/RECORD,,
15
+ pytmhmm_binaries-1.3.6.dist-info/licenses/LICENSE.md,sha256=UTkYxrj06GGYGUe6DNBLPYtVcXD5FBk9-fSH_xV6bg8,1080
@@ -0,0 +1,7 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: false
4
+ Tag: cp314-cp314t-manylinux_2_17_aarch64
5
+ Tag: cp314-cp314t-manylinux2014_aarch64
6
+ Tag: cp314-cp314t-manylinux_2_28_aarch64
7
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pyTMHMM = pyTMHMM.cli:cli
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Brian Osborne
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ pyTMHMM