pyTMHMM-binaries 1.3.6__cp314-cp314-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyTMHMM/TMHMM2.0.model +991 -0
- pyTMHMM/__init__.py +3 -0
- pyTMHMM/api.py +42 -0
- pyTMHMM/cli.py +92 -0
- pyTMHMM/hmm.c +14983 -0
- pyTMHMM/hmm.cpython-314-x86_64-linux-musl.so +0 -0
- pyTMHMM/hmm.pyx +164 -0
- pyTMHMM/model.py +178 -0
- pyTMHMM/utils.py +59 -0
- pytmhmm_binaries-1.3.6.dist-info/METADATA +44 -0
- pytmhmm_binaries-1.3.6.dist-info/RECORD +15 -0
- pytmhmm_binaries-1.3.6.dist-info/WHEEL +5 -0
- pytmhmm_binaries-1.3.6.dist-info/entry_points.txt +2 -0
- pytmhmm_binaries-1.3.6.dist-info/licenses/LICENSE.md +21 -0
- pytmhmm_binaries-1.3.6.dist-info/top_level.txt +1 -0
pyTMHMM/__init__.py
ADDED
pyTMHMM/api.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import os.path
|
|
5
|
+
|
|
6
|
+
from pyTMHMM.model import parse
|
|
7
|
+
from pyTMHMM.hmm import viterbi, forward, backward
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Lower-cased state-label groups; their order fixes the column order of the
# posterior table returned by predict().
GROUP_NAMES = ('i', 'm', 'o')
|
|
11
|
+
|
|
12
|
+
# Path of the 'TMHMM2.0.model' file located next to this module; used as the
# default model argument of predict().
DEFAULT_MODEL = os.path.join(os.path.dirname(__file__), 'TMHMM2.0.model')
|
|
13
|
+
|
|
14
|
+
def predict(sequence, model_or_filelike=DEFAULT_MODEL, compute_posterior=True):
    """
    Predict the most likely state path for a sequence.

    :param sequence: the sequence to annotate; it is handed unchanged to
        the ``viterbi``/``forward``/``backward`` routines.
    :param model_or_filelike: either an already parsed model (a tuple whose
        last two items are the label map and the name map) or anything
        accepted by ``parse``, which is then used to load the model.
    :param compute_posterior: when true, also compute the per-position
        posterior probabilities.
    :return: ``path`` when ``compute_posterior`` is false, otherwise
        ``(path, table)`` where ``table`` has one row per position and one
        column per entry of ``GROUP_NAMES`` (in that order), each row
        normalised to sum to one.
    """
    if isinstance(model_or_filelike, tuple):
        model = model_or_filelike
    else:
        _, model = parse(model_or_filelike)

    _, path = viterbi(sequence, *model)
    if not compute_posterior:
        return path

    forward_table, constants = forward(sequence, *model)
    backward_table = backward(sequence, constants, *model)
    posterior = np.asarray(forward_table * backward_table)

    _, _, _, _, label_map, name_map = model
    observations = len(sequence)
    states = len(name_map)

    # The group of a state does not depend on the position in the sequence,
    # so resolve it once instead of once per (position, state) pair.
    state_groups = [label_map[j].lower() for j in range(states)]

    table = np.zeros(shape=(observations, len(GROUP_NAMES)))
    for k, group in enumerate(GROUP_NAMES):
        columns = [j for j, name in enumerate(state_groups) if name == group]
        if columns:
            # Sum the posterior mass of every state belonging to this group.
            table[:, k] = posterior[:observations, columns].sum(axis=1)

    return path, table / table.sum(axis=1, keepdims=True)
|
pyTMHMM/cli.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import itertools
|
|
3
|
+
import textwrap
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from .api import predict
|
|
7
|
+
from .api import DEFAULT_MODEL
|
|
8
|
+
from .model import parse # noqa: F401
|
|
9
|
+
from .utils import (
|
|
10
|
+
dump_posterior_file,
|
|
11
|
+
load_posterior_file,
|
|
12
|
+
load_fasta_file,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Set to False below when matplotlib cannot be imported; cli() only offers
# the --plot option while this flag is true.
has_matplotlib = True
|
|
16
|
+
try:
|
|
17
|
+
import matplotlib
|
|
18
|
+
matplotlib.use('Agg')
|
|
19
|
+
import matplotlib.pyplot as plt
|
|
20
|
+
except ImportError:
|
|
21
|
+
has_matplotlib = False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Human-readable names for the state labels found in a predicted path; used
# by cli() when writing the '.summary' file. Both 'o' and 'O' map to 'outside'.
PRETTY_NAMES = {
|
|
25
|
+
'i': 'inside',
|
|
26
|
+
'M': 'transmembrane helix',
|
|
27
|
+
'o': 'outside',
|
|
28
|
+
'O': 'outside'
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def summarize(path):
    """
    Summarize a path as (start, end, state) triples.

    This is a generator: it yields one triple per maximal run of identical
    consecutive states in ``path``. ``start`` and ``end`` are zero-based,
    inclusive positions of the first and last element of the run. An empty
    path yields nothing.
    """
    runs = itertools.groupby(enumerate(path), key=lambda item: item[1])
    for state, run in runs:
        # enumerate() hands out positions in increasing order, so the first
        # and last position of the run are its bounds; no min/max scan needed.
        positions = [position for position, _ in run]
        yield positions[0], positions[-1], state
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def plot(posterior_file, outputfile):
    """
    Plot the posterior probabilities stored in a posterior file.

    :param posterior_file: passed to ``load_posterior_file``, which must
        return the inside, membrane and outside series (in that order).
    :param outputfile: destination handed to ``plt.savefig``.

    Requires matplotlib to have been imported successfully at module level.
    """
    inside, membrane, outside = load_posterior_file(posterior_file)

    figure = plt.figure(figsize=(16, 8))
    try:
        plt.title('Posterior probabilities')
        plt.suptitle('pyTMHMM')
        plt.plot(inside, label='inside', color='blue')
        plt.plot(membrane, label='transmembrane', color='red')
        plt.fill_between(range(len(inside)), membrane, color='red')
        plt.plot(outside, label='outside', color='black')
        plt.legend(frameon=False, bbox_to_anchor=[0.5, 0],
                   loc='upper center', ncol=3, borderaxespad=1.5)
        plt.tight_layout(pad=3)
        plt.savefig(outputfile)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # this function is called once per sequence, so release it here.
        plt.close(figure)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def cli():
    """
    Command-line entry point.

    For every entry of the FASTA file given with ``-f/--file`` this writes,
    into the current working directory:

    * ``<id>.summary``: one "start end region" line per run of the path,
    * ``<id>.annotation``: a FASTA-like header followed by the wrapped path,
    * ``<id>.plot``: the posterior probabilities,
    * ``<id>.pdf``: a plot of the posteriors, only when ``-p/--plot`` is
      given (the option exists only if matplotlib could be imported).
    """
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', dest='sequence_file',
                        type=argparse.FileType('r'), required=True,
                        help='path to file in fasta format with sequences')
    parser.add_argument('-m', '--model', dest='model_file',
                        default=DEFAULT_MODEL,
                        help='path to the model to use')
    if has_matplotlib:
        parser.add_argument('-p', '--plot', dest='plot_posterior',
                            action='store_true',
                            help='plot posterior probabilities')
    args = parser.parse_args()

    try:
        # Parse the model once and hand predict() the parsed tuple, instead
        # of letting it re-parse the model file for every sequence.
        _, model = parse(args.model_file)
        model = tuple(model)

        for entry in load_fasta_file(args.sequence_file):
            path, posterior = predict(entry.sequence, model)

            # NOTE(review): entry.id comes straight from the input file and
            # is used verbatim as a file name; ids containing path
            # separators will write outside the working directory.
            with open(entry.id + '.summary', 'w') as summary_file:
                for start, end, state in summarize(path):
                    print("{} {} {}".format(start, end, PRETTY_NAMES[state]),
                          file=summary_file)

            with open(entry.id + '.annotation', 'w') as ann_file:
                print('>', entry.id, ' ', entry.description, sep='',
                      file=ann_file)
                for line in textwrap.wrap(path, 79):
                    print(line, file=ann_file)

            plot_filename = entry.id + '.plot'
            with open(plot_filename, 'w') as plot_file:
                dump_posterior_file(plot_file, posterior)

            # The option is absent from the namespace without matplotlib.
            if getattr(args, 'plot_posterior', False):
                with open(plot_filename, 'r') as fileobj:
                    plot(fileobj, entry.id + '.pdf')
    finally:
        # argparse.FileType opened the handle for us; close it unless it is
        # the process-wide stdin (which FileType returns for '-').
        if args.sequence_file is not sys.stdin:
            args.sequence_file.close()
|