pyTMHMM-binaries 1.3.6__cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyTMHMM/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from pyTMHMM.api import predict
2
+
3
+ __all__ = ['predict']
pyTMHMM/api.py ADDED
@@ -0,0 +1,42 @@
1
+ from collections import defaultdict
2
+
3
+ import numpy as np
4
+ import os.path
5
+
6
+ from pyTMHMM.model import parse
7
+ from pyTMHMM.hmm import viterbi, forward, backward
8
+
9
+
10
# Column order of the posterior table built by predict(): state labels are
# lower-cased and summed into these groups.
+ GROUP_NAMES = ('i', 'm', 'o')
11
+
12
# Path of the 'TMHMM2.0.model' file located in this module's directory; used
# as the default value of predict()'s model_or_filelike argument.
+ DEFAULT_MODEL = os.path.join(os.path.dirname(__file__), 'TMHMM2.0.model')
13
+
14
def predict(sequence, model_or_filelike=DEFAULT_MODEL, compute_posterior=True):
    """Predict the most likely state path for ``sequence``.

    :param sequence: the sequence to annotate. It is handed unchanged to
        ``viterbi``, ``forward`` and ``backward`` and must support ``len()``.
    :param model_or_filelike: either an already parsed model given as a
        tuple (used as is), or any value accepted by ``parse`` (in which
        case the model is parsed on every call). Defaults to
        ``DEFAULT_MODEL``.
    :param compute_posterior: when true, also compute the posterior table.
    :return: ``path`` alone when ``compute_posterior`` is false; otherwise
        ``(path, table)`` where ``table`` has one row per position of
        ``sequence`` and one column per entry of ``GROUP_NAMES``, each row
        divided by its own sum.
    """
    if isinstance(model_or_filelike, tuple):
        model = model_or_filelike
    else:
        # parse() also returns a header, which is not needed here.
        _, model = parse(model_or_filelike)

    _, path = viterbi(sequence, *model)
    if not compute_posterior:
        return path

    forward_table, constants = forward(sequence, *model)
    backward_table = backward(sequence, constants, *model)
    posterior = np.asarray(forward_table * backward_table)

    # A model is a 6-tuple; only the label and name maps are needed below.
    _, _, _, _, label_map, name_map = model

    observations = len(sequence)
    states = len(name_map)

    # Lower-case each state's label once, rather than once per position.
    state_groups = [label_map[j].lower() for j in range(states)]

    table = np.zeros(shape=(observations, len(GROUP_NAMES)))
    for k, group in enumerate(GROUP_NAMES):
        columns = [j for j, name in enumerate(state_groups) if name == group]
        if columns:
            # Sum the posterior mass of every state in this group for all
            # positions at once. States whose label is not in GROUP_NAMES
            # contribute nothing. Equal to the element-wise accumulation up
            # to floating-point rounding.
            table[:, k] = posterior[:observations, columns].sum(axis=1)

    return path, table / table.sum(axis=1, keepdims=True)
pyTMHMM/cli.py ADDED
@@ -0,0 +1,92 @@
1
+ import argparse
2
+ import itertools
3
+ import textwrap
4
+
5
+
6
+ from .api import predict
7
+ from .api import DEFAULT_MODEL
8
+ from .model import parse # noqa: F401
9
+ from .utils import (
10
+ dump_posterior_file,
11
+ load_posterior_file,
12
+ load_fasta_file,
13
+ )
14
+
15
+ has_matplotlib = True
16
+ try:
17
+ import matplotlib
18
+ matplotlib.use('Agg')
19
+ import matplotlib.pyplot as plt
20
+ except ImportError:
21
+ has_matplotlib = False
22
+
23
+
24
# Maps the single-character state symbols found in a predicted path to the
# region names that cli() writes to the '.summary' file. Both 'o' and 'O'
# are reported as 'outside'.
+ PRETTY_NAMES = {
25
+ 'i': 'inside',
26
+ 'M': 'transmembrane helix',
27
+ 'o': 'outside',
28
+ 'O': 'outside'
29
+ }
30
+
31
+
32
def summarize(path):
    """
    Summarize a path as (start, end, state) triples.

    This is a generator: it yields one triple per run of consecutive,
    identical states in ``path``. ``start`` and ``end`` are the 0-based,
    inclusive positions of the first and last element of the run. An empty
    path yields nothing.
    """
    runs = itertools.groupby(enumerate(path), key=lambda pair: pair[1])
    for state, run in runs:
        # enumerate() hands out positions in ascending order, so the first
        # and last positions of the run are its bounds; no min/max scan is
        # needed.
        positions = [position for position, _ in run]
        yield positions[0], positions[-1], state
41
+
42
+
43
def plot(posterior_file, outputfile):
    """
    Plot the posterior probabilities read from ``posterior_file``.

    :param posterior_file: handed to ``load_posterior_file``, whose result
        is unpacked as the inside, membrane and outside series (in that
        order).
    :param outputfile: destination passed to ``plt.savefig``.
    """
    inside, membrane, outside = load_posterior_file(posterior_file)

    figure = plt.figure(figsize=(16, 8))
    try:
        plt.title('Posterior probabilities')
        plt.suptitle('pyTMHMM')
        plt.plot(inside, label='inside', color='blue')
        plt.plot(membrane, label='transmembrane', color='red')
        plt.fill_between(range(len(inside)), membrane, color='red')
        plt.plot(outside, label='outside', color='black')
        plt.legend(frameon=False, bbox_to_anchor=[0.5, 0],
                   loc='upper center', ncol=3, borderaxespad=1.5)
        plt.tight_layout(pad=3)
        plt.savefig(outputfile)
    finally:
        # pyplot keeps every figure it creates alive until it is closed.
        # cli() calls plot() once per sequence, so without this the figures
        # pile up for the whole run.
        plt.close(figure)
57
+
58
+
59
def cli():
    """
    Command-line entry point: annotate every sequence of a FASTA file.

    For each entry yielded by ``load_fasta_file`` the following files are
    written, their names built by appending a suffix to the entry id:

    * ``<id>.summary``: one ``start end region`` line per triple yielded
      by ``summarize`` for the predicted path;
    * ``<id>.annotation``: a ``>id description`` header line followed by
      the predicted path wrapped at 79 characters;
    * ``<id>.plot``: the posterior table as written by
      ``dump_posterior_file``.

    When ``-p/--plot`` is given (the option is only registered if
    matplotlib could be imported), ``<id>.pdf`` is additionally rendered
    from the ``.plot`` file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', dest='sequence_file',
                        type=argparse.FileType('r'), required=True,
                        help='path to file in fasta format with sequences')
    parser.add_argument('-m', '--model', dest='model_file',
                        default=DEFAULT_MODEL,
                        help='path to the model to use')
    if has_matplotlib:
        parser.add_argument('-p', '--plot', dest='plot_posterior',
                            action='store_true',
                            help='plot posterior probabilities')
    args = parser.parse_args()

    # Parse the model once up front. Passing the path to predict() would
    # re-parse the same file for every sequence. predict() only takes the
    # "already parsed" route for tuples, hence the explicit conversion.
    _, model = parse(args.model_file)
    model = tuple(model)

    # The attribute is absent when matplotlib is unavailable.
    plot_requested = getattr(args, 'plot_posterior', False)

    for entry in load_fasta_file(args.sequence_file):
        path, posterior = predict(entry.sequence, model)

        with open(entry.id + '.summary', 'w') as summary_file:
            for start, end, state in summarize(path):
                print("{} {} {}".format(start, end, PRETTY_NAMES[state]),
                      file=summary_file)

        with open(entry.id + '.annotation', 'w') as ann_file:
            print('>', entry.id, ' ', entry.description, sep='', file=ann_file)
            for line in textwrap.wrap(path, 79):
                print(line, file=ann_file)

        plot_filename = entry.id + '.plot'
        with open(plot_filename, 'w') as plot_file:
            dump_posterior_file(plot_file, posterior)

        if plot_requested:
            # The plot is drawn from the file just written, not from the
            # in-memory table.
            with open(plot_filename, 'r') as fileobj:
                plot(fileobj, entry.id + '.pdf')