codeine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeine/__init__.py +15 -0
- codeine/constraints/banned.py +444 -0
- codeine/constraints/base.py +39 -0
- codeine/constraints/mutations.py +115 -0
- codeine/graph/base.py +267 -0
- codeine/graph/compile.py +489 -0
- codeine/graph/nodes.py +111 -0
- codeine/graph/view.py +781 -0
- codeine/motifs/restriction.py +105 -0
- codeine/motifs/validate.py +117 -0
- codeine/space/__init__.py +0 -0
- codeine/space/coding.py +490 -0
- codeine/space/mutation.py +512 -0
- codeine/translation/__init__.py +0 -0
- codeine/translation/data/__init__.py +0 -0
- codeine/translation/data/tables.json +2252 -0
- codeine/translation/data/weights.py +232 -0
- codeine/translation/tables.py +200 -0
- codeine/translation/weights.py +323 -0
- codeine/utils/__init__.py +0 -0
- codeine/utils/dict.py +23 -0
- codeine/utils/display.py +124 -0
- codeine/utils/sampling.py +90 -0
- codeine-0.1.0.dist-info/METADATA +162 -0
- codeine-0.1.0.dist-info/RECORD +28 -0
- codeine-0.1.0.dist-info/WHEEL +5 -0
- codeine-0.1.0.dist-info/licenses/LICENSE +21 -0
- codeine-0.1.0.dist-info/top_level.txt +1 -0
codeine/graph/base.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
|
|
5
|
+
from collections import Counter
|
|
6
|
+
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
|
7
|
+
|
|
8
|
+
from codeine.graph.nodes import CodonNode, ContextNode, EndNode, Node
|
|
9
|
+
from codeine.translation.tables import TranslationTable
|
|
10
|
+
from codeine.translation.weights import CodonWeights
|
|
11
|
+
from codeine.utils.display import format_restrictions
|
|
12
|
+
from codeine.utils.sampling import Seedable
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from codeine.graph.view import CodonGraphView
|
|
16
|
+
|
|
17
|
+
# A codon restriction is either a single codon string or a sequence of codon strings.
CodonRestriction = Union[str, Sequence[str]]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CodonGraph:
    """
    Class representing a graph of codon nodes.

    The graph is a simple chain: a left context node, one codon node per
    amino acid of the sequence, a right context node and a terminal end node.
    Each codon of a codon node is a transition to the next node in the chain.
    """

    def __init__(
        self,
        aa_seq: str,
        codon_restrictions: Optional[Dict[int, CodonRestriction]] = None,
        translation_table: Optional[TranslationTable] = None,
        weights: Optional[CodonWeights] = None,
        context_l: str = '',
        context_r: str = '',
    ) -> None:
        """
        Build the codon graph for an amino-acid sequence.

        Parameters
        ----------
        aa_seq
            The amino-acid sequence; it is upper-cased before validation.
        codon_restrictions
            Optional mapping from 1-based position to the codon (or codons)
            allowed at that position.
        translation_table
            The translation table. When omitted, table 1 is used with the
            molecule type of ``weights`` (DNA if no weights are given).
        weights
            The codon weights. When omitted, uniform weights are built for
            the translation table.
        context_l
            Sequence placed in the left context node.
        context_r
            Sequence placed in the right context node.

        Raises
        ------
        ValueError
            If the sequence is empty or contains an unsupported amino acid,
            if the weights do not match the translation table, or if a codon
            restriction is invalid.
        """
        if not aa_seq:
            raise ValueError('Please provide non-empty sequence!')

        if translation_table is None:
            # Follow the molecule type of the supplied weights, if any.
            rna = weights.rna if weights is not None else False
            translation_table = TranslationTable(table_id=1, rna=rna)

        if weights is None:
            weights = CodonWeights.uniform(table=translation_table, rna=translation_table.rna)

        self.validate_codon_weights(weights, translation_table)

        self.tt = translation_table
        self.cw = weights

        self.aa_seq = aa_seq.upper()
        self.validate_aa_seq()

        # validate_codon_restrictions() reads self.codon_restrictions, so the
        # attribute must exist (empty) before the first validation pass.
        self.codon_restrictions = {}
        self.codon_restrictions = self.validate_codon_restrictions(codon_restrictions)

        self.context_l = self.tt.normalise_sequence(context_l)
        self.context_r = self.tt.normalise_sequence(context_r)

        # Placeholders; all of these are populated by _initialise_graph().
        self.left_context_node = None
        self.right_context_node = None
        self.end_node = None

        self.codon_nodes: Tuple[CodonNode, ...] = ()

        self.initial_node = None
        self.final_node = None

        self._initialise_graph()

    def __repr__(self) -> str:
        """
        Return a multi-line, human-readable summary of the graph.
        """
        molecule = 'RNA' if self.tt.rna else 'DNA'

        lines = [
            f'{type(self).__name__}',
            '',
            f'Translation table: {self.tt.table_id} ({self.tt.name})',
            f'Molecule type: {molecule}',
            '',
            f'Amino acid sequence ({len(self.aa_seq)} aa)',
            f'{self.aa_seq}',
            '',
        ]
        if self.codon_restrictions:
            lines += [
                'Codon restrictions:',
                *format_restrictions(
                    self.codon_restrictions,
                    label='restricted positions',
                ),
                '',
            ]

        return '\n'.join(lines)

    def validate_aa_seq(self) -> None:
        """
        Check that all amino acids in the sequence are supported.

        Raises
        ------
        ValueError
            If an amino acid is not a key of the translation table's
            ``aa_to_codons`` mapping. The reported position is 1-based.
        """
        for pos, aa in enumerate(self.aa_seq, start=1):
            if aa not in self.tt.aa_to_codons:
                raise ValueError(f'Invalid amino acid {aa} at position {pos}.')

    def validate_codon_restrictions(
        self,
        codon_restrictions: Optional[Dict[int, CodonRestriction]],
    ) -> Dict[int, List[str]]:
        """
        Validate and normalise per-position codon restrictions.

        Parameters
        ----------
        codon_restrictions
            Mapping from 1-based position to a single codon or a sequence of
            codons. ``None`` is treated as no restrictions.

        Returns
        -------
        Dict[int, List[str]]
            A new mapping from position to the list of normalised codons,
            with duplicates removed and first-seen order preserved.

        Raises
        ------
        ValueError
            If a position is out of range, a restriction is empty, or a codon
            is not allowed at its position. When the position is already
            restricted on this graph, the allowed codons are the existing
            restriction; otherwise they are the codons of the amino acid.
        """
        codon_restrictions = codon_restrictions or {}
        normalised = {}

        for pos, codon_restriction in codon_restrictions.items():
            if pos < 1 or pos > len(self.aa_seq):
                raise ValueError(f'Restricted position {pos} is out of range.')

            if isinstance(codon_restriction, str):
                codons = [codon_restriction]
            else:
                codons = list(codon_restriction)

            if not codons:
                raise ValueError(f'Codon restriction at position {pos} cannot be empty.')

            # Normalise, then drop repeats (e.g. 'aag' and 'AAG') so that a
            # position never lists the same codon twice.
            codons = list(dict.fromkeys(self.tt.normalise_sequence(codon) for codon in codons))

            aa = self.aa_seq[pos - 1]

            if pos in self.codon_restrictions:
                allowed_codons = [
                    self.tt.normalise_sequence(codon)
                    for codon in self.codon_restrictions[pos]
                ]
            else:
                allowed_codons = self.tt.aa_to_codons[aa]

            for codon in codons:
                if codon not in allowed_codons:
                    raise ValueError(f'Codon {codon} is not valid for amino acid {aa} at position {pos}.')

            normalised[pos] = codons

        return normalised

    @staticmethod
    def validate_codon_weights(weights: CodonWeights, translation_table: TranslationTable) -> None:
        """
        Check that codon weights are compatible with the provided translation table.

        Parameters
        ----------
        weights
            The codon weights.
        translation_table
            The translation table.

        Raises
        ------
        ValueError
            If the weights and the table use different molecule types, or if
            their amino-acid-to-codon mappings differ (codon order within an
            amino acid is ignored).
        """
        if weights.rna != translation_table.rna:
            raise ValueError('Codon weights and translation table use different molecule types.')

        # Counters make the comparison insensitive to codon order.
        expected_codons = {
            aa: Counter(codons)
            for aa, codons in translation_table.aa_to_codons.items()
        }

        actual_codons = {
            aa: Counter(codons)
            for aa, codons in weights.aa_to_codons.items()
        }

        if actual_codons != expected_codons:
            raise ValueError('Codon weights and translation table do not match.')

    def codon_node_by_pos(self, pos: int) -> CodonNode:
        """
        Return the codon node at a given amino-acid position.

        Positioning is 1-based.

        Raises
        ------
        ValueError
            If ``pos`` is smaller than 1 or larger than the number of codon nodes.
        """
        if pos < 1 or pos > len(self.codon_nodes):
            raise ValueError(f'Position {pos} is out of range.')

        return self.codon_nodes[pos - 1]

    def _initialise_graph(self) -> None:
        """
        Initialise the codon graph.

        Creates the context, codon and end nodes, wires up the transitions
        and parent links along the chain, and stores the nodes on the instance.
        """
        left_context_node = ContextNode(0, self.context_l)
        right_context_node = ContextNode(len(self.aa_seq) + 1, self.context_r)
        end_node = EndNode()

        codon_nodes = []
        for pos, aa in enumerate(self.aa_seq, start=1):
            # A restricted position only offers its restricted codons.
            if pos in self.codon_restrictions:
                codons = self.codon_restrictions[pos]
            else:
                codons = self.tt.aa_to_codons[aa]

            codon_nodes.append(CodonNode(pos, aa, codons))

        # Left context -> first codon node
        first_codon_node = codon_nodes[0]
        left_context_node.transitions = {
            left_context_node.sequence: first_codon_node
        }
        first_codon_node.parents.add(
            (left_context_node, left_context_node.sequence)
        )

        # Codon node -> next codon node
        for previous, current in zip(codon_nodes, codon_nodes[1:]):
            for codon in previous.codons:
                previous.transitions[codon] = current
                current.parents.add((previous, codon))

        # Last codon node -> right context
        last_codon_node = codon_nodes[-1]
        for codon in last_codon_node.codons:
            last_codon_node.transitions[codon] = right_context_node
            right_context_node.parents.add((last_codon_node, codon))

        # Right context -> end
        right_context_node.transitions = {
            right_context_node.sequence: end_node
        }
        end_node.parents.add(
            (right_context_node, right_context_node.sequence)
        )

        self.left_context_node = left_context_node
        self.right_context_node = right_context_node
        self.end_node = end_node
        self.codon_nodes = tuple(codon_nodes)

        self.initial_node = left_context_node
        self.final_node = end_node

    @property
    def nodes(self) -> Tuple[Node, ...]:
        """
        All nodes in the graph, including context and end nodes.

        The order is: left context, codon nodes by position, right context, end.
        """
        return (
            self.left_context_node,
            *self.codon_nodes,
            self.right_context_node,
            self.end_node,
        )

    def view(self, seed: Optional[Seedable] = None) -> 'CodonGraphView':
        """
        Return a constrained view over this graph.

        Parameters
        ----------
        seed
            Seed used to initialise the view's random number generator.

        Returns
        -------
        CodonGraphView
            A new view constructed from this graph and ``seed``.
        """
        # Imported here rather than at module level, where it is only
        # imported under TYPE_CHECKING.
        from codeine.graph.view import CodonGraphView

        return CodonGraphView(self, seed=seed)
|