codeine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeine/graph/base.py ADDED
@@ -0,0 +1,267 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+
5
+ from collections import Counter
6
+ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
7
+
8
+ from codeine.graph.nodes import CodonNode, ContextNode, EndNode, Node
9
+ from codeine.translation.tables import TranslationTable
10
+ from codeine.translation.weights import CodonWeights
11
+ from codeine.utils.display import format_restrictions
12
+ from codeine.utils.sampling import Seedable
13
+
14
+ if TYPE_CHECKING:
15
+ from codeine.graph.view import CodonGraphView
16
+
17
# A codon restriction for a single position: either one codon given as a string
# or a sequence of codon strings (both shapes are accepted by
# CodonGraph.validate_codon_restrictions).
CodonRestriction = Union[str, Sequence[str]]
18
+
19
+
20
class CodonGraph:
    """
    Class representing a graph of codon nodes.

    The graph is a chain: a left context node, one codon node per amino acid
    (in sequence order), a right context node and a terminal end node. Each
    codon of a codon node is a transition to the following node.
    """

    def __init__(
        self,
        aa_seq: str,
        codon_restrictions: Optional[Dict[int, CodonRestriction]] = None,
        translation_table: Optional[TranslationTable] = None,
        weights: Optional[CodonWeights] = None,
        context_l: str = '',
        context_r: str = '',
    ) -> None:
        """
        Build the codon graph for an amino-acid sequence.

        Parameters
        ----------
        aa_seq
            Amino-acid sequence. It is upper-cased before validation and
            must not be empty.
        codon_restrictions
            Optional mapping from 1-based position to the codon (or codons)
            allowed at that position.
        translation_table
            Translation table to use. When omitted, table 1 is created with
            the molecule type of ``weights`` if given, DNA otherwise.
        weights
            Codon weights. When omitted, uniform weights are created for the
            translation table.
        context_l
            Sequence placed to the left of the coding sequence.
        context_r
            Sequence placed to the right of the coding sequence.

        Raises
        ------
        ValueError
            If the sequence is empty or contains an unsupported amino acid,
            if the weights do not match the translation table, or if a codon
            restriction is invalid.
        """
        if not aa_seq:
            raise ValueError('Please provide non-empty sequence!')

        if translation_table is None:
            # Follow the molecule type of the supplied weights, defaulting to DNA.
            rna = weights.rna if weights is not None else False
            translation_table = TranslationTable(table_id=1, rna=rna)

        if weights is None:
            weights = CodonWeights.uniform(table=translation_table, rna=translation_table.rna)

        self.validate_codon_weights(weights, translation_table)

        self.tt = translation_table
        self.cw = weights

        self.aa_seq = aa_seq.upper()
        self.validate_aa_seq()

        # validate_codon_restrictions() reads self.codon_restrictions, so it
        # has to exist (empty) before the first validation pass.
        self.codon_restrictions: Dict[int, List[str]] = {}
        self.codon_restrictions = self.validate_codon_restrictions(codon_restrictions)

        self.context_l = self.tt.normalise_sequence(context_l)
        self.context_r = self.tt.normalise_sequence(context_r)

        # Placeholders; all of these are populated by _initialise_graph().
        self.left_context_node: Optional[ContextNode] = None
        self.right_context_node: Optional[ContextNode] = None
        self.end_node: Optional[EndNode] = None

        self.codon_nodes: Tuple[CodonNode, ...] = ()

        self.initial_node: Optional[Node] = None
        self.final_node: Optional[Node] = None

        self._initialise_graph()

    def __repr__(self) -> str:
        """
        Return a multi-line, human-readable summary of the graph.
        """
        molecule = 'RNA' if self.tt.rna else 'DNA'

        lines = [
            type(self).__name__,
            '',
            f'Translation table: {self.tt.table_id} ({self.tt.name})',
            f'Molecule type: {molecule}',
            '',
            f'Amino acid sequence ({len(self.aa_seq)} aa)',
            self.aa_seq,
            '',
        ]
        if self.codon_restrictions:
            lines += [
                'Codon restrictions:',
                *format_restrictions(
                    self.codon_restrictions,
                    label='restricted positions',
                ),
                '',
            ]

        return '\n'.join(lines)

    def validate_aa_seq(self) -> None:
        """
        Check that all amino acids in the sequence are supported.

        Raises
        ------
        ValueError
            If an amino acid is not present in the translation table. The
            reported position is 1-based.
        """
        supported = self.tt.aa_to_codons
        for pos, aa in enumerate(self.aa_seq, start=1):
            if aa not in supported:
                raise ValueError(f'Invalid amino acid {aa} at position {pos}.')

    def validate_codon_restrictions(
        self,
        codon_restrictions: Optional[Dict[int, CodonRestriction]],
    ) -> Dict[int, List[str]]:
        """
        Validate and normalise codon restrictions.

        Parameters
        ----------
        codon_restrictions
            Mapping from 1-based position to a single codon or a sequence of
            codons. ``None`` is treated as no restrictions.

        Returns
        -------
        Dict[int, List[str]]
            Mapping from position to the list of normalised codons. Duplicate
            codons (after normalisation) are dropped, keeping first
            occurrences in order.

        Raises
        ------
        ValueError
            If a position is out of range, a restriction is empty, or a codon
            is not allowed at its position. A codon is allowed when it is in
            the restriction already stored on the graph for that position,
            or, if there is none, when it encodes the amino acid there.
        """
        normalised: Dict[int, List[str]] = {}

        for pos, codon_restriction in (codon_restrictions or {}).items():
            if pos < 1 or pos > len(self.aa_seq):
                raise ValueError(f'Restricted position {pos} is out of range.')

            # A bare string is one codon, not a sequence of characters.
            if isinstance(codon_restriction, str):
                codons = [codon_restriction]
            else:
                codons = list(codon_restriction)

            if not codons:
                raise ValueError(f'Codon restriction at position {pos} cannot be empty.')

            # Normalise, then drop repeats so a node never lists a codon twice
            # (different spellings may normalise to the same codon).
            codons = list(dict.fromkeys(
                self.tt.normalise_sequence(codon) for codon in codons
            ))

            aa = self.aa_seq[pos - 1]

            if pos in self.codon_restrictions:
                allowed_codons = [
                    self.tt.normalise_sequence(codon)
                    for codon in self.codon_restrictions[pos]
                ]
            else:
                allowed_codons = self.tt.aa_to_codons[aa]

            for codon in codons:
                if codon not in allowed_codons:
                    raise ValueError(f'Codon {codon} is not valid for amino acid {aa} at position {pos}.')

            normalised[pos] = codons

        return normalised

    @staticmethod
    def validate_codon_weights(weights: CodonWeights, translation_table: TranslationTable) -> None:
        """
        Check that codon weights are compatible with the provided translation table.

        Parameters
        ----------
        weights
            The codon weights.
        translation_table
            The translation table.

        Raises
        ------
        ValueError
            If the molecule types (``rna`` flags) differ, or if the
            amino-acid-to-codon mappings differ. Codons are compared per
            amino acid regardless of order.
        """
        if weights.rna != translation_table.rna:
            raise ValueError('Codon weights and translation table use different molecule types.')

        # Counter gives an order-insensitive comparison of each codon list.
        expected_codons = {
            aa: Counter(codons)
            for aa, codons in translation_table.aa_to_codons.items()
        }
        actual_codons = {
            aa: Counter(codons)
            for aa, codons in weights.aa_to_codons.items()
        }

        if actual_codons != expected_codons:
            raise ValueError('Codon weights and translation table do not match.')

    def codon_node_by_pos(self, pos: int) -> CodonNode:
        """
        Return the codon node at a given amino-acid position.

        Positioning is 1-based.

        Raises
        ------
        ValueError
            If ``pos`` is smaller than 1 or larger than the number of codon
            nodes.
        """
        if not 1 <= pos <= len(self.codon_nodes):
            raise ValueError(f'Position {pos} is out of range.')

        return self.codon_nodes[pos - 1]

    def _initialise_graph(self) -> None:
        """
        Initialise the codon graph.

        Creates the context, codon and end nodes, links them into a chain and
        stores them on the instance.
        """
        left_context_node = ContextNode(0, self.context_l)
        right_context_node = ContextNode(len(self.aa_seq) + 1, self.context_r)
        end_node = EndNode()

        # One node per amino acid; a restricted position only offers its
        # restricted codons, any other position offers every codon of its
        # amino acid.
        codon_nodes = [
            CodonNode(
                pos,
                aa,
                self.codon_restrictions[pos]
                if pos in self.codon_restrictions
                else self.tt.aa_to_codons[aa],
            )
            for pos, aa in enumerate(self.aa_seq, start=1)
        ]

        # Left context -> first codon node
        left_context_node.transitions = {
            left_context_node.sequence: codon_nodes[0]
        }
        codon_nodes[0].parents.add(
            (left_context_node, left_context_node.sequence)
        )

        # Codon node -> its successor: the next codon node, or the right
        # context node for the last codon node.
        successors = [*codon_nodes[1:], right_context_node]
        for previous, current in zip(codon_nodes, successors):
            for codon in previous.codons:
                previous.transitions[codon] = current
                current.parents.add((previous, codon))

        # Right context -> end
        right_context_node.transitions = {
            right_context_node.sequence: end_node
        }
        end_node.parents.add(
            (right_context_node, right_context_node.sequence)
        )

        self.left_context_node = left_context_node
        self.right_context_node = right_context_node
        self.end_node = end_node
        self.codon_nodes = tuple(codon_nodes)

        self.initial_node = left_context_node
        self.final_node = end_node

    @property
    def nodes(self) -> Tuple[Node, ...]:
        """
        All nodes in the graph, including context and end nodes.

        The order is: left context, codon nodes, right context, end.
        """
        return (
            self.left_context_node,
            *self.codon_nodes,
            self.right_context_node,
            self.end_node,
        )

    def view(self, seed: Optional[Seedable] = None) -> 'CodonGraphView':
        """
        Return a constrained view over this graph.

        Parameters
        ----------
        seed
            Seed used to initialise the view's random number generator.
        """
        # Imported here rather than at module level, where it is only
        # imported under TYPE_CHECKING (presumably to avoid a circular import).
        from codeine.graph.view import CodonGraphView
        return CodonGraphView(self, seed=seed)