geney 1.4.23__py2.py3-none-any.whl → 1.4.25__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geney/utils/SeqMats.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import List, Tuple, Union, Optional
|
|
|
8
8
|
from collections import defaultdict
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
|
-
|
|
11
|
+
from geney.utils.utils import contains
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def format_mut_id(text):
|
|
@@ -105,7 +105,7 @@ class SeqMat:
|
|
|
105
105
|
"name": self.name,
|
|
106
106
|
"source": self.source,
|
|
107
107
|
"version": self.version,
|
|
108
|
-
"notes": self.notes
|
|
108
|
+
"notes": self.notes,
|
|
109
109
|
}
|
|
110
110
|
|
|
111
111
|
@property
|
|
@@ -165,7 +165,7 @@ class SeqMat:
|
|
|
165
165
|
# only_snps: bool = False
|
|
166
166
|
# ) -> SeqMat:
|
|
167
167
|
|
|
168
|
-
def
|
|
168
|
+
def apply_mutation(
|
|
169
169
|
self,
|
|
170
170
|
mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]] = None,
|
|
171
171
|
*,
|
|
@@ -179,6 +179,11 @@ class SeqMat:
|
|
|
179
179
|
- Supports a single tuple or a list of tuples
|
|
180
180
|
- Assumes mutations sorted by position for vectorized searchsorted
|
|
181
181
|
"""
|
|
182
|
+
turn_back = False
|
|
183
|
+
if self.rev:
|
|
184
|
+
turn_back = True
|
|
185
|
+
self.reverse_complement()
|
|
186
|
+
|
|
182
187
|
# Normalize to list
|
|
183
188
|
# if isinstance(mutations, tuple) and len(mutations) == 3:
|
|
184
189
|
# mutations = [mutations]
|
|
@@ -215,11 +220,16 @@ class SeqMat:
|
|
|
215
220
|
# left-normalize
|
|
216
221
|
while r and a and r[0] == a[0]:
|
|
217
222
|
p += 1; r = r[1:] or '-'; a = a[1:] or '-'
|
|
223
|
+
|
|
224
|
+
if not contains(self.index, p):
|
|
225
|
+
continue # Skip invalid positions
|
|
226
|
+
|
|
218
227
|
if r != '-' and a != '-': subs.append((p, r, a))
|
|
219
228
|
elif r == '-' and a != '-' and not only_snps: ins.append((p, a))
|
|
220
229
|
elif a == '-' and r != '-' and not only_snps: dels.append((p, r))
|
|
221
230
|
else: raise ValueError(f"Unsupported mutation {p}:{r}:{a}")
|
|
222
231
|
|
|
232
|
+
applied_mutations = 0
|
|
223
233
|
# Ensure seq_array indices sorted
|
|
224
234
|
coords = self.seq_array['index']
|
|
225
235
|
|
|
@@ -234,7 +244,7 @@ class SeqMat:
|
|
|
234
244
|
actual = self.seq_array['ref'][idx:idx + length].tobytes().decode()
|
|
235
245
|
raise ValueError(f"Ref mismatch at {pos}: expected {ref}, found {actual}")
|
|
236
246
|
self.seq_array['nt'][idx:idx + length] = np.frombuffer(alt.encode(), dtype='S1')
|
|
237
|
-
|
|
247
|
+
applied_mutations += 1
|
|
238
248
|
# 2) Bulk insertions
|
|
239
249
|
if ins:
|
|
240
250
|
ins.sort(key=lambda x: x[0])
|
|
@@ -248,6 +258,8 @@ class SeqMat:
|
|
|
248
258
|
new_idx = pos + (cnt + i + 1) * eps
|
|
249
259
|
new_rows.append((nt.encode(), new_idx, b'-', np.nan, True))
|
|
250
260
|
self.insertion_counters[pos] += len(alt)
|
|
261
|
+
applied_mutations += 1
|
|
262
|
+
|
|
251
263
|
merged = np.concatenate([self.seq_array, np.array(new_rows, dtype=self.seq_array.dtype)])
|
|
252
264
|
merged.sort(order='index')
|
|
253
265
|
self.seq_array = merged
|
|
@@ -261,10 +273,17 @@ class SeqMat:
|
|
|
261
273
|
for (pos, ref), idx in zip(dels, idxs):
|
|
262
274
|
length = len(ref)
|
|
263
275
|
mask[idx:idx + length] = False
|
|
276
|
+
applied_mutations += 1
|
|
277
|
+
|
|
264
278
|
self.seq_array = self.seq_array[mask]
|
|
265
279
|
|
|
266
280
|
# Finalize valid mask
|
|
267
281
|
self.seq_array['valid_mask'] = self.seq_array['nt'] != b'-'
|
|
282
|
+
|
|
283
|
+
if turn_back:
|
|
284
|
+
self.reverse_complement()
|
|
285
|
+
|
|
286
|
+
self.notes['total_mutations'] = self.notes.get('total_mutations', 0) + applied_mutations
|
|
268
287
|
return self
|
|
269
288
|
|
|
270
289
|
def complement(self) -> SeqMat:
|
|
@@ -37,7 +37,7 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
|
|
|
37
37
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
38
38
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
39
39
|
geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
|
|
40
|
-
geney/utils/SeqMats.py,sha256=
|
|
40
|
+
geney/utils/SeqMats.py,sha256=zymeGYvpuJOBlEBx6vrbh9b6y9t4egCmKnerrxxKU8s,17615
|
|
41
41
|
geney/utils/SeqMatsOld.py,sha256=syRU5DAuTh3xUfGW_qP9wlcBO5pHsG_y5PlrfXTIxUY,18502
|
|
42
42
|
geney/utils/TranscriptLibrary.py,sha256=ma_ZVPgglxXDDneEvdqxxeqxG8eSFL-zgLUXyC6BqY8,2070
|
|
43
43
|
geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
|
|
@@ -46,7 +46,7 @@ geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M
|
|
|
46
46
|
geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
|
|
47
47
|
geney/utils/splicing_utils.py,sha256=vPCGnCPR1ooEZEHR79yFHLmRQXEJHXEQjjxpBR-YWOs,20635
|
|
48
48
|
geney/utils/utils.py,sha256=m51Vd0cEbrcIHo6_8BAuI9YSPcKRs22e5LfVd2Qj6Is,2181
|
|
49
|
-
geney-1.4.
|
|
50
|
-
geney-1.4.
|
|
51
|
-
geney-1.4.
|
|
52
|
-
geney-1.4.
|
|
49
|
+
geney-1.4.25.dist-info/METADATA,sha256=iZ5UDkA4ibY03jtqec5n4grbfchBJxsf2Qpnw7KZW5c,990
|
|
50
|
+
geney-1.4.25.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
51
|
+
geney-1.4.25.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
52
|
+
geney-1.4.25.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|