geney 1.4.22__py2.py3-none-any.whl → 1.4.24__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
geney/utils/SeqMats.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import List, Tuple, Union, Optional
|
|
|
8
8
|
from collections import defaultdict
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
|
-
|
|
11
|
+
from geney.utils.utils import contains
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def format_mut_id(text):
|
|
@@ -105,7 +105,7 @@ class SeqMat:
|
|
|
105
105
|
"name": self.name,
|
|
106
106
|
"source": self.source,
|
|
107
107
|
"version": self.version,
|
|
108
|
-
"notes": self.notes
|
|
108
|
+
"notes": self.notes,
|
|
109
109
|
}
|
|
110
110
|
|
|
111
111
|
@property
|
|
@@ -159,38 +159,77 @@ class SeqMat:
|
|
|
159
159
|
new.seq_array['valid_mask'] = new.seq_array['nt'] != b'-'
|
|
160
160
|
return new
|
|
161
161
|
|
|
162
|
+
# def apply_mutations(
|
|
163
|
+
# self,
|
|
164
|
+
# mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]],
|
|
165
|
+
# only_snps: bool = False
|
|
166
|
+
# ) -> SeqMat:
|
|
167
|
+
|
|
162
168
|
def apply_mutations(
|
|
163
169
|
self,
|
|
164
|
-
mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]],
|
|
170
|
+
mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]] = None,
|
|
171
|
+
*,
|
|
172
|
+
pos: Optional[float] = None,
|
|
173
|
+
ref: Optional[str] = None,
|
|
174
|
+
alt: Optional[str] = None,
|
|
165
175
|
only_snps: bool = False
|
|
166
|
-
|
|
176
|
+
) -> SeqMat:
|
|
167
177
|
"""
|
|
168
178
|
Apply one or a batch of mutations (pos, ref, alt) efficiently:
|
|
169
179
|
- Supports a single tuple or a list of tuples
|
|
170
180
|
- Assumes mutations sorted by position for vectorized searchsorted
|
|
171
181
|
"""
|
|
182
|
+
turn_back = False
|
|
183
|
+
if self.rev:
|
|
184
|
+
turn_back = True
|
|
185
|
+
self.reverse_complement()
|
|
186
|
+
|
|
172
187
|
# Normalize to list
|
|
173
|
-
if isinstance(mutations, tuple) and len(mutations) == 3:
|
|
188
|
+
# if isinstance(mutations, tuple) and len(mutations) == 3:
|
|
189
|
+
# mutations = [mutations]
|
|
190
|
+
# elif not isinstance(mutations, list):
|
|
191
|
+
# raise TypeError("mutations must be a tuple or list of tuples")
|
|
192
|
+
# Input normalization
|
|
193
|
+
if mutations is None:
|
|
194
|
+
if pos is None or ref is None or alt is None:
|
|
195
|
+
raise ValueError("Either `mutations` or `pos, ref, alt` must be provided")
|
|
196
|
+
mutations = [(pos, ref, alt)]
|
|
197
|
+
elif isinstance(mutations, tuple) and len(mutations) == 3:
|
|
174
198
|
mutations = [mutations]
|
|
175
199
|
elif not isinstance(mutations, list):
|
|
176
|
-
raise TypeError("mutations must be a tuple or list of tuples")
|
|
177
|
-
|
|
178
|
-
# Left-normalize and bucket
|
|
200
|
+
raise TypeError("`mutations` must be a tuple or list of tuples")
|
|
201
|
+
|
|
202
|
+
# # Left-normalize and bucket
|
|
203
|
+
# subs, ins, dels = [], [], []
|
|
204
|
+
# for pos, ref, alt in mutations:
|
|
205
|
+
# while ref and alt and ref[0] == alt[0]:
|
|
206
|
+
# pos += 1
|
|
207
|
+
# ref = ref[1:] or '-'
|
|
208
|
+
# alt = alt[1:] or '-'
|
|
209
|
+
# if ref != '-' and alt != '-':
|
|
210
|
+
# subs.append((pos, ref, alt))
|
|
211
|
+
# elif ref == '-' and alt != '-' and not only_snps:
|
|
212
|
+
# ins.append((pos, alt))
|
|
213
|
+
# elif alt == '-' and ref != '-' and not only_snps:
|
|
214
|
+
# dels.append((pos, ref))
|
|
215
|
+
# else:
|
|
216
|
+
# raise ValueError(f"Unsupported mutation {pos}:{ref}:{alt}.")
|
|
217
|
+
# Bucket mutations
|
|
179
218
|
subs, ins, dels = [], [], []
|
|
180
|
-
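for pos, ref, alt in mutations:
|
|
181
|
-
while ref and alt and ref[0] == alt[0]:
|
|
182
|
-
pos += 1
|
|
183
|
-
ref = ref[1:] or '-'
|
|
184
|
-
alt = alt[1:] or '-'
|
|
185
|
-
if ref != '-' and alt != '-':
|
|
186
|
-
subs.append((pos, ref, alt))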
|
|
187
|
-
elif ref == '-' and alt != '-' and not only_snps:
|
|
188
|
-
ins.append((pos, alt))
|
|
189
|
-
elif alt == '-' and ref != '-' and not only_snps:
|
|
190
|
-
dels.append((pos, ref))
|
|
191
|
-
else:
|
|
192
|
-
raise ValueError(f"Unsupported mutation {pos}:{ref}:{alt}.")
|
|
219
|
+
for p, r, a in mutations:
|
|
220
|
+
# left-normalize
|
|
221
|
+
while r and a and r[0] == a[0]:
|
|
222
|
+
p += 1; r = r[1:] or '-'; a = a[1:] or '-'
|
|
223
|
+
|
|
224
|
+
if not contains(self.index, p):
|
|
225
|
+
continue # Skip invalid positions
|
|
193
226
|
|
|
227
|
+
if r != '-' and a != '-': subs.append((p, r, a))
|
|
228
|
+
elif r == '-' and a != '-' and not only_snps: ins.append((p, a))
|
|
229
|
+
elif a == '-' and r != '-' and not only_snps: dels.append((p, r))
|
|
230
|
+
else: raise ValueError(f"Unsupported mutation {p}:{r}:{a}")
|
|
231
|
+
|
|
232
|
+
applied_mutations = 0
|
|
194
233
|
# Ensure seq_array indices sorted
|
|
195
234
|
coords = self.seq_array['index']
|
|
196
235
|
|
|
@@ -205,7 +244,7 @@ class SeqMat:
|
|
|
205
244
|
actual = self.seq_array['ref'][idx:idx + length].tobytes().decode()
|
|
206
245
|
raise ValueError(f"Ref mismatch at {pos}: expected {ref}, found {actual}")
|
|
207
246
|
self.seq_array['nt'][idx:idx + length] = np.frombuffer(alt.encode(), dtype='S1')
|
|
208
|
-
|
|
247
|
+
applied_mutations += 1
|
|
209
248
|
# 2) Bulk insertions
|
|
210
249
|
if ins:
|
|
211
250
|
ins.sort(key=lambda x: x[0])
|
|
@@ -219,6 +258,8 @@ class SeqMat:
|
|
|
219
258
|
new_idx = pos + (cnt + i + 1) * eps
|
|
220
259
|
new_rows.append((nt.encode(), new_idx, b'-', np.nan, True))
|
|
221
260
|
self.insertion_counters[pos] += len(alt)
|
|
261
|
+
applied_mutations += 1
|
|
262
|
+
|
|
222
263
|
merged = np.concatenate([self.seq_array, np.array(new_rows, dtype=self.seq_array.dtype)])
|
|
223
264
|
merged.sort(order='index')
|
|
224
265
|
self.seq_array = merged
|
|
@@ -232,10 +273,17 @@ class SeqMat:
|
|
|
232
273
|
for (pos, ref), idx in zip(dels, idxs):
|
|
233
274
|
length = len(ref)
|
|
234
275
|
mask[idx:idx + length] = False
|
|
276
|
+
applied_mutations += 1
|
|
277
|
+
|
|
235
278
|
self.seq_array = self.seq_array[mask]
|
|
236
279
|
|
|
237
280
|
# Finalize valid mask
|
|
238
281
|
self.seq_array['valid_mask'] = self.seq_array['nt'] != b'-'
|
|
282
|
+
|
|
283
|
+
if turn_back:
|
|
284
|
+
self.reverse_complement()
|
|
285
|
+
|
|
286
|
+
self.notes['total_mutations'] = self.notes.get('total_mutations', 0) + applied_mutations
|
|
239
287
|
return self
|
|
240
288
|
|
|
241
289
|
def complement(self) -> SeqMat:
|
|
@@ -37,7 +37,7 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
|
|
|
37
37
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
38
38
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
39
39
|
geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
|
|
40
|
-
geney/utils/SeqMats.py,sha256=
|
|
40
|
+
geney/utils/SeqMats.py,sha256=PaUp6PMKYDYZ8RTodmKAmCa9ywHnkqSEqTjPoATr82k,17616
|
|
41
41
|
geney/utils/SeqMatsOld.py,sha256=syRU5DAuTh3xUfGW_qP9wlcBO5pHsG_y5PlrfXTIxUY,18502
|
|
42
42
|
geney/utils/TranscriptLibrary.py,sha256=ma_ZVPgglxXDDneEvdqxxeqxG8eSFL-zgLUXyC6BqY8,2070
|
|
43
43
|
geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
|
|
@@ -46,7 +46,7 @@ geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M
|
|
|
46
46
|
geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
|
|
47
47
|
geney/utils/splicing_utils.py,sha256=vPCGnCPR1ooEZEHR79yFHLmRQXEJHXEQjjxpBR-YWOs,20635
|
|
48
48
|
geney/utils/utils.py,sha256=m51Vd0cEbrcIHo6_8BAuI9YSPcKRs22e5LfVd2Qj6Is,2181
|
|
49
|
-
geney-1.4.
|
|
50
|
-
geney-1.4.
|
|
51
|
-
geney-1.4.
|
|
52
|
-
geney-1.4.
|
|
49
|
+
geney-1.4.24.dist-info/METADATA,sha256=myrl1R2vXV7P8TYZX6rUP_xTDFfNBE9vrv2yAzUFpEM,990
|
|
50
|
+
geney-1.4.24.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
51
|
+
geney-1.4.24.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
52
|
+
geney-1.4.24.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|