geney 1.4.23__py2.py3-none-any.whl → 1.4.25__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geney/utils/SeqMats.py CHANGED
@@ -8,7 +8,7 @@ from typing import List, Tuple, Union, Optional
8
8
  from collections import defaultdict
9
9
  import numpy as np
10
10
  import pandas as pd
11
-
11
+ from geney.utils.utils import contains
12
12
 
13
13
 
14
14
  def format_mut_id(text):
@@ -105,7 +105,7 @@ class SeqMat:
105
105
  "name": self.name,
106
106
  "source": self.source,
107
107
  "version": self.version,
108
- "notes": self.notes
108
+ "notes": self.notes,
109
109
  }
110
110
 
111
111
  @property
@@ -165,7 +165,7 @@ class SeqMat:
165
165
  # only_snps: bool = False
166
166
  # ) -> SeqMat:
167
167
 
168
- def apply_mutations(
168
+ def apply_mutation(
169
169
  self,
170
170
  mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]] = None,
171
171
  *,
@@ -179,6 +179,11 @@ class SeqMat:
179
179
  - Supports a single tuple or a list of tuples
180
180
  - Assumes mutations sorted by position for vectorized searchsorted
181
181
  """
182
+ turn_back = False
183
+ if self.rev:
184
+ turn_back = True
185
+ self.reverse_complement()
186
+
182
187
  # Normalize to list
183
188
  # if isinstance(mutations, tuple) and len(mutations) == 3:
184
189
  # mutations = [mutations]
@@ -215,11 +220,16 @@ class SeqMat:
215
220
  # left-normalize
216
221
  while r and a and r[0] == a[0]:
217
222
  p += 1; r = r[1:] or '-'; a = a[1:] or '-'
223
+
224
+ if not contains(self.index, p):
225
+ continue # Skip invalid positions
226
+
218
227
  if r != '-' and a != '-': subs.append((p, r, a))
219
228
  elif r == '-' and a != '-' and not only_snps: ins.append((p, a))
220
229
  elif a == '-' and r != '-' and not only_snps: dels.append((p, r))
221
230
  else: raise ValueError(f"Unsupported mutation {p}:{r}:{a}")
222
231
 
232
+ applied_mutations = 0
223
233
  # Ensure seq_array indices sorted
224
234
  coords = self.seq_array['index']
225
235
 
@@ -234,7 +244,7 @@ class SeqMat:
234
244
  actual = self.seq_array['ref'][idx:idx + length].tobytes().decode()
235
245
  raise ValueError(f"Ref mismatch at {pos}: expected {ref}, found {actual}")
236
246
  self.seq_array['nt'][idx:idx + length] = np.frombuffer(alt.encode(), dtype='S1')
237
-
247
+ applied_mutations += 1
238
248
  # 2) Bulk insertions
239
249
  if ins:
240
250
  ins.sort(key=lambda x: x[0])
@@ -248,6 +258,8 @@ class SeqMat:
248
258
  new_idx = pos + (cnt + i + 1) * eps
249
259
  new_rows.append((nt.encode(), new_idx, b'-', np.nan, True))
250
260
  self.insertion_counters[pos] += len(alt)
261
+ applied_mutations += 1
262
+
251
263
  merged = np.concatenate([self.seq_array, np.array(new_rows, dtype=self.seq_array.dtype)])
252
264
  merged.sort(order='index')
253
265
  self.seq_array = merged
@@ -261,10 +273,17 @@ class SeqMat:
261
273
  for (pos, ref), idx in zip(dels, idxs):
262
274
  length = len(ref)
263
275
  mask[idx:idx + length] = False
276
+ applied_mutations += 1
277
+
264
278
  self.seq_array = self.seq_array[mask]
265
279
 
266
280
  # Finalize valid mask
267
281
  self.seq_array['valid_mask'] = self.seq_array['nt'] != b'-'
282
+
283
+ if turn_back:
284
+ self.reverse_complement()
285
+
286
+ self.notes['total_mutations'] = self.notes.get('total_mutations', 0) + applied_mutations
268
287
  return self
269
288
 
270
289
  def complement(self) -> SeqMat:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.4.23
3
+ Version: 1.4.25
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -37,7 +37,7 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
37
37
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
38
38
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
39
39
  geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
40
- geney/utils/SeqMats.py,sha256=4NJMXNDw6XQAaKVpNuEIft1Xa1sSxPSFwnIeMzjI3eE,17058
40
+ geney/utils/SeqMats.py,sha256=zymeGYvpuJOBlEBx6vrbh9b6y9t4egCmKnerrxxKU8s,17615
41
41
  geney/utils/SeqMatsOld.py,sha256=syRU5DAuTh3xUfGW_qP9wlcBO5pHsG_y5PlrfXTIxUY,18502
42
42
  geney/utils/TranscriptLibrary.py,sha256=ma_ZVPgglxXDDneEvdqxxeqxG8eSFL-zgLUXyC6BqY8,2070
43
43
  geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
@@ -46,7 +46,7 @@ geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M
46
46
  geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
47
47
  geney/utils/splicing_utils.py,sha256=vPCGnCPR1ooEZEHR79yFHLmRQXEJHXEQjjxpBR-YWOs,20635
48
48
  geney/utils/utils.py,sha256=m51Vd0cEbrcIHo6_8BAuI9YSPcKRs22e5LfVd2Qj6Is,2181
49
- geney-1.4.23.dist-info/METADATA,sha256=cRWawSdfae-X2F-k7AFedHrQd_OhD2zBLjkA2zzNLrs,990
50
- geney-1.4.23.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
51
- geney-1.4.23.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
52
- geney-1.4.23.dist-info/RECORD,,
49
+ geney-1.4.25.dist-info/METADATA,sha256=iZ5UDkA4ibY03jtqec5n4grbfchBJxsf2Qpnw7KZW5c,990
50
+ geney-1.4.25.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
51
+ geney-1.4.25.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
52
+ geney-1.4.25.dist-info/RECORD,,
File without changes