geney 1.4.22__py2.py3-none-any.whl → 1.4.24__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/utils/SeqMats.py CHANGED
@@ -8,7 +8,7 @@ from typing import List, Tuple, Union, Optional
8
8
  from collections import defaultdict
9
9
  import numpy as np
10
10
  import pandas as pd
11
-
11
+ from geney.utils.utils import contains
12
12
 
13
13
 
14
14
  def format_mut_id(text):
@@ -105,7 +105,7 @@ class SeqMat:
105
105
  "name": self.name,
106
106
  "source": self.source,
107
107
  "version": self.version,
108
- "notes": self.notes
108
+ "notes": self.notes,
109
109
  }
110
110
 
111
111
  @property
@@ -159,38 +159,77 @@ class SeqMat:
159
159
  new.seq_array['valid_mask'] = new.seq_array['nt'] != b'-'
160
160
  return new
161
161
 
162
+ # def apply_mutations(
163
+ # self,
164
+ # mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]],
165
+ # only_snps: bool = False
166
+ # ) -> SeqMat:
167
+
162
168
  def apply_mutations(
163
169
  self,
164
- mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]],
170
+ mutations: Union[Tuple[float, str, str], List[Tuple[float, str, str]]] = None,
171
+ *,
172
+ pos: Optional[float] = None,
173
+ ref: Optional[str] = None,
174
+ alt: Optional[str] = None,
165
175
  only_snps: bool = False
166
- ) -> SeqMat:
176
+ ) -> SeqMat:
167
177
  """
168
178
  Apply one or a batch of mutations (pos, ref, alt) efficiently:
169
179
  - Supports a single tuple or a list of tuples
170
180
  - Assumes mutations sorted by position for vectorized searchsorted
171
181
  """
182
+ turn_back = False
183
+ if self.rev:
184
+ turn_back = True
185
+ self.reverse_complement()
186
+
172
187
  # Normalize to list
173
- if isinstance(mutations, tuple) and len(mutations) == 3:
188
+ # if isinstance(mutations, tuple) and len(mutations) == 3:
189
+ # mutations = [mutations]
190
+ # elif not isinstance(mutations, list):
191
+ # raise TypeError("mutations must be a tuple or list of tuples")
192
+ # Input normalization
193
+ if mutations is None:
194
+ if pos is None or ref is None or alt is None:
195
+ raise ValueError("Either `mutations` or `pos, ref, alt` must be provided")
196
+ mutations = [(pos, ref, alt)]
197
+ elif isinstance(mutations, tuple) and len(mutations) == 3:
174
198
  mutations = [mutations]
175
199
  elif not isinstance(mutations, list):
176
- raise TypeError("mutations must be a tuple or list of tuples")
177
-
178
- # Left-normalize and bucket
200
+ raise TypeError("`mutations` must be a tuple or list of tuples")
201
+
202
+ # # Left-normalize and bucket
203
+ # subs, ins, dels = [], [], []
204
+ # for pos, ref, alt in mutations:
205
+ # while ref and alt and ref[0] == alt[0]:
206
+ # pos += 1
207
+ # ref = ref[1:] or '-'
208
+ # alt = alt[1:] or '-'
209
+ # if ref != '-' and alt != '-':
210
+ # subs.append((pos, ref, alt))
211
+ # elif ref == '-' and alt != '-' and not only_snps:
212
+ # ins.append((pos, alt))
213
+ # elif alt == '-' and ref != '-' and not only_snps:
214
+ # dels.append((pos, ref))
215
+ # else:
216
+ # raise ValueError(f"Unsupported mutation {pos}:{ref}:{alt}.")
217
+ # Bucket mutations
179
218
  subs, ins, dels = [], [], []
180
- for pos, ref, alt in mutations:
181
- while ref and alt and ref[0] == alt[0]:
182
- pos += 1
183
- ref = ref[1:] or '-'
184
- alt = alt[1:] or '-'
185
- if ref != '-' and alt != '-':
186
- subs.append((pos, ref, alt))
187
- elif ref == '-' and alt != '-' and not only_snps:
188
- ins.append((pos, alt))
189
- elif alt == '-' and ref != '-' and not only_snps:
190
- dels.append((pos, ref))
191
- else:
192
- raise ValueError(f"Unsupported mutation {pos}:{ref}:{alt}.")
219
+ for p, r, a in mutations:
220
+ # left-normalize
221
+ while r and a and r[0] == a[0]:
222
+ p += 1; r = r[1:] or '-'; a = a[1:] or '-'
223
+
224
+ if not contains(self.index, p):
225
+ continue # Skip invalid positions
193
226
 
227
+ if r != '-' and a != '-': subs.append((p, r, a))
228
+ elif r == '-' and a != '-' and not only_snps: ins.append((p, a))
229
+ elif a == '-' and r != '-' and not only_snps: dels.append((p, r))
230
+ else: raise ValueError(f"Unsupported mutation {p}:{r}:{a}")
231
+
232
+ applied_mutations = 0
194
233
  # Ensure seq_array indices sorted
195
234
  coords = self.seq_array['index']
196
235
 
@@ -205,7 +244,7 @@ class SeqMat:
205
244
  actual = self.seq_array['ref'][idx:idx + length].tobytes().decode()
206
245
  raise ValueError(f"Ref mismatch at {pos}: expected {ref}, found {actual}")
207
246
  self.seq_array['nt'][idx:idx + length] = np.frombuffer(alt.encode(), dtype='S1')
208
-
247
+ applied_mutations += 1
209
248
  # 2) Bulk insertions
210
249
  if ins:
211
250
  ins.sort(key=lambda x: x[0])
@@ -219,6 +258,8 @@ class SeqMat:
219
258
  new_idx = pos + (cnt + i + 1) * eps
220
259
  new_rows.append((nt.encode(), new_idx, b'-', np.nan, True))
221
260
  self.insertion_counters[pos] += len(alt)
261
+ applied_mutations += 1
262
+
222
263
  merged = np.concatenate([self.seq_array, np.array(new_rows, dtype=self.seq_array.dtype)])
223
264
  merged.sort(order='index')
224
265
  self.seq_array = merged
@@ -232,10 +273,17 @@ class SeqMat:
232
273
  for (pos, ref), idx in zip(dels, idxs):
233
274
  length = len(ref)
234
275
  mask[idx:idx + length] = False
276
+ applied_mutations += 1
277
+
235
278
  self.seq_array = self.seq_array[mask]
236
279
 
237
280
  # Finalize valid mask
238
281
  self.seq_array['valid_mask'] = self.seq_array['nt'] != b'-'
282
+
283
+ if turn_back:
284
+ self.reverse_complement()
285
+
286
+ self.notes['total_mutations'] = self.notes.get('total_mutations', 0) + applied_mutations
239
287
  return self
240
288
 
241
289
  def complement(self) -> SeqMat:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.4.22
3
+ Version: 1.4.24
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -37,7 +37,7 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
37
37
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
38
38
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
39
39
  geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
40
- geney/utils/SeqMats.py,sha256=Hneqxz92WFrHi0lyHs2ZwTd091TtFclgybcvtUCktJA,15689
40
+ geney/utils/SeqMats.py,sha256=PaUp6PMKYDYZ8RTodmKAmCa9ywHnkqSEqTjPoATr82k,17616
41
41
  geney/utils/SeqMatsOld.py,sha256=syRU5DAuTh3xUfGW_qP9wlcBO5pHsG_y5PlrfXTIxUY,18502
42
42
  geney/utils/TranscriptLibrary.py,sha256=ma_ZVPgglxXDDneEvdqxxeqxG8eSFL-zgLUXyC6BqY8,2070
43
43
  geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
@@ -46,7 +46,7 @@ geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M
46
46
  geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
47
47
  geney/utils/splicing_utils.py,sha256=vPCGnCPR1ooEZEHR79yFHLmRQXEJHXEQjjxpBR-YWOs,20635
48
48
  geney/utils/utils.py,sha256=m51Vd0cEbrcIHo6_8BAuI9YSPcKRs22e5LfVd2Qj6Is,2181
49
- geney-1.4.22.dist-info/METADATA,sha256=OH6exXPW8_IdusLX5g-xeLBXyyQx1DQyfZeguHjvyQY,990
50
- geney-1.4.22.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
51
- geney-1.4.22.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
52
- geney-1.4.22.dist-info/RECORD,,
49
+ geney-1.4.24.dist-info/METADATA,sha256=myrl1R2vXV7P8TYZX6rUP_xTDFfNBE9vrv2yAzUFpEM,990
50
+ geney-1.4.24.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
51
+ geney-1.4.24.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
52
+ geney-1.4.24.dist-info/RECORD,,
File without changes