geney-1.4.6-py2.py3-none-any.whl → geney-1.4.8-py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/Gene.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import copy
2
2
  # import random
3
- from . import config
3
+ from . import H
4
4
  from typing import Any, Dict, List, Tuple, Optional, Iterator, Union, TYPE_CHECKING
5
5
  from collections import Counter
6
6
  from .utils.utils import unload_pickle
geney/Transcript.py CHANGED
@@ -4,7 +4,7 @@ import numpy as np
4
4
  import copy
5
5
  from Bio.Seq import Seq # Assuming Biopython is used
6
6
  from . import config
7
- from .utils import unload_pickle
7
+ from .utils.utils import unload_pickle
8
8
  from .utils.SeqMats import SeqMat #, MutSeqMat
9
9
  from .utils.Fasta_segment import Fasta_segment
10
10
 
geney/utils/SeqMats.py CHANGED
@@ -79,6 +79,7 @@ class SeqMat:
79
79
  self.seq_array["cons"] = np.nan if conservation is None else conservation
80
80
  self.seq_array["valid_mask"] = self.seq_array["nt"] != b"-"
81
81
  self.insertion_counters = defaultdict(int)
82
+ self._pos_to_idx = {pos: i for i, pos in enumerate(self.seq_array["index"])}
82
83
 
83
84
  self.source = source if source is not None else "Unknown"
84
85
  self.notes = notes if notes is not None else {}
@@ -177,27 +178,35 @@ class SeqMat:
177
178
  self.reverse_complement()
178
179
 
179
180
  # Normalize shared prefix (similar to left-alignment in VCFs)
180
- if ref and alt and ref[0] == alt[0]:
181
+ while ref and alt and ref[0] == alt[0]:
181
182
  pos += 1
182
183
  ref = ref[1:] or "-"
183
184
  alt = alt[1:] or "-"
184
185
 
185
186
  # Case 1: SNP or multi-base substitution
186
187
  if ref != "-" and alt != "-":
187
- if len(ref) == len(alt):
188
- # print('Inserting SNP')
189
- pos_idx = np.searchsorted(self.seq_array["index"], pos)
190
- end_idx = pos_idx + len(ref)
191
- if end_idx > len(self.seq_array):
192
- raise ValueError(f"Substitution range exceeds sequence length at position {pos}.")
193
- segment = self.seq_array["ref"][pos_idx:end_idx].tobytes().decode()
194
- if segment != ref:
195
- raise ValueError(f"Reference mismatch at position {pos}: expected '{ref}', found '{segment}'")
196
- for i, nt in enumerate(alt):
197
- self.seq_array["nt"][pos_idx + i] = nt.encode()
198
- else:
188
+ if len(ref) != len(alt):
199
189
  raise ValueError("Substitution mutations must have alleles of equal length.")
200
190
 
191
+ # print('Inserting SNP')
192
+ # pos_idx = np.searchsorted(self.seq_array["index"], pos)
193
+ pos_idx = self._pos_to_idx.get(pos)
194
+ if pos_idx is None:
195
+ raise ValueError(f"Position {pos} not found in index")
196
+
197
+ end_idx = pos_idx + len(ref)
198
+ if end_idx > len(self.seq_array):
199
+ raise ValueError(f"Substitution range exceeds sequence length at position {pos}.")
200
+ # segment = self.seq_array["ref"][pos_idx:end_idx].tobytes().decode()
201
+ # if segment != ref:
202
+ # raise ValueError(f"Reference mismatch at position {pos}: expected '{ref}', found '{segment}'")
203
+ ref_segment = self.seq_array["ref"][pos_idx:end_idx]
204
+ if not np.all(ref_segment == np.frombuffer(ref.encode(), dtype='S1')):
205
+ raise ValueError(f"Reference mismatch at position {pos}")
206
+ self.seq_array["nt"][pos_idx:end_idx] = np.frombuffer(alt.encode(), dtype='S1')
207
+ # for i, nt in enumerate(alt):
208
+ # self.seq_array["nt"][pos_idx + i] = nt.encode()
209
+
201
210
  # Case 2: Insertion (ref is '-' means nothing was present, and we need to add bases)
202
211
  elif ref == "-" and alt != "-":
203
212
  if only_snps:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.4.6
3
+ Version: 1.4.8
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -1,9 +1,9 @@
1
1
  geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
2
- geney/Gene.py,sha256=6x1sEZV50Il4oydegW6iHIF12EZTGexniG3YUD-3DfM,7036
2
+ geney/Gene.py,sha256=QAST97ySepdZ3SnBzxIlVeJ3kQW8UEGkpNmxL_BwES0,7031
3
3
  geney/Oncosplice.py,sha256=ETAvMl_Oq6mEJQHPNwdDO5csX6Ahuped_om10KifCyM,17739
4
4
  geney/SeqMats.py,sha256=9-eJnfU2w3LGc0XvVvFEO_QrBneTkC6xkZKDfTcEw5o,19282
5
5
  geney/SpliceSimulator.py,sha256=iF6feVeSnsKFmn3WV60CgWLI0_rSLgpq5fVFL1IOv_4,18491
6
- geney/Transcript.py,sha256=Wu0UiubFOdasfPCpe9uGfhPDG4MNks5LzUqGzo85ong,14458
6
+ geney/Transcript.py,sha256=Ltlcnp93s3HxMiweUuyc4Ri3QT42l1qUtiBYH3RITFs,14464
7
7
  geney/__init__.py,sha256=YLWXJS53yeryp6nVhCgFg3_Du9Guj9y3iSrdfx61q5Y,3017
8
8
  geney/_config_setup.py,sha256=nblcGU3HIt8YjdrAoGfbEVKRxwJKv0PikJ5-7AL6axQ,723
9
9
  geney/_graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
@@ -37,7 +37,7 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
37
37
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
38
38
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
39
39
  geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
40
- geney/utils/SeqMats.py,sha256=vjU0lTkB0s0RoLjNXLqt0kJQDni-it09-iAOv5QAYFs,17686
40
+ geney/utils/SeqMats.py,sha256=kpv5v_6VgUc72Qip3fLYZV_7Jb6dIoQLIrECL5BCBl4,18207
41
41
  geney/utils/TranscriptLibrary.py,sha256=ma_ZVPgglxXDDneEvdqxxeqxG8eSFL-zgLUXyC6BqY8,2070
42
42
  geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
43
43
  geney/utils/mutation_utils.py,sha256=r-pHr56gEa5kh_DPX8MjFY3ZfYaOtyo4CUfJ5ZHlXPw,3243
@@ -45,7 +45,7 @@ geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M
45
45
  geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
46
46
  geney/utils/splicing_utils.py,sha256=vPCGnCPR1ooEZEHR79yFHLmRQXEJHXEQjjxpBR-YWOs,20635
47
47
  geney/utils/utils.py,sha256=m51Vd0cEbrcIHo6_8BAuI9YSPcKRs22e5LfVd2Qj6Is,2181
48
- geney-1.4.6.dist-info/METADATA,sha256=6l8_LlR20Qo1KIhiBTve1A1kWax1IYHfYCyqv1dsMUk,989
49
- geney-1.4.6.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
50
- geney-1.4.6.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
51
- geney-1.4.6.dist-info/RECORD,,
48
+ geney-1.4.8.dist-info/METADATA,sha256=VBMjJtnqbqt_cumsy9JEjo-ZHk-_KneCBR5-315suQ8,989
49
+ geney-1.4.8.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
50
+ geney-1.4.8.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
51
+ geney-1.4.8.dist-info/RECORD,,
File without changes