geney 1.4.7__py2.py3-none-any.whl → 1.4.8__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/Gene.py +1 -1
- geney/utils/SeqMats.py +22 -13
- {geney-1.4.7.dist-info → geney-1.4.8.dist-info}/METADATA +1 -1
- {geney-1.4.7.dist-info → geney-1.4.8.dist-info}/RECORD +6 -6
- {geney-1.4.7.dist-info → geney-1.4.8.dist-info}/WHEEL +0 -0
- {geney-1.4.7.dist-info → geney-1.4.8.dist-info}/top_level.txt +0 -0
geney/Gene.py
CHANGED
geney/utils/SeqMats.py
CHANGED
|
@@ -79,6 +79,7 @@ class SeqMat:
|
|
|
79
79
|
self.seq_array["cons"] = np.nan if conservation is None else conservation
|
|
80
80
|
self.seq_array["valid_mask"] = self.seq_array["nt"] != b"-"
|
|
81
81
|
self.insertion_counters = defaultdict(int)
|
|
82
|
+
self._pos_to_idx = {pos: i for i, pos in enumerate(self.seq_array["index"])}
|
|
82
83
|
|
|
83
84
|
self.source = source if source is not None else "Unknown"
|
|
84
85
|
self.notes = notes if notes is not None else {}
|
|
@@ -177,27 +178,35 @@ class SeqMat:
|
|
|
177
178
|
self.reverse_complement()
|
|
178
179
|
|
|
179
180
|
# Normalize shared prefix (similar to left-alignment in VCFs)
|
|
180
|
-
|
|
181
|
+
while ref and alt and ref[0] == alt[0]:
|
|
181
182
|
pos += 1
|
|
182
183
|
ref = ref[1:] or "-"
|
|
183
184
|
alt = alt[1:] or "-"
|
|
184
185
|
|
|
185
186
|
# Case 1: SNP or multi-base substitution
|
|
186
187
|
if ref != "-" and alt != "-":
|
|
187
|
-
if len(ref) == len(alt):
|
|
188
|
-
# print('Inserting SNP')
|
|
189
|
-
pos_idx = np.searchsorted(self.seq_array["index"], pos)
|
|
190
|
-
end_idx = pos_idx + len(ref)
|
|
191
|
-
if end_idx > len(self.seq_array):
|
|
192
|
-
raise ValueError(f"Substitution range exceeds sequence length at position {pos}.")
|
|
193
|
-
segment = self.seq_array["ref"][pos_idx:end_idx].tobytes().decode()
|
|
194
|
-
if segment != ref:
|
|
195
|
-
raise ValueError(f"Reference mismatch at position {pos}: expected '{ref}', found '{segment}'")
|
|
196
|
-
for i, nt in enumerate(alt):
|
|
197
|
-
self.seq_array["nt"][pos_idx + i] = nt.encode()
|
|
198
|
-
else:
|
|
188
|
+
if len(ref) != len(alt):
|
|
199
189
|
raise ValueError("Substitution mutations must have alleles of equal length.")
|
|
200
190
|
|
|
191
|
+
# print('Inserting SNP')
|
|
192
|
+
# pos_idx = np.searchsorted(self.seq_array["index"], pos)
|
|
193
|
+
pos_idx = self._pos_to_idx.get(pos)
|
|
194
|
+
if pos_idx is None:
|
|
195
|
+
raise ValueError(f"Position {pos} not found in index")
|
|
196
|
+
|
|
197
|
+
end_idx = pos_idx + len(ref)
|
|
198
|
+
if end_idx > len(self.seq_array):
|
|
199
|
+
raise ValueError(f"Substitution range exceeds sequence length at position {pos}.")
|
|
200
|
+
# segment = self.seq_array["ref"][pos_idx:end_idx].tobytes().decode()
|
|
201
|
+
# if segment != ref:
|
|
202
|
+
# raise ValueError(f"Reference mismatch at position {pos}: expected '{ref}', found '{segment}'")
|
|
203
|
+
ref_segment = self.seq_array["ref"][pos_idx:end_idx]
|
|
204
|
+
if not np.all(ref_segment == np.frombuffer(ref.encode(), dtype='S1')):
|
|
205
|
+
raise ValueError(f"Reference mismatch at position {pos}")
|
|
206
|
+
self.seq_array["nt"][pos_idx:end_idx] = np.frombuffer(alt.encode(), dtype='S1')
|
|
207
|
+
# for i, nt in enumerate(alt):
|
|
208
|
+
# self.seq_array["nt"][pos_idx + i] = nt.encode()
|
|
209
|
+
|
|
201
210
|
# Case 2: Insertion (ref is '-' means nothing was present, and we need to add bases)
|
|
202
211
|
elif ref == "-" and alt != "-":
|
|
203
212
|
if only_snps:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
|
|
2
|
-
geney/Gene.py,sha256=
|
|
2
|
+
geney/Gene.py,sha256=QAST97ySepdZ3SnBzxIlVeJ3kQW8UEGkpNmxL_BwES0,7031
|
|
3
3
|
geney/Oncosplice.py,sha256=ETAvMl_Oq6mEJQHPNwdDO5csX6Ahuped_om10KifCyM,17739
|
|
4
4
|
geney/SeqMats.py,sha256=9-eJnfU2w3LGc0XvVvFEO_QrBneTkC6xkZKDfTcEw5o,19282
|
|
5
5
|
geney/SpliceSimulator.py,sha256=iF6feVeSnsKFmn3WV60CgWLI0_rSLgpq5fVFL1IOv_4,18491
|
|
@@ -37,7 +37,7 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
|
|
|
37
37
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
38
38
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
39
39
|
geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
|
|
40
|
-
geney/utils/SeqMats.py,sha256=
|
|
40
|
+
geney/utils/SeqMats.py,sha256=kpv5v_6VgUc72Qip3fLYZV_7Jb6dIoQLIrECL5BCBl4,18207
|
|
41
41
|
geney/utils/TranscriptLibrary.py,sha256=ma_ZVPgglxXDDneEvdqxxeqxG8eSFL-zgLUXyC6BqY8,2070
|
|
42
42
|
geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
|
|
43
43
|
geney/utils/mutation_utils.py,sha256=r-pHr56gEa5kh_DPX8MjFY3ZfYaOtyo4CUfJ5ZHlXPw,3243
|
|
@@ -45,7 +45,7 @@ geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M
|
|
|
45
45
|
geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
|
|
46
46
|
geney/utils/splicing_utils.py,sha256=vPCGnCPR1ooEZEHR79yFHLmRQXEJHXEQjjxpBR-YWOs,20635
|
|
47
47
|
geney/utils/utils.py,sha256=m51Vd0cEbrcIHo6_8BAuI9YSPcKRs22e5LfVd2Qj6Is,2181
|
|
48
|
-
geney-1.4.
|
|
49
|
-
geney-1.4.
|
|
50
|
-
geney-1.4.
|
|
51
|
-
geney-1.4.
|
|
48
|
+
geney-1.4.8.dist-info/METADATA,sha256=VBMjJtnqbqt_cumsy9JEjo-ZHk-_KneCBR5-315suQ8,989
|
|
49
|
+
geney-1.4.8.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
50
|
+
geney-1.4.8.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
51
|
+
geney-1.4.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|