geney 1.3.8__py2.py3-none-any.whl → 1.3.10__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/allele_linkage.py +0 -0
- geney/oncosplice.py +1 -1
- geney/splicing_utils.py +135 -11
- {geney-1.3.8.dist-info → geney-1.3.10.dist-info}/METADATA +4 -7
- {geney-1.3.8.dist-info → geney-1.3.10.dist-info}/RECORD +7 -6
- {geney-1.3.8.dist-info → geney-1.3.10.dist-info}/WHEEL +1 -1
- {geney-1.3.8.dist-info → geney-1.3.10.dist-info}/top_level.txt +0 -0
geney/allele_linkage.py
ADDED
|
File without changes
|
geney/oncosplice.py
CHANGED
|
@@ -365,7 +365,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
365
365
|
report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
|
|
366
366
|
report['isoform_prevalence'] = new_boundaries['path_weight']
|
|
367
367
|
report['full_missplicing'] = missplicing.aberrant_splicing
|
|
368
|
-
report['missplicing'] = missplicing.max_delta
|
|
368
|
+
report['missplicing'] = missplicing.max_delta()
|
|
369
369
|
report['reference_resemblance'] = reference_gene_proteins.get(mutated_transcript.protein, None)
|
|
370
370
|
results.append(report)
|
|
371
371
|
|
geney/splicing_utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
2
3
|
from .Gene import Gene
|
|
3
4
|
from .SeqMats import MutSeqMat
|
|
4
5
|
from collections import defaultdict
|
|
@@ -204,7 +205,7 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
|
|
|
204
205
|
return donor_probs, acceptor_probs
|
|
205
206
|
|
|
206
207
|
|
|
207
|
-
def find_transcript_missplicing(mut_id, transcript=
|
|
208
|
+
def find_transcript_missplicing(mut_id, transcript=None, threshold=0.5, engine='spliceai', organism='hg38'):
|
|
208
209
|
gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
|
|
209
210
|
reference_transcript = gene.transcript(transcript) if transcript is not None else gene.transcript()
|
|
210
211
|
variant_transcript = reference_transcript.clone()
|
|
@@ -217,7 +218,7 @@ def find_transcript_missplicing(mut_id, transcript='primary', threshold=0.5, eng
|
|
|
217
218
|
for mutation in mutations:
|
|
218
219
|
variant_transcript.mutate(mutation, inplace=True)
|
|
219
220
|
|
|
220
|
-
return find_transcript_missplicing_seqs(reference_transcript.get_context(center, 7500), variant_transcript.get_context(center, 7500), reference_transcript.donors, reference_transcript.acceptors, threshold=threshold, engine=engine)
|
|
221
|
+
return find_transcript_missplicing_seqs(reference_transcript.pre_mrna.get_context(center, 7500), variant_transcript.pre_mrna.get_context(center, 7500), reference_transcript.donors, reference_transcript.acceptors, threshold=threshold, engine=engine)
|
|
221
222
|
|
|
222
223
|
|
|
223
224
|
# from functools import reduce
|
|
@@ -300,7 +301,7 @@ def find_transcript_missplicing(mut_id, transcript='primary', threshold=0.5, eng
|
|
|
300
301
|
# missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
|
|
301
302
|
# missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
|
|
302
303
|
# temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
|
|
303
|
-
return temp
|
|
304
|
+
# return temp
|
|
304
305
|
|
|
305
306
|
|
|
306
307
|
def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
|
|
@@ -351,6 +352,91 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
|
|
|
351
352
|
return Missplicing(temp, threshold=threshold)
|
|
352
353
|
|
|
353
354
|
|
|
355
|
+
def process_pairwise_epistasis(mids, engine='pangolin', fprint=False):
    """
    Flag corrective and cumulative splicing epistasis for pairs of mutations.

    Each entry of ``mids`` has the form ``"<mutation 1>|<mutation 2>"``.
    Missplicing is predicted for each single mutation and for the pair, and
    the per-site deltas of the three predictions are compared.

    Args:
        mids (iterable of str): Pair ids, two mutation ids joined by '|'.
        engine (str): Engine name forwarded to find_transcript_missplicing.
        fprint (bool): If True, print the three missplicing results per pair.

    Returns:
        pandas.DataFrame or None: One row per pair with the columns
        'mut_id', 'corrective', 'cummulative', 'max_delta', 'ms1', 'ms2',
        'msb' and 'increased_canonical_splicing'. None when ``mids`` is empty.

    Raises:
        ValueError: If an id does not split into exactly two parts on '|'.
    """
    # Single cutoff used for the engine threshold and every delta comparison.
    cutoff = 0.25
    events = ('missed_acceptors', 'missed_donors', 'discovered_acceptors', 'discovered_donors')

    results = []
    for mid in mids:
        first_id, second_id = mid.split('|')
        missplicing1 = find_transcript_missplicing(first_id, threshold=cutoff, engine=engine)
        missplicing2 = find_transcript_missplicing(second_id, threshold=cutoff, engine=engine)
        missplicing_both = find_transcript_missplicing(mid, threshold=cutoff, engine=engine)

        if fprint:
            print(missplicing1)
            print(missplicing2)
            print(missplicing_both)

        flag = {'mut_id': mid, 'corrective': 0, 'cummulative': 0}
        cummulative_deltas, corrective_deltas = [], []
        ms1, ms2, msb = [], [], []
        increased_canonical_splicing = False

        for event in events:
            sites1 = missplicing1[event]
            sites2 = missplicing2[event]
            sites_both = missplicing_both[event]

            # Every site reported by at least one of the three predictions.
            for site in set(sites1) | set(sites2) | set(sites_both):
                # First available reference value. `is not None` (rather than
                # truthiness) keeps a legitimate reference of 0.0.
                ref_val = None
                for source in (sites1, sites2, sites_both):
                    candidate = source.get(site, {}).get('reference')
                    if candidate is not None:
                        ref_val = candidate
                        break

                # A site missing from a prediction contributes a delta of 0.
                delta1 = sites1.get(site, {'delta': 0})['delta']
                delta2 = sites2.get(site, {'delta': 0})['delta']
                deltab = sites_both.get(site, {'delta': 0})['delta']

                ms1.append(delta1)
                ms2.append(delta2)
                msb.append(deltab)

                shift1 = abs(delta1 - deltab)
                shift2 = abs(delta2 - deltab)
                largest_shift = max((deltab - delta1, deltab - delta2), key=abs)

                # Corrective: a single mutation has an effect that the pair lacks.
                if abs(deltab) < cutoff and (
                    (abs(delta1) > cutoff and shift1 > cutoff) or
                    (abs(delta2) > cutoff and shift2 > cutoff)
                ):
                    flag['corrective'] += 1
                    corrective_deltas.append(largest_shift)

                # Cumulative: the pair has an effect that a single mutation lacks.
                if abs(deltab) > cutoff and (
                    (abs(delta1) < cutoff and shift1 > cutoff) or
                    (abs(delta2) < cutoff and shift2 > cutoff)
                ):
                    flag['cummulative'] += 1
                    cummulative_deltas.append(largest_shift)

                # Proper chained range check; skipped when no reference is known.
                if ref_val is not None and 0.25 <= ref_val <= 0.75 and (
                    ((delta1 > cutoff or delta2 > cutoff) and deltab < cutoff) or
                    (delta1 < cutoff and delta2 < cutoff and deltab > cutoff)
                ) and (shift1 > cutoff or shift2 > cutoff):
                    increased_canonical_splicing = True

        vals = cummulative_deltas + corrective_deltas
        flag['max_delta'] = max(vals, key=abs) if vals else 0
        # default=0 covers predictions that report no sites at all.
        flag['ms1'] = max(ms1, key=abs, default=0)
        flag['ms2'] = max(ms2, key=abs, default=0)
        flag['msb'] = max(msb, key=abs, default=0)
        flag['increased_canonical_splicing'] = increased_canonical_splicing

        results.append(pd.Series(flag).to_frame().T)

    if not results:
        return None

    return pd.concat(results)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
|
|
354
440
|
class Missplicing:
|
|
355
441
|
def __init__(self, splicing_dict, threshold=0.5):
|
|
356
442
|
"""
|
|
@@ -390,6 +476,9 @@ class Missplicing:
|
|
|
390
476
|
for d in details.values():
|
|
391
477
|
yield d['delta']
|
|
392
478
|
|
|
479
|
+
def __getitem__(self, key):
|
|
480
|
+
return self.missplicing[key]
|
|
481
|
+
|
|
393
482
|
@property
|
|
394
483
|
def aberrant_splicing(self):
|
|
395
484
|
"""
|
|
@@ -510,37 +599,72 @@ CREATE TABLE IF NOT EXISTS mutations (
|
|
|
510
599
|
PRIMARY KEY (tool, gene, mutation_id, transcript_id)
|
|
511
600
|
)''')
|
|
512
601
|
|
|
602
|
+
|
|
603
|
+
def get_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
    """
    Retrieve cached splicing data for a given mutation from the database.

    Args:
        tool (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mutation_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        force_recompute (bool): If True, ignore any cached value; None is
            returned without querying so the caller can recompute.

    Returns:
        dict or None: The decoded splicing data, or None when no row matches
        or when ``force_recompute`` is True.
    """
    # Previously this flag was accepted but never read.
    if force_recompute:
        return None

    # Lookup in the database
    cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                   (tool, gene, mutation_id, transcript_id))
    row = cursor.fetchone()
    if row:
        return json.loads(row[0])
    return None
|
|
623
|
+
|
|
624
|
+
def save_splicing(tool, gene, mutation_id, transcript_id, splicing):
    """
    Store splicing data for a mutation in the ``mutations`` table.

    The data is passed through convert_numpy_to_native, serialised to JSON
    and written with REPLACE INTO. No commit is issued here.

    Args:
        tool (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mutation_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        splicing: Splicing data to serialise.

    Returns:
        None
    """
    payload = json.dumps(convert_numpy_to_native(splicing))
    statement = 'REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)'
    parameters = (tool, gene, mutation_id, transcript_id, payload)
    cursor.execute(statement, parameters)
|
|
629
|
+
|
|
513
630
|
def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
    """
    Return splicing data for a mutation, computing and caching it on a miss.

    Args:
        tool (str): Name of the tool used for computation; also passed as the
            ``engine`` to find_transcript_missplicing.
        gene (str): Gene name or identifier.
        mutation_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        force_recompute (bool): If True, ignore cached value and recompute.

    Returns:
        The decoded JSON data when a cached row is used, otherwise a
        Missplicing built from the freshly computed data.
        NOTE(review): the two paths return different types - confirm which
        one callers expect.
    """
    row_key = (tool, gene, mutation_id, transcript_id)

    # The lookup runs even when force_recompute is set; its result is then ignored.
    cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                   row_key)
    cached_row = cursor.fetchone()
    if not force_recompute and cached_row:
        return json.loads(cached_row[0])

    # Cache miss or forced refresh: predict, convert to JSON-friendly values, store.
    prediction = find_transcript_missplicing(mutation_id, transcript=transcript_id, engine=tool)
    computed_data = convert_numpy_to_native(prediction.missplicing)
    serialised = json.dumps(computed_data)
    cursor.execute('REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
                   row_key + (serialised,))
    return Missplicing(computed_data)
|
|
657
|
+
|
|
545
658
|
|
|
546
|
-
|
|
659
|
+
def convert_numpy_to_native(obj):
    """
    Recursively convert NumPy data types to native Python types.

    Dicts, lists, tuples and NumPy arrays are walked recursively (tuples and
    arrays come back as lists). NumPy scalars are unwrapped with ``.item()``.
    Numeric values are rounded to 3 decimal places; anything else (strings,
    None, ...) is returned unchanged instead of raising TypeError in round().

    Args:
        obj: Value or nested container to convert.

    Returns:
        The converted structure built from native Python values.
    """
    if isinstance(obj, dict):
        return {key: convert_numpy_to_native(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_to_native(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return convert_numpy_to_native(obj.tolist())
    if isinstance(obj, np.generic):  # Check for NumPy scalar types
        obj = obj.item()
    # Only real numbers can be rounded; pass every other value through.
    if isinstance(obj, (int, float)):
        return round(obj, 3)
    return obj
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: geney
|
|
3
|
-
Version: 1.3.
|
|
3
|
+
Version: 1.3.10
|
|
4
4
|
Summary: A Python package for gene expression modeling.
|
|
5
5
|
Home-page: https://github.com/nicolaslynn/geney
|
|
6
6
|
Author: Nicolas Lynn
|
|
7
7
|
Author-email: nicolasalynn@gmail.com
|
|
8
8
|
License: Free for non-commercial use
|
|
9
|
-
Platform: UNKNOWN
|
|
10
9
|
Classifier: Development Status :: 1 - Planning
|
|
11
10
|
Classifier: Intended Audience :: Science/Research
|
|
12
11
|
Classifier: License :: Free for non-commercial use
|
|
@@ -20,9 +19,9 @@ Requires-Dist: networkx
|
|
|
20
19
|
Requires-Dist: viennarna
|
|
21
20
|
Requires-Dist: tqdm
|
|
22
21
|
Requires-Dist: spliceai
|
|
23
|
-
Requires-Dist: biopython==1.81
|
|
24
|
-
Requires-Dist: gtfparse==1.3.0
|
|
25
|
-
Requires-Dist: sh==2.0.6
|
|
22
|
+
Requires-Dist: biopython ==1.81
|
|
23
|
+
Requires-Dist: gtfparse ==1.3.0
|
|
24
|
+
Requires-Dist: sh ==2.0.6
|
|
26
25
|
Requires-Dist: torch
|
|
27
26
|
Requires-Dist: lifelines
|
|
28
27
|
Requires-Dist: notebook
|
|
@@ -32,5 +31,3 @@ Requires-Dist: pyfastx
|
|
|
32
31
|
Requires-Dist: tensorflow
|
|
33
32
|
Requires-Dist: keras
|
|
34
33
|
|
|
35
|
-
UNKNOWN
|
|
36
|
-
|
|
@@ -4,18 +4,19 @@ geney/SeqMats.py,sha256=jkXmXAs0OpnFeyCfiJcKKpHHSi9JpKgiOIwsu63e1CQ,18557
|
|
|
4
4
|
geney/Transcript.py,sha256=eRZXVVxDVBbv0l385bnAOBFRBSzBwppXcbBq8KXkwlo,14443
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
6
6
|
geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
|
|
7
|
+
geney/allele_linkage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
geney/config_setup.py,sha256=nblcGU3HIt8YjdrAoGfbEVKRxwJKv0PikJ5-7AL6axQ,723
|
|
8
9
|
geney/data_setup.py,sha256=2RHmuvcGUQbEglXQEZr0C2QPDTQYRZOEm0EcmyfQJgU,12229
|
|
9
10
|
geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
10
11
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
11
12
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
12
13
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
13
|
-
geney/oncosplice.py,sha256=
|
|
14
|
+
geney/oncosplice.py,sha256=LOgpdsTC1JotC2qFnqNMefl0t77vvWCJ5XctZsbrLNA,22343
|
|
14
15
|
geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
15
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
16
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
17
18
|
geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
|
|
18
|
-
geney/splicing_utils.py,sha256=
|
|
19
|
+
geney/splicing_utils.py,sha256=DXX-q9G0IcmPx-gI6h3b6P8x4CTPVIGVM0HXyPz4r8g,31848
|
|
19
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
20
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
21
22
|
geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
|
|
@@ -24,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
24
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
25
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
26
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
27
|
-
geney-1.3.
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.10.dist-info/METADATA,sha256=mNFs019vfrWS0iciRjkA2CcLgycPDaEuH2utIrerZdA,971
|
|
29
|
+
geney-1.3.10.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.10.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.10.dist-info/RECORD,,
|
|
File without changes
|