geney 1.3.9__py2.py3-none-any.whl → 1.3.11__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/allele_linkage.py +0 -0
- geney/splicing_utils.py +145 -18
- {geney-1.3.9.dist-info → geney-1.3.11.dist-info}/METADATA +4 -7
- {geney-1.3.9.dist-info → geney-1.3.11.dist-info}/RECORD +6 -5
- {geney-1.3.9.dist-info → geney-1.3.11.dist-info}/WHEEL +1 -1
- {geney-1.3.9.dist-info → geney-1.3.11.dist-info}/top_level.txt +0 -0
geney/allele_linkage.py
ADDED
|
File without changes
|
geney/splicing_utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
2
3
|
from .Gene import Gene
|
|
3
4
|
from .SeqMats import MutSeqMat
|
|
4
5
|
from collections import defaultdict
|
|
@@ -300,7 +301,7 @@ def find_transcript_missplicing(mut_id, transcript=None, threshold=0.5, engine='
|
|
|
300
301
|
# missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
|
|
301
302
|
# missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
|
|
302
303
|
# temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
|
|
303
|
-
return temp
|
|
304
|
+
# return temp
|
|
304
305
|
|
|
305
306
|
|
|
306
307
|
def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
|
|
@@ -351,6 +352,91 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
|
|
|
351
352
|
return Missplicing(temp, threshold=threshold)
|
|
352
353
|
|
|
353
354
|
|
|
355
|
+
def process_pairwise_epistasis(mids, engine='pangolin', fprint=False):
    """
    Summarise how a double mutant's predicted missplicing differs from that
    of its two constituent single mutants.

    For every compound id, missplicing is predicted three times (first
    mutation alone, second mutation alone, both together) with a 0.25
    threshold, and every splice site reported by any of the three
    predictions is inspected.

    Args:
        mids (iterable of str): Compound mutation ids, each made of exactly
            two single-mutation ids joined by '|'.
        engine (str): Forwarded to ``find_transcript_missplicing``.
        fprint (bool): If True, print the three missplicing results.

    Returns:
        pandas.DataFrame or None: One row per compound id with the columns
        ``mut_id``, ``corrective``, ``cummulative``, ``max_delta``, ``ms1``,
        ``ms2``, ``msb`` and ``increased_canonical_splicing``; ``None`` when
        ``mids`` is empty.

    Raises:
        ValueError: If a compound id does not split into exactly two parts.
    """
    cutoff = 0.25
    events = ('missed_acceptors', 'missed_donors', 'discovered_acceptors', 'discovered_donors')

    results = []
    for mid in mids:
        first_id, second_id = mid.split('|')
        missplicing1 = find_transcript_missplicing(first_id, threshold=cutoff, engine=engine)
        missplicing2 = find_transcript_missplicing(second_id, threshold=cutoff, engine=engine)
        missplicing_both = find_transcript_missplicing(mid, threshold=cutoff, engine=engine)

        if fprint:
            print(missplicing1)
            print(missplicing2)
            print(missplicing_both)

        # NOTE: 'cummulative' (sic) is kept as spelled; it becomes a column
        # name of the returned frame and callers may rely on it.
        flag = {'mut_id': mid, 'corrective': 0, 'cummulative': 0}
        cummulative_deltas, corrective_deltas = [], []
        ms1, ms2, msb = [], [], []
        increased_canonical_splicing = False

        for event in events:
            m1, m2, mb = missplicing1[event], missplicing2[event], missplicing_both[event]

            # Every site reported by at least one of the three predictions.
            for k in set(m1) | set(m2) | set(mb):
                # First non-zero reference value among the predictions;
                # stays None/0 when none of them provides one.
                ref_val = (
                    m1.get(k, {}).get('reference') or
                    m2.get(k, {}).get('reference') or
                    mb.get(k, {}).get('reference')
                )

                # A site absent from a prediction counts as "no change".
                delta1 = m1.get(k, {'delta': 0})['delta']
                delta2 = m2.get(k, {'delta': 0})['delta']
                deltab = mb.get(k, {'delta': 0})['delta']

                ms1.append(delta1)
                ms2.append(delta2)
                msb.append(deltab)

                shift1 = deltab - delta1
                shift2 = deltab - delta2
                largest_shift = max((shift1, shift2), key=abs)

                # Corrective: a single mutant disrupts the site, the double
                # mutant does not.
                if abs(deltab) < cutoff and (
                    (abs(delta1) > cutoff and abs(shift1) > cutoff) or
                    (abs(delta2) > cutoff and abs(shift2) > cutoff)
                ):
                    flag['corrective'] += 1
                    corrective_deltas.append(largest_shift)

                # Cumulative: the double mutant disrupts the site, a single
                # mutant does not.
                if abs(deltab) > cutoff and (
                    (abs(delta1) < cutoff and abs(shift1) > cutoff) or
                    (abs(delta2) < cutoff and abs(shift2) > cutoff)
                ):
                    flag['cummulative'] += 1
                    cummulative_deltas.append(largest_shift)

                # The original test was `(0.25 <= ref_val) <= 0.75`, which
                # compares a bool with 0.75 and crashes on a missing
                # reference; a proper range check is what is meant.
                if (
                    ref_val is not None
                    and 0.25 <= ref_val <= 0.75
                    and (
                        ((delta1 > cutoff or delta2 > cutoff) and deltab < cutoff)
                        or (delta1 < cutoff and delta2 < cutoff and deltab > cutoff)
                    )
                    and (abs(shift1) > cutoff or abs(shift2) > cutoff)
                ):
                    increased_canonical_splicing = True

        # default=0 keeps mutations with no reported sites from raising.
        flag['max_delta'] = max(cummulative_deltas + corrective_deltas, key=abs, default=0)
        flag['ms1'] = max(ms1, key=abs, default=0)
        flag['ms2'] = max(ms2, key=abs, default=0)
        flag['msb'] = max(msb, key=abs, default=0)
        flag['increased_canonical_splicing'] = increased_canonical_splicing

        results.append(pd.Series(flag).to_frame().T)

    if not results:
        return None

    return pd.concat(results)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
|
|
354
440
|
class Missplicing:
|
|
355
441
|
def __init__(self, splicing_dict, threshold=0.5):
|
|
356
442
|
"""
|
|
@@ -390,6 +476,9 @@ class Missplicing:
|
|
|
390
476
|
for d in details.values():
|
|
391
477
|
yield d['delta']
|
|
392
478
|
|
|
479
|
+
def __getitem__(self, key):
    """
    Return the entry stored under ``key`` in ``self.missplicing``.

    Makes the object subscriptable, e.g. ``obj['missed_donors']`` as used by
    ``process_pairwise_epistasis``. Whatever ``self.missplicing`` raises for
    an unknown key propagates unchanged.
    """
    return self.missplicing[key]
|
|
481
|
+
|
|
393
482
|
@property
|
|
394
483
|
def aberrant_splicing(self):
|
|
395
484
|
"""
|
|
@@ -502,45 +591,83 @@ cursor = conn.cursor()
|
|
|
502
591
|
# Create table once at startup, not in the function.
# Rows are keyed by (engine, gene, mut_id, transcript_id); `data` holds the
# JSON text written by the REPLACE INTO statements further down in this file.
cursor.execute('''
CREATE TABLE IF NOT EXISTS mutations (
engine TEXT,
gene TEXT,
mut_id TEXT,
transcript_id TEXT,
data TEXT,
PRIMARY KEY (engine, gene, mut_id, transcript_id)
)''')
|
|
512
601
|
|
|
513
|
-
|
|
602
|
+
|
|
603
|
+
def get_splicing(engine, gene, mut_id, transcript_id, force_recompute=False):
    """
    Retrieve cached splicing data for a given mutation from the database.

    Args:
        engine (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mut_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        force_recompute (bool): If True, ignore any cached value; the lookup
            is skipped and None is returned so the caller recomputes.

    Returns:
        The JSON-decoded cached data, or None when no row matches or when
        ``force_recompute`` is True.
    """
    # Previously this flag was accepted but silently ignored.
    if force_recompute:
        return None

    # Lookup in the database
    cursor.execute('SELECT data FROM mutations WHERE engine=? AND gene=? AND mut_id=? AND transcript_id=?',
                   (engine, gene, mut_id, transcript_id))
    row = cursor.fetchone()

    # fetchone() yields None when nothing matched
    if row:
        return json.loads(row[0])
    return None
|
|
623
|
+
|
|
624
|
+
def save_splicing(engine, gene, mut_id, transcript_id, splicing):
    """
    Store splicing data for a mutation in the ``mutations`` table.

    ``splicing`` is passed through ``convert_numpy_to_native`` and serialised
    to JSON, then written with REPLACE INTO so an existing row with the same
    (engine, gene, mut_id, transcript_id) is overwritten. This function does
    not call commit itself.

    Args:
        engine (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mut_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        splicing: The splicing data to store.

    Returns:
        None
    """
    payload = json.dumps(convert_numpy_to_native(splicing))
    record = (engine, gene, mut_id, transcript_id, payload)
    cursor.execute(
        'REPLACE INTO mutations (engine, gene, mut_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
        record,
    )
|
|
524
629
|
|
|
630
|
+
def get_or_compute_splicing(mut_id, transcript_id=None, engine='spliceai', force_recompute=False):
    """
    Return splicing data for a mutation, using the database as a cache.

    A cached row is returned when one exists and ``force_recompute`` is
    False; otherwise the data is computed with
    ``find_transcript_missplicing``, written back with REPLACE INTO and
    returned.

    Args:
        mut_id (str): A unique identifier for the mutation; the text before
            the first ':' is used as the gene.
        transcript_id (str): ID for the transcript. When None, it is taken
            from ``Gene.from_file(gene).transcript()``.
        engine (str): Name of the tool used for computation.
        force_recompute (bool): If True, ignore cached value and recompute.

    Returns:
        dict: The cached (JSON-decoded) or freshly computed splicing data.
    """
    gene = mut_id.partition(':')[0]
    if transcript_id is None:
        transcript_id = Gene.from_file(gene).transcript().transcript_id

    lookup_key = (engine, gene, mut_id, transcript_id)

    # Check the cache first
    cursor.execute('SELECT data FROM mutations WHERE engine=? AND gene=? AND mut_id=? AND transcript_id=?',
                   lookup_key)
    cached_row = cursor.fetchone()
    use_cache = bool(cached_row) and not force_recompute
    if use_cache:
        return json.loads(cached_row[0])

    # Cache miss (or forced): compute and normalise to plain Python types
    prediction = find_transcript_missplicing(mut_id, transcript=transcript_id, engine=engine)
    computed_data = convert_numpy_to_native(prediction.missplicing)

    # Write the fresh result back to the cache
    data_json = json.dumps(computed_data)
    cursor.execute('REPLACE INTO mutations (engine, gene, mut_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
                   lookup_key + (data_json,))
    return computed_data
|
|
545
660
|
|
|
546
|
-
|
|
661
|
+
|
|
662
|
+
def convert_numpy_to_native(obj, ndigits=3):
    """
    Recursively convert NumPy data types to native Python types.

    Numbers are rounded to ``ndigits`` decimal places on the way. Values
    that are not numeric (strings, None, booleans, arbitrary objects) are
    returned unchanged instead of being passed to ``round``.

    Args:
        obj: A scalar, NumPy scalar/array, or an arbitrarily nested
            dict/list/tuple of those.
        ndigits (int): Number of decimal places kept for numeric values.

    Returns:
        The same structure built from plain Python objects. Tuples and
        NumPy arrays come back as lists; NumPy scalars used as dict keys
        are unwrapped (but not rounded) so the result can be JSON-encoded.
    """
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_numpy_to_native(value, ndigits)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_to_native(item, ndigits) for item in obj]
    if isinstance(obj, np.ndarray):
        # tolist() unwraps elements to native scalars; recurse to round them
        return convert_numpy_to_native(obj.tolist(), ndigits)
    if isinstance(obj, np.generic):  # NumPy scalar types
        return convert_numpy_to_native(obj.item(), ndigits)
    # bool is an int subclass; round(True, 3) would turn it into 1
    if isinstance(obj, (int, float)) and not isinstance(obj, bool):
        return round(obj, ndigits)
    return obj
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: geney
|
|
3
|
-
Version: 1.3.
|
|
3
|
+
Version: 1.3.11
|
|
4
4
|
Summary: A Python package for gene expression modeling.
|
|
5
5
|
Home-page: https://github.com/nicolaslynn/geney
|
|
6
6
|
Author: Nicolas Lynn
|
|
7
7
|
Author-email: nicolasalynn@gmail.com
|
|
8
8
|
License: Free for non-commercial use
|
|
9
|
-
Platform: UNKNOWN
|
|
10
9
|
Classifier: Development Status :: 1 - Planning
|
|
11
10
|
Classifier: Intended Audience :: Science/Research
|
|
12
11
|
Classifier: License :: Free for non-commercial use
|
|
@@ -20,9 +19,9 @@ Requires-Dist: networkx
|
|
|
20
19
|
Requires-Dist: viennarna
|
|
21
20
|
Requires-Dist: tqdm
|
|
22
21
|
Requires-Dist: spliceai
|
|
23
|
-
Requires-Dist: biopython==1.81
|
|
24
|
-
Requires-Dist: gtfparse==1.3.0
|
|
25
|
-
Requires-Dist: sh==2.0.6
|
|
22
|
+
Requires-Dist: biopython ==1.81
|
|
23
|
+
Requires-Dist: gtfparse ==1.3.0
|
|
24
|
+
Requires-Dist: sh ==2.0.6
|
|
26
25
|
Requires-Dist: torch
|
|
27
26
|
Requires-Dist: lifelines
|
|
28
27
|
Requires-Dist: notebook
|
|
@@ -32,5 +31,3 @@ Requires-Dist: pyfastx
|
|
|
32
31
|
Requires-Dist: tensorflow
|
|
33
32
|
Requires-Dist: keras
|
|
34
33
|
|
|
35
|
-
UNKNOWN
|
|
36
|
-
|
|
@@ -4,6 +4,7 @@ geney/SeqMats.py,sha256=jkXmXAs0OpnFeyCfiJcKKpHHSi9JpKgiOIwsu63e1CQ,18557
|
|
|
4
4
|
geney/Transcript.py,sha256=eRZXVVxDVBbv0l385bnAOBFRBSzBwppXcbBq8KXkwlo,14443
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
6
6
|
geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
|
|
7
|
+
geney/allele_linkage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
geney/config_setup.py,sha256=nblcGU3HIt8YjdrAoGfbEVKRxwJKv0PikJ5-7AL6axQ,723
|
|
8
9
|
geney/data_setup.py,sha256=2RHmuvcGUQbEglXQEZr0C2QPDTQYRZOEm0EcmyfQJgU,12229
|
|
9
10
|
geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
@@ -15,7 +16,7 @@ geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
|
15
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
16
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
17
18
|
geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
|
|
18
|
-
geney/splicing_utils.py,sha256=
|
|
19
|
+
geney/splicing_utils.py,sha256=Bj5YV-LHs684afjriep7N2QaRAAKdidFS-adihfDzfI,31887
|
|
19
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
20
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
21
22
|
geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
|
|
@@ -24,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
24
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
25
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
26
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
27
|
-
geney-1.3.
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.11.dist-info/METADATA,sha256=Por9VSaGxOGXy61knApS-3BefXSrh8hhQQQ0ULGQn2I,971
|
|
29
|
+
geney-1.3.11.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.11.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.11.dist-info/RECORD,,
|
|
File without changes
|