geney 1.3.9__py2.py3-none-any.whl → 1.3.11__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

File without changes
geney/splicing_utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import numpy as np
2
+ import pandas as pd
2
3
  from .Gene import Gene
3
4
  from .SeqMats import MutSeqMat
4
5
  from collections import defaultdict
@@ -300,7 +301,7 @@ def find_transcript_missplicing(mut_id, transcript=None, threshold=0.5, engine='
300
301
  # missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
301
302
  # missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
302
303
  # temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
303
- return temp
304
+ # return temp
304
305
 
305
306
 
306
307
  def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
@@ -351,6 +352,91 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
351
352
  return Missplicing(temp, threshold=threshold)
352
353
 
353
354
 
355
def _score_epistasis(single1, single2, double, threshold=0.25):
    """
    Compare the per-site splicing deltas of two single mutants with their double mutant.

    Args:
        single1, single2, double: Missplicing results that can be indexed by
            event name and yield ``{site: {'delta': ..., 'reference': ...}}``
            mappings.
        threshold (float): Absolute delta cutoff used to call an event present,
            absent, or changed.

    Returns:
        dict: Counters and summary deltas, in the column order expected by
        ``process_pairwise_epistasis``.
    """
    events = ('missed_acceptors', 'missed_donors', 'discovered_acceptors', 'discovered_donors')
    corrective = 0
    cummulative = 0
    cummulative_deltas, corrective_deltas = [], []
    deltas1, deltas2, deltas_both = [], [], []
    increased_canonical_splicing = False

    for event in events:
        sites1, sites2, sites_both = single1[event], single2[event], double[event]

        for site in set(sites1.keys()) | set(sites2.keys()) | set(sites_both.keys()):
            entries = (sites1.get(site), sites2.get(site), sites_both.get(site))

            # First reference probability reported for this site; stays None
            # when none of the three predictions carries one.
            ref_val = next(
                (entry['reference'] for entry in entries
                 if entry is not None and entry.get('reference') is not None),
                None,
            )

            # A site missing from a prediction counts as "no change".
            delta1, delta2, deltab = (0 if entry is None else entry['delta'] for entry in entries)
            deltas1.append(delta1)
            deltas2.append(delta2)
            deltas_both.append(deltab)

            largest_shift = max((deltab - delta1, deltab - delta2), key=abs)
            differs1 = abs(delta1 - deltab) > threshold
            differs2 = abs(delta2 - deltab) > threshold

            # Corrective: a single mutant disrupts the site, the double mutant does not.
            if abs(deltab) < threshold and (
                (abs(delta1) > threshold and differs1) or
                (abs(delta2) > threshold and differs2)
            ):
                corrective += 1
                corrective_deltas.append(largest_shift)

            # Cumulative: the double mutant disrupts the site, a single mutant does not.
            if abs(deltab) > threshold and (
                (abs(delta1) < threshold and differs1) or
                (abs(delta2) < threshold and differs2)
            ):
                cummulative += 1
                cummulative_deltas.append(largest_shift)

            # The reference-range test must be a real chained comparison:
            # "(0.25 <= ref_val) <= 0.75" compares a bool with 0.75 and is
            # only true when ref_val is *below* 0.25. A missing reference
            # value is treated as out of range instead of raising TypeError.
            if ref_val is not None and 0.25 <= ref_val <= 0.75 and (
                ((delta1 > threshold or delta2 > threshold) and deltab < threshold) or
                (delta1 < threshold and delta2 < threshold and deltab > threshold)
            ) and (differs1 or differs2):
                increased_canonical_splicing = True

    # Cumulative deltas are listed first so ties in |delta| resolve as before.
    vals = cummulative_deltas + corrective_deltas
    return {
        'corrective': corrective,
        'cummulative': cummulative,
        'max_delta': max(vals, key=abs) if vals else 0,
        # default=0 keeps a pair with no affected site from raising ValueError.
        'ms1': max(deltas1, key=abs, default=0),
        'ms2': max(deltas2, key=abs, default=0),
        'msb': max(deltas_both, key=abs, default=0),
        'increased_canonical_splicing': increased_canonical_splicing,
    }


def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, threshold=0.25):
    """
    Score pairs of mutations for splicing epistasis.

    Each identifier in ``mids`` has the form ``'<mut1>|<mut2>'``. Missplicing is
    predicted with ``find_transcript_missplicing`` for each single mutation and
    for the combined identifier, and the per-site deltas are compared to count
    corrective and cumulative interactions.

    Args:
        mids (iterable of str): Double-mutation identifiers, two mutation ids
            joined by ``'|'``.
        engine (str): Prediction engine forwarded to ``find_transcript_missplicing``.
        fprint (bool): If True, print the three missplicing results per pair.
        threshold (float): Detection threshold forwarded to
            ``find_transcript_missplicing`` and used as the delta cutoff when
            comparing mutants. Defaults to the previously hard-coded 0.25.

    Returns:
        pandas.DataFrame or None: One row per identifier with the columns
        ``mut_id``, ``corrective``, ``cummulative``, ``max_delta``, ``ms1``,
        ``ms2``, ``msb`` and ``increased_canonical_splicing``; ``None`` when
        ``mids`` is empty.

    Raises:
        ValueError: If an identifier does not split into exactly two parts on ``'|'``.
    """
    results = []
    for mid in mids:
        first_id, second_id = mid.split('|')
        missplicing1 = find_transcript_missplicing(first_id, threshold=threshold, engine=engine)
        missplicing2 = find_transcript_missplicing(second_id, threshold=threshold, engine=engine)
        missplicing_both = find_transcript_missplicing(mid, threshold=threshold, engine=engine)

        if fprint:
            print(missplicing1)
            print(missplicing2)
            print(missplicing_both)

        flag = {'mut_id': mid}
        flag.update(_score_epistasis(missplicing1, missplicing2, missplicing_both, threshold))

        # One single-row frame per pair, concatenated below (same shape as before).
        results.append(pd.Series(flag).to_frame().T)

    if not results:
        return None

    return pd.concat(results)
437
+
438
+
439
+
354
440
  class Missplicing:
355
441
  def __init__(self, splicing_dict, threshold=0.5):
356
442
  """
@@ -390,6 +476,9 @@ class Missplicing:
390
476
  for d in details.values():
391
477
  yield d['delta']
392
478
 
479
+ def __getitem__(self, key):
480
+ return self.missplicing[key]
481
+
393
482
  @property
394
483
  def aberrant_splicing(self):
395
484
  """
@@ -502,45 +591,83 @@ cursor = conn.cursor()
502
591
  # Create table once at startup, not in the function
503
592
  cursor.execute('''
504
593
  CREATE TABLE IF NOT EXISTS mutations (
505
- tool TEXT,
594
+ engine TEXT,
506
595
  gene TEXT,
507
- mutation_id TEXT,
596
+ mut_id TEXT,
508
597
  transcript_id TEXT,
509
598
  data TEXT,
510
- PRIMARY KEY (tool, gene, mutation_id, transcript_id)
599
+ PRIMARY KEY (engine, gene, mut_id, transcript_id)
511
600
  )''')
512
601
 
513
- def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
602
+
603
def get_splicing(engine, gene, mut_id, transcript_id, force_recompute=False):
    """
    Retrieve cached splicing data for a given mutation from the database.

    Args:
        engine (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mut_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        force_recompute (bool): If True, ignore any cached value and report a
            cache miss so the caller recomputes.

    Returns:
        dict or None: The decoded splicing data, or ``None`` when no row is
        stored for this key or when ``force_recompute`` is True.
    """
    # Honour the documented flag: previously it was accepted but ignored, so
    # callers asking for a recompute still received the stale cached row.
    if force_recompute:
        return None

    # Lookup in the database
    cursor.execute('SELECT data FROM mutations WHERE engine=? AND gene=? AND mut_id=? AND transcript_id=?',
                   (engine, gene, mut_id, transcript_id))
    row = cursor.fetchone()
    if row:
        return json.loads(row[0])
    return None
623
+
624
def save_splicing(engine, gene, mut_id, transcript_id, splicing):
    """
    Store splicing data for a mutation in the ``mutations`` table.

    ``splicing`` is passed through ``convert_numpy_to_native``, serialized to
    JSON, and written with ``REPLACE INTO`` under the
    (engine, gene, mut_id, transcript_id) key.

    Args:
        engine (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mut_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        splicing: Splicing data to serialize.

    Returns:
        None
    """
    payload = json.dumps(convert_numpy_to_native(splicing))
    record = (engine, gene, mut_id, transcript_id, payload)
    cursor.execute(
        'REPLACE INTO mutations (engine, gene, mut_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
        record,
    )
524
629
 
630
def get_or_compute_splicing(mut_id, transcript_id=None, engine='spliceai', force_recompute=False):
    """
    Return splicing data for a mutation, using the database as a cache.

    The gene is taken as the part of ``mut_id`` before the first ``':'``. A
    stored row is returned unless ``force_recompute`` is True; otherwise the
    data is computed with ``find_transcript_missplicing``, converted to native
    Python types, written to the database, and returned.

    Args:
        mut_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript. When None, it is taken from
            ``Gene.from_file(gene).transcript()`` (presumably the gene's
            default transcript; confirm against ``Gene``).
        engine (str): Name of the tool used for computation.
        force_recompute (bool): If True, ignore cached value and recompute.

    Returns:
        dict: The cached or freshly computed splicing data.
    """
    gene, _, _ = mut_id.partition(':')
    if transcript_id is None:
        transcript_id = Gene.from_file(gene).transcript().transcript_id

    cache_key = (engine, gene, mut_id, transcript_id)

    # Look for a previously stored result.
    cursor.execute(
        'SELECT data FROM mutations WHERE engine=? AND gene=? AND mut_id=? AND transcript_id=?',
        cache_key,
    )
    cached = cursor.fetchone()
    if cached and not force_recompute:
        return json.loads(cached[0])

    # Cache miss (or forced refresh): run the prediction and keep only the
    # native-typed event dictionary.
    prediction = find_transcript_missplicing(mut_id, transcript=transcript_id, engine=engine)
    computed_data = convert_numpy_to_native(prediction.missplicing)

    # Persist the fresh result under the same key.
    serialized = json.dumps(computed_data)
    cursor.execute(
        'REPLACE INTO mutations (engine, gene, mut_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
        cache_key + (serialized,),
    )
    return computed_data
545
660
 
546
- return computed_data
661
+
662
def convert_numpy_to_native(obj):
    """
    Recursively convert NumPy data types to native Python types.

    Numeric values are rounded to 3 decimal places. Dict keys that are NumPy
    scalars are unwrapped (not rounded) so the result can be passed to
    ``json.dumps``. Tuples and NumPy arrays are returned as lists.

    Args:
        obj: A dict, list, tuple, NumPy array, NumPy scalar, or plain value.

    Returns:
        The same structure built from native Python objects. ``None``, ``bool``,
        ``str``, ``bytes`` and any other value that cannot be rounded are
        returned unchanged.
    """
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_numpy_to_native(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_to_native(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return convert_numpy_to_native(obj.tolist())

    if isinstance(obj, np.generic):  # Check for NumPy scalar types
        obj = obj.item()

    # round() would turn bools into ints and raise on None / text.
    if obj is None or isinstance(obj, (bool, str, bytes)):
        return obj
    try:
        return round(obj, 3)
    except TypeError:
        # Not a roundable value; pass it through untouched.
        return obj
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.9
3
+ Version: 1.3.11
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
7
7
  Author-email: nicolasalynn@gmail.com
8
8
  License: Free for non-commercial use
9
- Platform: UNKNOWN
10
9
  Classifier: Development Status :: 1 - Planning
11
10
  Classifier: Intended Audience :: Science/Research
12
11
  Classifier: License :: Free for non-commercial use
@@ -20,9 +19,9 @@ Requires-Dist: networkx
20
19
  Requires-Dist: viennarna
21
20
  Requires-Dist: tqdm
22
21
  Requires-Dist: spliceai
23
- Requires-Dist: biopython==1.81
24
- Requires-Dist: gtfparse==1.3.0
25
- Requires-Dist: sh==2.0.6
22
+ Requires-Dist: biopython ==1.81
23
+ Requires-Dist: gtfparse ==1.3.0
24
+ Requires-Dist: sh ==2.0.6
26
25
  Requires-Dist: torch
27
26
  Requires-Dist: lifelines
28
27
  Requires-Dist: notebook
@@ -32,5 +31,3 @@ Requires-Dist: pyfastx
32
31
  Requires-Dist: tensorflow
33
32
  Requires-Dist: keras
34
33
 
35
- UNKNOWN
36
-
@@ -4,6 +4,7 @@ geney/SeqMats.py,sha256=jkXmXAs0OpnFeyCfiJcKKpHHSi9JpKgiOIwsu63e1CQ,18557
4
4
  geney/Transcript.py,sha256=eRZXVVxDVBbv0l385bnAOBFRBSzBwppXcbBq8KXkwlo,14443
5
5
  geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
6
6
  geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
7
+ geney/allele_linkage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
8
  geney/config_setup.py,sha256=nblcGU3HIt8YjdrAoGfbEVKRxwJKv0PikJ5-7AL6axQ,723
8
9
  geney/data_setup.py,sha256=2RHmuvcGUQbEglXQEZr0C2QPDTQYRZOEm0EcmyfQJgU,12229
9
10
  geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
@@ -15,7 +16,7 @@ geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
15
16
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
16
17
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
17
18
  geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
18
- geney/splicing_utils.py,sha256=YDhYtns6pVHmOfk4Z1AC-acA1kKAqKZmJ-P2EZS946k,26378
19
+ geney/splicing_utils.py,sha256=Bj5YV-LHs684afjriep7N2QaRAAKdidFS-adihfDzfI,31887
19
20
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
20
21
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
21
22
  geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
@@ -24,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
24
25
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
25
26
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
26
27
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
27
- geney-1.3.9.dist-info/METADATA,sha256=tkuruBdfjmOTkAz6lM3xZ_nAEXm5Vh4wpaP5FihlTvk,994
28
- geney-1.3.9.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
29
- geney-1.3.9.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
30
- geney-1.3.9.dist-info/RECORD,,
28
+ geney-1.3.11.dist-info/METADATA,sha256=Por9VSaGxOGXy61knApS-3BefXSrh8hhQQQ0ULGQn2I,971
29
+ geney-1.3.11.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
+ geney-1.3.11.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
+ geney-1.3.11.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py2-none-any
5
5
  Tag: py3-none-any