geney 1.3.9__py2.py3-none-any.whl → 1.3.10__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

File without changes
geney/splicing_utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import numpy as np
2
+ import pandas as pd
2
3
  from .Gene import Gene
3
4
  from .SeqMats import MutSeqMat
4
5
  from collections import defaultdict
@@ -300,7 +301,7 @@ def find_transcript_missplicing(mut_id, transcript=None, threshold=0.5, engine='
300
301
  # missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
301
302
  # missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
302
303
  # temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
303
- return temp
304
+ # return temp
304
305
 
305
306
 
306
307
  def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
@@ -351,6 +352,91 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
351
352
  return Missplicing(temp, threshold=threshold)
352
353
 
353
354
 
355
def process_pairwise_epistasis(mids, engine='pangolin', fprint=False):
    """
    Flag epistatic splicing effects for pairs of mutations.

    Each identifier in ``mids`` has the form ``"mutation1|mutation2"``. For
    every pair, ``find_transcript_missplicing`` is run on the first mutation,
    on the second mutation and on the combined identifier. For each of the four
    event categories, every site reported by at least one of the three
    predictions is compared:

    * corrective: the combined delta is below the cutoff in magnitude while a
      single-mutation delta is above it and differs from the combined delta by
      more than the cutoff.
    * cummulative: the combined delta is above the cutoff in magnitude while a
      single-mutation delta is below it and differs from the combined delta by
      more than the cutoff.

    Args:
        mids (iterable of str): Pair identifiers, two mutation ids joined by '|'.
        engine (str): Forwarded to ``find_transcript_missplicing``.
        fprint (bool): If True, print the three prediction objects of each pair.

    Returns:
        pandas.DataFrame or None: One row per pair with the columns ``mut_id``,
        ``corrective``, ``cummulative``, ``max_delta``, ``ms1``, ``ms2``,
        ``msb`` and ``increased_canonical_splicing``; None if ``mids`` is empty.

    Raises:
        ValueError: If an identifier does not consist of exactly two
            '|'-separated mutation ids.
    """
    # Cutoff used both as the prediction threshold and for every delta comparison.
    delta_cutoff = 0.25
    events = ('missed_acceptors', 'missed_donors', 'discovered_acceptors', 'discovered_donors')
    # Stand-in for a site that a prediction does not report at all.
    no_change = {'delta': 0}

    results = []
    for mid in mids:
        mutation_ids = mid.split('|')
        if len(mutation_ids) != 2:
            raise ValueError("expected a 'mutation1|mutation2' identifier, got {!r}".format(mid))

        missplicing1 = find_transcript_missplicing(mutation_ids[0], threshold=delta_cutoff, engine=engine)
        missplicing2 = find_transcript_missplicing(mutation_ids[1], threshold=delta_cutoff, engine=engine)
        missplicing_both = find_transcript_missplicing(mid, threshold=delta_cutoff, engine=engine)

        if fprint:
            print(missplicing1)
            print(missplicing2)
            print(missplicing_both)

        flag = {'mut_id': mid, 'corrective': 0, 'cummulative': 0}
        cummulative_deltas, corrective_deltas = [], []
        ms1, ms2, msb = [], [], []
        increased_canonical_splicing = False

        for event in events:
            sites1, sites2, sites_both = missplicing1[event], missplicing2[event], missplicing_both[event]

            # Every site reported by at least one of the three predictions.
            positions = set(sites1.keys()) | set(sites2.keys()) | set(sites_both.keys())

            for k in positions:
                # First available reference value. Tested against None rather
                # than truthiness so that a legitimate 0.0 is not discarded.
                ref_val = next(
                    (
                        entry['reference']
                        for entry in (sites1.get(k), sites2.get(k), sites_both.get(k))
                        if entry is not None and entry.get('reference') is not None
                    ),
                    None,
                )

                delta1 = sites1.get(k, no_change)['delta']
                delta2 = sites2.get(k, no_change)['delta']
                deltab = sites_both.get(k, no_change)['delta']

                ms1.append(delta1)
                ms2.append(delta2)
                msb.append(deltab)

                shift1 = abs(delta1 - deltab) > delta_cutoff
                shift2 = abs(delta2 - deltab) > delta_cutoff
                # Signed change (combined minus single) with the largest magnitude.
                strongest_shift = max((deltab - delta1, deltab - delta2), key=abs)

                # Corrective: a single-mutation effect disappears in the pair.
                if abs(deltab) < delta_cutoff and (
                    (abs(delta1) > delta_cutoff and shift1) or
                    (abs(delta2) > delta_cutoff and shift2)
                ):
                    flag['corrective'] += 1
                    corrective_deltas.append(strongest_shift)

                # Cummulative: the pair has an effect a single mutation lacks.
                if abs(deltab) > delta_cutoff and (
                    (abs(delta1) < delta_cutoff and shift1) or
                    (abs(delta2) < delta_cutoff and shift2)
                ):
                    flag['cummulative'] += 1
                    cummulative_deltas.append(strongest_shift)

                # The range test must be a genuine chained comparison:
                # "(0.25 <= ref_val) <= 0.75" compares a bool with 0.75.
                if (
                    ref_val is not None
                    and 0.25 <= ref_val <= 0.75
                    and (
                        ((delta1 > delta_cutoff or delta2 > delta_cutoff) and deltab < delta_cutoff) or
                        (delta1 < delta_cutoff and delta2 < delta_cutoff and deltab > delta_cutoff)
                    )
                    and (shift1 or shift2)
                ):
                    increased_canonical_splicing = True

        vals = cummulative_deltas + corrective_deltas
        flag['max_delta'] = max(vals, key=abs) if vals else 0
        # default=0 covers pairs for which no site was reported in any category.
        flag['ms1'] = max(ms1, key=abs, default=0)
        flag['ms2'] = max(ms2, key=abs, default=0)
        flag['msb'] = max(msb, key=abs, default=0)
        flag['increased_canonical_splicing'] = increased_canonical_splicing

        results.append(pd.Series(flag).to_frame().T)

    if not results:
        return None

    return pd.concat(results)
437
+
438
+
439
+
354
440
  class Missplicing:
355
441
  def __init__(self, splicing_dict, threshold=0.5):
356
442
  """
@@ -390,6 +476,9 @@ class Missplicing:
390
476
  for d in details.values():
391
477
  yield d['delta']
392
478
 
479
+ def __getitem__(self, key):
480
+ return self.missplicing[key]
481
+
393
482
  @property
394
483
  def aberrant_splicing(self):
395
484
  """
@@ -510,37 +599,72 @@ CREATE TABLE IF NOT EXISTS mutations (
510
599
  PRIMARY KEY (tool, gene, mutation_id, transcript_id)
511
600
  )''')
512
601
 
602
+
603
def get_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
    """
    Retrieve cached splicing data for a given mutation from the database.

    This function never computes anything; it only reads the ``mutations``
    table through the module-level ``cursor``.

    Args:
        tool (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mutation_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        force_recompute (bool): If True, ignore any cached value and report a
            cache miss so that the caller recomputes.

    Returns:
        dict or None: The JSON-decoded splicing data, or None when no row
        matches or ``force_recompute`` is True.
    """
    # Honour the documented contract: a forced recompute ignores the cache.
    if force_recompute:
        return None

    cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                   (tool, gene, mutation_id, transcript_id))
    row = cursor.fetchone()
    if row:
        return json.loads(row[0])
    return None
623
+
624
def save_splicing(tool, gene, mutation_id, transcript_id, splicing):
    """
    Store splicing data for a mutation in the ``mutations`` table.

    The data is passed through ``convert_numpy_to_native``, serialised to JSON
    and written with ``REPLACE INTO``, so an existing row with the same
    (tool, gene, mutation_id, transcript_id) is overwritten.

    Args:
        tool (str): Name of the tool used for computation.
        gene (str): Gene name or identifier.
        mutation_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        splicing: Splicing data accepted by ``convert_numpy_to_native``.

    Returns:
        None
    """
    # NOTE(review): no commit is issued here - presumably the connection
    # commits elsewhere or runs in autocommit mode; confirm.
    payload = json.dumps(convert_numpy_to_native(splicing))
    record = (tool, gene, mutation_id, transcript_id, payload)
    cursor.execute('REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
                   record)
    return None
629
+
513
630
def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
    """
    Return splicing data for a mutation, using the ``mutations`` table as a cache.

    A stored row is returned unless ``force_recompute`` is True. Otherwise the
    data is computed with ``find_transcript_missplicing``, converted to native
    Python types, written back with ``REPLACE INTO`` and returned.

    Args:
        tool (str): Name of the tool used for computation; also passed on as
            the prediction ``engine``.
        gene (str): Gene name or identifier.
        mutation_id (str): A unique identifier for the mutation.
        transcript_id (str): ID for the transcript.
        force_recompute (bool): If True, ignore cached value and recompute.

    Returns:
        dict: The JSON-decoded cached data on a cache hit.
        Missplicing: Built from the freshly computed data otherwise.
    """
    # NOTE(review): the two paths return different types (dict vs. Missplicing);
    # callers should rely only on subscripting until this is unified.
    lookup_key = (tool, gene, mutation_id, transcript_id)

    # Query first; the flag only decides whether a hit is used.
    cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                   lookup_key)
    cached = cursor.fetchone()
    if cached and not force_recompute:
        return json.loads(cached[0])

    # Cache miss or forced recompute: predict, then normalise for JSON storage.
    prediction = find_transcript_missplicing(mutation_id, transcript=transcript_id, engine=tool)
    computed_data = convert_numpy_to_native(prediction.missplicing)

    serialized = json.dumps(computed_data)
    cursor.execute('REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
                   lookup_key + (serialized,))
    return Missplicing(computed_data)
657
+
545
658
 
546
- return computed_data
659
def convert_numpy_to_native(obj):
    """
    Recursively convert NumPy data types to native Python types.

    Numeric values are rounded to 3 decimal places so the result is compact
    and can be passed to ``json.dumps``.

    Args:
        obj: A dict, list, tuple, NumPy array, NumPy scalar or plain Python
            value, nested to any depth.

    Returns:
        The converted structure:
        * dicts keep their keys (NumPy scalar keys become native, unrounded)
          and have their values converted;
        * lists, tuples and NumPy arrays become lists of converted items;
        * NumPy scalars become native scalars;
        * ints and floats are rounded to 3 decimals;
        * bools, strings, None and any other value that cannot be rounded are
          returned unchanged.
    """
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_numpy_to_native(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_to_native(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return convert_numpy_to_native(obj.tolist())
    if isinstance(obj, np.generic):  # NumPy scalar types
        obj = obj.item()
    # bool is a subclass of int; rounding it would silently turn True into 1.
    if isinstance(obj, bool):
        return obj
    try:
        return round(obj, 3)
    except TypeError:
        # Not a number (str, None, ...): nothing to round, pass it through.
        return obj
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.9
3
+ Version: 1.3.10
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
7
7
  Author-email: nicolasalynn@gmail.com
8
8
  License: Free for non-commercial use
9
- Platform: UNKNOWN
10
9
  Classifier: Development Status :: 1 - Planning
11
10
  Classifier: Intended Audience :: Science/Research
12
11
  Classifier: License :: Free for non-commercial use
@@ -20,9 +19,9 @@ Requires-Dist: networkx
20
19
  Requires-Dist: viennarna
21
20
  Requires-Dist: tqdm
22
21
  Requires-Dist: spliceai
23
- Requires-Dist: biopython==1.81
24
- Requires-Dist: gtfparse==1.3.0
25
- Requires-Dist: sh==2.0.6
22
+ Requires-Dist: biopython ==1.81
23
+ Requires-Dist: gtfparse ==1.3.0
24
+ Requires-Dist: sh ==2.0.6
26
25
  Requires-Dist: torch
27
26
  Requires-Dist: lifelines
28
27
  Requires-Dist: notebook
@@ -32,5 +31,3 @@ Requires-Dist: pyfastx
32
31
  Requires-Dist: tensorflow
33
32
  Requires-Dist: keras
34
33
 
35
- UNKNOWN
36
-
@@ -4,6 +4,7 @@ geney/SeqMats.py,sha256=jkXmXAs0OpnFeyCfiJcKKpHHSi9JpKgiOIwsu63e1CQ,18557
4
4
  geney/Transcript.py,sha256=eRZXVVxDVBbv0l385bnAOBFRBSzBwppXcbBq8KXkwlo,14443
5
5
  geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
6
6
  geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
7
+ geney/allele_linkage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
8
  geney/config_setup.py,sha256=nblcGU3HIt8YjdrAoGfbEVKRxwJKv0PikJ5-7AL6axQ,723
8
9
  geney/data_setup.py,sha256=2RHmuvcGUQbEglXQEZr0C2QPDTQYRZOEm0EcmyfQJgU,12229
9
10
  geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
@@ -15,7 +16,7 @@ geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
15
16
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
16
17
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
17
18
  geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
18
- geney/splicing_utils.py,sha256=YDhYtns6pVHmOfk4Z1AC-acA1kKAqKZmJ-P2EZS946k,26378
19
+ geney/splicing_utils.py,sha256=DXX-q9G0IcmPx-gI6h3b6P8x4CTPVIGVM0HXyPz4r8g,31848
19
20
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
20
21
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
21
22
  geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
@@ -24,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
24
25
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
25
26
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
26
27
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
27
- geney-1.3.9.dist-info/METADATA,sha256=tkuruBdfjmOTkAz6lM3xZ_nAEXm5Vh4wpaP5FihlTvk,994
28
- geney-1.3.9.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
29
- geney-1.3.9.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
30
- geney-1.3.9.dist-info/RECORD,,
28
+ geney-1.3.10.dist-info/METADATA,sha256=mNFs019vfrWS0iciRjkA2CcLgycPDaEuH2utIrerZdA,971
29
+ geney-1.3.10.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
+ geney-1.3.10.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
+ geney-1.3.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py2-none-any
5
5
  Tag: py3-none-any