python-katlas 0.0.9__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {python-katlas-0.0.9/python_katlas.egg-info → python-katlas-0.1.0}/PKG-INFO +1 -1
  2. python-katlas-0.1.0/katlas/__init__.py +1 -0
  3. {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/core.py +28 -19
  4. {python-katlas-0.0.9 → python-katlas-0.1.0/python_katlas.egg-info}/PKG-INFO +1 -1
  5. {python-katlas-0.0.9 → python-katlas-0.1.0}/settings.ini +1 -1
  6. python-katlas-0.0.9/katlas/__init__.py +0 -1
  7. {python-katlas-0.0.9 → python-katlas-0.1.0}/LICENSE +0 -0
  8. {python-katlas-0.0.9 → python-katlas-0.1.0}/MANIFEST.in +0 -0
  9. {python-katlas-0.0.9 → python-katlas-0.1.0}/README.md +0 -0
  10. {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/_modidx.py +0 -0
  11. {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/dl.py +0 -0
  12. {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/feature.py +0 -0
  13. {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/imports.py +0 -0
  14. {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/plot.py +0 -0
  15. {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/train.py +0 -0
  16. {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/SOURCES.txt +0 -0
  17. {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/dependency_links.txt +0 -0
  18. {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/entry_points.txt +0 -0
  19. {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/not-zip-safe +0 -0
  20. {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/requires.txt +0 -0
  21. {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/top_level.txt +0 -0
  22. {python-katlas-0.0.9 → python-katlas-0.1.0}/setup.cfg +0 -0
  23. {python-katlas-0.0.9 → python-katlas-0.1.0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-katlas
3
- Version: 0.0.9
3
+ Version: 0.1.0
4
4
  Summary: tools for predicting kinome specificities
5
5
  Home-page: https://github.com/sky1ove/python-katlas
6
6
  Author: lily
@@ -0,0 +1 @@
1
+ __version__ = "0.0.9"
@@ -451,17 +451,18 @@ def predict_kinase(input_string: str, # site sequence
451
451
 
452
452
  # %% ../nbs/00_core.ipynb 41
453
453
  # PSPA
454
- param_PSPA_st = {'ref':Data.get_pspa_st_norm(), 'func':multiply} # Johnson et al. Nature official
455
- param_PSPA_y = {'ref':Data.get_pspa_tyr_norm(), 'func':multiply}
456
- param_PSPA = {'ref':Data.get_pspa_all_norm(), 'func':multiply}
454
+ param_PSPA_st = {'ref':Data.get_pspa_st_norm().astype('float32'), 'func':multiply} # Johnson et al. Nature official
455
+ param_PSPA_y = {'ref':Data.get_pspa_tyr_norm().astype('float32'), 'func':multiply}
456
+ param_PSPA = {'ref':Data.get_pspa_all_norm().astype('float32'), 'func':multiply}
457
457
 
458
458
 
459
459
  # Kinase-substrate dataset, CDDM
460
- param_CDDM = {'ref':Data.get_cddm(), 'func':sumup}
461
- param_CDDM_upper = {'ref':Data.get_cddm_upper(), 'func':sumup, 'to_upper':True} # specific for all uppercase
460
+ param_CDDM = {'ref':Data.get_cddm().astype('float32'), 'func':sumup}
461
+ param_CDDM_upper = {'ref':Data.get_cddm_upper().astype('float32'), 'func':sumup, 'to_upper':True} # specific for all uppercase
462
462
 
463
- # %% ../nbs/00_core.ipynb 45
463
+ # %% ../nbs/00_core.ipynb 46
464
464
  def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
465
+
465
466
  print('input dataframe has a length', df.shape[0])
466
467
  print('Preprocessing')
467
468
 
@@ -493,12 +494,20 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
493
494
  df['keys'] = df['site_seq'].apply(get_dict)
494
495
  input_keys_df = df[['keys']].explode('keys').reset_index()
495
496
  input_keys_df.columns = ['input_index', 'key']
497
+
498
+
496
499
  ref_T = ref.T
497
500
 
498
- merged_df = input_keys_df.merge(ref_T, left_on='key', right_index=True, how='inner')
501
+ input_keys_df = input_keys_df.set_index('key')
502
+
503
+
504
+ print('Merging reference')
505
+ merged_df = input_keys_df.merge(ref_T, left_index=True, right_index=True, how='inner')
506
+
507
+ print('Finish merging')
499
508
 
500
509
  if func == sumup:
501
- grouped_df = merged_df.drop(columns=['key']).groupby('input_index').sum()
510
+ grouped_df = merged_df.groupby('input_index').sum()
502
511
  out = grouped_df.reindex(df.index)
503
512
 
504
513
  elif func==multiply:
@@ -514,7 +523,7 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
514
523
  kinase_df = kinase_df.rename(columns={kinase: 'value'})
515
524
 
516
525
  # Compute log_value
517
- kinase_df['log_value'] = np.log2(kinase_df['value'],where=kinase_df['value']>0)
526
+ kinase_df['log_value'] = np.log2(kinase_df['value'].where(kinase_df['value'] > 0))
518
527
 
519
528
  # Group by 'input_index' and compute sum and count
520
529
  grouped = kinase_df.dropna().groupby('input_index')
@@ -541,7 +550,7 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
541
550
  # Return results as a DataFrame
542
551
  return out
543
552
 
544
- # %% ../nbs/00_core.ipynb 54
553
+ # %% ../nbs/00_core.ipynb 56
545
554
  def get_pct(site,ref,func,pct_ref):
546
555
 
547
556
  "Replicate the percentile results from The Kinase Library."
@@ -566,7 +575,7 @@ def get_pct(site,ref,func,pct_ref):
566
575
  final.columns=['log2(score)','percentile']
567
576
  return final
568
577
 
569
- # %% ../nbs/00_core.ipynb 60
578
+ # %% ../nbs/00_core.ipynb 62
570
579
  def get_pct_df(score_df, # output from predict_kinase_df
571
580
  pct_ref, # a reference df for percentile calculation
572
581
  ):
@@ -591,7 +600,7 @@ def get_pct_df(score_df, # output from predict_kinase_df
591
600
 
592
601
  return percentiles_df
593
602
 
594
- # %% ../nbs/00_core.ipynb 65
603
+ # %% ../nbs/00_core.ipynb 67
595
604
  def get_unique_site(df:pd.DataFrame = None,# dataframe that contains phosphorylation sites
596
605
  seq_col: str='site_seq', # column name of site sequence
597
606
  id_col: str='gene_site' # column name of site id
@@ -607,7 +616,7 @@ def get_unique_site(df:pd.DataFrame = None,# dataframe that contains phosphoryla
607
616
 
608
617
  return unique
609
618
 
610
- # %% ../nbs/00_core.ipynb 68
619
+ # %% ../nbs/00_core.ipynb 70
611
620
  def extract_site_seq(df: pd.DataFrame, # dataframe that contains protein sequence
612
621
  seq_col: str, # column name of protein sequence
613
622
  position_col: str # column name of position 0
@@ -633,7 +642,7 @@ def extract_site_seq(df: pd.DataFrame, # dataframe that contains protein sequenc
633
642
 
634
643
  return np.array(data)
635
644
 
636
- # %% ../nbs/00_core.ipynb 73
645
+ # %% ../nbs/00_core.ipynb 75
637
646
  def get_freq(df_k: pd.DataFrame, # a dataframe for a single kinase that contains phosphorylation sequence split by their position
638
647
  aa_order = [i for i in 'PGACSTVILMFYWHKRQNDEsty'], # amino acid to include in the full matrix
639
648
  aa_order_paper = [i for i in 'PGACSTVILMFYWHKRQNDEsty'], # amino acid to include in the partial matrix
@@ -674,7 +683,7 @@ def get_freq(df_k: pd.DataFrame, # a dataframe for a single kinase that contains
674
683
 
675
684
  return paper,full
676
685
 
677
- # %% ../nbs/00_core.ipynb 77
686
+ # %% ../nbs/00_core.ipynb 79
678
687
  def query_gene(df,gene):
679
688
 
680
689
  "Query gene in the phosphoproteomics dataset"
@@ -688,7 +697,7 @@ def query_gene(df,gene):
688
697
 
689
698
  return df_gene
690
699
 
691
- # %% ../nbs/00_core.ipynb 81
700
+ # %% ../nbs/00_core.ipynb 83
692
701
  def get_ttest(df,
693
702
  columns1, # list of column names for group1
694
703
  columns2, # list of column names for group2
@@ -758,7 +767,7 @@ def get_ttest(df,
758
767
 
759
768
  return results
760
769
 
761
- # %% ../nbs/00_core.ipynb 82
770
+ # %% ../nbs/00_core.ipynb 84
762
771
  def get_metaP(p_values):
763
772
 
764
773
  "Use Fisher's method to calculate a combined p value given a list of p values; this function also allows negative p values (negative correlation)"
@@ -770,7 +779,7 @@ def get_metaP(p_values):
770
779
 
771
780
  return score
772
781
 
773
- # %% ../nbs/00_core.ipynb 85
782
+ # %% ../nbs/00_core.ipynb 87
774
783
  def raw2norm(df: pd.DataFrame, # single kinase's df has position as index, and single amino acid as columns
775
784
  PDHK: bool=False, # whether this kinase belongs to PDHK family
776
785
  ):
@@ -793,7 +802,7 @@ def raw2norm(df: pd.DataFrame, # single kinase's df has position as index, and s
793
802
 
794
803
  return df2
795
804
 
796
- # %% ../nbs/00_core.ipynb 87
805
+ # %% ../nbs/00_core.ipynb 89
797
806
  def get_one_kinase(df: pd.DataFrame, #stacked dataframe (paper's raw data)
798
807
  kinase:str, # a specific kinase
799
808
  normalize: bool=False, # normalize according to the paper; special for PDHK1/4
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-katlas
3
- Version: 0.0.9
3
+ Version: 0.1.0
4
4
  Summary: tools for predicting kinome specificities
5
5
  Home-page: https://github.com/sky1ove/python-katlas
6
6
  Author: lily
@@ -5,7 +5,7 @@
5
5
  ### Python library ###
6
6
  repo = python-katlas
7
7
  lib_name = %(repo)s
8
- version = 0.0.9
8
+ version = 0.1.0
9
9
  min_python = 3.7
10
10
  license = apache2
11
11
  black_formatting = False
@@ -1 +0,0 @@
1
- __version__ = "0.0.8"
File without changes
File without changes
File without changes
File without changes
File without changes