python-katlas 0.0.9__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python-katlas-0.0.9/python_katlas.egg-info → python-katlas-0.1.0}/PKG-INFO +1 -1
- python-katlas-0.1.0/katlas/__init__.py +1 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/core.py +28 -19
- {python-katlas-0.0.9 → python-katlas-0.1.0/python_katlas.egg-info}/PKG-INFO +1 -1
- {python-katlas-0.0.9 → python-katlas-0.1.0}/settings.ini +1 -1
- python-katlas-0.0.9/katlas/__init__.py +0 -1
- {python-katlas-0.0.9 → python-katlas-0.1.0}/LICENSE +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/MANIFEST.in +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/README.md +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/_modidx.py +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/dl.py +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/feature.py +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/imports.py +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/plot.py +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/train.py +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/SOURCES.txt +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/dependency_links.txt +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/entry_points.txt +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/not-zip-safe +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/requires.txt +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/python_katlas.egg-info/top_level.txt +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/setup.cfg +0 -0
- {python-katlas-0.0.9 → python-katlas-0.1.0}/setup.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.9"
|
|
@@ -451,17 +451,18 @@ def predict_kinase(input_string: str, # site sequence
|
|
|
451
451
|
|
|
452
452
|
# %% ../nbs/00_core.ipynb 41
|
|
453
453
|
# PSPA
|
|
454
|
-
param_PSPA_st = {'ref':Data.get_pspa_st_norm(), 'func':multiply} # Johnson et al. Nature official
|
|
455
|
-
param_PSPA_y = {'ref':Data.get_pspa_tyr_norm(), 'func':multiply}
|
|
456
|
-
param_PSPA = {'ref':Data.get_pspa_all_norm(), 'func':multiply}
|
|
454
|
+
param_PSPA_st = {'ref':Data.get_pspa_st_norm().astype('float32'), 'func':multiply} # Johnson et al. Nature official
|
|
455
|
+
param_PSPA_y = {'ref':Data.get_pspa_tyr_norm().astype('float32'), 'func':multiply}
|
|
456
|
+
param_PSPA = {'ref':Data.get_pspa_all_norm().astype('float32'), 'func':multiply}
|
|
457
457
|
|
|
458
458
|
|
|
459
459
|
# Kinase-substrate dataset, CDDM
|
|
460
|
-
param_CDDM = {'ref':Data.get_cddm(), 'func':sumup}
|
|
461
|
-
param_CDDM_upper = {'ref':Data.get_cddm_upper(), 'func':sumup, 'to_upper':True} # specific for all uppercase
|
|
460
|
+
param_CDDM = {'ref':Data.get_cddm().astype('float32'), 'func':sumup}
|
|
461
|
+
param_CDDM_upper = {'ref':Data.get_cddm_upper().astype('float32'), 'func':sumup, 'to_upper':True} # specific for all uppercase
|
|
462
462
|
|
|
463
|
-
# %% ../nbs/00_core.ipynb
|
|
463
|
+
# %% ../nbs/00_core.ipynb 46
|
|
464
464
|
def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
|
|
465
|
+
|
|
465
466
|
print('input dataframe has a length', df.shape[0])
|
|
466
467
|
print('Preprocessing')
|
|
467
468
|
|
|
@@ -493,12 +494,20 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
|
|
|
493
494
|
df['keys'] = df['site_seq'].apply(get_dict)
|
|
494
495
|
input_keys_df = df[['keys']].explode('keys').reset_index()
|
|
495
496
|
input_keys_df.columns = ['input_index', 'key']
|
|
497
|
+
|
|
498
|
+
|
|
496
499
|
ref_T = ref.T
|
|
497
500
|
|
|
498
|
-
|
|
501
|
+
input_keys_df = input_keys_df.set_index('key')
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
print('Merging reference')
|
|
505
|
+
merged_df = input_keys_df.merge(ref_T, left_index=True, right_index=True, how='inner')
|
|
506
|
+
|
|
507
|
+
print('Finish merging')
|
|
499
508
|
|
|
500
509
|
if func == sumup:
|
|
501
|
-
grouped_df = merged_df.
|
|
510
|
+
grouped_df = merged_df.groupby('input_index').sum()
|
|
502
511
|
out = grouped_df.reindex(df.index)
|
|
503
512
|
|
|
504
513
|
elif func==multiply:
|
|
@@ -514,7 +523,7 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
|
|
|
514
523
|
kinase_df = kinase_df.rename(columns={kinase: 'value'})
|
|
515
524
|
|
|
516
525
|
# Compute log_value
|
|
517
|
-
kinase_df['log_value'] = np.log2(kinase_df['value']
|
|
526
|
+
kinase_df['log_value'] = np.log2(kinase_df['value'].where(kinase_df['value'] > 0))
|
|
518
527
|
|
|
519
528
|
# Group by 'input_index' and compute sum and count
|
|
520
529
|
grouped = kinase_df.dropna().groupby('input_index')
|
|
@@ -541,7 +550,7 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
|
|
|
541
550
|
# Return results as a DataFrame
|
|
542
551
|
return out
|
|
543
552
|
|
|
544
|
-
# %% ../nbs/00_core.ipynb
|
|
553
|
+
# %% ../nbs/00_core.ipynb 56
|
|
545
554
|
def get_pct(site,ref,func,pct_ref):
|
|
546
555
|
|
|
547
556
|
"Replicate the percentile results from The Kinase Library."
|
|
@@ -566,7 +575,7 @@ def get_pct(site,ref,func,pct_ref):
|
|
|
566
575
|
final.columns=['log2(score)','percentile']
|
|
567
576
|
return final
|
|
568
577
|
|
|
569
|
-
# %% ../nbs/00_core.ipynb
|
|
578
|
+
# %% ../nbs/00_core.ipynb 62
|
|
570
579
|
def get_pct_df(score_df, # output from predict_kinase_df
|
|
571
580
|
pct_ref, # a reference df for percentile calculation
|
|
572
581
|
):
|
|
@@ -591,7 +600,7 @@ def get_pct_df(score_df, # output from predict_kinase_df
|
|
|
591
600
|
|
|
592
601
|
return percentiles_df
|
|
593
602
|
|
|
594
|
-
# %% ../nbs/00_core.ipynb
|
|
603
|
+
# %% ../nbs/00_core.ipynb 67
|
|
595
604
|
def get_unique_site(df:pd.DataFrame = None,# dataframe that contains phosphorylation sites
|
|
596
605
|
seq_col: str='site_seq', # column name of site sequence
|
|
597
606
|
id_col: str='gene_site' # column name of site id
|
|
@@ -607,7 +616,7 @@ def get_unique_site(df:pd.DataFrame = None,# dataframe that contains phosphoryla
|
|
|
607
616
|
|
|
608
617
|
return unique
|
|
609
618
|
|
|
610
|
-
# %% ../nbs/00_core.ipynb
|
|
619
|
+
# %% ../nbs/00_core.ipynb 70
|
|
611
620
|
def extract_site_seq(df: pd.DataFrame, # dataframe that contains protein sequence
|
|
612
621
|
seq_col: str, # column name of protein sequence
|
|
613
622
|
position_col: str # column name of position 0
|
|
@@ -633,7 +642,7 @@ def extract_site_seq(df: pd.DataFrame, # dataframe that contains protein sequenc
|
|
|
633
642
|
|
|
634
643
|
return np.array(data)
|
|
635
644
|
|
|
636
|
-
# %% ../nbs/00_core.ipynb
|
|
645
|
+
# %% ../nbs/00_core.ipynb 75
|
|
637
646
|
def get_freq(df_k: pd.DataFrame, # a dataframe for a single kinase that contains phosphorylation sequences split by position
|
|
638
647
|
aa_order = [i for i in 'PGACSTVILMFYWHKRQNDEsty'], # amino acid to include in the full matrix
|
|
639
648
|
aa_order_paper = [i for i in 'PGACSTVILMFYWHKRQNDEsty'], # amino acid to include in the partial matrix
|
|
@@ -674,7 +683,7 @@ def get_freq(df_k: pd.DataFrame, # a dataframe for a single kinase that contains
|
|
|
674
683
|
|
|
675
684
|
return paper,full
|
|
676
685
|
|
|
677
|
-
# %% ../nbs/00_core.ipynb
|
|
686
|
+
# %% ../nbs/00_core.ipynb 79
|
|
678
687
|
def query_gene(df,gene):
|
|
679
688
|
|
|
680
689
|
"Query gene in the phosphoproteomics dataset"
|
|
@@ -688,7 +697,7 @@ def query_gene(df,gene):
|
|
|
688
697
|
|
|
689
698
|
return df_gene
|
|
690
699
|
|
|
691
|
-
# %% ../nbs/00_core.ipynb
|
|
700
|
+
# %% ../nbs/00_core.ipynb 83
|
|
692
701
|
def get_ttest(df,
|
|
693
702
|
columns1, # list of column names for group1
|
|
694
703
|
columns2, # list of column names for group2
|
|
@@ -758,7 +767,7 @@ def get_ttest(df,
|
|
|
758
767
|
|
|
759
768
|
return results
|
|
760
769
|
|
|
761
|
-
# %% ../nbs/00_core.ipynb
|
|
770
|
+
# %% ../nbs/00_core.ipynb 84
|
|
762
771
|
def get_metaP(p_values):
|
|
763
772
|
|
|
764
773
|
"Use Fisher's method to calculate a combined p value given a list of p values; this function also allows negative p values (negative correlation)"
|
|
@@ -770,7 +779,7 @@ def get_metaP(p_values):
|
|
|
770
779
|
|
|
771
780
|
return score
|
|
772
781
|
|
|
773
|
-
# %% ../nbs/00_core.ipynb
|
|
782
|
+
# %% ../nbs/00_core.ipynb 87
|
|
774
783
|
def raw2norm(df: pd.DataFrame, # single kinase's df has position as index, and single amino acid as columns
|
|
775
784
|
PDHK: bool=False, # whether this kinase belongs to PDHK family
|
|
776
785
|
):
|
|
@@ -793,7 +802,7 @@ def raw2norm(df: pd.DataFrame, # single kinase's df has position as index, and s
|
|
|
793
802
|
|
|
794
803
|
return df2
|
|
795
804
|
|
|
796
|
-
# %% ../nbs/00_core.ipynb
|
|
805
|
+
# %% ../nbs/00_core.ipynb 89
|
|
797
806
|
def get_one_kinase(df: pd.DataFrame, #stacked dataframe (paper's raw data)
|
|
798
807
|
kinase:str, # a specific kinase
|
|
799
808
|
normalize: bool=False, # normalize according to the paper; special for PDHK1/4
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|