PyPI - python-katlas - Versions diffs - 0.0.9__tar.gz → 0.1.0__tar.gz - Mend

python-katlas 0.0.9.tar.gz → 0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

{python-katlas-0.0.9/python_katlas.egg-info → python-katlas-0.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: python-katlas
-Version: 0.0.9
+Version: 0.1.0
 Summary: tools for predicting kinome specificities
 Home-page: https://github.com/sky1ove/python-katlas
 Author: lily

python-katlas-0.1.0/katlas/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.0.9"

{python-katlas-0.0.9 → python-katlas-0.1.0}/katlas/core.py RENAMED Viewed

@@ -451,17 +451,18 @@ def predict_kinase(input_string: str, # site sequence
 # %% ../nbs/00_core.ipynb 41
 # PSPA
-param_PSPA_st = {'ref':Data.get_pspa_st_norm(), 'func':multiply} # Johnson et al. Nature official
-param_PSPA_y = {'ref':Data.get_pspa_tyr_norm(), 'func':multiply}
-param_PSPA = {'ref':Data.get_pspa_all_norm(), 'func':multiply}
+param_PSPA_st = {'ref':Data.get_pspa_st_norm().astype('float32'), 'func':multiply} # Johnson et al. Nature official
+param_PSPA_y = {'ref':Data.get_pspa_tyr_norm().astype('float32'), 'func':multiply}
+param_PSPA = {'ref':Data.get_pspa_all_norm().astype('float32'), 'func':multiply}
 # Kinase-substrate dataset, CDDM
-param_CDDM = {'ref':Data.get_cddm(), 'func':sumup}
-param_CDDM_upper = {'ref':Data.get_cddm_upper(), 'func':sumup, 'to_upper':True} # specific for all uppercase
+param_CDDM = {'ref':Data.get_cddm().astype('float32'), 'func':sumup}
+param_CDDM_upper = {'ref':Data.get_cddm_upper().astype('float32'), 'func':sumup, 'to_upper':True} # specific for all uppercase
-# %% ../nbs/00_core.ipynb 45
+# %% ../nbs/00_core.ipynb 46
 def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
     print('input dataframe has a length', df.shape[0])
     print('Preprocessing')
@@ -493,12 +494,20 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
     df['keys'] = df['site_seq'].apply(get_dict)
     input_keys_df  = df[['keys']].explode('keys').reset_index()
     input_keys_df.columns = ['input_index', 'key']
     ref_T = ref.T
-    merged_df = input_keys_df.merge(ref_T, left_on='key', right_index=True, how='inner')
+    input_keys_df = input_keys_df.set_index('key')
+    print('Merging reference')
+    merged_df = input_keys_df.merge(ref_T, left_index=True, right_index=True, how='inner')
+    print('Finish merging')
     if func == sumup:
-        grouped_df = merged_df.drop(columns=['key']).groupby('input_index').sum()
+        grouped_df = merged_df.groupby('input_index').sum()
         out = grouped_df.reindex(df.index)
     elif func==multiply:
@@ -514,7 +523,7 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
             kinase_df = kinase_df.rename(columns={kinase: 'value'})
             # Compute log_value
-            kinase_df['log_value'] = np.log2(kinase_df['value'],where=kinase_df['value']>0)
+            kinase_df['log_value'] = np.log2(kinase_df['value'].where(kinase_df['value'] > 0))
             # Group by 'input_index' and compute sum and count
             grouped = kinase_df.dropna().groupby('input_index')
@@ -541,7 +550,7 @@ def predict_kinase_df(df, seq_col, ref, func, to_lower=False, to_upper=False):
     # Return results as a DataFrame
     return out
-# %% ../nbs/00_core.ipynb 54
+# %% ../nbs/00_core.ipynb 56
 def get_pct(site,ref,func,pct_ref):
     "Replicate the precentile results from The Kinase Library."
@@ -566,7 +575,7 @@ def get_pct(site,ref,func,pct_ref):
     final.columns=['log2(score)','percentile']
     return final
-# %% ../nbs/00_core.ipynb 60
+# %% ../nbs/00_core.ipynb 62
 def get_pct_df(score_df, # output from predict_kinase_df
                pct_ref, # a reference df for percentile calculation
               ):
@@ -591,7 +600,7 @@ def get_pct_df(score_df, # output from predict_kinase_df
     return percentiles_df
-# %% ../nbs/00_core.ipynb 65
+# %% ../nbs/00_core.ipynb 67
 def get_unique_site(df:pd.DataFrame = None,# dataframe that contains phosphorylation sites
                     seq_col: str='site_seq', # column name of site sequence
                     id_col: str='gene_site' # column name of site id
@@ -607,7 +616,7 @@ def get_unique_site(df:pd.DataFrame = None,# dataframe that contains phosphoryla
     return unique
-# %% ../nbs/00_core.ipynb 68
+# %% ../nbs/00_core.ipynb 70
 def extract_site_seq(df: pd.DataFrame, # dataframe that contains protein sequence
                      seq_col: str, # column name of protein sequence
                      position_col: str # column name of position 0
@@ -633,7 +642,7 @@ def extract_site_seq(df: pd.DataFrame, # dataframe that contains protein sequenc
     return np.array(data)
-# %% ../nbs/00_core.ipynb 73
+# %% ../nbs/00_core.ipynb 75
 def get_freq(df_k: pd.DataFrame, # a dataframe for a single kinase that contains phosphorylation sequence splitted by their position
              aa_order = [i for i in 'PGACSTVILMFYWHKRQNDEsty'], # amino acid to include in the full matrix
              aa_order_paper = [i for i in 'PGACSTVILMFYWHKRQNDEsty'], # amino acid to include in the partial matrix
@@ -674,7 +683,7 @@ def get_freq(df_k: pd.DataFrame, # a dataframe for a single kinase that contains
     return paper,full
-# %% ../nbs/00_core.ipynb 77
+# %% ../nbs/00_core.ipynb 79
 def query_gene(df,gene):
     "Query gene in the phosphoproteomics dataset"
@@ -688,7 +697,7 @@ def query_gene(df,gene):
     return df_gene
-# %% ../nbs/00_core.ipynb 81
+# %% ../nbs/00_core.ipynb 83
 def get_ttest(df,
               columns1, # list of column names for group1
               columns2, # list of column names for group2
@@ -758,7 +767,7 @@ def get_ttest(df,
     return results
-# %% ../nbs/00_core.ipynb 82
+# %% ../nbs/00_core.ipynb 84
 def get_metaP(p_values):
     "Use Fisher's method to calculate a combined p value given a list of p values; this function also allows negative p values (negative correlation)"
@@ -770,7 +779,7 @@ def get_metaP(p_values):
     return score
-# %% ../nbs/00_core.ipynb 85
+# %% ../nbs/00_core.ipynb 87
 def raw2norm(df: pd.DataFrame, # single kinase's df has position as index, and single amino acid as columns
              PDHK: bool=False, # whether this kinase belongs to PDHK family
             ):
@@ -793,7 +802,7 @@ def raw2norm(df: pd.DataFrame, # single kinase's df has position as index, and s
     return df2
-# %% ../nbs/00_core.ipynb 87
+# %% ../nbs/00_core.ipynb 89
 def get_one_kinase(df: pd.DataFrame, #stacked dataframe (paper's raw data)
                    kinase:str, # a specific kinase
                    normalize: bool=False, # normalize according to the paper; special for PDHK1/4

{python-katlas-0.0.9 → python-katlas-0.1.0/python_katlas.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: python-katlas
-Version: 0.0.9
+Version: 0.1.0
 Summary: tools for predicting kinome specificities
 Home-page: https://github.com/sky1ove/python-katlas
 Author: lily

{python-katlas-0.0.9 → python-katlas-0.1.0}/settings.ini RENAMED Viewed

@@ -5,7 +5,7 @@
 ### Python library ###
 repo = python-katlas
 lib_name = %(repo)s
-version = 0.0.9
+version = 0.1.0
 min_python = 3.7
 license = apache2
 black_formatting = False