geney 1.2.20__py2.py3-none-any.whl → 1.2.22__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/oncosplice.py +1 -1
- {geney-1.2.20.dist-info → geney-1.2.22.dist-info}/METADATA +1 -1
- geney-1.2.22.dist-info/RECORD +19 -0
- geney/Gene.py +0 -258
- geney/analyzers/__init__.py +0 -0
- geney/analyzers/benchmark_clinvar.py +0 -158
- geney/analyzers/characterize_epistasis.py +0 -15
- geney/analyzers/compare_sets.py +0 -91
- geney/analyzers/group_comparison.py +0 -81
- geney/analyzers/survival.py +0 -144
- geney/analyzers/tcga_annotations.py +0 -194
- geney/analyzers/visualize_protein_conservation.py +0 -398
- geney/benchmark_clinvar.py +0 -158
- geney/compare_sets.py +0 -91
- geney/data_parsers/__init__.py +0 -0
- geney/data_parsers/gtex.py +0 -68
- geney/gtex.py +0 -68
- geney/immunotherapy/__init__.py +0 -0
- geney/immunotherapy/netchop.py +0 -78
- geney/mutations/__init__.py +0 -0
- geney/mutations/variant_utils.py +0 -125
- geney/netchop.py +0 -79
- geney/oncosplice/__init__.py +0 -0
- geney/oncosplice_mouse.py +0 -277
- geney/oncosplice_pipeline.py +0 -1588
- geney/performance_utils.py +0 -138
- geney/pipelines/__init__.py +0 -0
- geney/pipelines/dask_utils.py +0 -153
- geney/splicing/__init__.py +0 -2
- geney/splicing/spliceai_utils.py +0 -253
- geney/splicing/splicing_isoform_utils.py +0 -0
- geney/splicing/splicing_utils.py +0 -366
- geney/survival.py +0 -124
- geney/tcga_annotations.py +0 -352
- geney/translation_termination/__init__.py +0 -0
- geney/translation_termination/tts_utils.py +0 -0
- geney-1.2.20.dist-info/RECORD +0 -52
- {geney-1.2.20.dist-info → geney-1.2.22.dist-info}/WHEEL +0 -0
- {geney-1.2.20.dist-info → geney-1.2.22.dist-info}/top_level.txt +0 -0
geney/tcga_annotations.py
DELETED
|
@@ -1,352 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import random
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
class TCGACase:
    """Mutations of a single case within one gene, with helper annotations.

    The wrapped frame is expected to provide the columns used below:
    ``case_id``, ``mut_id``, ``Start_Position``, ``t_depth``,
    ``t_alt_count`` and ``t_ref_count``.  On construction the rows without
    tumor read depth are dropped, a ``vaf`` column is added and nearby
    variants are assigned a shared ``group`` index.

    The caller's frame is never modified; ``self.df`` is always a private copy.
    """

    def __init__(self, df):
        # Here we get a dataframe of mutations within a gene
        self.df = df
        self.calculate_vaf()
        self.space_variants(spacer_size=50)
        # The id is taken from the unfiltered input, so it is available even
        # when every row was dropped for having no read depth.  An empty input
        # yields ``None`` instead of an IndexError (find_epistasis already
        # treats an empty case as valid).
        case_ids = df.case_id.tolist()
        self.case_id = case_ids[0] if case_ids else None

    def space_variants(self, spacer_size=100, group_likelihood_threshold=0):
        """Assign a ``group`` index to variants lying close to each other.

        Unique start positions are walked in ascending order; a new group is
        opened whenever the gap to the previous position exceeds
        ``spacer_size``.  Groups are numbered from 0.

        Args:
            spacer_size: largest gap (in ``Start_Position`` units) that still
                keeps two neighbouring positions in the same group.
            group_likelihood_threshold: accepted for backward compatibility;
                it is not used by the grouping.

        Returns:
            ``self``, to allow chaining.
        """
        df = self.df
        if df.empty:
            self.df = df.assign(group=0)
            return self

        positions = sorted(df.Start_Position.unique().tolist())
        group_of = {}
        group_index = 0
        previous = positions[0]
        for position in positions:
            if position - previous > spacer_size:
                group_index += 1
            group_of[position] = group_index
            previous = position

        # A single map replaces one ``isin`` pass per group.
        self.df = df.assign(group=df.Start_Position.map(group_of))
        return self

    def calculate_vaf(self):
        """Drop rows with zero depth and add ``vaf = t_alt_count / t_depth``.

        Returns:
            ``self``, to allow chaining.
        """
        # Copy so the new column is written to our own frame, not to a view
        # of the caller's data.
        covered = self.df[self.df.t_depth > 0].copy()
        covered['vaf'] = covered.t_alt_count / covered.t_depth
        self.df = covered
        return self

    def find_overlayed_variants(self):
        """Collapse rows sharing a ``mut_id`` into one row with summed reads.

        The first row of every ``mut_id`` is kept; its ``t_depth``,
        ``t_alt_count`` and ``t_ref_count`` are replaced by the per-mutation
        sums and ``vaf`` is recomputed from those sums.

        Returns:
            ``self``, to allow chaining.
        """
        df = self.df
        totals = df.groupby('mut_id')[['t_depth', 't_alt_count', 't_ref_count']].sum()
        totals = totals[totals.t_depth > 0].copy()
        totals['vaf'] = totals.t_alt_count / totals.t_depth

        merged = df.drop_duplicates(subset='mut_id', keep='first').set_index('mut_id')
        # ``update`` aligns on the mut_id index and only touches columns that
        # already exist in ``merged``.
        merged.update(totals)
        self.df = merged.reset_index()
        return self

    def find_epistasis(self, pth=3, rth=0):
        """Return groups that contain more than one well-supported variant.

        A variant is kept when ``t_alt_count > t_ref_count / pth`` and
        ``t_alt_count >= rth``.  The kept variants are aggregated per
        ``group``: ids are joined with ``'|'``, read counts are averaged and
        the first ``case_id`` is taken.

        Args:
            pth: divisor applied to the reference count in the support test.
            rth: minimum alternate read count.

        Returns:
            ``None`` when the case holds no mutations at all; otherwise a
            DataFrame with columns ``mut_id``, ``t_alt_count``,
            ``t_ref_count`` and ``case_id`` (possibly empty).
        """
        columns = ['mut_id', 't_alt_count', 't_ref_count', 'case_id']
        df = self.df
        if df.empty:
            return None

        supported = (df.t_alt_count > df.t_ref_count / pth) & (df.t_alt_count >= rth)
        df = df[supported].sort_values('Start_Position', ascending=True)
        if df.empty:
            # Nothing passed the support filter: return an empty result rather
            # than aggregating an empty groupby.
            return df[columns]

        # Group by the group_key
        grouped = df.groupby('group').agg({
            'mut_id': lambda ids: '|'.join(ids),
            't_alt_count': 'mean',
            't_ref_count': 'mean',
            'case_id': 'first'
        }).reset_index(drop=True)

        # Only joined ids contain the separator; match it literally instead of
        # relying on an escaped regex.
        return grouped[grouped.mut_id.str.contains('|', regex=False)][columns]
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
class TCGAGene:
    """Per-gene mutation table loaded from ``<cancer_path>/<gene>/GeneMutTble.txt``.

    After loading, ``self.df`` holds the SNP/INS/DEL rows that survive the
    requested filters, sorted by ``Start_Position``, with two derived columns:

    * ``mut_id`` - ``Gene_name:Chromosome:Start_Position:Reference_Allele:Tumor_Seq_Allele2``
      with any ``chr`` text removed from the chromosome;
    * ``ratio`` - ``t_alt_count / (t_alt_count + t_ref_count)``; rows whose
      two counts sum to zero are dropped.

    If the file itself contains no rows, ``self.df`` is the empty table as
    read, without the derived columns.
    """

    def __init__(self, gene, cancer_path=Path('/tamir2/cancer_proj/gdc_db/data/filtered_feb_2021/AllGenes/'),
                 valid_cases=None, extra_cols=(), exclude_filters=None, include_filter=None):
        """Load and filter the mutation table of ``gene``.

        Args:
            gene: gene name; also the name of the sub-directory to read.
            cancer_path: directory holding one sub-directory per gene
                (``Path`` or ``str``).
            valid_cases: optional collection of ``case_id`` values to keep.
            extra_cols: additional column names to read from the file.
            exclude_filters: optional pattern, or iterable of patterns; rows
                whose ``FILTER`` text matches any of them are dropped.  Ignored
                when ``include_filter`` is given.
            include_filter: optional exact ``FILTER`` value to keep.
        """
        base_cols = ['Variant_Type', 'FILTER', 'vcf_tumor_gt', 'vcf_normal_gt',
                     'COSMIC', 't_depth', 't_ref_count', 't_alt_count', 'Proj_name',
                     'HGVSc', 'Chromosome', 'Start_Position', 'Reference_Allele',
                     'Tumor_Seq_Allele2', 'case_id', 'Gene_name']
        # dict.fromkeys removes repeated names while keeping their order.
        usecols = list(dict.fromkeys(base_cols + list(extra_cols)))
        df = pd.read_csv(Path(cancer_path) / gene / 'GeneMutTble.txt',
                         usecols=usecols,
                         low_memory=False).sort_values('Start_Position', ascending=True)

        if df.empty:
            self.df = df
            return

        df = df[df.Variant_Type.isin(['SNP', 'INS', 'DEL'])]

        if include_filter is not None:
            df = df[df.FILTER == include_filter]
        elif exclude_filters is not None:
            if isinstance(exclude_filters, str):
                # A bare string would otherwise be iterated character by character.
                exclude_filters = [exclude_filters]
            for exclude_filter in exclude_filters:
                # Missing FILTER values are treated as empty text so that the
                # mask stays boolean and can be negated.
                filter_text = df.FILTER.fillna('').astype(str)
                df = df[~filter_text.str.contains(exclude_filter)]

        if valid_cases is not None:
            df = df[df.case_id.isin(valid_cases)]

        # Work on a private copy: the frame above is a filtered selection.
        df = df.copy()
        # Vectorised id construction; also well defined when no rows are left.
        df['mut_id'] = (df.Gene_name.astype(str) + ':'
                        + df.Chromosome.astype(str).str.replace('chr', '', regex=False) + ':'
                        + df.Start_Position.astype(str) + ':'
                        + df.Reference_Allele.astype(str) + ':'
                        + df.Tumor_Seq_Allele2.astype(str))

        total_reads = df.t_alt_count + df.t_ref_count
        df = df[total_reads > 0].copy()
        df['ratio'] = df.t_alt_count / (df.t_alt_count + df.t_ref_count)
        self.df = df

    @property
    def affected_cases(self):
        """List of distinct ``case_id`` values present in ``self.df``."""
        return self.df.case_id.unique().tolist()

    def get_patient_muts(self, case_id=None):
        """Return the rows of one case.

        Args:
            case_id: case to select; when ``None`` a random affected case is
                chosen.

        Returns:
            DataFrame with the rows of that case.  It is empty when
            ``case_id`` is ``None`` and the table holds no cases.
        """
        if case_id is None:
            cases = self.affected_cases
            if not cases:
                return self.df.iloc[0:0]
            case_id = random.choice(cases)
        return self.df[self.df.case_id == case_id]
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
class TCGAMut:
    """One or more mutations parsed from a ``mut_id`` string.

    A ``mut_id`` has the form
    ``Gene_name:Chromosome:Start_Position:Reference_Allele:Tumor_Seq_Allele2``;
    several ids may be joined with ``'|'``.  The parsed rows are stored in
    ``self.df`` (chromosome prefixed with ``chr``, start position as int) and
    ``self.gene`` is the gene name of the first id.
    """

    _COLUMNS = ['Gene_name', 'Chromosome', 'Start_Position', 'Reference_Allele',
                'Tumor_Seq_Allele2']

    def __init__(self, mut_id):
        """Parse ``mut_id`` into a table.

        Raises:
            ValueError: if any id does not have exactly five ``':'``-separated
                fields, or a start position is not an integer.
        """
        records = []
        for mut in mut_id.split('|'):
            fields = mut.split(':')
            if len(fields) != len(self._COLUMNS):
                raise ValueError(
                    f"Malformed mutation id {mut!r}: expected "
                    f"{len(self._COLUMNS)} ':'-separated fields, got {len(fields)}")
            records.append(fields)

        data = pd.DataFrame(records, columns=self._COLUMNS)
        # Every field is a string here, so the prefix can be added column-wise.
        data['Chromosome'] = 'chr' + data.Chromosome
        data = data.astype({'Start_Position': int})
        self.gene = data.Gene_name.unique().tolist()[0]
        self.df = data

    def find_affected_patients(self, **gene_kwargs):
        """Return the gene-table rows that match these mutations.

        The mutation table of ``self.gene`` is loaded with ``TCGAGene`` and
        inner-joined with ``self.df`` on gene, chromosome, start position and
        both alleles.

        Args:
            **gene_kwargs: optional keyword arguments forwarded to
                ``TCGAGene`` (for example ``cancer_path``); with none given
                the ``TCGAGene`` defaults are used.

        Returns:
            DataFrame with the matching rows of the gene table.
        """
        gene = TCGAGene(self.gene, **gene_kwargs)
        return pd.merge(self.df, gene.df, on=self._COLUMNS)
|
|
159
|
-
|
|
160
|
-
# CLINICAL_DATA_FILE = Path('/tamir2/nicolaslynn/data/TCGA/cancer_reports/new_df_p_proc.pkl')
|
|
161
|
-
# CLINICAL_DATA_FILE = Path('/tamir2/yoramzar/Projects/Cancer_mut/Explore_data/reports/df_p_all.pkl')
|
|
162
|
-
# CANCER_DATA_PATH = Path('/tamir2/cancer_proj/gdc_db/data/filtered_feb_2021/AllGenes')
|
|
163
|
-
# MAF_FILE_NAME = 'GeneMutTble.txt'
|
|
164
|
-
# CASE_TRACKER = pd.read_csv('/tamir2/nicolaslynn/projects/TCGAParsed/case2proj.csv', index_col=0)
|
|
165
|
-
# PROJ_COUNTS = CASE_TRACKER.proj.value_counts()
|
|
166
|
-
# OKGP_DATA_FILE = Path('/tamir2/nicolaslynn/projects/1000GenomesProjMutations/parsed_1kgp_mutations_in_target_genes.csv')
|
|
167
|
-
# MUTATION_FREQ_DF = pd.read_csv(OKGP_DATA_FILE, index_col=0)
|
|
168
|
-
# PROTEIN_ANNOTATIONS = pd.read_csv('/tamir2/nicolaslynn/data/BioMart/protein_annotations.csv').rename(columns={'Interpro start': 'start', 'Interpro end': 'end', 'Interpro Short Description': 'name'})[['Gene stable ID', 'Transcript stable ID', 'start', 'end', 'name']]
|
|
169
|
-
# PROTEIN_ANNOTATIONS['length'] = PROTEIN_ANNOTATIONS.apply(lambda row: abs(row.start - row.end), axis=1)
|
|
170
|
-
|
|
171
|
-
# def prepare_gene_sets():
|
|
172
|
-
# # gene_annotations_file = Path('/tamir2/nicolaslynn/data/COSMIC/cancer_gene_roles.csv')
|
|
173
|
-
# # GENE_DF = pd.read_csv(gene_annotations_file, index_col=0)
|
|
174
|
-
# # all_oncogenes = GENE_DF[GENE_DF.OG==True].index.tolist()
|
|
175
|
-
# # all_oncogenes = list(set(all_oncogenes))
|
|
176
|
-
# return [], [], []
|
|
177
|
-
#
|
|
178
|
-
# CLIN_DF = prepare_clinical_data()
|
|
179
|
-
# TSGS, ONCOGENES, CANCER_GENES = prepare_gene_sets()
|
|
180
|
-
#
|
|
181
|
-
#
|
|
182
|
-
# def generate_survival_quantitative(affected_df, nonaffected_df):
|
|
183
|
-
# if affected_df.empty or nonaffected_df.empty:
|
|
184
|
-
# return np.nan, np.nan, np.nan
|
|
185
|
-
# results = logrank_test(affected_df['duration'], nonaffected_df['duration'],
|
|
186
|
-
# event_observed_A=affected_df['event'],
|
|
187
|
-
# event_observed_B=nonaffected_df['event'])
|
|
188
|
-
# p_value = results.p_value
|
|
189
|
-
# kmf = KaplanMeierFitter()
|
|
190
|
-
# kmf.fit(affected_df['duration'], affected_df['event'], label=f'With Epistasis ({len(affected_df)})')
|
|
191
|
-
# times, surv_probs = kmf.survival_function_.index.values, kmf.survival_function_.values.flatten()
|
|
192
|
-
# auc1 = np.trapz(surv_probs, times)
|
|
193
|
-
# kmf.fit(nonaffected_df['duration'], nonaffected_df['event'], label=f'Without Epistasis ({len(nonaffected_df)})')
|
|
194
|
-
# times, surv_probs = kmf.survival_function_.index.values, kmf.survival_function_.values.flatten()
|
|
195
|
-
# auc2 = np.trapz(surv_probs, times)
|
|
196
|
-
# return p_value, auc1, auc2
|
|
197
|
-
#
|
|
198
|
-
# def generate_survival_pvalue(affected_df, unaffected_df):
|
|
199
|
-
# results = logrank_test(affected_df['duration'], unaffected_df['duration'],
|
|
200
|
-
# event_observed_A=affected_df['event'],
|
|
201
|
-
# event_observed_B=unaffected_df['event'])
|
|
202
|
-
#
|
|
203
|
-
# p_value = results.p_value
|
|
204
|
-
# kmf = KaplanMeierFitter()
|
|
205
|
-
# # Fit data
|
|
206
|
-
# kmf.fit(affected_df['duration'], affected_df['event'], label=f'Without Epistasis ({len(affected_df)})')
|
|
207
|
-
# ax = kmf.plot()
|
|
208
|
-
#
|
|
209
|
-
# kmf.fit(unaffected_df['duration'], unaffected_df['event'], label=f'With Epistasis ({len(unaffected_df)})')
|
|
210
|
-
# kmf.plot(ax=ax)
|
|
211
|
-
# plt.text(5, 0.95, f'pval: {p_value:.3e}')
|
|
212
|
-
# plt.show()
|
|
213
|
-
# return p_value
|
|
214
|
-
#
|
|
215
|
-
# def get_project_prevalence(cases_affected):
|
|
216
|
-
# ca = [c for c in cases_affected if c in CASE_TRACKER.index]
|
|
217
|
-
# prevalences = CASE_TRACKER.loc[ca].proj.value_counts() / PROJ_COUNTS
|
|
218
|
-
# prevalences.fillna(0, inplace=True)
|
|
219
|
-
# prevalences = prevalences[[i for i in prevalences.index if 'TCGA' in i]]
|
|
220
|
-
# prevalences.index = [s.replace('TCGA', 'prev') for s in prevalences.index]
|
|
221
|
-
# return prevalences
|
|
222
|
-
#
|
|
223
|
-
# def get_project_counts(cases_affected):
|
|
224
|
-
# ca = [c for c in cases_affected if c in CASE_TRACKER.index]
|
|
225
|
-
# prevalences = CASE_TRACKER.loc[ca].proj.value_counts()
|
|
226
|
-
# prevalences = prevalences[[i for i in prevalences.index if 'TCGA' in i]]
|
|
227
|
-
# prevalences.index = [s.replace('TCGA_', '') for s in prevalences.index]
|
|
228
|
-
# return prevalences
|
|
229
|
-
#
|
|
230
|
-
# def get_event_consequence(df):
|
|
231
|
-
# assert df.Transcript_ID.nunique() == 1, 'Too many transcripts to return a single consequence.'
|
|
232
|
-
# return df.iloc[0].Consequence
|
|
233
|
-
#
|
|
234
|
-
# def get_dbSNP_id(df):
|
|
235
|
-
# return df.iloc[0].dbSNP_RS
|
|
236
|
-
#
|
|
237
|
-
# def load_variant_file(gene):
|
|
238
|
-
# df = pd.read_csv(CANCER_DATA_PATH / gene / MAF_FILE_NAME, low_memory=False)
|
|
239
|
-
# df['mut_id'] = df.apply(lambda row: f"{row.Gene_name}:{row.Chromosome.replace('chr', '')}:{row.Start_Position}:{row.Reference_Allele}:{row.Tumor_Seq_Allele2}", axis=1)
|
|
240
|
-
# return df
|
|
241
|
-
#
|
|
242
|
-
# def find_event_data(event):
|
|
243
|
-
# df = load_variant_file(event.gene)
|
|
244
|
-
# if df.empty:
|
|
245
|
-
# return None
|
|
246
|
-
#
|
|
247
|
-
# df = df.query \
|
|
248
|
-
# ('Chromosome == @event.chromosome & Start_Position == @event.start & Reference_Allele == @event.ref & Tumor_Seq_Allele2 == @event.alt')
|
|
249
|
-
#
|
|
250
|
-
# if df.empty:
|
|
251
|
-
# return None
|
|
252
|
-
#
|
|
253
|
-
# if event.transcript_id is not None:
|
|
254
|
-
# df = df[df.Transcript_ID == event.transcript_id]
|
|
255
|
-
# df['mut_id'] = event.event_id
|
|
256
|
-
# return df
|
|
257
|
-
#
|
|
258
|
-
#
|
|
259
|
-
# class GEvent:
|
|
260
|
-
# def __init__(self, event_id, transcript_id=None):
|
|
261
|
-
# self.gene, self.chromosome, self.start, self.ref, self.alt = event_id.split(':')
|
|
262
|
-
# self.transcript_id = transcript_id
|
|
263
|
-
# self.chromosome = f'chr{self.chromosome}'
|
|
264
|
-
# self.start = int(self.start)
|
|
265
|
-
# self.event_id = event_id
|
|
266
|
-
#
|
|
267
|
-
#
|
|
268
|
-
#
|
|
269
|
-
# def get_okgp_mutation_frequency(mut_id):
|
|
270
|
-
# if mut_id in MUTATION_FREQ_DF.index:
|
|
271
|
-
# return MUTATION_FREQ_DF.loc[mut_id].cases_affected
|
|
272
|
-
# else:
|
|
273
|
-
# return 0
|
|
274
|
-
#
|
|
275
|
-
# def get_df_filter_info(df):
|
|
276
|
-
# filter_artifact_values: list = ["oxog", "bPcr", "bSeq"]
|
|
277
|
-
# MuTect2_filters: list = ['Germline risk', 't_lod_fstar', 'alt_allele_in_normal', 'panel_of_normals', 'clustered_events',
|
|
278
|
-
# 'str_contraction', 'multi_event_alt_allele_in_normal', 'homologous_mapping_event', 'triallelic_site']
|
|
279
|
-
# filter_col_name: str = "FILTER_info" # column name to add to the dataframe
|
|
280
|
-
# filter_info_list: list = []
|
|
281
|
-
# f_cnr_info = {}
|
|
282
|
-
#
|
|
283
|
-
# for j, (prj, df_prj) in enumerate(df.groupby('Proj_name')):
|
|
284
|
-
# filter_vals = list(df_prj['FILTER'])
|
|
285
|
-
# num_pass, num_artifacts, num_mutect2_filters = 0, 0, 0
|
|
286
|
-
# for filter_val in filter_vals:
|
|
287
|
-
# num_pass += ('PASS' in filter_val)
|
|
288
|
-
# num_artifacts += any([x in filter_val for x in filter_artifact_values])
|
|
289
|
-
# num_mutect2_filters += any([x in filter_val for x in MuTect2_filters])
|
|
290
|
-
# num_rest = max(0, (len(filter_vals) - num_pass - num_artifacts - num_mutect2_filters))
|
|
291
|
-
# f_cnr_info[str(prj)[5:]] = (num_pass, num_mutect2_filters, num_artifacts, num_rest)
|
|
292
|
-
# return f_cnr_info
|
|
293
|
-
#
|
|
294
|
-
# def yoram_mutid(row):
|
|
295
|
-
# return f'{row.Gene_name}:{row.Chromosome}:{row.Consequence}:{row.Start_Position}:{row.Reference_Allele}:{row.Tumor_Seq_Allele2}'
|
|
296
|
-
#
|
|
297
|
-
#
|
|
298
|
-
# def annotate_level_two(mut_id, tid):
|
|
299
|
-
# mut = GEvent(mut_id, tid)
|
|
300
|
-
# df = find_event_data(mut)
|
|
301
|
-
#
|
|
302
|
-
# if df.empty or df is None:
|
|
303
|
-
# return None
|
|
304
|
-
#
|
|
305
|
-
# patients_affected = df.cases_affected.unique().tolist()
|
|
306
|
-
# p_val, auc_a, auc_n = generate_survival_quantitative(CLIN_DF[CLIN_DF.case_id.isin(patients_affected)], CLIN_DF[~CLIN_DF.case_id.isin(patients_affected)])
|
|
307
|
-
# project_prevalences = get_project_prevalence(patients_affected)
|
|
308
|
-
# prev_dict = project_prevalences.to_dict().sort()
|
|
309
|
-
# project_counts = get_project_counts(patients_affected)
|
|
310
|
-
#
|
|
311
|
-
# s = pd.Series({
|
|
312
|
-
# 'mut_id': mut_id,
|
|
313
|
-
# 'yoram_mut_id': yoram_mutid(df.iloc[0]),
|
|
314
|
-
# 'transcript_id': tid,
|
|
315
|
-
# 'affected_cases': len(patients_affected),
|
|
316
|
-
# 'dbSNP_id': get_dbSNP_id(df),
|
|
317
|
-
# 'consequence': get_event_consequence(df),
|
|
318
|
-
# 'survival_p_value': p_val,
|
|
319
|
-
# 'auc_affected': auc_a,
|
|
320
|
-
# 'auc_nonaffected': auc_n,
|
|
321
|
-
# 'TSG': contains(TSGS, mut.gene),
|
|
322
|
-
# 'oncogene': contains(ONCOGENES, mut.gene),
|
|
323
|
-
# 'cases_1kgp': get_okgp_mutation_frequency(mut.event_id),
|
|
324
|
-
# 'filter_inf': get_df_filter_info(df),
|
|
325
|
-
# 'strand': df.Strand.unique().tolist()[0],
|
|
326
|
-
# 'prevalences': prev_dict
|
|
327
|
-
# })
|
|
328
|
-
#
|
|
329
|
-
# s['max_prev'] = project_prevalences.max()
|
|
330
|
-
# s['rel_proj'] = ','.join([c.split('_')[-1] for c in project_prevalences[project_prevalences == project_prevalences.max()].index.tolist()])
|
|
331
|
-
# s = pd.concat([s, project_prevalences, project_counts])
|
|
332
|
-
# del df
|
|
333
|
-
# return s
|
|
334
|
-
#
|
|
335
|
-
# def get_mut_counts():
|
|
336
|
-
# cases = unload_json('/tamir2/nicolaslynn/projects/TCGAParsed/recurring_single_muts_tcga.json')
|
|
337
|
-
# cases = pd.Series(cases)
|
|
338
|
-
# cases.name = 'num_cases'
|
|
339
|
-
# cases.index.name = 'mut_id'
|
|
340
|
-
# cases = cases.to_frame()
|
|
341
|
-
# cases.reset_index(inplace=True)
|
|
342
|
-
# return cases
|
|
343
|
-
#
|
|
344
|
-
#
|
|
345
|
-
# def create_mut_id(row):
|
|
346
|
-
# return f"{row.Gene_name}:{row['Chromosome']}:{row['Start_Position']}:{row['Reference_Allele']}:{row['Tumor_Seq_Allele2']}"
|
|
347
|
-
#
|
|
348
|
-
#
|
|
349
|
-
# def is_in_exon(mut_id, tid):
|
|
350
|
-
# from geney.Gene import Gene
|
|
351
|
-
# transcript = Gene(mut_id.split(':')[0]).generate_transcript(tid)
|
|
352
|
-
# return int(mut_id.split(':')[2]) in transcript.exonic_indices
|
|
File without changes
|
|
File without changes
|
geney-1.2.20.dist-info/RECORD
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
geney/Fasta_segment.py,sha256=0zCdzPUbDeM9Rz642woH5Q94pwI46O0fE3H8w0XWebc,11255
|
|
2
|
-
geney/Gene.py,sha256=abHnvZ4ytbjQmoBtrbsX5G0QAspm06NqZBINkjHgPog,10140
|
|
3
|
-
geney/__init__.py,sha256=knezxgbV2c2gcO2ek2-xxEC15HL4aO1WuoMiYOOvKf8,428
|
|
4
|
-
geney/benchmark_clinvar.py,sha256=LLl77e95Qbg9Kd-m2yL8ilmzubSz9SKogeARwssT4Ks,5532
|
|
5
|
-
geney/compare_sets.py,sha256=TcgL57V7BUPxBoW9lv3xr8qK2Acmykn85Ev3avicQr8,2977
|
|
6
|
-
geney/config_setup.py,sha256=VA6mhVGMRadwlpEx4m1wrssmDM8qpfKT21MAijIwjyQ,428
|
|
7
|
-
geney/data_setup.py,sha256=LTiJMYPgv9KnIgUNw-D57Fu4nxL4OojXMpmdhE8QSYU,12228
|
|
8
|
-
geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
9
|
-
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
10
|
-
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
11
|
-
geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
|
|
12
|
-
geney/oncosplice.py,sha256=sp6kfKbFqwpZIuLZadvCq0aj-JUnM_GE99eaGRm19eY,78240
|
|
13
|
-
geney/oncosplice_mouse.py,sha256=LYLOukI9qI1IBkyl1qVRFR5d1NAw7Orlj8Zth-4xCW8,12962
|
|
14
|
-
geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
|
|
15
|
-
geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
|
|
16
|
-
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
|
-
geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
|
|
18
|
-
geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
|
|
19
|
-
geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
|
|
20
|
-
geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
|
|
21
|
-
geney/utils.py,sha256=xJi7fk3g7DkR2rKOb8WePLQNM1ib83rcHecwRdwd5lA,2036
|
|
22
|
-
geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
-
geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
|
|
24
|
-
geney/analyzers/characterize_epistasis.py,sha256=MvcYQMRwZ-qqlX9mn41vmr0Uxb5dIrrcaE3oiZMTYm8,648
|
|
25
|
-
geney/analyzers/compare_sets.py,sha256=TcgL57V7BUPxBoW9lv3xr8qK2Acmykn85Ev3avicQr8,2977
|
|
26
|
-
geney/analyzers/group_comparison.py,sha256=nK1ZabxQhJabD2p4vBCoFHvvmV58YGVRnXu6CyxNVQM,2671
|
|
27
|
-
geney/analyzers/survival.py,sha256=CPVELUlftyFvk_-w06a1Yw-BkBU1L5FtRW8Q7Qciw-s,6278
|
|
28
|
-
geney/analyzers/tcga_annotations.py,sha256=DscIIsdXl6BtDLKXvZJQnmUJtBXxZRN5iikdAn2tJco,8677
|
|
29
|
-
geney/analyzers/visualize_protein_conservation.py,sha256=tFZ8wQx0IqomqZlGUVGviHUZAeCbVmfpPN9Ovi4H0P0,16510
|
|
30
|
-
geney/data_parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
-
geney/data_parsers/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
32
|
-
geney/immunotherapy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
-
geney/immunotherapy/netchop.py,sha256=vLy-ahEKxU6IzwmnnqefXDJjZOeGIprLWbKU3t-M7sc,2800
|
|
34
|
-
geney/mutations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
geney/mutations/variant_utils.py,sha256=4exIP02lviMmsZTq8UYkjlunLpnBruGM4GLz0C7P0wM,4285
|
|
36
|
-
geney/oncosplice/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
-
geney/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
geney/pipelines/dask_utils.py,sha256=J68bpbikdUGGirPERczu1cf_ajZmEvDfWEj8GIMJvII,5641
|
|
39
|
-
geney/splicing/__init__.py,sha256=0x9Rt0znGnf3Hs92BYRBjdHZHOMsd_27QNTRlfohzLY,60
|
|
40
|
-
geney/splicing/spliceai_utils.py,sha256=-cmSJYMcgdUsRnjeep69y-MoubE-BFdNrwQPZj6h__M,11778
|
|
41
|
-
geney/splicing/splicing_isoform_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
-
geney/splicing/splicing_utils.py,sha256=pr9MbGmnSfKddkpoC1luquP2Trpobpzd7Pgyx2x7ymY,16892
|
|
43
|
-
geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
-
geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
|
|
45
|
-
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
46
|
-
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
47
|
-
geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
geney-1.2.20.dist-info/METADATA,sha256=YRkn1hq4ARDlqMiAWPpDmbUny5v7NxjbN4YcRpaq2o8,1163
|
|
50
|
-
geney-1.2.20.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
51
|
-
geney-1.2.20.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
52
|
-
geney-1.2.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|