celltype-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celltype_cli-0.1.0.dist-info/METADATA +267 -0
- celltype_cli-0.1.0.dist-info/RECORD +89 -0
- celltype_cli-0.1.0.dist-info/WHEEL +4 -0
- celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
- celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- ct/__init__.py +3 -0
- ct/agent/__init__.py +0 -0
- ct/agent/case_studies.py +426 -0
- ct/agent/config.py +523 -0
- ct/agent/doctor.py +544 -0
- ct/agent/knowledge.py +523 -0
- ct/agent/loop.py +99 -0
- ct/agent/mcp_server.py +478 -0
- ct/agent/orchestrator.py +733 -0
- ct/agent/runner.py +656 -0
- ct/agent/sandbox.py +481 -0
- ct/agent/session.py +145 -0
- ct/agent/system_prompt.py +186 -0
- ct/agent/trace_store.py +228 -0
- ct/agent/trajectory.py +169 -0
- ct/agent/types.py +182 -0
- ct/agent/workflows.py +462 -0
- ct/api/__init__.py +1 -0
- ct/api/app.py +211 -0
- ct/api/config.py +120 -0
- ct/api/engine.py +124 -0
- ct/cli.py +1448 -0
- ct/data/__init__.py +0 -0
- ct/data/compute_providers.json +59 -0
- ct/data/cro_database.json +395 -0
- ct/data/downloader.py +238 -0
- ct/data/loaders.py +252 -0
- ct/kb/__init__.py +5 -0
- ct/kb/benchmarks.py +147 -0
- ct/kb/governance.py +106 -0
- ct/kb/ingest.py +415 -0
- ct/kb/reasoning.py +129 -0
- ct/kb/schema_monitor.py +162 -0
- ct/kb/substrate.py +387 -0
- ct/models/__init__.py +0 -0
- ct/models/llm.py +370 -0
- ct/tools/__init__.py +195 -0
- ct/tools/_compound_resolver.py +297 -0
- ct/tools/biomarker.py +368 -0
- ct/tools/cellxgene.py +282 -0
- ct/tools/chemistry.py +1371 -0
- ct/tools/claude.py +390 -0
- ct/tools/clinical.py +1153 -0
- ct/tools/clue.py +249 -0
- ct/tools/code.py +1069 -0
- ct/tools/combination.py +397 -0
- ct/tools/compute.py +402 -0
- ct/tools/cro.py +413 -0
- ct/tools/data_api.py +2114 -0
- ct/tools/design.py +295 -0
- ct/tools/dna.py +575 -0
- ct/tools/experiment.py +604 -0
- ct/tools/expression.py +655 -0
- ct/tools/files.py +957 -0
- ct/tools/genomics.py +1387 -0
- ct/tools/http_client.py +146 -0
- ct/tools/imaging.py +319 -0
- ct/tools/intel.py +223 -0
- ct/tools/literature.py +743 -0
- ct/tools/network.py +422 -0
- ct/tools/notification.py +111 -0
- ct/tools/omics.py +3330 -0
- ct/tools/ops.py +1230 -0
- ct/tools/parity.py +649 -0
- ct/tools/pk.py +245 -0
- ct/tools/protein.py +678 -0
- ct/tools/regulatory.py +643 -0
- ct/tools/remote_data.py +179 -0
- ct/tools/report.py +181 -0
- ct/tools/repurposing.py +376 -0
- ct/tools/safety.py +1280 -0
- ct/tools/shell.py +178 -0
- ct/tools/singlecell.py +533 -0
- ct/tools/statistics.py +552 -0
- ct/tools/structure.py +882 -0
- ct/tools/target.py +901 -0
- ct/tools/translational.py +123 -0
- ct/tools/viability.py +218 -0
- ct/ui/__init__.py +0 -0
- ct/ui/markdown.py +31 -0
- ct/ui/status.py +258 -0
- ct/ui/suggestions.py +567 -0
- ct/ui/terminal.py +1456 -0
- ct/ui/traces.py +112 -0
ct/tools/combination.py
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Combination therapy tools: synergy prediction, synthetic lethality, metabolic vulnerability.
|
|
3
|
+
|
|
4
|
+
References crews-glue-discovery/scripts/synergy_prediction.py and metabolic_vulnerability.py
|
|
5
|
+
for scoring logic.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import numpy as np
|
|
10
|
+
from ct.tools import registry
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Gene sets for each metabolic pathway scored by metabolic_vulnerability()
# (taken from metabolic_vulnerability.py). Keys are pathway identifiers and
# values are lists of gene symbols; the same keys index METABOLIC_INHIBITORS.
# Note that SDHA is listed under both oxidative_phosphorylation and tca_cycle.
METABOLIC_PATHWAYS = {
    "glycolysis": [
        "HK1", "HK2", "GPI", "PFKM", "PFKL",
        "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1",
        "ENO1", "ENO2", "PKM", "LDHA", "LDHB",
    ],
    "oxidative_phosphorylation": [
        "NDUFA1", "NDUFA2", "NDUFB1", "NDUFS1",
        "SDHA", "SDHB",
        "UQCRC1", "UQCRC2",
        "COX5A", "COX5B",
        "ATP5F1A", "ATP5F1B",
    ],
    "fatty_acid_synthesis": [
        "FASN", "ACACA", "ACLY", "SCD",
        "ELOVL1", "ELOVL5", "ELOVL6",
    ],
    "fatty_acid_oxidation": [
        "CPT1A", "CPT1B", "CPT2",
        "ACADM", "ACADL", "ACADVL",
        "HADHA", "HADHB", "ECHS1",
    ],
    "glutamine_metabolism": [
        "GLS", "GLS2", "GLUD1",
        "SLC1A5", "SLC7A5",
        "GOT1", "GOT2",
    ],
    "one_carbon_metabolism": [
        "MTHFR", "MTHFD1", "MTHFD2",
        "SHMT1", "SHMT2", "DHFR",
        "TYMS", "MTR", "MAT2A",
    ],
    "nucleotide_synthesis": [
        "CAD", "DHODH", "UMPS",
        "CTPS1", "CTPS2",
        "IMPDH1", "IMPDH2",
        "PAICS", "ATIC", "GART",
    ],
    "pentose_phosphate": [
        "G6PD", "PGLS", "PGD", "TKT", "TALDO1", "RPIA", "RPE",
    ],
    "tca_cycle": [
        "CS", "ACO1", "ACO2",
        "IDH1", "IDH2", "IDH3A",
        "OGDH", "SUCLA2", "SDHA", "FH",
        "MDH1", "MDH2",
    ],
}
|
|
31
|
+
|
|
32
|
+
# Known inhibitors per metabolic pathway, offered as combination suggestions.
# Keys mirror METABOLIC_PATHWAYS; each value is a list of three display
# strings (some carry the drug's alternate name or target in parentheses).
METABOLIC_INHIBITORS = {
    "glycolysis": [
        "2-DG (2-deoxyglucose)",
        "3-bromopyruvate",
        "lonidamine",
    ],
    "oxidative_phosphorylation": [
        "metformin",
        "IACS-010759",
        "oligomycin A",
    ],
    "fatty_acid_synthesis": [
        "TVB-2640 (denifanstat)",
        "orlistat",
        "TOFA",
    ],
    "fatty_acid_oxidation": [
        "etomoxir",
        "ranolazine",
        "perhexiline",
    ],
    "glutamine_metabolism": [
        "CB-839 (telaglenastat)",
        "BPTES",
        "DON",
    ],
    "one_carbon_metabolism": [
        "methotrexate",
        "pemetrexed",
        "AG-270 (MAT2A inhibitor)",
    ],
    "nucleotide_synthesis": [
        "brequinar (DHODH)",
        "mycophenolate (IMPDH)",
        "leflunomide",
    ],
    "pentose_phosphate": [
        "6-AN (G6PD)",
        "DHEA (G6PD)",
        "oxythiamine (TKT)",
    ],
    "tca_cycle": [
        "ivosidenib (IDH1)",
        "enasidenib (IDH2)",
        "CPI-613 (devimistat)",
    ],
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@registry.register(
    name="combination.synergy_predict",
    description="Predict synergistic compound pairs from anti-correlated L1000 transcriptomic signatures",
    category="combination",
    parameters={
        "compound_id": "Query compound (or 'all' for full pairwise)",
        "top_n": "Number of top synergy candidates to return",
    },
    requires_data=["l1000", "prism", "depmap_model"],
    usage_guide="You want to find compounds that work well together — anti-correlated transcriptomic profiles suggest complementary mechanisms. Use for rational combination therapy design.",
)
def synergy_predict(compound_id: str = "all", top_n: int = 20, **kwargs) -> dict:
    """Find synergistic compound pairs based on anti-correlated L1000 signatures.

    A pair is a candidate when the cosine similarity of its two L1000
    signatures is below -0.3. Each candidate is scored as::

        |cosine| * (0.4 + 0.6 * tissue_complementarity) * (1 + min(avg_potency, 2) / 4)

    where ``tissue_complementarity`` is the fraction of "killed" lineages
    (mean PRISM LFC < -0.3 at the compound's maximum dose) that are killed by
    exactly one of the two compounds (0.5 when fewer than three lineages are
    shared or a PRISM profile is missing), and the potency factor is applied
    only when both compounds have a PRISM profile.

    Args:
        compound_id: Compound to query, or ``"all"`` to score every unordered
            pair. Names are passed through ``resolve_compound`` first.
        top_n: Number of highest-scoring pairs to return.
        **kwargs: Ignored; accepted so the registry can pass extra arguments.

    Returns:
        A dict with ``summary``, ``n_pairs`` and ``top_candidates`` (a list of
        per-pair records). If a specific compound was requested but is not
        present in the L1000 index, a dict with ``error`` and ``summary`` is
        returned instead of silently scanning every pair.
    """
    from ct.data.loaders import load_l1000, load_prism, load_model_metadata
    from sklearn.metrics.pairwise import cosine_similarity
    from ct.tools._compound_resolver import resolve_compound

    ANTICORR_THRESHOLD = -0.3

    single_query = compound_id != "all"
    if single_query:
        compound_id = resolve_compound(compound_id, dataset="l1000")

    l1000 = load_l1000()
    compounds = l1000.index.tolist()
    signatures = l1000.values

    if single_query and compound_id not in compounds:
        # Previously this fell through to the full all-vs-all scan, returning
        # pairs that had nothing to do with the requested compound.
        message = f"Compound {compound_id} not found in L1000 signatures"
        return {"error": message, "summary": message}

    # Map PRISM cell-line names to lineages for the complementarity term.
    prism = load_prism()
    model = load_model_metadata()
    ccle_to_lineage = {}
    for _, row in model.iterrows():
        ccle = row.get("CCLEName", "")
        lin = row.get("OncotreeLineage", "Unknown")
        if pd.notna(ccle) and pd.notna(lin):
            ccle_to_lineage[ccle] = lin

    # Per-compound tissue profile: mean LFC per lineage at that compound's
    # maximum dose. Only compounds with an L1000 signature can ever be looked
    # up below, so the rest of PRISM is skipped, and a single groupby replaces
    # one full-table boolean filter per compound.
    tissue_profiles = {}
    relevant = prism[prism["pert_name"].isin(compounds)]
    for cpd, cpd_data in relevant.groupby("pert_name", sort=False):
        top_dose = cpd_data[cpd_data["pert_dose"] == cpd_data["pert_dose"].max()]
        top_dose = top_dose.assign(lineage=top_dose["ccle_name"].map(ccle_to_lineage))
        tissue_profiles[cpd] = top_dose.groupby("lineage")["LFC"].mean()

    # Potency depends on one compound only, so compute it once per compound.
    # A profile with no usable LFC values counts as zero potency rather than
    # propagating NaN into the synergy score.
    potency = {}
    for cpd, profile in tissue_profiles.items():
        mean_lfc = profile.mean() if len(profile) > 0 else 0.0
        potency[cpd] = abs(float(mean_lfc)) if pd.notna(mean_lfc) else 0.0

    # Similarity rows to scan, as (row position, similarities to all compounds).
    if single_query:
        query_idx = compounds.index(compound_id)
        # One row is enough for a single query; avoids the full n x n matrix.
        sim_rows = [(query_idx, cosine_similarity(signatures[[query_idx]], signatures)[0])]
    else:
        sim_rows = enumerate(cosine_similarity(signatures))

    results = []
    for idx1, sims in sim_rows:
        cpd1 = compounds[idx1]
        candidates = np.flatnonzero(sims < ANTICORR_THRESHOLD)
        if not single_query:
            # Upper triangle only, so each unordered pair is reported once.
            candidates = candidates[candidates > idx1]

        for j in candidates:
            cpd2 = compounds[j]
            if cpd1 == cpd2:
                continue
            cosine = sims[j]

            profile_1 = tissue_profiles.get(cpd1)
            profile_2 = tissue_profiles.get(cpd2)
            both_profiled = profile_1 is not None and profile_2 is not None

            # Tissue complementarity
            tissue_comp = 0.5  # default when it cannot be estimated
            if both_profiled:
                common = profile_1.index.intersection(profile_2.index)
                if len(common) >= 3:
                    kills_1 = profile_1[common] < -0.3
                    kills_2 = profile_2[common] < -0.3
                    comp_tissues = (kills_1 ^ kills_2).sum()
                    overlap_tissues = (kills_1 & kills_2).sum()
                    # +0.001 keeps the ratio defined when neither compound
                    # kills any shared lineage.
                    tissue_comp = comp_tissues / (comp_tissues + overlap_tissues + 0.001)

            # Synergy score
            anticorr_strength = abs(cosine)
            score = anticorr_strength * (0.4 + 0.6 * tissue_comp)

            # Potency bonus (capped at a 1.5x multiplier)
            if both_profiled:
                avg_potency = (potency[cpd1] + potency[cpd2]) / 2.0
                score *= 1.0 + min(avg_potency, 2.0) / 4.0

            results.append({
                "compound_1": cpd1,
                "compound_2": cpd2,
                "cosine_similarity": round(float(cosine), 4),
                "anticorrelation_strength": round(float(anticorr_strength), 4),
                "tissue_complementarity": round(float(tissue_comp), 4),
                "synergy_score": round(float(score), 4),
            })

    if not results:
        return {
            "summary": f"Synergy prediction: 0 anti-correlated pairs (cosine < {ANTICORR_THRESHOLD}). No synergistic candidates found.",
            "n_pairs": 0,
            "top_candidates": [],
        }

    df = pd.DataFrame(results).sort_values("synergy_score", ascending=False)
    top_hits = df.head(top_n)

    # Explicit branch: the original relied on implicit string concatenation
    # binding tighter than the conditional expression.
    if len(top_hits) > 0:
        summary = (
            f"Synergy prediction: {len(df)} anti-correlated pairs (cosine < {ANTICORR_THRESHOLD})\n"
            f"Top synergy score: {top_hits.iloc[0]['synergy_score']:.4f}"
        )
    else:
        summary = "No pairs found"

    return {
        "summary": summary,
        "n_pairs": len(df),
        "top_candidates": top_hits.to_dict("records"),
    }
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@registry.register(
    name="combination.synthetic_lethality",
    description="Mine DepMap CRISPR data for synthetic lethal gene pairs with a target",
    category="combination",
    parameters={
        "gene": "Target gene to find synthetic lethal partners for",
        "top_n": "Number of top partners to return",
    },
    requires_data=["depmap_crispr"],
    usage_guide="You want to find genes whose loss is lethal only when your target gene is also disrupted. Use for identifying combination targets and understanding genetic dependencies.",
)
def synthetic_lethality(gene: str, top_n: int = 20, **kwargs) -> dict:
    """Find synthetic lethal partners via anti-correlated CRISPR dependencies.

    Genes with strong negative correlation in DepMap CRISPR effect = when one is
    essential, the other is dispensable -> synthetic lethality.

    Every other gene column is Pearson-correlated with the target across the
    cell lines where both genes have a value (at least 50 are required).
    Genes with r < -0.1 are kept and labelled by the correlation rounded to
    four decimals: "strong" (r < -0.3), "moderate" (r < -0.2), otherwise "weak".

    Args:
        gene: Target gene; must be a column of the CRISPR table.
        top_n: Number of most negatively correlated partners to return.
        **kwargs: Ignored; accepted so the registry can pass extra arguments.

    Returns:
        A dict with ``summary``, ``target_gene``, ``n_candidates`` and
        ``top_partners`` (records sorted by ascending correlation), or a dict
        with ``error`` and ``summary`` when the gene is not in the table.
    """
    from ct.data.loaders import load_crispr
    from scipy import stats

    crispr = load_crispr()

    if gene not in crispr.columns:
        message = f"Gene {gene} not found in DepMap CRISPR data"
        return {"error": message, "summary": message}

    # Keep only cell lines where the target is measured, once, instead of
    # re-deriving an index intersection for every gene in the loop.
    screened = crispr[crispr[gene].notna()]
    target_vals = screened[gene].to_numpy(dtype=float)

    # Compute anti-correlations (negative r = synthetic lethal)
    results = []
    for other_gene in screened.columns:
        if other_gene == gene:
            continue
        other_vals = screened[other_gene].to_numpy(dtype=float)
        shared = ~np.isnan(other_vals)
        n_shared = int(shared.sum())
        if n_shared < 50:
            continue

        r, p = stats.pearsonr(target_vals[shared], other_vals[shared])
        if r < -0.1:  # only anti-correlated (a NaN r also fails this test)
            r_rounded = round(float(r), 4)
            # Label on the rounded value so it agrees with the summary
            # counts, which are computed from the stored correlation.
            if r_rounded < -0.3:
                strength = "strong"
            elif r_rounded < -0.2:
                strength = "moderate"
            else:
                strength = "weak"
            results.append({
                "gene": other_gene,
                "correlation": r_rounded,
                "p_value": float(p),
                "n_cell_lines": n_shared,
                "strength": strength,
            })

    if not results:
        return {
            "summary": f"Synthetic lethality screen for {gene}: no anti-correlated genes found",
            "target_gene": gene,
            "n_candidates": 0,
            "top_partners": [],
        }

    df = pd.DataFrame(results).sort_values("correlation")
    top_sl = df.head(top_n)

    n_strong = (df["correlation"] < -0.3).sum()
    n_moderate = ((df["correlation"] >= -0.3) & (df["correlation"] < -0.2)).sum()

    return {
        "summary": (
            f"Synthetic lethality screen for {gene}: {len(df)} anti-correlated genes\n"
            f"Strong (r < -0.3): {n_strong}, "
            f"Moderate (r < -0.2): {n_moderate}"
        ),
        "target_gene": gene,
        "n_candidates": len(df),
        "top_partners": top_sl.to_dict("records"),
    }
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@registry.register(
    name="combination.metabolic_vulnerability",
    description="Map metabolic vulnerabilities: compounds that suppress metabolic pathways where dependent cells are more sensitive",
    category="combination",
    parameters={
        "compound_id": "Compound to profile (or 'all')",
        "pathway": "Specific metabolic pathway (or 'all')",
    },
    requires_data=["l1000", "depmap_crispr", "prism", "depmap_model"],
    usage_guide="You want to exploit metabolic dependencies — find pathways a compound suppresses where dependent cells are more sensitive. Use to identify metabolic inhibitor combinations (e.g., add metformin to exploit OxPhos dependency).",
)
def metabolic_vulnerability(compound_id: str = "all", pathway: str = "all", **kwargs) -> dict:
    """Identify exploitable metabolic vulnerabilities (the vulnerability triangle).

    Triangle: compound suppresses pathway (L1000) + pathway-dependent cells more
    sensitive (PRISM) = exploitable vulnerability.

    Steps:
      1. Score each pathway per compound as the mean of per-gene z-scores
         across the L1000 table (pathways with < 2 genes present are skipped).
      2. Flag a cell line as pathway-dependent when its minimum CRISPR effect
         over the pathway genes is below -0.5.
      3. For each compound/pathway with |z| >= 1, compare PRISM LFC between
         dependent and independent cell lines (Welch t-test) and classify the
         result as EXPLOIT, ACTIVATION_SENSITIZES, PARADOXICAL_RESISTANCE or
         WEAK_SIGNAL.

    Args:
        compound_id: Compound to profile, or ``"all"``. Names are passed
            through ``resolve_compound`` first.
        pathway: A key of ``METABOLIC_PATHWAYS``, or ``"all"``.
        **kwargs: Ignored; accepted so the registry can pass extra arguments.

    Returns:
        A dict with ``summary``, ``n_total``, ``n_exploitable`` and
        ``vulnerabilities`` (all records when fewer than 200 were produced,
        otherwise only the EXPLOIT records). A dict with ``error`` and
        ``summary`` is returned for an unknown pathway, insufficient L1000
        gene coverage, or fewer than 20 cell lines shared by PRISM and DepMap.
    """
    from ct.data.loaders import load_l1000, load_crispr, load_prism, load_model_metadata
    from ct.tools._compound_resolver import resolve_compound
    from scipy import stats

    # Reject an unknown pathway up front instead of raising a bare KeyError.
    if pathway != "all" and pathway not in METABOLIC_PATHWAYS:
        message = (
            f"Unknown metabolic pathway '{pathway}'. "
            f"Available: {', '.join(METABOLIC_PATHWAYS)}"
        )
        return {"error": message, "summary": message}

    if compound_id != "all":
        compound_id = resolve_compound(compound_id, dataset="l1000")

    l1000 = load_l1000()
    crispr = load_crispr()
    prism = load_prism()
    model = load_model_metadata()

    # Step 1: Score L1000 metabolic pathways
    l1000_genes = set(l1000.columns)
    pathway_scores = {}
    pathways_to_test = {pathway: METABOLIC_PATHWAYS[pathway]} if pathway != "all" else METABOLIC_PATHWAYS

    for pw_name, genes in pathways_to_test.items():
        found = [g for g in genes if g in l1000_genes]
        if len(found) < 2:
            continue
        sub = l1000[found]
        # Column-wise z-score; a zero-variance gene becomes NaN and is
        # ignored by the row mean below.
        zscored = (sub - sub.mean()) / sub.std()
        pathway_scores[pw_name] = zscored.mean(axis=1)

    if not pathway_scores:
        message = "No metabolic pathways have sufficient gene coverage in L1000"
        return {"error": message, "summary": message}
    pw_score_df = pd.DataFrame(pathway_scores)

    # Step 2: DepMap metabolic dependency
    crispr_genes = set(crispr.columns)
    dep_binary = {}
    for pw_name, genes in pathways_to_test.items():
        found = [g for g in genes if g in crispr_genes]
        if not found:
            continue
        dep_binary[pw_name] = (crispr[found].min(axis=1) < -0.5)

    # Step 3: Map PRISM to DepMap and test vulnerability triangle
    ccle_to_model_id = {}
    for _, row in model.iterrows():
        ccle = row.get("CCLEName", "")
        mid = row.get("ModelID", "")
        if pd.notna(ccle) and pd.notna(mid):
            ccle_to_model_id[ccle] = mid

    # NOTE(review): this keeps only rows at the global maximum PRISM dose,
    # whereas synergy_predict uses each compound's own maximum dose — confirm
    # which is intended.
    prism_10 = prism[prism["pert_dose"] == prism["pert_dose"].max()]
    prism_wide = prism_10.pivot_table(index="ccle_name", columns="pert_name", values="LFC", aggfunc="mean")

    # Re-key the wide table by ModelID. Cell lines without a ModelID are
    # dropped, and lines sharing a ModelID are averaged, so every ModelID owns
    # exactly one row and can be aligned with the dependency mask.
    prism_by_model = prism_wide.copy()
    prism_by_model.index = prism_wide.index.map(ccle_to_model_id)
    prism_by_model = prism_by_model[prism_by_model.index.notna()].groupby(level=0).mean()

    overlap = set(crispr.index) & set(prism_by_model.index)
    if len(overlap) < 20:
        message = f"Insufficient overlap: only {len(overlap)} cell lines in both PRISM and DepMap"
        return {"error": message, "summary": message}

    overlap_list = sorted(overlap)
    # Rows are now in overlap_list order, the same order used for dep_mask.
    lfc_by_model = prism_by_model.reindex(overlap_list)

    compounds_to_test = [compound_id] if compound_id != "all" else [
        c for c in pw_score_df.index if c in prism_wide.columns
    ]

    vulnerabilities = []
    for pw_name in dep_binary:
        if pw_name not in pw_score_df.columns:
            continue
        # astype(bool) keeps `~dep_mask` a logical NOT even if reindexing
        # introduced missing entries (object dtype after fillna).
        dep_mask = dep_binary[pw_name].reindex(overlap_list).fillna(False).astype(bool)
        n_dep = int(dep_mask.sum())
        n_indep = int((~dep_mask).sum())
        if n_dep < 5 or n_indep < 5:
            continue
        dep_flags = dep_mask.to_numpy()

        for cpd in compounds_to_test:
            if cpd not in pw_score_df.index or cpd not in lfc_by_model.columns:
                continue

            l1000_z = pw_score_df.loc[cpd, pw_name]
            # Skip weak pathway effects; a NaN score is skipped too rather
            # than being reported as WEAK_SIGNAL.
            if pd.isna(l1000_z) or abs(l1000_z) < 1.0:
                continue

            # Get PRISM LFC for this compound in overlapping cell lines
            lfc_vals = lfc_by_model[cpd].to_numpy(dtype=float)
            valid = ~np.isnan(lfc_vals)

            dep_lfc = lfc_vals[valid & dep_flags]
            indep_lfc = lfc_vals[valid & ~dep_flags]

            if len(dep_lfc) < 5 or len(indep_lfc) < 5:
                continue

            _, p_val = stats.ttest_ind(dep_lfc, indep_lfc, equal_var=False)
            delta_lfc = float(np.mean(dep_lfc) - np.mean(indep_lfc))

            # Classify
            if l1000_z < -1.0 and delta_lfc < 0:
                vuln_type = "EXPLOIT"
            elif l1000_z > 1.0 and delta_lfc < 0:
                vuln_type = "ACTIVATION_SENSITIZES"
            elif l1000_z < -1.0 and delta_lfc > 0:
                vuln_type = "PARADOXICAL_RESISTANCE"
            else:
                vuln_type = "WEAK_SIGNAL"

            # Combination suggestion
            combo_drugs = METABOLIC_INHIBITORS.get(pw_name, [])

            vulnerabilities.append({
                "compound": cpd,
                "pathway": pw_name,
                "l1000_zscore": round(float(l1000_z), 3),
                "delta_lfc": round(delta_lfc, 3),
                "p_value": round(float(p_val), 4),
                # Counts over all overlapping cell lines, not only those
                # with an LFC value for this compound.
                "n_dependent": n_dep,
                "n_independent": n_indep,
                "vulnerability_type": vuln_type,
                "suggested_combinations": combo_drugs[:3] if vuln_type == "EXPLOIT" else [],
            })

    df = pd.DataFrame(vulnerabilities)
    exploits = df[df["vulnerability_type"] == "EXPLOIT"] if len(df) > 0 else df

    return {
        "summary": (
            f"Metabolic vulnerability analysis: {len(df)} compound-pathway pairs tested\n"
            f"Exploitable vulnerabilities: {len(exploits)}\n"
            f"Pathways screened: {', '.join(pathways_to_test.keys())}"
        ),
        "n_total": len(df),
        "n_exploitable": len(exploits),
        "vulnerabilities": df.to_dict("records") if len(df) < 200 else exploits.to_dict("records"),
    }
|