celltype-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. celltype_cli-0.1.0.dist-info/METADATA +267 -0
  2. celltype_cli-0.1.0.dist-info/RECORD +89 -0
  3. celltype_cli-0.1.0.dist-info/WHEEL +4 -0
  4. celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
  5. celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. ct/__init__.py +3 -0
  7. ct/agent/__init__.py +0 -0
  8. ct/agent/case_studies.py +426 -0
  9. ct/agent/config.py +523 -0
  10. ct/agent/doctor.py +544 -0
  11. ct/agent/knowledge.py +523 -0
  12. ct/agent/loop.py +99 -0
  13. ct/agent/mcp_server.py +478 -0
  14. ct/agent/orchestrator.py +733 -0
  15. ct/agent/runner.py +656 -0
  16. ct/agent/sandbox.py +481 -0
  17. ct/agent/session.py +145 -0
  18. ct/agent/system_prompt.py +186 -0
  19. ct/agent/trace_store.py +228 -0
  20. ct/agent/trajectory.py +169 -0
  21. ct/agent/types.py +182 -0
  22. ct/agent/workflows.py +462 -0
  23. ct/api/__init__.py +1 -0
  24. ct/api/app.py +211 -0
  25. ct/api/config.py +120 -0
  26. ct/api/engine.py +124 -0
  27. ct/cli.py +1448 -0
  28. ct/data/__init__.py +0 -0
  29. ct/data/compute_providers.json +59 -0
  30. ct/data/cro_database.json +395 -0
  31. ct/data/downloader.py +238 -0
  32. ct/data/loaders.py +252 -0
  33. ct/kb/__init__.py +5 -0
  34. ct/kb/benchmarks.py +147 -0
  35. ct/kb/governance.py +106 -0
  36. ct/kb/ingest.py +415 -0
  37. ct/kb/reasoning.py +129 -0
  38. ct/kb/schema_monitor.py +162 -0
  39. ct/kb/substrate.py +387 -0
  40. ct/models/__init__.py +0 -0
  41. ct/models/llm.py +370 -0
  42. ct/tools/__init__.py +195 -0
  43. ct/tools/_compound_resolver.py +297 -0
  44. ct/tools/biomarker.py +368 -0
  45. ct/tools/cellxgene.py +282 -0
  46. ct/tools/chemistry.py +1371 -0
  47. ct/tools/claude.py +390 -0
  48. ct/tools/clinical.py +1153 -0
  49. ct/tools/clue.py +249 -0
  50. ct/tools/code.py +1069 -0
  51. ct/tools/combination.py +397 -0
  52. ct/tools/compute.py +402 -0
  53. ct/tools/cro.py +413 -0
  54. ct/tools/data_api.py +2114 -0
  55. ct/tools/design.py +295 -0
  56. ct/tools/dna.py +575 -0
  57. ct/tools/experiment.py +604 -0
  58. ct/tools/expression.py +655 -0
  59. ct/tools/files.py +957 -0
  60. ct/tools/genomics.py +1387 -0
  61. ct/tools/http_client.py +146 -0
  62. ct/tools/imaging.py +319 -0
  63. ct/tools/intel.py +223 -0
  64. ct/tools/literature.py +743 -0
  65. ct/tools/network.py +422 -0
  66. ct/tools/notification.py +111 -0
  67. ct/tools/omics.py +3330 -0
  68. ct/tools/ops.py +1230 -0
  69. ct/tools/parity.py +649 -0
  70. ct/tools/pk.py +245 -0
  71. ct/tools/protein.py +678 -0
  72. ct/tools/regulatory.py +643 -0
  73. ct/tools/remote_data.py +179 -0
  74. ct/tools/report.py +181 -0
  75. ct/tools/repurposing.py +376 -0
  76. ct/tools/safety.py +1280 -0
  77. ct/tools/shell.py +178 -0
  78. ct/tools/singlecell.py +533 -0
  79. ct/tools/statistics.py +552 -0
  80. ct/tools/structure.py +882 -0
  81. ct/tools/target.py +901 -0
  82. ct/tools/translational.py +123 -0
  83. ct/tools/viability.py +218 -0
  84. ct/ui/__init__.py +0 -0
  85. ct/ui/markdown.py +31 -0
  86. ct/ui/status.py +258 -0
  87. ct/ui/suggestions.py +567 -0
  88. ct/ui/terminal.py +1456 -0
  89. ct/ui/traces.py +112 -0
@@ -0,0 +1,376 @@
1
+ """Drug repurposing tools: connectivity map queries, signature matching."""
2
+
3
+ import numpy as np
4
+ from ct.tools import registry
5
+ from ct.tools.http_client import request_json
6
+
7
+
8
+ def _to_float(value):
9
+ """Best-effort float conversion for heterogeneous API payloads."""
10
+ try:
11
+ return float(value)
12
+ except (TypeError, ValueError):
13
+ return None
14
+
15
+
16
+ def _extract_l1000fwd_hits(payload: dict | list, mode: str) -> list:
17
+ """Extract mode-specific hits from L1000FWD response payload."""
18
+ if isinstance(payload, list):
19
+ return payload
20
+
21
+ if not isinstance(payload, dict):
22
+ return []
23
+
24
+ if mode == "reverse":
25
+ primary_keys = ("opposite", "reverse", "discordant", "anti")
26
+ else:
27
+ primary_keys = ("similar", "mimic", "concordant")
28
+
29
+ secondary_keys = ("results", "topn", "data")
30
+
31
+ for key in primary_keys:
32
+ hits = payload.get(key)
33
+ if isinstance(hits, list):
34
+ return hits
35
+
36
+ for section in secondary_keys:
37
+ nested = payload.get(section)
38
+ if not isinstance(nested, dict):
39
+ continue
40
+ for key in primary_keys:
41
+ hits = nested.get(key)
42
+ if isinstance(hits, list):
43
+ return hits
44
+
45
+ return []
46
+
47
+
48
+ def _normalize_l1000fwd_hit(hit, rank: int) -> dict:
49
+ """Normalize one L1000FWD hit into a stable ct-friendly shape."""
50
+ if not isinstance(hit, dict):
51
+ return {
52
+ "rank": rank,
53
+ "compound": str(hit),
54
+ "connectivity_score": None,
55
+ "raw": hit,
56
+ }
57
+
58
+ compound = (
59
+ hit.get("pert_iname")
60
+ or hit.get("name")
61
+ or hit.get("drug")
62
+ or hit.get("sig_id")
63
+ or hit.get("id")
64
+ or f"hit_{rank}"
65
+ )
66
+ score = (
67
+ _to_float(hit.get("score"))
68
+ or _to_float(hit.get("combined_score"))
69
+ or _to_float(hit.get("combined_scores"))
70
+ or _to_float(hit.get("tau"))
71
+ or _to_float(hit.get("zscore"))
72
+ or _to_float(hit.get("zscores"))
73
+ )
74
+ p_value = _to_float(hit.get("p_value")) or _to_float(hit.get("pval")) or _to_float(hit.get("pvals"))
75
+ q_value = _to_float(hit.get("q_value")) or _to_float(hit.get("qval")) or _to_float(hit.get("qvals"))
76
+
77
+ normalized = {
78
+ "rank": rank,
79
+ "compound": compound,
80
+ "connectivity_score": score,
81
+ }
82
+ if p_value is not None:
83
+ normalized["p_value"] = p_value
84
+ if q_value is not None:
85
+ normalized["q_value"] = q_value
86
+ normalized["raw"] = hit
87
+ return normalized
88
+
89
+
90
+ def _query_l1000fwd(up_genes: list[str], down_genes: list[str], mode: str, top_n: int) -> tuple[list, str | None]:
91
+ """Query free L1000FWD API and return normalized hits."""
92
+ if not up_genes and not down_genes:
93
+ return [], "Signature is empty after separating up/down genes"
94
+
95
+ search_payload = {
96
+ "up_genes": up_genes,
97
+ "down_genes": down_genes,
98
+ }
99
+ search_data, search_error = request_json(
100
+ "POST",
101
+ "https://maayanlab.cloud/L1000FWD/sig_search",
102
+ json=search_payload,
103
+ timeout=45,
104
+ retries=1,
105
+ )
106
+ if search_error:
107
+ return [], f"L1000FWD sig_search failed: {search_error}"
108
+ if not isinstance(search_data, dict):
109
+ return [], "L1000FWD sig_search returned unexpected payload"
110
+
111
+ result_id = search_data.get("result_id") or search_data.get("id")
112
+ if not result_id:
113
+ return [], "L1000FWD did not return a result_id"
114
+
115
+ result_data, result_error = request_json(
116
+ "GET",
117
+ f"https://maayanlab.cloud/L1000FWD/result/topn/{result_id}",
118
+ timeout=45,
119
+ retries=1,
120
+ )
121
+ if result_error:
122
+ return [], f"L1000FWD topn lookup failed for {result_id}: {result_error}"
123
+
124
+ raw_hits = _extract_l1000fwd_hits(result_data, mode=mode)
125
+ if not raw_hits:
126
+ return [], f"L1000FWD returned no {mode} hits"
127
+
128
+ hits = [_normalize_l1000fwd_hit(hit, rank=i + 1) for i, hit in enumerate(raw_hits[:top_n])]
129
+ return hits, None
130
+
131
+
132
def _correlate_with_profiles(query_vec, profiles, exclude=None, min_genes=10):
    """Pearson-correlate ``query_vec`` against every row of ``profiles``.

    Parameters
    ----------
    query_vec : array-like
        Query values, aligned position-by-position with ``profiles`` columns.
    profiles : DataFrame-like
        One row per compound (row labels are the compound names).
    exclude : hashable, optional
        Row label to skip (the query compound itself).
    min_genes : int
        Minimum number of positions where both vectors are non-NaN.

    Returns
    -------
    list[dict]
        One ``{"compound", "correlation", "p_value", "n_genes"}`` record per
        row with a defined correlation, in row order.
    """
    from scipy import stats as sp_stats

    query_vec = np.asarray(query_vec, dtype=float)
    query_missing = np.isnan(query_vec)
    matrix = profiles.to_numpy(dtype=float)

    scored = []
    for cpd, other_vec in zip(profiles.index.tolist(), matrix):
        if exclude is not None and cpd == exclude:
            continue
        valid = ~(query_missing | np.isnan(other_vec))
        n_valid = int(valid.sum())
        if n_valid < min_genes:
            continue
        r, p = sp_stats.pearsonr(query_vec[valid], other_vec[valid])
        r = float(r)
        if not np.isfinite(r):
            # A constant vector has no defined correlation (NaN). Keeping it
            # would make the later sort order unpredictable, so drop it.
            continue
        scored.append({
            "compound": cpd,
            "correlation": round(r, 4),
            "p_value": float(p),
            "n_genes": n_valid,
        })
    return scored


@registry.register(
    name="repurposing.cmap_query",
    description="Query for drug repurposing opportunities using L1000 connectivity map signature matching",
    category="repurposing",
    parameters={
        "gene_signature": "Dict of gene:value (expression changes), used as query signature",
        "compound_id": "Compound ID to use as query (pulls L1000 signature; alternative to gene_signature)",
        "mode": "'similar' (same mechanism) or 'reverse' (opposing signature, e.g. disease reversal)",
        "top_n": "Number of top hits to return (default 20)",
        "allow_remote": "If true and local L1000 is unavailable, try free L1000FWD API fallback",
    },
    requires_data=[],
    usage_guide="You want to find drug repurposing opportunities — compounds with similar signatures "
                "(shared mechanism) or reverse signatures (disease-reversing). Provide a gene signature "
                "dict or compound_id. Use mode='reverse' to find compounds that reverse a disease signature. "
                "Works best with local L1000 data; if unavailable, can query the free L1000FWD API.",
)
def cmap_query(gene_signature: dict = None, compound_id: str = None,
               mode: str = "similar", top_n: int = 20,
               allow_remote: bool = True, **kwargs) -> dict:
    """Query for drug repurposing via connectivity map signature matching.

    Strategy:
    1. If compound_id provided + L1000 loaded: correlate that compound's
       signature with all other compounds in the L1000 matrix.
    2. If gene_signature provided + L1000 loaded with at least 10 shared
       genes: correlate the query signature against all compounds in L1000.
    3. Otherwise, for a gene_signature: try the L1000FWD API when
       ``allow_remote`` is true; if that yields no hits, return the up/down
       gene lists plus links to external CMap tools.

    Parameters
    ----------
    gene_signature : dict, optional
        Query signature as {gene_name: expression_value}. Positive values =
        upregulated, negative = downregulated.
    compound_id : str, optional
        Compound ID to pull signature from L1000 data. Takes precedence over
        ``gene_signature`` when local L1000 data is loaded.
    mode : str
        "similar" to find compounds with correlated signatures (shared
        mechanism). "reverse" to find compounds with anti-correlated
        signatures (disease reversal).
    top_n : int
        Number of top hits to return.
    allow_remote : bool
        Whether the L1000FWD API may be queried when no local correlation
        could be computed for a gene signature.
    **kwargs
        Accepted and not used by this function.

    Returns
    -------
    dict
        Always has ``summary`` and ``hits``. Invalid input is reported through
        an ``error`` key rather than an exception.
    """
    if gene_signature is None and compound_id is None:
        message = "Provide either gene_signature (dict) or compound_id"
        return {"error": message, "summary": message}
    if mode not in ("similar", "reverse"):
        message = f"Unknown mode '{mode}'. Use 'similar' or 'reverse'"
        return {"error": message, "summary": message}

    # Try to load L1000 data; its absence is handled by the fallbacks below.
    l1000 = None
    try:
        from ct.data.loaders import load_l1000
        l1000 = load_l1000()
    except (FileNotFoundError, ImportError):
        pass

    results = None   # stays None when no local correlation was attempted
    n_common = None  # genes shared with local L1000 (gene_signature path)

    if compound_id is not None and l1000 is not None:
        if compound_id not in l1000.index:
            message = f"Compound {compound_id} not found in L1000 data"
            return {"error": message, "summary": message}
        source = f"compound {compound_id} (L1000)"
        results = _correlate_with_profiles(
            l1000.loc[compound_id].values, l1000, exclude=compound_id
        )
    elif gene_signature is not None:
        query_genes = set(gene_signature.keys())
        source = f"provided signature ({len(query_genes)} genes)"
        if l1000 is not None:
            common_genes = sorted(query_genes & set(l1000.columns))
            n_common = len(common_genes)
            if n_common >= 10:
                results = _correlate_with_profiles(
                    [gene_signature[g] for g in common_genes],
                    l1000.loc[:, common_genes],
                )
            # else: results stays None, fall through to remote/external path
    else:
        message = "compound_id not found in L1000 and no gene_signature provided"
        return {"error": message, "summary": message}

    if results is not None:
        if not results:
            return {
                "summary": "No correlations computed -- insufficient overlapping data",
                "source": source,
                "mode": mode,
                "hits": [],
            }

        # Most positive first for "similar", most negative first for "reverse".
        results.sort(key=lambda x: x["correlation"], reverse=(mode == "similar"))
        top_hits = results[:top_n]

        # Strength is measured in the direction the mode asks for, so a hit
        # correlated the wrong way is never reported as a strong match.
        for hit in top_hits:
            r = hit["correlation"]
            effect = r if mode == "similar" else -r
            if effect > 0.5:
                hit["strength"] = "strong"
            elif effect > 0.3:
                hit["strength"] = "moderate"
            else:
                hit["strength"] = "weak"

        if mode == "similar":
            desc = "similar mechanism (positively correlated)"
        else:
            desc = "signature-reversing (negatively correlated)"
        best_r = top_hits[0]["correlation"] if top_hits else 0

        strong = sum(1 for h in top_hits if h["strength"] == "strong")
        moderate = sum(1 for h in top_hits if h["strength"] == "moderate")

        top3_str = ", ".join(
            f"{h['compound']}(r={h['correlation']:.3f})" for h in top_hits[:3]
        )

        summary = (
            f"CMap query ({source}, mode={mode}):\n"
            f"Searching for: {desc}\n"
            f"Top {len(top_hits)} hits: {strong} strong, {moderate} moderate\n"
            f"Best matches: {top3_str}\n"
            f"Best correlation: {best_r:.4f}"
        )

        return {
            "summary": summary,
            "source": source,
            "mode": mode,
            "n_compounds_screened": len(results),
            "hits": top_hits,
        }

    # No local correlation possible -- remote fallback for gene signatures
    if gene_signature is not None:
        # Separate into up/down gene lists for external tools
        up_genes = sorted(g for g, v in gene_signature.items() if v > 0)
        down_genes = sorted(g for g, v in gene_signature.items() if v < 0)
        remote_error = None

        if allow_remote:
            remote_hits, remote_error = _query_l1000fwd(
                up_genes=up_genes,
                down_genes=down_genes,
                mode=mode,
                top_n=top_n,
            )
            if remote_hits:
                top3_str = ", ".join(
                    f"{h['compound']}(score={h['connectivity_score']:.3f})"
                    if isinstance(h.get("connectivity_score"), float)
                    else h["compound"]
                    for h in remote_hits[:3]
                )
                summary = (
                    f"Remote CMap query via L1000FWD ({source}, mode={mode}): "
                    f"{len(remote_hits)} hit(s) returned. "
                    f"Top hits: {top3_str}"
                )
                return {
                    "summary": summary,
                    "source": source,
                    "mode": mode,
                    "remote_source": "L1000FWD",
                    "local_data_unavailable": True,
                    "remote_used": True,
                    "hits": remote_hits,
                    "up_genes": up_genes[:100],
                    "down_genes": down_genes[:100],
                }

        # Say why nothing was computed locally: data missing vs. too little
        # overlap between the query genes and the loaded L1000 columns.
        if l1000 is None:
            local_note = "L1000 data not loaded -- cannot compute correlations locally.\n"
        else:
            local_note = (
                f"Only {n_common} query gene(s) overlap the local L1000 data "
                f"(need at least 10) -- cannot compute correlations locally.\n"
            )
        remote_note = f"Remote fallback attempt failed: {remote_error}\n" if (allow_remote and remote_error) else ""
        summary = (
            f"{local_note}"
            f"Query signature: {len(up_genes)} up, {len(down_genes)} down genes.\n"
            f"{remote_note}"
            "For external CMap query, use these gene lists at:\n"
            "  - CLUE (https://clue.io/query)\n"
            "  - SigCom LINCS (https://maayanlab.cloud/sigcom-lincs/)\n"
            "  - L1000FWD (https://maayanlab.cloud/L1000FWD/)\n"
            "Upload up-genes and down-genes separately."
        )

        return {
            "summary": summary,
            "source": source,
            "mode": mode,
            "up_genes": up_genes[:100],  # cap for readability
            "down_genes": down_genes[:100],
            "data_unavailable": True,
            "remote_used": False,
            "remote_error": remote_error if allow_remote else None,
            "external_resources": [
                {"name": "CLUE", "url": "https://clue.io/query"},
                {"name": "SigCom LINCS", "url": "https://maayanlab.cloud/sigcom-lincs/"},
                {"name": "L1000FWD", "url": "https://maayanlab.cloud/L1000FWD/"},
            ],
            "hits": [],
        }

    return {
        "summary": "L1000 data not available and no gene_signature provided for external query",
        "error": "Load L1000 data (ct data pull l1000) or provide a gene_signature dict",
        "hits": [],
    }