celltype-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. celltype_cli-0.1.0.dist-info/METADATA +267 -0
  2. celltype_cli-0.1.0.dist-info/RECORD +89 -0
  3. celltype_cli-0.1.0.dist-info/WHEEL +4 -0
  4. celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
  5. celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. ct/__init__.py +3 -0
  7. ct/agent/__init__.py +0 -0
  8. ct/agent/case_studies.py +426 -0
  9. ct/agent/config.py +523 -0
  10. ct/agent/doctor.py +544 -0
  11. ct/agent/knowledge.py +523 -0
  12. ct/agent/loop.py +99 -0
  13. ct/agent/mcp_server.py +478 -0
  14. ct/agent/orchestrator.py +733 -0
  15. ct/agent/runner.py +656 -0
  16. ct/agent/sandbox.py +481 -0
  17. ct/agent/session.py +145 -0
  18. ct/agent/system_prompt.py +186 -0
  19. ct/agent/trace_store.py +228 -0
  20. ct/agent/trajectory.py +169 -0
  21. ct/agent/types.py +182 -0
  22. ct/agent/workflows.py +462 -0
  23. ct/api/__init__.py +1 -0
  24. ct/api/app.py +211 -0
  25. ct/api/config.py +120 -0
  26. ct/api/engine.py +124 -0
  27. ct/cli.py +1448 -0
  28. ct/data/__init__.py +0 -0
  29. ct/data/compute_providers.json +59 -0
  30. ct/data/cro_database.json +395 -0
  31. ct/data/downloader.py +238 -0
  32. ct/data/loaders.py +252 -0
  33. ct/kb/__init__.py +5 -0
  34. ct/kb/benchmarks.py +147 -0
  35. ct/kb/governance.py +106 -0
  36. ct/kb/ingest.py +415 -0
  37. ct/kb/reasoning.py +129 -0
  38. ct/kb/schema_monitor.py +162 -0
  39. ct/kb/substrate.py +387 -0
  40. ct/models/__init__.py +0 -0
  41. ct/models/llm.py +370 -0
  42. ct/tools/__init__.py +195 -0
  43. ct/tools/_compound_resolver.py +297 -0
  44. ct/tools/biomarker.py +368 -0
  45. ct/tools/cellxgene.py +282 -0
  46. ct/tools/chemistry.py +1371 -0
  47. ct/tools/claude.py +390 -0
  48. ct/tools/clinical.py +1153 -0
  49. ct/tools/clue.py +249 -0
  50. ct/tools/code.py +1069 -0
  51. ct/tools/combination.py +397 -0
  52. ct/tools/compute.py +402 -0
  53. ct/tools/cro.py +413 -0
  54. ct/tools/data_api.py +2114 -0
  55. ct/tools/design.py +295 -0
  56. ct/tools/dna.py +575 -0
  57. ct/tools/experiment.py +604 -0
  58. ct/tools/expression.py +655 -0
  59. ct/tools/files.py +957 -0
  60. ct/tools/genomics.py +1387 -0
  61. ct/tools/http_client.py +146 -0
  62. ct/tools/imaging.py +319 -0
  63. ct/tools/intel.py +223 -0
  64. ct/tools/literature.py +743 -0
  65. ct/tools/network.py +422 -0
  66. ct/tools/notification.py +111 -0
  67. ct/tools/omics.py +3330 -0
  68. ct/tools/ops.py +1230 -0
  69. ct/tools/parity.py +649 -0
  70. ct/tools/pk.py +245 -0
  71. ct/tools/protein.py +678 -0
  72. ct/tools/regulatory.py +643 -0
  73. ct/tools/remote_data.py +179 -0
  74. ct/tools/report.py +181 -0
  75. ct/tools/repurposing.py +376 -0
  76. ct/tools/safety.py +1280 -0
  77. ct/tools/shell.py +178 -0
  78. ct/tools/singlecell.py +533 -0
  79. ct/tools/statistics.py +552 -0
  80. ct/tools/structure.py +882 -0
  81. ct/tools/target.py +901 -0
  82. ct/tools/translational.py +123 -0
  83. ct/tools/viability.py +218 -0
  84. ct/ui/__init__.py +0 -0
  85. ct/ui/markdown.py +31 -0
  86. ct/ui/status.py +258 -0
  87. ct/ui/suggestions.py +567 -0
  88. ct/ui/terminal.py +1456 -0
  89. ct/ui/traces.py +112 -0
ct/tools/clue.py ADDED
@@ -0,0 +1,249 @@
1
+ """
2
+ L1000/CMap compound signature and connectivity tools.
3
+
4
+ Uses local L1000 Level 5 compound profiles (19,811 compounds × 978 landmark genes)
5
+ built from the Broad LINCS GSE92742 dataset. A CLUE API key can be read from config/environment, but no API-based fallback is implemented in this module yet.
6
+ """
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ from functools import lru_cache
11
+ from pathlib import Path
12
+
13
+ from ct.tools import registry
14
+ from ct.agent.config import Config
15
+
16
+
17
def _get_clue_key() -> str | None:
    """Return the CLUE API key intended for the API-based fallback.

    The ``clue.api_key`` config entry is consulted first; when it is missing
    or falsy, the ``CLUE_API_KEY`` environment variable is returned instead
    (``None`` if that is unset as well).
    """
    import os

    configured_key = Config.load().get("clue.api_key")
    if configured_key:
        return configured_key
    return os.environ.get("CLUE_API_KEY")
22
+
23
+
24
+ # ── Local data paths ──
25
+
26
# Default directory holding the locally built L1000 parquet files, plus the
# two files this module reads from it.
_LINCS_DIR = Path("/mnt2/bronze/lincs")
_PROFILES_PATH = _LINCS_DIR.joinpath("l1000_compound_profiles.parquet")
_METADATA_PATH = _LINCS_DIR.joinpath("l1000_pert_metadata.parquet")
29
+
30
+
31
@lru_cache(maxsize=1)
def _load_profiles() -> pd.DataFrame:
    """Load the L1000 compound profile table from the first location found.

    Locations are tried in this order:

    1. the file named by the ``data.l1000_profiles`` config entry,
    2. the built-in default ``_PROFILES_PATH``,
    3. ``<data.base>/lincs/l1000_compound_profiles.parquet``.

    The result is memoised with ``lru_cache``, so after the first successful
    load every caller receives the same DataFrame object.

    Returns:
        The table read from parquet. Callers in this module look compounds up
        in its index and gene symbols in its columns.

    Raises:
        FileNotFoundError: if none of the candidate locations exists. The
            message lists every location that was checked.
    """
    cfg = Config.load()
    searched: list[Path] = []

    # 1. Explicitly configured file.
    configured = cfg.get("data.l1000_profiles")
    if configured:
        explicit = Path(configured)
        searched.append(explicit)
        if explicit.is_file():
            return pd.read_parquet(explicit)

    # 2. Built-in default location.
    searched.append(_PROFILES_PATH)
    if _PROFILES_PATH.exists():
        return pd.read_parquet(_PROFILES_PATH)

    # 3. Conventional layout under the configured data root.
    base = cfg.get("data.base")
    if base:
        candidate = Path(base) / "lincs" / "l1000_compound_profiles.parquet"
        searched.append(candidate)
        if candidate.exists():
            return pd.read_parquet(candidate)

    # Report every path that was tried: naming only the built-in default is
    # misleading when the user pointed the config somewhere else.
    raise FileNotFoundError(
        "L1000 compound profiles not found. "
        "Expected at: " + ", ".join(str(path) for path in searched)
    )
56
+
57
+
58
@lru_cache(maxsize=1)
def _load_pert_metadata() -> pd.DataFrame:
    """Read the perturbagen metadata table (SMILES, PubChem CID, etc.).

    Returns an empty DataFrame when ``_METADATA_PATH`` does not exist, so
    callers can treat the metadata as optional. The result is memoised.
    """
    if not _METADATA_PATH.exists():
        return pd.DataFrame()
    return pd.read_parquet(_METADATA_PATH)
64
+
65
+
66
+ def _find_compound(name: str, profiles: pd.DataFrame) -> str | None:
67
+ """Find a compound in profiles by case-insensitive name matching."""
68
+ name_lower = name.lower().strip()
69
+ # Build lowercase index for matching
70
+ idx_lower = {c.lower(): c for c in profiles.index}
71
+ if name_lower in idx_lower:
72
+ return idx_lower[name_lower]
73
+ # Try partial match
74
+ for key, original in idx_lower.items():
75
+ if name_lower in key or key in name_lower:
76
+ return original
77
+ return None
78
+
79
+
80
@registry.register(
    name="clue.compound_signature",
    description="Get the L1000 transcriptomic signature (up/down-regulated genes) for a compound",
    category="clue",
    parameters={
        "compound": "Compound name (e.g. 'vorinostat', 'lenalidomide', 'bortezomib')",
        "top_n": "Number of top up/down genes to return (default 50)",
    },
    usage_guide=(
        "You need the transcriptomic signature (up/down genes) of a compound from L1000/CMap. "
        "Use to understand a compound's mechanism of action or as input for connectivity queries. "
        "Covers ~19,800 compounds from the Broad LINCS dataset."
    ),
)
def compound_signature(compound: str, top_n: int = 50, **kwargs) -> dict:
    """Return the strongest up- and down-regulated genes for one compound.

    The compound is resolved against the local profile table with
    ``_find_compound``; its row is sorted by value and the ``top_n`` highest
    and ``top_n`` lowest entries are reported. Identifiers (``pert_id``,
    SMILES, PubChem CID) are added when the metadata table has a row for the
    matched compound.

    Args:
        compound: Compound name; matched case-insensitively.
        top_n: How many genes to return in each direction. Must be >= 1.
        **kwargs: Accepted and ignored.

    Returns:
        On success, a dict with ``summary``, ``compound``, ``pert_id``,
        ``smiles``, ``pubchem_cid``, ``n_signatures_aggregated``,
        ``up_genes`` and ``down_genes`` (lists of ``{"gene", "z_score"}``).
        On failure, a dict with ``error`` and ``summary`` — invalid
        arguments, missing data and unknown compounds are reported this way
        rather than raised.
    """
    if not compound or not isinstance(compound, str):
        return {
            "error": "compound parameter required",
            "summary": "Provide a compound name (e.g. 'lenalidomide').",
        }

    # Coerce defensively (e.g. "10" -> 10) and reject values below 1, which
    # would otherwise yield empty gene lists and an IndexError further down.
    try:
        top_n = int(top_n)
    except (TypeError, ValueError):
        top_n = 0
    if top_n < 1:
        return {
            "error": "top_n must be a positive integer",
            "summary": "Provide top_n >= 1 (default 50).",
        }

    try:
        profiles = _load_profiles()
    except FileNotFoundError as e:
        return {"error": str(e), "summary": str(e)}

    # Find compound in profiles
    matched = _find_compound(compound, profiles)
    if matched is None:
        return {
            "error": f"Compound '{compound}' not found in L1000 data ({len(profiles)} compounds available).",
            "summary": f"Compound '{compound}' not found in L1000/CMap database.",
        }

    # Rank the compound's genes from most up- to most down-regulated.
    ranked = profiles.loc[matched].sort_values(ascending=False)
    up_genes = [
        {"gene": gene, "z_score": round(float(value), 4)}
        for gene, value in ranked.head(top_n).items()
    ]
    # tail() keeps descending order; reverse so the most negative comes first.
    down_genes = [
        {"gene": gene, "z_score": round(float(value), 4)}
        for gene, value in ranked.tail(top_n).iloc[::-1].items()
    ]
    if not up_genes:
        message = f"No gene values available for '{matched}' in L1000 data."
        return {"error": message, "summary": message}

    # Optional identifiers; each stays "" when metadata is absent or null.
    identifiers = {"pert_id": "", "canonical_smiles": "", "pubchem_cid": ""}
    meta = _load_pert_metadata()
    if matched in meta.index:
        row = meta.loc[matched]
        if isinstance(row, pd.DataFrame):
            # A duplicated label makes .loc return several rows; use the first.
            row = row.iloc[0]
        for field in identifiers:
            value = row.get(field)
            if pd.notna(value):
                identifiers[field] = str(value)

    # Report the number of genes actually returned, which can be smaller than
    # top_n when the profile has fewer columns.
    summary = (
        f"L1000 signature for {matched}: "
        f"{len(up_genes)} up-regulated genes (top: {up_genes[0]['gene']} z={up_genes[0]['z_score']}), "
        f"{len(down_genes)} down-regulated genes (top: {down_genes[0]['gene']} z={down_genes[0]['z_score']})"
    )

    return {
        "summary": summary,
        "compound": matched,
        "pert_id": identifiers["pert_id"],
        "smiles": identifiers["canonical_smiles"],
        "pubchem_cid": identifiers["pubchem_cid"],
        # NOTE(review): this is the row count of the whole profile table, not
        # a per-compound signature count — confirm the intended meaning.
        "n_signatures_aggregated": len(profiles),
        "up_genes": up_genes,
        "down_genes": down_genes,
    }
156
+
157
+
158
@registry.register(
    name="clue.connectivity_query",
    description="Find compounds with similar or opposing transcriptomic signatures to a gene set",
    category="clue",
    parameters={
        "gene_list": "Dict with 'up' and 'down' keys, each a list of gene symbols",
        "n_results": "Number of top results to return (default 20)",
    },
    usage_guide=(
        "You have a gene signature (up/down-regulated genes) and want to find compounds "
        "with similar or opposing transcriptomic effects. Core CMap analysis. "
        "Use to find drug repurposing candidates or understand mechanism of action."
    ),
)
def connectivity_query(gene_list: dict = None, n_results: int = 20, **kwargs) -> dict:
    """Rank local L1000 compounds by how well they match a gene signature.

    For every compound the score is the mean of its values over the matched
    ``up`` genes minus the mean over the matched ``down`` genes (an
    unweighted difference of means). Positive scores mimic the signature,
    negative scores oppose it. Query genes missing from the profile columns
    are ignored.

    Args:
        gene_list: Dict with optional ``"up"`` and ``"down"`` entries, each a
            list of gene symbols (a single string is treated as one gene).
        n_results: Number of compounds to return at each end of the ranking.
            Must be >= 1.
        **kwargs: Accepted and ignored.

    Returns:
        On success, a dict with ``summary``, ``n_up_matched``,
        ``n_down_matched``, ``n_compounds_scored``, ``top_similar`` and
        ``top_opposing`` (lists of ``{"compound", "connectivity_score"}``).
        On failure, a dict with ``error`` and ``summary``.
    """
    if not gene_list or not isinstance(gene_list, dict):
        return {
            "error": "gene_list must be a dict with 'up' and 'down' keys",
            "summary": "Invalid input: provide gene_list={'up': [...], 'down': [...]}",
        }

    # ``or []`` also covers an explicit None value for either key.
    up_genes = gene_list.get("up") or []
    down_genes = gene_list.get("down") or []
    # A bare string would otherwise be iterated character by character.
    if isinstance(up_genes, str):
        up_genes = [up_genes]
    if isinstance(down_genes, str):
        down_genes = [down_genes]

    if not up_genes and not down_genes:
        return {
            "error": "gene_list must have at least one gene in 'up' or 'down'",
            "summary": "Provide at least one up- or down-regulated gene.",
        }

    # Coerce defensively and reject values below 1, which would leave both
    # result lists empty.
    try:
        n_results = int(n_results)
    except (TypeError, ValueError):
        n_results = 0
    if n_results < 1:
        return {
            "error": "n_results must be a positive integer",
            "summary": "Provide n_results >= 1 (default 20).",
        }

    try:
        profiles = _load_profiles()
    except FileNotFoundError as e:
        return {"error": str(e), "summary": str(e)}

    # Keep only the query genes that exist as profile columns.
    available_up = [g for g in up_genes if g in profiles.columns]
    available_down = [g for g in down_genes if g in profiles.columns]

    if not available_up and not available_down:
        return {
            "error": "None of the query genes found in L1000 landmark genes.",
            "summary": (
                f"0/{len(up_genes)} up genes and 0/{len(down_genes)} down genes "
                "matched L1000 landmark genes (978 genes)."
            ),
        }

    # score = mean(values of up genes) - mean(values of down genes)
    #   positive -> compound mimics the signature
    #   negative -> compound opposes the signature
    score = np.zeros(len(profiles))
    if available_up:
        score += np.nanmean(profiles[available_up].values, axis=1)
    if available_down:
        score -= np.nanmean(profiles[available_down].values, axis=1)

    # Compounds whose selected genes are all NaN get a NaN score; sorting
    # would place them last and report them as "top opposing", so drop them.
    ranked = (
        pd.DataFrame({"compound": profiles.index, "connectivity_score": score})
        .dropna(subset=["connectivity_score"])
        .sort_values("connectivity_score", ascending=False)
    )

    top_similar = ranked.head(n_results).to_dict("records")
    # Reverse the tail so the most negative score comes first.
    top_opposing = ranked.tail(n_results).iloc[::-1].to_dict("records")

    for row in top_similar + top_opposing:
        row["connectivity_score"] = round(float(row["connectivity_score"]), 4)

    # Both lists are empty when no compound could be scored; fall back to
    # placeholders instead of indexing into an empty list.
    best = top_similar[0] if top_similar else {}
    worst = top_opposing[0] if top_opposing else {}
    summary = (
        f"Connectivity query: {len(available_up)}/{len(up_genes)} up, "
        f"{len(available_down)}/{len(down_genes)} down genes matched. "
        f"Scored {len(ranked)} compounds. "
        f"Top mimicker: {best.get('compound', 'N/A')} (score={best.get('connectivity_score', 0)}). "
        f"Top opposer: {worst.get('compound', 'N/A')} (score={worst.get('connectivity_score', 0)})"
    )

    return {
        "summary": summary,
        "n_up_matched": len(available_up),
        "n_down_matched": len(available_down),
        "n_compounds_scored": len(ranked),
        "top_similar": top_similar,
        "top_opposing": top_opposing,
    }