celltype-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. celltype_cli-0.1.0.dist-info/METADATA +267 -0
  2. celltype_cli-0.1.0.dist-info/RECORD +89 -0
  3. celltype_cli-0.1.0.dist-info/WHEEL +4 -0
  4. celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
  5. celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. ct/__init__.py +3 -0
  7. ct/agent/__init__.py +0 -0
  8. ct/agent/case_studies.py +426 -0
  9. ct/agent/config.py +523 -0
  10. ct/agent/doctor.py +544 -0
  11. ct/agent/knowledge.py +523 -0
  12. ct/agent/loop.py +99 -0
  13. ct/agent/mcp_server.py +478 -0
  14. ct/agent/orchestrator.py +733 -0
  15. ct/agent/runner.py +656 -0
  16. ct/agent/sandbox.py +481 -0
  17. ct/agent/session.py +145 -0
  18. ct/agent/system_prompt.py +186 -0
  19. ct/agent/trace_store.py +228 -0
  20. ct/agent/trajectory.py +169 -0
  21. ct/agent/types.py +182 -0
  22. ct/agent/workflows.py +462 -0
  23. ct/api/__init__.py +1 -0
  24. ct/api/app.py +211 -0
  25. ct/api/config.py +120 -0
  26. ct/api/engine.py +124 -0
  27. ct/cli.py +1448 -0
  28. ct/data/__init__.py +0 -0
  29. ct/data/compute_providers.json +59 -0
  30. ct/data/cro_database.json +395 -0
  31. ct/data/downloader.py +238 -0
  32. ct/data/loaders.py +252 -0
  33. ct/kb/__init__.py +5 -0
  34. ct/kb/benchmarks.py +147 -0
  35. ct/kb/governance.py +106 -0
  36. ct/kb/ingest.py +415 -0
  37. ct/kb/reasoning.py +129 -0
  38. ct/kb/schema_monitor.py +162 -0
  39. ct/kb/substrate.py +387 -0
  40. ct/models/__init__.py +0 -0
  41. ct/models/llm.py +370 -0
  42. ct/tools/__init__.py +195 -0
  43. ct/tools/_compound_resolver.py +297 -0
  44. ct/tools/biomarker.py +368 -0
  45. ct/tools/cellxgene.py +282 -0
  46. ct/tools/chemistry.py +1371 -0
  47. ct/tools/claude.py +390 -0
  48. ct/tools/clinical.py +1153 -0
  49. ct/tools/clue.py +249 -0
  50. ct/tools/code.py +1069 -0
  51. ct/tools/combination.py +397 -0
  52. ct/tools/compute.py +402 -0
  53. ct/tools/cro.py +413 -0
  54. ct/tools/data_api.py +2114 -0
  55. ct/tools/design.py +295 -0
  56. ct/tools/dna.py +575 -0
  57. ct/tools/experiment.py +604 -0
  58. ct/tools/expression.py +655 -0
  59. ct/tools/files.py +957 -0
  60. ct/tools/genomics.py +1387 -0
  61. ct/tools/http_client.py +146 -0
  62. ct/tools/imaging.py +319 -0
  63. ct/tools/intel.py +223 -0
  64. ct/tools/literature.py +743 -0
  65. ct/tools/network.py +422 -0
  66. ct/tools/notification.py +111 -0
  67. ct/tools/omics.py +3330 -0
  68. ct/tools/ops.py +1230 -0
  69. ct/tools/parity.py +649 -0
  70. ct/tools/pk.py +245 -0
  71. ct/tools/protein.py +678 -0
  72. ct/tools/regulatory.py +643 -0
  73. ct/tools/remote_data.py +179 -0
  74. ct/tools/report.py +181 -0
  75. ct/tools/repurposing.py +376 -0
  76. ct/tools/safety.py +1280 -0
  77. ct/tools/shell.py +178 -0
  78. ct/tools/singlecell.py +533 -0
  79. ct/tools/statistics.py +552 -0
  80. ct/tools/structure.py +882 -0
  81. ct/tools/target.py +901 -0
  82. ct/tools/translational.py +123 -0
  83. ct/tools/viability.py +218 -0
  84. ct/ui/__init__.py +0 -0
  85. ct/ui/markdown.py +31 -0
  86. ct/ui/status.py +258 -0
  87. ct/ui/suggestions.py +567 -0
  88. ct/ui/terminal.py +1456 -0
  89. ct/ui/traces.py +112 -0
@@ -0,0 +1,146 @@
1
+ """
2
+ Shared HTTP helpers for ct tools.
3
+
4
+ Provides retry/backoff, normalized errors, and JSON parsing wrappers for
5
+ API-heavy tool modules.
6
+ """
7
+
8
+ import time
9
+
10
+
11
+ _RETRYABLE_STATUS = {429, 500, 502, 503, 504}
12
+
13
+
14
def _call_httpx(method: str, url: str, **kwargs):
    """Issue a single HTTP call through httpx, dropping None-valued kwargs.

    GET requests cannot carry a body, so ``json``/``data`` are stripped for
    that verb before delegating to the matching httpx helper.
    """
    import httpx

    verb = method.upper()
    # httpx helpers reject unexpected kwargs (e.g. json/data on httpx.get),
    # so forward only the options that are actually set.
    options = {key: value for key, value in kwargs.items() if value is not None}
    if verb == "GET":
        for body_key in ("json", "data"):
            options.pop(body_key, None)
        return httpx.get(url, **options)
    if verb == "POST":
        return httpx.post(url, **options)
    return httpx.request(verb, url, **options)
27
+
28
+
29
+ def _format_http_error(response) -> str:
30
+ status = getattr(response, "status_code", "unknown")
31
+ body = (getattr(response, "text", "") or "").strip().replace("\n", " ")
32
+ body = body[:300]
33
+ return f"HTTP {status}" + (f": {body}" if body else "")
34
+
35
+
36
def request(
    method: str,
    url: str,
    *,
    params: dict | None = None,
    json: dict | None = None,
    data: dict | None = None,
    headers: dict | None = None,
    timeout: int = 30,
    retries: int = 2,
    backoff_seconds: float = 0.5,
    raise_for_status: bool = True,
) -> tuple[object | None, str | None]:
    """Perform HTTP request with retry/backoff.

    Returns `(response, error)`. Exactly one is non-None.
    """
    # httpx is an optional dependency; degrade to an error string rather
    # than raising, matching the (result, error) tuple contract.
    try:
        import httpx
    except ImportError:
        return None, "httpx required (pip install httpx)"

    delay = max(backoff_seconds, 0.0)  # clamp negative backoff to no sleep
    last_error = None

    # `retries` counts additional attempts, so total attempts = retries + 1.
    for attempt in range(max(retries, 0) + 1):
        try:
            resp = _call_httpx(
                method,
                url,
                params=params,
                json=json,
                data=data,
                headers=headers,
                timeout=timeout,
            )
        except (httpx.TimeoutException, httpx.RequestError) as exc:
            # Transport-level failures are retryable; back off exponentially.
            last_error = str(exc)
            if attempt < retries:
                time.sleep(delay)
                delay *= 2
                continue
            return None, last_error
        except Exception as exc:
            # Non-transport errors (e.g. bad arguments) are not retried.
            return None, str(exc)

        # Retry on throttling / transient server statuses (429, 5xx).
        status = int(getattr(resp, "status_code", 0) or 0)
        if status in _RETRYABLE_STATUS and attempt < retries:
            time.sleep(delay)
            delay *= 2
            continue

        if raise_for_status:
            try:
                resp.raise_for_status()
            except httpx.HTTPStatusError:
                # Normalize HTTP errors into a compact message string.
                return None, _format_http_error(resp)
            except Exception as exc:
                return None, str(exc)

        return resp, None

    # Defensive fallback; the final loop iteration always returns above.
    return None, last_error or "Request failed"
99
+
100
+
101
def request_json(
    method: str,
    url: str,
    *,
    params: dict | None = None,
    json: dict | None = None,
    data: dict | None = None,
    headers: dict | None = None,
    timeout: int = 30,
    retries: int = 2,
    backoff_seconds: float = 0.5,
    raise_for_status: bool = True,
) -> tuple[dict | list | None, str | None]:
    """Perform an HTTP request and decode the JSON body.

    Delegates to ``request`` for retry/backoff, then validates the
    Content-Type and parses. Returns ``(parsed, error)`` with exactly one
    of the two being non-None.
    """
    resp, error = request(
        method,
        url,
        params=params,
        json=json,
        data=data,
        headers=headers,
        timeout=timeout,
        retries=retries,
        backoff_seconds=backoff_seconds,
        raise_for_status=raise_for_status,
    )
    if error:
        return None, error

    # Some APIs serve HTML error pages with a 200 status — sniff the
    # Content-Type header before attempting to decode the body.
    content_type = ""
    try:
        raw_header = resp.headers.get("content-type", "")
        if isinstance(raw_header, str):
            content_type = raw_header.lower()
    except Exception:
        pass

    looks_like_json = "json" in content_type or "javascript" in content_type
    if content_type and not looks_like_json:
        status = getattr(resp, "status_code", "unknown")
        return None, f"Expected JSON but got {content_type} (HTTP {status})"

    try:
        parsed = resp.json()
    except Exception:
        status = getattr(resp, "status_code", "unknown")
        return None, f"Invalid JSON response (HTTP {status})"
    return parsed, None
ct/tools/imaging.py ADDED
@@ -0,0 +1,319 @@
1
+ """
2
+ Imaging tools: compound bioactivity profiling via PubChem and structural similarity.
3
+
4
+ Uses PubChem bioactivity data and RDKit molecular descriptors for mechanism
5
+ classification. Structural fingerprint similarity as a proxy for phenotypic similarity.
6
+ """
7
+
8
+ from ct.tools import registry
9
+ from ct.tools.http_client import request
10
+
11
+
12
@registry.register(
    name="imaging.cellpainting_lookup",
    description="Look up compound bioactivity and compute mechanism class via PubChem assays and RDKit descriptors",
    category="imaging",
    parameters={
        "compound": "Compound name, InChIKey, or SMILES string",
        "source": "Data source: 'pubchem' (default). JUMP Cell Painting data requires local parquet files (not yet integrated).",
    },
    usage_guide="You want to understand a compound's bioactivity profile and infer its mechanism class. Queries PubChem bioassay data and computes RDKit molecular descriptors for heuristic mechanism classification. Note: full Cell Painting morphological profiles from JUMP require downloading parquet files from the JUMP Cell Painting Gallery (S3-hosted, no REST API).",
)
def cellpainting_lookup(compound: str, source: str = "pubchem", **kwargs) -> dict:
    """Look up compound bioactivity and mechanism class.

    Queries PubChem for bioassay data and computes RDKit molecular descriptors
    for heuristic mechanism classification. Full JUMP Cell Painting morphological
    profiles are not yet integrated (data is S3-hosted parquet, no REST API).

    Returns a dict with a human-readable ``summary``, resolved identifiers
    (``compound_info``), an optional heuristic ``mechanism_cluster``, up to 20
    ``bioactivity_assays``, and (when RDKit is installed) ``molecular_descriptors``.
    """
    compound_info = {"query": compound, "source": source}

    # Step 1: Try to resolve compound via PubChem for identifiers
    cid = None
    canonical_smiles = None
    inchikey = None
    compound_name = compound

    # Check if input looks like SMILES (contains special chars)
    # NOTE(review): a plain alphabetic name never matches; a name containing
    # brackets/slashes would be misclassified as SMILES — acceptable heuristic.
    is_smiles = any(c in compound for c in "()=#/\\@[]")
    # Check if input looks like InChIKey (14-10-1 pattern)
    is_inchikey = len(compound) == 27 and compound.count("-") == 2

    if is_smiles:
        # SMILES is POSTed as form data to avoid URL-escaping issues.
        resp, error = request(
            "POST",
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/property/CID,CanonicalSMILES,InChIKey,IUPACName/JSON",
            data={"smiles": compound},
            timeout=10,
            raise_for_status=False,
        )
    elif is_inchikey:
        resp, error = request(
            "GET",
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/{compound}/property/CID,CanonicalSMILES,InChIKey,IUPACName/JSON",
            timeout=10,
            raise_for_status=False,
        )
    else:
        # Free-text name lookup; percent-encode everything (safe="").
        import urllib.parse
        encoded = urllib.parse.quote(compound, safe="")
        resp, error = request(
            "GET",
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{encoded}/property/CID,CanonicalSMILES,InChIKey,IUPACName/JSON",
            timeout=10,
            raise_for_status=False,
        )

    # Resolution is best-effort: on any failure we proceed with cid=None.
    if not error and resp.status_code == 200:
        try:
            props = resp.json().get("PropertyTable", {}).get("Properties", [])
        except Exception:
            props = []
        if props:
            cid = props[0].get("CID")
            canonical_smiles = props[0].get("CanonicalSMILES")
            inchikey = props[0].get("InChIKey")
            compound_name = props[0].get("IUPACName", compound)

    compound_info["cid"] = cid
    compound_info["canonical_smiles"] = canonical_smiles
    compound_info["inchikey"] = inchikey

    # Step 2: Search PubChem for bioactivity data
    mechanism_cluster = None
    bioactivity_data = []
    if cid:
        bio_resp, bio_error = request(
            "GET",
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/assaysummary/JSON",
            timeout=10,
            raise_for_status=False,
        )
        if not bio_error and bio_resp.status_code == 200:
            try:
                assays = bio_resp.json().get("Table", {}).get("Row", [])
            except Exception:
                assays = []
            # Filter for cell-based / imaging assays
            for row in assays[:50]:
                cells = row.get("Cell", [])
                # Each row is a dict with Cell entries
                # NOTE(review): positional column meaning is assumed here
                # (index 0 = AID, index 3 = activity outcome) — verify against
                # the PUG REST assaysummary table schema.
                if isinstance(cells, list) and len(cells) > 5:
                    aid = cells[0].get("StringValue", "") if isinstance(cells[0], dict) else str(cells[0])
                    activity = cells[3].get("StringValue", "") if len(cells) > 3 and isinstance(cells[3], dict) else ""
                    bioactivity_data.append({
                        "aid": aid,
                        "activity_outcome": activity,
                    })

    # Step 3: Compute molecular descriptors using RDKit if SMILES available
    rdkit_descriptors = None
    if canonical_smiles or is_smiles:
        try:
            from rdkit import Chem
            from rdkit.Chem import Descriptors, rdMolDescriptors

            # Prefer PubChem's canonical SMILES; fall back to the raw input.
            smi = canonical_smiles or compound
            mol = Chem.MolFromSmiles(smi)
            if mol is not None:
                rdkit_descriptors = {
                    "molecular_weight": round(Descriptors.MolWt(mol), 2),
                    "logp": round(Descriptors.MolLogP(mol), 2),
                    "tpsa": round(Descriptors.TPSA(mol), 2),
                    "hba": Descriptors.NumHAcceptors(mol),
                    "hbd": Descriptors.NumHDonors(mol),
                    "rotatable_bonds": Descriptors.NumRotatableBonds(mol),
                    "aromatic_rings": Descriptors.NumAromaticRings(mol),
                    "fsp3": round(rdMolDescriptors.CalcFractionCSP3(mol), 3),
                }

                # Heuristic mechanism class based on molecular properties
                # (coarse buckets, not a validated classifier).
                mw = rdkit_descriptors["molecular_weight"]
                logp = rdkit_descriptors["logp"]
                if mw < 500 and rdkit_descriptors["aromatic_rings"] >= 2:
                    mechanism_cluster = "kinase_inhibitor_like"
                elif mw < 600 and logp < 2:
                    mechanism_cluster = "protein_degrader_like"
                elif mw > 800:
                    mechanism_cluster = "macrocycle_like"
                else:
                    mechanism_cluster = "small_molecule"
        except ImportError:
            # RDKit is optional; descriptors/cluster simply stay None.
            pass

    # Build summary
    has_data = bool(bioactivity_data or rdkit_descriptors)
    if has_data:
        cluster_str = f", mechanism cluster: '{mechanism_cluster}'" if mechanism_cluster else ""
        n_assays = len(bioactivity_data)
        assay_str = f", {n_assays} PubChem bioassay(s)" if n_assays > 0 else ""
        summary = (
            f"Compound profile for {compound}: "
            f"CID={cid or 'N/A'}{cluster_str}{assay_str}"
        )
    else:
        summary = (
            f"Compound profile for {compound}: no bioactivity data found in PubChem. "
            f"CID={cid or 'N/A'}"
        )

    result = {
        "summary": summary,
        "compound_info": compound_info,
        "compound_name": compound_name,
        "mechanism_cluster": mechanism_cluster,
        "bioactivity_assays": bioactivity_data[:20],
        "n_assays": len(bioactivity_data),
    }

    if rdkit_descriptors:
        result["molecular_descriptors"] = rdkit_descriptors

    return result
173
+
174
+
175
@registry.register(
    name="imaging.morphology_similarity",
    description="Compare two compounds by structural fingerprint similarity (Morgan/MACCS Tanimoto) as a proxy for phenotypic similarity",
    category="imaging",
    parameters={
        "smiles_a": "SMILES string for compound A",
        "smiles_b": "SMILES string for compound B",
    },
    usage_guide="You want to compare two compounds by structural similarity as a proxy for phenotypic similarity. Uses Morgan fingerprints (radius=2, 2048 bits), MACCS keys, and physicochemical property comparison. Structural similarity correlates with morphological similarity for ~60% of compound pairs (Bray et al. 2017). For actual Cell Painting profile comparison, pre-computed profiles from JUMP would be needed.",
)
def morphology_similarity(smiles_a: str, smiles_b: str, **kwargs) -> dict:
    """Compare two compounds by morphological similarity.

    Uses RDKit Morgan fingerprints (radius=2, 2048 bits) as a structural proxy
    for morphological similarity. Structural similarity correlates with morphological
    similarity for ~60% of compound pairs (Bray et al., Nat Biotechnol 2017).
    Also computes MACCS keys similarity and physicochemical property comparison.

    Returns a dict with similarity scores (Morgan/MACCS/Dice/combined), a
    similarity class, per-compound property tables and heuristic mechanism
    classes, and a list of shared physicochemical features. On invalid input
    or missing RDKit, returns a dict with ``error`` and ``summary`` keys.
    """
    try:
        from rdkit import Chem, DataStructs
        from rdkit.Chem import AllChem, Descriptors, rdMolDescriptors, MACCSkeys
    except ImportError:
        # RDKit is an optional dependency; surface an actionable error.
        return {
            "error": "RDKit is required for morphology similarity. Install with: pip install rdkit",
            "summary": "RDKit not installed — needed for fingerprint-based similarity",
        }

    # Fixed: removed unused `import numpy as np` (numpy was never referenced).

    mol_a = Chem.MolFromSmiles(smiles_a)
    mol_b = Chem.MolFromSmiles(smiles_b)

    if mol_a is None:
        return {"error": f"Invalid SMILES for compound A: {smiles_a}", "summary": f"Could not parse SMILES: {smiles_a}"}
    if mol_b is None:
        return {"error": f"Invalid SMILES for compound B: {smiles_b}", "summary": f"Could not parse SMILES: {smiles_b}"}

    # Morgan fingerprint similarity (main metric)
    fp_a = AllChem.GetMorganFingerprintAsBitVect(mol_a, 2, nBits=2048)
    fp_b = AllChem.GetMorganFingerprintAsBitVect(mol_b, 2, nBits=2048)
    morgan_sim = DataStructs.TanimotoSimilarity(fp_a, fp_b)

    # MACCS keys similarity (complementary metric)
    maccs_a = MACCSkeys.GenMACCSKeys(mol_a)
    maccs_b = MACCSkeys.GenMACCSKeys(mol_b)
    maccs_sim = DataStructs.TanimotoSimilarity(maccs_a, maccs_b)

    # Dice similarity (alternative metric)
    dice_sim = DataStructs.DiceSimilarity(fp_a, fp_b)

    # Physicochemical property comparison
    def _get_props(mol):
        # One-line descriptor table used for both compounds.
        return {
            "mw": round(Descriptors.MolWt(mol), 2),
            "logp": round(Descriptors.MolLogP(mol), 2),
            "tpsa": round(Descriptors.TPSA(mol), 2),
            "hba": Descriptors.NumHAcceptors(mol),
            "hbd": Descriptors.NumHDonors(mol),
            "rotatable_bonds": Descriptors.NumRotatableBonds(mol),
            "aromatic_rings": Descriptors.NumAromaticRings(mol),
            "fsp3": round(rdMolDescriptors.CalcFractionCSP3(mol), 3),
        }

    props_a = _get_props(mol_a)
    props_b = _get_props(mol_b)

    # Compute property similarity (normalized)
    prop_diffs = {}
    shared_features = []
    for key in props_a:
        diff = abs(props_a[key] - props_b[key])
        prop_diffs[key] = round(diff, 3)

        # Flag shared features (thresholds are heuristic).
        if key == "mw" and diff < 50:
            shared_features.append("similar molecular weight")
        elif key == "logp" and diff < 1:
            shared_features.append("similar lipophilicity")
        elif key == "tpsa" and diff < 20:
            shared_features.append("similar polarity")
        elif key == "aromatic_rings" and diff == 0:
            shared_features.append(f"same aromatic ring count ({props_a[key]})")
        elif key == "hbd" and diff == 0 and props_a[key] > 0:
            shared_features.append(f"same H-bond donors ({props_a[key]})")

    # Infer morphological similarity class from a weighted blend of metrics.
    combined_sim = 0.6 * morgan_sim + 0.3 * maccs_sim + 0.1 * dice_sim
    if combined_sim > 0.85:
        sim_class = "highly similar"
        morphology_prediction = "Very likely similar morphological profiles"
    elif combined_sim > 0.6:
        sim_class = "moderately similar"
        morphology_prediction = "Possibly similar morphological effects"
    elif combined_sim > 0.4:
        sim_class = "weakly similar"
        morphology_prediction = "Some shared structural features; morphology may differ"
    else:
        sim_class = "dissimilar"
        morphology_prediction = "Likely different morphological profiles"

    # Heuristic mechanism class (coarse buckets, not a validated classifier).
    def _mechanism_class(props):
        if props["aromatic_rings"] >= 3 and props["hba"] >= 2:
            return "kinase_inhibitor_like"
        elif props["mw"] < 600 and props["logp"] < 2:
            return "polar_small_molecule"
        elif props["mw"] > 800:
            return "macrocycle_like"
        else:
            return "standard_small_molecule"

    mech_a = _mechanism_class(props_a)
    mech_b = _mechanism_class(props_b)

    summary = (
        f"Morphological similarity between compounds: {combined_sim:.2f} ({sim_class}). "
        f"Morgan Tanimoto: {morgan_sim:.3f}, MACCS: {maccs_sim:.3f}. "
        f"{morphology_prediction}"
    )
    if shared_features:
        summary += f". Shared: {', '.join(shared_features[:4])}"

    return {
        "summary": summary,
        "similarity_scores": {
            "morgan_tanimoto": round(morgan_sim, 4),
            "maccs_tanimoto": round(maccs_sim, 4),
            "dice": round(dice_sim, 4),
            "combined": round(combined_sim, 4),
        },
        "similarity_class": sim_class,
        "morphology_prediction": morphology_prediction,
        "shared_features": shared_features,
        "compound_a": {
            "smiles": smiles_a,
            "properties": props_a,
            "mechanism_class": mech_a,
        },
        "compound_b": {
            "smiles": smiles_b,
            "properties": props_b,
            "mechanism_class": mech_b,
        },
        "property_differences": prop_diffs,
    }
ct/tools/intel.py ADDED
@@ -0,0 +1,223 @@
1
+ """
2
+ Competitive and pipeline intelligence tools for pharma R&D.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from datetime import datetime
8
+ from typing import Any
9
+
10
+ from ct.tools import registry
11
+
12
+
13
+ def _to_int(value: Any, default: int = 0) -> int:
14
+ try:
15
+ return int(value)
16
+ except Exception:
17
+ return default
18
+
19
+
20
@registry.register(
    name="intel.pipeline_watch",
    description="Track pipeline activity for a target/indication across trials and literature",
    category="intel",
    parameters={
        "query": "Target, drug class, or mechanism to monitor",
        "indication": "Optional disease/indication filter",
        "max_trials": "Maximum trial records to retain (default 20)",
        "max_papers": "Maximum papers per source to retain (default 10)",
    },
    usage_guide=(
        "Use for ongoing landscape monitoring. Aggregates clinical trial momentum and publication "
        "velocity into a concise watchlist snapshot for strategy discussions."
    ),
)
def pipeline_watch(
    query: str,
    indication: str = "",
    max_trials: int = 20,
    max_papers: int = 10,
    **kwargs,
) -> dict:
    """Create a compact pipeline watch snapshot from public sources.

    Aggregates ClinicalTrials.gov trial counts with PubMed/OpenAlex
    publication velocity into a 0-100 momentum score. Returns a dict with
    ``summary``, ``momentum``, ``momentum_score``, and nested ``trials`` /
    ``literature`` sections; errors from individual sources are passed
    through so partial results remain usable.
    """
    del kwargs
    if not query or not query.strip():
        return {"summary": "query is required.", "error": "missing_query"}

    from ct.tools.clinical import trial_search
    from ct.tools.literature import openalex_search, pubmed_search

    # Clamp limits to sane bounds (also tolerates 0/None via `or`).
    max_trials = max(1, min(int(max_trials or 20), 100))
    max_papers = max(1, min(int(max_papers or 10), 50))

    search_query = f"{query} {indication}".strip()
    trial_result = trial_search(query=search_query)
    pubmed_result = pubmed_search(query=search_query, max_results=max_papers)
    openalex_result = openalex_search(query=search_query, max_results=max_papers)

    # Only fail outright when every upstream source errored.
    if "error" in trial_result and "error" in pubmed_result and "error" in openalex_result:
        return {
            "summary": (
                f"Pipeline watch failed for '{search_query}': all upstream sources returned errors."
            ),
            "error": "all_sources_failed",
            "sources": {
                "trials_error": trial_result.get("error"),
                "pubmed_error": pubmed_result.get("error"),
                "openalex_error": openalex_result.get("error"),
            },
        }

    trials = (trial_result.get("trials") or [])[:max_trials]
    phase_dist = trial_result.get("phase_distribution", {}) or {}
    status_dist = trial_result.get("status_distribution", {}) or {}
    recruiting = _to_int(status_dist.get("RECRUITING", 0), 0)
    phase3 = _to_int(phase_dist.get("PHASE3", 0), 0)

    pubmed_articles = pubmed_result.get("articles", []) if isinstance(pubmed_result, dict) else []
    openalex_articles = openalex_result.get("articles", []) if isinstance(openalex_result, dict) else []

    # Fixed: datetime.utcnow() is deprecated (Python 3.12+) and returns a
    # naive datetime; use an explicit timezone-aware UTC clock instead.
    from datetime import timezone
    current_year = datetime.now(timezone.utc).year

    # Count "recent" publications (current or previous calendar year).
    # PubMed dates are free-form strings, so match by substring.
    recent_pubmed = 0
    for item in pubmed_articles:
        pub_date = str(item.get("pub_date", ""))
        if str(current_year) in pub_date or str(current_year - 1) in pub_date:
            recent_pubmed += 1

    recent_openalex = 0
    for item in openalex_articles:
        year = _to_int(item.get("publication_year"), 0)
        if year >= current_year - 1:
            recent_openalex += 1

    # Momentum score: capped contributions from trial volume, recruiting
    # activity, Phase 3 presence, and recent publication velocity.
    momentum_score = 0
    momentum_score += min(40, _to_int(trial_result.get("total_count", 0), 0))
    momentum_score += min(25, recruiting * 3)
    momentum_score += min(20, phase3 * 5)
    momentum_score += min(15, recent_pubmed + recent_openalex)
    momentum_score = min(100, momentum_score)

    if momentum_score >= 70:
        momentum = "high"
    elif momentum_score >= 40:
        momentum = "moderate"
    else:
        momentum = "early"

    summary = (
        f"Pipeline watch for '{search_query}': momentum={momentum} ({momentum_score}/100). "
        f"Trials={trial_result.get('total_count', 0)}, recruiting={recruiting}, phase3={phase3}, "
        f"recent publications={recent_pubmed + recent_openalex}."
    )

    return {
        "summary": summary,
        "query": query,
        "indication": indication or None,
        "momentum": momentum,
        "momentum_score": momentum_score,
        "trials": {
            "total_count": trial_result.get("total_count", 0),
            "phase_distribution": phase_dist,
            "status_distribution": status_dist,
            "records": trials,
            "error": trial_result.get("error"),
        },
        "literature": {
            "pubmed_total": pubmed_result.get("total_count", 0),
            "pubmed_recent_last_2y": recent_pubmed,
            "openalex_total": openalex_result.get("total_count", 0),
            "openalex_recent_last_2y": recent_openalex,
            "pubmed_top": pubmed_articles[:max_papers],
            "openalex_top": openalex_articles[:max_papers],
            "pubmed_error": pubmed_result.get("error"),
            "openalex_error": openalex_result.get("error"),
        },
    }
137
+
138
+
139
@registry.register(
    name="intel.competitor_snapshot",
    description="Generate a one-shot competitor snapshot for a target and indication",
    category="intel",
    parameters={
        "gene": "Target gene symbol (e.g., LRRK2, IL23R)",
        "indication": "Optional indication filter",
        "max_programs": "Maximum trial/program records to include (default 15)",
    },
    usage_guide=(
        "Use for decision meetings and external positioning. Summarizes active sponsors, phases, "
        "mechanism diversity, and top benchmark endpoints around a target."
    ),
)
def competitor_snapshot(
    gene: str,
    indication: str = "",
    max_programs: int = 15,
    **kwargs,
) -> dict:
    """Build a compact competitor snapshot using clinical and target landscape tools.

    Combines the competitive-landscape and trial-design-benchmark tools into a
    single dict with sponsor counts, phase distribution, top endpoints,
    mechanism classes, and differentiation flags.
    """
    del kwargs
    if not gene or not gene.strip():
        return {"summary": "gene is required.", "error": "missing_gene"}

    from ct.tools.clinical import competitive_landscape, trial_design_benchmark

    # Clamp the record budget to [1, 50]; `or 15` tolerates 0/None input.
    max_programs = max(1, min(int(max_programs or 15), 50))
    landscape = competitive_landscape(gene=gene.strip(), indication=indication.strip())
    benchmark = trial_design_benchmark(
        query=f"{gene} {indication}".strip(),
        max_results=min(100, max_programs * 2),
    )

    # Only fail when both upstream sources errored; otherwise degrade gracefully.
    both_failed = "error" in landscape and "error" in benchmark
    if both_failed:
        return {
            "summary": f"Competitor snapshot failed for {gene}: upstream sources unavailable.",
            "error": "snapshot_failed",
            "sources": {
                "landscape_error": landscape.get("error"),
                "benchmark_error": benchmark.get("error"),
            },
        }

    trials_section = landscape.get("trials") or {}
    trial_records = (trials_section.get("top_trials") or [])[:max_programs]

    # Tally sponsors across the retained trial records.
    sponsor_counts: dict = {}
    for record in trial_records:
        name = str(record.get("sponsor", "")).strip()
        if name:
            sponsor_counts[name] = sponsor_counts.get(name, 0) + 1
    top_sponsors = sorted(sponsor_counts.items(), key=lambda kv: kv[1], reverse=True)[:10]

    phase_dist = trials_section.get("phase_distribution") or {}
    chembl = landscape.get("chembl") or {}
    ot = landscape.get("open_targets") or {}
    top_endpoints = (benchmark.get("top_primary_endpoints") or [])[:5]

    # Differentiation flags: each rule pairs a condition with its message.
    flag_rules = [
        (
            _to_int(phase_dist.get("PHASE3", 0), 0) == 0,
            "No Phase 3 pressure detected in returned trial window.",
        ),
        (
            _to_int(chembl.get("unique_compounds", 0), 0) < 10,
            "Limited small-molecule density; potential white space.",
        ),
        (
            _to_int(ot.get("n_known_drugs", 0), 0) == 0,
            "No known drugs in Open Targets snapshot for this target.",
        ),
    ]
    differentiation_flags = [message for triggered, message in flag_rules if triggered]

    summary = (
        f"Competitor snapshot for {gene}{f' in {indication}' if indication else ''}: "
        f"{_to_int((landscape.get('trials') or {}).get('total_count', 0), 0)} trial records, "
        f"{_to_int(chembl.get('unique_compounds', 0), 0)} ChEMBL compounds, "
        f"{_to_int(ot.get('n_known_drugs', 0), 0)} known drugs."
    )

    return {
        "summary": summary,
        "gene": gene,
        "indication": indication or None,
        "top_sponsors": [{"sponsor": name, "trial_count": count} for name, count in top_sponsors],
        "phase_distribution": phase_dist,
        "top_primary_endpoints": top_endpoints,
        "mechanism_classes": sorted(chembl.get("moa_types", []) or []),
        "differentiation_flags": differentiation_flags,
        "programs": trial_records,
        "landscape": landscape,
        "benchmark": benchmark,
    }