celltype-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celltype_cli-0.1.0.dist-info/METADATA +267 -0
- celltype_cli-0.1.0.dist-info/RECORD +89 -0
- celltype_cli-0.1.0.dist-info/WHEEL +4 -0
- celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
- celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- ct/__init__.py +3 -0
- ct/agent/__init__.py +0 -0
- ct/agent/case_studies.py +426 -0
- ct/agent/config.py +523 -0
- ct/agent/doctor.py +544 -0
- ct/agent/knowledge.py +523 -0
- ct/agent/loop.py +99 -0
- ct/agent/mcp_server.py +478 -0
- ct/agent/orchestrator.py +733 -0
- ct/agent/runner.py +656 -0
- ct/agent/sandbox.py +481 -0
- ct/agent/session.py +145 -0
- ct/agent/system_prompt.py +186 -0
- ct/agent/trace_store.py +228 -0
- ct/agent/trajectory.py +169 -0
- ct/agent/types.py +182 -0
- ct/agent/workflows.py +462 -0
- ct/api/__init__.py +1 -0
- ct/api/app.py +211 -0
- ct/api/config.py +120 -0
- ct/api/engine.py +124 -0
- ct/cli.py +1448 -0
- ct/data/__init__.py +0 -0
- ct/data/compute_providers.json +59 -0
- ct/data/cro_database.json +395 -0
- ct/data/downloader.py +238 -0
- ct/data/loaders.py +252 -0
- ct/kb/__init__.py +5 -0
- ct/kb/benchmarks.py +147 -0
- ct/kb/governance.py +106 -0
- ct/kb/ingest.py +415 -0
- ct/kb/reasoning.py +129 -0
- ct/kb/schema_monitor.py +162 -0
- ct/kb/substrate.py +387 -0
- ct/models/__init__.py +0 -0
- ct/models/llm.py +370 -0
- ct/tools/__init__.py +195 -0
- ct/tools/_compound_resolver.py +297 -0
- ct/tools/biomarker.py +368 -0
- ct/tools/cellxgene.py +282 -0
- ct/tools/chemistry.py +1371 -0
- ct/tools/claude.py +390 -0
- ct/tools/clinical.py +1153 -0
- ct/tools/clue.py +249 -0
- ct/tools/code.py +1069 -0
- ct/tools/combination.py +397 -0
- ct/tools/compute.py +402 -0
- ct/tools/cro.py +413 -0
- ct/tools/data_api.py +2114 -0
- ct/tools/design.py +295 -0
- ct/tools/dna.py +575 -0
- ct/tools/experiment.py +604 -0
- ct/tools/expression.py +655 -0
- ct/tools/files.py +957 -0
- ct/tools/genomics.py +1387 -0
- ct/tools/http_client.py +146 -0
- ct/tools/imaging.py +319 -0
- ct/tools/intel.py +223 -0
- ct/tools/literature.py +743 -0
- ct/tools/network.py +422 -0
- ct/tools/notification.py +111 -0
- ct/tools/omics.py +3330 -0
- ct/tools/ops.py +1230 -0
- ct/tools/parity.py +649 -0
- ct/tools/pk.py +245 -0
- ct/tools/protein.py +678 -0
- ct/tools/regulatory.py +643 -0
- ct/tools/remote_data.py +179 -0
- ct/tools/report.py +181 -0
- ct/tools/repurposing.py +376 -0
- ct/tools/safety.py +1280 -0
- ct/tools/shell.py +178 -0
- ct/tools/singlecell.py +533 -0
- ct/tools/statistics.py +552 -0
- ct/tools/structure.py +882 -0
- ct/tools/target.py +901 -0
- ct/tools/translational.py +123 -0
- ct/tools/viability.py +218 -0
- ct/ui/__init__.py +0 -0
- ct/ui/markdown.py +31 -0
- ct/ui/status.py +258 -0
- ct/ui/suggestions.py +567 -0
- ct/ui/terminal.py +1456 -0
- ct/ui/traces.py +112 -0
ct/tools/http_client.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared HTTP helpers for ct tools.
|
|
3
|
+
|
|
4
|
+
Provides retry/backoff, normalized errors, and JSON parsing wrappers for
|
|
5
|
+
API-heavy tool modules.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# HTTP status codes treated as transient and therefore retryable:
# 429 (rate limited) plus the usual 5xx gateway/availability errors.
_RETRYABLE_STATUS = {429, 500, 502, 503, 504}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _call_httpx(method: str, url: str, **kwargs):
    """Dispatch a single httpx call for *method*, dropping None-valued kwargs.

    GET requests additionally have body kwargs (``json``/``data``) stripped,
    since ``httpx.get`` does not accept them.
    """
    import httpx

    verb = method.upper()
    options = {key: value for key, value in kwargs.items() if value is not None}
    if verb == "GET":
        # httpx.get() has no json/data parameters; discard any body payload.
        for body_key in ("json", "data"):
            options.pop(body_key, None)
        return httpx.get(url, **options)
    if verb == "POST":
        return httpx.post(url, **options)
    return httpx.request(verb, url, **options)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _format_http_error(response) -> str:
|
|
30
|
+
status = getattr(response, "status_code", "unknown")
|
|
31
|
+
body = (getattr(response, "text", "") or "").strip().replace("\n", " ")
|
|
32
|
+
body = body[:300]
|
|
33
|
+
return f"HTTP {status}" + (f": {body}" if body else "")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def request(
    method: str,
    url: str,
    *,
    params: dict | None = None,
    json: dict | None = None,
    data: dict | None = None,
    headers: dict | None = None,
    timeout: int = 30,
    retries: int = 2,
    backoff_seconds: float = 0.5,
    raise_for_status: bool = True,
) -> tuple[object | None, str | None]:
    """Perform HTTP request with retry/backoff.

    Retries transient failures — connection/timeout errors and the status
    codes in ``_RETRYABLE_STATUS`` — up to ``retries`` extra attempts, with
    exponential backoff starting at ``backoff_seconds`` and doubling each
    retry.

    Args:
        method: HTTP verb (case-insensitive; normalized in ``_call_httpx``).
        url: Target URL.
        params: Optional query parameters.
        json: Optional JSON request body.
        data: Optional form-encoded request body.
        headers: Optional request headers.
        timeout: Per-attempt timeout in seconds.
        retries: Number of additional attempts after the first.
        backoff_seconds: Initial sleep between attempts (clamped to >= 0).
        raise_for_status: When True, 4xx/5xx responses are converted into
            an error string instead of being returned as responses.

    Returns `(response, error)`. Exactly one is non-None.
    """
    try:
        import httpx
    except ImportError:
        # httpx is an optional dependency; surface a friendly install hint
        # rather than raising, matching the (value, error) contract.
        return None, "httpx required (pip install httpx)"

    delay = max(backoff_seconds, 0.0)
    last_error = None

    for attempt in range(max(retries, 0) + 1):
        try:
            resp = _call_httpx(
                method,
                url,
                params=params,
                json=json,
                data=data,
                headers=headers,
                timeout=timeout,
            )
        except (httpx.TimeoutException, httpx.RequestError) as exc:
            # Network-level failure: back off and retry while attempts
            # remain, otherwise return the last observed error.
            last_error = str(exc)
            if attempt < retries:
                time.sleep(delay)
                delay *= 2
                continue
            return None, last_error
        except Exception as exc:
            # Non-transport errors (e.g. bad arguments) are not retried.
            return None, str(exc)

        # A retryable status (429/5xx) consumes an attempt like a network
        # error; on the final attempt we fall through and report it below.
        status = int(getattr(resp, "status_code", 0) or 0)
        if status in _RETRYABLE_STATUS and attempt < retries:
            time.sleep(delay)
            delay *= 2
            continue

        if raise_for_status:
            try:
                resp.raise_for_status()
            except httpx.HTTPStatusError:
                # Condense the failed response into a short error string.
                return None, _format_http_error(resp)
            except Exception as exc:
                return None, str(exc)

        return resp, None

    # Defensive fallback: the final loop iteration always returns above,
    # so this is only reachable if the loop body never executed.
    return None, last_error or "Request failed"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def request_json(
    method: str,
    url: str,
    *,
    params: dict | None = None,
    json: dict | None = None,
    data: dict | None = None,
    headers: dict | None = None,
    timeout: int = 30,
    retries: int = 2,
    backoff_seconds: float = 0.5,
    raise_for_status: bool = True,
) -> tuple[dict | list | None, str | None]:
    """Perform HTTP request and parse JSON body.

    Thin wrapper over :func:`request` that additionally validates the
    response Content-Type and decodes the body. Returns ``(payload, error)``
    where exactly one is non-None.
    """
    resp, error = request(
        method,
        url,
        params=params,
        json=json,
        data=data,
        headers=headers,
        timeout=timeout,
        retries=retries,
        backoff_seconds=backoff_seconds,
        raise_for_status=raise_for_status,
    )
    if error:
        return None, error

    # Validate Content-Type before parsing — some APIs return HTML on 200.
    content_type = ""
    try:
        declared = resp.headers.get("content-type", "")
        if isinstance(declared, str):
            content_type = declared.lower()
    except Exception:
        pass

    looks_like_json = "json" in content_type or "javascript" in content_type
    if content_type and not looks_like_json:
        status = getattr(resp, "status_code", "unknown")
        return None, f"Expected JSON but got {content_type} (HTTP {status})"

    try:
        payload = resp.json()
    except Exception:
        status = getattr(resp, "status_code", "unknown")
        return None, f"Invalid JSON response (HTTP {status})"
    return payload, None
|
ct/tools/imaging.py
ADDED
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Imaging tools: compound bioactivity profiling via PubChem and structural similarity.
|
|
3
|
+
|
|
4
|
+
Uses PubChem bioactivity data and RDKit molecular descriptors for mechanism
|
|
5
|
+
classification. Structural fingerprint similarity as a proxy for phenotypic similarity.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from ct.tools import registry
|
|
9
|
+
from ct.tools.http_client import request
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@registry.register(
    name="imaging.cellpainting_lookup",
    description="Look up compound bioactivity and compute mechanism class via PubChem assays and RDKit descriptors",
    category="imaging",
    parameters={
        "compound": "Compound name, InChIKey, or SMILES string",
        "source": "Data source: 'pubchem' (default). JUMP Cell Painting data requires local parquet files (not yet integrated).",
    },
    usage_guide="You want to understand a compound's bioactivity profile and infer its mechanism class. Queries PubChem bioassay data and computes RDKit molecular descriptors for heuristic mechanism classification. Note: full Cell Painting morphological profiles from JUMP require downloading parquet files from the JUMP Cell Painting Gallery (S3-hosted, no REST API).",
)
def cellpainting_lookup(compound: str, source: str = "pubchem", **kwargs) -> dict:
    """Look up compound bioactivity and mechanism class.

    Queries PubChem for bioassay data and computes RDKit molecular descriptors
    for heuristic mechanism classification. Full JUMP Cell Painting morphological
    profiles are not yet integrated (data is S3-hosted parquet, no REST API).

    Args:
        compound: Compound name, InChIKey, or SMILES string (auto-detected).
        source: Recorded in the output; only 'pubchem' is actually queried.
        **kwargs: Ignored (accepted for registry call compatibility).

    Returns:
        dict with a human-readable ``summary``, resolved identifiers,
        bioassay records (capped at 20), and, when RDKit is available,
        ``molecular_descriptors`` plus a heuristic ``mechanism_cluster``.
    """
    compound_info = {"query": compound, "source": source}

    # Step 1: Try to resolve compound via PubChem for identifiers
    cid = None
    canonical_smiles = None
    inchikey = None
    compound_name = compound

    # Check if input looks like SMILES (contains special chars)
    # NOTE(review): a plain name containing any of these characters (e.g.
    # parentheses) would also be routed down the SMILES branch — confirm
    # acceptable for expected inputs.
    is_smiles = any(c in compound for c in "()=#/\\@[]")
    # Check if input looks like InChIKey (14-10-1 pattern)
    is_inchikey = len(compound) == 27 and compound.count("-") == 2

    # All three branches request the same property set; raise_for_status is
    # disabled so resolution failures fall through to the status check below.
    if is_smiles:
        resp, error = request(
            "POST",
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/property/CID,CanonicalSMILES,InChIKey,IUPACName/JSON",
            data={"smiles": compound},
            timeout=10,
            raise_for_status=False,
        )
    elif is_inchikey:
        resp, error = request(
            "GET",
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/{compound}/property/CID,CanonicalSMILES,InChIKey,IUPACName/JSON",
            timeout=10,
            raise_for_status=False,
        )
    else:
        import urllib.parse
        # Percent-encode everything (safe="") since the name is a path segment.
        encoded = urllib.parse.quote(compound, safe="")
        resp, error = request(
            "GET",
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{encoded}/property/CID,CanonicalSMILES,InChIKey,IUPACName/JSON",
            timeout=10,
            raise_for_status=False,
        )

    if not error and resp.status_code == 200:
        try:
            props = resp.json().get("PropertyTable", {}).get("Properties", [])
        except Exception:
            props = []
        if props:
            # Use the first property record returned for the query.
            cid = props[0].get("CID")
            canonical_smiles = props[0].get("CanonicalSMILES")
            inchikey = props[0].get("InChIKey")
            compound_name = props[0].get("IUPACName", compound)

    compound_info["cid"] = cid
    compound_info["canonical_smiles"] = canonical_smiles
    compound_info["inchikey"] = inchikey

    # Step 2: Search PubChem for bioactivity data
    mechanism_cluster = None
    bioactivity_data = []
    if cid:
        bio_resp, bio_error = request(
            "GET",
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/assaysummary/JSON",
            timeout=10,
            raise_for_status=False,
        )
        if not bio_error and bio_resp.status_code == 200:
            try:
                assays = bio_resp.json().get("Table", {}).get("Row", [])
            except Exception:
                assays = []
            # Filter for cell-based / imaging assays
            for row in assays[:50]:
                cells = row.get("Cell", [])
                # Each row is a dict with Cell entries
                # NOTE(review): cells[0]/cells[3] assume the assaysummary
                # column order puts AID first and the activity outcome
                # fourth — TODO confirm against the PUG REST schema.
                if isinstance(cells, list) and len(cells) > 5:
                    aid = cells[0].get("StringValue", "") if isinstance(cells[0], dict) else str(cells[0])
                    activity = cells[3].get("StringValue", "") if len(cells) > 3 and isinstance(cells[3], dict) else ""
                    bioactivity_data.append({
                        "aid": aid,
                        "activity_outcome": activity,
                    })

    # Step 3: Compute molecular descriptors using RDKit if SMILES available
    rdkit_descriptors = None
    if canonical_smiles or is_smiles:
        try:
            from rdkit import Chem
            from rdkit.Chem import Descriptors, rdMolDescriptors

            # Prefer the PubChem-canonicalized SMILES; fall back to raw input.
            smi = canonical_smiles or compound
            mol = Chem.MolFromSmiles(smi)
            if mol is not None:
                rdkit_descriptors = {
                    "molecular_weight": round(Descriptors.MolWt(mol), 2),
                    "logp": round(Descriptors.MolLogP(mol), 2),
                    "tpsa": round(Descriptors.TPSA(mol), 2),
                    "hba": Descriptors.NumHAcceptors(mol),
                    "hbd": Descriptors.NumHDonors(mol),
                    "rotatable_bonds": Descriptors.NumRotatableBonds(mol),
                    "aromatic_rings": Descriptors.NumAromaticRings(mol),
                    "fsp3": round(rdMolDescriptors.CalcFractionCSP3(mol), 3),
                }

                # Heuristic mechanism class based on molecular properties
                # (rough rules of thumb; first matching bucket wins).
                mw = rdkit_descriptors["molecular_weight"]
                logp = rdkit_descriptors["logp"]
                if mw < 500 and rdkit_descriptors["aromatic_rings"] >= 2:
                    mechanism_cluster = "kinase_inhibitor_like"
                elif mw < 600 and logp < 2:
                    mechanism_cluster = "protein_degrader_like"
                elif mw > 800:
                    mechanism_cluster = "macrocycle_like"
                else:
                    mechanism_cluster = "small_molecule"
        except ImportError:
            # RDKit is optional; descriptor/mechanism fields are simply omitted.
            pass

    # Build summary
    has_data = bool(bioactivity_data or rdkit_descriptors)
    if has_data:
        cluster_str = f", mechanism cluster: '{mechanism_cluster}'" if mechanism_cluster else ""
        n_assays = len(bioactivity_data)
        assay_str = f", {n_assays} PubChem bioassay(s)" if n_assays > 0 else ""
        summary = (
            f"Compound profile for {compound}: "
            f"CID={cid or 'N/A'}{cluster_str}{assay_str}"
        )
    else:
        summary = (
            f"Compound profile for {compound}: no bioactivity data found in PubChem. "
            f"CID={cid or 'N/A'}"
        )

    result = {
        "summary": summary,
        "compound_info": compound_info,
        "compound_name": compound_name,
        "mechanism_cluster": mechanism_cluster,
        # Cap the returned assay list at 20 but report the full count.
        "bioactivity_assays": bioactivity_data[:20],
        "n_assays": len(bioactivity_data),
    }

    if rdkit_descriptors:
        result["molecular_descriptors"] = rdkit_descriptors

    return result
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@registry.register(
    name="imaging.morphology_similarity",
    description="Compare two compounds by structural fingerprint similarity (Morgan/MACCS Tanimoto) as a proxy for phenotypic similarity",
    category="imaging",
    parameters={
        "smiles_a": "SMILES string for compound A",
        "smiles_b": "SMILES string for compound B",
    },
    usage_guide="You want to compare two compounds by structural similarity as a proxy for phenotypic similarity. Uses Morgan fingerprints (radius=2, 2048 bits), MACCS keys, and physicochemical property comparison. Structural similarity correlates with morphological similarity for ~60% of compound pairs (Bray et al. 2017). For actual Cell Painting profile comparison, pre-computed profiles from JUMP would be needed.",
)
def morphology_similarity(smiles_a: str, smiles_b: str, **kwargs) -> dict:
    """Compare two compounds by morphological similarity.

    Uses RDKit Morgan fingerprints (radius=2, 2048 bits) as a structural proxy
    for morphological similarity. Structural similarity correlates with morphological
    similarity for ~60% of compound pairs (Bray et al., Nat Biotechnol 2017).
    Also computes MACCS keys similarity and physicochemical property comparison.

    Args:
        smiles_a: SMILES string for compound A.
        smiles_b: SMILES string for compound B.
        **kwargs: Ignored (accepted for registry call compatibility).

    Returns:
        dict with ``similarity_scores``, per-compound properties and heuristic
        mechanism classes, or an ``error`` key when RDKit is missing or a
        SMILES string fails to parse.
    """
    try:
        from rdkit import Chem, DataStructs
        from rdkit.Chem import AllChem, Descriptors, rdMolDescriptors, MACCSkeys
    except ImportError:
        return {
            "error": "RDKit is required for morphology similarity. Install with: pip install rdkit",
            "summary": "RDKit not installed — needed for fingerprint-based similarity",
        }

    # (Removed an unused `import numpy as np` — nothing in this function uses it.)
    mol_a = Chem.MolFromSmiles(smiles_a)
    mol_b = Chem.MolFromSmiles(smiles_b)

    if mol_a is None:
        return {"error": f"Invalid SMILES for compound A: {smiles_a}", "summary": f"Could not parse SMILES: {smiles_a}"}
    if mol_b is None:
        return {"error": f"Invalid SMILES for compound B: {smiles_b}", "summary": f"Could not parse SMILES: {smiles_b}"}

    # Morgan fingerprint similarity (main metric)
    fp_a = AllChem.GetMorganFingerprintAsBitVect(mol_a, 2, nBits=2048)
    fp_b = AllChem.GetMorganFingerprintAsBitVect(mol_b, 2, nBits=2048)
    morgan_sim = DataStructs.TanimotoSimilarity(fp_a, fp_b)

    # MACCS keys similarity (complementary metric)
    maccs_a = MACCSkeys.GenMACCSKeys(mol_a)
    maccs_b = MACCSkeys.GenMACCSKeys(mol_b)
    maccs_sim = DataStructs.TanimotoSimilarity(maccs_a, maccs_b)

    # Dice similarity (alternative metric)
    dice_sim = DataStructs.DiceSimilarity(fp_a, fp_b)

    # Physicochemical property comparison
    def _get_props(mol):
        # One-line purpose: standard descriptor panel used for both compounds.
        return {
            "mw": round(Descriptors.MolWt(mol), 2),
            "logp": round(Descriptors.MolLogP(mol), 2),
            "tpsa": round(Descriptors.TPSA(mol), 2),
            "hba": Descriptors.NumHAcceptors(mol),
            "hbd": Descriptors.NumHDonors(mol),
            "rotatable_bonds": Descriptors.NumRotatableBonds(mol),
            "aromatic_rings": Descriptors.NumAromaticRings(mol),
            "fsp3": round(rdMolDescriptors.CalcFractionCSP3(mol), 3),
        }

    props_a = _get_props(mol_a)
    props_b = _get_props(mol_b)

    # Compute property similarity (normalized)
    prop_diffs = {}
    shared_features = []
    for key in props_a:
        diff = abs(props_a[key] - props_b[key])
        prop_diffs[key] = round(diff, 3)

        # Flag shared features (thresholds are heuristic rules of thumb)
        if key == "mw" and diff < 50:
            shared_features.append("similar molecular weight")
        elif key == "logp" and diff < 1:
            shared_features.append("similar lipophilicity")
        elif key == "tpsa" and diff < 20:
            shared_features.append("similar polarity")
        elif key == "aromatic_rings" and diff == 0:
            shared_features.append(f"same aromatic ring count ({props_a[key]})")
        elif key == "hbd" and diff == 0 and props_a[key] > 0:
            shared_features.append(f"same H-bond donors ({props_a[key]})")

    # Infer morphological similarity class from a weighted blend of the
    # three fingerprint metrics (weights are heuristic).
    combined_sim = 0.6 * morgan_sim + 0.3 * maccs_sim + 0.1 * dice_sim
    if combined_sim > 0.85:
        sim_class = "highly similar"
        morphology_prediction = "Very likely similar morphological profiles"
    elif combined_sim > 0.6:
        sim_class = "moderately similar"
        morphology_prediction = "Possibly similar morphological effects"
    elif combined_sim > 0.4:
        sim_class = "weakly similar"
        morphology_prediction = "Some shared structural features; morphology may differ"
    else:
        sim_class = "dissimilar"
        morphology_prediction = "Likely different morphological profiles"

    # Heuristic mechanism class
    def _mechanism_class(props):
        # One-line purpose: bucket a descriptor panel into a coarse class.
        if props["aromatic_rings"] >= 3 and props["hba"] >= 2:
            return "kinase_inhibitor_like"
        elif props["mw"] < 600 and props["logp"] < 2:
            return "polar_small_molecule"
        elif props["mw"] > 800:
            return "macrocycle_like"
        else:
            return "standard_small_molecule"

    mech_a = _mechanism_class(props_a)
    mech_b = _mechanism_class(props_b)

    summary = (
        f"Morphological similarity between compounds: {combined_sim:.2f} ({sim_class}). "
        f"Morgan Tanimoto: {morgan_sim:.3f}, MACCS: {maccs_sim:.3f}. "
        f"{morphology_prediction}"
    )
    if shared_features:
        summary += f". Shared: {', '.join(shared_features[:4])}"

    return {
        "summary": summary,
        "similarity_scores": {
            "morgan_tanimoto": round(morgan_sim, 4),
            "maccs_tanimoto": round(maccs_sim, 4),
            "dice": round(dice_sim, 4),
            "combined": round(combined_sim, 4),
        },
        "similarity_class": sim_class,
        "morphology_prediction": morphology_prediction,
        "shared_features": shared_features,
        "compound_a": {
            "smiles": smiles_a,
            "properties": props_a,
            "mechanism_class": mech_a,
        },
        "compound_b": {
            "smiles": smiles_b,
            "properties": props_b,
            "mechanism_class": mech_b,
        },
        "property_differences": prop_diffs,
    }
|
ct/tools/intel.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Competitive and pipeline intelligence tools for pharma R&D.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from ct.tools import registry
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _to_int(value: Any, default: int = 0) -> int:
|
|
14
|
+
try:
|
|
15
|
+
return int(value)
|
|
16
|
+
except Exception:
|
|
17
|
+
return default
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@registry.register(
    name="intel.pipeline_watch",
    description="Track pipeline activity for a target/indication across trials and literature",
    category="intel",
    parameters={
        "query": "Target, drug class, or mechanism to monitor",
        "indication": "Optional disease/indication filter",
        "max_trials": "Maximum trial records to retain (default 20)",
        "max_papers": "Maximum papers per source to retain (default 10)",
    },
    usage_guide=(
        "Use for ongoing landscape monitoring. Aggregates clinical trial momentum and publication "
        "velocity into a concise watchlist snapshot for strategy discussions."
    ),
)
def pipeline_watch(
    query: str,
    indication: str = "",
    max_trials: int = 20,
    max_papers: int = 10,
    **kwargs,
) -> dict:
    """Create a compact pipeline watch snapshot from public sources.

    Aggregates ClinicalTrials.gov momentum (via ``trial_search``) with
    publication velocity from PubMed and OpenAlex, and folds them into a
    0-100 heuristic ``momentum_score``.

    Args:
        query: Target, drug class, or mechanism to monitor (required).
        indication: Optional disease/indication appended to the search.
        max_trials: Trial records to retain, clamped to 1-100.
        max_papers: Papers per literature source, clamped to 1-50.
        **kwargs: Ignored (accepted for registry call compatibility).

    Returns:
        dict with ``summary``, ``momentum``/``momentum_score``, and nested
        ``trials``/``literature`` sections; an ``error`` dict when the query
        is missing or every upstream source fails.
    """
    del kwargs
    if not query or not query.strip():
        return {"summary": "query is required.", "error": "missing_query"}

    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # timestamp instead (same year value, no DeprecationWarning).
    from datetime import timezone

    from ct.tools.clinical import trial_search
    from ct.tools.literature import openalex_search, pubmed_search

    # Clamp caller-supplied limits to sane bounds.
    max_trials = max(1, min(int(max_trials or 20), 100))
    max_papers = max(1, min(int(max_papers or 10), 50))

    search_query = f"{query} {indication}".strip()
    trial_result = trial_search(query=search_query)
    pubmed_result = pubmed_search(query=search_query, max_results=max_papers)
    openalex_result = openalex_search(query=search_query, max_results=max_papers)

    # Only bail out when every upstream source errored; partial data is
    # still useful and each section carries its own error field below.
    if "error" in trial_result and "error" in pubmed_result and "error" in openalex_result:
        return {
            "summary": (
                f"Pipeline watch failed for '{search_query}': all upstream sources returned errors."
            ),
            "error": "all_sources_failed",
            "sources": {
                "trials_error": trial_result.get("error"),
                "pubmed_error": pubmed_result.get("error"),
                "openalex_error": openalex_result.get("error"),
            },
        }

    trials = (trial_result.get("trials") or [])[:max_trials]
    phase_dist = trial_result.get("phase_distribution", {}) or {}
    status_dist = trial_result.get("status_distribution", {}) or {}
    recruiting = _to_int(status_dist.get("RECRUITING", 0), 0)
    phase3 = _to_int(phase_dist.get("PHASE3", 0), 0)

    pubmed_articles = pubmed_result.get("articles", []) if isinstance(pubmed_result, dict) else []
    openalex_articles = openalex_result.get("articles", []) if isinstance(openalex_result, dict) else []

    current_year = datetime.now(timezone.utc).year

    # "Recent" = published this year or last. PubMed dates are free-form
    # strings, so we substring-match the year; OpenAlex exposes an integer.
    recent_pubmed = sum(
        1
        for item in pubmed_articles
        if str(current_year) in str(item.get("pub_date", ""))
        or str(current_year - 1) in str(item.get("pub_date", ""))
    )
    recent_openalex = sum(
        1
        for item in openalex_articles
        if _to_int(item.get("publication_year"), 0) >= current_year - 1
    )

    # Heuristic 0-100 momentum: trial volume (<=40), recruiting activity
    # (<=25), Phase 3 presence (<=20), and recent publications (<=15).
    momentum_score = 0
    momentum_score += min(40, _to_int(trial_result.get("total_count", 0), 0))
    momentum_score += min(25, recruiting * 3)
    momentum_score += min(20, phase3 * 5)
    momentum_score += min(15, recent_pubmed + recent_openalex)
    momentum_score = min(100, momentum_score)

    if momentum_score >= 70:
        momentum = "high"
    elif momentum_score >= 40:
        momentum = "moderate"
    else:
        momentum = "early"

    summary = (
        f"Pipeline watch for '{search_query}': momentum={momentum} ({momentum_score}/100). "
        f"Trials={trial_result.get('total_count', 0)}, recruiting={recruiting}, phase3={phase3}, "
        f"recent publications={recent_pubmed + recent_openalex}."
    )

    return {
        "summary": summary,
        "query": query,
        "indication": indication or None,
        "momentum": momentum,
        "momentum_score": momentum_score,
        "trials": {
            "total_count": trial_result.get("total_count", 0),
            "phase_distribution": phase_dist,
            "status_distribution": status_dist,
            "records": trials,
            "error": trial_result.get("error"),
        },
        "literature": {
            "pubmed_total": pubmed_result.get("total_count", 0),
            "pubmed_recent_last_2y": recent_pubmed,
            "openalex_total": openalex_result.get("total_count", 0),
            "openalex_recent_last_2y": recent_openalex,
            "pubmed_top": pubmed_articles[:max_papers],
            "openalex_top": openalex_articles[:max_papers],
            "pubmed_error": pubmed_result.get("error"),
            "openalex_error": openalex_result.get("error"),
        },
    }
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@registry.register(
    name="intel.competitor_snapshot",
    description="Generate a one-shot competitor snapshot for a target and indication",
    category="intel",
    parameters={
        "gene": "Target gene symbol (e.g., LRRK2, IL23R)",
        "indication": "Optional indication filter",
        "max_programs": "Maximum trial/program records to include (default 15)",
    },
    usage_guide=(
        "Use for decision meetings and external positioning. Summarizes active sponsors, phases, "
        "mechanism diversity, and top benchmark endpoints around a target."
    ),
)
def competitor_snapshot(
    gene: str,
    indication: str = "",
    max_programs: int = 15,
    **kwargs,
) -> dict:
    """Build a compact competitor snapshot using clinical and target landscape tools.

    Combines ``competitive_landscape`` (trials, ChEMBL, Open Targets) with
    ``trial_design_benchmark`` endpoints, then derives sponsor rankings and
    heuristic differentiation flags.
    """
    del kwargs
    if not gene or not gene.strip():
        return {"summary": "gene is required.", "error": "missing_gene"}

    from ct.tools.clinical import competitive_landscape, trial_design_benchmark

    # Clamp the program cap, then gather both upstream views.
    max_programs = max(1, min(int(max_programs or 15), 50))
    landscape = competitive_landscape(gene=gene.strip(), indication=indication.strip())
    benchmark = trial_design_benchmark(
        query=f"{gene} {indication}".strip(),
        max_results=min(100, max_programs * 2),
    )

    # Fail only when both sources errored; either alone still yields a snapshot.
    if "error" in landscape and "error" in benchmark:
        return {
            "summary": f"Competitor snapshot failed for {gene}: upstream sources unavailable.",
            "error": "snapshot_failed",
            "sources": {
                "landscape_error": landscape.get("error"),
                "benchmark_error": benchmark.get("error"),
            },
        }

    trial_records = ((landscape.get("trials") or {}).get("top_trials") or [])[:max_programs]

    # Tally trials per sponsor and rank the ten most active.
    sponsor_counts: dict = {}
    for record in trial_records:
        name = str(record.get("sponsor", "")).strip()
        if name:
            sponsor_counts[name] = 1 + sponsor_counts.get(name, 0)
    top_sponsors = sorted(sponsor_counts.items(), key=lambda item: item[1], reverse=True)[:10]

    phase_dist = ((landscape.get("trials") or {}).get("phase_distribution") or {})
    chembl = (landscape.get("chembl") or {})
    open_targets = (landscape.get("open_targets") or {})
    top_endpoints = (benchmark.get("top_primary_endpoints") or [])[:5]

    # Heuristic white-space / competitive-pressure flags.
    differentiation_flags = []
    if _to_int(phase_dist.get("PHASE3", 0), 0) == 0:
        differentiation_flags.append("No Phase 3 pressure detected in returned trial window.")
    if _to_int(chembl.get("unique_compounds", 0), 0) < 10:
        differentiation_flags.append("Limited small-molecule density; potential white space.")
    if _to_int(open_targets.get("n_known_drugs", 0), 0) == 0:
        differentiation_flags.append("No known drugs in Open Targets snapshot for this target.")

    summary = (
        f"Competitor snapshot for {gene}{f' in {indication}' if indication else ''}: "
        f"{_to_int((landscape.get('trials') or {}).get('total_count', 0), 0)} trial records, "
        f"{_to_int(chembl.get('unique_compounds', 0), 0)} ChEMBL compounds, "
        f"{_to_int(open_targets.get('n_known_drugs', 0), 0)} known drugs."
    )

    return {
        "summary": summary,
        "gene": gene,
        "indication": indication or None,
        "top_sponsors": [{"sponsor": name, "trial_count": count} for name, count in top_sponsors],
        "phase_distribution": phase_dist,
        "top_primary_endpoints": top_endpoints,
        "mechanism_classes": sorted(chembl.get("moa_types", []) or []),
        "differentiation_flags": differentiation_flags,
        "programs": trial_records,
        "landscape": landscape,
        "benchmark": benchmark,
    }
|