celltype-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celltype_cli-0.1.0.dist-info/METADATA +267 -0
- celltype_cli-0.1.0.dist-info/RECORD +89 -0
- celltype_cli-0.1.0.dist-info/WHEEL +4 -0
- celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
- celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- ct/__init__.py +3 -0
- ct/agent/__init__.py +0 -0
- ct/agent/case_studies.py +426 -0
- ct/agent/config.py +523 -0
- ct/agent/doctor.py +544 -0
- ct/agent/knowledge.py +523 -0
- ct/agent/loop.py +99 -0
- ct/agent/mcp_server.py +478 -0
- ct/agent/orchestrator.py +733 -0
- ct/agent/runner.py +656 -0
- ct/agent/sandbox.py +481 -0
- ct/agent/session.py +145 -0
- ct/agent/system_prompt.py +186 -0
- ct/agent/trace_store.py +228 -0
- ct/agent/trajectory.py +169 -0
- ct/agent/types.py +182 -0
- ct/agent/workflows.py +462 -0
- ct/api/__init__.py +1 -0
- ct/api/app.py +211 -0
- ct/api/config.py +120 -0
- ct/api/engine.py +124 -0
- ct/cli.py +1448 -0
- ct/data/__init__.py +0 -0
- ct/data/compute_providers.json +59 -0
- ct/data/cro_database.json +395 -0
- ct/data/downloader.py +238 -0
- ct/data/loaders.py +252 -0
- ct/kb/__init__.py +5 -0
- ct/kb/benchmarks.py +147 -0
- ct/kb/governance.py +106 -0
- ct/kb/ingest.py +415 -0
- ct/kb/reasoning.py +129 -0
- ct/kb/schema_monitor.py +162 -0
- ct/kb/substrate.py +387 -0
- ct/models/__init__.py +0 -0
- ct/models/llm.py +370 -0
- ct/tools/__init__.py +195 -0
- ct/tools/_compound_resolver.py +297 -0
- ct/tools/biomarker.py +368 -0
- ct/tools/cellxgene.py +282 -0
- ct/tools/chemistry.py +1371 -0
- ct/tools/claude.py +390 -0
- ct/tools/clinical.py +1153 -0
- ct/tools/clue.py +249 -0
- ct/tools/code.py +1069 -0
- ct/tools/combination.py +397 -0
- ct/tools/compute.py +402 -0
- ct/tools/cro.py +413 -0
- ct/tools/data_api.py +2114 -0
- ct/tools/design.py +295 -0
- ct/tools/dna.py +575 -0
- ct/tools/experiment.py +604 -0
- ct/tools/expression.py +655 -0
- ct/tools/files.py +957 -0
- ct/tools/genomics.py +1387 -0
- ct/tools/http_client.py +146 -0
- ct/tools/imaging.py +319 -0
- ct/tools/intel.py +223 -0
- ct/tools/literature.py +743 -0
- ct/tools/network.py +422 -0
- ct/tools/notification.py +111 -0
- ct/tools/omics.py +3330 -0
- ct/tools/ops.py +1230 -0
- ct/tools/parity.py +649 -0
- ct/tools/pk.py +245 -0
- ct/tools/protein.py +678 -0
- ct/tools/regulatory.py +643 -0
- ct/tools/remote_data.py +179 -0
- ct/tools/report.py +181 -0
- ct/tools/repurposing.py +376 -0
- ct/tools/safety.py +1280 -0
- ct/tools/shell.py +178 -0
- ct/tools/singlecell.py +533 -0
- ct/tools/statistics.py +552 -0
- ct/tools/structure.py +882 -0
- ct/tools/target.py +901 -0
- ct/tools/translational.py +123 -0
- ct/tools/viability.py +218 -0
- ct/ui/__init__.py +0 -0
- ct/ui/markdown.py +31 -0
- ct/ui/status.py +258 -0
- ct/ui/suggestions.py +567 -0
- ct/ui/terminal.py +1456 -0
- ct/ui/traces.py +112 -0
ct/tools/remote_data.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Remote data query tool — queries the hosted ct Data API.
|
|
3
|
+
|
|
4
|
+
Connects to a running ct Data API instance to query large datasets
|
|
5
|
+
(PerturbAtlas, ChEMBL, scPerturb, etc.) that are too large for local download.
|
|
6
|
+
|
|
7
|
+
Configure the endpoint with: ct config set api.data_endpoint http://host:8000
|
|
8
|
+
Or set CT_DATA_ENDPOINT environment variable.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import httpx
|
|
13
|
+
from ct.tools import registry
|
|
14
|
+
from ct.agent.config import Config
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _get_endpoint() -> str | None:
|
|
18
|
+
"""Get the configured data API endpoint."""
|
|
19
|
+
endpoint = os.environ.get("CT_DATA_ENDPOINT")
|
|
20
|
+
if endpoint:
|
|
21
|
+
return endpoint.rstrip("/")
|
|
22
|
+
cfg = Config.load()
|
|
23
|
+
val = cfg.get("api.data_endpoint")
|
|
24
|
+
return val.rstrip("/") if val else None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _no_endpoint_error() -> dict:
|
|
28
|
+
return {
|
|
29
|
+
"error": "Data API endpoint not configured.",
|
|
30
|
+
"summary": (
|
|
31
|
+
"No data API endpoint configured. Set with: "
|
|
32
|
+
"ct config set api.data_endpoint http://your-host:8000 "
|
|
33
|
+
"or set CT_DATA_ENDPOINT environment variable."
|
|
34
|
+
),
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@registry.register(
    name="remote_data.query",
    description="Query a dataset on the hosted ct Data API (PerturbAtlas, ChEMBL, etc.)",
    category="remote_data",
    parameters={
        "dataset": "Dataset name (e.g. 'perturbatlas', 'chembl')",
        "gene": "Filter by gene symbol (optional)",
        "compound": "Filter by compound name/ID (optional)",
        "filters": "Additional column filters as dict (optional)",
        "limit": "Max rows to return (default 100, max 10000)",
    },
    usage_guide=(
        "You need to query a large dataset that's hosted on the data API (PerturbAtlas, ChEMBL, "
        "scPerturb). Use when the dataset is too large for local download. "
        "Requires api.data_endpoint to be configured."
    ),
)
def query(dataset: str, gene: str | None = None, compound: str | None = None,
          filters: dict | None = None, limit: int = 100, **kwargs) -> dict:
    """Query a dataset on the hosted Data API.

    Args:
        dataset: Dataset name registered on the API (e.g. 'perturbatlas').
        gene: Optional gene-symbol filter.
        compound: Optional compound name/ID filter.
        filters: Optional extra column filters, forwarded verbatim to the API.
        limit: Max rows to return; clamped to the documented 1..10000 range.

    Returns:
        A dict that always has a "summary" key, plus either the result
        payload ("dataset", "total_rows", "columns", "data") on success or
        an "error" key describing the failure.
    """
    endpoint = _get_endpoint()
    if not endpoint:
        return _no_endpoint_error()

    # Enforce the documented bounds ("default 100, max 10000"); previously
    # any value was forwarded to the server unchecked.
    limit = max(1, min(int(limit), 10000))

    payload = {"dataset": dataset, "limit": limit}
    if gene:
        payload["gene"] = gene
    if compound:
        payload["compound"] = compound
    if filters:
        payload["filters"] = filters

    try:
        resp = httpx.post(
            f"{endpoint}/query",
            json=payload,
            timeout=30,
        )

        # Map the API's known status codes to actionable error payloads.
        if resp.status_code == 404:
            return {
                "error": f"Dataset '{dataset}' not found on the data API.",
                "summary": f"Dataset '{dataset}' is not available. Use remote_data.list_datasets to see what's available.",
            }
        if resp.status_code == 503:
            return {
                "error": f"Dataset '{dataset}' files not found on disk.",
                "summary": f"Dataset '{dataset}' is registered but files are missing on the server.",
            }
        if resp.status_code != 200:
            return {
                "error": f"Data API error: HTTP {resp.status_code}",
                "summary": f"Data API returned HTTP {resp.status_code}: {resp.text[:200]}",
            }

        data = resp.json()
        rows = data.get("data", [])
        total = data.get("total_rows", len(rows))

        # Build a human-readable one-liner describing the query and result shape.
        filter_desc = []
        if gene:
            filter_desc.append(f"gene={gene}")
        if compound:
            filter_desc.append(f"compound={compound}")
        filter_str = f" ({', '.join(filter_desc)})" if filter_desc else ""

        summary = f"Query {dataset}{filter_str}: {total} rows returned"
        if rows:
            cols = list(rows[0].keys())
            summary += f". Columns: {', '.join(cols[:8])}"

        return {
            "summary": summary,
            "dataset": dataset,
            "total_rows": total,
            "columns": list(rows[0].keys()) if rows else [],
            "data": rows,
        }

    except httpx.ConnectError:
        return {
            "error": f"Cannot connect to Data API at {endpoint}",
            "summary": f"Data API unreachable at {endpoint}. Check the server is running.",
        }
    except httpx.HTTPError as e:
        return {
            "error": f"Data API request failed: {e}",
            "summary": f"Failed to query Data API: {e}",
        }
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@registry.register(
    name="remote_data.list_datasets",
    description="List datasets available on the hosted ct Data API",
    category="remote_data",
    parameters={},
    usage_guide=(
        "You want to see what datasets are available on the configured data API. "
        "Run this first to discover available data before querying."
    ),
)
def list_datasets(**kwargs) -> dict:
    """Fetch the dataset catalog from the configured Data API.

    Returns a dict with a "summary" key plus a "datasets" list on success,
    or an "error" key when the endpoint is unset, unreachable, or returns
    a non-200 response.
    """
    endpoint = _get_endpoint()
    if not endpoint:
        return _no_endpoint_error()

    try:
        resp = httpx.get(f"{endpoint}/datasets", timeout=10)
        if resp.status_code != 200:
            return {
                "error": f"Data API error: HTTP {resp.status_code}",
                "summary": f"Failed to list datasets: HTTP {resp.status_code}",
            }
        catalog = resp.json()
    except httpx.ConnectError:
        return {
            "error": f"Cannot connect to Data API at {endpoint}",
            "summary": f"Data API unreachable at {endpoint}. Check the server is running.",
        }
    except httpx.HTTPError as e:
        return {
            "error": f"Data API request failed: {e}",
            "summary": f"Failed to list datasets: {e}",
        }

    if not catalog:
        return {
            "summary": "No datasets available on the Data API.",
            "datasets": [],
        }

    # One-line roll call of every registered dataset name.
    names = ", ".join(entry["name"] for entry in catalog)
    return {
        "summary": f"Data API has {len(catalog)} datasets: {names}",
        "datasets": catalog,
    }
|
ct/tools/report.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Report utility tools for decision-ready communication outputs.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
from ct.tools import registry
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _slug(text: str, max_len: int = 64) -> str:
|
|
15
|
+
slug = re.sub(r"[^a-zA-Z0-9]+", "_", str(text or "").strip().lower()).strip("_")
|
|
16
|
+
return (slug or "pharma_brief")[:max_len]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _extract_evidence_lines(evidence) -> list[str]:
|
|
20
|
+
lines: list[str] = []
|
|
21
|
+
if evidence is None:
|
|
22
|
+
return lines
|
|
23
|
+
if isinstance(evidence, str):
|
|
24
|
+
for line in evidence.splitlines():
|
|
25
|
+
line = line.strip()
|
|
26
|
+
if line:
|
|
27
|
+
lines.append(line)
|
|
28
|
+
return lines
|
|
29
|
+
if isinstance(evidence, dict):
|
|
30
|
+
if evidence.get("summary"):
|
|
31
|
+
lines.append(str(evidence["summary"]))
|
|
32
|
+
for key in ("key_evidence", "evidence", "findings", "signals"):
|
|
33
|
+
value = evidence.get(key)
|
|
34
|
+
if isinstance(value, list):
|
|
35
|
+
for item in value:
|
|
36
|
+
lines.append(str(item))
|
|
37
|
+
return lines
|
|
38
|
+
if isinstance(evidence, list):
|
|
39
|
+
for item in evidence:
|
|
40
|
+
if isinstance(item, dict):
|
|
41
|
+
if "summary" in item:
|
|
42
|
+
lines.append(str(item["summary"]))
|
|
43
|
+
else:
|
|
44
|
+
lines.append(str(item))
|
|
45
|
+
else:
|
|
46
|
+
lines.append(str(item))
|
|
47
|
+
return [x.strip() for x in lines if str(x).strip()]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@registry.register(
    name="report.pharma_brief",
    description="Generate a one-page pharma decision brief and optionally save as markdown/HTML",
    category="report",
    parameters={
        "query": "Original research question or program objective",
        "program_thesis": "Concise recommendation statement",
        "target_rationale": "Mechanistic rationale (optional)",
        "biomarker_strategy": "Biomarker/patient-selection strategy (optional)",
        "safety_review": "Top liabilities and mitigation strategy (optional)",
        "competitive_differentiation": "Differentiation thesis vs landscape (optional)",
        "evidence": "Optional evidence payload (string/list/dict) to anchor key bullets",
        "save": "Whether to save markdown brief to output directory (default True)",
        "publish_html": "Whether to also publish a shareable HTML page (default True)",
        "filename": "Optional output filename stem",
    },
    usage_guide=(
        "Use at the end of an analysis to create a board/investor/pharma-partner-ready brief "
        "with explicit thesis, risk, and differentiation framing."
    ),
)
def pharma_brief(
    query: str,
    program_thesis: str = "",
    target_rationale: str = "",
    biomarker_strategy: str = "",
    safety_review: str = "",
    competitive_differentiation: str = "",
    evidence=None,
    save: bool = True,
    publish_html: bool = True,
    filename: str = "",
    _session=None,
    **kwargs,
) -> dict:
    """Create a decision-ready pharma brief and optionally persist it.

    Args:
        query: Research question / program objective (required).
        program_thesis / target_rationale / biomarker_strategy /
        safety_review / competitive_differentiation: Narrative sections;
            each blank section is replaced by an explicit "pending" placeholder.
        evidence: str / list / dict payload; up to 12 bullets are extracted
            via _extract_evidence_lines.
        save: Write the markdown brief under <output_dir>/reports.
        publish_html: Also render a shareable HTML page from the markdown.
        filename: Optional output filename stem (slugified via _slug).
        _session: Agent session; its config supplies sandbox.output_dir.

    Returns:
        Dict with "summary", "query", the full "markdown" text, and
        "markdown_path" / "html_path" (None when not written).
    """
    # Local import: the module-level import brings in only `datetime`.
    from datetime import timezone

    del kwargs
    query = (query or "").strip()
    if not query:
        return {"summary": "query is required.", "error": "missing_query"}

    # Fill blank sections with explicit placeholders so no heading renders empty.
    thesis = (program_thesis or "").strip() or "Recommendation pending deeper evidence synthesis."
    target_rationale = (target_rationale or "").strip() or "Mechanism rationale not yet specified."
    biomarker_strategy = (biomarker_strategy or "").strip() or "Biomarker strategy not yet specified."
    safety_review = (safety_review or "").strip() or "Safety review pending."
    competitive_differentiation = (
        (competitive_differentiation or "").strip()
        or "Differentiation statement pending competitor benchmarking."
    )

    evidence_lines = _extract_evidence_lines(evidence)[:12]

    # Fix: datetime.utcnow() is deprecated and returns a naive datetime;
    # use an aware UTC timestamp instead. The strftime output is identical.
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    md_lines = [
        f"# Pharma Brief: {query}",
        "",
        f"*Generated by celltype-cli on {generated_at}*",
        "",
        "## Program Thesis",
        thesis,
        "",
        "## Target / Mechanism Rationale",
        target_rationale,
        "",
        "## Biomarker / Patient Stratification",
        biomarker_strategy,
        "",
        "## Safety / Liability Review",
        safety_review,
        "",
        "## Competitive Landscape & Differentiation",
        competitive_differentiation,
        "",
        "## Key Evidence Highlights",
    ]

    if evidence_lines:
        for line in evidence_lines:
            md_lines.append(f"- {line}")
    else:
        md_lines.append("- No structured evidence payload provided.")
    md_lines.extend(
        [
            "",
            "## Suggested Next Decisions",
            "1. Confirm go/no-go assumptions with one orthogonal dataset.",
            "2. Define a first-in-human stratification + endpoint strategy.",
            "3. Quantify competitive differentiation against active Phase 2/3 programs.",
        ]
    )
    markdown = "\n".join(md_lines)

    output_md = None
    output_html = None

    if save:
        output_base = None
        if _session is not None and getattr(_session, "config", None) is not None:
            output_base = _session.config.get("sandbox.output_dir")
        out_dir = (
            Path(output_base) / "reports"
            if output_base
            else Path.cwd() / "outputs" / "reports"
        )
        out_dir.mkdir(parents=True, exist_ok=True)

        # Avoid clobbering an existing brief: append an increasing suffix.
        stem = _slug(filename or query)
        output_md = out_dir / f"{stem}_pharma_brief.md"
        counter = 2
        while output_md.exists():
            output_md = out_dir / f"{stem}_pharma_brief_{counter}.md"
            counter += 1
        output_md.write_text(markdown, encoding="utf-8")

        if publish_html:
            # Imported lazily inside the save branch — only needed when publishing.
            from ct.reports.html import publish_report

            output_html = publish_report(output_md)

    summary = (
        f"Generated pharma brief for '{query}'."
        + (f" Saved: {output_md}" if output_md else "")
        + (f" HTML: {output_html}" if output_html else "")
    )

    return {
        "summary": summary,
        "query": query,
        "markdown": markdown,
        "markdown_path": str(output_md) if output_md else None,
        "html_path": str(output_html) if output_html else None,
    }
|