celltype-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. celltype_cli-0.1.0.dist-info/METADATA +267 -0
  2. celltype_cli-0.1.0.dist-info/RECORD +89 -0
  3. celltype_cli-0.1.0.dist-info/WHEEL +4 -0
  4. celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
  5. celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. ct/__init__.py +3 -0
  7. ct/agent/__init__.py +0 -0
  8. ct/agent/case_studies.py +426 -0
  9. ct/agent/config.py +523 -0
  10. ct/agent/doctor.py +544 -0
  11. ct/agent/knowledge.py +523 -0
  12. ct/agent/loop.py +99 -0
  13. ct/agent/mcp_server.py +478 -0
  14. ct/agent/orchestrator.py +733 -0
  15. ct/agent/runner.py +656 -0
  16. ct/agent/sandbox.py +481 -0
  17. ct/agent/session.py +145 -0
  18. ct/agent/system_prompt.py +186 -0
  19. ct/agent/trace_store.py +228 -0
  20. ct/agent/trajectory.py +169 -0
  21. ct/agent/types.py +182 -0
  22. ct/agent/workflows.py +462 -0
  23. ct/api/__init__.py +1 -0
  24. ct/api/app.py +211 -0
  25. ct/api/config.py +120 -0
  26. ct/api/engine.py +124 -0
  27. ct/cli.py +1448 -0
  28. ct/data/__init__.py +0 -0
  29. ct/data/compute_providers.json +59 -0
  30. ct/data/cro_database.json +395 -0
  31. ct/data/downloader.py +238 -0
  32. ct/data/loaders.py +252 -0
  33. ct/kb/__init__.py +5 -0
  34. ct/kb/benchmarks.py +147 -0
  35. ct/kb/governance.py +106 -0
  36. ct/kb/ingest.py +415 -0
  37. ct/kb/reasoning.py +129 -0
  38. ct/kb/schema_monitor.py +162 -0
  39. ct/kb/substrate.py +387 -0
  40. ct/models/__init__.py +0 -0
  41. ct/models/llm.py +370 -0
  42. ct/tools/__init__.py +195 -0
  43. ct/tools/_compound_resolver.py +297 -0
  44. ct/tools/biomarker.py +368 -0
  45. ct/tools/cellxgene.py +282 -0
  46. ct/tools/chemistry.py +1371 -0
  47. ct/tools/claude.py +390 -0
  48. ct/tools/clinical.py +1153 -0
  49. ct/tools/clue.py +249 -0
  50. ct/tools/code.py +1069 -0
  51. ct/tools/combination.py +397 -0
  52. ct/tools/compute.py +402 -0
  53. ct/tools/cro.py +413 -0
  54. ct/tools/data_api.py +2114 -0
  55. ct/tools/design.py +295 -0
  56. ct/tools/dna.py +575 -0
  57. ct/tools/experiment.py +604 -0
  58. ct/tools/expression.py +655 -0
  59. ct/tools/files.py +957 -0
  60. ct/tools/genomics.py +1387 -0
  61. ct/tools/http_client.py +146 -0
  62. ct/tools/imaging.py +319 -0
  63. ct/tools/intel.py +223 -0
  64. ct/tools/literature.py +743 -0
  65. ct/tools/network.py +422 -0
  66. ct/tools/notification.py +111 -0
  67. ct/tools/omics.py +3330 -0
  68. ct/tools/ops.py +1230 -0
  69. ct/tools/parity.py +649 -0
  70. ct/tools/pk.py +245 -0
  71. ct/tools/protein.py +678 -0
  72. ct/tools/regulatory.py +643 -0
  73. ct/tools/remote_data.py +179 -0
  74. ct/tools/report.py +181 -0
  75. ct/tools/repurposing.py +376 -0
  76. ct/tools/safety.py +1280 -0
  77. ct/tools/shell.py +178 -0
  78. ct/tools/singlecell.py +533 -0
  79. ct/tools/statistics.py +552 -0
  80. ct/tools/structure.py +882 -0
  81. ct/tools/target.py +901 -0
  82. ct/tools/translational.py +123 -0
  83. ct/tools/viability.py +218 -0
  84. ct/ui/__init__.py +0 -0
  85. ct/ui/markdown.py +31 -0
  86. ct/ui/status.py +258 -0
  87. ct/ui/suggestions.py +567 -0
  88. ct/ui/terminal.py +1456 -0
  89. ct/ui/traces.py +112 -0
@@ -0,0 +1,179 @@
1
+ """
2
+ Remote data query tool — queries the hosted ct Data API.
3
+
4
+ Connects to a running ct Data API instance to query large datasets
5
+ (PerturbAtlas, ChEMBL, scPerturb, etc.) that are too large for local download.
6
+
7
+ Configure the endpoint with: ct config set api.data_endpoint http://host:8000
8
+ Or set CT_DATA_ENDPOINT environment variable.
9
+ """
10
+
11
+ import os
12
+ import httpx
13
+ from ct.tools import registry
14
+ from ct.agent.config import Config
15
+
16
+
17
+ def _get_endpoint() -> str | None:
18
+ """Get the configured data API endpoint."""
19
+ endpoint = os.environ.get("CT_DATA_ENDPOINT")
20
+ if endpoint:
21
+ return endpoint.rstrip("/")
22
+ cfg = Config.load()
23
+ val = cfg.get("api.data_endpoint")
24
+ return val.rstrip("/") if val else None
25
+
26
+
27
def _no_endpoint_error() -> dict:
    """Build the error result returned when no Data API endpoint is set.

    Returns a dict with an ``error`` message and a ``summary`` that tells the
    user how to configure the endpoint (config key or environment variable).
    """
    guidance = "".join(
        [
            "No data API endpoint configured. Set with: ",
            "ct config set api.data_endpoint http://your-host:8000 ",
            "or set CT_DATA_ENDPOINT environment variable.",
        ]
    )
    result = {"error": "Data API endpoint not configured."}
    result["summary"] = guidance
    return result
36
+
37
+
38
@registry.register(
    name="remote_data.query",
    description="Query a dataset on the hosted ct Data API (PerturbAtlas, ChEMBL, etc.)",
    category="remote_data",
    parameters={
        "dataset": "Dataset name (e.g. 'perturbatlas', 'chembl')",
        "gene": "Filter by gene symbol (optional)",
        "compound": "Filter by compound name/ID (optional)",
        "filters": "Additional column filters as dict (optional)",
        "limit": "Max rows to return (default 100, max 10000)",
    },
    usage_guide=(
        "You need to query a large dataset that's hosted on the data API (PerturbAtlas, ChEMBL, "
        "scPerturb). Use when the dataset is too large for local download. "
        "Requires api.data_endpoint to be configured."
    ),
)
def query(dataset: str, gene: str = None, compound: str = None,
          filters: dict = None, limit: int = 100, **kwargs) -> dict:
    """Query a dataset on the hosted Data API.

    Sends a JSON ``POST`` to ``{endpoint}/query`` (30 second timeout) and
    converts the response into a tool-result dict.

    Args:
        dataset: Name of the dataset to query.
        gene: Optional gene filter; only sent when truthy.
        compound: Optional compound filter; only sent when truthy.
        filters: Optional extra filters; only sent when truthy.
        limit: Maximum rows requested. Coerced to ``int`` and clamped to the
            documented range 1..10000; an unparseable value falls back to 100.
        **kwargs: Accepted and ignored.

    Returns:
        On success a dict with ``summary``, ``dataset``, ``total_rows``,
        ``columns`` and ``data``. On any failure (endpoint not configured,
        non-200 status, connection/HTTP error, malformed response body) a
        dict with ``error`` and ``summary`` keys; no exception is raised for
        these cases.
    """
    endpoint = _get_endpoint()
    if not endpoint:
        return _no_endpoint_error()

    # Enforce the limit advertised in the tool parameters instead of
    # forwarding arbitrary values to the server.
    try:
        limit = int(limit)
    except (TypeError, ValueError):
        limit = 100
    limit = max(1, min(limit, 10000))

    payload = {"dataset": dataset, "limit": limit}
    if gene:
        payload["gene"] = gene
    if compound:
        payload["compound"] = compound
    if filters:
        payload["filters"] = filters

    try:
        resp = httpx.post(
            f"{endpoint}/query",
            json=payload,
            timeout=30,
        )
    except httpx.ConnectError:
        return {
            "error": f"Cannot connect to Data API at {endpoint}",
            "summary": f"Data API unreachable at {endpoint}. Check the server is running.",
        }
    except httpx.HTTPError as e:
        return {
            "error": f"Data API request failed: {e}",
            "summary": f"Failed to query Data API: {e}",
        }

    if resp.status_code == 404:
        return {
            "error": f"Dataset '{dataset}' not found on the data API.",
            "summary": f"Dataset '{dataset}' is not available. Use remote_data.list_datasets to see what's available.",
        }
    if resp.status_code == 503:
        return {
            "error": f"Dataset '{dataset}' files not found on disk.",
            "summary": f"Dataset '{dataset}' is registered but files are missing on the server.",
        }
    if resp.status_code != 200:
        return {
            "error": f"Data API error: HTTP {resp.status_code}",
            "summary": f"Data API returned HTTP {resp.status_code}: {resp.text[:200]}",
        }

    # A 200 with a non-JSON body (e.g. a proxy HTML page) raises ValueError,
    # which is not an httpx.HTTPError; report it instead of crashing.
    try:
        data = resp.json()
    except ValueError:
        return {
            "error": "Data API returned a non-JSON response.",
            "summary": f"Data API returned an unparseable response: {resp.text[:200]}",
        }
    if not isinstance(data, dict):
        return {
            "error": "Data API returned an unexpected response shape.",
            "summary": f"Expected a JSON object from the Data API, got {type(data).__name__}.",
        }

    rows = data.get("data") or []  # tolerate an explicit null
    total = data.get("total_rows", len(rows))

    # Column names come from the first row, when it is a mapping.
    first_row = rows[0] if isinstance(rows, list) and rows else None
    columns = list(first_row.keys()) if isinstance(first_row, dict) else []

    # Build summary
    filter_desc = []
    if gene:
        filter_desc.append(f"gene={gene}")
    if compound:
        filter_desc.append(f"compound={compound}")
    filter_str = f" ({', '.join(filter_desc)})" if filter_desc else ""

    summary = f"Query {dataset}{filter_str}: {total} rows returned"
    if columns:
        # Only the first eight column names are shown to keep the summary short.
        summary += f". Columns: {', '.join(str(c) for c in columns[:8])}"

    return {
        "summary": summary,
        "dataset": dataset,
        "total_rows": total,
        "columns": columns,
        "data": rows,
    }
128
+
129
+
130
@registry.register(
    name="remote_data.list_datasets",
    description="List datasets available on the hosted ct Data API",
    category="remote_data",
    parameters={},
    usage_guide=(
        "You want to see what datasets are available on the configured data API. "
        "Run this first to discover available data before querying."
    ),
)
def list_datasets(**kwargs) -> dict:
    """List datasets available on the Data API.

    Sends ``GET {endpoint}/datasets`` (10 second timeout).

    Args:
        **kwargs: Accepted and ignored.

    Returns:
        On success a dict with ``summary`` and ``datasets`` (the decoded
        response, or ``[]`` when it is empty). On failure (endpoint not
        configured, non-200 status, connection/HTTP error, malformed
        response body) a dict with ``error`` and ``summary`` keys.
    """
    endpoint = _get_endpoint()
    if not endpoint:
        return _no_endpoint_error()

    try:
        resp = httpx.get(f"{endpoint}/datasets", timeout=10)
    except httpx.ConnectError:
        return {
            "error": f"Cannot connect to Data API at {endpoint}",
            "summary": f"Data API unreachable at {endpoint}. Check the server is running.",
        }
    except httpx.HTTPError as e:
        return {
            "error": f"Data API request failed: {e}",
            "summary": f"Failed to list datasets: {e}",
        }

    if resp.status_code != 200:
        return {
            "error": f"Data API error: HTTP {resp.status_code}",
            "summary": f"Failed to list datasets: HTTP {resp.status_code}",
        }

    # A non-JSON body raises ValueError, which httpx.HTTPError does not cover.
    try:
        datasets = resp.json()
    except ValueError:
        return {
            "error": "Data API returned a non-JSON response.",
            "summary": f"Failed to list datasets: unparseable response: {resp.text[:200]}",
        }

    if not datasets:
        return {
            "summary": "No datasets available on the Data API.",
            "datasets": [],
        }
    if not isinstance(datasets, list):
        return {
            "error": "Data API returned an unexpected response shape.",
            "summary": f"Failed to list datasets: expected a JSON list, got {type(datasets).__name__}.",
        }

    # Be tolerant of entries that lack a "name" key or are plain strings, so
    # one odd entry does not turn a successful listing into a crash.
    names = [
        str(entry.get("name", "<unnamed>")) if isinstance(entry, dict) else str(entry)
        for entry in datasets
    ]
    summary = f"Data API has {len(datasets)} datasets: {', '.join(names)}"

    return {
        "summary": summary,
        "datasets": datasets,
    }
ct/tools/report.py ADDED
@@ -0,0 +1,181 @@
1
+ """
2
+ Report utility tools for decision-ready communication outputs.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ import re
10
+
11
+ from ct.tools import registry
12
+
13
+
14
def _slug(text: str, max_len: int = 64) -> str:
    """Turn free text into a lowercase, underscore-separated filename stem.

    Every run of characters outside ``[a-zA-Z0-9]`` becomes a single
    underscore and leading/trailing underscores are removed. Empty or
    ``None`` input (or input with no usable characters) yields
    ``"pharma_brief"``. The result is cut to ``max_len`` characters.
    """
    normalized = re.sub(r"[^a-zA-Z0-9]+", "_", str(text or "").strip().lower()).strip("_")
    # Truncation can land right after a separator; trim again so the stem
    # never ends with a dangling underscore.
    return (normalized or "pharma_brief")[:max_len].rstrip("_")
17
+
18
+
19
def _extract_evidence_lines(evidence) -> list[str]:
    """Flatten an evidence payload into a list of non-empty, stripped lines.

    Supported payload shapes:

    * ``None`` -> ``[]``.
    * ``str`` -> one entry per line.
    * ``dict`` -> its ``summary`` (when truthy), followed by the items found
      under ``key_evidence``, ``evidence``, ``findings`` and ``signals``
      (each may be a list/tuple of items or a multi-line string).
    * ``list``/``tuple`` -> one entry per item; a dict item contributes its
      ``summary`` when it has that key, otherwise its ``str()`` form.

    Any other type yields ``[]``. Every shape gets the same normalisation:
    entries are stripped and empty ones are dropped.
    """
    collected: list[str] = []

    if isinstance(evidence, str):
        collected.extend(evidence.splitlines())
    elif isinstance(evidence, dict):
        if evidence.get("summary"):
            collected.append(str(evidence["summary"]))
        for key in ("key_evidence", "evidence", "findings", "signals"):
            value = evidence.get(key)
            if isinstance(value, str):
                collected.extend(value.splitlines())
            elif isinstance(value, (list, tuple)):
                collected.extend(str(item) for item in value)
    elif isinstance(evidence, (list, tuple)):
        for item in evidence:
            if isinstance(item, dict) and "summary" in item:
                collected.append(str(item["summary"]))
            else:
                collected.append(str(item))

    # Single normalisation step shared by every branch above.
    stripped = (entry.strip() for entry in collected)
    return [entry for entry in stripped if entry]
48
+
49
+
50
@registry.register(
    name="report.pharma_brief",
    description="Generate a one-page pharma decision brief and optionally save as markdown/HTML",
    category="report",
    parameters={
        "query": "Original research question or program objective",
        "program_thesis": "Concise recommendation statement",
        "target_rationale": "Mechanistic rationale (optional)",
        "biomarker_strategy": "Biomarker/patient-selection strategy (optional)",
        "safety_review": "Top liabilities and mitigation strategy (optional)",
        "competitive_differentiation": "Differentiation thesis vs landscape (optional)",
        "evidence": "Optional evidence payload (string/list/dict) to anchor key bullets",
        "save": "Whether to save markdown brief to output directory (default True)",
        "publish_html": "Whether to also publish a shareable HTML page (default True)",
        "filename": "Optional output filename stem",
    },
    usage_guide=(
        "Use at the end of an analysis to create a board/investor/pharma-partner-ready brief "
        "with explicit thesis, risk, and differentiation framing."
    ),
)
def pharma_brief(
    query: str,
    program_thesis: str = "",
    target_rationale: str = "",
    biomarker_strategy: str = "",
    safety_review: str = "",
    competitive_differentiation: str = "",
    evidence=None,
    save: bool = True,
    publish_html: bool = True,
    filename: str = "",
    _session=None,
    **kwargs,
) -> dict:
    """Create a decision-ready pharma brief and optionally persist it.

    Args:
        query: Research question; required. Used as the brief title and, when
            ``filename`` is empty, as the basis of the output filename.
        program_thesis: Recommendation statement; a placeholder is used when
            empty.
        target_rationale: Mechanism rationale; placeholder when empty.
        biomarker_strategy: Biomarker strategy; placeholder when empty.
        safety_review: Safety review; placeholder when empty.
        competitive_differentiation: Differentiation statement; placeholder
            when empty.
        evidence: Optional payload passed to ``_extract_evidence_lines``; at
            most the first 12 extracted lines are rendered as bullets.
        save: When true, write the markdown under ``<output_dir>/reports``,
            where ``output_dir`` is ``_session.config.get("sandbox.output_dir")``
            if available, else ``<cwd>/outputs``. Existing files are never
            overwritten; a numeric suffix is appended instead.
        publish_html: When true *and* ``save`` is true, also call
            ``ct.reports.html.publish_report`` on the saved markdown file.
            If that module cannot be imported, the brief is still returned
            and the summary notes that HTML publishing was unavailable.
        filename: Optional filename stem (slugified).
        _session: Optional session object; only its ``config`` is read.
        **kwargs: Accepted and ignored.

    Returns:
        A dict with ``summary``, ``query``, ``markdown``, ``markdown_path``
        and ``html_path`` (paths are ``None`` when nothing was written), or
        ``{"summary": ..., "error": "missing_query"}`` when ``query`` is
        empty.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    del kwargs
    query = (query or "").strip()
    if not query:
        return {"summary": "query is required.", "error": "missing_query"}

    thesis = (program_thesis or "").strip() or "Recommendation pending deeper evidence synthesis."
    target_rationale = (target_rationale or "").strip() or "Mechanism rationale not yet specified."
    biomarker_strategy = (biomarker_strategy or "").strip() or "Biomarker strategy not yet specified."
    safety_review = (safety_review or "").strip() or "Safety review pending."
    competitive_differentiation = (
        (competitive_differentiation or "").strip()
        or "Differentiation statement pending competitor benchmarking."
    )

    evidence_lines = _extract_evidence_lines(evidence)[:12]

    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    md_lines = [
        f"# Pharma Brief: {query}",
        "",
        f"*Generated by celltype-cli on {generated_at}*",
        "",
        "## Program Thesis",
        thesis,
        "",
        "## Target / Mechanism Rationale",
        target_rationale,
        "",
        "## Biomarker / Patient Stratification",
        biomarker_strategy,
        "",
        "## Safety / Liability Review",
        safety_review,
        "",
        "## Competitive Landscape & Differentiation",
        competitive_differentiation,
        "",
        "## Key Evidence Highlights",
    ]

    if evidence_lines:
        md_lines.extend(f"- {line}" for line in evidence_lines)
    else:
        md_lines.append("- No structured evidence payload provided.")
    md_lines.extend(
        [
            "",
            "## Suggested Next Decisions",
            "1. Confirm go/no-go assumptions with one orthogonal dataset.",
            "2. Define a first-in-human stratification + endpoint strategy.",
            "3. Quantify competitive differentiation against active Phase 2/3 programs.",
        ]
    )
    markdown = "\n".join(md_lines)

    output_md = None
    output_html = None
    html_note = ""

    if save:
        output_base = None
        if _session is not None and getattr(_session, "config", None) is not None:
            output_base = _session.config.get("sandbox.output_dir")
        out_dir = (
            Path(output_base) / "reports"
            if output_base
            else Path.cwd() / "outputs" / "reports"
        )
        out_dir.mkdir(parents=True, exist_ok=True)

        stem = _slug(filename or query)
        output_md = out_dir / f"{stem}_pharma_brief.md"
        # Never overwrite an earlier brief: probe _2, _3, ... until free.
        counter = 2
        while output_md.exists():
            output_md = out_dir / f"{stem}_pharma_brief_{counter}.md"
            counter += 1
        output_md.write_text(markdown, encoding="utf-8")

        if publish_html:
            # The markdown is already on disk at this point; a missing HTML
            # publisher must not turn a successful save into a crash.
            try:
                from ct.reports.html import publish_report
            except ImportError:
                html_note = " HTML publishing unavailable (ct.reports.html could not be imported)."
            else:
                output_html = publish_report(output_md)

    summary = (
        f"Generated pharma brief for '{query}'."
        + (f" Saved: {output_md}" if output_md else "")
        + (f" HTML: {output_html}" if output_html else "")
        + html_note
    )

    return {
        "summary": summary,
        "query": query,
        "markdown": markdown,
        "markdown_path": str(output_md) if output_md else None,
        "html_path": str(output_html) if output_html else None,
    }