celltype-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celltype_cli-0.1.0.dist-info/METADATA +267 -0
- celltype_cli-0.1.0.dist-info/RECORD +89 -0
- celltype_cli-0.1.0.dist-info/WHEEL +4 -0
- celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
- celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- ct/__init__.py +3 -0
- ct/agent/__init__.py +0 -0
- ct/agent/case_studies.py +426 -0
- ct/agent/config.py +523 -0
- ct/agent/doctor.py +544 -0
- ct/agent/knowledge.py +523 -0
- ct/agent/loop.py +99 -0
- ct/agent/mcp_server.py +478 -0
- ct/agent/orchestrator.py +733 -0
- ct/agent/runner.py +656 -0
- ct/agent/sandbox.py +481 -0
- ct/agent/session.py +145 -0
- ct/agent/system_prompt.py +186 -0
- ct/agent/trace_store.py +228 -0
- ct/agent/trajectory.py +169 -0
- ct/agent/types.py +182 -0
- ct/agent/workflows.py +462 -0
- ct/api/__init__.py +1 -0
- ct/api/app.py +211 -0
- ct/api/config.py +120 -0
- ct/api/engine.py +124 -0
- ct/cli.py +1448 -0
- ct/data/__init__.py +0 -0
- ct/data/compute_providers.json +59 -0
- ct/data/cro_database.json +395 -0
- ct/data/downloader.py +238 -0
- ct/data/loaders.py +252 -0
- ct/kb/__init__.py +5 -0
- ct/kb/benchmarks.py +147 -0
- ct/kb/governance.py +106 -0
- ct/kb/ingest.py +415 -0
- ct/kb/reasoning.py +129 -0
- ct/kb/schema_monitor.py +162 -0
- ct/kb/substrate.py +387 -0
- ct/models/__init__.py +0 -0
- ct/models/llm.py +370 -0
- ct/tools/__init__.py +195 -0
- ct/tools/_compound_resolver.py +297 -0
- ct/tools/biomarker.py +368 -0
- ct/tools/cellxgene.py +282 -0
- ct/tools/chemistry.py +1371 -0
- ct/tools/claude.py +390 -0
- ct/tools/clinical.py +1153 -0
- ct/tools/clue.py +249 -0
- ct/tools/code.py +1069 -0
- ct/tools/combination.py +397 -0
- ct/tools/compute.py +402 -0
- ct/tools/cro.py +413 -0
- ct/tools/data_api.py +2114 -0
- ct/tools/design.py +295 -0
- ct/tools/dna.py +575 -0
- ct/tools/experiment.py +604 -0
- ct/tools/expression.py +655 -0
- ct/tools/files.py +957 -0
- ct/tools/genomics.py +1387 -0
- ct/tools/http_client.py +146 -0
- ct/tools/imaging.py +319 -0
- ct/tools/intel.py +223 -0
- ct/tools/literature.py +743 -0
- ct/tools/network.py +422 -0
- ct/tools/notification.py +111 -0
- ct/tools/omics.py +3330 -0
- ct/tools/ops.py +1230 -0
- ct/tools/parity.py +649 -0
- ct/tools/pk.py +245 -0
- ct/tools/protein.py +678 -0
- ct/tools/regulatory.py +643 -0
- ct/tools/remote_data.py +179 -0
- ct/tools/report.py +181 -0
- ct/tools/repurposing.py +376 -0
- ct/tools/safety.py +1280 -0
- ct/tools/shell.py +178 -0
- ct/tools/singlecell.py +533 -0
- ct/tools/statistics.py +552 -0
- ct/tools/structure.py +882 -0
- ct/tools/target.py +901 -0
- ct/tools/translational.py +123 -0
- ct/tools/viability.py +218 -0
- ct/ui/__init__.py +0 -0
- ct/ui/markdown.py +31 -0
- ct/ui/status.py +258 -0
- ct/ui/suggestions.py +567 -0
- ct/ui/terminal.py +1456 -0
- ct/ui/traces.py +112 -0
ct/tools/cellxgene.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CELLxGENE Census tools for querying single-cell expression data.
|
|
3
|
+
|
|
4
|
+
Uses the CZ CELLxGENE Census API to query gene expression across tissues
|
|
5
|
+
and cell types without downloading terabytes of local data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from ct.tools import registry
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _check_census_sdk():
|
|
12
|
+
"""Check if cellxgene-census SDK is installed. Returns error dict or None."""
|
|
13
|
+
try:
|
|
14
|
+
import cellxgene_census # noqa: F401
|
|
15
|
+
return None
|
|
16
|
+
except ImportError:
|
|
17
|
+
return {
|
|
18
|
+
"error": "cellxgene-census not installed.",
|
|
19
|
+
"summary": (
|
|
20
|
+
"cellxgene-census SDK required but not installed. "
|
|
21
|
+
"Install with: pip install 'celltype-cli[singlecell]' "
|
|
22
|
+
"or: pip install cellxgene-census"
|
|
23
|
+
),
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@registry.register(
    name="cellxgene.gene_expression",
    description="Query single-cell gene expression across tissues and cell types from CELLxGENE Census",
    category="cellxgene",
    parameters={
        "gene": "Gene symbol (e.g. EGFR, TP53)",
        "tissue": "Tissue to filter by (e.g. lung, liver). Optional — queries all tissues if omitted.",
        "organism": "Organism (default: Homo sapiens)",
    },
    usage_guide=(
        "You need single-cell resolution expression data for a gene across tissues and cell types. "
        "Much more detailed than GTEx bulk RNA-seq. Use for cell-type-specific target validation."
    ),
)
def gene_expression(gene: str, tissue: str = None, organism: str = "Homo sapiens",
                    **kwargs) -> dict:
    """Summarise Census cell coverage for a gene, by tissue and cell type.

    Confirms that ``gene`` is present in the Census RNA feature table and
    counts primary-data cells per (tissue, cell type) pair. No expression
    values are read from the X matrix; only cell counts are reported.

    Args:
        gene: Gene symbol, matched against ``feature_name``.
        tissue: Optional ``tissue_general`` value; all tissues when omitted.
        organism: Organism name; lower-cased with spaces replaced by
            underscores to form the ``census_data`` key.
        **kwargs: Accepted and ignored.

    Returns:
        On success, a dict with ``summary``, ``gene``, ``tissues``,
        ``n_cell_types``, ``n_cells_total`` and ``expression_by_cell_type``
        (the 30 largest tissue/cell-type groups by cell count). On any
        failure, a dict with ``error`` and ``summary`` keys.
    """
    err = _check_census_sdk()
    if err:
        return err

    import cellxgene_census

    def _quoted(value):
        # Backslash-escape single quotes so a value such as "10x 3' v3" cannot
        # terminate the filter literal early. An already-escaped \' is first
        # normalised so pre-escaped input yields the same filter as before.
        text = str(value).replace("\\'", "'").replace("'", "\\'")
        return f"'{text}'"

    try:
        with cellxgene_census.open_soma(census_version="stable") as census:
            experiment = census["census_data"][organism.lower().replace(" ", "_")]

            # Build the cell (obs) filter
            value_filter = "is_primary_data == True"
            if tissue:
                value_filter += f" and tissue_general == {_quoted(tissue)}"

            obs_df = experiment.obs.read(
                value_filter=value_filter,
                column_names=["cell_type", "tissue_general", "disease", "assay"],
            ).concat().to_pandas()

            if obs_df.empty:
                return {
                    "summary": f"No cells found for tissue='{tissue}'" if tissue else "No data found",
                    "error": "No matching cells in Census",
                }

            # Look the gene up in the feature table; this only checks that it
            # exists, it does not fetch expression values.
            gene_df = experiment.ms["RNA"].var.read(
                value_filter=f"feature_name == {_quoted(gene)}",
                column_names=["soma_joinid", "feature_name"],
            ).concat().to_pandas()

            if gene_df.empty:
                return {
                    "summary": f"Gene '{gene}' not found in Census",
                    "error": f"Gene {gene} not found in CELLxGENE Census",
                }

            # Aggregate by cell_type × tissue. observed=True keeps only
            # combinations that actually occur, in case these columns come
            # back as pandas categoricals (it is a no-op otherwise).
            agg = (
                obs_df.groupby(["tissue_general", "cell_type"], observed=True)
                .size()
                .reset_index(name="n_cells")
            )
            top_cell_types = agg.nlargest(30, "n_cells")

            expression_by_cell_type = [
                {"tissue": tissue_name, "cell_type": cell_type, "n_cells": int(n_cells)}
                for tissue_name, cell_type, n_cells in zip(
                    top_cell_types["tissue_general"],
                    top_cell_types["cell_type"],
                    top_cell_types["n_cells"],
                )
            ]

            tissues = sorted(obs_df["tissue_general"].unique().tolist())
            cell_types = sorted(obs_df["cell_type"].unique().tolist())

            if tissue:
                summary = f"{gene} in {tissue}: {len(cell_types)} cell types, {len(obs_df)} cells"
            else:
                summary = (
                    f"{gene} expression across {len(tissues)} tissues, "
                    f"{len(cell_types)} cell types, {len(obs_df)} total cells"
                )

            return {
                "summary": summary,
                "gene": gene,
                "tissues": tissues,
                "n_cell_types": len(cell_types),
                "n_cells_total": len(obs_df),
                "expression_by_cell_type": expression_by_cell_type,
            }

    except Exception as e:
        # Tool boundary: report any failure as a result dict instead of raising.
        return {
            "error": f"Census query failed: {e}",
            "summary": f"Failed to query CELLxGENE Census for {gene}: {e}",
        }
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@registry.register(
    name="cellxgene.cell_type_markers",
    description="Find marker genes for a specific cell type from CELLxGENE Census",
    category="cellxgene",
    parameters={
        "cell_type": "Cell type name (e.g. 'T cell', 'hepatocyte', 'macrophage')",
        "tissue": "Tissue to restrict search (optional)",
        "top_n": "Number of top markers to return (default 20)",
        "organism": "Organism (default: Homo sapiens)",
    },
    usage_guide=(
        "You need to find marker genes that define a cell type. "
        "Useful for designing cell-type-specific assays or understanding "
        "which genes distinguish a cell type from others."
    ),
)
def cell_type_markers(cell_type: str, tissue: str = None, top_n: int = 20,
                      organism: str = "Homo sapiens", **kwargs) -> dict:
    """Report Census coverage for a cell type (marker genes are not computed).

    Counts primary-data cells annotated with ``cell_type`` and lists the
    tissues they come from. The ``markers`` entry of the result is always an
    empty list, and ``top_n`` is accepted but currently unused.

    Args:
        cell_type: Value matched exactly against the ``cell_type`` column.
        tissue: Optional ``tissue_general`` value to restrict the search.
        top_n: Unused; kept so the registered interface stays stable.
        organism: Organism name; lower-cased with spaces replaced by
            underscores to form the ``census_data`` key.
        **kwargs: Accepted and ignored.

    Returns:
        On success, a dict with ``summary``, ``cell_type``, ``n_cells``,
        ``tissues``, ``markers`` (empty) and ``note``. On any failure, a dict
        with ``error`` and ``summary`` keys.
    """
    err = _check_census_sdk()
    if err:
        return err

    import cellxgene_census

    def _quoted(value):
        # Backslash-escape single quotes so a value containing "'" cannot
        # terminate the filter literal early. An already-escaped \' is first
        # normalised so pre-escaped input yields the same filter as before.
        text = str(value).replace("\\'", "'").replace("'", "\\'")
        return f"'{text}'"

    try:
        with cellxgene_census.open_soma(census_version="stable") as census:
            value_filter = f"is_primary_data == True and cell_type == {_quoted(cell_type)}"
            if tissue:
                value_filter += f" and tissue_general == {_quoted(tissue)}"

            obs_df = census["census_data"][organism.lower().replace(" ", "_")].obs.read(
                value_filter=value_filter,
                column_names=["cell_type", "tissue_general"],
            ).concat().to_pandas()

            if obs_df.empty:
                return {
                    "summary": f"Cell type '{cell_type}' not found in Census",
                    "error": f"No cells matching cell_type='{cell_type}'",
                }

            n_cells = len(obs_df)
            tissues_found = sorted(obs_df["tissue_general"].unique().tolist())

            # Return cell type metadata only (full marker gene computation requires
            # fetching the full expression matrix which is too expensive for an API call)
            summary = (
                f"Cell type '{cell_type}': {n_cells} cells across "
                f"{len(tissues_found)} tissues ({', '.join(tissues_found[:5])})"
            )

            return {
                "summary": summary,
                "cell_type": cell_type,
                "n_cells": n_cells,
                "tissues": tissues_found,
                "markers": [],
                "note": "Full marker gene computation requires local scanpy analysis. "
                        "Use cellxgene_census.get_anndata() for detailed analysis.",
            }

    except Exception as e:
        # Tool boundary: report any failure as a result dict instead of raising.
        return {
            "error": f"Census query failed: {e}",
            "summary": f"Failed to query cell type markers: {e}",
        }
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@registry.register(
    name="cellxgene.dataset_search",
    description="Search CELLxGENE Census for datasets by tissue, disease, or assay",
    category="cellxgene",
    parameters={
        "tissue": "Tissue to search for (e.g. 'lung', 'brain'). Optional.",
        "disease": "Disease to search for (e.g. 'COVID-19', 'lung adenocarcinoma'). Optional.",
        "assay": "Assay type to filter (e.g. '10x 3\\' v3', 'Smart-seq2'). Optional.",
        "organism": "Organism (default: Homo sapiens)",
    },
    usage_guide=(
        "You want to find what single-cell datasets are available in CELLxGENE "
        "for a tissue, disease, or assay type. Use to scope data availability "
        "before deeper analysis."
    ),
)
def dataset_search(tissue: str = None, disease: str = None, assay: str = None,
                   organism: str = "Homo sapiens", **kwargs) -> dict:
    """Search CELLxGENE Census for datasets matching the given criteria.

    Reads primary-data cell metadata matching every supplied filter, groups
    the cells by ``dataset_id`` and returns the 20 datasets with the most
    matching cells.

    Args:
        tissue: Optional ``tissue_general`` value.
        disease: Optional ``disease`` value.
        assay: Optional ``assay`` value.
        organism: Organism name; lower-cased with spaces replaced by
            underscores to form the ``census_data`` key.
        **kwargs: Accepted and ignored.

    Returns:
        A dict with ``summary``, ``n_datasets``, ``n_cells_total`` and
        ``datasets``; when nothing matches, only ``summary`` and an empty
        ``datasets`` list. On any failure, a dict with ``error`` and
        ``summary`` keys.
    """
    err = _check_census_sdk()
    if err:
        return err

    import cellxgene_census

    def _quoted(value):
        # Backslash-escape single quotes so a value such as "10x 3' v3" cannot
        # terminate the filter literal early. An already-escaped \' is first
        # normalised so pre-escaped input yields the same filter as before.
        text = str(value).replace("\\'", "'").replace("'", "\\'")
        return f"'{text}'"

    try:
        with cellxgene_census.open_soma(census_version="stable") as census:
            criteria = [("tissue", tissue), ("disease", disease), ("assay", assay)]
            columns = {"tissue": "tissue_general", "disease": "disease", "assay": "assay"}

            filters = ["is_primary_data == True"]
            filters.extend(
                f"{columns[label]} == {_quoted(value)}"
                for label, value in criteria
                if value
            )
            value_filter = " and ".join(filters)

            obs_df = census["census_data"][organism.lower().replace(" ", "_")].obs.read(
                value_filter=value_filter,
                column_names=["dataset_id", "tissue_general", "disease",
                              "assay", "cell_type"],
            ).concat().to_pandas()

            if obs_df.empty:
                return {
                    "summary": "No datasets found matching criteria",
                    "datasets": [],
                }

            # Aggregate by dataset. observed=True keeps only datasets that
            # actually have matching cells, in case dataset_id comes back as a
            # pandas categorical (it is a no-op otherwise).
            datasets = obs_df.groupby("dataset_id", observed=True).agg(
                n_cells=("cell_type", "size"),
                tissues=("tissue_general", lambda x: sorted(x.unique().tolist())),
                diseases=("disease", lambda x: sorted(x.unique().tolist())),
                assays=("assay", lambda x: sorted(x.unique().tolist())),
                cell_types=("cell_type", lambda x: sorted(x.unique().tolist())),
            ).reset_index()

            datasets = datasets.sort_values("n_cells", ascending=False)

            # Report the 20 largest datasets, trimming the longer lists.
            results = [
                {
                    "dataset_id": row.dataset_id,
                    "n_cells": int(row.n_cells),
                    "tissues": row.tissues[:5],
                    "diseases": row.diseases[:5],
                    "assays": row.assays,
                    "n_cell_types": len(row.cell_types),
                }
                for row in datasets.head(20).itertuples(index=False)
            ]

            search_desc = ", ".join(
                f"{k}={v}" for k, v in criteria if v
            ) or "all"

            summary = (
                f"Found {len(datasets)} datasets ({search_desc}), "
                f"{len(obs_df)} total cells"
            )

            return {
                "summary": summary,
                "n_datasets": len(datasets),
                "n_cells_total": len(obs_df),
                "datasets": results,
            }

    except Exception as e:
        # Tool boundary: report any failure as a result dict instead of raising.
        return {
            "error": f"Census query failed: {e}",
            "summary": f"Failed to search Census datasets: {e}",
        }
|