PyPI - celltype-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

celltype-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

celltype_cli-0.1.0.dist-info/METADATA +267 -0
celltype_cli-0.1.0.dist-info/RECORD +89 -0
celltype_cli-0.1.0.dist-info/WHEEL +4 -0
celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
ct/__init__.py +3 -0
ct/agent/__init__.py +0 -0
ct/agent/case_studies.py +426 -0
ct/agent/config.py +523 -0
ct/agent/doctor.py +544 -0
ct/agent/knowledge.py +523 -0
ct/agent/loop.py +99 -0
ct/agent/mcp_server.py +478 -0
ct/agent/orchestrator.py +733 -0
ct/agent/runner.py +656 -0
ct/agent/sandbox.py +481 -0
ct/agent/session.py +145 -0
ct/agent/system_prompt.py +186 -0
ct/agent/trace_store.py +228 -0
ct/agent/trajectory.py +169 -0
ct/agent/types.py +182 -0
ct/agent/workflows.py +462 -0
ct/api/__init__.py +1 -0
ct/api/app.py +211 -0
ct/api/config.py +120 -0
ct/api/engine.py +124 -0
ct/cli.py +1448 -0
ct/data/__init__.py +0 -0
ct/data/compute_providers.json +59 -0
ct/data/cro_database.json +395 -0
ct/data/downloader.py +238 -0
ct/data/loaders.py +252 -0
ct/kb/__init__.py +5 -0
ct/kb/benchmarks.py +147 -0
ct/kb/governance.py +106 -0
ct/kb/ingest.py +415 -0
ct/kb/reasoning.py +129 -0
ct/kb/schema_monitor.py +162 -0
ct/kb/substrate.py +387 -0
ct/models/__init__.py +0 -0
ct/models/llm.py +370 -0
ct/tools/__init__.py +195 -0
ct/tools/_compound_resolver.py +297 -0
ct/tools/biomarker.py +368 -0
ct/tools/cellxgene.py +282 -0
ct/tools/chemistry.py +1371 -0
ct/tools/claude.py +390 -0
ct/tools/clinical.py +1153 -0
ct/tools/clue.py +249 -0
ct/tools/code.py +1069 -0
ct/tools/combination.py +397 -0
ct/tools/compute.py +402 -0
ct/tools/cro.py +413 -0
ct/tools/data_api.py +2114 -0
ct/tools/design.py +295 -0
ct/tools/dna.py +575 -0
ct/tools/experiment.py +604 -0
ct/tools/expression.py +655 -0
ct/tools/files.py +957 -0
ct/tools/genomics.py +1387 -0
ct/tools/http_client.py +146 -0
ct/tools/imaging.py +319 -0
ct/tools/intel.py +223 -0
ct/tools/literature.py +743 -0
ct/tools/network.py +422 -0
ct/tools/notification.py +111 -0
ct/tools/omics.py +3330 -0
ct/tools/ops.py +1230 -0
ct/tools/parity.py +649 -0
ct/tools/pk.py +245 -0
ct/tools/protein.py +678 -0
ct/tools/regulatory.py +643 -0
ct/tools/remote_data.py +179 -0
ct/tools/report.py +181 -0
ct/tools/repurposing.py +376 -0
ct/tools/safety.py +1280 -0
ct/tools/shell.py +178 -0
ct/tools/singlecell.py +533 -0
ct/tools/statistics.py +552 -0
ct/tools/structure.py +882 -0
ct/tools/target.py +901 -0
ct/tools/translational.py +123 -0
ct/tools/viability.py +218 -0
ct/ui/__init__.py +0 -0
ct/ui/markdown.py +31 -0
ct/ui/status.py +258 -0
ct/ui/suggestions.py +567 -0
ct/ui/terminal.py +1456 -0
ct/ui/traces.py +112 -0

ct/tools/repurposing.py ADDED Viewed

@@ -0,0 +1,376 @@
+"""Drug repurposing tools: connectivity map queries, signature matching."""
+import numpy as np
+from ct.tools import registry
+from ct.tools.http_client import request_json
+def _to_float(value):
+    """Best-effort float conversion for heterogeneous API payloads."""
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return None
+def _extract_l1000fwd_hits(payload: dict | list, mode: str) -> list:
+    """Extract mode-specific hits from L1000FWD response payload."""
+    if isinstance(payload, list):
+        return payload
+    if not isinstance(payload, dict):
+        return []
+    if mode == "reverse":
+        primary_keys = ("opposite", "reverse", "discordant", "anti")
+    else:
+        primary_keys = ("similar", "mimic", "concordant")
+    secondary_keys = ("results", "topn", "data")
+    for key in primary_keys:
+        hits = payload.get(key)
+        if isinstance(hits, list):
+            return hits
+    for section in secondary_keys:
+        nested = payload.get(section)
+        if not isinstance(nested, dict):
+            continue
+        for key in primary_keys:
+            hits = nested.get(key)
+            if isinstance(hits, list):
+                return hits
+    return []
+def _normalize_l1000fwd_hit(hit, rank: int) -> dict:
+    """Normalize one L1000FWD hit into a stable ct-friendly shape."""
+    if not isinstance(hit, dict):
+        return {
+            "rank": rank,
+            "compound": str(hit),
+            "connectivity_score": None,
+            "raw": hit,
+        }
+    compound = (
+        hit.get("pert_iname")
+        or hit.get("name")
+        or hit.get("drug")
+        or hit.get("sig_id")
+        or hit.get("id")
+        or f"hit_{rank}"
+    )
+    score = (
+        _to_float(hit.get("score"))
+        or _to_float(hit.get("combined_score"))
+        or _to_float(hit.get("combined_scores"))
+        or _to_float(hit.get("tau"))
+        or _to_float(hit.get("zscore"))
+        or _to_float(hit.get("zscores"))
+    )
+    p_value = _to_float(hit.get("p_value")) or _to_float(hit.get("pval")) or _to_float(hit.get("pvals"))
+    q_value = _to_float(hit.get("q_value")) or _to_float(hit.get("qval")) or _to_float(hit.get("qvals"))
+    normalized = {
+        "rank": rank,
+        "compound": compound,
+        "connectivity_score": score,
+    }
+    if p_value is not None:
+        normalized["p_value"] = p_value
+    if q_value is not None:
+        normalized["q_value"] = q_value
+    normalized["raw"] = hit
+    return normalized
+def _query_l1000fwd(up_genes: list[str], down_genes: list[str], mode: str, top_n: int) -> tuple[list, str | None]:
+    """Query free L1000FWD API and return normalized hits."""
+    if not up_genes and not down_genes:
+        return [], "Signature is empty after separating up/down genes"
+    search_payload = {
+        "up_genes": up_genes,
+        "down_genes": down_genes,
+    }
+    search_data, search_error = request_json(
+        "POST",
+        "https://maayanlab.cloud/L1000FWD/sig_search",
+        json=search_payload,
+        timeout=45,
+        retries=1,
+    )
+    if search_error:
+        return [], f"L1000FWD sig_search failed: {search_error}"
+    if not isinstance(search_data, dict):
+        return [], "L1000FWD sig_search returned unexpected payload"
+    result_id = search_data.get("result_id") or search_data.get("id")
+    if not result_id:
+        return [], "L1000FWD did not return a result_id"
+    result_data, result_error = request_json(
+        "GET",
+        f"https://maayanlab.cloud/L1000FWD/result/topn/{result_id}",
+        timeout=45,
+        retries=1,
+    )
+    if result_error:
+        return [], f"L1000FWD topn lookup failed for {result_id}: {result_error}"
+    raw_hits = _extract_l1000fwd_hits(result_data, mode=mode)
+    if not raw_hits:
+        return [], f"L1000FWD returned no {mode} hits"
+    hits = [_normalize_l1000fwd_hit(hit, rank=i + 1) for i, hit in enumerate(raw_hits[:top_n])]
+    return hits, None
+@registry.register(
+    name="repurposing.cmap_query",
+    description="Query for drug repurposing opportunities using L1000 connectivity map signature matching",
+    category="repurposing",
+    parameters={
+        "gene_signature": "Dict of gene:value (expression changes), used as query signature",
+        "compound_id": "Compound ID to use as query (pulls L1000 signature; alternative to gene_signature)",
+        "mode": "'similar' (same mechanism) or 'reverse' (opposing signature, e.g. disease reversal)",
+        "top_n": "Number of top hits to return (default 20)",
+        "allow_remote": "If true and local L1000 is unavailable, try free L1000FWD API fallback",
+    },
+    requires_data=[],
+    usage_guide="You want to find drug repurposing opportunities — compounds with similar signatures "
+                "(shared mechanism) or reverse signatures (disease-reversing). Provide a gene signature "
+                "dict or compound_id. Use mode='reverse' to find compounds that reverse a disease signature. "
+                "Works best with local L1000 data; if unavailable, can query the free L1000FWD API.",
+)
+def cmap_query(gene_signature: dict = None, compound_id: str = None,
+               mode: str = "similar", top_n: int = 20,
+               allow_remote: bool = True, **kwargs) -> dict:
+    """Query for drug repurposing via connectivity map signature matching.
+    Strategy:
+    1. If compound_id provided + L1000 loaded: correlate that compound's signature
+       with all other compounds in the L1000 matrix.
+    2. If gene_signature provided + L1000 loaded: correlate the query signature
+       against all compounds in L1000.
+    3. If L1000 not available: return guidance for external CMap queries.
+    Parameters
+    ----------
+    gene_signature : dict, optional
+        Query signature as {gene_name: expression_value}. Positive values = upregulated,
+        negative = downregulated.
+    compound_id : str, optional
+        Compound ID to pull signature from L1000 data.
+    mode : str
+        "similar" to find compounds with correlated signatures (shared mechanism).
+        "reverse" to find compounds with anti-correlated signatures (disease reversal).
+    top_n : int
+        Number of top hits to return.
+    """
+    from scipy import stats as sp_stats
+    if gene_signature is None and compound_id is None:
+        return {"error": "Provide either gene_signature (dict) or compound_id", "summary": "Provide either gene_signature (dict) or compound_id"}
+    if mode not in ("similar", "reverse"):
+        return {"error": f"Unknown mode '{mode}'. Use 'similar' or 'reverse'", "summary": f"Unknown mode '{mode}'. Use 'similar' or 'reverse'"}
+    # Try to load L1000 data
+    l1000 = None
+    try:
+        from ct.data.loaders import load_l1000
+        l1000 = load_l1000()
+    except (FileNotFoundError, ImportError):
+        pass
+    # Build query vector
+    if compound_id is not None and l1000 is not None:
+        if compound_id not in l1000.index:
+            return {"error": f"Compound {compound_id} not found in L1000 data", "summary": f"Compound {compound_id} not found in L1000 data"}
+        query_series = l1000.loc[compound_id]
+        query_genes = set(query_series.index)
+        source = f"compound {compound_id} (L1000)"
+    elif gene_signature is not None:
+        query_genes = set(gene_signature.keys())
+        source = f"provided signature ({len(query_genes)} genes)"
+        query_series = None  # handled below
+    else:
+        return {"error": "compound_id not found in L1000 and no gene_signature provided", "summary": "compound_id not found in L1000 and no gene_signature provided"}
+    # If L1000 available, do full matrix correlation
+    results = None
+    if l1000 is not None:
+        if query_series is not None:
+            # compound_id mode: correlate against all other compounds
+            query_vec = query_series.values
+            compounds = l1000.index.tolist()
+            results = []
+            for cpd in compounds:
+                if cpd == compound_id:
+                    continue
+                other_vec = l1000.loc[cpd].values
+                # Pearson correlation
+                valid = ~(np.isnan(query_vec) | np.isnan(other_vec))
+                if valid.sum() < 10:
+                    continue
+                r, p = sp_stats.pearsonr(query_vec[valid], other_vec[valid])
+                results.append({
+                    "compound": cpd,
+                    "correlation": round(float(r), 4),
+                    "p_value": float(p),
+                    "n_genes": int(valid.sum()),
+                })
+        else:
+            # gene_signature mode: correlate provided signature against L1000 compounds
+            common_genes = sorted(query_genes & set(l1000.columns))
+            if len(common_genes) >= 10:
+                query_vec = np.array([gene_signature[g] for g in common_genes])
+                compounds = l1000.index.tolist()
+                results = []
+                for cpd in compounds:
+                    other_vec = l1000.loc[cpd, common_genes].values.astype(float)
+                    valid = ~(np.isnan(query_vec) | np.isnan(other_vec))
+                    if valid.sum() < 10:
+                        continue
+                    r, p = sp_stats.pearsonr(query_vec[valid], other_vec[valid])
+                    results.append({
+                        "compound": cpd,
+                        "correlation": round(float(r), 4),
+                        "p_value": float(p),
+                        "n_genes": int(valid.sum()),
+                    })
+            # else: results stays None, fall through to external guidance
+    if results is not None:
+        if not results:
+            return {
+                "summary": "No correlations computed -- insufficient overlapping data",
+                "source": source,
+                "mode": mode,
+                "hits": [],
+            }
+        # Sort by correlation
+        if mode == "similar":
+            results.sort(key=lambda x: x["correlation"], reverse=True)
+        else:  # reverse
+            results.sort(key=lambda x: x["correlation"])
+        top_hits = results[:top_n]
+        # Classify hits
+        for hit in top_hits:
+            r = hit["correlation"]
+            if abs(r) > 0.5:
+                hit["strength"] = "strong"
+            elif abs(r) > 0.3:
+                hit["strength"] = "moderate"
+            else:
+                hit["strength"] = "weak"
+        # Summary
+        if mode == "similar":
+            desc = "similar mechanism (positively correlated)"
+            best_r = top_hits[0]["correlation"] if top_hits else 0
+        else:
+            desc = "signature-reversing (negatively correlated)"
+            best_r = top_hits[0]["correlation"] if top_hits else 0
+        strong = sum(1 for h in top_hits if h["strength"] == "strong")
+        moderate = sum(1 for h in top_hits if h["strength"] == "moderate")
+        top3_str = ", ".join(
+            f"{h['compound']}(r={h['correlation']:.3f})" for h in top_hits[:3]
+        )
+        summary = (
+            f"CMap query ({source}, mode={mode}):\n"
+            f"Searching for: {desc}\n"
+            f"Top {len(top_hits)} hits: {strong} strong, {moderate} moderate\n"
+            f"Best matches: {top3_str}\n"
+            f"Best correlation: {best_r:.4f}"
+        )
+        return {
+            "summary": summary,
+            "source": source,
+            "mode": mode,
+            "n_compounds_screened": len(results),
+            "hits": top_hits,
+        }
+    # L1000 not available -- remote fallback for gene signatures
+    if gene_signature is not None:
+        # Separate into up/down gene lists for external tools
+        up_genes = sorted([g for g, v in gene_signature.items() if v > 0])
+        down_genes = sorted([g for g, v in gene_signature.items() if v < 0])
+        remote_error = None
+        if allow_remote:
+            remote_hits, remote_error = _query_l1000fwd(
+                up_genes=up_genes,
+                down_genes=down_genes,
+                mode=mode,
+                top_n=top_n,
+            )
+            if remote_hits:
+                top3_str = ", ".join(
+                    f"{h['compound']}(score={h['connectivity_score']:.3f})"
+                    if isinstance(h.get("connectivity_score"), float)
+                    else h["compound"]
+                    for h in remote_hits[:3]
+                )
+                summary = (
+                    f"Remote CMap query via L1000FWD ({source}, mode={mode}): "
+                    f"{len(remote_hits)} hit(s) returned. "
+                    f"Top hits: {top3_str}"
+                )
+                return {
+                    "summary": summary,
+                    "source": source,
+                    "mode": mode,
+                    "remote_source": "L1000FWD",
+                    "local_data_unavailable": True,
+                    "remote_used": True,
+                    "hits": remote_hits,
+                    "up_genes": up_genes[:100],
+                    "down_genes": down_genes[:100],
+                }
+        remote_note = f"Remote fallback attempt failed: {remote_error}\n" if (allow_remote and remote_error) else ""
+        summary = (
+            f"L1000 data not loaded -- cannot compute correlations locally.\n"
+            f"Query signature: {len(up_genes)} up, {len(down_genes)} down genes.\n"
+            f"{remote_note}"
+            f"For external CMap query, use these gene lists at:\n"
+            f"  - CLUE (https://clue.io/query)\n"
+            f"  - SigCom LINCS (https://maayanlab.cloud/sigcom-lincs/)\n"
+            f"  - L1000FWD (https://maayanlab.cloud/L1000FWD/)\n"
+            f"Upload up-genes and down-genes separately."
+        )
+        return {
+            "summary": summary,
+            "source": source,
+            "mode": mode,
+            "up_genes": up_genes[:100],  # cap for readability
+            "down_genes": down_genes[:100],
+            "data_unavailable": True,
+            "remote_used": False,
+            "remote_error": remote_error if allow_remote else None,
+            "external_resources": [
+                {"name": "CLUE", "url": "https://clue.io/query"},
+                {"name": "SigCom LINCS", "url": "https://maayanlab.cloud/sigcom-lincs/"},
+                {"name": "L1000FWD", "url": "https://maayanlab.cloud/L1000FWD/"},
+            ],
+            "hits": [],
+        }
+    return {
+        "summary": "L1000 data not available and no gene_signature provided for external query",
+        "error": "Load L1000 data (ct data pull l1000) or provide a gene_signature dict",
+        "hits": [],
+    }