npm - @nahisaho/satori - Versions diffs - 0.17.0 → 0.19.0 - Mend

@nahisaho/satori 0.17.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md ADDED Viewed

@@ -0,0 +1,264 @@
+---
+name: scientific-arrayexpress-expression
+description: |
+  ArrayExpress 発現アーカイブスキル。BioStudies/ArrayExpress
+  REST API によるマイクロアレイ・RNA-seq 発現実験検索・メタ
+  データ取得・データ再解析。ToolUniverse 連携: arrayexpress。
+tu_tools:
+  - key: arrayexpress
+    name: ArrayExpress
+    description: ArrayExpress 発現実験検索・メタデータ・ファイル取得
+---
+# Scientific ArrayExpress Expression
+EBI ArrayExpress / BioStudies REST API を活用した発現データ
+アーカイブ検索・再解析パイプラインを提供する。
+## When to Use
+- ArrayExpress/BioStudies の発現実験を検索するとき
+- マイクロアレイ/RNA-seq 発現データのメタデータを取得するとき
+- SDRF サンプル情報テーブルを解析するとき
+- E-MTAB/E-GEOD アクセッションからデータ再解析するとき
+- 発現データアーカイブを横断検索するとき
+- GEO と ArrayExpress の両方でデータを探すとき
+---
+## Quick Start
+## 1. BioStudies 発現実験検索
+```python
+import requests
+import pandas as pd
+BIOSTUDIES_BASE = "https://www.ebi.ac.uk/biostudies/api/v1"
+AE_BASE = "https://www.ebi.ac.uk/arrayexpress/json/v3"
+def arrayexpress_search_experiments(query, organism=None,
+                                       experiment_type=None,
+                                       limit=50):
+    """
+    ArrayExpress — 発現実験検索 (BioStudies API)。
+    Parameters:
+        query: str — 検索クエリ (例: "breast cancer RNA-seq")
+        organism: str — 生物種 (例: "Homo sapiens")
+        experiment_type: str — 実験タイプ (例: "RNA-seq of coding RNA")
+        limit: int — 最大結果数
+    """
+    url = f"{BIOSTUDIES_BASE}/search"
+    params = {
+        "query": query,
+        "type": "study",
+        "pageSize": limit,
+    }
+    if organism:
+        params["organism"] = organism
+    if experiment_type:
+        params["experimenttype"] = experiment_type
+    resp = requests.get(url, params=params, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    hits = data.get("hits", [])
+    results = []
+    for h in hits:
+        attrs = {a.get("name", ""): a.get("value", "")
+                 for a in h.get("attributes", [])}
+        results.append({
+            "accession": h.get("accession", ""),
+            "title": attrs.get("Title", h.get("title", "")),
+            "organism": attrs.get("Organism", ""),
+            "experiment_type": attrs.get("Experiment type", ""),
+            "release_date": h.get("releaseDate", ""),
+            "files_count": h.get("filesCount", 0),
+            "links_count": h.get("linksCount", 0),
+        })
+    df = pd.DataFrame(results)
+    print(f"ArrayExpress search: {len(df)} experiments "
+          f"(query={query})")
+    return df
+```
+## 2. 実験メタデータ・SDRF 取得
+```python
+def arrayexpress_get_experiment(accession):
+    """
+    ArrayExpress — 実験メタデータ & SDRF 取得。
+    Parameters:
+        accession: str — アクセッション (例: "E-MTAB-12345")
+    """
+    url = f"{BIOSTUDIES_BASE}/studies/{accession}"
+    resp = requests.get(url, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    # メタデータ
+    attrs = {a.get("name", ""): a.get("value", "")
+             for a in data.get("attributes", [])}
+    metadata = {
+        "accession": accession,
+        "title": attrs.get("Title", ""),
+        "description": attrs.get("Description", "")[:500],
+        "organism": attrs.get("Organism", ""),
+        "experiment_type": attrs.get("Experiment type", ""),
+        "release_date": data.get("releaseDate", ""),
+    }
+    # ファイル一覧
+    files = []
+    for section in data.get("section", {}).get("files", []):
+        if isinstance(section, list):
+            for f in section:
+                files.append({
+                    "filename": f.get("path", ""),
+                    "type": f.get("type", ""),
+                    "size": f.get("size", 0),
+                })
+        elif isinstance(section, dict):
+            files.append({
+                "filename": section.get("path", ""),
+                "type": section.get("type", ""),
+                "size": section.get("size", 0),
+            })
+    files_df = pd.DataFrame(files)
+    # SDRF 取得試行
+    sdrf_url = (f"https://www.ebi.ac.uk/biostudies/files/"
+                f"{accession}/{accession}.sdrf.txt")
+    sdrf_df = pd.DataFrame()
+    try:
+        sdrf_resp = requests.get(sdrf_url, timeout=30)
+        if sdrf_resp.status_code == 200:
+            from io import StringIO
+            sdrf_df = pd.read_csv(StringIO(sdrf_resp.text), sep="\t")
+    except Exception:
+        pass
+    print(f"ArrayExpress {accession}: {len(files_df)} files, "
+          f"{len(sdrf_df)} SDRF rows")
+    return metadata, files_df, sdrf_df
+```
+## 3. 発現データダウンロード・処理
+```python
+def arrayexpress_download_matrix(accession, output_dir="results"):
+    """
+    ArrayExpress — 発現マトリクスダウンロード。
+    Parameters:
+        accession: str — アクセッション
+        output_dir: str — 出力ディレクトリ
+    """
+    from pathlib import Path
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    metadata, files_df, sdrf_df = arrayexpress_get_experiment(accession)
+    # 処理済み発現ファイル検索
+    expr_files = files_df[
+        files_df["filename"].str.contains(
+            r"processed|normalized|expression|counts",
+            case=False, na=False)
+    ]
+    downloaded = []
+    for _, frow in expr_files.iterrows():
+        fname = frow["filename"]
+        url = (f"https://www.ebi.ac.uk/biostudies/files/"
+               f"{accession}/{fname}")
+        try:
+            resp = requests.get(url, timeout=120)
+            if resp.status_code == 200:
+                fpath = output_dir / fname.split("/")[-1]
+                fpath.write_bytes(resp.content)
+                downloaded.append(str(fpath))
+        except Exception:
+            continue
+    # SDRF 保存
+    if not sdrf_df.empty:
+        sdrf_df.to_csv(output_dir / "sdrf.csv", index=False)
+    print(f"ArrayExpress download: {len(downloaded)} files → "
+          f"{output_dir}")
+    return {
+        "metadata": metadata,
+        "files": downloaded,
+        "sdrf": sdrf_df,
+    }
+```
+## 4. ArrayExpress 統合パイプライン
+```python
+def arrayexpress_pipeline(query, organism="Homo sapiens",
+                            output_dir="results"):
+    """
+    ArrayExpress 統合パイプライン。
+    Parameters:
+        query: str — 検索クエリ
+        organism: str — 生物種
+        output_dir: str — 出力ディレクトリ
+    """
+    from pathlib import Path
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # 1) 実験検索
+    experiments = arrayexpress_search_experiments(
+        query, organism=organism)
+    experiments.to_csv(output_dir / "experiments.csv", index=False)
+    # 2) トップ実験の詳細
+    if not experiments.empty:
+        top_acc = experiments.iloc[0]["accession"]
+        metadata, files, sdrf = arrayexpress_get_experiment(top_acc)
+        files.to_csv(output_dir / "experiment_files.csv", index=False)
+        if not sdrf.empty:
+            sdrf.to_csv(output_dir / "sdrf.csv", index=False)
+    print(f"ArrayExpress pipeline: {output_dir}")
+    return {"experiments": experiments}
+```
+---
+## ToolUniverse 連携
+| TU Key | ツール名 | 連携内容 |
+|--------|---------|---------|
+| `arrayexpress` | ArrayExpress | 発現実験検索・メタデータ・ファイル取得 |
+## パイプライン統合
+```
+ebi-databases → arrayexpress-expression → gene-expression-transcriptomics
+  (EBI Search)   (ArrayExpress/BioStudies)  (DESeq2/GSEA)
+       │                   │                       ↓
+  geo-expression ─────────┘               pathway-enrichment
+  (GEO データ)        │                   (KEGG/Reactome)
+                       ↓
+                  multi-omics
+                  (統合解析)
+```
+## パイプライン出力
+| ファイル | 説明 | 次スキル |
+|---------|------|---------|
+| `results/experiments.csv` | 実験一覧 | → geo-expression |
+| `results/sdrf.csv` | サンプル情報 | → gene-expression-transcriptomics |
+| `results/experiment_files.csv` | ファイルリスト | → data-preprocessing |

package/src/.github/skills/scientific-civic-evidence/SKILL.md ADDED Viewed

@@ -0,0 +1,292 @@
+---
+name: scientific-civic-evidence
+description: |
+  CIViC 臨床エビデンススキル。CIViC (Clinical Interpretation
+  of Variants in Cancer) REST API を用いたバリアント臨床解釈・
+  エビデンスアイテム・分子プロファイル・アサーション検索。
+  ToolUniverse 連携: civic。
+tu_tools:
+  - key: civic
+    name: CIViC
+    description: がんバリアント臨床解釈データベース
+---
+# Scientific CIViC Evidence
+CIViC (Clinical Interpretation of Variants in Cancer) REST API
+を活用したバリアント臨床解釈・エビデンスアイテム取得・
+分子プロファイル・アサーションパイプラインを提供する。
+## When to Use
+- がんバリアントの臨床的解釈を検索するとき
+- エビデンスアイテム (薬剤応答・予後・診断) を取得するとき
+- 遺伝子ごとのバリアントサマリーを確認するとき
+- 分子プロファイル (Molecular Profile) を検索するとき
+- アサーション (ガイドライン推奨) を取得するとき
+---
+## Quick Start
+## 1. バリアント検索・臨床解釈
+```python
+import requests
+import pandas as pd
+CIVIC_API = "https://civicdb.org/api"
+def civic_variant_search(gene_name, variant_name=None,
+                            limit=50):
+    """
+    CIViC — バリアント検索。
+    Parameters:
+        gene_name: str — 遺伝子名 (例: "BRAF")
+        variant_name: str — バリアント名
+            (例: "V600E")
+        limit: int — 最大結果数
+    """
+    url = f"{CIVIC_API}/variants"
+    params = {"count": limit}
+    # 遺伝子名で検索
+    gene_url = f"{CIVIC_API}/genes/{gene_name}"
+    try:
+        resp = requests.get(gene_url, timeout=30)
+        if resp.status_code == 200:
+            gene_data = resp.json()
+        else:
+            # 検索 API フォールバック
+            search_url = f"{CIVIC_API}/genes"
+            params_g = {"name": gene_name, "count": 5}
+            resp = requests.get(search_url,
+                                params=params_g,
+                                timeout=30)
+            resp.raise_for_status()
+            records = resp.json().get("records", [])
+            gene_data = records[0] if records else {}
+    except Exception as e:
+        print(f"  CIViC gene lookup: {e}")
+        gene_data = {}
+    if not gene_data:
+        return pd.DataFrame()
+    variants = gene_data.get("variants", [])
+    rows = []
+    for v in variants[:limit]:
+        name = v.get("name", "")
+        if variant_name and variant_name.lower() \
+                not in name.lower():
+            continue
+        rows.append({
+            "variant_id": v.get("id", ""),
+            "gene": gene_name,
+            "variant_name": name,
+            "description": (v.get("description", "")
+                            [:200]),
+            "evidence_count": len(
+                v.get("evidence_items", [])),
+        })
+    df = pd.DataFrame(rows)
+    print(f"CIViC variants: {gene_name} → {len(df)}")
+    return df
+def civic_gene_summary(gene_name):
+    """
+    CIViC — 遺伝子サマリー取得。
+    Parameters:
+        gene_name: str — 遺伝子名 (例: "EGFR")
+    """
+    url = f"{CIVIC_API}/genes/{gene_name}"
+    resp = requests.get(url, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    result = {
+        "gene_id": data.get("id", ""),
+        "name": data.get("name", ""),
+        "description": data.get("description", ""),
+        "n_variants": len(data.get("variants", [])),
+        "aliases": "; ".join(
+            data.get("aliases", [])),
+    }
+    return result
+```
+## 2. エビデンスアイテム取得
+```python
+def civic_evidence_items(variant_id, limit=50):
+    """
+    CIViC — エビデンスアイテム取得。
+    Parameters:
+        variant_id: int — バリアント ID
+        limit: int — 最大結果数
+    """
+    url = f"{CIVIC_API}/variants/{variant_id}"
+    resp = requests.get(url, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    rows = []
+    for ev in data.get("evidence_items", [])[:limit]:
+        drugs = [d.get("name", "")
+                 for d in ev.get("drugs", [])]
+        rows.append({
+            "evidence_id": ev.get("id", ""),
+            "variant_id": variant_id,
+            "evidence_type": ev.get(
+                "evidence_type", ""),
+            "evidence_level": ev.get(
+                "evidence_level", ""),
+            "evidence_direction": ev.get(
+                "evidence_direction", ""),
+            "clinical_significance": ev.get(
+                "clinical_significance", ""),
+            "disease": ev.get("disease", {}).get(
+                "name", ""),
+            "drugs": "; ".join(drugs),
+            "rating": ev.get("rating", ""),
+            "status": ev.get("status", ""),
+            "source_citation": ev.get(
+                "source", {}).get("citation", ""),
+        })
+    df = pd.DataFrame(rows)
+    print(f"CIViC evidence: variant {variant_id} "
+          f"→ {len(df)} items")
+    return df
+```
+## 3. アサーション取得
+```python
+def civic_assertions(gene_name=None, limit=50):
+    """
+    CIViC — アサーション (ガイドライン推奨) 取得。
+    Parameters:
+        gene_name: str — 遺伝子名フィルタ
+        limit: int — 最大結果数
+    """
+    url = f"{CIVIC_API}/assertions"
+    params = {"count": limit}
+    resp = requests.get(url, params=params, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    rows = []
+    for a in data.get("records", []):
+        genes = [g.get("name", "")
+                 for g in a.get("genes", [])]
+        if gene_name and gene_name not in genes:
+            continue
+        drugs = [d.get("name", "")
+                 for d in a.get("drugs", [])]
+        rows.append({
+            "assertion_id": a.get("id", ""),
+            "genes": "; ".join(genes),
+            "variant": a.get("variant", {}).get(
+                "name", ""),
+            "disease": a.get("disease", {}).get(
+                "name", ""),
+            "drugs": "; ".join(drugs),
+            "assertion_type": a.get(
+                "assertion_type", ""),
+            "assertion_direction": a.get(
+                "assertion_direction", ""),
+            "clinical_significance": a.get(
+                "clinical_significance", ""),
+            "amp_level": a.get("amp_level", ""),
+            "status": a.get("status", ""),
+        })
+    df = pd.DataFrame(rows)
+    print(f"CIViC assertions: {len(df)}")
+    return df
+```
+## 4. CIViC 統合パイプライン
+```python
+def civic_pipeline(gene_name, variant_name=None,
+                      output_dir="results"):
+    """
+    CIViC 統合パイプライン。
+    Parameters:
+        gene_name: str — 遺伝子名 (例: "BRAF")
+        variant_name: str — バリアント名 (例: "V600E")
+        output_dir: str — 出力ディレクトリ
+    """
+    from pathlib import Path
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # 1) 遺伝子サマリー
+    summary = civic_gene_summary(gene_name)
+    pd.DataFrame([summary]).to_csv(
+        output_dir / "civic_gene.csv", index=False)
+    # 2) バリアント検索
+    variants = civic_variant_search(gene_name,
+                                    variant_name)
+    variants.to_csv(output_dir / "civic_variants.csv",
+                    index=False)
+    # 3) トップバリアントのエビデンス
+    if not variants.empty:
+        top_vid = variants.iloc[0]["variant_id"]
+        evidence = civic_evidence_items(top_vid)
+        evidence.to_csv(
+            output_dir / "civic_evidence.csv",
+            index=False)
+    # 4) アサーション
+    assertions = civic_assertions(gene_name)
+    assertions.to_csv(
+        output_dir / "civic_assertions.csv",
+        index=False)
+    print(f"CIViC pipeline: {gene_name} → {output_dir}")
+    return {"variants": variants}
+```
+---
+## ToolUniverse 連携
+| TU Key | ツール名 | 連携内容 |
+|--------|---------|---------|
+| `civic` | CIViC | がんバリアント臨床解釈 (~12 tools) |
+## パイプライン統合
+```
+variant-interpretation → civic-evidence → precision-oncology
+  (ClinVar バリアント)    (CIViC REST)    (精密腫瘍学)
+          │                    │                ↓
+  gnomad-variants ────────────┘     drug-target-profiling
+  (集団頻度)           │           (標的プロファイリング)
+                       ↓
+             opentargets-genetics
+             (OT 標的-疾患)
+```
+## パイプライン出力
+| ファイル | 説明 | 次スキル |
+|---------|------|---------|
+| `results/civic_gene.csv` | 遺伝子サマリー | → cancer-genomics |
+| `results/civic_variants.csv` | バリアント一覧 | → variant-interpretation |
+| `results/civic_evidence.csv` | エビデンス | → precision-oncology |
+| `results/civic_assertions.csv` | アサーション | → pharmacogenomics |

package/src/.github/skills/scientific-compound-screening/SKILL.md CHANGED Viewed

@@ -4,6 +4,10 @@ description: |
   化合物スクリーニングスキル。ZINC データベースを活用した購入可能化合物検索、
   SMILES/名前ベースの類似性検索、カタログフィルタリング、
   バーチャルスクリーニング前処理パイプライン。
+tu_tools:
+  - key: zinc
+    name: ZINC
+    description: 購入可能化合物データベース
 ---
 # Scientific Compound Screening