npm - @nahisaho/satori - Versions diffs - 0.15.0 → 0.17.0 - Mend

@nahisaho/satori 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/src/.github/skills/scientific-encode-screen/SKILL.md ADDED Viewed

@@ -0,0 +1,315 @@
+---
+name: scientific-encode-screen
+description: |
+  ENCODE / ChIP-Atlas エピゲノムアトラススキル。ENCODE REST API
+  実験・ファイル・バイオサンプル検索、SCREEN cis 制御エレメント、
+  ChIP-Atlas エンリッチメント解析、エピゲノムアノテーション統合。
+  ToolUniverse 連携: encode, chipatlas。
+tu_tools:
+  - key: encode
+    name: ENCODE
+    description: ENCODE プロジェクトの実験・バイオサンプル・ファイルデータ検索
+  - key: chipatlas
+    name: ChIP-Atlas
+    description: ChIP-seq/ATAC-seq エンリッチメント解析・ピークブラウザ
+---
+# Scientific ENCODE / SCREEN / ChIP-Atlas
+ENCODE REST API / SCREEN / ChIP-Atlas を活用したエピゲノム
+アトラス統合解析パイプラインを提供する。
+## When to Use
+- ENCODE 実験データ (ChIP-seq/ATAC-seq/DNase) を検索するとき
+- SCREEN cCRE (candidate cis-Regulatory Elements) を照会するとき
+- ChIP-Atlas で転写因子結合・ヒストン修飾エンリッチメントを解析するとき
+- バリアントの制御領域アノテーションを強化するとき
+- エピゲノムデータを下流のクロマチン解析に統合するとき
+- 組織/細胞型特異的エピゲノムプロファイルを比較するとき
+---
+## Quick Start
+## 1. ENCODE 実験検索・ファイル取得
+```python
+import requests
+import pandas as pd
+import json
+# Root URL of the ENCODE portal; the search endpoint and file download
+# links used by the functions below are built by appending to it.
+ENCODE_BASE = "https://www.encodeproject.org"
+# Headers passed with the ENCODE requests below to ask for a JSON response.
+HEADERS = {"Accept": "application/json"}
+def encode_search_experiments(assay_title=None, biosample=None,
+                               target=None, organism="Homo sapiens",
+                               limit=50):
+    """
+    ENCODE — 実験メタデータ検索。
+    Parameters:
+        assay_title: str — アッセイ種別 (例: "ChIP-seq", "ATAC-seq")
+        biosample: str — バイオサンプル (例: "K562")
+        target: str — ターゲット (例: "CTCF")
+        organism: str — 生物種
+        limit: int — 最大結果数
+    """
+    url = f"{ENCODE_BASE}/search/"
+    params = {
+        "type": "Experiment",
+        "status": "released",
+        "replicates.library.biosample.donor.organism.scientific_name": organism,
+        "limit": limit,
+        "format": "json",
+    }
+    if assay_title:
+        params["assay_title"] = assay_title
+    if biosample:
+        params["biosample_ontology.term_name"] = biosample
+    if target:
+        params["target.label"] = target
+    resp = requests.get(url, params=params, headers=HEADERS, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    results = []
+    for exp in data.get("@graph", []):
+        results.append({
+            "accession": exp.get("accession", ""),
+            "assay": exp.get("assay_title", ""),
+            "biosample": exp.get("biosample_summary", ""),
+            "target": exp.get("target", {}).get("label", ""),
+            "status": exp.get("status", ""),
+            "lab": exp.get("lab", {}).get("title", ""),
+            "date_released": exp.get("date_released", ""),
+            "files_count": len(exp.get("files", [])),
+        })
+    df = pd.DataFrame(results)
+    print(f"ENCODE: {len(df)} experiments found")
+    return df
+def encode_get_files(experiment_accession, file_type="bigWig",
+                      output_type="signal p-value", assembly="GRCh38"):
+    """
+    ENCODE — 実験ファイル URL 取得。
+    Parameters:
+        experiment_accession: str — 実験アクセッション
+        file_type: str — ファイルタイプ
+        output_type: str — 出力タイプ
+        assembly: str — アセンブリ
+    """
+    url = f"{ENCODE_BASE}/search/"
+    params = {
+        "type": "File",
+        "dataset": f"/experiments/{experiment_accession}/",
+        "file_format": file_type,
+        "output_type": output_type,
+        "assembly": assembly,
+        "status": "released",
+        "format": "json",
+    }
+    resp = requests.get(url, params=params, headers=HEADERS, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    files = []
+    for f in data.get("@graph", []):
+        files.append({
+            "accession": f.get("accession", ""),
+            "file_format": f.get("file_format", ""),
+            "output_type": f.get("output_type", ""),
+            "assembly": f.get("assembly", ""),
+            "href": ENCODE_BASE + f.get("href", ""),
+            "file_size": f.get("file_size", 0),
+            "biological_replicate": f.get("biological_replicates", []),
+        })
+    df = pd.DataFrame(files)
+    print(f"ENCODE files ({experiment_accession}): {len(df)} files")
+    return df
+```
+## 2. SCREEN cCRE 検索
+```python
+# Base URL that screen_ccre_search appends "/search" to.
+SCREEN_BASE = "https://api.wenglab.org/screen/v2"
+def screen_ccre_search(gene=None, region=None, biosample=None,
+                        assembly="GRCh38"):
+    """
+    SCREEN — candidate cis-Regulatory Element 検索。
+    Parameters:
+        gene: str — 遺伝子名 (例: "TP53")
+        region: str — ゲノム領域 (例: "chr17:7668421-7687490")
+        biosample: str — バイオサンプル名
+        assembly: str — アセンブリ
+    """
+    url = f"{SCREEN_BASE}/search"
+    query = {"assembly": assembly}
+    if gene:
+        query["gene"] = gene
+    if region:
+        chrom, pos = region.split(":")
+        start, end = pos.split("-")
+        query["coordinates"] = {
+            "chromosome": chrom,
+            "start": int(start),
+            "end": int(end),
+        }
+    resp = requests.post(url, json=query, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    ccres = []
+    for cre in data.get("data", []):
+        ccres.append({
+            "accession": cre.get("accession", ""),
+            "chromosome": cre.get("chrom", ""),
+            "start": cre.get("start", 0),
+            "end": cre.get("end", 0),
+            "ccre_class": cre.get("group", ""),
+            "dnase_zscore": cre.get("dnase_zscore", None),
+            "h3k4me3_zscore": cre.get("h3k4me3_zscore", None),
+            "h3k27ac_zscore": cre.get("h3k27ac_zscore", None),
+            "ctcf_zscore": cre.get("ctcf_zscore", None),
+        })
+    df = pd.DataFrame(ccres)
+    print(f"SCREEN cCREs: {len(df)} elements")
+    return df
+```
+## 3. ChIP-Atlas エンリッチメント解析
+```python
+# Base URL that chipatlas_enrichment appends "/enrichment" to.
+CHIPATLAS_BASE = "https://chip-atlas.org/api"
+def chipatlas_enrichment(gene_list, cell_type=None,
+                          antigen_class="TFs and others",
+                          genome="hg38", threshold=5):
+    """
+    ChIP-Atlas — 遺伝子リストのエンリッチメント解析。
+    Parameters:
+        gene_list: list[str] — 遺伝子リスト
+        cell_type: str — 細胞型 (None = 全細胞型)
+        antigen_class: str — 抗原クラス
+        genome: str — ゲノムアセンブリ
+        threshold: int — 距離閾値 (kb)
+    """
+    url = f"{CHIPATLAS_BASE}/enrichment"
+    payload = {
+        "genome": genome,
+        "geneList": gene_list,
+        "antigenClass": antigen_class,
+        "distanceThreshold": threshold * 1000,
+    }
+    if cell_type:
+        payload["cellType"] = cell_type
+    resp = requests.post(url, json=payload, timeout=60)
+    resp.raise_for_status()
+    data = resp.json()
+    results = []
+    for hit in data.get("results", []):
+        results.append({
+            "antigen": hit.get("antigen", ""),
+            "cell_type": hit.get("cellType", ""),
+            "experiment_id": hit.get("experimentId", ""),
+            "p_value": hit.get("pValue", 1.0),
+            "log_p": hit.get("logPValue", 0),
+            "overlap_genes": hit.get("overlapGenes", 0),
+            "total_peaks": hit.get("totalPeaks", 0),
+        })
+    df = pd.DataFrame(results)
+    df = df.sort_values("p_value")
+    print(f"ChIP-Atlas enrichment: {len(df)} TF/antigen hits")
+    return df
+```
+## 4. ENCODE + SCREEN + ChIP-Atlas 統合パイプライン
+```python
+def encode_epigenome_pipeline(gene_name, biosample="K562",
+                               output_dir="results"):
+    """
+    ENCODE/SCREEN/ChIP-Atlas エピゲノム統合パイプライン。
+    Parameters:
+        gene_name: str — 遺伝子名
+        biosample: str — バイオサンプル
+        output_dir: str — 出力ディレクトリ
+    """
+    from pathlib import Path
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # 1) SCREEN cCRE
+    ccres = screen_ccre_search(gene=gene_name)
+    ccres.to_csv(output_dir / "screen_ccres.csv", index=False)
+    # 2) ENCODE 実験
+    experiments = encode_search_experiments(
+        assay_title="ChIP-seq",
+        biosample=biosample,
+        target="H3K27ac",
+    )
+    experiments.to_csv(output_dir / "encode_experiments.csv", index=False)
+    # 3) ChIP-Atlas エンリッチメント
+    enrichment = chipatlas_enrichment(
+        gene_list=[gene_name],
+        cell_type=biosample,
+    )
+    enrichment.to_csv(output_dir / "chipatlas_enrichment.csv", index=False)
+    print(f"ENCODE epigenome pipeline: {output_dir}")
+    return {
+        "ccres": ccres,
+        "experiments": experiments,
+        "enrichment": enrichment,
+    }
+```
+---
+## ToolUniverse 連携
+| TU Key | ツール名 | 連携内容 |
+|--------|---------|---------|
+| `encode` | ENCODE | 実験・バイオサンプル・ファイル検索 |
+| `chipatlas` | ChIP-Atlas | 転写因子エンリッチメント解析 |
+## パイプライン統合
+```
+regulatory-genomics → encode-screen → epigenomics-chromatin
+  (RegulomeDB/ReMap)   (ENCODE/SCREEN)  (ChIP/ATAC bulk)
+       │                     │                ↓
+  variant-interpretation ───┘         scatac-signac
+  (ACMG バリアント)     │              (scATAC-seq)
+                        ↓
+                  gene-regulatory-network
+                  (GRN 推定)
+```
+## パイプライン出力
+| ファイル | 説明 | 次スキル |
+|---------|------|---------|
+| `results/screen_ccres.csv` | cCRE アノテーション | → variant-interpretation |
+| `results/encode_experiments.csv` | ENCODE 実験メタデータ | → epigenomics-chromatin |
+| `results/chipatlas_enrichment.csv` | TF エンリッチメント | → gene-regulatory-network |

package/src/.github/skills/scientific-environmental-geodata/SKILL.md ADDED Viewed

@@ -0,0 +1,255 @@
+---
+name: scientific-environmental-geodata
+description: |
+  環境地理空間データスキル。SoilGrids REST API による土壌特性
+  取得、WorldClim/CHELSA 気候データ、生物多様性-環境モデリング
+  統合。直接 REST API 連携 (TU 外)。
+tu_tools: []
+---
+# Scientific Environmental Geodata
+SoilGrids・WorldClim 等の地球観測/環境データ API を活用した
+生態学的環境モデリングパイプラインを提供する。
+## When to Use
+- グローバル土壌特性 (pH, SOC, 粘土含量) を取得するとき
+- バイオクリマティック変数 (BIO1-BIO19) を取得するとき
+- 種分布モデル (SDM) の環境変数を準備するとき
+- 気候変動シナリオの生息地適性を評価するとき
+- 環境ニッチモデリングを実施するとき
+- 土壌-植生-気候の相互作用を解析するとき
+---
+## Quick Start
+## 1. SoilGrids 土壌特性取得
+```python
+import requests
+import pandas as pd
+import numpy as np
+# Base URL that soilgrids_get_properties appends "/properties/query" to.
+SOILGRIDS_BASE = "https://rest.isric.org/soilgrids/v2.0"
+def soilgrids_get_properties(lat, lon, properties=None,
+                              depths=None, values=None):
+    """
+    SoilGrids — 地点の土壌特性取得。
+    Parameters:
+        lat: float — 緯度
+        lon: float — 経度
+        properties: list[str] — 土壌特性 (例: ["phh2o", "soc", "clay"])
+        depths: list[str] — 深度 (例: ["0-5cm", "5-15cm"])
+        values: list[str] — 値の種類 (例: ["mean", "Q0.05", "Q0.95"])
+    """
+    if properties is None:
+        properties = ["phh2o", "soc", "clay", "sand", "nitrogen",
+                       "bdod", "cec", "ocd"]
+    if depths is None:
+        depths = ["0-5cm", "5-15cm", "15-30cm", "30-60cm"]
+    if values is None:
+        values = ["mean", "Q0.05", "Q0.95"]
+    url = f"{SOILGRIDS_BASE}/properties/query"
+    params = {
+        "lat": lat,
+        "lon": lon,
+        "property": properties,
+        "depth": depths,
+        "value": values,
+    }
+    resp = requests.get(url, params=params, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+    results = []
+    for layer in data.get("properties", {}).get("layers", []):
+        prop_name = layer.get("name", "")
+        unit = layer.get("unit_measure", {})
+        conversion = unit.get("mapped_units", "")
+        for depth_info in layer.get("depths", []):
+            row = {
+                "property": prop_name,
+                "depth": depth_info.get("label", ""),
+                "unit": conversion,
+            }
+            for val_key, val_val in depth_info.get("values", {}).items():
+                row[val_key] = val_val
+            results.append(row)
+    df = pd.DataFrame(results)
+    print(f"SoilGrids ({lat}, {lon}): {len(df)} records, "
+          f"{len(properties)} properties")
+    return df
+```
+## 2. WorldClim バイオクリマティック変数
+```python
+import rasterio
+from rasterio.sample import sample_gen
+def worldclim_get_bioclim(lat, lon, resolution="2.5m",
+                           data_dir="worldclim"):
+    """
+    WorldClim — バイオクリマティック変数取得。
+    Parameters:
+        lat: float — 緯度
+        lon: float — 経度
+        resolution: str — 空間解像度 ("30s", "2.5m", "5m", "10m")
+        data_dir: str — WorldClim データディレクトリ
+    """
+    from pathlib import Path
+    bio_dir = Path(data_dir) / f"wc2.1_{resolution}_bio"
+    bioclim_names = {
+        1: "Annual Mean Temperature",
+        2: "Mean Diurnal Range",
+        3: "Isothermality",
+        4: "Temperature Seasonality",
+        5: "Max Temperature Warmest Month",
+        6: "Min Temperature Coldest Month",
+        7: "Temperature Annual Range",
+        8: "Mean Temperature Wettest Quarter",
+        9: "Mean Temperature Driest Quarter",
+        10: "Mean Temperature Warmest Quarter",
+        11: "Mean Temperature Coldest Quarter",
+        12: "Annual Precipitation",
+        13: "Precipitation Wettest Month",
+        14: "Precipitation Driest Month",
+        15: "Precipitation Seasonality",
+        16: "Precipitation Wettest Quarter",
+        17: "Precipitation Driest Quarter",
+        18: "Precipitation Warmest Quarter",
+        19: "Precipitation Coldest Quarter",
+    }
+    results = []
+    for bio_num, bio_name in bioclim_names.items():
+        tif_path = bio_dir / f"wc2.1_{resolution}_bio_{bio_num}.tif"
+        if not tif_path.exists():
+            continue
+        with rasterio.open(tif_path) as src:
+            vals = list(sample_gen(src, [(lon, lat)]))
+            value = vals[0][0] if vals else None
+        results.append({
+            "variable": f"BIO{bio_num}",
+            "name": bio_name,
+            "value": value,
+        })
+    df = pd.DataFrame(results)
+    print(f"WorldClim ({lat}, {lon}): {len(df)} bioclim variables")
+    return df
+```
+## 3. 種分布モデル環境変数統合
+```python
+def sdm_environmental_stack(occurrences_df, lat_col="latitude",
+                              lon_col="longitude", buffer_deg=0.5):
+    """
+    SDM — 種の出現記録に対する環境変数スタック生成。
+    Parameters:
+        occurrences_df: pd.DataFrame — 種出現記録
+        lat_col: str — 緯度カラム名
+        lon_col: str — 経度カラム名
+        buffer_deg: float — バッファ距離 (度)
+    """
+    results = []
+    for _, row in occurrences_df.iterrows():
+        lat, lon = row[lat_col], row[lon_col]
+        # SoilGrids
+        soil = soilgrids_get_properties(lat, lon,
+            properties=["phh2o", "soc", "clay"])
+        soil_mean = {}
+        for _, s in soil.iterrows():
+            if s.get("depth") == "0-5cm":
+                soil_mean[f"soil_{s['property']}"] = s.get("mean", None)
+        # WorldClim (if available)
+        bioclim = {}
+        try:
+            bio_df = worldclim_get_bioclim(lat, lon)
+            bioclim = {r["variable"]: r["value"]
+                        for _, r in bio_df.iterrows()}
+        except Exception:
+            pass
+        combined = {
+            lat_col: lat,
+            lon_col: lon,
+            **soil_mean,
+            **bioclim,
+        }
+        results.append(combined)
+    df = pd.DataFrame(results)
+    print(f"SDM env stack: {len(df)} points, {len(df.columns)} variables")
+    return df
+```
+## 4. 環境地理空間統合パイプライン
+```python
+def environmental_geodata_pipeline(occurrences_csv, output_dir="results"):
+    """
+    環境地理空間統合パイプライン。
+    Parameters:
+        occurrences_csv: str — 種出現記録 CSV パス
+        output_dir: str — 出力ディレクトリ
+    """
+    from pathlib import Path
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    occ = pd.read_csv(occurrences_csv)
+    print(f"Occurrences: {len(occ)} records")
+    # 環境変数スタック
+    env_df = sdm_environmental_stack(occ)
+    env_df.to_csv(output_dir / "env_stack.csv", index=False)
+    # 環境空間要約
+    summary = env_df.describe().T
+    summary.to_csv(output_dir / "env_summary.csv")
+    print(f"Environmental pipeline: {output_dir}")
+    return {"occurrences": occ, "env_stack": env_df, "summary": summary}
+```
+---
+## ToolUniverse 連携
+SoilGrids は REST API を直接使用し、WorldClim はローカルに配置した GeoTIFF を rasterio で読み込む (いずれも ToolUniverse 外)。
+## パイプライン統合
+```
+environmental-ecology → environmental-geodata → marine-ecology
+  (GBIF/iNaturalist)   (SoilGrids/WorldClim)   (OBIS/WoRMS)
+       │                        │                     ↓
+  phylogenetics ───────────────┘              biodiversity-indices
+  (系統情報)             │                    (多様性指標)
+                         ↓
+                  species-distribution-model
+                  (SDM 統合)
+```
+## パイプライン出力
+| ファイル | 説明 | 次スキル |
+|---------|------|---------|
+| `results/env_stack.csv` | 環境変数スタック | → species-distribution-model |
+| `results/env_summary.csv` | 環境空間要約 | → environmental-ecology |