npm - @nahisaho/satori - Versions diffs - 0.12.0 → 0.14.0 - Mend

@nahisaho/satori 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/src/.github/skills/scientific-healthcare-ai/SKILL.md ADDED Viewed

@@ -0,0 +1,273 @@
+---
+name: scientific-healthcare-ai
+description: |
+  ヘルスケア AI スキル。PyHealth 臨床 ML パイプライン、
+  フローサイトメトリー (FlowIO) 解析、電子健康記録 (EHR) 処理、
+  臨床予測モデル構築のガイダンス。
+---
+# Scientific Healthcare AI
+臨床データ解析・ヘルスケア機械学習パイプラインを提供する。
+PyHealth フレームワーク、フローサイトメトリー解析ツールを活用。
+## When to Use
+- 臨床予測モデル (再入院予測, 死亡率予測等) を構築するとき
+- EHR (電子健康記録) データの前処理・特徴量エンジニアリングを行うとき
+- フローサイトメトリー (FACS) データを読み込み・解析するとき
+- 臨床タスク向けの ML パイプラインを設計するとき
+- 医療コード (ICD-10, SNOMED, ATC) のマッピングを行うとき
+---
+## Quick Start
+## 1. PyHealth 臨床予測パイプライン
+```python
+"""
+PyHealth による臨床予測モデル構築。
+pip install pyhealth
+K-Dense-AI 参照: pyhealth — 臨床 ML フレームワーク
+"""
+from pyhealth.datasets import MIMIC3Dataset
+from pyhealth.tasks import readmission_prediction_mimic3_fn
+def build_clinical_pipeline(
+    mimic3_root,
+    tables=("DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"),
+    code_mapping=None,
+):
+    """
+    MIMIC-III データセットから臨床予測パイプラインを構築。
+    Parameters:
+        mimic3_root: str — MIMIC-III CSV ディレクトリパス
+        tables: tuple — 使用するテーブル
+        code_mapping: dict | None — コードマッピング設定
+    """
+    # Step 1: Dataset loading
+    if code_mapping is None:
+        code_mapping = {
+            "NDC": ("ATC", {"target_kwargs": {"level": 3}}),
+            "ICD9CM": "CCSCM",
+            "ICD9PROC": "CCSPROC",
+        }
+    dataset = MIMIC3Dataset(
+        root=mimic3_root,
+        tables=tables,
+        code_mapping=code_mapping,
+    )
+    print(f"MIMIC-III dataset: {len(dataset.patients)} patients")
+    return dataset
+def apply_clinical_task(dataset, task_fn=None):
+    """
+    臨床タスク関数を適用しサンプルを生成。
+    """
+    from pyhealth.datasets import split_by_patient
+    if task_fn is None:
+        task_fn = readmission_prediction_mimic3_fn
+    samples = dataset.set_task(task_fn)
+    train, val, test = split_by_patient(samples, [0.8, 0.1, 0.1])
+    print(f"Clinical task samples: "
+          f"train={len(train)}, val={len(val)}, test={len(test)}")
+    return train, val, test
+```
+## 2. PyHealth モデル学習
+```python
+def train_clinical_model(
+    train_dataset,
+    val_dataset,
+    model_type="Transformer",
+    epochs=20,
+    batch_size=64,
+):
+    """
+    PyHealth モデルの学習。
+    Parameters:
+        train_dataset: SampleDataset
+        val_dataset: SampleDataset
+        model_type: str — "Transformer", "RETAIN", "GRU", "CNN"
+        epochs: int — 学習エポック数
+    """
+    from pyhealth.models import Transformer
+    from pyhealth.trainer import Trainer
+    model_classes = {
+        "Transformer": Transformer,
+    }
+    ModelClass = model_classes.get(model_type, Transformer)
+    model = ModelClass(
+        dataset=train_dataset,
+        feature_keys=["conditions", "procedures", "drugs"],
+        label_key="label",
+        mode="binary",
+    )
+    trainer = Trainer(model=model)
+    trainer.train(
+        train_dataloader=train_dataset,
+        val_dataloader=val_dataset,
+        epochs=epochs,
+        monitor="pr_auc",
+    )
+    print(f"Clinical model ({model_type}): trained for {epochs} epochs")
+    return model, trainer
+```
+## 3. フローサイトメトリー解析
+```python
+def read_fcs_file(fcs_path):
+    """
+    FCS ファイルの読み込みと前処理。
+    pip install flowio
+    K-Dense-AI 参照: flowio — FCS file I/O
+    Parameters:
+        fcs_path: str — FCS ファイルパス
+    """
+    import flowio
+    import numpy as np
+    import pandas as pd
+    fcs_data = flowio.FlowData(fcs_path)
+    # Extract channel names
+    channels = []
+    for i in range(1, fcs_data.channel_count + 1):
+        name = fcs_data.channels.get(f"P{i}N", f"Channel_{i}")
+        channels.append(name)
+    # Convert to DataFrame
+    events = np.array(fcs_data.events).reshape(-1, fcs_data.channel_count)
+    df = pd.DataFrame(events, columns=channels)
+    print(f"FCS '{fcs_path}': {len(df)} events x {len(channels)} channels")
+    return df, fcs_data
+def gate_fcs_data(df, channel, low=None, high=None):
+    """
+    単純な矩形ゲーティング。
+    Parameters:
+        df: pd.DataFrame — FCS データ
+        channel: str — チャネル名
+        low: float | None — 下限
+        high: float | None — 上限
+    """
+    mask = pd.Series([True] * len(df))
+    if low is not None:
+        mask &= df[channel] >= low
+    if high is not None:
+        mask &= df[channel] <= high
+    gated = df[mask]
+    pct = len(gated) / len(df) * 100
+    print(f"Gate '{channel}' [{low},{high}]: "
+          f"{len(gated)}/{len(df)} events ({pct:.1f}%)")
+    return gated
+```
+## 4. 医療コードマッピング
+```python
+def map_medical_codes(codes, source_system, target_system):
+    """
+    医療コード間のマッピング。
+    Parameters:
+        codes: list[str] — ソースコードのリスト
+        source_system: str — "ICD9CM", "ICD10CM", "NDC", "ATC", "SNOMED"
+        target_system: str — 変換先コード体系
+    """
+    try:
+        from pyhealth.medcode import CrossMap
+        mapper = CrossMap(source_system, target_system)
+        results = {}
+        for code in codes:
+            mapped = mapper.map(code)
+            results[code] = mapped
+        mapped_count = sum(1 for v in results.values() if v)
+        print(f"Code mapping {source_system}→{target_system}: "
+              f"{mapped_count}/{len(codes)} mapped")
+        return results
+    except ImportError:
+        print("pyhealth.medcode not available; install pyhealth")
+        return {}
+```
+## 5. 臨床モデル評価
+```python
+def evaluate_clinical_model(trainer, test_dataset):
+    """
+    臨床予測モデルの評価。
+    Parameters:
+        trainer: Trainer — 学習済み Trainer
+        test_dataset: SampleDataset — テストデータ
+    """
+    metrics = trainer.evaluate(test_dataset)
+    print("Clinical model evaluation:")
+    for metric_name, value in metrics.items():
+        print(f"  {metric_name}: {value:.4f}")
+    return metrics
+```
+## References
+### Output Files
+| ファイル | 形式 |
+|---|---|
+| `results/clinical_predictions.csv` | CSV |
+| `results/clinical_metrics.json` | JSON |
+| `results/fcs_processed.csv` | CSV |
+| `results/code_mapping.json` | JSON |
+### 利用可能ツール
+| カテゴリ | 主要ツール | 用途 |
+|---|---|---|
+| (K-Dense) | `pyhealth` | 臨床 ML フレームワーク |
+| (K-Dense) | `flowio` | FCS ファイル I/O |
+> **注**: 本スキルは ToolUniverse ツールを持たず、
+> K-Dense-AI Scientific Skills からの参照のみ。
+### 参照スキル
+| スキル | 関連 |
+|---|---|
+| `scientific-clinical-nlp` | 臨床 NLP |
+| `scientific-biostatistics-survival` | 生存時間解析 |
+| `scientific-single-cell-rnaseq` | 単一細胞解析 |
+| `scientific-machine-learning-omics` | ML x オミクス |
+| `scientific-biothings-idmapping` | ID マッピング |
+### 依存パッケージ
+`pyhealth`, `flowio`, `numpy`, `pandas`, `scikit-learn`

package/src/.github/skills/scientific-human-protein-atlas/SKILL.md ADDED Viewed

@@ -0,0 +1,244 @@
+---
+name: scientific-human-protein-atlas
+description: |
+  Human Protein Atlas (HPA) 統合スキル。組織/細胞タンパク質発現、
+  がん予後バイオマーカー、RNA 発現プロファイル、細胞内局在、
+  タンパク質相互作用の包括的検索・解析パイプライン。
+---
+# Scientific Human Protein Atlas
+HPA REST API を活用した組織・細胞レベルの
+タンパク質発現プロファイリングパイプラインを提供する。
+## When to Use
+- 遺伝子/タンパク質の組織発現パターンを調べるとき
+- がん予後バイオマーカー候補を評価するとき
+- 細胞内局在 (subcellular localization) を確認するとき
+- 細胞株間の発現比較を行うとき
+- RNA 発現データ (HPA/GTEx/FANTOM5) を統合するとき
+---
+## Quick Start
+## 1. HPA 遺伝子基本情報取得
+```python
+import requests
+import pandas as pd
+# Base URL for the HPA API.
+# NOTE(review): none of the functions visible in this skill reference this
+# constant (they build URLs from the site root) — confirm whether it is needed.
+HPA_API = "https://www.proteinatlas.org/api"
+def get_hpa_gene_info(ensembl_id):
+    """
+    HPA 遺伝子基本情報取得。
+    Parameters:
+        ensembl_id: str — Ensembl gene ID (e.g., "ENSG00000141510")
+    ToolUniverse:
+        HPA_get_gene_basic_info_by_ensembl_id(ensembl_id=ensembl_id)
+        HPA_get_comprehensive_gene_details_by_ensembl_id(ensembl_id=ensembl_id)
+    """
+    url = f"https://www.proteinatlas.org/{ensembl_id}.json"
+    resp = requests.get(url)
+    resp.raise_for_status()
+    data = resp.json()
+    info = {
+        "ensembl_id": ensembl_id,
+        "gene_name": data.get("Gene", ""),
+        "gene_description": data.get("Gene description", ""),
+        "uniprot_id": data.get("Uniprot", []),
+        "chromosome": data.get("Chromosome", ""),
+        "protein_class": data.get("Protein class", []),
+        "evidence": data.get("Evidence", ""),
+    }
+    print(f"HPA gene: {info['gene_name']} ({ensembl_id})")
+    return info, data
+```
+## 2. 組織 RNA 発現プロファイル
+```python
+def get_tissue_rna_expression(gene_name):
+    """
+    HPA 組織別 RNA 発現データ取得。
+    ToolUniverse:
+        HPA_get_rna_expression_by_source(gene=gene_name, source="HPA")
+        HPA_get_rna_expression_in_specific_tissues(gene=gene_name, tissues=tissues)
+    """
+    url = f"https://www.proteinatlas.org/{gene_name}.json"
+    resp = requests.get(url)
+    resp.raise_for_status()
+    data = resp.json()
+    rna_data = data.get("RNA tissue specific nTPM", [])
+    results = []
+    for entry in rna_data:
+        results.append({
+            "tissue": entry.get("Tissue", ""),
+            "cell_type": entry.get("Cell type", ""),
+            "ntpm": float(entry.get("nTPM", 0)),
+            "detection": entry.get("Detection", ""),
+        })
+    df = pd.DataFrame(results)
+    if not df.empty:
+        df = df.sort_values("ntpm", ascending=False)
+    print(f"HPA RNA expression '{gene_name}': {len(df)} tissue entries")
+    return df
+```
+## 3. がん予後バイオマーカー解析
+```python
+def get_cancer_prognostics(gene_name):
+    """
+    HPA がん予後データ取得。
+    ToolUniverse:
+        HPA_get_cancer_prognostics_by_gene(gene=gene_name)
+    """
+    url = f"https://www.proteinatlas.org/{gene_name}.json"
+    resp = requests.get(url)
+    resp.raise_for_status()
+    data = resp.json()
+    prognostics = data.get("Pathology prognostics", [])
+    results = []
+    for entry in prognostics:
+        results.append({
+            "cancer_type": entry.get("Cancer type", ""),
+            "prognostic_type": entry.get("Prognostic type", ""),
+            "is_prognostic": entry.get("Is prognostic", False),
+            "p_value": float(entry.get("p-value", 1.0)),
+            "high_expression_favorable": entry.get(
+                "High expression is favorable", None
+            ),
+        })
+    df = pd.DataFrame(results)
+    if not df.empty:
+        df = df.sort_values("p_value")
+        significant = df[df["p_value"] < 0.05]
+        print(f"HPA cancer prognostics '{gene_name}': "
+              f"{len(significant)}/{len(df)} significant")
+    else:
+        print(f"HPA cancer prognostics '{gene_name}': no data")
+    return df
+```
+## 4. 細胞内局在
+```python
+def get_subcellular_location(gene_name):
+    """
+    HPA 細胞内局在データ取得。
+    ToolUniverse:
+        HPA_get_subcellular_location(gene=gene_name)
+    """
+    url = f"https://www.proteinatlas.org/{gene_name}.json"
+    resp = requests.get(url)
+    resp.raise_for_status()
+    data = resp.json()
+    sc = data.get("Subcellular location", [])
+    results = []
+    for entry in sc:
+        results.append({
+            "location": entry.get("Location", ""),
+            "reliability": entry.get("Reliability", ""),
+            "enhanced": entry.get("Enhanced", False),
+            "supported": entry.get("Supported", False),
+            "cell_lines": entry.get("Cell lines", []),
+        })
+    df = pd.DataFrame(results)
+    print(f"HPA subcellular '{gene_name}': {len(df)} locations")
+    return df
+```
+## 5. タンパク質相互作用ネットワーク (HPA)
+```python
+def get_hpa_protein_interactions(gene_name):
+    """
+    HPA タンパク質相互作用データ取得。
+    ToolUniverse:
+        HPA_get_protein_interactions_by_gene(gene=gene_name)
+        HPA_get_biological_processes_by_gene(gene=gene_name)
+        HPA_get_contextual_biological_process_analysis(gene=gene_name)
+    """
+    url = f"https://www.proteinatlas.org/{gene_name}.json"
+    resp = requests.get(url)
+    resp.raise_for_status()
+    data = resp.json()
+    interactions = data.get("Protein interaction partners", [])
+    results = []
+    for partner in interactions:
+        results.append({
+            "partner_gene": partner.get("Gene", ""),
+            "partner_ensembl": partner.get("Ensembl", ""),
+            "confidence": partner.get("Confidence", ""),
+            "source": partner.get("Source", ""),
+        })
+    df = pd.DataFrame(results)
+    print(f"HPA interactions '{gene_name}': {len(df)} partners")
+    return df
+```
+## References
+### Output Files
+| ファイル | 形式 |
+|---|---|
+| `results/hpa_gene_info.json` | JSON |
+| `results/hpa_tissue_expression.csv` | CSV |
+| `results/hpa_cancer_prognostics.csv` | CSV |
+| `results/hpa_subcellular.csv` | CSV |
+| `results/hpa_interactions.csv` | CSV |
+### 利用可能ツール
+| カテゴリ | 主要ツール | 用途 |
+|---|---|---|
+| HPA | `HPA_generic_search` | 汎用検索 |
+| HPA | `HPA_get_gene_basic_info_by_ensembl_id` | 遺伝子基本情報 |
+| HPA | `HPA_get_comprehensive_gene_details_by_ensembl_id` | 包括的詳細 |
+| HPA | `HPA_get_rna_expression_by_source` | RNA 発現 |
+| HPA | `HPA_get_rna_expression_in_specific_tissues` | 組織別発現 |
+| HPA | `HPA_get_cancer_prognostics_by_gene` | がん予後 |
+| HPA | `HPA_get_subcellular_location` | 細胞内局在 |
+| HPA | `HPA_get_protein_interactions_by_gene` | PPI |
+| HPA | `HPA_get_biological_processes_by_gene` | 生物学的プロセス |
+| HPA | `HPA_get_contextual_biological_process_analysis` | プロセス解析 |
+| HPA | `HPA_get_disease_expression_by_gene_tissue_disease` | 疾患発現 |
+| HPA | `HPA_get_comparative_expression_by_gene_and_cellline` | 細胞株比較 |
+| HPA | `HPA_get_gene_tsv_data_by_ensembl_id` | TSV データ |
+| HPA | `HPA_search_genes_by_query` | 遺伝子検索 |
+### 参照スキル
+| スキル | 関連 |
+|---|---|
+| `scientific-gene-expression-transcriptomics` | GEO/GTEx 発現解析 |
+| `scientific-proteomics-mass-spectrometry` | プロテオミクス |
+| `scientific-cancer-genomics` | がんゲノミクス |
+| `scientific-protein-interaction-network` | PPI ネットワーク |
+| `scientific-pathway-enrichment` | パスウェイ濃縮 |
+### 依存パッケージ
+`requests`, `pandas`