npm - @nahisaho/satori - Versions diffs - 0.10.0 → 0.11.0 - Mend

@nahisaho/satori 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/src/.github/skills/scientific-gene-expression-transcriptomics/SKILL.md ADDED Viewed

@@ -0,0 +1,330 @@
+---
+name: scientific-gene-expression-transcriptomics
+description: |
+  遺伝子発現・トランスクリプトミクス解析スキル。GEO (Gene Expression Omnibus) からの
+  公開データセット取得・前処理、DESeq2 (PyDESeq2) による差次発現解析、
+  GTEx 組織発現参照・eQTL 解析、Expression Atlas (EBI GXA) 統合照会、
+  遺伝子セット濃縮解析 (GSEA)、バルク RNA-seq カウントデータの
+  標準解析パイプライン。
+---
+# Scientific Gene Expression & Transcriptomics
+バルク RNA-seq / マイクロアレイの遺伝子発現データを対象に、
+GEO データセット取得→前処理→差次発現→GSEA→組織発現参照の
+統合トランスクリプトミクスパイプラインを提供する。
+## When to Use
+- GEO からバルク RNA-seq/マイクロアレイデータセットを取得・前処理するとき
+- DESeq2 による差次発現遺伝子 (DEG) 解析が必要なとき
+- GTEx 組織発現プロファイル・eQTL データを照会するとき
+- 遺伝子セット濃縮解析 (GSEA/ORA) を行うとき
+- Expression Atlas でベースライン/差次発現実験を検索するとき
+---
+## Quick Start
+## 1. GEO データセット取得
+```python
+import pandas as pd
+import GEOparse
+def fetch_geo_dataset(accession, output_dir="data/geo"):
+    """
+    GEO (Gene Expression Omnibus) データセットの取得・前処理。
+    GEO ID 形式:
+    - GSE: Series (発現データセット)
+    - GPL: Platform (アレイ/シーケンサー定義)
+    - GSM: Sample (個別サンプル)
+    - GDS: Dataset (キュレーション済み)
+    """
+    import os
+    os.makedirs(output_dir, exist_ok=True)
+    gse = GEOparse.get_GEO(geo=accession, destdir=output_dir)
+    print(f"  GEO Accession: {accession}")
+    print(f"  Title: {gse.metadata['title'][0]}")
+    print(f"  Platform: {list(gse.gpls.keys())}")
+    print(f"  Samples: {len(gse.gsms)}")
+    print(f"  Type: {gse.metadata.get('type', ['unknown'])}")
+    # サンプルメタデータ抽出
+    metadata = []
+    for gsm_name, gsm in gse.gsms.items():
+        meta = {"sample_id": gsm_name}
+        meta.update({k: v[0] if v else None
+                     for k, v in gsm.metadata.items()
+                     if k in ["title", "source_name_ch1", "characteristics_ch1"]})
+        metadata.append(meta)
+    metadata_df = pd.DataFrame(metadata)
+    # 発現マトリクス取得
+    pivot_df = gse.pivot_samples("VALUE")
+    print(f"  Expression matrix: {pivot_df.shape[0]} genes × {pivot_df.shape[1]} samples")
+    return gse, metadata_df, pivot_df
+```
+## 2. DESeq2 差次発現解析 (PyDESeq2)
+```python
+import numpy as np
+import pandas as pd
+def deseq2_differential_expression(count_matrix, metadata, design_factor,
+                                     contrast=None, alpha=0.05,
+                                     lfc_threshold=1.0):
+    """
+    PyDESeq2 による差次発現解析パイプライン。
+    1. カウントマトリクス入力 (genes × samples)
+    2. サイズファクター正規化 (median of ratios)
+    3. 分散推定 (shrinkage)
+    4. GLM フィッティング (NB 分布)
+    5. Wald 検定
+    6. LFC 収縮 (apeglm)
+    7. FDR 補正 (Benjamini-Hochberg)
+    """
+    from pydeseq2.dds import DeseqDataSet
+    from pydeseq2.ds import DeseqStats
+    # DeseqDataSet 構築
+    dds = DeseqDataSet(
+        counts=count_matrix,
+        metadata=metadata,
+        design_factors=design_factor,
+    )
+    # 正規化 + 分散推定 + 統計検定
+    dds.deseq2()
+    # 結果取得
+    stat_res = DeseqStats(dds, contrast=contrast, alpha=alpha)
+    stat_res.summary()
+    results_df = stat_res.results_df.copy()
+    # LFC 収縮
+    stat_res.lfc_shrink(coeff=contrast)
+    results_df["log2FoldChange_shrunk"] = stat_res.results_df["log2FoldChange"]
+    # フィルタリング
+    sig = results_df[
+        (results_df["padj"] < alpha) &
+        (results_df["log2FoldChange"].abs() > lfc_threshold)
+    ]
+    sig_up = sig[sig["log2FoldChange"] > 0]
+    sig_down = sig[sig["log2FoldChange"] < 0]
+    print(f"  DESeq2 results:")
+    print(f"    Total genes tested: {len(results_df)}")
+    print(f"    Significant (FDR < {alpha}, |log2FC| > {lfc_threshold}):")
+    print(f"      UP: {len(sig_up)}")
+    print(f"      DOWN: {len(sig_down)}")
+    return results_df, sig
+def generate_volcano_plot(results_df, alpha=0.05, lfc_threshold=1.0,
+                           output_file="figures/volcano_rnaseq.png"):
+    """
+    Volcano プロット生成。
+    """
+    import matplotlib.pyplot as plt
+    fig, ax = plt.subplots(figsize=(8, 6))
+    results_df["-log10_padj"] = -np.log10(results_df["padj"].clip(lower=1e-300))
+    # 色分け
+    colors = []
+    for _, row in results_df.iterrows():
+        if row["padj"] < alpha and row["log2FoldChange"] > lfc_threshold:
+            colors.append("red")
+        elif row["padj"] < alpha and row["log2FoldChange"] < -lfc_threshold:
+            colors.append("blue")
+        else:
+            colors.append("gray")
+    ax.scatter(results_df["log2FoldChange"], results_df["-log10_padj"],
+              c=colors, alpha=0.5, s=5)
+    ax.axhline(-np.log10(alpha), color="gray", linestyle="--", lw=0.5)
+    ax.axvline(lfc_threshold, color="gray", linestyle="--", lw=0.5)
+    ax.axvline(-lfc_threshold, color="gray", linestyle="--", lw=0.5)
+    ax.set_xlabel("log2 Fold Change")
+    ax.set_ylabel("-log10(adjusted p-value)")
+    ax.set_title("Volcano Plot — Differential Expression")
+    plt.tight_layout()
+    plt.savefig(output_file, dpi=300)
+    plt.close()
+    return output_file
+```
+## 3. GTEx 組織発現・eQTL 照会
+```python
+import pandas as pd
+def query_gtex_expression(gene_name, tissue=None):
+    """
+    GTEx (Genotype-Tissue Expression) 組織発現プロファイル照会。
+    GTEx v8: 54 組織, 948 ドナー, 17,382 サンプル。
+    TPM (Transcripts Per Million) ベースの発現量。
+    """
+    print(f"  GTEx gene expression query: {gene_name}")
+    if tissue:
+        print(f"  Tissue: {tissue}")
+    else:
+        print("  All tissues (54 tissue sites)")
+    return {"gene": gene_name, "tissue": tissue}
+def query_gtex_eqtl(gene_name, tissue, pvalue_threshold=1e-5):
+    """
+    GTEx eQTL (expression Quantitative Trait Loci) 照会。
+    eQTL = 遺伝子発現量に影響する遺伝的変異
+    - cis-eQTL: 遺伝子の ±1 Mb 以内の変異
+    - trans-eQTL: 遺伝子から離れた変異
+    """
+    print(f"  GTEx eQTL query: gene={gene_name}, tissue={tissue}")
+    print(f"  P-value threshold: {pvalue_threshold}")
+    print("  Types: cis-eQTL (primary), trans-eQTL")
+    return {"gene": gene_name, "tissue": tissue}
+```
+## 4. 遺伝子セット濃縮解析 (GSEA)
+```python
+import pandas as pd
+import numpy as np
+def gsea_preranked(ranked_gene_list, gene_sets="MSigDB_Hallmark_2020",
+                    n_permutations=1000, min_size=15, max_size=500):
+    """
+    GSEA (Gene Set Enrichment Analysis) — Preranked。
+    入力: log2FC × -log10(p) でランク付けされた遺伝子リスト
+    遺伝子セットDB:
+    - MSigDB Hallmark (H)
+    - GO Biological Process (C5:BP)
+    - KEGG Pathways (C2:KEGG)
+    - Reactome (C2:REACTOME)
+    """
+    import gseapy as gp
+    # ランクスコア = sign(log2FC) × -log10(pvalue)
+    results = gp.prerank(
+        rnk=ranked_gene_list,
+        gene_sets=gene_sets,
+        processes=4,
+        permutation_num=n_permutations,
+        min_size=min_size,
+        max_size=max_size,
+        outdir="results/gsea",
+        seed=42,
+    )
+    sig_terms = results.res2d[results.res2d["FDR q-val"] < 0.05]
+    print(f"  GSEA results ({gene_sets}):")
+    print(f"    Gene sets tested: {len(results.res2d)}")
+    print(f"    Significant (FDR < 0.05): {len(sig_terms)}")
+    if len(sig_terms) > 0:
+        print(f"    Top enriched:")
+        for _, row in sig_terms.head(5).iterrows():
+            direction = "UP" if row["NES"] > 0 else "DOWN"
+            print(f"      {row['Term']} (NES={row['NES']:.2f}, {direction})")
+    return results
+def overrepresentation_analysis(gene_list, background=None,
+                                  gene_sets="GO_Biological_Process_2021"):
+    """
+    遺伝子オーバーリプレゼンテーション解析 (ORA)。
+    Fisher exact test ベースの濃縮解析。
+    DEG リスト → 機能カテゴリへのマッピング。
+    """
+    import gseapy as gp
+    results = gp.enrich(
+        gene_list=gene_list,
+        gene_sets=gene_sets,
+        background=background,
+        outdir="results/ora",
+    )
+    sig = results.res2d[results.res2d["Adjusted P-value"] < 0.05]
+    print(f"  ORA results ({gene_sets}):")
+    print(f"    Input genes: {len(gene_list)}")
+    print(f"    Significant terms: {len(sig)}")
+    return results
+```
+## References
+### Output Files
+| ファイル | 形式 |
+|---|---|
+| `results/geo_expression_matrix.csv` | CSV |
+| `results/deseq2_results.csv` | CSV |
+| `results/gsea/` | ディレクトリ |
+| `results/ora/` | ディレクトリ |
+| `figures/volcano_rnaseq.png` | PNG |
+| `figures/ma_plot.png` | PNG |
+| `figures/gsea_dotplot.png` | PNG |
+### 利用可能ツール
+> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
+| カテゴリ | 主要ツール | 用途 |
+|---|---|---|
+| GEO | `geo_search_datasets` | GEO データセット検索 |
+| GEO | `geo_get_dataset_info` | データセット詳細取得 |
+| GEO | `geo_get_sample_info` | サンプル情報取得 |
+| GTEx | `GTEx_get_median_gene_expression` | 組織間中央値発現量 |
+| GTEx | `GTEx_get_gene_expression` | サンプルレベル発現データ |
+| GTEx | `GTEx_get_top_expressed_genes` | 高発現遺伝子取得 |
+| GTEx | `GTEx_get_eqtl_genes` | eQTL 遺伝子 (eGenes) |
+| GTEx | `GTEx_get_single_tissue_eqtls` | 単一組織 eQTL |
+| GTEx | `GTEx_get_multi_tissue_eqtls` | 多組織 eQTL |
+| GTEx | `GTEx_calculate_eqtl` | eQTL 計算 |
+| Expression Atlas | `ExpressionAtlas_search_experiments` | 実験検索 |
+| Expression Atlas | `ExpressionAtlas_get_baseline` | ベースライン発現 |
+| Expression Atlas | `ExpressionAtlas_search_differential` | 差次発現実験 |
+| ArrayExpress | `arrayexpress_search_experiments` | ArrayExpress 実験検索 |
+### 参照スキル
+| スキル | 関連 |
+|---|---|
+| `scientific-bioinformatics` | バルク RNA-seq 基盤 |
+| `scientific-single-cell-genomics` | scRNA-seq (単一細胞) |
+| `scientific-epigenomics-chromatin` | 発現-エピゲノム統合 |
+| `scientific-multi-omics` | マルチオミクス統合 |
+| `scientific-network-analysis` | 共発現ネットワーク |
+### 依存パッケージ
+`pydeseq2`, `GEOparse`, `gseapy`, `pandas`, `numpy`, `matplotlib`, `scipy`

package/src/.github/skills/scientific-lab-data-management/SKILL.md ADDED Viewed

@@ -0,0 +1,334 @@
+---
+name: scientific-lab-data-management
+description: |
+  ラボデータ管理スキル。Benchling (ELN/DNA 設計/レジストリ)、
+  DNAnexus (ゲノミクス PaaS)、LatchBio (ワークフロー)、
+  OMERO (バイオイメージング)、Protocols.io (プロトコル共有)
+  を統合したウェット・ドライラボデータ管理パイプライン。
+---
+# Scientific Lab Data Management
+ウェットラボ実験管理からゲノミクスデータ処理まで、
+ラボデータの生成・記録・解析・共有を統合管理するパイプライン。
+## When to Use
+- 電子実験ノート (ELN) でプロトコル・結果を記録するとき
+- DNA 配列設計・クローニング計画を管理するとき
+- ゲノミクス大規模データを PaaS 上で解析するとき
+- バイオイメージングデータを構造化管理するとき
+- 実験プロトコルを共有・再現するとき
+---
+## Quick Start
+## 1. Benchling ELN / DNA 設計
+```python
+import json
+import requests
+class BenchlingClient:
+    """
+    Benchling API クライアント。
+    Benchling 機能:
+    - ELN (Electronic Lab Notebook): 実験記録
+    - Molecular Biology: DNA 配列設計, プライマー設計, クローニング
+    - Registry: サンプル・試薬レジストリ
+    - Inventory: 在庫管理
+    """
+    def __init__(self, api_key, tenant_url):
+        self.base_url = f"https://{tenant_url}/api/v2"
+        self.headers = {
+            "Authorization": f"Basic {api_key}",
+            "Content-Type": "application/json",
+        }
+    def create_dna_sequence(self, name, bases, folder_id,
+                              annotations=None):
+        """
+        DNA 配列の登録。
+        Parameters:
+        - name: 配列名
+        - bases: 塩基配列 (ATCG)
+        - folder_id: 保存先フォルダ
+        - annotations: アノテーション [{name, start, end, type, strand}]
+        """
+        payload = {
+            "name": name,
+            "bases": bases,
+            "folderId": folder_id,
+            "isCircular": False,
+            "annotations": annotations or [],
+        }
+        print(f"  Benchling DNA sequence: {name}")
+        print(f"    Length: {len(bases)} bp")
+        if annotations:
+            print(f"    Annotations: {len(annotations)}")
+        return payload
+    def search_registry(self, query, schema_id=None, page_size=50):
+        """
+        Benchling Registry 検索。
+        レジストリエンティティ:
+        - プラスミド, 菌株, 抗体, 細胞株, 化合物
+        """
+        params = {
+            "query": query,
+            "pageSize": page_size,
+        }
+        if schema_id:
+            params["schemaId"] = schema_id
+        print(f"  Benchling registry search: '{query}'")
+        return params
+    def create_entry(self, name, folder_id, template_id=None):
+        """
+        ELN エントリ (実験ノート) 作成。
+        """
+        payload = {
+            "name": name,
+            "folderId": folder_id,
+        }
+        if template_id:
+            payload["entryTemplateId"] = template_id
+        print(f"  Benchling ELN entry: {name}")
+        return payload
+```
+## 2. DNAnexus ゲノミクス PaaS
+```python
+import json
+class DNAnexusClient:
+    """
+    DNAnexus Platform API クライアント。
+    DNAnexus 機能:
+    - データストレージ: FASTQ, BAM, VCF 等の大規模ファイル
+    - ワークフロー実行: WDL/CWL/Applet ベース
+    - コンプライアンス: HIPAA, GxP, FedRAMP
+    - コラボレーション: プロジェクト単位のアクセス管理
+    """
+    def __init__(self, token):
+        self.token = token
+        self.base_url = "https://api.dnanexus.com"
+    def upload_file(self, local_path, project_id, folder="/"):
+        """
+        ファイルアップロード。
+        対応形式: FASTQ(.gz), BAM, CRAM, VCF, BED, etc.
+        """
+        print(f"  DNAnexus upload: {local_path}")
+        print(f"    Project: {project_id}")
+        print(f"    Destination: {folder}")
+        return {"local_path": local_path, "project_id": project_id}
+    def run_workflow(self, workflow_id, project_id, inputs):
+        """
+        ワークフロー実行。
+        ワークフロー例:
+        - GATK Best Practices (germline/somatic)
+        - RNA-STAR alignment + featureCounts
+        - DeepVariant caller
+        - Structural variant calling
+        """
+        print(f"  DNAnexus workflow: {workflow_id}")
+        print(f"    Project: {project_id}")
+        print(f"    Inputs: {len(inputs)} parameters")
+        return {
+            "workflow_id": workflow_id,
+            "project_id": project_id,
+            "inputs": inputs,
+        }
+    def list_project_files(self, project_id, folder="/", name_glob=None):
+        """
+        プロジェクト内ファイル一覧。
+        """
+        params = {"folder": folder}
+        if name_glob:
+            params["name"] = {"glob": name_glob}
+        print(f"  DNAnexus list: {project_id}{folder}")
+        return params
+```
+## 3. OMERO バイオイメージング管理
+```python
+import json
+class OMEROClient:
+    """
+    OMERO (Open Microscopy Environment Remote Objects) クライアント。
+    OMERO 機能:
+    - 画像データ管理: 150+ 画像フォーマット (Bio-Formats)
+    - メタデータ: Key-Value, タグ, ROI
+    - 解析統合: ImageJ/Fiji, CellProfiler, napari
+    - アクセス制御: プロジェクト/グループ権限
+    """
+    def __init__(self, host, port=4064):
+        self.host = host
+        self.port = port
+    def import_images(self, file_paths, dataset_id):
+        """
+        画像インポート。
+        対応フォーマット (Bio-Formats):
+        - OME-TIFF, ND2 (Nikon), CZI (Zeiss), LIF (Leica)
+        - VSI (Olympus), SVS (Aperio), DICOM
+        """
+        print(f"  OMERO import: {len(file_paths)} images → Dataset {dataset_id}")
+        return {"files": file_paths, "dataset_id": dataset_id}
+    def create_roi(self, image_id, shapes):
+        """
+        ROI (Region of Interest) 作成。
+        Shape タイプ:
+        - Rectangle, Ellipse, Polygon
+        - Line, Polyline, Point
+        - Mask (binary mask)
+        """
+        print(f"  OMERO ROI: Image {image_id}, {len(shapes)} shapes")
+        return {"image_id": image_id, "shapes": shapes}
+    def query_images(self, project=None, dataset=None,
+                      key_value_pairs=None):
+        """
+        画像検索 (メタデータベース)。
+        フィルタ:
+        - プロジェクト/データセット階層
+        - Key-Value annotation
+        - タグ
+        - 取得日, 機器名
+        """
+        print(f"  OMERO query:")
+        if project:
+            print(f"    Project: {project}")
+        if key_value_pairs:
+            print(f"    Key-Value: {key_value_pairs}")
+        return {"project": project, "dataset": dataset}
+```
+## 4. Protocols.io プロトコル共有
+```python
+import json
+def create_protocol(title, description, steps, reagents=None,
+                      doi_prefix="dx.doi.org/10.17504"):
+    """
+    Protocols.io プロトコル作成。
+    Protocols.io:
+    - DOI 付与による引用可能なプロトコル
+    - バージョン管理
+    - フォーク・改変・派生
+    - JOVE, Nature Protocol Exchange 連携
+    """
+    protocol = {
+        "title": title,
+        "description": description,
+        "steps": [],
+        "reagents": reagents or [],
+    }
+    for i, step in enumerate(steps, 1):
+        protocol["steps"].append({
+            "step_number": i,
+            "description": step.get("description", ""),
+            "duration": step.get("duration"),
+            "temperature": step.get("temperature"),
+            "critical_step": step.get("critical", False),
+            "expected_result": step.get("expected_result"),
+        })
+    print(f"  Protocol: {title}")
+    print(f"    Steps: {len(steps)}")
+    if reagents:
+        print(f"    Reagents: {len(reagents)}")
+    print(f"    DOI: {doi_prefix}/protocols.io...")
+    return protocol
+def fork_protocol(original_protocol_id, modifications):
+    """
+    既存プロトコルのフォークと改変。
+    - 変更点の追跡
+    - 元プロトコルへのリンク
+    - バージョン番号の自動付与
+    """
+    print(f"  Forking protocol: {original_protocol_id}")
+    print(f"    Modifications: {len(modifications)}")
+    return {
+        "forked_from": original_protocol_id,
+        "modifications": modifications,
+    }
+```
+## References
+### Output Files
+| ファイル | 形式 |
+|---|---|
+| `results/benchling_sequences.json` | JSON |
+| `results/benchling_registry.json` | JSON |
+| `results/dnanexus_workflow_output.json` | JSON |
+| `results/omero_image_metadata.json` | JSON |
+| `results/protocol.json` | JSON |
+### 利用可能ツール
+> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
+なし — 各プラットフォームの REST API を直接利用。
+### 参照スキル
+| スキル | 関連 |
+|---|---|
+| `scientific-bioinformatics` | ゲノミクスデータ解析 |
+| `scientific-imaging-analysis` | 顕微鏡画像解析 |
+| `scientific-gene-expression-transcriptomics` | RNA-seq データ管理 |
+| `scientific-single-cell-genomics` | scRNA-seq データ管理 |
+| `scientific-data-analysis` | データ前処理 |
+### 依存パッケージ
+`requests`, `pandas` (`json` は Python 標準ライブラリのため追加インストール不要; 各プラットフォーム SDK: `benchling-sdk`, `dxpy`, `omero-py`)