@nahisaho/satori 0.17.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. package/README.md +85 -38
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
  4. package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
  5. package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
  6. package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
  7. package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
  8. package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
  9. package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
  10. package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
  11. package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
  12. package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
  13. package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
  14. package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
  15. package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
  16. package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
  17. package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
  18. package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
  19. package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
  20. package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
  21. package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
  22. package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
  23. package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
  24. package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
@@ -0,0 +1,271 @@
1
+ ---
2
+ name: scientific-gtex-tissue-expression
3
+ description: |
4
+ GTEx 組織発現スキル。GTEx Portal REST API v2 による
5
+ 組織特異的遺伝子発現パターン解析・eQTL ルックアップ・
6
+ 多組織比較。直接 API (ToolUniverse 非連携)。
7
+ tu_tools: []
8
+ ---
9
+
10
+ # Scientific GTEx Tissue Expression
11
+
12
+ GTEx (Genotype-Tissue Expression) Portal REST API v2 を活用した
13
+ 組織特異的遺伝子発現解析・eQTL 検索・多組織比較パイプライン
14
+ を提供する。
15
+
16
+ ## When to Use
17
+
18
+ - 遺伝子の組織特異的発現パターンを調べるとき
19
+ - 特定組織における eQTL (発現量的形質遺伝子座) を検索するとき
20
+ - 複数組織間で遺伝子発現レベルを比較するとき
21
+ - TPM (Transcripts Per Million) 発現データを取得するとき
22
+ - バリアントが遺伝子発現に与える影響を評価するとき
23
+ - 組織間の遺伝子共発現パターンを分析するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. 組織特異的遺伝子発現取得
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+
35
+ GTEX_BASE = "https://gtexportal.org/api/v2"
36
+
37
+
38
def _gtex_resolve_gencode_id(gene_id):
    """
    Resolve a gene symbol or unversioned Ensembl ID to a versioned GENCODE ID.

    The lookup is best-effort: on any request/parse failure, or when the
    service returns no match, ``gene_id`` is returned unchanged so the caller
    behaves exactly as if no resolution had been attempted.
    """
    import re

    # Already a versioned GENCODE ID (e.g. "ENSG00000012048.20").
    if re.fullmatch(r"ENSG\d+\.\d+", str(gene_id)):
        return gene_id

    try:
        resp = requests.get(
            f"{GTEX_BASE}/reference/gene",
            params={
                "geneId": gene_id,
                # gtex_v8 expression data is annotated against GENCODE v26
                # on GRCh38 (per the GTEx API v2 reference).
                "gencodeVersion": "v26",
                "genomeBuild": "GRCh38/hg38",
            },
            timeout=30,
        )
        resp.raise_for_status()
        candidates = resp.json().get("data", [])
    except (requests.RequestException, ValueError):
        return gene_id

    for entry in candidates:
        if entry.get("gencodeId"):
            return entry["gencodeId"]
    return gene_id


def gtex_gene_expression(gene_id, tissue=None):
    """
    GTEx: fetch per-tissue median gene expression (TPM).

    The ``gencodeId`` query parameter of ``medianGeneExpression`` is
    documented as a *versioned* GENCODE ID, so gene symbols and unversioned
    Ensembl IDs are first resolved through ``/reference/gene``. If that
    lookup fails, the identifier is sent as given.

    Parameters:
        gene_id: str - gene symbol, Ensembl ID or versioned GENCODE ID
            (e.g. "BRCA1", "ENSG00000012048")
        tissue: str - tissue ID; None requests all tissues
            (e.g. "Breast_Mammary_Tissue")

    Returns:
        pandas.DataFrame with columns gene_symbol, gencode_id, tissue,
        tissue_name, median_tpm and sample_count, sorted by median_tpm in
        descending order. Empty when the API returns no rows.

    Raises:
        requests.HTTPError: if the expression request returns an error status.
    """
    params = {
        "gencodeId": _gtex_resolve_gencode_id(gene_id),
        "datasetId": "gtex_v8",
    }
    if tissue:
        params["tissueSiteDetailId"] = tissue

    resp = requests.get(
        f"{GTEX_BASE}/expression/medianGeneExpression",
        params=params,
        timeout=30,
    )
    resp.raise_for_status()
    payload = resp.json()

    rows = []
    for item in payload.get("data", []):
        tissue_id = item.get("tissueSiteDetailId", "")
        rows.append({
            "gene_symbol": item.get("geneSymbol", ""),
            "gencode_id": item.get("gencodeId", ""),
            "tissue": tissue_id,
            # Fall back to the tissue ID so rows stay distinguishable when the
            # response carries no human-readable tissue name.
            "tissue_name": item.get("tissueSiteDetail") or tissue_id,
            "median_tpm": item.get("median", 0),
            "sample_count": item.get("numSamples", 0),
        })

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values("median_tpm", ascending=False)

    print(f"GTEx expression: {gene_id} → "
          f"{len(df)} tissues")
    return df
78
+
79
+
80
def gtex_top_tissues(gene_id, top_n=10):
    """
    GTEx: report the tissues where a gene has the highest median expression.

    Parameters:
        gene_id: str - gene symbol or Ensembl ID
        top_n: int - number of leading tissues to keep

    Returns:
        pandas.DataFrame - the first ``top_n`` rows of the frame returned by
        ``gtex_gene_expression`` (which is already ordered by median TPM,
        highest first), or that frame itself when it is empty.
    """
    expression = gtex_gene_expression(gene_id)

    # An empty frame is passed through untouched.
    if expression.empty:
        top = expression
    else:
        top = expression.head(top_n)

    print(f"GTEx top {top_n} tissues for {gene_id}:")
    for _, record in top.iterrows():
        name = record["tissue_name"]
        tpm = record["median_tpm"]
        n_samples = record["sample_count"]
        print(f" {name}: {tpm:.2f} TPM (n={n_samples})")
    return top
96
+ ```
97
+
98
+ ## 2. eQTL 検索
99
+
100
+ ```python
101
def gtex_eqtl_lookup(gene_id, tissue, variant_id=None):
    """
    GTEx: look up single-tissue eQTL associations for a gene.

    Parameters:
        gene_id: str - gene identifier, sent as the ``gencodeId`` parameter
            NOTE(review): the API reference describes this parameter as a
            versioned GENCODE ID; confirm that symbols are accepted.
        tissue: str - tissue ID (e.g. "Liver", "Whole_Blood")
        variant_id: str - optional variant ID, sent as ``variantId``
            NOTE(review): GTEx variant IDs may use the
            chr_pos_ref_alt_build form rather than rsIDs; confirm.

    Returns:
        pandas.DataFrame with columns gene_symbol, variant_id, tissue,
        pvalue, nes, maf, ref and alt, sorted by ascending pvalue. Empty when
        the API returns no rows.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    query = {
        "gencodeId": gene_id,
        "tissueSiteDetailId": tissue,
        "datasetId": "gtex_v8",
    }
    if variant_id:
        query["variantId"] = variant_id

    response = requests.get(
        f"{GTEX_BASE}/association/singleTissueEqtl",
        params=query,
        timeout=30,
    )
    response.raise_for_status()
    hits = response.json().get("data", [])

    records = [
        {
            "gene_symbol": hit.get("geneSymbol", ""),
            "variant_id": hit.get("variantId", ""),
            # The requested tissue is recorded, not a value from the response.
            "tissue": tissue,
            "pvalue": hit.get("pValue"),
            "nes": hit.get("nes"),  # normalized effect size
            "maf": hit.get("maf"),
            "ref": hit.get("ref", ""),
            "alt": hit.get("alt", ""),
        }
        for hit in hits
    ]

    table = pd.DataFrame(records)
    if not table.empty:
        table = table.sort_values("pvalue")

    summary = f"GTEx eQTL: {gene_id} in {tissue} → {len(table)} associations"
    print(summary)
    return table
145
+ ```
146
+
147
+ ## 3. 多組織比較
148
+
149
+ ```python
150
def gtex_multi_gene_comparison(gene_ids, tissues=None):
    """
    GTEx: compare expression of several genes across tissues.

    Parameters:
        gene_ids: list[str] - genes to fetch
        tissues: list[str] - tissue IDs to keep; None keeps every tissue

    Returns:
        pandas.DataFrame - a pivot table (rows = tissue name, columns = gene
        symbol, values = median TPM) when any data is available. When every
        fetch fails or returns nothing, an empty DataFrame; when the tissue
        filter removes every row, the empty long-format frame.
    """
    frames = []
    for gid in gene_ids:
        # Best-effort per gene: one failing gene must not abort the rest.
        try:
            df = gtex_gene_expression(gid)
        except Exception as e:
            print(f" Warning: {gid} — {e}")
            continue

        # An empty result has no columns; filtering it on "tissue" would
        # raise KeyError and be misreported as a fetch failure.
        if df.empty:
            print(f" Warning: {gid} — no expression data returned")
            continue

        if tissues:
            df = df[df["tissue"].isin(tissues)]
        frames.append(df)

    if not frames:
        return pd.DataFrame()

    combined = pd.concat(frames, ignore_index=True)
    if combined.empty:
        return combined

    # Rows with a blank display name would all share one pivot index value
    # and collapse into a single row, so substitute the tissue ID.
    names = combined["tissue_name"].fillna("").astype(str)
    combined["tissue_name"] = names.where(names != "", combined["tissue"])

    # Pivot: rows = tissue, columns = gene, values = TPM.
    pivot = combined.pivot_table(
        index="tissue_name",
        columns="gene_symbol",
        values="median_tpm",
        aggfunc="first",
    )
    print(f"GTEx comparison: {len(gene_ids)} genes × "
          f"{len(pivot)} tissues")
    return pivot
187
+ ```
188
+
189
+ ## 4. GTEx 統合パイプライン
190
+
191
+ ```python
192
def gtex_pipeline(gene_ids, tissues=None,
                  output_dir="results"):
    """
    GTEx integrated pipeline: expression tables, comparison matrix, eQTLs.

    Steps:
        1. Fetch per-tissue expression for each gene and write
           ``expression_<gene>.csv``.
        2. Build the multi-gene comparison and write
           ``expression_matrix.csv`` when it is non-empty.
        3. For each gene, look up eQTLs in that gene's own three
           highest-expressing tissues and write ``eqtl_results.csv``.

    Parameters:
        gene_ids: list[str] - genes to process
        tissues: list[str] - tissue IDs used for the comparison matrix;
            None keeps every tissue
        output_dir: str - directory for the CSV files (created if missing)

    Returns:
        dict with "expression" (list of per-gene DataFrames, in the order
        they were fetched successfully) and "matrix" (the result of
        ``gtex_multi_gene_comparison``).
    """
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Per-gene tissue expression. Keep the (gene, frame) pairing so the
    #    eQTL step can use each gene's own top tissues.
    fetched = []
    for gid in gene_ids:
        try:
            df = gtex_gene_expression(gid)
            df.to_csv(output_dir / f"expression_{gid}.csv",
                      index=False)
        except Exception as e:
            # Best-effort per gene, but report what was skipped.
            print(f" Warning: expression {gid} — {e}")
            continue
        fetched.append((gid, df))
    all_expr = [df for _, df in fetched]

    # 2) Multi-tissue comparison matrix.
    pivot = gtex_multi_gene_comparison(gene_ids, tissues)
    if isinstance(pivot, pd.DataFrame) and not pivot.empty:
        pivot.to_csv(output_dir / "expression_matrix.csv")

    # 3) eQTL lookup in each gene's top three tissues. Using the last fetched
    #    gene's tissues for every gene would give wrong tissue choices.
    eqtl_results = []
    for gid, expr in fetched:
        for _, row in expr.head(3).iterrows():
            # Prefer the GENCODE ID the expression API returned for this
            # gene; fall back to the caller's identifier if it is blank.
            lookup_id = row.get("gencode_id") or gid
            try:
                eqtl = gtex_eqtl_lookup(lookup_id, row["tissue"])
            except Exception as e:
                print(f" Warning: eQTL {gid} in {row['tissue']} — {e}")
                continue
            eqtl_results.append(eqtl)

    if eqtl_results:
        eqtl_combined = pd.concat(eqtl_results,
                                  ignore_index=True)
        eqtl_combined.to_csv(output_dir / "eqtl_results.csv",
                             index=False)

    print(f"GTEx pipeline: {output_dir}")
    return {"expression": all_expr, "matrix": pivot}
242
+ ```
243
+
244
+ ---
245
+
246
+ ## ToolUniverse 連携
247
+
248
+ | TU Key | ツール名 | 連携内容 |
249
+ |--------|---------|---------|
250
+ | (direct) | GTEx Portal API v2 | 直接 REST API — TU 非連携 |
251
+
252
+ ## パイプライン統合
253
+
254
+ ```
255
+ gene-expression-transcriptomics → gtex-tissue-expression → variant-interpretation
256
+ (DESeq2/edgeR 差分発現) (組織別 TPM + eQTL) (臨床変異評価)
257
+ │ │ ↓
258
+ arrayexpress-expression ──────────┘ gwas-catalog
259
+ (ArrayExpress データ) │ (GWAS 関連解析)
260
+
261
+ disease-research
262
+ (疾患関連遺伝子)
263
+ ```
264
+
265
+ ## パイプライン出力
266
+
267
+ | ファイル | 説明 | 次スキル |
268
+ |---------|------|---------|
269
+ | `results/expression_*.csv` | 遺伝子別組織発現 | → disease-research |
270
+ | `results/expression_matrix.csv` | 多遺伝子比較 | → pathway-enrichment |
271
+ | `results/eqtl_results.csv` | eQTL 関連 | → variant-interpretation |
@@ -0,0 +1,267 @@
1
+ ---
2
+ name: scientific-gwas-catalog
3
+ description: |
4
+ GWAS カタログスキル。NHGRI-EBI GWAS Catalog REST API によるゲノム
5
+ ワイド関連研究メタデータ・関連シグナル・形質・遺伝子座検索。
6
+ ToolUniverse 連携: gwas。
7
+ tu_tools:
8
+ - key: gwas
9
+ name: GWAS Catalog
10
+ description: GWAS 関連シグナル・形質・遺伝子座検索
11
+ ---
12
+
13
+ # Scientific GWAS Catalog
14
+
15
+ NHGRI-EBI GWAS Catalog REST API を活用した GWAS メタデータ
16
+ 解析・遺伝子座レベル解釈パイプラインを提供する。
17
+
18
+ ## When to Use
19
+
20
+ - GWAS Catalog から疾患/形質の関連バリアントを検索するとき
21
+ - 遺伝的関連シグナルのエフェクトサイズ・P値を取得するとき
22
+ - 特定遺伝子座の LD ブロック情報を解析するとき
23
+ - 多形質 PheWAS-like 解析を実施するとき
24
+ - GWAS サマリ統計量を下流解析に準備するとき
25
+ - 公開 GWAS データから PRS ウェイトを抽出するとき
26
+
27
+ ---
28
+
29
+ ## Quick Start
30
+
31
+ ## 1. GWAS 関連シグナル検索
32
+
33
+ ```python
34
+ import requests
35
+ import pandas as pd
36
+ import numpy as np
37
+
38
+ GWAS_BASE = "https://www.ebi.ac.uk/gwas/rest/api"
39
+
40
+
41
def _gwas_pvalue_and_mlog(assoc):
    """
    Return ``(p_value, -log10(p_value))`` for one association record.

    ``pvalue`` is used when present; otherwise the p-value is rebuilt from
    ``pvalueMantissa`` x 10^``pvalueExponent``. ``(None, None)`` is returned
    when no p-value can be determined. The -log10 value is computed from the
    mantissa/exponent pair when available, so it stays finite even if the
    float p-value underflows to 0.
    """
    import math

    def _num(value):
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    p_value = _num(assoc.get("pvalue"))
    mantissa = _num(assoc.get("pvalueMantissa"))
    exponent = _num(assoc.get("pvalueExponent"))
    has_parts = (mantissa is not None and exponent is not None
                 and mantissa > 0)

    if p_value is None and has_parts:
        p_value = mantissa * 10.0 ** exponent
    if p_value is None:
        return None, None

    if has_parts:
        mlog = -(math.log10(mantissa) + exponent)
    elif p_value > 0:
        mlog = -math.log10(p_value)
    else:
        mlog = None
    return p_value, mlog


def gwas_search_associations(trait=None, gene=None, variant=None,
                             p_upper=5e-8, limit=100):
    """
    GWAS Catalog: search association signals.

    Exactly one endpoint is queried, chosen in this order of precedence:
    ``trait``, then ``gene``, then ``variant``, then the unfiltered
    association listing.

    Parameters:
        trait: str - trait identifier placed in the ``efoTraits/<id>`` path
            (e.g. "EFO_0001645")
        gene: str - gene name (e.g. "BRCA1")
        variant: str - rsID (e.g. "rs1234567")
        p_upper: float - associations with a p-value above this are dropped
        limit: int - page size requested from the API

    Returns:
        pandas.DataFrame sorted by ascending p_value with columns
        association_id, p_value, p_value_mlog (-log10 of the p-value),
        or_beta, beta_num, beta_direction, ci, risk_allele_freq, snps,
        genes, trait and study_accession. Associations without any usable
        p-value are skipped.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    from urllib.parse import quote

    if trait:
        # Quote path segments so free-text input cannot alter the URL path.
        trait_segment = quote(str(trait), safe="")
        url = f"{GWAS_BASE}/efoTraits/{trait_segment}/associations"
    elif gene:
        url = f"{GWAS_BASE}/associations/search/findByGene"
    elif variant:
        variant_segment = quote(str(variant), safe="")
        url = (f"{GWAS_BASE}/singleNucleotidePolymorphisms/"
               f"{variant_segment}/associations")
    else:
        url = f"{GWAS_BASE}/associations"

    params = {"size": limit}
    if gene:
        params["geneName"] = gene

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    associations = (data.get("_embedded") or {}).get("associations") or []
    results = []
    for assoc in associations:
        p_value, mlog = _gwas_pvalue_and_mlog(assoc)
        # A record whose p-value is unknown cannot pass a p-value filter.
        if p_value is None or p_value > p_upper:
            continue

        loci = assoc.get("loci") or []
        genes = [
            gene_info.get("geneName", "")
            for locus in loci
            for gene_info in (locus.get("authorReportedGenes") or [])
        ]

        snps = [snp_info.get("rsId", "")
                for snp_info in (assoc.get("snps") or [])]
        if not snps:
            # NOTE(review): the default association representation appears
            # to expose SNPs only through loci[].strongestRiskAlleles
            # ("rs123-T"); confirm against the API. Used only as a fallback.
            for locus in loci:
                for allele in (locus.get("strongestRiskAlleles") or []):
                    name = allele.get("riskAlleleName") or ""
                    rsid = name.rsplit("-", 1)[0].strip()
                    if rsid and rsid not in snps:
                        snps.append(rsid)

        efo_traits = assoc.get("efoTraits") or []
        study = assoc.get("study") or {}

        results.append({
            "association_id": assoc.get("associationId", ""),
            "p_value": p_value,
            "p_value_mlog": mlog,
            "or_beta": assoc.get("orPerCopyNum", None),
            "beta_num": assoc.get("betaNum", None),
            "beta_direction": assoc.get("betaDirection", ""),
            "ci": assoc.get("range", ""),
            "risk_allele_freq": assoc.get("riskFrequency", ""),
            "snps": "; ".join(snps),
            "genes": "; ".join(genes),
            "trait": efo_traits[0].get("trait", "") if efo_traits else "",
            "study_accession": study.get("accessionId", ""),
        })

    df = pd.DataFrame(results)
    print(f"GWAS associations: {len(df)} results "
          f"(trait={trait}, gene={gene}, p<{p_upper})")
    return df.sort_values("p_value") if not df.empty else df
108
+ ```
109
+
110
+ ## 2. GWAS 研究メタデータ検索
111
+
112
+ ```python
113
def gwas_search_studies(query=None, efo_trait=None, limit=50):
    """
    GWAS Catalog: search study metadata.

    Parameters:
        query: str - free-text disease/trait, sent as ``diseaseTrait``
        efo_trait: str - EFO trait ID; takes precedence over ``query`` when
            choosing the endpoint
        limit: int - page size requested from the API

    Returns:
        pandas.DataFrame with columns accession, title, pubmed_id, author,
        journal, date, initial_sample_size, replication_sample_size and
        ancestry (the raw ``ancestries`` list).

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    from urllib.parse import quote

    if efo_trait:
        efo_segment = quote(str(efo_trait), safe="")
        url = f"{GWAS_BASE}/efoTraits/{efo_segment}/studies"
    elif query:
        url = f"{GWAS_BASE}/studies/search/findByDiseaseTrait"
    else:
        # No search term: use the plain listing rather than calling the
        # search endpoint without its parameter. This mirrors the
        # "/associations" fallback in gwas_search_associations.
        url = f"{GWAS_BASE}/studies"

    params = {"size": limit}
    if query:
        params["diseaseTrait"] = query

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    studies = (data.get("_embedded") or {}).get("studies") or []
    results = []
    for s in studies:
        # JSON null arrives as None, so guard each nested lookup.
        pub = s.get("publicationInfo") or {}
        author = pub.get("author") or {}
        results.append({
            "accession": s.get("accessionId", ""),
            "title": s.get("title", ""),
            "pubmed_id": pub.get("pubmedId", ""),
            "author": author.get("fullname", ""),
            "journal": pub.get("publication", ""),
            "date": pub.get("publicationDate", ""),
            "initial_sample_size": s.get("initialSampleSize", ""),
            "replication_sample_size": s.get(
                "replicationSampleSize", ""),
            "ancestry": s.get("ancestries", []),
        })

    df = pd.DataFrame(results)
    print(f"GWAS studies: {len(df)} results")
    return df
158
+ ```
159
+
160
+ ## 3. GWAS 形質検索・PheWAS
161
+
162
+ ```python
163
def gwas_phewas(variant_rsid, p_threshold=5e-8, limit=500):
    """
    GWAS Catalog: variant PheWAS (traits associated with one variant).

    Parameters:
        variant_rsid: str - rsID (e.g. "rs7903146")
        p_threshold: float - associations with a p-value above this are
            dropped
        limit: int - page size requested from the API (default 500)

    Returns:
        pandas.DataFrame with one row per (association, trait) pair and
        columns variant, trait, efo_uri (filled from the trait's
        ``shortForm``), p_value, or_beta and study, sorted by ascending
        p_value. Associations without a usable p-value are skipped.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    from urllib.parse import quote

    def _num(value):
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    rsid_segment = quote(str(variant_rsid), safe="")
    url = (f"{GWAS_BASE}/singleNucleotidePolymorphisms/"
           f"{rsid_segment}/associations")
    resp = requests.get(url, params={"size": limit}, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    associations = (data.get("_embedded") or {}).get("associations") or []
    results = []
    for assoc in associations:
        p_val = _num(assoc.get("pvalue"))
        if p_val is None:
            # Rebuild from mantissa x 10^exponent when pvalue is absent.
            mantissa = _num(assoc.get("pvalueMantissa"))
            exponent = _num(assoc.get("pvalueExponent"))
            if mantissa is not None and exponent is not None:
                p_val = mantissa * 10.0 ** exponent
        # "is None" rather than truthiness: a p-value of exactly 0.0 is a
        # valid, highly significant value and must be kept as 0.0.
        if p_val is None or p_val > p_threshold:
            continue

        study = assoc.get("study") or {}
        # NOTE(review): efoTraits may only be embedded when a projection is
        # requested; confirm against the API if results come back empty.
        for trait in (assoc.get("efoTraits") or []):
            results.append({
                "variant": variant_rsid,
                "trait": trait.get("trait", ""),
                "efo_uri": trait.get("shortForm", ""),
                "p_value": p_val,
                "or_beta": assoc.get("orPerCopyNum", None),
                "study": study.get("accessionId", ""),
            })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("p_value")
    print(f"PheWAS {variant_rsid}: {len(df)} trait associations")
    return df
199
+ ```
200
+
201
+ ## 4. GWAS 統合パイプライン
202
+
203
+ ```python
204
def _gwas_resolve_efo_id(trait_query):
    """
    Map a free-text trait name to an EFO short form.

    Returns the ``shortForm`` of the first trait returned by the
    ``efoTraits/search/findByEfoTrait`` endpoint, or None when the lookup
    fails or yields no match.
    """
    try:
        resp = requests.get(
            f"{GWAS_BASE}/efoTraits/search/findByEfoTrait",
            params={"trait": trait_query},
            timeout=30,
        )
        resp.raise_for_status()
        traits = (resp.json().get("_embedded") or {}).get("efoTraits") or []
    except (requests.RequestException, ValueError) as e:
        print(f" Warning: EFO lookup for {trait_query!r} failed — {e}")
        return None

    for entry in traits:
        if entry.get("shortForm"):
            return entry["shortForm"]
    return None


def gwas_catalog_pipeline(trait_query, output_dir="results"):
    """
    GWAS Catalog integrated pipeline: studies, associations, PheWAS.

    Steps:
        1. Search studies for the trait and write ``gwas_studies.csv``.
        2. Fetch associations for the same trait and write
           ``gwas_associations.csv``. A trait name is first resolved to an
           EFO ID; if no ID can be found, an empty table is written rather
           than unfiltered catalogue-wide associations.
        3. Run a PheWAS for up to five top rsIDs and write ``phewas.csv``.

    Parameters:
        trait_query: str - trait/disease name, or an ontology short form
            such as "EFO_0001645"
        output_dir: str - directory for the CSV files (created if missing)

    Returns:
        dict with "studies" and "associations" DataFrames.
    """
    import re
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Ontology short forms look like "<PREFIX>_<digits>"; anything else is
    # treated as a free-text trait name.
    is_efo_id = bool(re.fullmatch(r"[A-Za-z]+_\d+", str(trait_query)))

    # 1) Study search.
    if is_efo_id:
        studies = gwas_search_studies(efo_trait=trait_query)
        efo_id = trait_query
    else:
        studies = gwas_search_studies(query=trait_query)
        efo_id = _gwas_resolve_efo_id(trait_query)
    studies.to_csv(output_dir / "gwas_studies.csv", index=False)

    # 2) Association signals, restricted to the requested trait.
    if efo_id:
        assocs = gwas_search_associations(trait=efo_id)
    else:
        print(f" Warning: no EFO trait found for {trait_query!r}; "
              f"skipping association search")
        assocs = pd.DataFrame()
    assocs.to_csv(output_dir / "gwas_associations.csv", index=False)

    # 3) PheWAS of the top variants. assocs is sorted by p-value, so the
    #    first rsIDs are the most significant. Filter to rsIDs before
    #    truncating so blank entries do not use up the five slots.
    if not assocs.empty:
        candidates = (assocs["snps"].str.split("; ")
                      .explode().dropna().unique())
        top_snps = [rsid for rsid in candidates
                    if rsid.startswith("rs")][:5]
        phewas_all = []
        for rsid in top_snps:
            try:
                phewas_all.append(gwas_phewas(rsid))
            except Exception as e:
                # One failing variant should not discard the other results.
                print(f" Warning: PheWAS {rsid} — {e}")
        if phewas_all:
            phewas_df = pd.concat(phewas_all, ignore_index=True)
            phewas_df.to_csv(output_dir / "phewas.csv", index=False)

    print(f"GWAS pipeline: {output_dir}")
    return {"studies": studies, "associations": assocs}
238
+ ```
239
+
240
+ ---
241
+
242
+ ## ToolUniverse 連携
243
+
244
+ | TU Key | ツール名 | 連携内容 |
245
+ |--------|---------|---------|
246
+ | `gwas` | GWAS Catalog | 関連シグナル・形質・研究メタデータ検索 |
247
+
248
+ ## パイプライン統合
249
+
250
+ ```
251
+ disease-research → gwas-catalog → variant-interpretation
252
+ (DisGeNET/OMIM) (GWAS Catalog) (ACMG/AMP)
253
+ │ │ ↓
254
+ population-genetics ──┘ variant-effect-prediction
255
+ (Fst/PCA) │ (CADD/SpliceAI)
256
+
257
+ precision-oncology
258
+ (臨床的意義判定)
259
+ ```
260
+
261
+ ## パイプライン出力
262
+
263
+ | ファイル | 説明 | 次スキル |
264
+ |---------|------|---------|
265
+ | `results/gwas_studies.csv` | GWAS 研究メタデータ | → literature-search |
266
+ | `results/gwas_associations.csv` | 関連シグナル | → variant-interpretation |
267
+ | `results/phewas.csv` | PheWAS 結果 | → disease-research |