@nahisaho/satori 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/README.md +150 -54
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
  4. package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
  5. package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
  6. package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
  7. package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
  8. package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
  9. package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
  10. package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
  11. package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
  12. package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
  13. package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
  14. package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
  15. package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
  16. package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
  17. package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
  18. package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
  19. package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
  20. package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
  21. package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
  22. package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
@@ -0,0 +1,331 @@
1
+ ---
2
+ name: scientific-biomedical-pubtator
3
+ description: |
4
+ バイオメディカルテキストマイニングスキル。PubTator3 API による
5
+ 遺伝子・疾患・化合物・変異・種のエンティティ認識、関係抽出、
6
+ バイオメディカル文献アノテーション自動化パイプライン。
7
+ ---
8
+
9
+ # Scientific Biomedical PubTator
10
+
11
+ PubTator3 API を活用したバイオメディカル文献エンティティ認識・
12
+ 関係抽出パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - PubMed 論文から遺伝子・疾患・化合物のエンティティを自動抽出するとき
17
+ - バイオメディカル NER (Named Entity Recognition) を実行するとき
18
+ - 遺伝子-疾患・薬物-標的の関係を文献から抽出するとき
19
+ - 大規模文献コーパスのバイオアノテーションを行うとき
20
+ - テキストマイニング結果を知識グラフに統合するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. PubTator3 エンティティアノテーション
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+ import json
32
+ import time
33
+
34
PUBTATOR_API = "https://www.ncbi.nlm.nih.gov/research/pubtator3-api"

# Column order of the annotation table. Passing it to DataFrame keeps the
# schema stable even when no annotation is found, so callers can still
# index columns such as df["pmid"] on an empty result.
_ANNOTATION_COLUMNS = [
    "pmid",
    "text",
    "type",
    "identifier",
    "offset",
    "length",
    "passage_type",
]


def _flatten_bioc_annotations(data):
    """Turn a decoded BioC-JSON payload into one flat dict per annotation."""
    if isinstance(data, list):
        # Already a list of documents; wrapping it again would hand a list
        # (not a dict) to the .get() calls below.
        docs = data
    elif isinstance(data, dict):
        if "PubTator3" in data:
            docs = data["PubTator3"] or []
        elif "passages" in data:
            # A single bare document.
            docs = [data]
        else:
            docs = []
    else:
        docs = []

    records = []
    for doc in docs:
        pmid = doc.get("pmid", doc.get("id", ""))
        for passage in doc.get("passages", []):
            passage_type = passage.get("infons", {}).get("type", "")
            for annotation in passage.get("annotations", []):
                infons = annotation.get("infons", {})
                # "locations" may be absent or an empty list; fall back to an
                # empty mapping so offset/length default to "".
                first_location = (annotation.get("locations") or [{}])[0]
                records.append({
                    "pmid": pmid,
                    "text": annotation.get("text", ""),
                    "type": infons.get("type", ""),
                    "identifier": infons.get("identifier", ""),
                    "offset": first_location.get("offset", ""),
                    "length": first_location.get("length", ""),
                    "passage_type": passage_type,
                })
    return records


def annotate_pmids(pmids, concepts=None):
    """
    Fetch PubTator3 biomedical entity annotations for a list of PMIDs.

    Parameters:
        pmids: list — PMIDs (e.g., [12345678, 23456789])
        concepts: list — entity types sent to the API; defaults to
            ["gene", "disease", "chemical", "mutation", "species"]
            ("cellline" is also listed by the original documentation)

    Returns:
        pandas.DataFrame with columns pmid, text, type, identifier, offset,
        length, passage_type (one row per annotation). The columns are
        present even when the frame is empty. An empty ``pmids`` returns an
        empty frame without contacting the API.

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.

    ToolUniverse:
        PubTator_annotate(pmids=pmids, concepts=concepts)
        PubTator_search(query=query)
    """
    if concepts is None:
        concepts = ["gene", "disease", "chemical", "mutation", "species"]

    if len(pmids) == 0:
        records = []
    else:
        params = {
            "pmids": ",".join(str(p) for p in pmids),
            "concepts": ",".join(concepts),
            "format": "biocjson",
        }
        # A timeout prevents an unresponsive server from blocking forever.
        resp = requests.get(
            f"{PUBTATOR_API}/publications/export/biocjson",
            params=params,
            timeout=30,
        )
        resp.raise_for_status()
        records = _flatten_bioc_annotations(resp.json())

    df = pd.DataFrame(records, columns=_ANNOTATION_COLUMNS)
    type_counts = df["type"].value_counts().to_dict() if not df.empty else {}
    print(f"PubTator annotation: {len(pmids)} PMIDs → "
          f"{len(df)} entities {type_counts}")
    return df
86
+ ```
87
+
88
+ ## 2. PubTator3 テキスト検索
89
+
90
+ ```python
91
def search_pubtator(query, max_results=100):
    """
    Search PubTator3 for articles and return them with their entity lists.

    Parameters:
        query: str — search text (gene, disease or chemical name)
        max_results: int — upper bound on returned rows. At most 100 are
            requested because only a single page is fetched.

    Returns:
        pandas.DataFrame with columns pmid, title, journal, year, score,
        genes, diseases, chemicals, mutations. A non-positive
        ``max_results`` returns an empty frame without contacting the API.

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.
    """
    columns = [
        "pmid", "title", "journal", "year", "score",
        "genes", "diseases", "chemicals", "mutations",
    ]
    if max_results <= 0:
        return pd.DataFrame(columns=columns)

    params = {
        "text": query,
        "sort": "score",
        "page_size": min(max_results, 100),
    }
    # A timeout prevents an unresponsive server from blocking forever.
    resp = requests.get(f"{PUBTATOR_API}/search/", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # Enforce the documented cap locally, in case the server returns more
    # rows than the requested page size.
    hits = data.get("results", [])[:max_results]
    results = [
        {
            "pmid": hit.get("pmid", ""),
            "title": hit.get("title", ""),
            "journal": hit.get("journal", ""),
            "year": hit.get("year", ""),
            "score": hit.get("score", 0),
            "genes": hit.get("genes", []),
            "diseases": hit.get("diseases", []),
            "chemicals": hit.get("chemicals", []),
            "mutations": hit.get("mutations", []),
        }
        for hit in hits
    ]

    df = pd.DataFrame(results, columns=columns)
    total = data.get("count", 0)
    print(f"PubTator search '{query}': {total} total, {len(df)} returned")
    return df
126
+ ```
127
+
128
+ ## 3. エンティティ関係抽出
129
+
130
+ ```python
131
def extract_entity_relations(pmids, relation_types=None):
    """
    Derive candidate entity relations from PubTator3 annotations.

    Relations are article-level co-occurrences: within each PMID, every
    mention of the first entity type is paired with every mention of the
    second entity type. No passage-level or semantic filtering is applied.

    Parameters:
        pmids: list — PMIDs to annotate
        relation_types: list — optional filter on relation codes. Produced
            codes are "GDA" (gene-disease), "CGA" (chemical-gene) and
            "CDA" (chemical-disease). Any other code, including "PPI",
            yields no rows. None or an empty list selects all three.

    Returns:
        pandas.DataFrame with columns pmid, relation_type, entity1_type,
        entity1_text, entity1_id, entity2_type, entity2_text, entity2_id.
        The columns are present even when no relation is found.
    """
    # (relation code, first entity type, second entity type); the order
    # here is the order in which rows are emitted for each article.
    relation_specs = (
        ("GDA", "Gene", "Disease"),
        ("CGA", "Chemical", "Gene"),
        ("CDA", "Chemical", "Disease"),
    )
    columns = [
        "pmid", "relation_type",
        "entity1_type", "entity1_text", "entity1_id",
        "entity2_type", "entity2_text", "entity2_id",
    ]

    df_annotations = annotate_pmids(pmids)

    relations = []
    # An empty annotation frame may carry no columns at all, so guard
    # before indexing "pmid".
    if not df_annotations.empty:
        for pmid in df_annotations["pmid"].unique():
            pmid_df = df_annotations[df_annotations["pmid"] == pmid]

            # (text, identifier) mentions for each entity type of interest.
            mentions = {}
            for entity_type in ("Gene", "Disease", "Chemical"):
                rows = pmid_df[pmid_df["type"] == entity_type]
                mentions[entity_type] = list(
                    zip(rows["text"], rows["identifier"])
                )

            for code, first_type, second_type in relation_specs:
                if relation_types and code not in relation_types:
                    continue
                for first_text, first_id in mentions[first_type]:
                    for second_text, second_id in mentions[second_type]:
                        relations.append({
                            "pmid": pmid,
                            "relation_type": code,
                            "entity1_type": first_type,
                            "entity1_text": first_text,
                            "entity1_id": first_id,
                            "entity2_type": second_type,
                            "entity2_text": second_text,
                            "entity2_id": second_id,
                        })

    rel_df = pd.DataFrame(relations, columns=columns)
    rel_counts = (
        rel_df["relation_type"].value_counts().to_dict()
        if not rel_df.empty else {}
    )
    print(f"Entity relations: {len(rel_df)} total {rel_counts}")
    return rel_df
200
+ ```
201
+
202
+ ## 4. バイオアノテーション集計ダッシュボード
203
+
204
+ ```python
205
def annotation_summary_dashboard(pmids, output_prefix="pubtator"):
    """
    Summarise and plot PubTator annotations for a list of PMIDs.

    Writes ``figures/{output_prefix}_dashboard.png`` and
    ``results/{output_prefix}_annotations.csv``; the output directories
    are created when they do not exist.

    Parameters:
        pmids: list — PMIDs to annotate
        output_prefix: str — prefix for the output file names

    Returns:
        dict with total_pmids, total_annotations, entity_types and
        unique_entities, or an empty dict when no annotation is found.
    """
    from pathlib import Path

    import matplotlib.pyplot as plt

    df = annotate_pmids(pmids)
    if df.empty:
        print("No annotations found")
        return {}

    fig_path = Path("figures") / f"{output_prefix}_dashboard.png"
    csv_path = Path("results") / f"{output_prefix}_annotations.csv"
    # savefig / to_csv do not create missing directories themselves.
    fig_path.parent.mkdir(parents=True, exist_ok=True)
    csv_path.parent.mkdir(parents=True, exist_ok=True)

    type_counts = df["type"].value_counts()

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    try:
        # Panel 1: number of annotations per entity type.
        type_counts.plot(kind="bar", ax=axes[0], color="#2196F3")
        axes[0].set_title("Entity Type Distribution")
        axes[0].set_ylabel("Count")

        # Console report: ten most frequent surface forms per major type.
        for entity_type in ["Gene", "Disease", "Chemical"]:
            sub = df[df["type"] == entity_type]
            top = sub["text"].value_counts().head(10)
            if not top.empty:
                print(f"\nTop {entity_type}s: {top.to_dict()}")

        # Panel 2: distribution of annotation counts per article.
        per_article = df.groupby("pmid")["type"].count()
        per_article.hist(ax=axes[1], bins=20, color="#4CAF50")
        axes[1].set_title("Entities per Article")
        axes[1].set_xlabel("Number of entities")

        # Panel 3: per-article counts broken down by entity type.
        pivot = df.groupby(["pmid", "type"]).size().unstack(fill_value=0)
        pivot.plot(kind="box", ax=axes[2])
        axes[2].set_title("Entity Types per Article")

        fig.tight_layout()
        fig.savefig(str(fig_path), dpi=150, bbox_inches="tight")
    finally:
        # Close this specific figure even if plotting or saving failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    df.to_csv(str(csv_path), index=False)

    summary = {
        "total_pmids": df["pmid"].nunique(),
        "total_annotations": len(df),
        "entity_types": type_counts.to_dict(),
        "unique_entities": df.groupby("type")["text"].nunique().to_dict(),
    }
    print(f"\nSummary: {summary}")
    return summary
264
+ ```
265
+
266
+ ## 5. 知識グラフ構築用エンティティネットワーク
267
+
268
+ ```python
269
def build_entity_network(pmids, min_cooccurrence=2):
    """
    Build an undirected co-occurrence graph of PubTator entities.

    Nodes are labelled "<entity type>:<entity text>". An edge is added for
    each unordered node pair that appears in at least ``min_cooccurrence``
    rows of the relation table; the row count is stored as the edge
    attribute ``weight``.

    Parameters:
        pmids: list — PMIDs to analyse
        min_cooccurrence: int — minimum number of co-occurrences per edge

    Returns:
        networkx.Graph — empty when no relation is found.
    """
    import networkx as nx
    from collections import Counter

    relation_table = extract_entity_relations(pmids)
    graph = nx.Graph()
    if relation_table.empty:
        return graph

    # Tally each unordered pair of labelled nodes; sorting the two labels
    # makes (a, b) and (b, a) share one key.
    labelled_columns = zip(
        relation_table["entity1_type"],
        relation_table["entity1_text"],
        relation_table["entity2_type"],
        relation_table["entity2_text"],
    )
    pair_counts = Counter(
        tuple(sorted([f"{type_a}:{text_a}", f"{type_b}:{text_b}"]))
        for type_a, text_a, type_b, text_b in labelled_columns
    )

    # Keep only pairs that reach the requested support.
    for (left, right), seen in pair_counts.items():
        if seen < min_cooccurrence:
            continue
        graph.add_edge(left, right, weight=seen)

    node_total = graph.number_of_nodes()
    edge_total = graph.number_of_edges()
    print(f"Entity network: {node_total} nodes, "
          f"{edge_total} edges "
          f"(min cooccurrence = {min_cooccurrence})")
    return graph
303
+ ```
304
+
305
+ ---
306
+
307
+ ## 利用可能ツール
308
+
309
+ | ToolUniverse カテゴリ | 主なツール |
310
+ |---|---|
311
+ | `pubtator` | `PubTator_annotate`, `PubTator_search` |
312
+
313
+ ## パイプライン出力
314
+
315
+ | 出力ファイル | 説明 | 連携先スキル |
316
+ |---|---|---|
317
+ | `results/pubtator_annotations.csv` | エンティティアノテーション | → text-mining-nlp, knowledge-graph |
318
+ | `results/entity_relations.csv` | エンティティ間関係 | → network-analysis, disease-research |
319
+ | `results/entity_network.graphml` | エンティティ共起ネットワーク | → graph-neural-networks |
320
+ | `figures/pubtator_dashboard.png` | アノテーション集計 | → publication-figures |
321
+
322
+ ## パイプライン統合
323
+
324
+ ```
325
+ literature-search ──→ biomedical-pubtator ──→ text-mining-nlp
326
+ (PubMed/OpenAlex)     (PubTator NER)          (KG 構築)
327
+                               │
328
+                               ├──→ disease-research (GDA 関係)
329
+                               ├──→ drug-target-profiling (CGA 関係)
330
+                               └──→ preprint-archive (プレプリント NER)
331
+ ```
@@ -0,0 +1,298 @@
1
+ ---
2
+ name: scientific-biothings-idmapping
3
+ description: |
4
+ BioThings API (MyGene.info, MyVariant.info, MyChem.info) を活用した
5
+ 遺伝子・変異・化合物の横断的 ID マッピングおよびアノテーション統合スキル。
6
+ ---
7
+
8
+ # Scientific BioThings ID Mapping
9
+
10
+ BioThings API スイート (MyGene, MyVariant, MyChem) を活用した
11
+ 多データベース横断の ID 変換・アノテーション取得パイプラインを提供する。
12
+
13
+ ## When to Use
14
+
15
+ - 遺伝子 ID 間の変換 (Entrez ↔ Ensembl ↔ Symbol ↔ UniProt) を行うとき
16
+ - 変異 ID のアノテーション (ClinVar, dbSNP, CADD 等) を取得するとき
17
+ - 化合物 ID の変換 (DrugBank ↔ ChEMBL ↔ InChIKey ↔ PubChem) を行うとき
18
+ - バッチクエリで多数の ID を一括アノテーションするとき
19
+ - 複数データベースのメタ情報を統合するとき
20
+
21
+ ---
22
+
23
+ ## Quick Start
24
+
25
+ ## 1. MyGene.info 遺伝子アノテーション
26
+
27
+ ```python
28
+ import requests
29
+ import pandas as pd
30
+
31
MYGENE_API = "https://mygene.info/v3"


def mygene_query(query, fields=None, species="human", size=10):
    """
    Search genes on MyGene.info.

    Parameters:
        query: str — gene symbol, Entrez ID, or keyword
        fields: str | None — comma-separated fields to return; omitted
            from the request when falsy
        species: str — "human", "mouse", etc.
        size: int — maximum number of hits requested

    Returns:
        list of hit dicts (the "hits" array of the response, or [] when
        the key is absent).

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.

    ToolUniverse:
        MyGene_query_genes(q=query, fields=fields, species=species)
    """
    params = {
        "q": query,
        "species": species,
        "size": size,
    }
    if fields:
        params["fields"] = fields

    # A timeout prevents an unresponsive server from blocking forever.
    resp = requests.get(f"{MYGENE_API}/query", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    hits = data.get("hits", [])
    print(f"MyGene query '{query}': {data.get('total', 0)} total, "
          f"{len(hits)} returned")
    return hits
62
+
63
+
64
def mygene_get_gene(gene_id, fields=None):
    """
    Fetch the detailed MyGene.info annotation of one gene.

    Parameters:
        gene_id: identifier placed in the ``/gene/{gene_id}`` URL path
        fields: str | None — comma-separated fields to return; omitted
            from the request when falsy

    Returns:
        The decoded JSON response.

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.

    ToolUniverse:
        MyGene_get_gene_annotation(gene_id=gene_id, fields=fields)
    """
    params = {}
    if fields:
        params["fields"] = fields

    # A timeout prevents an unresponsive server from blocking forever.
    resp = requests.get(
        f"{MYGENE_API}/gene/{gene_id}", params=params, timeout=30
    )
    resp.raise_for_status()
    data = resp.json()

    print(f"MyGene gene {gene_id}: {data.get('symbol', '?')} "
          f"({data.get('name', '')})")
    return data
82
+
83
+
84
def mygene_batch_query(gene_ids, fields=None, species="human"):
    """
    Annotate many genes at once through the MyGene.info batch endpoint.

    The IDs are sent in chunks of 1000 per POST, because the service
    ignores IDs beyond that limit in a single request. Results from all
    chunks are concatenated in input order.

    Parameters:
        gene_ids: iterable — gene identifiers
        fields: str | None — comma-separated fields to return; omitted
            from the request when falsy
        species: str — "human", "mouse", etc.

    Returns:
        list of annotation records. An empty ``gene_ids`` returns []
        without contacting the API.

    Raises:
        requests.RequestException: on HTTP errors or when a request
            times out.

    ToolUniverse:
        MyGene_batch_query(ids=gene_ids, fields=fields, species=species)
    """
    chunk_size = 1000  # per-request ID limit of the batch endpoint
    gene_ids = list(gene_ids)

    results = []
    for start in range(0, len(gene_ids), chunk_size):
        chunk = gene_ids[start:start + chunk_size]
        payload = {
            "ids": ",".join(str(g) for g in chunk),
            "species": species,
        }
        if fields:
            payload["fields"] = fields

        # A timeout prevents an unresponsive server from blocking forever.
        resp = requests.post(f"{MYGENE_API}/gene", json=payload, timeout=60)
        resp.raise_for_status()
        chunk_data = resp.json()
        if isinstance(chunk_data, list):
            results.extend(chunk_data)
        else:
            # Unexpected non-list body: keep it rather than drop it.
            results.append(chunk_data)

    print(f"MyGene batch: {len(gene_ids)} queried → {len(results)} results")
    return results
104
+ ```
105
+
106
+ ## 2. MyVariant.info 変異アノテーション
107
+
108
+ ```python
109
MYVARIANT_API = "https://myvariant.info/v1"


def myvariant_get(variant_id, fields=None):
    """
    Fetch the MyVariant.info annotation of one variant.

    Parameters:
        variant_id: str — HGVS notation (e.g., "chr17:g.7674220C>T")
        fields: str | None — comma-separated fields to return; omitted
            from the request when falsy

    Returns:
        The decoded JSON response.

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.

    ToolUniverse:
        MyVariant_get_variant_annotation(variant_id=variant_id, fields=fields)
    """
    params = {}
    if fields:
        params["fields"] = fields

    # A timeout prevents an unresponsive server from blocking forever.
    resp = requests.get(
        f"{MYVARIANT_API}/variant/{variant_id}", params=params, timeout=30
    )
    resp.raise_for_status()
    data = resp.json()

    clinvar = data.get("clinvar") or {}
    if not isinstance(clinvar, dict):
        clinvar = {}
    significance = clinvar.get("clinical_significance")
    if not significance:
        # The significance is looked up on the ClinVar RCV record(s), which
        # may be a single dict or a list of dicts.
        rcv_records = clinvar.get("rcv") or []
        if isinstance(rcv_records, dict):
            rcv_records = [rcv_records]
        found = [
            str(record["clinical_significance"])
            for record in rcv_records
            if isinstance(record, dict) and record.get("clinical_significance")
        ]
        # Order-preserving de-duplication across RCV records.
        significance = ", ".join(dict.fromkeys(found)) if found else "N/A"

    cadd = data.get("cadd") or {}
    phred = cadd.get("phred", "N/A") if isinstance(cadd, dict) else "N/A"

    print(f"MyVariant {variant_id}: "
          f"ClinVar={significance}, "
          f"CADD={phred}")
    return data
136
+
137
+
138
def myvariant_query(query, fields=None, size=10):
    """
    Search variants on MyVariant.info.

    Parameters:
        query: str — query string sent as ``q``
        fields: str | None — comma-separated fields to return; omitted
            from the request when falsy
        size: int — maximum number of hits requested

    Returns:
        list of hit dicts (the "hits" array of the response, or [] when
        the key is absent).

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.

    ToolUniverse:
        MyVariant_query_variants(q=query, fields=fields, size=size)
    """
    params = {"q": query, "size": size}
    if fields:
        params["fields"] = fields

    # A timeout prevents an unresponsive server from blocking forever.
    resp = requests.get(f"{MYVARIANT_API}/query", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    hits = data.get("hits", [])
    print(f"MyVariant query '{query}': {data.get('total', 0)} total")
    return hits
156
+ ```
157
+
158
+ ## 3. MyChem.info 化合物アノテーション
159
+
160
+ ```python
161
MYCHEM_API = "https://mychem.info/v1"


def mychem_get(chem_id, fields=None):
    """
    Fetch the MyChem.info annotation of one compound.

    Parameters:
        chem_id: str — InChIKey, DrugBank ID, ChEMBL ID, etc.
        fields: str | None — comma-separated fields to return; omitted
            from the request when falsy

    Returns:
        The decoded JSON response.

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.

    ToolUniverse:
        MyChem_get_chemical_annotation(chem_id=chem_id, fields=fields)
    """
    params = {}
    if fields:
        params["fields"] = fields

    # A timeout prevents an unresponsive server from blocking forever.
    resp = requests.get(
        f"{MYCHEM_API}/chem/{chem_id}", params=params, timeout=30
    )
    resp.raise_for_status()
    data = resp.json()

    drugbank = data.get("drugbank") or {}
    # Tolerate a list-valued sub-document by summarising its first entry,
    # the same way cross_db_id_mapping treats a list-valued "ensembl".
    if isinstance(drugbank, list):
        drugbank = drugbank[0] if drugbank else {}
    name = drugbank.get("name", "N/A") if isinstance(drugbank, dict) else "N/A"
    print(f"MyChem {chem_id}: {name}")
    return data
185
+
186
+
187
def mychem_query(query, fields=None, size=10):
    """
    Search compounds on MyChem.info.

    Parameters:
        query: str — query string sent as ``q``
        fields: str | None — comma-separated fields to return; omitted
            from the request when falsy
        size: int — maximum number of hits requested

    Returns:
        list of hit dicts (the "hits" array of the response, or [] when
        the key is absent).

    Raises:
        requests.RequestException: on HTTP errors or when the request
            times out.

    ToolUniverse:
        MyChem_query_chemicals(q=query, fields=fields, size=size)
    """
    params = {"q": query, "size": size}
    if fields:
        params["fields"] = fields

    # A timeout prevents an unresponsive server from blocking forever.
    resp = requests.get(f"{MYCHEM_API}/query", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    hits = data.get("hits", [])
    print(f"MyChem query '{query}': {data.get('total', 0)} total")
    return hits
205
+ ```
206
+
207
+ ## 4. クロスデータベース ID マッピング
208
+
209
+ ```python
210
def cross_db_id_mapping(gene_symbol):
    """
    Collect Entrez, Ensembl, UniProt and RefSeq identifiers for a gene.

    Runs one ``mygene_query`` search and converts every hit into a row
    with the keys symbol, name, entrez_id, ensembl_gene,
    uniprot_swissprot and refseq_rna. When a hit's "ensembl" value is a
    list, only its first element is used.

    Parameters:
        gene_symbol: str — query passed to ``mygene_query``

    Returns:
        pandas.DataFrame with one row per hit.

    ToolUniverse (cross-database):
        MyGene_query_genes(q=gene_symbol, fields="entrezgene,ensembl.gene,uniprot,refseq")
    """
    wanted_fields = "entrezgene,ensembl.gene,uniprot.Swiss-Prot,refseq.rna,symbol,name"

    def _as_row(record):
        # Normalise "ensembl" to a single mapping before reading "gene".
        ensembl_part = record.get("ensembl", {})
        if isinstance(ensembl_part, list):
            ensembl_part = ensembl_part[0] if ensembl_part else {}
        uniprot_part = record.get("uniprot", {})
        return {
            "symbol": record.get("symbol", ""),
            "name": record.get("name", ""),
            "entrez_id": record.get("entrezgene", ""),
            "ensembl_gene": ensembl_part.get("gene", ""),
            "uniprot_swissprot": uniprot_part.get("Swiss-Prot", ""),
            "refseq_rna": record.get("refseq", {}).get("rna", []),
        }

    rows = [_as_row(record) for record in mygene_query(gene_symbol, fields=wanted_fields)]
    mapping = pd.DataFrame(rows)
    print(f"ID mapping '{gene_symbol}': {len(mapping)} entries")
    return mapping
239
+ ```
240
+
241
+ ## 5. バッチ統合アノテーション
242
+
243
+ ```python
244
def batch_integrated_annotation(gene_symbols, include_variants=False):
    """
    Annotate several gene symbols and stack the results in one table.

    Issues one ``mygene_query`` call per symbol and keeps only the first
    hit of each; symbols without any hit contribute no row.

    Parameters:
        gene_symbols: iterable of str — gene symbols to look up
        include_variants: bool — accepted for interface compatibility;
            the body does not read it, so no variant lookup is performed

    Returns:
        pandas.DataFrame with one row per symbol that produced a hit.

    ToolUniverse (cross-database):
        MyGene_batch_query(ids=entrez_ids, fields=fields)
        MyVariant_query_variants(q=gene_query) [optional]
    """
    requested = "entrezgene,symbol,name,summary"

    # hits[:1] is the best hit or an empty list, so symbols without hits
    # fall out of the comprehension naturally.
    best_hits = [
        hit
        for symbol in gene_symbols
        for hit in mygene_query(symbol, fields=requested)[:1]
    ]

    table = pd.DataFrame(best_hits)
    print(f"Batch annotation: {len(gene_symbols)} genes → {len(table)} results")
    return table
261
+ ```
262
+
263
+ ## References
264
+
265
+ ### Output Files
266
+
267
+ | ファイル | 形式 |
268
+ |---|---|
269
+ | `results/mygene_annotation.json` | JSON |
270
+ | `results/myvariant_annotation.json` | JSON |
271
+ | `results/mychem_annotation.json` | JSON |
272
+ | `results/id_mapping.csv` | CSV |
273
+
274
+ ### 利用可能ツール
275
+
276
+ | カテゴリ | 主要ツール | 用途 |
277
+ |---|---|---|
278
+ | BioThings | `MyGene_query_genes` | 遺伝子検索 |
279
+ | BioThings | `MyGene_get_gene_annotation` | 遺伝子詳細 |
280
+ | BioThings | `MyGene_batch_query` | バッチアノテーション |
281
+ | BioThings | `MyVariant_get_variant_annotation` | 変異アノテーション |
282
+ | BioThings | `MyVariant_query_variants` | 変異検索 |
283
+ | BioThings | `MyChem_get_chemical_annotation` | 化合物アノテーション |
284
+ | BioThings | `MyChem_query_chemicals` | 化合物検索 |
285
+
286
+ ### 参照スキル
287
+
288
+ | スキル | 関連 |
289
+ |---|---|
290
+ | `scientific-variant-interpretation` | 変異アノテーション |
291
+ | `scientific-gene-expression-transcriptomics` | 遺伝子発現 |
292
+ | `scientific-drug-target-interaction` | DTI 解析 |
293
+ | `scientific-rare-disease-genetics` | 希少疾患 |
294
+ | `scientific-pathway-enrichment` | パスウェイ解析 |
295
+
296
+ ### 依存パッケージ
297
+
298
+ `requests`, `pandas`