@nahisaho/satori 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -41
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-encode-screen/SKILL.md +315 -0
- package/src/.github/skills/scientific-environmental-geodata/SKILL.md +255 -0
- package/src/.github/skills/scientific-geo-expression/SKILL.md +274 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +294 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-metabolic-atlas/SKILL.md +263 -0
- package/src/.github/skills/scientific-paleobiology/SKILL.md +265 -0
- package/src/.github/skills/scientific-parasite-genomics/SKILL.md +280 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
- package/src/.github/skills/scientific-squidpy-advanced/SKILL.md +251 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-pharmgkb-pgx
|
|
3
|
+
description: |
|
|
4
|
+
PharmGKB 薬理ゲノミクススキル。PharmGKB REST API による
|
|
5
|
+
臨床アノテーション・薬物遺伝子関連・投与量ガイドライン・
|
|
6
|
+
スターアレル解析。ToolUniverse 連携: pharmgkb。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: pharmgkb
|
|
9
|
+
name: PharmGKB
|
|
10
|
+
description: 臨床アノテーション・薬物遺伝子関連・PGx ガイドライン
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific PharmGKB PGx
|
|
14
|
+
|
|
15
|
+
PharmGKB (Pharmacogenomics Knowledgebase) REST API を活用した
|
|
16
|
+
薬理ゲノミクス臨床アノテーション・薬物遺伝子相互作用・投与量
|
|
17
|
+
ガイドライン検索パイプラインを提供する。
|
|
18
|
+
|
|
19
|
+
## When to Use
|
|
20
|
+
|
|
21
|
+
- 薬物と遺伝子変異の関連を調べるとき
|
|
22
|
+
- 臨床アノテーション (エビデンスレベル付き) を検索するとき
|
|
23
|
+
- 投与量調整ガイドライン (CPIC/DPWG) を取得するとき
|
|
24
|
+
- スターアレルと表現型の対応を確認するとき
|
|
25
|
+
- 特定薬物の薬理ゲノミクス情報を包括的に取得するとき
|
|
26
|
+
- 精密医療の薬物選択を支援するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. 薬物・遺伝子検索
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Base URL of the PharmGKB REST data API; the functions below append an
# endpoint path (e.g. "/chemical", "/gene") to it.
PGKB_BASE = "https://api.pharmgkb.org/v1/data"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def pharmgkb_search_drugs(query, limit=50):
    """
    PharmGKB — search chemicals (drugs) by name.

    Parameters:
        query: str — drug name (e.g. "warfarin", "clopidogrel")
        limit: int — maximum number of rows to return
            (None disables the cap)

    Returns:
        pandas.DataFrame with columns pharmgkb_id, name, generic_names,
        trade_names (first five only), type and cross_references (a count).

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    url = f"{PGKB_BASE}/chemical"
    params = {"name": query, "view": "max"}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    items = resp.json().get("data") or []

    # No limit is sent with the request, so the documented `limit` is
    # enforced client-side (it was previously accepted but ignored).
    if limit is not None:
        items = list(items)[:max(limit, 0)]

    # `or []` guards against fields that are present but null in the JSON.
    results = [
        {
            "pharmgkb_id": item.get("id", ""),
            "name": item.get("name", ""),
            "generic_names": "; ".join(item.get("genericNames") or []),
            "trade_names": "; ".join(
                (item.get("tradeNames") or [])[:5]),
            "type": item.get("type", ""),
            "cross_references": len(item.get("crossReferences") or []),
        }
        for item in items
    ]

    df = pd.DataFrame(results)
    print(f"PharmGKB drugs: {len(df)} results "
          f"(query='{query}')")
    return df
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def pharmgkb_search_genes(query, limit=50):
    """
    PharmGKB — search genes by symbol.

    Parameters:
        query: str — gene symbol (e.g. "CYP2D6")
        limit: int — maximum number of rows to return
            (None disables the cap)

    Returns:
        pandas.DataFrame with columns pharmgkb_id, symbol, name,
        chromosome, cpic_gene and has_prescribing_info.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    url = f"{PGKB_BASE}/gene"
    params = {"symbol": query, "view": "max"}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    items = resp.json().get("data") or []

    # No limit is sent with the request, so the documented `limit` is
    # enforced client-side (it was previously accepted but ignored).
    if limit is not None:
        items = list(items)[:max(limit, 0)]

    results = [
        {
            "pharmgkb_id": item.get("id", ""),
            "symbol": item.get("symbol", ""),
            "name": item.get("name", ""),
            "chromosome": item.get("chromosomeFormatted", ""),
            "cpic_gene": item.get("cpicGene", False),
            "has_prescribing_info": item.get(
                "hasPrescribingInfo", False),
        }
        for item in items
    ]

    df = pd.DataFrame(results)
    print(f"PharmGKB genes: {len(df)} results "
          f"(query='{query}')")
    return df
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 2. 臨床アノテーション取得
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
def pharmgkb_clinical_annotations(gene_or_drug,
                                  search_type="gene"):
    """
    PharmGKB — search clinical annotations for a gene or a drug.

    The gene symbol / drug name is first resolved to a PharmGKB ID, and
    that ID is then used to filter the clinicalAnnotation endpoint.

    Parameters:
        gene_or_drug: str — gene symbol or drug name
        search_type: str — "gene" or "drug" (any value other than
            "gene" is handled as a drug lookup)

    Returns:
        pandas.DataFrame sorted by "level", with columns annotation_id,
        level, score, genes, drugs, phenotype_category and sentences
        (first 300 characters of the annotation HTML). Empty when the
        gene/drug cannot be resolved or has no annotations.

    Raises:
        requests.HTTPError — when either request answers with an error
        status.
    """
    if search_type == "gene":
        lookup_url = f"{PGKB_BASE}/gene"
        lookup_params = {"symbol": gene_or_drug}
        filter_key = "relatedGenes.id"
    else:
        lookup_url = f"{PGKB_BASE}/chemical"
        lookup_params = {"name": gene_or_drug}
        filter_key = "relatedChemicals.id"

    lookup = requests.get(lookup_url, params=lookup_params, timeout=30)
    lookup.raise_for_status()
    matches = lookup.json().get("data") or []
    entity_id = matches[0].get("id") if matches else None

    if entity_id:
        resp = requests.get(
            f"{PGKB_BASE}/clinicalAnnotation",
            params={"view": "max", filter_key: entity_id},
            timeout=30)
        resp.raise_for_status()
        items = resp.json().get("data") or []
    else:
        # Without a resolved ID the request would carry no entity filter,
        # so anything it returned would not be specific to the query.
        # Report "no annotations" instead.
        items = []

    results = []
    for item in items:
        gene_symbols = [g.get("symbol") or ""
                        for g in item.get("relatedGenes") or []]
        drug_names = [c.get("name") or ""
                      for c in item.get("relatedChemicals") or []]
        results.append({
            "annotation_id": item.get("id", ""),
            "level": item.get("level", ""),
            "score": item.get("score", ""),
            "genes": "; ".join(gene_symbols),
            "drugs": "; ".join(drug_names),
            "phenotype_category": item.get(
                "phenotypeCategory", ""),
            "sentences": (item.get("textHtml") or "")[:300],
        })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("level")

    print(f"PharmGKB annotations: {len(df)} "
          f"({search_type}={gene_or_drug})")
    return df
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## 3. 投与量ガイドライン取得
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
def pharmgkb_dosing_guidelines(drug_name=None, gene=None):
    """
    PharmGKB — search dosing guidelines (CPIC/DPWG).

    Parameters:
        drug_name: str — drug name; adds a relatedChemicals.name filter
        gene: str — gene symbol; adds a relatedGenes.symbol filter

    Returns:
        pandas.DataFrame with columns guideline_id, name, source, genes,
        drugs and recommendation (first 500 characters of the guideline
        HTML).
    """
    query = {"view": "max"}
    if drug_name:
        query["relatedChemicals.name"] = drug_name
    if gene:
        query["relatedGenes.symbol"] = gene

    response = requests.get(f"{PGKB_BASE}/guideline",
                            params=query, timeout=30)
    response.raise_for_status()
    payload = response.json()

    def _to_row(entry):
        # Flatten one guideline record into a table row.
        gene_symbols = [rel.get("symbol", "")
                        for rel in entry.get("relatedGenes", [])]
        chemical_names = [rel.get("name", "")
                          for rel in entry.get("relatedChemicals", [])]
        return {
            "guideline_id": entry.get("id", ""),
            "name": entry.get("name", ""),
            "source": entry.get("source", ""),
            "genes": "; ".join(gene_symbols),
            "drugs": "; ".join(chemical_names),
            "recommendation": (entry.get("textHtml") or "")[:500],
        }

    table = pd.DataFrame([_to_row(entry)
                          for entry in payload.get("data", [])])
    print(f"PharmGKB guidelines: {len(table)} "
          f"(drug={drug_name}, gene={gene})")
    return table
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## 4. PharmGKB 統合パイプライン
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
def pharmgkb_pipeline(drug_name, genes=None,
                      output_dir="results"):
    """
    PharmGKB integrated pipeline.

    Runs the drug search, the drug's clinical annotations and its dosing
    guidelines, writing each result to a CSV file in `output_dir`. When
    `genes` is given, clinical annotations are also fetched per gene and
    written to gene_annotations.csv.

    Parameters:
        drug_name: str — drug name
        genes: list[str] — related gene symbols (a single string is
            treated as a one-element list)
        output_dir: str — output directory (created if missing)

    Returns:
        dict with DataFrames under "drugs", "annotations" and
        "guidelines", plus "gene_annotations" when at least one gene
        lookup succeeded.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Drug search
    drugs = pharmgkb_search_drugs(drug_name)
    drugs.to_csv(output_dir / "drugs.csv", index=False)

    # 2) Clinical annotations for the drug
    annotations = pharmgkb_clinical_annotations(
        drug_name, search_type="drug")
    annotations.to_csv(output_dir / "annotations.csv",
                       index=False)

    # 3) Dosing guidelines
    guidelines = pharmgkb_dosing_guidelines(
        drug_name=drug_name)
    guidelines.to_csv(output_dir / "guidelines.csv",
                      index=False)

    outputs = {
        "drugs": drugs,
        "annotations": annotations,
        "guidelines": guidelines,
    }

    # 4) Per-gene annotations
    if isinstance(genes, str):
        # A bare string would otherwise be iterated character by character.
        genes = [genes]
    if genes:
        gene_results = []
        for g in genes:
            try:
                g_ann = pharmgkb_clinical_annotations(
                    g, search_type="gene")
                g_ann["query_gene"] = g
                gene_results.append(g_ann)
            except Exception as exc:
                # Best-effort: one failing gene must not abort the run,
                # but the failure is reported instead of hidden.
                print(f"PharmGKB pipeline: skipped gene '{g}' ({exc})")
                continue
        if gene_results:
            gene_df = pd.concat(gene_results,
                                ignore_index=True)
            gene_df.to_csv(
                output_dir / "gene_annotations.csv",
                index=False)
            outputs["gene_annotations"] = gene_df

    print(f"PharmGKB pipeline: {output_dir}")
    return outputs
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## ToolUniverse 連携
|
|
281
|
+
|
|
282
|
+
| TU Key | ツール名 | 連携内容 |
|
|
283
|
+
|--------|---------|---------|
|
|
284
|
+
| `pharmgkb` | PharmGKB | 臨床アノテーション・薬物遺伝子・PGx ガイドライン |
|
|
285
|
+
|
|
286
|
+
## パイプライン統合
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
pharmacogenomics → pharmgkb-pgx → clinical-decision-support
|
|
290
|
+
(PGx 解析全般) (PharmGKB API) (臨床意思決定)
|
|
291
|
+
│ │ ↓
|
|
292
|
+
drug-discovery ──────┘ precision-oncology
|
|
293
|
+
(薬物開発) │ (精密腫瘍学)
|
|
294
|
+
↓
|
|
295
|
+
variant-interpretation
|
|
296
|
+
(変異臨床解釈)
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## パイプライン出力
|
|
300
|
+
|
|
301
|
+
| ファイル | 説明 | 次スキル |
|
|
302
|
+
|---------|------|---------|
|
|
303
|
+
| `results/drugs.csv` | 薬物情報 | → drug-discovery |
|
|
304
|
+
| `results/annotations.csv` | 臨床アノテーション | → variant-interpretation |
|
|
305
|
+
| `results/guidelines.csv` | 投与量ガイドライン | → clinical-decision-support |
|
|
306
|
+
| `results/gene_annotations.csv` | 遺伝子別アノテーション | → pharmacogenomics |
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-semantic-scholar
|
|
3
|
+
description: |
|
|
4
|
+
Semantic Scholar 学術グラフスキル。Semantic Scholar Academic
|
|
5
|
+
Graph API による論文検索・著者プロファイル・引用グラフ・
|
|
6
|
+
推薦・TLDR 要約。ToolUniverse 連携: semantic_scholar。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: semantic_scholar
|
|
9
|
+
name: Semantic Scholar
|
|
10
|
+
description: 学術論文検索・引用解析・著者プロファイル
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific Semantic Scholar
|
|
14
|
+
|
|
15
|
+
Semantic Scholar Academic Graph API を活用した学術論文検索・
|
|
16
|
+
引用ネットワーク解析・著者プロファイル・論文推薦パイプライン
|
|
17
|
+
を提供する。
|
|
18
|
+
|
|
19
|
+
## When to Use
|
|
20
|
+
|
|
21
|
+
- 学術論文を高精度で検索するとき
|
|
22
|
+
- 引用・被引用ネットワークを解析するとき
|
|
23
|
+
- 著者の h-index・論文数・研究領域を調べるとき
|
|
24
|
+
- 関連論文の推薦を受けるとき
|
|
25
|
+
- TLDR (自動要約) を取得するとき
|
|
26
|
+
- 特定分野の引用傾向を分析するとき
|
|
27
|
+
- PubMed/OpenAlex 以外の学術検索エンジンを使うとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. 論文検索
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
# Base URL of the Semantic Scholar Academic Graph API; the functions below
# append an endpoint path (e.g. "/paper/search", "/author/{id}") to it.
S2_BASE = "https://api.semanticscholar.org/graph/v1"
|
|
40
|
+
# HTTP headers passed to every Semantic Scholar request in this file.
# Empty by default; to send an API key, use the form shown on the right.
S2_HEADERS = {} # API key: {"x-api-key": "YOUR_KEY"}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def semantic_scholar_search(query, limit=50,
                            year_range=None,
                            fields_of_study=None):
    """
    Semantic Scholar — paper search.

    Parameters:
        query: str — search query
        limit: int — maximum number of results (capped at 100 per call)
        year_range: str — year range (e.g. "2020-2024")
        fields_of_study: list[str] — field-of-study filter

    Returns:
        pandas.DataFrame with columns paper_id, title, year,
        citation_count, influential_citations, authors (first five),
        journal, fields, tldr (first 300 characters) and pdf_url.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    url = f"{S2_BASE}/paper/search"
    params = {
        "query": query,
        "limit": min(limit, 100),
        "fields": ("paperId,title,year,citationCount,"
                   "influentialCitationCount,authors,"
                   "journal,tldr,openAccessPdf,fieldsOfStudy"),
    }
    if year_range:
        params["year"] = year_range
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    resp = requests.get(url, params=params,
                        headers=S2_HEADERS, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for p in data.get("data") or []:
        # A field can be present with a null value; `or` guards every
        # nested access, as already done for journal, openAccessPdf and
        # fieldsOfStudy.
        authors = [a.get("name") or ""
                   for a in p.get("authors") or []]
        tldr_text = (p.get("tldr") or {}).get("text") or ""
        results.append({
            "paper_id": p.get("paperId", ""),
            "title": p.get("title", ""),
            "year": p.get("year"),
            "citation_count": p.get("citationCount", 0),
            "influential_citations": p.get(
                "influentialCitationCount", 0),
            "authors": "; ".join(authors[:5]),
            "journal": (p.get("journal") or {}).get("name", ""),
            "fields": ", ".join(p.get("fieldsOfStudy") or []),
            "tldr": tldr_text[:300],
            "pdf_url": (p.get("openAccessPdf") or {}).get("url", ""),
        })

    df = pd.DataFrame(results)
    print(f"Semantic Scholar: {len(df)} papers "
          f"(query='{query}')")
    return df
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def semantic_scholar_get_paper(paper_id):
    """
    Semantic Scholar — fetch the details of a single paper.

    Parameters:
        paper_id: str — S2 Paper ID / DOI / ArXiv ID
            (NOTE(review): non-S2 identifiers presumably need the
            prefix form the API expects — confirm against the API docs)

    Returns:
        dict — the decoded JSON response for the requested fields.
    """
    wanted_fields = ",".join([
        "paperId", "title", "year", "abstract", "citationCount",
        "influentialCitationCount", "authors", "references",
        "citations", "journal", "tldr", "openAccessPdf",
        "fieldsOfStudy", "publicationDate", "venue",
    ])
    response = requests.get(
        f"{S2_BASE}/paper/{paper_id}",
        params={"fields": wanted_fields},
        headers=S2_HEADERS,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## 2. 著者プロファイル・引用解析
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
def semantic_scholar_author(author_id, paper_limit=100):
    """
    Semantic Scholar — fetch an author profile and the author's papers.

    Parameters:
        author_id: str — S2 Author ID
        paper_limit: int — upper bound on papers fetched
            (capped at 1000)

    Returns:
        tuple (profile, papers_df): the profile JSON as a dict, and a
        pandas.DataFrame with columns paper_id, title, year, citations
        and venue.
    """
    author_url = f"{S2_BASE}/author/{author_id}"

    # Profile request
    profile_fields = ",".join([
        "authorId", "name", "affiliations", "homepage",
        "paperCount", "citationCount", "hIndex",
    ])
    profile_resp = requests.get(author_url,
                                params={"fields": profile_fields},
                                headers=S2_HEADERS, timeout=30)
    profile_resp.raise_for_status()
    profile = profile_resp.json()

    # Paper list request
    listing_resp = requests.get(
        f"{author_url}/papers",
        params={
            "fields": "paperId,title,year,citationCount,venue",
            "limit": min(paper_limit, 1000),
        },
        headers=S2_HEADERS, timeout=30)
    listing_resp.raise_for_status()

    rows = [
        {
            "paper_id": entry.get("paperId", ""),
            "title": entry.get("title", ""),
            "year": entry.get("year"),
            "citations": entry.get("citationCount", 0),
            "venue": entry.get("venue", ""),
        }
        for entry in listing_resp.json().get("data", [])
    ]
    papers_df = pd.DataFrame(rows)

    author_name = profile.get("name", "")
    h_index = profile.get("hIndex", 0)
    n_papers = profile.get("paperCount", 0)
    n_citations = profile.get("citationCount", 0)
    print(f"Author {author_name}: "
          f"h-index={h_index}, "
          f"{n_papers} papers, "
          f"{n_citations} citations")
    return profile, papers_df
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## 3. 引用ネットワーク・影響度分析
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
def semantic_scholar_citation_graph(paper_id,
                                    direction="both",
                                    limit=100):
    """
    Semantic Scholar — fetch the citation graph around a paper.

    Parameters:
        paper_id: str — S2 Paper ID
        direction: str — "citations", "references" or "both"
        limit: int — upper bound per direction

    Returns:
        dict mapping "citations" and/or "references" to a
        pandas.DataFrame with columns paper_id, title, year and
        citations. Any other `direction` yields an empty dict.
    """
    fields = "paperId,title,year,citationCount,authors"
    # (endpoint / result key, JSON key wrapping the linked paper)
    edge_kinds = (
        ("citations", "citingPaper"),
        ("references", "citedPaper"),
    )

    graphs = {}
    for edge_kind, wrapper_key in edge_kinds:
        if direction not in (edge_kind, "both"):
            continue
        response = requests.get(
            f"{S2_BASE}/paper/{paper_id}/{edge_kind}",
            params={"fields": fields, "limit": limit},
            headers=S2_HEADERS, timeout=30)
        response.raise_for_status()

        rows = []
        for edge in response.json().get("data", []):
            linked = edge.get(wrapper_key, {})
            rows.append({
                "paper_id": linked.get("paperId", ""),
                "title": linked.get("title", ""),
                "year": linked.get("year"),
                "citations": linked.get("citationCount", 0),
            })
        graphs[edge_kind] = pd.DataFrame(rows)

    for label, frame in graphs.items():
        print(f" {label}: {len(frame)} papers")
    return graphs
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
## 4. 学術文献統合パイプライン
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
def semantic_scholar_pipeline(query, year_range=None,
                              output_dir="results"):
    """
    Semantic Scholar integrated pipeline.

    Searches papers, fetches the citation graph of the most-cited hit,
    and aggregates citations per publication year. Each result is
    written to a CSV file in `output_dir`.

    Parameters:
        query: str — search query
        year_range: str — year range
        output_dir: str — output directory (created if missing)

    Returns:
        dict with the search results DataFrame under "papers".
    """
    from pathlib import Path
    target = Path(output_dir)
    target.mkdir(parents=True, exist_ok=True)

    # 1) Paper search
    papers = semantic_scholar_search(query,
                                     year_range=year_range)
    papers.to_csv(target / "papers.csv", index=False)

    if not papers.empty:
        # 2) Citation graph of the most-cited paper
        ranked = papers.sort_values("citation_count",
                                    ascending=False)
        top_paper_id = ranked["paper_id"].iloc[0]
        graphs = semantic_scholar_citation_graph(top_paper_id)
        for label, frame in graphs.items():
            frame.to_csv(target / f"{label}.csv", index=False)

        # 3) Yearly citation trend
        if "year" in papers.columns:
            by_year = papers.groupby("year")
            yearly = by_year.agg(
                papers_count=("paper_id", "count"),
                total_citations=("citation_count", "sum"),
                avg_citations=("citation_count", "mean"),
            ).reset_index()
            yearly.to_csv(target / "yearly_trend.csv",
                          index=False)

    print(f"Semantic Scholar pipeline: {target}")
    return {"papers": papers}
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
---
|
|
271
|
+
|
|
272
|
+
## ToolUniverse 連携
|
|
273
|
+
|
|
274
|
+
| TU Key | ツール名 | 連携内容 |
|
|
275
|
+
|--------|---------|---------|
|
|
276
|
+
| `semantic_scholar` | Semantic Scholar | 論文検索・引用解析・著者・TLDR |
|
|
277
|
+
|
|
278
|
+
## パイプライン統合
|
|
279
|
+
|
|
280
|
+
```
|
|
281
|
+
literature-search → semantic-scholar → deep-research
|
|
282
|
+
(PubMed/NCBI) (Academic Graph API) (knowledge synthesis)
|
|
283
|
+
│ │ ↓
|
|
284
|
+
crossref-metadata ─────┘ citation-checker
|
|
285
|
+
(DOI/metadata) │ (引用品質検証)
|
|
286
|
+
↓
|
|
287
|
+
gene-expression-transcriptomics
|
|
288
|
+
(論文引用データからの解析)
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## パイプライン出力
|
|
292
|
+
|
|
293
|
+
| ファイル | 説明 | 次スキル |
|
|
294
|
+
|---------|------|---------|
|
|
295
|
+
| `results/papers.csv` | 論文検索結果 | → deep-research |
|
|
296
|
+
| `results/citations.csv` | 被引用論文 | → citation-checker |
|
|
297
|
+
| `results/references.csv` | 引用論文 | → meta-analysis |
|
|
298
|
+
| `results/yearly_trend.csv` | 年次引用傾向 | → bibliometrics |
|