@nahisaho/satori 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -30
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-arrayexpress-expression
|
|
3
|
+
description: |
|
|
4
|
+
ArrayExpress 発現アーカイブスキル。BioStudies/ArrayExpress
|
|
5
|
+
REST API によるマイクロアレイ・RNA-seq 発現実験検索・メタ
|
|
6
|
+
データ取得・データ再解析。ToolUniverse 連携: arrayexpress。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: arrayexpress
|
|
9
|
+
name: ArrayExpress
|
|
10
|
+
description: ArrayExpress 発現実験検索・メタデータ・ファイル取得
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific ArrayExpress Expression
|
|
14
|
+
|
|
15
|
+
EBI ArrayExpress / BioStudies REST API を活用した発現データ
|
|
16
|
+
アーカイブ検索・再解析パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- ArrayExpress/BioStudies の発現実験を検索するとき
|
|
21
|
+
- マイクロアレイ/RNA-seq 発現データのメタデータを取得するとき
|
|
22
|
+
- SDRF サンプル情報テーブルを解析するとき
|
|
23
|
+
- E-MTAB/E-GEOD アクセッションからデータ再解析するとき
|
|
24
|
+
- 発現データアーカイブを横断検索するとき
|
|
25
|
+
- GEO と ArrayExpress の両方でデータを探すとき
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
## 1. BioStudies 発現実験検索
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import requests
|
|
35
|
+
import pandas as pd
|
|
36
|
+
|
|
37
|
+
BIOSTUDIES_BASE = "https://www.ebi.ac.uk/biostudies/api/v1"
|
|
38
|
+
AE_BASE = "https://www.ebi.ac.uk/arrayexpress/json/v3"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def arrayexpress_search_experiments(query, organism=None,
                                    experiment_type=None,
                                    limit=50):
    """
    Search expression experiments through the BioStudies search endpoint.

    Parameters:
        query: str — free-text search query (e.g. "breast cancer RNA-seq")
        organism: str — optional organism filter (e.g. "Homo sapiens")
        experiment_type: str — optional experiment type filter
            (e.g. "RNA-seq of coding RNA")
        limit: int — maximum number of results, sent as ``pageSize``

    Returns:
        pandas.DataFrame — one row per hit with the columns accession,
        title, organism, experiment_type, release_date, files_count and
        links_count. The frame is empty when the response has no hits.

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    url = f"{BIOSTUDIES_BASE}/search"
    params = {
        "query": query,
        "type": "study",
        "pageSize": limit,
    }
    # Optional filters are only sent when the caller supplied them.
    if organism:
        params["organism"] = organism
    if experiment_type:
        params["experimenttype"] = experiment_type

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    def _pick(hit, keys, default):
        # Return the value stored under the first key present in the hit.
        for key in keys:
            if key in hit:
                return hit[key]
        return default

    results = []
    # `or []` keeps the loops safe when the API sends an explicit null.
    for h in data.get("hits") or []:
        attrs = {a.get("name", ""): a.get("value", "")
                 for a in h.get("attributes") or []}
        # NOTE(review): BioStudies search hits appear to expose
        # `release_date`, `files` and `links` rather than the camelCase
        # names — confirm against the live API. The original keys are
        # still tried first, so existing behaviour is unchanged.
        results.append({
            "accession": h.get("accession", ""),
            "title": attrs.get("Title", h.get("title", "")),
            "organism": attrs.get("Organism", ""),
            "experiment_type": attrs.get("Experiment type", ""),
            "release_date": _pick(
                h, ("releaseDate", "release_date"), ""),
            "files_count": _pick(h, ("filesCount", "files"), 0),
            "links_count": _pick(h, ("linksCount", "links"), 0),
        })

    df = pd.DataFrame(results)
    print(f"ArrayExpress search: {len(df)} experiments "
          f"(query={query})")
    return df
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## 2. 実験メタデータ・SDRF 取得
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
def arrayexpress_get_experiment(accession):
    """
    Fetch study metadata, the file listing and the SDRF table.

    Parameters:
        accession: str — study accession (e.g. "E-MTAB-12345")

    Returns:
        tuple ``(metadata, files_df, sdrf_df)``:
        metadata — dict with accession, title, description (first 500
            characters), organism, experiment_type and release_date;
        files_df — pandas.DataFrame with filename/type/size columns
            (no columns at all when the study lists no files);
        sdrf_df — pandas.DataFrame parsed from the tab-separated SDRF
            file, or an empty frame when it could not be retrieved.

    Raises:
        Whatever ``resp.raise_for_status()`` raises when the study request
        fails. Problems with the optional SDRF download are reported with
        a warning and do not raise.
    """
    from io import StringIO

    url = f"{BIOSTUDIES_BASE}/studies/{accession}"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    section = data.get("section") or {}

    # Metadata: top-level attributes take precedence.
    attrs = {a.get("name", ""): a.get("value", "")
             for a in data.get("attributes") or []}
    # NOTE(review): BioStudies seems to keep most descriptive attributes
    # (Description, Organism, ...) on the root section — confirm. They are
    # only used to fill names the top level does not provide.
    for a in section.get("attributes") or []:
        attrs.setdefault(a.get("name", ""), a.get("value", ""))

    metadata = {
        "accession": accession,
        "title": attrs.get("Title", ""),
        # `or ""` guards against an attribute whose value is null, which
        # would otherwise make the slice raise TypeError.
        "description": (attrs.get("Description") or "")[:500],
        "organism": attrs.get("Organism", ""),
        "experiment_type": attrs.get("Experiment type", ""),
        "release_date": data.get("releaseDate", ""),
    }

    # File listing: entries are either a single file dict or a list of
    # file dicts; both shapes are flattened into one table.
    files = []
    for entry in section.get("files") or []:
        group = entry if isinstance(entry, list) else [entry]
        for f in group:
            if not isinstance(f, dict):
                continue
            files.append({
                "filename": f.get("path", ""),
                "type": f.get("type", ""),
                "size": f.get("size", 0),
            })

    files_df = pd.DataFrame(files)

    # SDRF retrieval is best effort: a missing or malformed file leaves
    # the table empty instead of failing the whole call.
    sdrf_url = (f"https://www.ebi.ac.uk/biostudies/files/"
                f"{accession}/{accession}.sdrf.txt")
    sdrf_df = pd.DataFrame()
    try:
        sdrf_resp = requests.get(sdrf_url, timeout=30)
        if sdrf_resp.status_code == 200:
            sdrf_df = pd.read_csv(StringIO(sdrf_resp.text), sep="\t")
    except (requests.RequestException, ValueError) as exc:
        # pandas parser and empty-data errors derive from ValueError.
        print(f"  Warning: SDRF for {accession} — {exc}")

    print(f"ArrayExpress {accession}: {len(files_df)} files, "
          f"{len(sdrf_df)} SDRF rows")
    return metadata, files_df, sdrf_df
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## 3. 発現データダウンロード・処理
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
def arrayexpress_download_matrix(accession, output_dir="results"):
    """
    Download the processed expression files of one experiment.

    Files whose name matches "processed", "normalized", "expression" or
    "counts" (case-insensitive) are fetched and written into output_dir
    under their base name. A non-empty SDRF table is saved as "sdrf.csv".

    Parameters:
        accession: str — study accession
        output_dir: str — output directory (created when missing)

    Returns:
        dict with the keys "metadata" (dict), "files" (list of the paths
        written, as strings) and "sdrf" (pandas.DataFrame).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    metadata, files_df, sdrf_df = arrayexpress_get_experiment(accession)

    # A study without listed files yields a DataFrame with no columns, so
    # indexing "filename" unconditionally would raise KeyError.
    if "filename" in files_df.columns:
        is_expression = files_df["filename"].str.contains(
            r"processed|normalized|expression|counts",
            case=False, na=False)
        expr_names = files_df.loc[is_expression, "filename"].tolist()
    else:
        expr_names = []

    downloaded = []
    for fname in expr_names:
        url = (f"https://www.ebi.ac.uk/biostudies/files/"
               f"{accession}/{fname}")
        try:
            resp = requests.get(url, timeout=120)
            if resp.status_code != 200:
                continue
            # Only the last path component is used as the local name.
            fpath = output_dir / fname.split("/")[-1]
            fpath.write_bytes(resp.content)
        except (requests.RequestException, OSError) as exc:
            # Best effort: report the failure and move on to the next file.
            print(f"  Warning: {fname} — {exc}")
            continue
        downloaded.append(str(fpath))

    # Save the SDRF table alongside the downloads.
    if not sdrf_df.empty:
        sdrf_df.to_csv(output_dir / "sdrf.csv", index=False)

    print(f"ArrayExpress download: {len(downloaded)} files → "
          f"{output_dir}")
    return {
        "metadata": metadata,
        "files": downloaded,
        "sdrf": sdrf_df,
    }
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## 4. ArrayExpress 統合パイプライン
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
def arrayexpress_pipeline(query, organism="Homo sapiens",
                          output_dir="results"):
    """
    Run the search → top-experiment detail workflow and write CSV files.

    Parameters:
        query: str — search query
        organism: str — organism filter passed to the search
        output_dir: str — output directory (created when missing)

    Returns:
        dict with the single key "experiments" holding the search result
        DataFrame.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: search and persist the experiment list.
    experiments = arrayexpress_search_experiments(query, organism=organism)
    experiments.to_csv(output_dir / "experiments.csv", index=False)

    # Step 2: details of the first hit, when there is one.
    has_hits = not experiments.empty
    if has_hits:
        first_row = experiments.iloc[0]
        _, file_table, sdrf_table = arrayexpress_get_experiment(
            first_row["accession"])
        file_table.to_csv(output_dir / "experiment_files.csv", index=False)
        # The SDRF file is only written when rows were retrieved.
        if not sdrf_table.empty:
            sdrf_table.to_csv(output_dir / "sdrf.csv", index=False)

    print(f"ArrayExpress pipeline: {output_dir}")
    return {"experiments": experiments}
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## ToolUniverse 連携
|
|
240
|
+
|
|
241
|
+
| TU Key | ツール名 | 連携内容 |
|
|
242
|
+
|--------|---------|---------|
|
|
243
|
+
| `arrayexpress` | ArrayExpress | 発現実験検索・メタデータ・ファイル取得 |
|
|
244
|
+
|
|
245
|
+
## パイプライン統合
|
|
246
|
+
|
|
247
|
+
```
|
|
248
|
+
ebi-databases → arrayexpress-expression → gene-expression-transcriptomics
|
|
249
|
+
(EBI Search) (ArrayExpress/BioStudies) (DESeq2/GSEA)
|
|
250
|
+
│ │ ↓
|
|
251
|
+
geo-expression ─────────┘ pathway-enrichment
|
|
252
|
+
(GEO データ) │ (KEGG/Reactome)
|
|
253
|
+
↓
|
|
254
|
+
multi-omics
|
|
255
|
+
(統合解析)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## パイプライン出力
|
|
259
|
+
|
|
260
|
+
| ファイル | 説明 | 次スキル |
|
|
261
|
+
|---------|------|---------|
|
|
262
|
+
| `results/experiments.csv` | 実験一覧 | → geo-expression |
|
|
263
|
+
| `results/sdrf.csv` | サンプル情報 | → gene-expression-transcriptomics |
|
|
264
|
+
| `results/experiment_files.csv` | ファイルリスト | → data-preprocessing |
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-crossref-metadata
|
|
3
|
+
description: |
|
|
4
|
+
CrossRef メタデータスキル。CrossRef REST API による
|
|
5
|
+
DOI 解決・論文メタデータ・引用数・ジャーナル情報・
|
|
6
|
+
助成金情報検索。ToolUniverse 連携: crossref。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: crossref
|
|
9
|
+
name: CrossRef
|
|
10
|
+
description: DOI 解決・論文メタデータ・引用数・ジャーナル情報
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific CrossRef Metadata
|
|
14
|
+
|
|
15
|
+
CrossRef REST API を活用した学術文献 DOI 解決・メタデータ検索・
|
|
16
|
+
引用分析・ジャーナル情報・助成金レジストリ検索パイプラインを
|
|
17
|
+
提供する。
|
|
18
|
+
|
|
19
|
+
## When to Use
|
|
20
|
+
|
|
21
|
+
- DOI から論文メタデータを取得するとき
|
|
22
|
+
- 学術文献をタイトル・著者で検索するとき
|
|
23
|
+
- ジャーナルの ISSN から出版社・分野・DOI 登録件数を調べるとき
|
|
24
|
+
- 論文の引用数・被引用数を確認するとき
|
|
25
|
+
- 研究助成金の情報を検索するとき
|
|
26
|
+
- 参考文献リストのメタデータを一括取得するとき
|
|
27
|
+
- 特定出版社やジャーナルの出版傾向を分析するとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. DOI 解決・論文メタデータ
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
CR_BASE = "https://api.crossref.org"
|
|
40
|
+
CR_HEADERS = {
|
|
41
|
+
"User-Agent": "SATORI/0.18.0 (mailto:your@email.com)",
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def crossref_resolve_doi(doi):
    """
    Fetch the CrossRef metadata record of a single DOI.

    Parameters:
        doi: str — DOI (e.g. "10.1038/s41586-020-2649-2"). Pass it
            unencoded: a literal "%" is percent-encoded like any other
            reserved character.

    Returns:
        dict with the keys doi, title, authors (first 10, "; "-joined),
        journal, publisher, type, published_date, citation_count,
        reference_count, issn, url, abstract (first 500 characters),
        funder and license.

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    from urllib.parse import quote

    # DOIs may contain characters such as "#" or "?" that would otherwise
    # be read as the start of a fragment or query and truncate the path.
    # "/" stays literal so the URL keeps the shape used before.
    url = f"{CR_BASE}/works/{quote(doi, safe='/')}"
    resp = requests.get(url, headers=CR_HEADERS, timeout=30)
    resp.raise_for_status()
    item = resp.json().get("message", {})

    authors = []
    for a in item.get("author", []):
        name = f"{a.get('given', '')} {a.get('family', '')}".strip()
        # Fall back to a plain `name` field when given/family are absent,
        # instead of recording an empty author.
        authors.append(name or a.get("name", ""))

    result = {
        "doi": item.get("DOI", ""),
        "title": " ".join(item.get("title", [])),
        "authors": "; ".join(authors[:10]),
        "journal": " ".join(
            item.get("container-title", [])),
        "publisher": item.get("publisher", ""),
        "type": item.get("type", ""),
        # The print date is preferred; the online date is the fallback.
        "published_date": _cr_date(
            item.get("published-print") or
            item.get("published-online")),
        "citation_count": item.get(
            "is-referenced-by-count", 0),
        "reference_count": item.get("reference-count", 0),
        "issn": ", ".join(item.get("ISSN", [])),
        "url": item.get("URL", ""),
        "abstract": (item.get("abstract") or "")[:500],
        "funder": "; ".join(
            f.get("name", "")
            for f in item.get("funder", [])),
        "license": _cr_license(item),
    }

    print(f"CrossRef DOI: {doi}")
    print(f"  {result['title'][:80]}")
    print(f"  Citations: {result['citation_count']}")
    return result
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _cr_date(date_obj):
|
|
92
|
+
if not date_obj:
|
|
93
|
+
return ""
|
|
94
|
+
parts = date_obj.get("date-parts", [[]])[0]
|
|
95
|
+
return "-".join(str(p) for p in parts)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _cr_license(item):
|
|
99
|
+
licenses = item.get("license", [])
|
|
100
|
+
if licenses:
|
|
101
|
+
return licenses[0].get("content-version", "")
|
|
102
|
+
return ""
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## 2. 論文検索
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
def crossref_search_works(query, limit=50,
                          sort="relevance",
                          filter_type=None,
                          from_date=None):
    """
    Search CrossRef works.

    Parameters:
        query: str — search query
        limit: int — maximum number of results (capped at 1000 rows)
        sort: str — sort key ("relevance", "published",
                    "is-referenced-by-count")
        filter_type: str — optional work type filter
                          (e.g. "journal-article")
        from_date: str — optional earliest publication date
                         (e.g. "2020-01-01")

    Returns:
        pandas.DataFrame — one row per work with the columns doi, title,
        authors (first 5, "; "-joined), journal, year, citations and type.
        The "year" column holds the string produced by ``_cr_date``, which
        can include month and day components.

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    url = f"{CR_BASE}/works"
    params = {
        "query": query,
        "rows": min(limit, 1000),
        "sort": sort,
    }

    # Filters are combined into one comma-separated `filter` parameter.
    filters = []
    if filter_type:
        filters.append(f"type:{filter_type}")
    if from_date:
        filters.append(f"from-pub-date:{from_date}")
    if filters:
        params["filter"] = ",".join(filters)

    resp = requests.get(url, params=params,
                        headers=CR_HEADERS, timeout=30)
    resp.raise_for_status()
    data = resp.json().get("message", {})

    def _author_name(author):
        # Prefer "given family"; fall back to a plain `name` field so such
        # authors are not recorded as empty strings.
        person = f"{author.get('given', '')} {author.get('family', '')}"
        return person.strip() or author.get("name", "")

    results = []
    for item in data.get("items", []):
        authors = [_author_name(a) for a in item.get("author", [])]
        results.append({
            "doi": item.get("DOI", ""),
            "title": " ".join(item.get("title", [])),
            "authors": "; ".join(authors[:5]),
            "journal": " ".join(
                item.get("container-title", [])),
            # The print date is preferred; the online date is the fallback.
            "year": _cr_date(
                item.get("published-print") or
                item.get("published-online")),
            "citations": item.get(
                "is-referenced-by-count", 0),
            "type": item.get("type", ""),
        })

    df = pd.DataFrame(results)
    total = data.get("total-results", 0)
    print(f"CrossRef search: {len(df)}/{total} works "
          f"(query='{query}')")
    return df
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## 3. ジャーナル情報・助成金検索
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
def crossref_journal_info(issn):
    """
    Look up one journal record by ISSN.

    Parameters:
        issn: str — ISSN (e.g. "0028-0836")

    Returns:
        dict with the keys issn (as passed in), title, publisher,
        subjects ("; "-joined subject names), total_dois, current_dois
        and backfile_dois.

    Raises:
        Whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    response = requests.get(f"{CR_BASE}/journals/{issn}",
                            headers=CR_HEADERS, timeout=30)
    response.raise_for_status()
    journal = response.json().get("message", {})

    doi_counts = journal.get("counts", {})
    subject_names = [s.get("name", "")
                     for s in journal.get("subjects", [])]

    result = dict(
        issn=issn,
        title=journal.get("title", ""),
        publisher=journal.get("publisher", ""),
        subjects="; ".join(subject_names),
        total_dois=doi_counts.get("total-dois", 0),
        current_dois=doi_counts.get("current-dois", 0),
        backfile_dois=doi_counts.get("backfile-dois", 0),
    )

    print(f"CrossRef journal: {result['title']} "
          f"({result['total_dois']} DOIs)")
    return result
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def crossref_search_funders(query, limit=20):
    """
    Search the CrossRef funder registry.

    Parameters:
        query: str — funder name (e.g. "NIH", "JSPS")
        limit: int — maximum number of results (capped at 1000 rows, the
            same cap ``crossref_search_works`` applies)

    Returns:
        pandas.DataFrame — one row per funder with the columns funder_id,
        name, location, alt_names (first 3, "; "-joined) and work_count.

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error status.
    """
    url = f"{CR_BASE}/funders"
    # Cap `rows` so an oversized limit is not sent to the API as-is.
    params = {"query": query, "rows": min(limit, 1000)}
    resp = requests.get(url, params=params,
                        headers=CR_HEADERS, timeout=30)
    resp.raise_for_status()
    data = resp.json().get("message", {})

    results = []
    for item in data.get("items", []):
        results.append({
            "funder_id": item.get("id", ""),
            "name": item.get("name", ""),
            "location": item.get("location", ""),
            # `or []` keeps the slice safe when "alt-names" is null.
            "alt_names": "; ".join(
                (item.get("alt-names") or [])[:3]),
            "work_count": item.get("work-count", 0),
        })

    df = pd.DataFrame(results)
    print(f"CrossRef funders: {len(df)} (query='{query}')")
    return df
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
## 4. CrossRef 統合パイプライン
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
def crossref_pipeline(query, dois=None,
                      output_dir="results"):
    """
    Run the search → DOI resolution → citation statistics workflow.

    Parameters:
        query: str — search query
        dois: list[str] — optional DOIs to resolve directly
        output_dir: str — output directory (created when missing)

    Returns:
        dict with the single key "works" holding the search result
        DataFrame. "works.csv" is always written; "doi_resolved.csv" only
        when dois is given, "citation_stats.csv" only when the search
        returned rows.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: search and persist the result table.
    works = crossref_search_works(query)
    works.to_csv(output_dir / "works.csv", index=False)

    # Step 2: resolve each DOI; a failure is reported and skipped.
    if dois:
        resolved = []
        for doi in dois:
            try:
                meta = crossref_resolve_doi(doi)
            except Exception as e:
                print(f"  Warning: {doi} — {e}")
            else:
                resolved.append(meta)
        pd.DataFrame(resolved).to_csv(
            output_dir / "doi_resolved.csv", index=False)

    # Step 3: summary statistics over the citation counts.
    if not works.empty:
        cites = works["citations"]
        stats = {
            "total_works": len(works),
            "total_citations": cites.sum(),
            "mean_citations": cites.mean(),
            "median_citations": cites.median(),
            "max_citations": cites.max(),
        }
        stats_table = pd.DataFrame([stats])
        stats_table.to_csv(output_dir / "citation_stats.csv", index=False)

    print(f"CrossRef pipeline: {output_dir}")
    return {"works": works}
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## ToolUniverse 連携
|
|
289
|
+
|
|
290
|
+
| TU Key | ツール名 | 連携内容 |
|
|
291
|
+
|--------|---------|---------|
|
|
292
|
+
| `crossref` | CrossRef | DOI 解決・メタデータ・引用・ジャーナル情報 |
|
|
293
|
+
|
|
294
|
+
## パイプライン統合
|
|
295
|
+
|
|
296
|
+
```
|
|
297
|
+
literature-search → crossref-metadata → citation-checker
|
|
298
|
+
(PubMed/NCBI) (CrossRef REST API) (引用品質検証)
|
|
299
|
+
│ │ ↓
|
|
300
|
+
semantic-scholar ──────┘ deep-research
|
|
301
|
+
(S2 Academic Graph) │ (知識統合)
|
|
302
|
+
↓
|
|
303
|
+
bibliometrics
|
|
304
|
+
(書誌計量分析)
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## パイプライン出力
|
|
308
|
+
|
|
309
|
+
| ファイル | 説明 | 次スキル |
|
|
310
|
+
|---------|------|---------|
|
|
311
|
+
| `results/works.csv` | 論文検索結果 | → semantic-scholar |
|
|
312
|
+
| `results/doi_resolved.csv` | DOI メタデータ | → citation-checker |
|
|
313
|
+
| `results/citation_stats.csv` | 引用統計 | → bibliometrics |
|