@nahisaho/satori 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
- package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
- package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
- package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
- package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
- package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
- package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
- package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
- package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-public-health-data
|
|
3
|
+
description: |
|
|
4
|
+
公衆衛生データアクセススキル。NHANES 疫学調査データ、MedlinePlus 一般向け
|
|
5
|
+
健康情報、RxNorm 薬剤標準語彙、ODPHP 健康目標・ガイドライン、
|
|
6
|
+
Health Disparities 健康格差データ統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Public Health Data
|
|
10
|
+
|
|
11
|
+
NHANES / MedlinePlus / RxNorm / ODPHP / Health Disparities /
|
|
12
|
+
Guidelines を統合した公衆衛生データアクセスパイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- NHANES 疫学調査データ (検査値・アンケート) を取得するとき
|
|
17
|
+
- MedlinePlus で一般向け健康情報を検索するとき
|
|
18
|
+
- RxNorm で薬剤名の標準化・マッピングを行うとき
|
|
19
|
+
- ODPHP Healthy People 目標や健康ガイドラインを参照するとき
|
|
20
|
+
- 健康格差 (Health Disparities) データを分析するとき
|
|
21
|
+
- 臨床ガイドライン (USPSTF/WHO) を検索するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. NHANES 疫学調査データ取得
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import requests
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import io
|
|
33
|
+
|
|
34
|
+
# Base URL of the CDC NHANES website; NHANES page URLs are built from it.
NHANES_BASE = "https://wwwn.cdc.gov/nchs/nhanes"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_nhanes_dataset(cycle, dataset_name, timeout=60):
    """
    Download one NHANES dataset (SAS XPORT / XPT file) into a DataFrame.

    Parameters:
        cycle: str — survey cycle (e.g., "2017-2018", "2019-2020")
        dataset_name: str — dataset name (e.g., "DEMO_J", "BIOPRO_J")
        timeout: float — seconds to wait for the server before requests
            gives up (default 60); without it the call could block forever

    Returns:
        pandas.DataFrame parsed from the downloaded XPT file.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.

    ToolUniverse:
        NHANES_get_dataset(cycle=cycle, dataset=dataset_name)
        NHANES_list_datasets(cycle=cycle)
    """
    # Direct download of the XPT file.
    # NOTE(review): confirm that CDC still serves files under this path layout.
    xpt_url = f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{dataset_name}.XPT"
    resp = requests.get(xpt_url, timeout=timeout)
    resp.raise_for_status()

    # read_sas needs a file-like object, so wrap the downloaded bytes.
    df = pd.read_sas(io.BytesIO(resp.content), format="xport")
    print(f"NHANES {cycle} {dataset_name}: {df.shape[0]} rows × {df.shape[1]} columns")
    return df
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def search_nhanes_variables(keyword):
    """
    Search the NHANES variable list for a keyword.

    Parameters:
        keyword: str — search term matched against variable names/descriptions

    Returns:
        str — the raw response body of the variable-list page.

    ToolUniverse:
        NHANES_search_variables(keyword=keyword)
    """
    response = requests.get(
        f"{NHANES_BASE}/search/variablelist.aspx",
        params={"SearchTarget": keyword},
    )
    # Fail loudly on 4xx/5xx instead of returning an error page.
    response.raise_for_status()

    print(f"NHANES variable search '{keyword}': response received")
    return response.text
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## 2. MedlinePlus 健康情報検索
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# MedlinePlus Connect service endpoint.
# NOTE(review): no function in the visible part of this file references it.
MEDLINEPLUS_API = "https://connect.medlineplus.gov/service"
|
|
85
|
+
# MedlinePlus web-service query endpoint used for health-topic searches.
MEDLINEPLUS_WS = "https://wsearch.nlm.nih.gov/ws/query"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def search_medlineplus_health_topics(query, language="English", timeout=30):
    """
    Search MedlinePlus health topics.

    Parameters:
        query: str — search term
        language: str — "English" (default) or "Spanish"; selects which
            MedlinePlus health-topic database is queried
        timeout: float — seconds to wait for the server (default 30)

    Returns:
        pandas.DataFrame with one row per matching document and the
        columns "title", "url", "summary" (first 300 characters) and "rank".

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        xml.etree.ElementTree.ParseError: if the response is not valid XML.

    ToolUniverse:
        MedlinePlus_search_health_topics(query=query)
        MedlinePlus_get_health_topic(topic_id=topic_id)
        MedlinePlus_search_drugs(query=query)
        MedlinePlus_search_labs(query=query)
        MedlinePlus_connect(code=code, code_system=system)
    """
    import xml.etree.ElementTree as ET

    # The web service exposes a separate database per language; any value
    # other than a Spanish marker keeps the original English database.
    is_spanish = str(language).strip().lower() in ("spanish", "es", "español")
    params = {
        "db": "healthTopicsSpanish" if is_spanish else "healthTopics",
        "term": query,
    }
    resp = requests.get(MEDLINEPLUS_WS, params=params, timeout=timeout)
    resp.raise_for_status()

    # Parse the raw bytes so the XML declaration decides the encoding rather
    # than the charset requests guesses from the HTTP headers.
    root = ET.fromstring(resp.content)

    def content_text(doc, name):
        """Return the full text of <content name=...>, or "" if absent/empty."""
        node = doc.find(f".//content[@name='{name}']")
        if node is None:
            return ""
        # itertext() never yields None, unlike .text on an empty element or
        # on an element whose text lives inside child tags.
        return "".join(node.itertext())

    results = [
        {
            "title": content_text(doc, "title"),
            "url": doc.get("url", ""),
            "summary": content_text(doc, "FullSummary")[:300],
            "rank": doc.get("rank", ""),
        }
        for doc in root.findall(".//document")
    ]

    df = pd.DataFrame(results)
    print(f"MedlinePlus search '{query}': {len(df)} health topics")
    return df
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## 3. RxNorm 薬剤標準語彙
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
# Base URL of the RxNav REST API used for RxNorm lookups.
RXNORM_API = "https://rxnav.nlm.nih.gov/REST"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def rxnorm_lookup(drug_name, timeout=30):
    """
    Normalize a drug name with RxNorm and collect related concepts.

    Parameters:
        drug_name: str — drug name (brand name or generic name)
        timeout: float — seconds to wait for each HTTP request (default 30)

    Returns:
        dict with keys "rxcui", "name", "tty", "synonym" and
        "related_concepts" (list of {"rxcui", "name", "tty"} dicts),
        or None when RxNorm has no identifier for the name.

    Raises:
        requests.HTTPError: if any of the three requests returns 4xx/5xx.

    ToolUniverse:
        RxNorm_get_rxcui(name=drug_name)
    """
    resp = requests.get(
        f"{RXNORM_API}/rxcui.json",
        params={"name": drug_name},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json() or {}

    # "rxnormId" may be missing, null or an empty list when nothing matches;
    # indexing [0] unconditionally would raise IndexError on the empty list.
    rxcui_ids = (data.get("idGroup") or {}).get("rxnormId") or []
    rxcui = rxcui_ids[0] if rxcui_ids else None
    if not rxcui:
        print(f"RxNorm: '{drug_name}' not found")
        return None

    # Get properties
    props_resp = requests.get(
        f"{RXNORM_API}/rxcui/{rxcui}/properties.json", timeout=timeout
    )
    props_resp.raise_for_status()
    props = (props_resp.json() or {}).get("properties") or {}

    # Get related concepts.
    # The term types are passed space-separated: requests encodes a space as
    # "+" in the query string, whereas a literal "+" would be sent as "%2B"
    # and reach the server as one bogus term type "IN+BN+SBD+SCD".
    related_resp = requests.get(
        f"{RXNORM_API}/rxcui/{rxcui}/related.json",
        params={"tty": "IN BN SBD SCD"},
        timeout=timeout,
    )
    related_resp.raise_for_status()
    related = related_resp.json() or {}
    concept_groups = (related.get("relatedGroup") or {}).get("conceptGroup") or []

    result = {
        "rxcui": rxcui,
        "name": props.get("name", ""),
        "tty": props.get("tty", ""),
        "synonym": props.get("synonym", ""),
        "related_concepts": [
            {
                "rxcui": c.get("rxcui"),
                "name": c.get("name"),
                "tty": c.get("tty"),
            }
            for group in concept_groups
            # A group may carry no "conceptProperties" at all.
            for c in (group.get("conceptProperties") or [])
        ],
    }
    print(f"RxNorm '{drug_name}': RXCUI={rxcui}, TTY={result['tty']}")
    return result
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## 4. Health Disparities データ取得
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
# Base URL of data.cdc.gov resource endpoints; "<dataset_id>.json" is appended.
HD_API = "https://data.cdc.gov/resource"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_health_disparities(indicator, dataset_id="pqnx-3xr5", timeout=30):
    """
    Fetch CDC health-disparities records whose indicator matches a term.

    Parameters:
        indicator: str — health indicator name (substring match)
        dataset_id: str — CDC Socrata dataset ID
        timeout: float — seconds to wait for the server (default 30)

    Returns:
        pandas.DataFrame built from the JSON records (at most 1000 rows).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.

    ToolUniverse:
        HealthDisparities_search(query=indicator)
        HealthDisparities_get_indicators(category=category)
    """
    # The search term ends up inside a single-quoted SoQL string literal, so
    # embedded single quotes must be doubled; otherwise a name such as
    # "Alzheimer's disease" breaks the query (or lets input alter it).
    safe_indicator = str(indicator).replace("'", "''")
    params = {
        "$where": f"indicator LIKE '%{safe_indicator}%'",
        "$limit": 1000,
    }
    resp = requests.get(
        f"{HD_API}/{dataset_id}.json", params=params, timeout=timeout
    )
    resp.raise_for_status()
    data = resp.json()

    df = pd.DataFrame(data)
    print(f"Health Disparities '{indicator}': {len(df)} records")
    return df
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## 5. ODPHP 健康ガイドライン
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
# Base URL of the ODPHP MyHealthfinder API (v3).
ODPHP_API = "https://health.gov/myhealthfinder/api/v3"
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def search_health_guidelines(keyword, category=None, timeout=30):
    """
    Search ODPHP MyHealthfinder topics by keyword.

    Parameters:
        keyword: str — search keyword
        category: optional category ID; sent as "categoryId" when truthy
        timeout: float — seconds to wait for the server (default 30)

    Returns:
        pandas.DataFrame with the columns "title", "categories", "url" and
        "sections" (list of section titles); empty when nothing matches.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.

    ToolUniverse:
        ODPHP_search_topics(keyword=keyword)
        ODPHP_get_topic(topic_id=topic_id)
    """
    params = {"keyword": keyword}
    if category:
        params["categoryId"] = category
    resp = requests.get(
        f"{ODPHP_API}/topicsearch.json", params=params, timeout=timeout
    )
    resp.raise_for_status()
    data = resp.json() or {}

    def as_list(value):
        """Normalize a missing/null value to [] and a lone object to [obj]."""
        if not value:
            return []
        return value if isinstance(value, list) else [value]

    # `x.get(key, {})` only covers a missing key; an explicit JSON null would
    # come back as None and crash the next .get(), hence the `or {}` guards.
    resources = (data.get("Result") or {}).get("Resources") or {}

    results = [
        {
            "title": topic.get("Title", ""),
            "categories": topic.get("Categories", ""),
            "url": topic.get("AccessibleVersion", ""),
            "sections": [
                s.get("Title", "")
                for s in as_list((topic.get("Sections") or {}).get("section"))
            ],
        }
        for topic in as_list(resources.get("Resource"))
    ]

    df = pd.DataFrame(results)
    print(f"ODPHP search '{keyword}': {len(df)} guidelines")
    return df
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## 6. 臨床ガイドライン検索 (USPSTF/WHO)
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
def search_clinical_guidelines(query, source="uspstf"):
    """
    Search USPSTF/WHO clinical guidelines.

    Parameters:
        query: str — search term
        source: str — "uspstf" or "who"; any other value falls back to the
            USPSTF endpoint

    Returns:
        pandas.DataFrame with one row per result ("title", "grade",
        "population", "date", "recommendation"); an empty DataFrame when the
        server does not answer with HTTP 200.

    ToolUniverse:
        Guidelines_search(query=query, source=source)
        Guidelines_get_recommendations(topic_id=topic_id)
    """
    endpoints = {
        "uspstf": "https://www.uspreventiveservicestaskforce.org/uspstf/api",
        "who": "https://app.magicapp.org/api",
    }
    endpoint = endpoints.get(source, endpoints["uspstf"])

    response = requests.get(f"{endpoint}/search", params={"q": query})

    if response.status_code != 200:
        # Best effort: a failed lookup yields an empty table, not an error.
        df = pd.DataFrame()
    else:
        fields = ("title", "grade", "population", "date", "recommendation")
        payload = response.json()
        df = pd.DataFrame([
            {field: item.get(field, "") for field in fields}
            for item in payload.get("results", [])
        ])

    print(f"Guidelines ({source}) search '{query}': {len(df)} recommendations")
    return df
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
---
|
|
292
|
+
|
|
293
|
+
## 利用可能ツール
|
|
294
|
+
|
|
295
|
+
| ToolUniverse カテゴリ | 主なツール |
|
|
296
|
+
|---|---|
|
|
297
|
+
| `nhanes` | `NHANES_get_dataset`, `NHANES_list_datasets`, `NHANES_search_variables` |
|
|
298
|
+
| `health_disparities` | `HealthDisparities_search`, `HealthDisparities_get_indicators` |
|
|
299
|
+
| `medlineplus` | `MedlinePlus_search_health_topics`, `MedlinePlus_get_health_topic`, `MedlinePlus_search_drugs`, `MedlinePlus_search_labs`, `MedlinePlus_connect` |
|
|
300
|
+
| `odphp` | `ODPHP_search_topics`, `ODPHP_get_topic` |
|
|
301
|
+
| `rxnorm` | `RxNorm_get_rxcui` |
|
|
302
|
+
| `guidelines_tools` | `Guidelines_search`, `Guidelines_get_recommendations` |
|
|
303
|
+
|
|
304
|
+
## パイプライン出力
|
|
305
|
+
|
|
306
|
+
| 出力ファイル | 説明 | 連携先スキル |
|
|
307
|
+
|---|---|---|
|
|
308
|
+
| `results/nhanes_data.csv` | NHANES 疫学データ | → epidemiology-public-health, survival-clinical |
|
|
309
|
+
| `results/drug_mapping.json` | RxNorm 薬剤マッピング | → pharmacovigilance, pharmacogenomics |
|
|
310
|
+
| `results/health_guidelines.json` | 臨床ガイドライン | → clinical-decision-support |
|
|
311
|
+
| `results/health_disparities.csv` | 健康格差指標 | → epidemiology-public-health, causal-inference |
|
|
312
|
+
|
|
313
|
+
## パイプライン統合
|
|
314
|
+
|
|
315
|
+
```
|
|
316
|
+
epidemiology-public-health ──→ public-health-data ──→ clinical-decision-support
|
|
317
|
+
(RR/OR/DAG) (NHANES/CDC/ODPHP) (GRADE エビデンス)
|
|
318
|
+
│
|
|
319
|
+
├──→ pharmacovigilance (RxNorm + 安全性)
|
|
320
|
+
├──→ pharmacogenomics (RxNorm + PGx)
|
|
321
|
+
└──→ survival-clinical (NHANES コホート)
|
|
322
|
+
```
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-regulatory-genomics
|
|
3
|
+
description: |
|
|
4
|
+
レギュラトリーゲノミクススキル。RegulomeDB バリアント制御機能スコア、
|
|
5
|
+
ReMap 転写因子結合マッピング、4D Nucleome (4DN) 三次元ゲノム構造
|
|
6
|
+
解析の統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Regulatory Genomics
|
|
10
|
+
|
|
11
|
+
RegulomeDB / ReMap / 4D Nucleome を統合した
|
|
12
|
+
レギュラトリーゲノミクス (制御領域バリアント解析) パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 非コード領域バリアントの制御機能を評価するとき
|
|
17
|
+
- RegulomeDB で SNP の調節的影響をスコアリングするとき
|
|
18
|
+
- ReMap で転写因子結合部位のマッピングを確認するとき
|
|
19
|
+
- 4DN データから三次元ゲノム構造 (TAD/ループ) を解析するとき
|
|
20
|
+
- GWAS ヒットの制御メカニズムを解明するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. RegulomeDB バリアント制御スコア
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
# RegulomeDB search endpoint queried once per variant.
REGULOMEDB_API = "https://regulomedb.org/regulome-search"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def score_regulome_variants(variants, timeout=30):
    """
    RegulomeDB — score the regulatory function of non-coding variants.

    Parameters:
        variants: list — variants as rsID or chr:pos strings,
            e.g., ["rs12345", "chr1:109274570"]
        timeout: float — seconds to wait for each request (default 30)

    Returns:
        pandas.DataFrame with one row per RegulomeDB hit. A variant whose
        request did not return HTTP 200 gets a single row with "error" set
        to True and "regulome_score"/"score" set to None.

    ToolUniverse:
        RegulomeDB_score_variant(variant=variant)
    """
    results = []
    scored = 0  # variants for which at least one hit was returned
    for variant in variants:
        params = {"regions": variant, "genome": "GRCh38", "format": "json"}
        resp = requests.get(REGULOMEDB_API, params=params, timeout=timeout)
        if resp.status_code != 200:
            # "score" is kept for existing callers; "regulome_score" uses the
            # same column as successful rows so the frame stays uniform.
            results.append({
                "variant": variant,
                "score": None,
                "regulome_score": None,
                "error": True,
            })
            continue

        data = resp.json() or {}
        hits = data.get("@graph") or []
        if hits:
            scored += 1
        for hit in hits:
            # Guard against an explicit null "regulome_score" member.
            score = hit.get("regulome_score") or {}
            results.append({
                "variant": variant,
                "regulome_score": score.get("ranking", ""),
                "probability": score.get("probability", ""),
                "chrom": hit.get("chrom", ""),
                "start": hit.get("start", ""),
                "end": hit.get("end", ""),
                "dnase": hit.get("dnase", ""),
                "proteins_binding": hit.get("proteins_binding", []),
                "motifs": hit.get("motifs", []),
                "eqtls": hit.get("eqtls", []),
                "chromatin_state": hit.get("chromatin_state", {}),
            })

    df = pd.DataFrame(results)
    if not df.empty and "regulome_score" in df.columns:
        # Rankings that start with 1 or 2 are reported as high function.
        high_func = (df["regulome_score"].astype(str).str.match(r"^[12]")).sum()
        # Report how many variants really produced hits, not the input size.
        print(f"RegulomeDB: {scored}/{len(variants)} variants scored, "
              f"{high_func} with high regulatory function (score 1-2)")
    return df
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## 2. ReMap 転写因子結合マッピング
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# Base URL of the ReMap API (v1) used for peak queries.
REMAP_API = "https://remap.univ-amu.fr/api/v1"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def search_remap_binding(chrom, start, end, genome="hg38"):
    """
    ReMap — list transcription-factor / co-regulator peaks in a region.

    Parameters:
        chrom: str — chromosome (e.g., "chr1")
        start: int — start coordinate
        end: int — end coordinate
        genome: str — genome assembly ("hg38", "hg19", "mm10")

    Returns:
        pandas.DataFrame with one row per peak.

    ToolUniverse:
        ReMap_search_peaks(chrom=chrom, start=start, end=end)
        ReMap_get_tf_targets(tf_name=tf_name)
    """
    response = requests.get(
        f"{REMAP_API}/peaks/search",
        params={"chrom": chrom, "start": start, "end": end, "genome": genome},
    )
    response.raise_for_status()
    payload = response.json()

    # Output column -> key in each peak record.
    column_sources = {
        "tf_name": "tf_name",
        "biotype": "biotype",
        "cell_type": "cell_type",
        "experiment": "experiment_accession",
        "peak_start": "start",
        "peak_end": "end",
        "score": "score",
    }
    rows = [
        {column: peak.get(key, "") for column, key in column_sources.items()}
        for peak in payload.get("peaks", [])
    ]

    df = pd.DataFrame(rows)
    # An empty frame has no "tf_name" column, so count only when rows exist.
    if df.empty:
        unique_tfs = 0
    else:
        unique_tfs = df["tf_name"].nunique()
    print(f"ReMap {chrom}:{start}-{end}: {len(df)} peaks, {unique_tfs} unique TFs")
    return df
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def get_remap_tf_targets(tf_name, genome="hg38"):
    """
    ReMap — fetch binding sites of one transcription factor.

    Parameters:
        tf_name: str — transcription factor name (e.g., "TP53", "CTCF", "STAT3")
        genome: str — genome assembly passed to the API

    Returns:
        pandas.DataFrame with at most 1000 binding sites.
    """
    response = requests.get(
        f"{REMAP_API}/peaks/tf",
        params={"tf": tf_name, "genome": genome},
    )
    response.raise_for_status()
    payload = response.json()

    # Only the first 1000 peaks are kept to bound the size for large TFs.
    first_peaks = payload.get("peaks", [])[:1000]
    fields = ("chrom", "start", "end", "cell_type", "score")
    df = pd.DataFrame([
        {field: peak.get(field, "") for field in fields}
        for peak in first_peaks
    ])

    print(f"ReMap TF '{tf_name}': {len(df)} binding sites")
    return df
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## 3. 4D Nucleome (4DN) 三次元ゲノム構造
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
# Base URL of the 4D Nucleome data portal.
FOURDN_API = "https://data.4dnucleome.org"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def search_4dn_experiments(query, experiment_type=None):
    """
    4D Nucleome portal — search 3D-genome experiment sets.

    Parameters:
        query: str — search query (cell line name, protein name, etc.)
        experiment_type: str — experiment type ("in situ Hi-C", "SPRITE", "GAM");
            added as a filter only when truthy

    Returns:
        pandas.DataFrame with one row per experiment set.

    ToolUniverse:
        FourDN_search_experiments(query=query)
    """
    type_filter = (
        {"experiment_type.display_title": experiment_type}
        if experiment_type
        else {}
    )
    params = {
        "searchTerm": query,
        "type": "ExperimentSetReplicate",
        "format": "json",
        **type_filter,
    }

    response = requests.get(f"{FOURDN_API}/search/", params=params)
    response.raise_for_status()
    payload = response.json()

    def to_row(item):
        """Flatten one search result into the output columns."""
        return {
            "accession": item.get("accession", ""),
            "title": item.get("display_title", ""),
            "experiment_type": item.get("experiment_type", {}).get("display_title", ""),
            "biosource": item.get("biosource_summary", ""),
            "lab": item.get("lab", {}).get("display_title", ""),
            "status": item.get("status", ""),
        }

    df = pd.DataFrame([to_row(item) for item in payload.get("@graph", [])])
    print(f"4DN search '{query}': {len(df)} experiment sets")
    return df
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## 4. 制御バリアント統合解析パイプライン
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
def regulatory_variant_pipeline(variants, genome="hg38"):
    """
    Integrated analysis of regulatory-region variants.

    Scores the variants with score_regulome_variants, then queries
    search_remap_binding for a window of ±500 bp around every hit that has
    usable coordinates, and prints a short summary.

    Parameters:
        variants: list — variants as rsID or chr:pos strings
        genome: str — assembly passed to search_remap_binding.
            NOTE(review): the coordinates come from the RegulomeDB lookup in
            this file, which requests GRCh38; confirm before using another
            assembly here.

    Returns:
        dict with "regulome" (DataFrame), "remap" (dict mapping a variant to
        its ReMap DataFrame) and "summary" (dict of counts).
    """
    print("=" * 60)
    print("Regulatory Variant Analysis Pipeline")
    print("=" * 60)

    # Step 1: RegulomeDB scoring
    print("\n[1/3] RegulomeDB scoring...")
    regulome_df = score_regulome_variants(variants)

    # Step 2: ReMap TF binding for every hit with coordinates
    print("\n[2/3] ReMap TF binding analysis...")
    remap_results = {}
    for _, row in regulome_df.iterrows():
        chrom = row.get("chrom")
        start = row.get("start")
        end = row.get("end")
        # Rows from failed lookups carry NaN/None/"" coordinates. NaN is
        # truthy, so a plain truth test would let it through and int(NaN)
        # would abort the whole pipeline.
        if any(pd.isna(v) or v == "" for v in (chrom, start, end)):
            continue
        try:
            # Clamp at 0 so hits near a chromosome start give a valid window.
            window_start = max(int(start) - 500, 0)
            window_end = int(end) + 500
            remap_df = search_remap_binding(chrom, window_start, window_end, genome)
            # A variant with several hits keeps the result of its last hit.
            remap_results[row["variant"]] = remap_df
        except Exception as e:
            # Best effort: one failing variant must not stop the others.
            print(f"  ReMap error for {row['variant']}: {e}")

    # Step 3: Summary
    print("\n[3/3] Summary")
    if "regulome_score" in regulome_df.columns:
        is_high = regulome_df["regulome_score"].astype(str).str.match(r"^[12]")
        # Plain int keeps the summary serializable (e.g., with json.dumps).
        high_regulatory = int(is_high.sum())
    else:
        high_regulatory = 0
    summary = {
        "total_variants": len(variants),
        "regulome_scored": len(regulome_df),
        "high_regulatory": high_regulatory,
        "remap_annotated": len(remap_results),
    }
    print(f"  Total: {summary['total_variants']}, "
          f"High regulatory: {summary['high_regulatory']}, "
          f"ReMap annotated: {summary['remap_annotated']}")

    return {"regulome": regulome_df, "remap": remap_results, "summary": summary}
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## 利用可能ツール
|
|
250
|
+
|
|
251
|
+
| ToolUniverse カテゴリ | 主なツール |
|
|
252
|
+
|---|---|
|
|
253
|
+
| `regulomedb` | `RegulomeDB_score_variant` |
|
|
254
|
+
| `remap` | `ReMap_search_peaks`, `ReMap_get_tf_targets` |
|
|
255
|
+
| `fourdn_portal` | `FourDN_search_experiments` |
|
|
256
|
+
|
|
257
|
+
## パイプライン出力
|
|
258
|
+
|
|
259
|
+
| 出力ファイル | 説明 | 連携先スキル |
|
|
260
|
+
|---|---|---|
|
|
261
|
+
| `results/regulome_scores.csv` | バリアント制御スコア | → variant-interpretation, variant-effect-prediction |
|
|
262
|
+
| `results/remap_binding.csv` | TF 結合マッピング | → epigenomics-chromatin, disease-research |
|
|
263
|
+
| `results/4dn_contacts.json` | 3D ゲノム構造データ | → single-cell-genomics, epigenomics-chromatin |
|
|
264
|
+
|
|
265
|
+
## パイプライン統合
|
|
266
|
+
|
|
267
|
+
```
|
|
268
|
+
variant-interpretation ──→ regulatory-genomics ──→ epigenomics-chromatin
|
|
269
|
+
(ACMG/AMP) (RegulomeDB/ReMap/4DN) (ChIP-seq/ATAC)
|
|
270
|
+
│
|
|
271
|
+
├──→ disease-research (GWAS enhancer)
|
|
272
|
+
├──→ gene-expression (eQTL/制御)
|
|
273
|
+
└──→ noncoding-rna (ncRNA 制御)
|
|
274
|
+
```
|