@nahisaho/satori 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ ---
2
+ name: scientific-public-health-data
3
+ description: |
4
+ 公衆衛生データアクセススキル。NHANES 疫学調査データ、MedlinePlus 一般向け
5
+ 健康情報、RxNorm 薬剤標準語彙、ODPHP 健康目標・ガイドライン、
6
+ Health Disparities 健康格差データ統合パイプライン。
7
+ ---
8
+
9
+ # Scientific Public Health Data
10
+
11
+ NHANES / MedlinePlus / RxNorm / ODPHP / Health Disparities /
12
+ Guidelines を統合した公衆衛生データアクセスパイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - NHANES 疫学調査データ (検査値・アンケート) を取得するとき
17
+ - MedlinePlus で一般向け健康情報を検索するとき
18
+ - RxNorm で薬剤名の標準化・マッピングを行うとき
19
+ - ODPHP Healthy People 目標や健康ガイドラインを参照するとき
20
+ - 健康格差 (Health Disparities) データを分析するとき
21
+ - 臨床ガイドライン (USPSTF/WHO) を検索するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. NHANES 疫学調査データ取得
28
+
29
+ ```python
30
+ import requests
31
+ import pandas as pd
32
+ import io
33
+
34
+ NHANES_BASE = "https://wwwn.cdc.gov/nchs/nhanes"
35
+
36
+
37
def get_nhanes_dataset(cycle, dataset_name, timeout=60):
    """
    Download one NHANES dataset (SAS XPORT / XPT file) into a DataFrame.

    Parameters:
        cycle: str — survey cycle exactly as it appears in the download URL
            (e.g., "2017-2018", "2019-2020")
        dataset_name: str — dataset file name without extension
            (e.g., "DEMO_J", "BIOPRO_J")
        timeout: float — seconds to wait for the HTTP response (default 60)

    Returns:
        pandas.DataFrame parsed from the downloaded XPT payload.

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        NHANES_get_dataset(cycle=cycle, dataset=dataset_name)
        NHANES_list_datasets(cycle=cycle)
    """
    # Direct download of the XPT file for the requested cycle/dataset.
    xpt_url = f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{dataset_name}.XPT"
    # An explicit timeout keeps a stalled connection from hanging the caller.
    resp = requests.get(xpt_url, timeout=timeout)
    resp.raise_for_status()

    # read_sas accepts a file-like object, so the payload never touches disk.
    df = pd.read_sas(io.BytesIO(resp.content), format="xport")
    print(f"NHANES {cycle} {dataset_name}: {df.shape[0]} rows × {df.shape[1]} columns")
    return df
60
+
61
+
62
def search_nhanes_variables(keyword):
    """
    Query the NHANES variable-list search page.

    Parameters:
        keyword: str — search term matched against variable names/descriptions

    Returns:
        str — the raw response body of the search page.

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        NHANES_search_variables(keyword=keyword)
    """
    response = requests.get(
        f"{NHANES_BASE}/search/variablelist.aspx",
        params={"SearchTarget": keyword},
    )
    response.raise_for_status()

    print(f"NHANES variable search '{keyword}': response received")
    return response.text
79
+ ```
80
+
81
+ ## 2. MedlinePlus 健康情報検索
82
+
83
+ ```python
84
+ MEDLINEPLUS_API = "https://connect.medlineplus.gov/service"
85
+ MEDLINEPLUS_WS = "https://wsearch.nlm.nih.gov/ws/query"
86
+
87
+
88
def search_medlineplus_health_topics(query, language="English", timeout=30):
    """
    Search MedlinePlus health topics through the NLM search web service.

    Parameters:
        query: str — search term
        language: str — "English" (default) or "Spanish"; any other value
            falls back to the English database
        timeout: float — seconds to wait for the HTTP response (default 30)

    Returns:
        pandas.DataFrame with columns "title", "url", "summary" (first 300
        characters of the full summary) and "rank"; empty when nothing matches.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        xml.etree.ElementTree.ParseError: if the response is not valid XML.

    ToolUniverse:
        MedlinePlus_search_health_topics(query=query)
        MedlinePlus_get_health_topic(topic_id=topic_id)
        MedlinePlus_search_drugs(query=query)
        MedlinePlus_search_labs(query=query)
        MedlinePlus_connect(code=code, code_system=system)
    """
    import xml.etree.ElementTree as ET

    # The web service keeps Spanish topics in a separate database.
    wants_spanish = str(language).strip().lower() in {"spanish", "es"}
    params = {
        "db": "healthTopicsSpanish" if wants_spanish else "healthTopics",
        "term": query,
    }
    resp = requests.get(MEDLINEPLUS_WS, params=params, timeout=timeout)
    resp.raise_for_status()

    # Parse the raw bytes so the XML declaration, not the HTTP header guess,
    # decides the character encoding.
    root = ET.fromstring(resp.content)

    def _content_text(doc, name):
        # Return "" both when the element is missing and when it is empty.
        node = doc.find(f".//content[@name='{name}']")
        if node is None or node.text is None:
            return ""
        return node.text

    results = [
        {
            "title": _content_text(doc, "title"),
            "url": doc.get("url", ""),
            "summary": _content_text(doc, "FullSummary")[:300],
            "rank": doc.get("rank", ""),
        }
        for doc in root.findall(".//document")
    ]

    df = pd.DataFrame(results)
    print(f"MedlinePlus search '{query}': {len(df)} health topics")
    return df
124
+ ```
125
+
126
+ ## 3. RxNorm 薬剤標準語彙
127
+
128
+ ```python
129
+ RXNORM_API = "https://rxnav.nlm.nih.gov/REST"
130
+
131
+
132
def rxnorm_lookup(drug_name, timeout=30):
    """
    Normalize a drug name with RxNorm and collect its related concepts.

    Parameters:
        drug_name: str — drug name (brand name or generic name)
        timeout: float — seconds to wait for each HTTP response (default 30)

    Returns:
        dict with keys "rxcui", "name", "tty", "synonym" and
        "related_concepts" (list of {"rxcui", "name", "tty"} dicts),
        or None when RxNorm has no identifier for the name.

    Raises:
        requests.HTTPError: if any of the three requests answers with an
            error status.

    ToolUniverse:
        RxNorm_get_rxcui(name=drug_name)
    """
    resp = requests.get(
        f"{RXNORM_API}/rxcui.json",
        params={"name": drug_name},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json() or {}

    # "rxnormId" can be missing or an empty list when nothing matches;
    # indexing [0] unconditionally would raise IndexError on the latter.
    rxnorm_ids = (data.get("idGroup") or {}).get("rxnormId") or []
    rxcui = rxnorm_ids[0] if rxnorm_ids else None
    if not rxcui:
        print(f"RxNorm: '{drug_name}' not found")
        return None

    # Get properties
    props_resp = requests.get(
        f"{RXNORM_API}/rxcui/{rxcui}/properties.json",
        timeout=timeout,
    )
    props_resp.raise_for_status()
    props = (props_resp.json() or {}).get("properties") or {}

    # Get related concepts. The term types are passed space-separated:
    # requests encodes a space as "+", whereas a literal "+" in the value
    # would be percent-encoded and reach the server as a single bogus type.
    related_resp = requests.get(
        f"{RXNORM_API}/rxcui/{rxcui}/related.json",
        params={"tty": "IN BN SBD SCD"},
        timeout=timeout,
    )
    related_resp.raise_for_status()
    related = related_resp.json() or {}
    concept_groups = (related.get("relatedGroup") or {}).get("conceptGroup") or []

    result = {
        "rxcui": rxcui,
        "name": props.get("name", ""),
        "tty": props.get("tty", ""),
        "synonym": props.get("synonym", ""),
        "related_concepts": [
            {
                "rxcui": c.get("rxcui"),
                "name": c.get("name"),
                "tty": c.get("tty"),
            }
            for group in concept_groups
            # A group for a requested type may carry no concepts at all.
            for c in group.get("conceptProperties") or []
        ],
    }
    print(f"RxNorm '{drug_name}': RXCUI={rxcui}, TTY={result['tty']}")
    return result
184
+ ```
185
+
186
+ ## 4. Health Disparities データ取得
187
+
188
+ ```python
189
+ HD_API = "https://data.cdc.gov/resource"
190
+
191
+
192
def get_health_disparities(indicator, dataset_id="pqnx-3xr5", timeout=30):
    """
    Fetch CDC health-disparities records whose indicator contains a term.

    Parameters:
        indicator: str — health indicator name (substring match)
        dataset_id: str — CDC Socrata dataset ID
        timeout: float — seconds to wait for the HTTP response (default 30)

    Returns:
        pandas.DataFrame built from the JSON rows (at most 1000 records).

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        HealthDisparities_search(query=indicator)
        HealthDisparities_get_indicators(category=category)
    """
    # Double any single quote so the caller's text cannot terminate the SoQL
    # string literal and change the meaning of the filter.
    safe_indicator = str(indicator).replace("'", "''")
    params = {
        "$where": f"indicator LIKE '%{safe_indicator}%'",
        "$limit": 1000,
    }
    resp = requests.get(
        f"{HD_API}/{dataset_id}.json",
        params=params,
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    df = pd.DataFrame(data)
    print(f"Health Disparities '{indicator}': {len(df)} records")
    return df
215
+ ```
216
+
217
+ ## 5. ODPHP 健康ガイドライン
218
+
219
+ ```python
220
+ ODPHP_API = "https://health.gov/myhealthfinder/api/v3"
221
+
222
+
223
def search_health_guidelines(keyword, category=None):
    """
    Search ODPHP MyHealthfinder topics by keyword.

    Parameters:
        keyword: str — search term sent as the "keyword" query parameter
        category: optional value sent as "categoryId" when truthy

    Returns:
        pandas.DataFrame with one row per returned resource and the columns
        "title", "categories", "url" and "sections" (list of section titles).

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        ODPHP_search_topics(keyword=keyword)
        ODPHP_get_topic(topic_id=topic_id)
    """
    query_params = dict(keyword=keyword)
    if category:
        query_params.update(categoryId=category)

    response = requests.get(ODPHP_API + "/topicsearch.json", params=query_params)
    response.raise_for_status()
    payload = response.json()

    resources = payload.get("Result", {}).get("Resources", {}).get("Resource", [])
    rows = [
        {
            "title": resource.get("Title", ""),
            "categories": resource.get("Categories", ""),
            "url": resource.get("AccessibleVersion", ""),
            "sections": [
                section.get("Title", "")
                for section in resource.get("Sections", {}).get("section", [])
            ],
        }
        for resource in resources
    ]

    frame = pd.DataFrame(rows)
    print(f"ODPHP search '{keyword}': {len(frame)} guidelines")
    return frame
252
+ ```
253
+
254
+ ## 6. 臨床ガイドライン検索 (USPSTF/WHO)
255
+
256
+ ```python
257
def search_clinical_guidelines(query, source="uspstf"):
    """
    Search clinical guidelines from USPSTF or WHO.

    Parameters:
        query: str — search term sent as the "q" query parameter
        source: str — "uspstf" or "who"; any other value uses the USPSTF
            endpoint

    Returns:
        pandas.DataFrame with the columns "title", "grade", "population",
        "date" and "recommendation"; an empty DataFrame when the response
        status is not 200.

    ToolUniverse:
        Guidelines_search(query=query, source=source)
        Guidelines_get_recommendations(topic_id=topic_id)
    """
    endpoints = {
        "uspstf": "https://www.uspreventiveservicestaskforce.org/uspstf/api",
        "who": "https://app.magicapp.org/api",
    }
    api_root = endpoints[source] if source in endpoints else endpoints["uspstf"]

    response = requests.get(api_root + "/search", params={"q": query})
    if response.status_code != 200:
        # Best effort: a non-200 answer yields no recommendations.
        frame = pd.DataFrame()
    else:
        payload = response.json()
        frame = pd.DataFrame([
            {
                "title": entry.get("title", ""),
                "grade": entry.get("grade", ""),
                "population": entry.get("population", ""),
                "date": entry.get("date", ""),
                "recommendation": entry.get("recommendation", ""),
            }
            for entry in payload.get("results", [])
        ])

    print(f"Guidelines ({source}) search '{query}': {len(frame)} recommendations")
    return frame
289
+ ```
290
+
291
+ ---
292
+
293
+ ## 利用可能ツール
294
+
295
+ | ToolUniverse カテゴリ | 主なツール |
296
+ |---|---|
297
+ | `nhanes` | `NHANES_get_dataset`, `NHANES_list_datasets`, `NHANES_search_variables` |
298
+ | `health_disparities` | `HealthDisparities_search`, `HealthDisparities_get_indicators` |
299
+ | `medlineplus` | `MedlinePlus_search_health_topics`, `MedlinePlus_get_health_topic`, `MedlinePlus_search_drugs`, `MedlinePlus_search_labs`, `MedlinePlus_connect` |
300
+ | `odphp` | `ODPHP_search_topics`, `ODPHP_get_topic` |
301
+ | `rxnorm` | `RxNorm_get_rxcui` |
302
+ | `guidelines_tools` | `Guidelines_search`, `Guidelines_get_recommendations` |
303
+
304
+ ## パイプライン出力
305
+
306
+ | 出力ファイル | 説明 | 連携先スキル |
307
+ |---|---|---|
308
+ | `results/nhanes_data.csv` | NHANES 疫学データ | → epidemiology-public-health, survival-clinical |
309
+ | `results/drug_mapping.json` | RxNorm 薬剤マッピング | → pharmacovigilance, pharmacogenomics |
310
+ | `results/health_guidelines.json` | 臨床ガイドライン | → clinical-decision-support |
311
+ | `results/health_disparities.csv` | 健康格差指標 | → epidemiology-public-health, causal-inference |
312
+
313
+ ## パイプライン統合
314
+
315
+ ```
316
+ epidemiology-public-health ──→ public-health-data ──→ clinical-decision-support
317
+ (RR/OR/DAG) (NHANES/CDC/ODPHP) (GRADE エビデンス)
318
+
319
+ ├──→ pharmacovigilance (RxNorm + 安全性)
320
+ ├──→ pharmacogenomics (RxNorm + PGx)
321
+ └──→ survival-clinical (NHANES コホート)
322
+ ```
@@ -0,0 +1,274 @@
1
+ ---
2
+ name: scientific-regulatory-genomics
3
+ description: |
4
+ レギュラトリーゲノミクススキル。RegulomeDB バリアント制御機能スコア、
5
+ ReMap 転写因子結合マッピング、4D Nucleome (4DN) 三次元ゲノム構造
6
+ 解析の統合パイプライン。
7
+ ---
8
+
9
+ # Scientific Regulatory Genomics
10
+
11
+ RegulomeDB / ReMap / 4D Nucleome を統合した
12
+ レギュラトリーゲノミクス (制御領域バリアント解析) パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 非コード領域バリアントの制御機能を評価するとき
17
+ - RegulomeDB で SNP の調節的影響をスコアリングするとき
18
+ - ReMap で転写因子結合部位のマッピングを確認するとき
19
+ - 4DN データから三次元ゲノム構造 (TAD/ループ) を解析するとき
20
+ - GWAS ヒットの制御メカニズムを解明するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. RegulomeDB バリアント制御スコア
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ REGULOMEDB_API = "https://regulomedb.org/regulome-search"
33
+
34
+
35
def score_regulome_variants(variants):
    """
    Score the regulatory function of non-coding variants with RegulomeDB.

    Parameters:
        variants: list — variants as rsIDs or chr:pos strings
            e.g., ["rs12345", "chr1:109274570"]

    Returns:
        pandas.DataFrame with one row per entry of the response's "@graph"
        list, plus one {"variant", "score": None, "error": True} row for
        every variant whose request did not return status 200.

    ToolUniverse:
        RegulomeDB_score_variant(variant=variant)
    """
    rows = []
    for variant in variants:
        response = requests.get(
            REGULOMEDB_API,
            params={"regions": variant, "genome": "GRCh38", "format": "json"},
        )
        if response.status_code == 200:
            for hit in response.json().get("@graph", []):
                score_info = hit.get("regulome_score", {})
                rows.append({
                    "variant": variant,
                    "regulome_score": score_info.get("ranking", ""),
                    "probability": score_info.get("probability", ""),
                    "chrom": hit.get("chrom", ""),
                    "start": hit.get("start", ""),
                    "end": hit.get("end", ""),
                    "dnase": hit.get("dnase", ""),
                    "proteins_binding": hit.get("proteins_binding", []),
                    "motifs": hit.get("motifs", []),
                    "eqtls": hit.get("eqtls", []),
                    "chromatin_state": hit.get("chromatin_state", {}),
                })
        else:
            # Record the failure as a placeholder row and carry on.
            rows.append({"variant": variant, "score": None, "error": True})

    frame = pd.DataFrame(rows)
    if "regulome_score" in frame.columns and not frame.empty:
        # Count rankings that begin with 1 or 2.
        is_high = frame["regulome_score"].astype(str).str.match(r"^[12]")
        high_func = is_high.sum()
        print(f"RegulomeDB: {len(variants)} variants scored, "
              f"{high_func} with high regulatory function (score 1-2)")
    return frame
76
+ ```
77
+
78
+ ## 2. ReMap 転写因子結合マッピング
79
+
80
+ ```python
81
+ REMAP_API = "https://remap.univ-amu.fr/api/v1"
82
+
83
+
84
def search_remap_binding(chrom, start, end, genome="hg38"):
    """
    Look up ReMap transcription-factor / co-regulator peaks in a region.

    Parameters:
        chrom: str — chromosome (e.g., "chr1")
        start: int — start coordinate
        end: int — end coordinate
        genome: str — genome assembly ("hg38", "hg19", "mm10")

    Returns:
        pandas.DataFrame with the columns "tf_name", "biotype", "cell_type",
        "experiment", "peak_start", "peak_end" and "score".

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        ReMap_search_peaks(chrom=chrom, start=start, end=end)
        ReMap_get_tf_targets(tf_name=tf_name)
    """
    response = requests.get(
        REMAP_API + "/peaks/search",
        params={"chrom": chrom, "start": start, "end": end, "genome": genome},
    )
    response.raise_for_status()
    payload = response.json()

    frame = pd.DataFrame([
        {
            "tf_name": entry.get("tf_name", ""),
            "biotype": entry.get("biotype", ""),
            "cell_type": entry.get("cell_type", ""),
            "experiment": entry.get("experiment_accession", ""),
            "peak_start": entry.get("start", ""),
            "peak_end": entry.get("end", ""),
            "score": entry.get("score", ""),
        }
        for entry in payload.get("peaks", [])
    ])

    # An empty frame has no "tf_name" column, so guard before counting.
    if frame.empty:
        unique_tfs = 0
    else:
        unique_tfs = frame["tf_name"].nunique()
    print(f"ReMap {chrom}:{start}-{end}: {len(frame)} peaks, {unique_tfs} unique TFs")
    return frame
124
+
125
+
126
def get_remap_tf_targets(tf_name, genome="hg38"):
    """
    Fetch ReMap binding sites for one transcription factor.

    Parameters:
        tf_name: str — transcription factor name (e.g., "TP53", "CTCF", "STAT3")
        genome: str — genome assembly sent as the "genome" query parameter

    Returns:
        pandas.DataFrame with the columns "chrom", "start", "end",
        "cell_type" and "score", limited to the first 1000 returned peaks.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(
        REMAP_API + "/peaks/tf",
        params={"tf": tf_name, "genome": genome},
    )
    response.raise_for_status()

    # Limit for large TFs: keep only the first 1000 peaks.
    first_peaks = response.json().get("peaks", [])[:1000]
    frame = pd.DataFrame([
        {
            "chrom": entry.get("chrom", ""),
            "start": entry.get("start", ""),
            "end": entry.get("end", ""),
            "cell_type": entry.get("cell_type", ""),
            "score": entry.get("score", ""),
        }
        for entry in first_peaks
    ])
    print(f"ReMap TF '{tf_name}': {len(frame)} binding sites")
    return frame
151
+ ```
152
+
153
+ ## 3. 4D Nucleome (4DN) 三次元ゲノム構造
154
+
155
+ ```python
156
+ FOURDN_API = "https://data.4dnucleome.org"
157
+
158
+
159
def search_4dn_experiments(query, experiment_type=None):
    """
    Search the 4D Nucleome data portal for replicate experiment sets.

    Parameters:
        query: str — search query (cell line name, protein name, etc.)
        experiment_type: str — experiment type ("in situ Hi-C", "SPRITE",
            "GAM"); sent as a filter only when truthy

    Returns:
        pandas.DataFrame with the columns "accession", "title",
        "experiment_type", "biosource", "lab" and "status".

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        FourDN_search_experiments(query=query)
    """
    search_params = {
        "searchTerm": query,
        "type": "ExperimentSetReplicate",
        "format": "json",
    }
    if experiment_type:
        search_params["experiment_type.display_title"] = experiment_type

    response = requests.get(FOURDN_API + "/search/", params=search_params)
    response.raise_for_status()
    hits = response.json().get("@graph", [])

    frame = pd.DataFrame([
        {
            "accession": hit.get("accession", ""),
            "title": hit.get("display_title", ""),
            "experiment_type": hit.get("experiment_type", {}).get("display_title", ""),
            "biosource": hit.get("biosource_summary", ""),
            "lab": hit.get("lab", {}).get("display_title", ""),
            "status": hit.get("status", ""),
        }
        for hit in hits
    ])
    print(f"4DN search '{query}': {len(frame)} experiment sets")
    return frame
196
+ ```
197
+
198
+ ## 4. 制御バリアント統合解析パイプライン
199
+
200
+ ```python
201
def regulatory_variant_pipeline(variants, genome="hg38"):
    """
    Run the integrated regulatory-variant analysis.

    Scores every variant with score_regulome_variants, then queries
    search_remap_binding on a ±500 bp window around each scored hit that
    carries coordinates, and finally prints and returns a summary.

    Parameters:
        variants: list — variants as rsIDs or chr:pos strings
        genome: str — genome assembly passed to search_remap_binding

    Returns:
        dict with keys "regulome" (DataFrame of scores), "remap"
        (dict mapping variant -> DataFrame of peaks) and "summary"
        (dict of counts).
    """

    def _has_value(value):
        # Failed lookups leave None/NaN in the coordinate columns and hits
        # without a position carry "". NaN is truthy, so a plain truth test
        # would let it through; NaN is the only value unequal to itself.
        if value is None or value != value:
            return False
        return not (isinstance(value, str) and not value.strip())

    print("=" * 60)
    print("Regulatory Variant Analysis Pipeline")
    print("=" * 60)

    # Step 1: RegulomeDB scoring
    print("\n[1/3] RegulomeDB scoring...")
    regulome_df = score_regulome_variants(variants)

    # Step 2: ReMap TF binding for every hit that has usable coordinates
    print("\n[2/3] ReMap TF binding analysis...")
    remap_results = {}
    for _, row in regulome_df.iterrows():
        chrom = row.get("chrom")
        start = row.get("start")
        end = row.get("end")
        if not (_has_value(chrom) and _has_value(start) and _has_value(end)):
            continue
        try:
            # Coordinate conversion sits inside the try so one malformed row
            # is reported instead of aborting the whole pipeline.
            window_start = max(int(start) - 500, 0)  # never query below 0
            window_end = int(end) + 500
            remap_df = search_remap_binding(chrom, window_start, window_end, genome)
            remap_results[row["variant"]] = remap_df
        except Exception as e:
            # Best effort per variant: report and continue with the rest.
            print(f" ReMap error for {row['variant']}: {e}")

    # Step 3: Summary
    print("\n[3/3] Summary")
    if "regulome_score" in regulome_df.columns:
        # Rankings that begin with 1 or 2 count as high regulatory function.
        high_regulatory = int(
            regulome_df["regulome_score"].astype(str).str.match(r"^[12]").sum()
        )
    else:
        high_regulatory = 0
    summary = {
        "total_variants": len(variants),
        "regulome_scored": len(regulome_df),
        "high_regulatory": high_regulatory,
        "remap_annotated": len(remap_results),
    }
    print(f" Total: {summary['total_variants']}, "
          f"High regulatory: {summary['high_regulatory']}, "
          f"ReMap annotated: {summary['remap_annotated']}")

    return {"regulome": regulome_df, "remap": remap_results, "summary": summary}
245
+ ```
246
+
247
+ ---
248
+
249
+ ## 利用可能ツール
250
+
251
+ | ToolUniverse カテゴリ | 主なツール |
252
+ |---|---|
253
+ | `regulomedb` | `RegulomeDB_score_variant` |
254
+ | `remap` | `ReMap_search_peaks`, `ReMap_get_tf_targets` |
255
+ | `fourdn_portal` | `FourDN_search_experiments` |
256
+
257
+ ## パイプライン出力
258
+
259
+ | 出力ファイル | 説明 | 連携先スキル |
260
+ |---|---|---|
261
+ | `results/regulome_scores.csv` | バリアント制御スコア | → variant-interpretation, variant-effect-prediction |
262
+ | `results/remap_binding.csv` | TF 結合マッピング | → epigenomics-chromatin, disease-research |
263
+ | `results/4dn_contacts.json` | 3D ゲノム構造データ | → single-cell-genomics, epigenomics-chromatin |
264
+
265
+ ## パイプライン統合
266
+
267
+ ```
268
+ variant-interpretation ──→ regulatory-genomics ──→ epigenomics-chromatin
269
+ (ACMG/AMP) (RegulomeDB/ReMap/4DN) (ChIP-seq/ATAC)
270
+
271
+ ├──→ disease-research (GWAS enhancer)
272
+ ├──→ gene-expression (eQTL/制御)
273
+ └──→ noncoding-rna (ncRNA 制御)
274
+ ```