@nahisaho/satori 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -23
- package/package.json +1 -1
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +4 -0
- package/src/.github/skills/scientific-cellxgene-census/SKILL.md +257 -0
- package/src/.github/skills/scientific-clingen-curation/SKILL.md +258 -0
- package/src/.github/skills/scientific-clinical-nlp/SKILL.md +250 -0
- package/src/.github/skills/scientific-drug-repurposing/SKILL.md +4 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +5 -2
- package/src/.github/skills/scientific-hgnc-nomenclature/SKILL.md +282 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +3 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics-network/SKILL.md +311 -0
- package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-pharos-targets/SKILL.md +276 -0
- package/src/.github/skills/scientific-protein-structure-analysis/SKILL.md +4 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +7 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-clingen-curation
|
|
3
|
+
description: |
|
|
4
|
+
ClinGen 臨床ゲノム資源キュレーションスキル。ClinGen API に
|
|
5
|
+
よる遺伝子-疾患バリディティ、臨床アクショナビリティ、
|
|
6
|
+
遺伝子量感受性 (dosage sensitivity)、バリアントレベルエビデンス評価パイプライン。
|
|
7
|
+
ToolUniverse 連携: clingen。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: clingen
|
|
10
|
+
name: ClinGen
|
|
11
|
+
description: ClinGen 臨床ゲノムリソース キュレーションデータ
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific ClinGen Curation
|
|
15
|
+
|
|
16
|
+
ClinGen (Clinical Genome Resource) API を活用した
|
|
17
|
+
遺伝子-疾患バリディティ分類・臨床アクショナビリティ
|
|
18
|
+
スコアリング・遺伝子量感受性評価・バリアントキュレーション
|
|
19
|
+
パイプラインを提供する。
|
|
20
|
+
|
|
21
|
+
## When to Use
|
|
22
|
+
|
|
23
|
+
- 遺伝子-疾患関連のエビデンスレベルを評価するとき
|
|
24
|
+
- 臨床アクショナビリティ (介入可能性) を判定するとき
|
|
25
|
+
- ハプロ不全/トリプロ感受性を評価するとき
|
|
26
|
+
- ClinGen キュレーション済みバリアント分類を取得するとき
|
|
27
|
+
- ACMG ガイドラインに基づくバリアント解釈を行うとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. 遺伝子-疾患バリディティ
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
# Base URL that the ClinGen request helpers below prepend to their endpoint paths.
CLINGEN_BASE = "https://search.clinicalgenome.org/kb"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def clingen_gene_validity(gene_symbol):
    """
    ClinGen — fetch gene-disease validity classifications for one gene.

    Parameters:
        gene_symbol: str — gene symbol used as the search term (e.g. "BRCA1")

    Returns:
        pandas.DataFrame — one row per returned item with the columns
        gene, disease, classification, moi and sop. When the response
        holds no items the frame is empty and has no columns.

    Raises:
        requests.HTTPError — when the server replies with an error status.
    """
    # Hand the query to `params` so requests URL-encodes the search term
    # instead of splicing it raw into the URL.
    resp = requests.get(
        f"{CLINGEN_BASE}/gene-validity/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as {"results": [...]}.
    results = data if isinstance(data, list) else data.get("results", [])

    rows = []
    for item in results:
        # `or {}` keeps the nested lookups safe when a key is present but null.
        gene = item.get("gene") or {}
        disease = item.get("disease") or {}
        rows.append({
            # Fall back to the queried symbol when the item carries none.
            "gene": gene.get("symbol", gene_symbol),
            "disease": disease.get("label", ""),
            "classification": item.get("classification", ""),
            "moi": item.get("moi", ""),
            "sop": item.get("sopVersion", ""),
        })

    df = pd.DataFrame(rows)
    print(f"ClinGen validity: {gene_symbol} → "
          f"{len(df)} gene-disease pairs")
    return df
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def clingen_gene_validity_batch(gene_symbols):
    """
    ClinGen — fetch gene-disease validity for several genes and merge them.

    Parameters:
        gene_symbols: list[str] — gene symbols to query, one request each

    Returns:
        pandas.DataFrame — the non-empty per-gene frames concatenated with a
        fresh index, or an empty frame when no gene produced any rows.
    """
    per_gene = (clingen_gene_validity(symbol) for symbol in gene_symbols)
    frames = [frame for frame in per_gene if not frame.empty]

    if not frames:
        return pd.DataFrame()

    combined = pd.concat(frames, ignore_index=True)
    # Summarise how many pairs fall into each classification.
    distribution = combined["classification"].value_counts().to_dict()
    print(f"Validity distribution: "
          f"{distribution}")
    return combined
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## 2. 遺伝子量感受性 (Dosage Sensitivity)
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
def clingen_dosage_sensitivity(gene_symbol):
    """
    ClinGen — fetch dosage sensitivity (haplo/triplo) entries for one gene.

    Parameters:
        gene_symbol: str — gene symbol used as the search term

    Returns:
        pandas.DataFrame — one row per returned item with the columns
        gene, haplo_score, haplo_label, triplo_score and triplo_label.
        When the response holds no items the frame is empty and has no
        columns.

    Raises:
        requests.HTTPError — when the server replies with an error status.
    """
    # Hand the query to `params` so requests URL-encodes the search term
    # instead of splicing it raw into the URL.
    resp = requests.get(
        f"{CLINGEN_BASE}/gene-dosage/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as {"results": [...]}.
    results = data if isinstance(data, list) else data.get("results", [])

    rows = []
    for item in results:
        # `or {}` keeps the nested lookups safe when a key is present but null.
        gene = item.get("gene") or {}
        haplo = item.get("haploinsufficiency") or {}
        triplo = item.get("triplosensitivity") or {}
        rows.append({
            # Fall back to the queried symbol when the item carries none.
            "gene": gene.get("symbol", gene_symbol),
            "haplo_score": haplo.get("score", ""),
            "haplo_label": haplo.get("label", ""),
            "triplo_score": triplo.get("score", ""),
            "triplo_label": triplo.get("label", ""),
        })

    df = pd.DataFrame(rows)
    print(f"ClinGen dosage: {gene_symbol} → "
          f"{len(df)} entries")
    return df
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 3. 臨床アクショナビリティ
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def clingen_actionability(gene_symbol):
    """
    ClinGen — fetch clinical actionability entries for one gene.

    Parameters:
        gene_symbol: str — gene symbol used as the search term

    Returns:
        pandas.DataFrame — one row per returned item with the columns
        gene, disease, classification and date. When the response holds
        no items the frame is empty and has no columns.

    Raises:
        requests.HTTPError — when the server replies with an error status.
    """
    # Hand the query to `params` so requests URL-encodes the search term
    # instead of splicing it raw into the URL.
    resp = requests.get(
        f"{CLINGEN_BASE}/actionability/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as {"results": [...]}.
    results = data if isinstance(data, list) else data.get("results", [])

    rows = []
    for item in results:
        # `or {}` keeps the nested lookups safe when a key is present but null.
        gene = item.get("gene") or {}
        disease = item.get("disease") or {}
        rows.append({
            # Fall back to the queried symbol when the item carries none.
            "gene": gene.get("symbol", gene_symbol),
            "disease": disease.get("label", ""),
            "classification": item.get("classification", ""),
            "date": item.get("date", ""),
        })

    df = pd.DataFrame(rows)
    print(f"ClinGen actionability: {gene_symbol} → "
          f"{len(df)} entries")
    return df
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## 4. ClinGen 統合パイプライン
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
def clingen_pipeline(gene_symbols,
                     output_dir="results"):
    """
    ClinGen integrated curation pipeline.

    Runs the validity, dosage sensitivity and actionability lookups for
    every gene and writes each non-empty result to a CSV file in
    `output_dir`.

    Parameters:
        gene_symbols: list[str] — gene symbols to query
        output_dir: str — directory for the CSV files (created if missing)

    Returns:
        dict with the keys "validity", "dosage" and "actionability", each a
        pandas.DataFrame. A stage that produced no rows maps to an empty
        frame and writes no file.
    """
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # The symbols are walked once per stage; materialise them so a one-shot
    # iterable is not exhausted after the first stage.
    gene_symbols = list(gene_symbols)

    # 1) Gene-disease validity
    validity_df = clingen_gene_validity_batch(gene_symbols)
    if not validity_df.empty:
        validity_df.to_csv(output_dir / "clingen_validity.csv",
                           index=False)

    # 2) Dosage sensitivity
    dosage_results = []
    for sym in gene_symbols:
        dos = clingen_dosage_sensitivity(sym)
        if not dos.empty:
            dosage_results.append(dos)
    # Default to an empty frame so the return value always has this key.
    dosage_df = pd.DataFrame()
    if dosage_results:
        dosage_df = pd.concat(dosage_results, ignore_index=True)
        dosage_df.to_csv(output_dir / "clingen_dosage.csv",
                         index=False)

    # 3) Actionability
    action_results = []
    for sym in gene_symbols:
        act = clingen_actionability(sym)
        if not act.empty:
            action_results.append(act)
    action_df = pd.DataFrame()
    if action_results:
        action_df = pd.concat(action_results, ignore_index=True)
        action_df.to_csv(output_dir / "clingen_actionability.csv",
                         index=False)

    print(f"ClinGen pipeline → {output_dir}")
    # "validity" is kept as before; the other two stages used to be computed
    # and written but never handed back to the caller.
    return {
        "validity": validity_df,
        "dosage": dosage_df,
        "actionability": action_df,
    }
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## ToolUniverse 連携
|
|
237
|
+
|
|
238
|
+
| TU Key | ツール名 | 連携内容 |
|
|
239
|
+
|--------|---------|---------|
|
|
240
|
+
| `clingen` | ClinGen | ClinGen 臨床ゲノムリソース キュレーションデータ |
|
|
241
|
+
|
|
242
|
+
## パイプライン統合
|
|
243
|
+
|
|
244
|
+
```
|
|
245
|
+
variant-interpretation → clingen-curation → clinical-decision-support
|
|
246
|
+
(ClinVar/ACMG) (GDV/DOS/ACT) (臨床判断支援)
|
|
247
|
+
│ │ ↓
|
|
248
|
+
variant-effect-prediction ─┘ pharmacogenomics
|
|
249
|
+
(SpliceAI/CADD) (PGx 処方)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## パイプライン出力
|
|
253
|
+
|
|
254
|
+
| ファイル | 説明 | 次スキル |
|
|
255
|
+
|---------|------|---------|
|
|
256
|
+
| `results/clingen_validity.csv` | 遺伝子-疾患バリディティ | → genetic-counseling |
|
|
257
|
+
| `results/clingen_dosage.csv` | 遺伝子量感受性 | → cnv-analysis |
|
|
258
|
+
| `results/clingen_actionability.csv` | 臨床介入可能性 | → precision-medicine |
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-clinical-nlp
|
|
3
|
+
description: |
|
|
4
|
+
臨床自然言語処理スキル。MedSpaCy / cTAKES / scispaCy
|
|
5
|
+
による臨床テキスト NER、セクション検出、否定文検出、
|
|
6
|
+
ICD-10/SNOMED-CT エンティティリンキング、
|
|
7
|
+
匿名化 (De-identification) パイプライン。
|
|
8
|
+
TU 外スキル (直接 Python ライブラリ)。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Clinical NLP
|
|
12
|
+
|
|
13
|
+
MedSpaCy・scispaCy を中心とした臨床テキスト自然言語処理
|
|
14
|
+
パイプラインを提供する。電子カルテテキストからの臨床エンティティ
|
|
15
|
+
抽出・否定文検出 (NegEx)・セクション検出・標準用語へのリンキング
|
|
16
|
+
を行う。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- 電子カルテ / 臨床ノートから疾患・薬剤・症状を抽出するとき
|
|
21
|
+
- 臨床テキストの否定文 (NegEx/ConText) を検出するとき
|
|
22
|
+
- テキストセクション (主訴/HPI/Assessment/Plan) を分類するとき
|
|
23
|
+
- ICD-10 / SNOMED-CT コードへのリンキングを行うとき
|
|
24
|
+
- PHI 匿名化 (De-identification) を実施するとき
|
|
25
|
+
- バイオメディカル文献テキストマイニングとの連携
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
## 1. MedSpaCy 臨床 NER
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import medspacy
|
|
35
|
+
from medspacy.ner import TargetRule
|
|
36
|
+
from medspacy.visualization import visualize_ent
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def clinical_ner(text, rules=None):
    """
    MedSpaCy — clinical-text NER pipeline.

    Parameters:
        text: str — clinical text to analyse
        rules: list[dict] | None — custom target rules; each dict needs a
            "literal" key and may carry a "category" (default "CONDITION")

    Returns:
        list[dict] — one dict per entity with its text, label, character
        offsets and the is_negated / is_uncertain / is_historical /
        is_family attributes read from the entity.
    """
    pipeline = medspacy.load(enable=[
        "medspacy_pyrush",
        "medspacy_target_matcher",
        "medspacy_context",
    ])

    if rules:
        matcher = pipeline.get_pipe("medspacy_target_matcher")
        for rule in rules:
            category = rule.get("category", "CONDITION")
            matcher.add(TargetRule(literal=rule["literal"],
                                   category=category))

    entities = [
        {
            "text": span.text,
            "label": span.label_,
            "start": span.start_char,
            "end": span.end_char,
            "is_negated": span._.is_negated,
            "is_uncertain": span._.is_uncertain,
            "is_historical": span._.is_historical,
            "is_family": span._.is_family,
        }
        for span in pipeline(text).ents
    ]

    negated_count = len([e for e in entities if e["is_negated"]])
    print(f"Clinical NER: {len(entities)} entities, "
          f"{negated_count} negated")
    return entities
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def clinical_ner_batch(texts, rules=None):
    """
    MedSpaCy — batch clinical NER.

    Parameters:
        texts: list[str] — clinical texts, processed one at a time
        rules: list[dict] | None — custom rules forwarded to clinical_ner

    Returns:
        pandas.DataFrame — all entities from every text, each tagged with
        "doc_id", the position of its source text in `texts`.
    """
    import pandas as pd

    collected = []
    for doc_id, text in enumerate(texts):
        for entity in clinical_ner(text, rules):
            entity["doc_id"] = doc_id
            collected.append(entity)

    frame = pd.DataFrame(collected)
    print(f"Batch NER: {len(texts)} docs, "
          f"{len(frame)} total entities")
    return frame
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## 2. セクション検出
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
def clinical_section_detect(text):
    """
    MedSpaCy — detect sections in a clinical text.

    Parameters:
        text: str — clinical text to analyse

    Returns:
        list[dict] — one dict per detected section with its category, its
        title text, and the first 200 characters of its body text. Title
        and body are "" when the corresponding span is falsy.
    """
    import medspacy

    pipeline = medspacy.load(enable=["medspacy_pyrush",
                                     "medspacy_sectionizer"])
    parsed = pipeline(text)

    sections = []
    for sec in parsed._.sections:
        title = sec.title_span.text if sec.title_span else ""
        # Only a 200-character preview of the body is kept.
        body = sec.body_span.text[:200] if sec.body_span else ""
        sections.append({
            "category": sec.category,
            "title": title,
            "body": body,
        })

    print(f"Sections detected: {len(sections)}")
    for entry in sections:
        print(f"  [{entry['category']}] "
              f"{entry['title'][:50]}")
    return sections
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## 3. SNOMED-CT / ICD-10 リンキング
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
def clinical_entity_linking(text,
                            linker_name="umls"):
    """
    scispaCy — link clinical entities to knowledge-base concepts.

    Parameters:
        text: str — clinical text to analyse
        linker_name: str — name of the scispaCy knowledge base, passed
            through unchanged to the "scispacy_linker" component.
            NOTE(review): scispaCy documents "umls", "mesh", "rxnorm",
            "go" and "hpo"; "snomed" is not among them — SNOMED CT codes
            are presumably reached through the UMLS linker. Confirm
            against the installed scispaCy version.

    Returns:
        pandas.DataFrame — up to three candidate rows per entity with the
        columns text, cui, score (rounded to 3 decimals) and
        canonical_name.
    """
    import spacy
    # These two imports look unused; importing scispacy.linking is what is
    # expected to register the "scispacy_linker" factory with spaCy, so they
    # must stay.
    import scispacy
    from scispacy.linking import EntityLinker

    nlp = spacy.load("en_core_sci_md")
    nlp.add_pipe("scispacy_linker",
                 config={"resolve_abbreviations": True,
                         "linker_name": linker_name})

    doc = nlp(text)
    linker = nlp.get_pipe("scispacy_linker")
    cui_to_entity = linker.kb.cui_to_entity

    linked = []
    for ent in doc.ents:
        # Keep only the first three candidates of each entity.
        for cui, score in ent._.kb_ents[:3]:
            concept = cui_to_entity.get(cui)
            if concept is None:
                # An unknown CUI used to surface as the literal string "{}".
                canonical_name = ""
            elif hasattr(concept, "canonical_name"):
                canonical_name = concept.canonical_name
            else:
                canonical_name = str(concept)
            linked.append({
                "text": ent.text,
                "cui": cui,
                "score": round(score, 3),
                "canonical_name": canonical_name,
            })

    import pandas as pd
    df = pd.DataFrame(linked)
    print(f"Entity linking: {len(doc.ents)} entities → "
          f"{len(df)} CUI mappings")
    return df
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## 4. 臨床 NLP 統合パイプライン
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
def clinical_nlp_pipeline(texts,
                          output_dir="results"):
    """
    Integrated clinical NLP pipeline.

    Runs NER, section detection and entity linking, writing each result
    to a CSV file in `output_dir`.

    Parameters:
        texts: list[str] — clinical texts to process
        output_dir: str — directory for the CSV files (created if missing)

    Returns:
        dict with the keys "ner", "sections" and "linking", each a
        pandas.DataFrame. "linking" covers only the first text and is an
        empty frame (with no file written) when `texts` is empty.
    """
    import pandas as pd
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) NER + negation detection
    ner_df = clinical_ner_batch(texts)
    ner_df.to_csv(output_dir / "clinical_ner.csv",
                  index=False)

    # 2) Section detection
    all_sections = []
    for i, text in enumerate(texts):
        secs = clinical_section_detect(text)
        for s in secs:
            s["doc_id"] = i
        all_sections.extend(secs)
    section_df = pd.DataFrame(all_sections)
    section_df.to_csv(output_dir / "clinical_sections.csv",
                      index=False)

    # 3) Entity linking (first text only)
    # Default to an empty frame so the return value always has this key.
    link_df = pd.DataFrame()
    if texts:
        link_df = clinical_entity_linking(texts[0])
        link_df.to_csv(output_dir / "entity_linking.csv",
                       index=False)

    print(f"Clinical NLP pipeline → {output_dir}")
    # "ner" and "sections" are kept as before; the linking frame used to be
    # written to disk but never handed back to the caller.
    return {"ner": ner_df, "sections": section_df, "linking": link_df}
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## パイプライン統合
|
|
235
|
+
|
|
236
|
+
```
|
|
237
|
+
text-mining-nlp → clinical-nlp → clinical-reporting
|
|
238
|
+
(PubMed/文献) (NER/NegEx) (構造化レポート)
|
|
239
|
+
│ │ ↓
|
|
240
|
+
biomedical-ner ───────┘ pharmacogenomics
|
|
241
|
+
(scispaCy) (PGx 処方支援)
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## パイプライン出力
|
|
245
|
+
|
|
246
|
+
| ファイル | 説明 | 次スキル |
|
|
247
|
+
|---------|------|---------|
|
|
248
|
+
| `results/clinical_ner.csv` | 臨床エンティティ+否定 | → phenotype-hpo |
|
|
249
|
+
| `results/clinical_sections.csv` | セクション分類 | → clinical-reporting |
|
|
250
|
+
| `results/entity_linking.csv` | UMLS/SNOMED リンキング | → disease-research |
|
|
@@ -3,8 +3,11 @@ name: scientific-gtex-tissue-expression
|
|
|
3
3
|
description: |
|
|
4
4
|
GTEx 組織発現スキル。GTEx Portal REST API v2 による
|
|
5
5
|
組織特異的遺伝子発現パターン解析・eQTL ルックアップ・
|
|
6
|
-
|
|
7
|
-
tu_tools:
|
|
6
|
+
多組織比較。ToolUniverse 連携: gtex_v2。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: gtex_v2
|
|
9
|
+
name: GTEx v2
|
|
10
|
+
description: GTEx Portal REST API v2 組織特異的発現・eQTL
|
|
8
11
|
---
|
|
9
12
|
|
|
10
13
|
# Scientific GTEx Tissue Expression
|