@nahisaho/satori 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +70 -39
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +4 -0
  4. package/src/.github/skills/scientific-cellxgene-census/SKILL.md +257 -0
  5. package/src/.github/skills/scientific-clingen-curation/SKILL.md +258 -0
  6. package/src/.github/skills/scientific-clinical-nlp/SKILL.md +250 -0
  7. package/src/.github/skills/scientific-clinical-pharmacology/SKILL.md +361 -0
  8. package/src/.github/skills/scientific-clinical-standards/SKILL.md +444 -0
  9. package/src/.github/skills/scientific-crispr-design/SKILL.md +369 -0
  10. package/src/.github/skills/scientific-drug-repurposing/SKILL.md +4 -0
  11. package/src/.github/skills/scientific-environmental-ecology/SKILL.md +5 -0
  12. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +5 -0
  13. package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +5 -0
  14. package/src/.github/skills/scientific-glycomics/SKILL.md +274 -0
  15. package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +5 -2
  16. package/src/.github/skills/scientific-hgnc-nomenclature/SKILL.md +282 -0
  17. package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +3 -0
  18. package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +4 -0
  19. package/src/.github/skills/scientific-immunoinformatics/SKILL.md +9 -0
  20. package/src/.github/skills/scientific-lipidomics/SKILL.md +284 -0
  21. package/src/.github/skills/scientific-metabolomics/SKILL.md +3 -0
  22. package/src/.github/skills/scientific-metabolomics-network/SKILL.md +311 -0
  23. package/src/.github/skills/scientific-metagenome-assembled-genomes/SKILL.md +299 -0
  24. package/src/.github/skills/scientific-model-organism-db/SKILL.md +8 -0
  25. package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +4 -0
  26. package/src/.github/skills/scientific-pharos-targets/SKILL.md +276 -0
  27. package/src/.github/skills/scientific-protein-structure-analysis/SKILL.md +4 -0
  28. package/src/.github/skills/scientific-public-health-data/SKILL.md +11 -0
  29. package/src/.github/skills/scientific-systems-biology/SKILL.md +11 -0
  30. package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +7 -0
@@ -0,0 +1,258 @@
1
+ ---
2
+ name: scientific-clingen-curation
3
+ description: |
4
+ ClinGen 臨床ゲノム資源キュレーションスキル。ClinGen API に
5
+ よる遺伝子-疾患バリディティ、臨床アクショナビリティ、
6
+ 遺伝子量感受性、バリアントレベルエビデンス評価パイプライン。
7
+ ToolUniverse 連携: clingen。
8
+ tu_tools:
9
+ - key: clingen
10
+ name: ClinGen
11
+ description: ClinGen 臨床ゲノムリソース キュレーションデータ
12
+ ---
13
+
14
+ # Scientific ClinGen Curation
15
+
16
+ ClinGen (Clinical Genome Resource) API を活用した
17
+ 遺伝子-疾患バリディティ分類・臨床アクショナビリティ
18
+ スコアリング・遺伝子量感受性評価・バリアントキュレーション
19
+ パイプラインを提供する。
20
+
21
+ ## When to Use
22
+
23
+ - 遺伝子-疾患関連のエビデンスレベルを評価するとき
24
+ - 臨床アクショナビリティ (介入可能性) を判定するとき
25
+ - ハプロ不全/トリプロ感受性を評価するとき
26
+ - ClinGen キュレーション済みバリアント分類を取得するとき
27
+ - ACMG ガイドラインに基づくバリアント解釈を行うとき
28
+
29
+ ---
30
+
31
+ ## Quick Start
32
+
33
+ ## 1. 遺伝子-疾患バリディティ
34
+
35
+ ```python
36
+ import requests
37
+ import pandas as pd
38
+
39
+ CLINGEN_BASE = "https://search.clinicalgenome.org/kb"
40
+
41
+
42
def clingen_gene_validity(gene_symbol):
    """
    Fetch ClinGen gene-disease validity classifications for one gene.

    Sends a GET request to ``{CLINGEN_BASE}/gene-validity/`` with the
    ``search`` and ``format=json`` query parameters and flattens the JSON
    response into a table. The payload is accepted either as a bare list
    of records or as an object holding the records under ``"results"``.

    Parameters:
        gene_symbol: str — gene symbol to search for (e.g. "BRCA1")

    Returns:
        pandas.DataFrame with one row per record and the columns
        ``gene``, ``disease``, ``classification``, ``moi`` and ``sop``.
        When there are no records the frame is empty and has no columns.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # NOTE(review): the record field names read below (gene.symbol,
    # disease.label, classification, moi, sopVersion) are taken from this
    # code, not from a verified schema — confirm against the live API.

    # Let requests build the query string so that a symbol containing
    # reserved characters (space, "&", "#", ...) is percent-encoded
    # instead of corrupting the URL.
    resp = requests.get(
        f"{CLINGEN_BASE}/gene-validity/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    if isinstance(data, list):
        results = data
    else:
        results = data.get("results", [])

    rows = [
        {
            # `or {}` also covers a nested object that is present but
            # null, which the default argument of dict.get would not.
            "gene": (item.get("gene") or {}).get("symbol", gene_symbol),
            "disease": (item.get("disease") or {}).get("label", ""),
            "classification": item.get("classification", ""),
            "moi": item.get("moi", ""),
            "sop": item.get("sopVersion", ""),
        }
        for item in results
    ]

    df = pd.DataFrame(rows)
    print(f"ClinGen validity: {gene_symbol} → "
          f"{len(df)} gene-disease pairs")
    return df
75
+
76
+
77
def clingen_gene_validity_batch(gene_symbols):
    """
    Fetch ClinGen gene-disease validity for several genes and stack them.

    Calls ``clingen_gene_validity`` once per symbol, in order, and keeps
    only the non-empty results. When at least one result is kept, the
    per-classification counts of the combined table are printed.

    Parameters:
        gene_symbols: list[str] — gene symbols to look up

    Returns:
        pandas.DataFrame — the concatenated results with a fresh index,
        or an empty DataFrame when no symbol produced any rows.
    """
    frames = [
        frame
        for frame in map(clingen_gene_validity, gene_symbols)
        if not frame.empty
    ]
    if not frames:
        return pd.DataFrame()

    merged = pd.concat(frames, ignore_index=True)
    distribution = merged["classification"].value_counts().to_dict()
    print(f"Validity distribution: {distribution}")
    return merged
97
+ ```
98
+
99
+ ## 2. 遺伝子量感受性
100
+
101
+ ```python
102
def clingen_dosage_sensitivity(gene_symbol):
    """
    Fetch ClinGen dosage-sensitivity (haplo/triplo) entries for one gene.

    Sends a GET request to ``{CLINGEN_BASE}/gene-dosage/`` with the
    ``search`` and ``format=json`` query parameters and flattens the JSON
    response into a table. The payload is accepted either as a bare list
    of records or as an object holding the records under ``"results"``.

    Parameters:
        gene_symbol: str — gene symbol to search for

    Returns:
        pandas.DataFrame with one row per record and the columns
        ``gene``, ``haplo_score``, ``haplo_label``, ``triplo_score`` and
        ``triplo_label``. When there are no records the frame is empty
        and has no columns.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # NOTE(review): the record field names read below are taken from this
    # code, not from a verified schema — confirm against the live API.

    # Let requests build the query string so that a symbol containing
    # reserved characters is percent-encoded instead of corrupting the URL.
    resp = requests.get(
        f"{CLINGEN_BASE}/gene-dosage/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    if isinstance(data, list):
        results = data
    else:
        results = data.get("results", [])

    rows = []
    for item in results:
        # `or {}` also covers nested objects that are present but null,
        # which the default argument of dict.get would not.
        haplo = item.get("haploinsufficiency") or {}
        triplo = item.get("triplosensitivity") or {}
        rows.append({
            "gene": (item.get("gene") or {}).get("symbol", gene_symbol),
            "haplo_score": haplo.get("score", ""),
            "haplo_label": haplo.get("label", ""),
            "triplo_score": triplo.get("score", ""),
            "triplo_label": triplo.get("label", ""),
        })

    df = pd.DataFrame(rows)
    print(f"ClinGen dosage: {gene_symbol} → "
          f"{len(df)} entries")
    return df
141
+ ```
142
+
143
+ ## 3. 臨床アクショナビリティ
144
+
145
+ ```python
146
def clingen_actionability(gene_symbol):
    """
    Fetch ClinGen clinical-actionability entries for one gene.

    Sends a GET request to ``{CLINGEN_BASE}/actionability/`` with the
    ``search`` and ``format=json`` query parameters and flattens the JSON
    response into a table. The payload is accepted either as a bare list
    of records or as an object holding the records under ``"results"``.

    Parameters:
        gene_symbol: str — gene symbol to search for

    Returns:
        pandas.DataFrame with one row per record and the columns
        ``gene``, ``disease``, ``classification`` and ``date``. When there
        are no records the frame is empty and has no columns.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # NOTE(review): the record field names read below are taken from this
    # code, not from a verified schema — confirm against the live API.

    # Let requests build the query string so that a symbol containing
    # reserved characters is percent-encoded instead of corrupting the URL.
    resp = requests.get(
        f"{CLINGEN_BASE}/actionability/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    if isinstance(data, list):
        results = data
    else:
        results = data.get("results", [])

    rows = [
        {
            # `or {}` also covers a nested object that is present but
            # null, which the default argument of dict.get would not.
            "gene": (item.get("gene") or {}).get("symbol", gene_symbol),
            "disease": (item.get("disease") or {}).get("label", ""),
            "classification": item.get("classification", ""),
            "date": item.get("date", ""),
        }
        for item in results
    ]

    df = pd.DataFrame(rows)
    print(f"ClinGen actionability: {gene_symbol} → "
          f"{len(df)} entries")
    return df
178
+ ```
179
+
180
+ ## 4. ClinGen 統合パイプライン
181
+
182
+ ```python
183
def _clingen_stack_frames(fetch, gene_symbols):
    """Call *fetch* once per symbol, in order, and stack the non-empty frames."""
    frames = [df for df in map(fetch, gene_symbols) if not df.empty]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def clingen_pipeline(gene_symbols,
                     output_dir="results"):
    """
    Run the combined ClinGen curation pipeline for a list of genes.

    Runs three stages in order — gene-disease validity, dosage
    sensitivity, actionability — and writes each non-empty result to a
    CSV file inside *output_dir* (created if missing):
    ``clingen_validity.csv``, ``clingen_dosage.csv`` and
    ``clingen_actionability.csv``. A stage with no rows writes no file.

    Parameters:
        gene_symbols: list[str] — gene symbols to process
        output_dir: str — directory the CSV files are written to

    Returns:
        dict with the keys ``"validity"``, ``"dosage"`` and
        ``"actionability"``, each mapping to the pandas.DataFrame of that
        stage (an empty DataFrame when the stage produced no rows).
    """
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene-disease validity
    validity_df = clingen_gene_validity_batch(gene_symbols)
    if not validity_df.empty:
        validity_df.to_csv(
            output_dir / "clingen_validity.csv",
            index=False)

    # 2) Dosage sensitivity
    dosage_df = _clingen_stack_frames(
        clingen_dosage_sensitivity, gene_symbols)
    if not dosage_df.empty:
        dosage_df.to_csv(
            output_dir / "clingen_dosage.csv",
            index=False)

    # 3) Actionability
    action_df = _clingen_stack_frames(
        clingen_actionability, gene_symbols)
    if not action_df.empty:
        action_df.to_csv(
            output_dir / "clingen_actionability.csv",
            index=False)

    print(f"ClinGen pipeline → {output_dir}")
    # All three stage results are returned; previously only "validity"
    # was, so callers had to re-read the CSV files for the other two.
    return {
        "validity": validity_df,
        "dosage": dosage_df,
        "actionability": action_df,
    }
232
+ ```
233
+
234
+ ---
235
+
236
+ ## ToolUniverse 連携
237
+
238
+ | TU Key | ツール名 | 連携内容 |
239
+ |--------|---------|---------|
240
+ | `clingen` | ClinGen | ClinGen 臨床ゲノムリソース キュレーションデータ |
241
+
242
+ ## パイプライン統合
243
+
244
+ ```
245
+ variant-interpretation → clingen-curation → clinical-decision-support
246
+ (ClinVar/ACMG) (GDV/DOS/ACT) (臨床判断支援)
247
+ │ │ ↓
248
+ variant-effect-prediction ─┘ pharmacogenomics
249
+ (SpliceAI/CADD) (PGx 処方)
250
+ ```
251
+
252
+ ## パイプライン出力
253
+
254
+ | ファイル | 説明 | 次スキル |
255
+ |---------|------|---------|
256
+ | `results/clingen_validity.csv` | 遺伝子-疾患バリディティ | → genetic-counseling |
257
+ | `results/clingen_dosage.csv` | 遺伝子量感受性 | → cnv-analysis |
258
+ | `results/clingen_actionability.csv` | 臨床介入可能性 | → precision-medicine |
@@ -0,0 +1,250 @@
1
+ ---
2
+ name: scientific-clinical-nlp
3
+ description: |
4
+ 臨床自然言語処理スキル。MedSpaCy / cTAKES / scispaCy
5
+ による臨床テキスト NER、セクション検出、否定文検出、
6
+ ICD-10/SNOMED-CT エンティティリンキング、
7
+ 匿名化 (De-identification) パイプライン。
8
+ TU 外スキル (直接 Python ライブラリ)。
9
+ ---
10
+
11
+ # Scientific Clinical NLP
12
+
13
+ MedSpaCy・scispaCy を中心とした臨床テキスト自然言語処理
14
+ パイプラインを提供する。電子カルテテキストからの臨床エンティティ
15
+ 抽出・否定文検出 (NegEx)・セクション検出・標準用語へのリンキング
16
+ を行う。
17
+
18
+ ## When to Use
19
+
20
+ - 電子カルテ / 臨床ノートから疾患・薬剤・症状を抽出するとき
21
+ - 臨床テキストの否定文 (NegEx/ConText) を検出するとき
22
+ - テキストセクション (主訴/HPI/Assessment/Plan) を分類するとき
23
+ - ICD-10 / SNOMED-CT コードへのリンキングを行うとき
24
+ - PHI 匿名化 (De-identification) を実施するとき
25
+ - バイオメディカル文献テキストマイニングとの連携
26
+
27
+ ---
28
+
29
+ ## Quick Start
30
+
31
+ ## 1. MedSpaCy 臨床 NER
32
+
33
+ ```python
34
+ import medspacy
35
+ from medspacy.ner import TargetRule
36
+ from medspacy.visualization import visualize_ent
37
+
38
+
39
def clinical_ner(text, rules=None):
    """
    Run a MedSpaCy clinical NER pipeline over one text.

    Builds a pipeline via ``medspacy.load``, optionally registers extra
    target rules, processes *text* and reports every entity together with
    the context flags read from the entity's ``_`` extension attributes.

    Parameters:
        text: str — clinical text to analyse
        rules: list[dict] | None — custom rules; each dict must have a
            "literal" key and may have a "category" key
            (defaults to "CONDITION")

    Returns:
        list[dict] — one dict per entity with the keys ``text``,
        ``label``, ``start``, ``end``, ``is_negated``, ``is_uncertain``,
        ``is_historical`` and ``is_family``.
    """
    # NOTE(review): recent medspacy releases appear to name this keyword
    # `medspacy_enable` rather than `enable` — confirm against the
    # installed version.
    pipeline = medspacy.load(
        enable=["medspacy_pyrush",
                "medspacy_target_matcher",
                "medspacy_context"])

    if rules:
        matcher = pipeline.get_pipe("medspacy_target_matcher")
        for rule in rules:
            literal = rule["literal"]
            category = rule.get("category", "CONDITION")
            matcher.add(TargetRule(literal=literal, category=category))

    parsed = pipeline(text)

    entities = [
        {
            "text": span.text,
            "label": span.label_,
            "start": span.start_char,
            "end": span.end_char,
            "is_negated": span._.is_negated,
            "is_uncertain": span._.is_uncertain,
            "is_historical": span._.is_historical,
            "is_family": span._.is_family,
        }
        for span in parsed.ents
    ]

    negated_count = len([e for e in entities if e["is_negated"]])
    print(f"Clinical NER: {len(entities)} entities, {negated_count} negated")
    return entities
81
+
82
+
83
def clinical_ner_batch(texts, rules=None):
    """
    Run ``clinical_ner`` over several texts and tabulate the entities.

    Each entity dict is tagged with ``doc_id``, the zero-based position
    of its source text in *texts*.

    Parameters:
        texts: list[str] — clinical texts to analyse
        rules: list[dict] | None — custom rules forwarded to
            ``clinical_ner``

    Returns:
        pandas.DataFrame — one row per entity across all texts.
    """
    # NOTE(review): clinical_ner is called once per text, so whatever
    # setup it performs is repeated for every document.
    collected = []
    for doc_id, text in enumerate(texts):
        for entity in clinical_ner(text, rules):
            entity["doc_id"] = doc_id
            collected.append(entity)

    import pandas as pd
    df = pd.DataFrame(collected)
    print(f"Batch NER: {len(texts)} docs, {len(df)} total entities")
    return df
103
+ ```
104
+
105
+ ## 2. セクション検出
106
+
107
+ ```python
108
def clinical_section_detect(text):
    """
    Detect sections in a clinical text with MedSpaCy.

    Builds a pipeline via ``medspacy.load``, processes *text* and reads
    the sections from ``doc._.sections``. A summary line and one line per
    section (category and the first 50 characters of the title) are
    printed.

    Parameters:
        text: str — clinical text to analyse

    Returns:
        list[dict] — one dict per section with the keys ``category``,
        ``title`` (title span text, or "" when the span is falsy) and
        ``body`` (first 200 characters of the body span text, or "").
    """
    import medspacy

    # NOTE(review): recent medspacy releases appear to name this keyword
    # `medspacy_enable` rather than `enable` — confirm against the
    # installed version.
    pipeline = medspacy.load(
        enable=["medspacy_pyrush",
                "medspacy_sectionizer"])

    parsed = pipeline(text)

    sections = []
    for section in parsed._.sections:
        if section.title_span:
            title = section.title_span.text
        else:
            title = ""
        if section.body_span:
            body = section.body_span.text[:200]
        else:
            body = ""
        sections.append({
            "category": section.category,
            "title": title,
            "body": body,
        })

    print(f"Sections detected: {len(sections)}")
    for entry in sections:
        print(f" [{entry['category']}] {entry['title'][:50]}")
    return sections
137
+ ```
138
+
139
+ ## 3. SNOMED-CT / ICD-10 リンキング
140
+
141
+ ```python
142
def clinical_entity_linking(text,
                            linker_name="umls"):
    """
    Link entities in a clinical text to knowledge-base concepts (scispaCy).

    Loads the ``en_core_sci_md`` spaCy model, appends the
    ``scispacy_linker`` pipe and, for every detected entity, records up to
    the first three ``(cui, score)`` candidates found in
    ``ent._.kb_ents``.

    Parameters:
        text: str — clinical text to analyse
        linker_name: str — knowledge base passed to the linker
            (e.g. "umls", "mesh")

    Returns:
        pandas.DataFrame — one row per (entity, candidate) pair with the
        columns ``text``, ``cui``, ``score`` (rounded to 3 decimals) and
        ``canonical_name``.
    """
    # NOTE(review): the original docstring also listed "snomed" as a
    # linker name — verify that the installed scispaCy supports it.
    import spacy
    # presumably imported for their side effect of registering the
    # "scispacy_linker" factory — neither name is referenced below.
    import scispacy
    from scispacy.linking import EntityLinker

    pipeline = spacy.load("en_core_sci_md")
    linker_config = {"resolve_abbreviations": True,
                     "linker_name": linker_name}
    pipeline.add_pipe("scispacy_linker", config=linker_config)

    parsed = pipeline(text)
    linker = pipeline.get_pipe("scispacy_linker")

    linked = []
    for span in parsed.ents:
        for cui, score in span._.kb_ents[:3]:
            concept = linker.kb.cui_to_entity.get(cui, {})
            # Fall back to the string form when the lookup did not return
            # an object carrying a canonical name (e.g. the {} default).
            if hasattr(concept, "canonical_name"):
                canonical = concept.canonical_name
            else:
                canonical = str(concept)
            linked.append({
                "text": span.text,
                "cui": cui,
                "score": round(score, 3),
                "canonical_name": canonical,
            })

    import pandas as pd
    df = pd.DataFrame(linked)
    print(f"Entity linking: {len(parsed.ents)} entities → "
          f"{len(df)} CUI mappings")
    return df
185
+ ```
186
+
187
+ ## 4. 臨床 NLP 統合パイプライン
188
+
189
+ ```python
190
def clinical_nlp_pipeline(texts,
                          output_dir="results"):
    """
    Run the combined clinical NLP pipeline over a list of texts.

    Runs three stages in order and writes each result to a CSV file
    inside *output_dir* (created if missing):

    1. NER with context flags for every text → ``clinical_ner.csv``
    2. Section detection for every text → ``clinical_sections.csv``
    3. Entity linking for the FIRST text only → ``entity_linking.csv``
       (skipped, and no file written, when *texts* is empty)

    Parameters:
        texts: list[str] — clinical texts to analyse
        output_dir: str — directory the CSV files are written to

    Returns:
        dict with the keys ``"ner"``, ``"sections"`` and ``"linking"``,
        each mapping to the pandas.DataFrame of that stage. ``"linking"``
        is an empty DataFrame when *texts* is empty.
    """
    import pandas as pd
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) NER + negation detection
    ner_df = clinical_ner_batch(texts)
    ner_df.to_csv(output_dir / "clinical_ner.csv",
                  index=False)

    # 2) Section detection; tag every section with its document index
    all_sections = []
    for doc_id, text in enumerate(texts):
        for section in clinical_section_detect(text):
            section["doc_id"] = doc_id
            all_sections.append(section)
    section_df = pd.DataFrame(all_sections)
    section_df.to_csv(
        output_dir / "clinical_sections.csv",
        index=False)

    # 3) Entity linking (first text only)
    link_df = pd.DataFrame()
    if texts:
        link_df = clinical_entity_linking(texts[0])
        link_df.to_csv(
            output_dir / "entity_linking.csv",
            index=False)

    print(f"Clinical NLP pipeline → {output_dir}")
    # The linking table is returned as well; previously it was only
    # written to disk and callers had to re-read the CSV file.
    return {"ner": ner_df, "sections": section_df, "linking": link_df}
230
+ ```
231
+
232
+ ---
233
+
234
+ ## パイプライン統合
235
+
236
+ ```
237
+ text-mining-nlp → clinical-nlp → clinical-reporting
238
+ (PubMed/文献) (NER/NegEx) (構造化レポート)
239
+ │ │ ↓
240
+ biomedical-ner ───────┘ pharmacogenomics
241
+ (scispaCy) (PGx 処方支援)
242
+ ```
243
+
244
+ ## パイプライン出力
245
+
246
+ | ファイル | 説明 | 次スキル |
247
+ |---------|------|---------|
248
+ | `results/clinical_ner.csv` | 臨床エンティティ+否定 | → phenotype-hpo |
249
+ | `results/clinical_sections.csv` | セクション分類 | → clinical-reporting |
250
+ | `results/entity_linking.csv` | UMLS/SNOMED リンキング | → disease-research |