@nahisaho/satori 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/README.md +150 -54
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
  4. package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
  5. package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
  6. package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
  7. package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
  8. package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
  9. package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
  10. package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
  11. package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
  12. package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
  13. package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
  14. package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
  15. package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
  16. package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
  17. package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
  18. package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
  19. package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
  20. package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
  21. package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
  22. package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
@@ -0,0 +1,273 @@
1
+ ---
2
+ name: scientific-healthcare-ai
3
+ description: |
4
+ ヘルスケア AI スキル。PyHealth 臨床 ML パイプライン、
5
+ フローサイトメトリー (FlowIO) 解析、電子健康記録 (EHR) 処理、
6
+ 臨床予測モデル構築のガイダンス。
7
+ ---
8
+
9
+ # Scientific Healthcare AI
10
+
11
+ 臨床データ解析・ヘルスケア機械学習パイプラインを提供する。
12
+ PyHealth フレームワーク、フローサイトメトリー解析ツールを活用。
13
+
14
+ ## When to Use
15
+
16
+ - 臨床予測モデル (再入院予測, 死亡率予測等) を構築するとき
17
+ - EHR (電子健康記録) データの前処理・特徴量エンジニアリングを行うとき
18
+ - フローサイトメトリー (FACS) データを読み込み・解析するとき
19
+ - 臨床タスク向けの ML パイプラインを設計するとき
20
+ - 医療コード (ICD-10, SNOMED, ATC) のマッピングを行うとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. PyHealth 臨床予測パイプライン
27
+
28
+ ```python
29
+ """
30
+ PyHealth による臨床予測モデル構築。
31
+ pip install pyhealth
32
+
33
+ K-Dense-AI 参照: pyhealth — 臨床 ML フレームワーク
34
+ """
35
+ from pyhealth.datasets import MIMIC3Dataset
36
+ from pyhealth.tasks import readmission_prediction_mimic3_fn
37
+
38
+
39
def build_clinical_pipeline(
    mimic3_root,
    tables=("DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"),
    code_mapping=None,
):
    """
    Build a MIMIC-III dataset object for a clinical prediction pipeline.

    Parameters:
        mimic3_root: str — path to the MIMIC-III CSV directory
        tables: tuple — names of the tables to load
        code_mapping: dict | None — code-mapping configuration; when None,
            a default mapping (NDC→ATC level 3, ICD9CM→CCSCM,
            ICD9PROC→CCSPROC) is used

    Returns:
        The constructed MIMIC3Dataset instance.
    """
    # Fall back to the built-in mapping only when the caller gave none.
    default_mapping = {
        "NDC": ("ATC", {"target_kwargs": {"level": 3}}),
        "ICD9CM": "CCSCM",
        "ICD9PROC": "CCSPROC",
    }
    code_mapping = default_mapping if code_mapping is None else code_mapping

    loader_args = {
        "root": mimic3_root,
        "tables": tables,
        "code_mapping": code_mapping,
    }
    dataset = MIMIC3Dataset(**loader_args)

    print(f"MIMIC-III dataset: {len(dataset.patients)} patients")
    return dataset
68
+
69
+
70
def apply_clinical_task(dataset, task_fn=None):
    """
    Apply a clinical task function to the dataset and split the samples.

    Parameters:
        dataset: dataset object exposing ``set_task``
        task_fn: callable | None — task function; when None,
            ``readmission_prediction_mimic3_fn`` is used

    Returns:
        (train, val, test) produced by ``split_by_patient`` with ratios
        0.8 / 0.1 / 0.1.
    """
    from pyhealth.datasets import split_by_patient

    ratios = [0.8, 0.1, 0.1]
    selected_task = readmission_prediction_mimic3_fn if task_fn is None else task_fn

    samples = dataset.set_task(selected_task)
    partitions = split_by_patient(samples, ratios)
    train, val, test = partitions

    print(f"Clinical task samples: "
          f"train={len(train)}, val={len(val)}, test={len(test)}")
    return train, val, test
85
+ ```
86
+
87
+ ## 2. PyHealth モデル学習
88
+
89
+ ```python
90
def train_clinical_model(
    train_dataset,
    val_dataset,
    model_type="Transformer",
    epochs=20,
    batch_size=64,
):
    """
    Train a PyHealth model for a binary clinical prediction task.

    Parameters:
        train_dataset: SampleDataset — used to build the model and to train
        val_dataset: SampleDataset — used for validation during training
        model_type: str — "Transformer", "RETAIN", "GRU", "CNN"
        epochs: int — number of training epochs
        batch_size: int — batch size used for both dataloaders

    Returns:
        (model, trainer) — the trained model and the Trainer that ran it.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
    """
    # Supported name -> (class name in pyhealth.models, extra constructor kwargs).
    # NOTE(review): "GRU" is assumed to be provided by pyhealth.models.RNN
    # with rnn_type="GRU" — confirm against the installed PyHealth version.
    supported_models = {
        "Transformer": ("Transformer", {}),
        "RETAIN": ("RETAIN", {}),
        "GRU": ("RNN", {"rnn_type": "GRU"}),
        "CNN": ("CNN", {}),
    }
    # Validate before importing pyhealth so a bad name fails fast instead of
    # silently training a Transformer under a different label.
    if model_type not in supported_models:
        raise ValueError(
            f"Unsupported model_type {model_type!r}; "
            f"expected one of {sorted(supported_models)}"
        )

    import pyhealth.models as pyhealth_models
    from pyhealth.datasets import get_dataloader
    from pyhealth.trainer import Trainer

    class_name, extra_kwargs = supported_models[model_type]
    ModelClass = getattr(pyhealth_models, class_name)

    model = ModelClass(
        dataset=train_dataset,
        feature_keys=["conditions", "procedures", "drugs"],
        label_key="label",
        mode="binary",
        **extra_kwargs,
    )

    # Trainer.train consumes dataloaders, not raw sample datasets.
    train_loader = get_dataloader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = get_dataloader(val_dataset, batch_size=batch_size, shuffle=False)

    trainer = Trainer(model=model)
    trainer.train(
        train_dataloader=train_loader,
        val_dataloader=val_loader,
        epochs=epochs,
        monitor="pr_auc",
    )

    print(f"Clinical model ({model_type}): trained for {epochs} epochs")
    return model, trainer
131
+ ```
132
+
133
+ ## 3. フローサイトメトリー解析
134
+
135
+ ```python
136
def read_fcs_file(fcs_path):
    """
    Read an FCS file and return its events as a DataFrame.
    pip install flowio

    K-Dense-AI reference: flowio — FCS file I/O

    Parameters:
        fcs_path: str — path to the FCS file

    Returns:
        (df, fcs_data) — a DataFrame with one row per event and one column
        per channel, and the underlying ``flowio.FlowData`` object.
    """
    import flowio
    import numpy as np
    import pandas as pd

    fcs_data = flowio.FlowData(fcs_path)
    channel_count = fcs_data.channel_count
    channel_map = fcs_data.channels

    def _channel_label(number):
        # NOTE(review): FlowData.channels is assumed to be keyed by channel
        # number (int in recent FlowIO releases, str in older ones) with a
        # per-channel dict holding the short name under "pnn" / "PnN" —
        # confirm against the installed FlowIO version.
        entry = channel_map.get(number, channel_map.get(str(number)))
        if isinstance(entry, dict):
            label = entry.get("pnn") or entry.get("PnN")
            if label:
                return label
        # Keep the original flat "P<n>N" lookup as a fallback.
        flat = channel_map.get(f"P{number}N")
        if isinstance(flat, str) and flat:
            return flat
        return f"Channel_{number}"

    # Extract channel names (FCS channels are numbered from 1).
    channels = [_channel_label(i) for i in range(1, channel_count + 1)]

    # The event values come as one flat sequence; fold it into rows.
    events = np.array(fcs_data.events).reshape(-1, channel_count)
    df = pd.DataFrame(events, columns=channels)

    print(f"FCS '{fcs_path}': {len(df)} events x {len(channels)} channels")
    return df, fcs_data
164
+
165
+
166
def gate_fcs_data(df, channel, low=None, high=None):
    """
    Apply a simple rectangular (one-dimensional range) gate.

    Parameters:
        df: pd.DataFrame — FCS event data
        channel: str — name of the channel (column) to gate on
        low: float | None — inclusive lower bound; None means no lower bound
        high: float | None — inclusive upper bound; None means no upper bound

    Returns:
        pd.DataFrame containing only the events inside the gate; the
        original index labels are preserved.
    """
    # Imported locally: this snippet has no module-level pandas import.
    import pandas as pd

    # Build the mask on df's own index so that gating an already-gated
    # (non-contiguous index) frame stays aligned row-for-row.
    mask = pd.Series(True, index=df.index, dtype=bool)
    if low is not None:
        mask &= df[channel] >= low
    if high is not None:
        mask &= df[channel] <= high

    gated = df[mask]
    total = len(df)
    # Avoid ZeroDivisionError when an empty frame is gated.
    pct = len(gated) / total * 100 if total else 0.0
    print(f"Gate '{channel}' [{low},{high}]: "
          f"{len(gated)}/{len(df)} events ({pct:.1f}%)")
    return gated
187
+ ```
188
+
189
+ ## 4. 医療コードマッピング
190
+
191
+ ```python
192
def map_medical_codes(codes, source_system, target_system):
    """
    Map medical codes from one coding system to another.

    Parameters:
        codes: list[str] — source codes to translate
        source_system: str — "ICD9CM", "ICD10CM", "NDC", "ATC", "SNOMED"
        target_system: str — coding system to translate into

    Returns:
        dict mapping each source code to the result of ``CrossMap.map``;
        an empty dict when ``pyhealth.medcode`` cannot be imported.
    """
    try:
        from pyhealth.medcode import CrossMap

        mapper = CrossMap(source_system, target_system)
        results = {code: mapper.map(code) for code in codes}

        # Count only the codes that produced a non-empty mapping.
        mapped_count = len([hit for hit in results.values() if hit])
        print(f"Code mapping {source_system}→{target_system}: "
              f"{mapped_count}/{len(codes)} mapped")
        return results

    except ImportError:
        print("pyhealth.medcode not available; install pyhealth")
        return {}
218
+ ```
219
+
220
+ ## 5. 臨床モデル評価
221
+
222
+ ```python
223
def evaluate_clinical_model(trainer, test_dataset, batch_size=64):
    """
    Evaluate a trained clinical prediction model.

    Parameters:
        trainer: Trainer — trainer holding the trained model
        test_dataset: SampleDataset | DataLoader — test data; a dataset is
            wrapped in a non-shuffled dataloader, an existing DataLoader is
            passed through unchanged
        batch_size: int — batch size used when a dataloader has to be built

    Returns:
        dict of metric name → value as returned by ``trainer.evaluate``.
    """
    from torch.utils.data import DataLoader

    if isinstance(test_dataset, DataLoader):
        test_loader = test_dataset
    else:
        # Trainer.evaluate iterates batches, so a raw sample dataset must be
        # wrapped in a dataloader first.
        from pyhealth.datasets import get_dataloader

        test_loader = get_dataloader(
            test_dataset, batch_size=batch_size, shuffle=False
        )

    metrics = trainer.evaluate(test_loader)

    print("Clinical model evaluation:")
    for metric_name, value in metrics.items():
        print(f" {metric_name}: {value:.4f}")

    return metrics
238
+ ```
239
+
240
+ ## References
241
+
242
+ ### Output Files
243
+
244
+ | ファイル | 形式 |
245
+ |---|---|
246
+ | `results/clinical_predictions.csv` | CSV |
247
+ | `results/clinical_metrics.json` | JSON |
248
+ | `results/fcs_processed.csv` | CSV |
249
+ | `results/code_mapping.json` | JSON |
250
+
251
+ ### 利用可能ツール
252
+
253
+ | カテゴリ | 主要ツール | 用途 |
254
+ |---|---|---|
255
+ | (K-Dense) | `pyhealth` | 臨床 ML フレームワーク |
256
+ | (K-Dense) | `flowio` | FCS ファイル I/O |
257
+
258
+ > **注**: 本スキルは ToolUniverse ツールを持たず、
259
+ > K-Dense-AI Scientific Skills からの参照のみ。
260
+
261
+ ### 参照スキル
262
+
263
+ | スキル | 関連 |
264
+ |---|---|
265
+ | `scientific-clinical-nlp` | 臨床 NLP |
266
+ | `scientific-biostatistics-survival` | 生存時間解析 |
267
+ | `scientific-single-cell-rnaseq` | 単一細胞解析 |
268
+ | `scientific-machine-learning-omics` | ML x オミクス |
269
+ | `scientific-biothings-idmapping` | ID マッピング |
270
+
271
+ ### 依存パッケージ
272
+
273
+ `pyhealth`, `flowio`, `numpy`, `pandas`, `scikit-learn`
@@ -0,0 +1,244 @@
1
+ ---
2
+ name: scientific-human-protein-atlas
3
+ description: |
4
+ Human Protein Atlas (HPA) 統合スキル。組織/細胞タンパク質発現、
5
+ がん予後バイオマーカー、RNA 発現プロファイル、細胞内局在、
6
+ タンパク質相互作用の包括的検索・解析パイプライン。
7
+ ---
8
+
9
+ # Scientific Human Protein Atlas
10
+
11
+ HPA REST API を活用した組織・細胞レベルの
12
+ タンパク質発現プロファイリングパイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 遺伝子/タンパク質の組織発現パターンを調べるとき
17
+ - がん予後バイオマーカー候補を評価するとき
18
+ - 細胞内局在 (subcellular localization) を確認するとき
19
+ - 細胞株間の発現比較を行うとき
20
+ - RNA 発現データ (HPA/GTEx/FANTOM5) を統合するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. HPA 遺伝子基本情報取得
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ HPA_API = "https://www.proteinatlas.org/api"
33
+
34
+
35
def get_hpa_gene_info(ensembl_id, timeout=30):
    """
    Fetch basic gene information from the Human Protein Atlas.

    Parameters:
        ensembl_id: str — Ensembl gene ID (e.g., "ENSG00000141510")
        timeout: float — seconds to wait for the HTTP response

    Returns:
        (info, data) — a summary dict of selected fields and the full
        decoded JSON payload.

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        HPA_get_gene_basic_info_by_ensembl_id(ensembl_id=ensembl_id)
        HPA_get_comprehensive_gene_details_by_ensembl_id(ensembl_id=ensembl_id)
    """
    url = f"https://www.proteinatlas.org/{ensembl_id}.json"
    # Without a timeout, requests can block indefinitely on a stalled server.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    info = {
        "ensembl_id": ensembl_id,
        "gene_name": data.get("Gene", ""),
        "gene_description": data.get("Gene description", ""),
        "uniprot_id": data.get("Uniprot", []),
        "chromosome": data.get("Chromosome", ""),
        "protein_class": data.get("Protein class", []),
        "evidence": data.get("Evidence", ""),
    }

    print(f"HPA gene: {info['gene_name']} ({ensembl_id})")
    return info, data
63
+ ```
64
+
65
+ ## 2. 組織 RNA 発現プロファイル
66
+
67
+ ```python
68
def get_tissue_rna_expression(gene_name, timeout=30):
    """
    Fetch tissue-level RNA expression data from the Human Protein Atlas.

    Parameters:
        gene_name: str — identifier inserted into the HPA JSON URL
            (NOTE(review): the HPA per-gene JSON endpoint is presumably
            addressed by Ensembl gene ID — confirm gene symbols resolve)
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame with columns tissue, cell_type, ntpm, detection,
        sorted by ntpm descending; empty when the gene has no entries.

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        HPA_get_rna_expression_by_source(gene=gene_name, source="HPA")
        HPA_get_rna_expression_in_specific_tissues(gene=gene_name, tissues=tissues)
    """
    url = f"https://www.proteinatlas.org/{gene_name}.json"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # `or []` also covers a key that is present with a JSON null value,
    # which a plain .get(key, []) default would not.
    rna_data = data.get("RNA tissue specific nTPM") or []
    if isinstance(rna_data, dict):
        # NOTE(review): the field may be serialised as a tissue -> nTPM
        # mapping rather than a list of records — confirm against a live
        # response. Normalise it to the record shape used below.
        rna_data = [
            {"Tissue": tissue, "nTPM": value}
            for tissue, value in rna_data.items()
        ]

    results = [
        {
            "tissue": entry.get("Tissue", ""),
            "cell_type": entry.get("Cell type", ""),
            # Missing, null or empty nTPM counts as 0, as in the original default.
            "ntpm": float(entry.get("nTPM") or 0),
            "detection": entry.get("Detection", ""),
        }
        for entry in rna_data
    ]

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("ntpm", ascending=False)

    print(f"HPA RNA expression '{gene_name}': {len(df)} tissue entries")
    return df
97
+ ```
98
+
99
+ ## 3. がん予後バイオマーカー解析
100
+
101
+ ```python
102
def get_cancer_prognostics(gene_name, timeout=30):
    """
    Fetch cancer prognostic data from the Human Protein Atlas.

    Parameters:
        gene_name: str — identifier inserted into the HPA JSON URL
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame with columns cancer_type, prognostic_type,
        is_prognostic, p_value, high_expression_favorable, sorted by
        p_value ascending; empty when no prognostic entries are present.

    Raises:
        requests.HTTPError: if the server answers with an error status.

    NOTE(review): this reads the "Pathology prognostics" key as a list of
    records — confirm that key and shape against a live HPA response.

    ToolUniverse:
        HPA_get_cancer_prognostics_by_gene(gene=gene_name)
    """
    url = f"https://www.proteinatlas.org/{gene_name}.json"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # `or []` also covers a key that is present with a JSON null value.
    prognostics = data.get("Pathology prognostics") or []
    results = []
    for entry in prognostics:
        raw_p = entry.get("p-value")
        # A missing/null/empty p-value is treated as 1.0 (not significant);
        # an explicit 0 must be kept, so do not use a plain `or` here.
        p_value = 1.0 if raw_p is None or raw_p == "" else float(raw_p)
        results.append({
            "cancer_type": entry.get("Cancer type", ""),
            "prognostic_type": entry.get("Prognostic type", ""),
            "is_prognostic": entry.get("Is prognostic", False),
            "p_value": p_value,
            "high_expression_favorable": entry.get(
                "High expression is favorable", None
            ),
        })

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("p_value")
        significant = df[df["p_value"] < 0.05]
        print(f"HPA cancer prognostics '{gene_name}': "
              f"{len(significant)}/{len(df)} significant")
    else:
        print(f"HPA cancer prognostics '{gene_name}': no data")
    return df
136
+ ```
137
+
138
+ ## 4. 細胞内局在
139
+
140
+ ```python
141
def get_subcellular_location(gene_name, timeout=30):
    """
    Fetch subcellular localization data from the Human Protein Atlas.

    Parameters:
        gene_name: str — identifier inserted into the HPA JSON URL
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame with columns location, reliability, enhanced,
        supported, cell_lines; empty when no locations are reported.

    Raises:
        requests.HTTPError: if the server answers with an error status.

    ToolUniverse:
        HPA_get_subcellular_location(gene=gene_name)
    """
    url = f"https://www.proteinatlas.org/{gene_name}.json"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # `or []` also covers a key that is present with a JSON null value.
    sc = data.get("Subcellular location") or []
    if isinstance(sc, str):
        sc = [sc]

    results = []
    for entry in sc:
        if not isinstance(entry, dict):
            # NOTE(review): the field may be a list of plain location names
            # rather than records — confirm against a live response. Such
            # entries are kept as the location with default metadata.
            entry = {"Location": entry}
        results.append({
            "location": entry.get("Location", ""),
            "reliability": entry.get("Reliability", ""),
            "enhanced": entry.get("Enhanced", False),
            "supported": entry.get("Supported", False),
            "cell_lines": entry.get("Cell lines", []),
        })

    df = pd.DataFrame(results)
    print(f"HPA subcellular '{gene_name}': {len(df)} locations")
    return df
167
+ ```
168
+
169
+ ## 5. タンパク質相互作用ネットワーク (HPA)
170
+
171
+ ```python
172
def get_hpa_protein_interactions(gene_name, timeout=30):
    """
    Fetch protein interaction partners from the Human Protein Atlas.

    Parameters:
        gene_name: str — identifier inserted into the HPA JSON URL
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame with columns partner_gene, partner_ensembl,
        confidence, source; empty when no partners are reported.

    Raises:
        requests.HTTPError: if the server answers with an error status.

    NOTE(review): this reads the "Protein interaction partners" key as a
    list of records — confirm that key and shape against a live response.

    ToolUniverse:
        HPA_get_protein_interactions_by_gene(gene=gene_name)
        HPA_get_biological_processes_by_gene(gene=gene_name)
        HPA_get_contextual_biological_process_analysis(gene=gene_name)
    """
    url = f"https://www.proteinatlas.org/{gene_name}.json"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # `or []` also covers a key that is present with a JSON null value.
    interactions = data.get("Protein interaction partners") or []
    results = [
        {
            "partner_gene": partner.get("Gene", ""),
            "partner_ensembl": partner.get("Ensembl", ""),
            "confidence": partner.get("Confidence", ""),
            "source": partner.get("Source", ""),
        }
        for partner in interactions
    ]

    df = pd.DataFrame(results)
    print(f"HPA interactions '{gene_name}': {len(df)} partners")
    return df
199
+ ```
200
+
201
+ ## References
202
+
203
+ ### Output Files
204
+
205
+ | ファイル | 形式 |
206
+ |---|---|
207
+ | `results/hpa_gene_info.json` | JSON |
208
+ | `results/hpa_tissue_expression.csv` | CSV |
209
+ | `results/hpa_cancer_prognostics.csv` | CSV |
210
+ | `results/hpa_subcellular.csv` | CSV |
211
+ | `results/hpa_interactions.csv` | CSV |
212
+
213
+ ### 利用可能ツール
214
+
215
+ | カテゴリ | 主要ツール | 用途 |
216
+ |---|---|---|
217
+ | HPA | `HPA_generic_search` | 汎用検索 |
218
+ | HPA | `HPA_get_gene_basic_info_by_ensembl_id` | 遺伝子基本情報 |
219
+ | HPA | `HPA_get_comprehensive_gene_details_by_ensembl_id` | 包括的詳細 |
220
+ | HPA | `HPA_get_rna_expression_by_source` | RNA 発現 |
221
+ | HPA | `HPA_get_rna_expression_in_specific_tissues` | 組織別発現 |
222
+ | HPA | `HPA_get_cancer_prognostics_by_gene` | がん予後 |
223
+ | HPA | `HPA_get_subcellular_location` | 細胞内局在 |
224
+ | HPA | `HPA_get_protein_interactions_by_gene` | PPI |
225
+ | HPA | `HPA_get_biological_processes_by_gene` | 生物学的プロセス |
226
+ | HPA | `HPA_get_contextual_biological_process_analysis` | プロセス解析 |
227
+ | HPA | `HPA_get_disease_expression_by_gene_tissue_disease` | 疾患発現 |
228
+ | HPA | `HPA_get_comparative_expression_by_gene_and_cellline` | 細胞株比較 |
229
+ | HPA | `HPA_get_gene_tsv_data_by_ensembl_id` | TSV データ |
230
+ | HPA | `HPA_search_genes_by_query` | 遺伝子検索 |
231
+
232
+ ### 参照スキル
233
+
234
+ | スキル | 関連 |
235
+ |---|---|
236
+ | `scientific-gene-expression-transcriptomics` | GEO/GTEx 発現解析 |
237
+ | `scientific-proteomics-mass-spectrometry` | プロテオミクス |
238
+ | `scientific-cancer-genomics` | がんゲノミクス |
239
+ | `scientific-protein-interaction-network` | PPI ネットワーク |
240
+ | `scientific-pathway-enrichment` | パスウェイ濃縮 |
241
+
242
+ ### 依存パッケージ
243
+
244
+ `requests`, `pandas`