@nahisaho/satori 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -2
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +14 -0
- package/src/.github/skills/scientific-bioinformatics/SKILL.md +13 -0
- package/src/.github/skills/scientific-cheminformatics/SKILL.md +13 -0
- package/src/.github/skills/scientific-citation-checker/SKILL.md +12 -0
- package/src/.github/skills/scientific-clinical-decision-support/SKILL.md +14 -0
- package/src/.github/skills/scientific-deep-research/SKILL.md +15 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +14 -0
- package/src/.github/skills/scientific-drug-repurposing/SKILL.md +14 -0
- package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +14 -0
- package/src/.github/skills/scientific-environmental-ecology/SKILL.md +295 -0
- package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +332 -0
- package/src/.github/skills/scientific-grant-writing/SKILL.md +12 -0
- package/src/.github/skills/scientific-graph-neural-networks/SKILL.md +12 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +341 -0
- package/src/.github/skills/scientific-infectious-disease/SKILL.md +342 -0
- package/src/.github/skills/scientific-meta-analysis/SKILL.md +11 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +13 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +349 -0
- package/src/.github/skills/scientific-multi-omics/SKILL.md +13 -0
- package/src/.github/skills/scientific-network-analysis/SKILL.md +13 -0
- package/src/.github/skills/scientific-pharmacovigilance/SKILL.md +15 -0
- package/src/.github/skills/scientific-population-genetics/SKILL.md +336 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +14 -0
- package/src/.github/skills/scientific-protein-design/SKILL.md +13 -0
- package/src/.github/skills/scientific-protein-structure-analysis/SKILL.md +13 -0
- package/src/.github/skills/scientific-sequence-analysis/SKILL.md +13 -0
- package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +361 -0
- package/src/.github/skills/scientific-spatial-transcriptomics/SKILL.md +281 -0
- package/src/.github/skills/scientific-survival-clinical/SKILL.md +12 -0
- package/src/.github/skills/scientific-systems-biology/SKILL.md +310 -0
- package/src/.github/skills/scientific-text-mining-nlp/SKILL.md +358 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +14 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-environmental-ecology
|
|
3
|
+
description: |
|
|
4
|
+
環境科学・生態学解析スキル。種分布モデリング(SDM / MaxEnt)・
|
|
5
|
+
生物多様性指標(α/β/γ 多様性)・群集構造解析(NMDS/CCA/RDA)・
|
|
6
|
+
生態学的ニッチモデリング・保全優先順位評価・OBIS/GBIF データ統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Environmental Ecology
|
|
10
|
+
|
|
11
|
+
環境科学・生態学に特化した解析パイプラインを提供する。
|
|
12
|
+
種分布モデリング、生物多様性評価、群集構造解析、
|
|
13
|
+
保全優先順位付け、海洋/陸域の生態系データ統合を扱う。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- 種分布モデル(SDM)を構築して生息適地を推定するとき
|
|
18
|
+
- 群集の生物多様性指標を算出・比較するとき
|
|
19
|
+
- 群集構造の環境要因への応答を解析するとき(CCA / RDA)
|
|
20
|
+
- GBIF / OBIS から出現データを取得して空間解析を行うとき
|
|
21
|
+
- 保全優先区域の評価・ランキングを行うとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. 種分布モデリング(SDM)
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
|
33
|
+
from sklearn.model_selection import cross_val_score
|
|
34
|
+
from sklearn.metrics import roc_auc_score
|
|
35
|
+
|
|
36
|
+
def species_distribution_model(occurrences, background, env_layers,
                               method="maxent", n_folds=5):
    """Fit a species distribution model (SDM) and predict habitat suitability.

    Args:
        occurrences: Iterable of (lon, lat) presence coordinates.
        background: Iterable of (lon, lat) pseudo-absence (background) points.
        env_layers: Environmental predictor stack (e.g. Bio1-Bio19). It is
            handed to ``extract_env_values``, which calls ``.index()`` and
            ``.read()`` on it, so a rasterio-style dataset is expected.
            NOTE(review): an array with features on the last axis is accepted
            for building the prediction grid only - confirm against callers.
        method: One of
            - "maxent": maximum-entropy model (``elapid.MaxentModel``)
            - "rf": random forest on presence/background labels
            - "gbm": gradient boosting on presence/background labels
            "ensemble" is not implemented and is rejected like any other
            unsupported value.
        n_folds: Number of cross-validation folds used for the AUC report
            ("rf" and "gbm" only).

    Returns:
        ``(model, pred)``: the fitted model and a 1-D array holding one
        suitability score per grid cell of ``env_layers``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.

    MaxEnt principle: choose the distribution P(x) over the environmental
    covariates x that maximises H(P) = -sum P(x) log P(x) subject to
    E_P[f_j] = E_data[f_j] for every feature f_j.
    """
    supported = ("maxent", "rf", "gbm")
    # Validate first: previously an unsupported method (or "gbm") reached the
    # final return with `model`/`pred` unbound.
    if method not in supported:
        raise ValueError(
            f"Unsupported SDM method {method!r}; expected one of {supported}"
        )

    # Environmental covariates at presence and background points.
    X_pres = extract_env_values(occurrences, env_layers)
    X_bg = extract_env_values(background, env_layers)
    X = np.vstack([X_pres, X_bg])
    y = np.concatenate([np.ones(len(X_pres)), np.zeros(len(X_bg))])

    # One row of covariates per grid cell. A dataset exposing read() returns
    # (bands, rows, cols), so the band axis is moved last before flattening.
    if hasattr(env_layers, "read"):
        stack = np.moveaxis(np.asarray(env_layers.read()), 0, -1)
    else:
        stack = np.asarray(env_layers)
    grid = stack.reshape(-1, stack.shape[-1])

    if method == "maxent":
        from elapid import MaxentModel

        model = MaxentModel()
        # fit() takes covariates plus presence(1)/background(0) labels.
        model.fit(X, y)
        pred = model.predict(grid)
        return model, pred

    if method == "rf":
        label = "RF"
        model = RandomForestClassifier(n_estimators=500, random_state=42)
    else:
        label = "GBM"
        model = GradientBoostingClassifier(n_estimators=300, max_depth=5,
                                           random_state=42)

    model.fit(X, y)
    auc_scores = cross_val_score(model, X, y, cv=n_folds, scoring="roc_auc")
    print(f" {label} AUC: {np.mean(auc_scores):.3f} ± {np.std(auc_scores):.3f}")
    # Probability of the presence class for every grid cell.
    pred = model.predict_proba(grid)[:, 1]
    return model, pred
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def extract_env_values(coords, env_layers):
    """Sample every environmental band at the given coordinates.

    Args:
        coords: Iterable of (lon, lat) pairs.
        env_layers: Object exposing ``index(lon, lat) -> (row, col)`` and
            ``read() -> array`` indexed as ``[band, row, col]``
            (presumably an open rasterio dataset - confirm against callers).

    Returns:
        Array of shape ``(n_points, n_bands)``; ``(0, n_bands)`` when
        ``coords`` is empty so the result can still be stacked.
    """
    # Read the raster once; the previous version re-read it for every point.
    bands = np.asarray(env_layers.read())
    cells = [env_layers.index(lon, lat) for lon, lat in coords]
    if not cells:
        return np.empty((0, bands.shape[0]), dtype=bands.dtype)

    rows, cols = zip(*cells)
    # Fancy indexing yields (n_bands, n_points); transpose to one row per point.
    return bands[:, list(rows), list(cols)].T
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## 2. 生物多様性指標
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from scipy.stats import entropy
|
|
101
|
+
|
|
102
|
+
def biodiversity_indices(community_matrix, metadata=None):
    """Compute alpha, beta and gamma diversity for a site-by-species table.

    Args:
        community_matrix: DataFrame with one row per site and one column per
            species; values are abundances (counts).
        metadata: Unused; kept for interface compatibility.

    Returns:
        ``(alpha_df, summary)`` where ``alpha_df`` is indexed by site with
        columns richness, shannon, simpson, evenness, chao1 and
        total_abundance, and ``summary`` holds gamma_diversity, mean_alpha
        and beta_whittaker.

    Alpha diversity (within site):
        - Richness: S = number of species present
        - Shannon: H' = -sum p_i ln(p_i)
        - Simpson: D = 1 - sum p_i^2
        - Pielou evenness: J = H' / ln(S)
        - Chao1: S + f1^2 / (2 f2); when there are no doubletons (f2 = 0)
          the bias-corrected form S + f1 (f1 - 1) / 2 is used instead.
    Beta diversity: Whittaker's beta = gamma / mean(alpha) - 1.
    Gamma diversity: number of species present in at least one site.
    """
    columns = ["site", "richness", "shannon", "simpson", "evenness",
               "chao1", "total_abundance"]
    results = []
    for site, row in community_matrix.iterrows():
        counts = row[row > 0].values
        total = counts.sum()
        S = len(counts)

        if S == 0:
            # A site without any individuals has no diversity; avoid 0/0 and
            # the misleading Simpson value of 1 the empty sum would give.
            H = D_simpson = J = 0.0
        else:
            freqs = counts / total
            H = entropy(freqs)
            D_simpson = 1 - np.sum(freqs ** 2)
            J = H / np.log(S) if S > 1 else 0

        f1 = int(np.sum(counts == 1))  # singletons
        f2 = int(np.sum(counts == 2))  # doubletons
        if f2 > 0:
            chao1 = S + (f1 ** 2) / (2 * f2)
        else:
            chao1 = S + f1 * (f1 - 1) / 2

        results.append({
            "site": site,
            "richness": S,
            "shannon": round(H, 4),
            "simpson": round(D_simpson, 4),
            "evenness": round(J, 4),
            "chao1": round(chao1, 1),
            "total_abundance": int(total),
        })

    # Explicit columns keep set_index working for a matrix with no rows.
    alpha_df = pd.DataFrame(results, columns=columns).set_index("site")

    # Gamma diversity: species observed anywhere in the landscape.
    gamma = int((community_matrix > 0).any(axis=0).sum())
    mean_alpha = alpha_df["richness"].mean()
    # Whittaker's beta is undefined when no site contains any species.
    beta_whittaker = gamma / mean_alpha - 1 if mean_alpha > 0 else float("nan")

    summary = {
        "gamma_diversity": gamma,
        "mean_alpha": round(mean_alpha, 2),
        "beta_whittaker": round(beta_whittaker, 3),
    }

    print(f" Biodiversity: γ={gamma}, ᾱ={mean_alpha:.1f}, β_w={beta_whittaker:.3f}")
    return alpha_df, summary
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## 3. 群集構造解析(NMDS / CCA / RDA)
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
def community_ordination(community_matrix, env_df=None, method="nmds",
                         n_dims=2, distance="bray"):
    """Ordinate sites by community composition.

    Args:
        community_matrix: DataFrame of abundances, one row per site.
        env_df: Unused by the implemented methods; kept for interface
            compatibility (constrained ordinations such as CCA/RDA/DCA are
            not implemented here).
        method: "nmds" (non-metric multidimensional scaling, rank based) or
            "pcoa" (principal coordinates analysis).
        n_dims: Number of ordination axes to return.
        distance: Dissimilarity metric name passed to scikit-bio's
            ``beta_diversity``. The short names "bray" / "bray-curtis" are
            translated to "braycurtis".

    Returns:
        - "nmds": ``(coords, stress)``
        - "pcoa": ``(coords, proportion_explained)`` for the first
          ``n_dims`` axes.

    Raises:
        ValueError: If ``method`` is not "nmds" or "pcoa".

    Rule-of-thumb NMDS stress interpretation:
        < 0.05 excellent, < 0.10 good, < 0.20 acceptable,
        > 0.20 poor (consider more dimensions).
    """
    supported = ("nmds", "pcoa")
    # Fail loudly instead of silently returning None for unsupported methods.
    if method not in supported:
        raise ValueError(
            f"Unsupported ordination method {method!r}; expected one of {supported}"
        )

    from skbio.diversity import beta_diversity

    # NOTE(review): the default "bray" does not appear to be a metric name the
    # underlying SciPy distance routines accept, hence the alias table.
    metric_aliases = {"bray": "braycurtis", "bray-curtis": "braycurtis"}
    metric = metric_aliases.get(distance, distance)
    dm = beta_diversity(metric, community_matrix.values,
                        community_matrix.index)

    if method == "nmds":
        from sklearn.manifold import MDS

        mds = MDS(n_components=n_dims, dissimilarity="precomputed",
                  metric=False, random_state=42, max_iter=500)
        # dm.data is already the square dissimilarity matrix that a
        # "precomputed" MDS expects; squareform() would flatten it.
        coords = mds.fit_transform(dm.data)
        stress = mds.stress_
        print(f" NMDS: stress={stress:.4f} ({n_dims}D)")
        return coords, stress

    from skbio.stats.ordination import pcoa

    result = pcoa(dm)
    return result.samples.values[:, :n_dims], result.proportion_explained[:n_dims]
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## 4. 種の保全優先順位評価
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
def conservation_priority(species_data, criteria_weights=None):
    """Rank species by a weighted multi-criteria conservation score.

    Args:
        species_data: DataFrame with one row per species. An
            ``iucn_category`` column (CR/EN/VU/NT/LC/DD), when present, is
            mapped to a numeric ``iucn_score``. Any column named in
            ``criteria_weights`` contributes to the score.
        criteria_weights: Mapping of criterion column -> weight. Defaults to
            IUCN score 0.30, evolutionary distinctiveness 0.20, habitat loss
            rate 0.20, endemism 0.15, ecosystem service 0.15.

    Returns:
        A new DataFrame (the input is left untouched) with ``<col>_norm``
        columns (min-max scaled), a ``priority_score`` column, sorted from
        highest to lowest priority.

    Notes:
        - A criterion that is absent or constant across species contributes
          nothing to the score.
        - Missing criterion values (including IUCN categories outside the
          mapping) count as 0 in the score rather than turning it into NaN.
    """
    if criteria_weights is None:
        criteria_weights = {
            "iucn_score": 0.30,
            "evolutionary_distinctiveness": 0.20,
            "habitat_loss_rate": 0.20,
            "endemism": 0.15,
            "ecosystem_service": 0.15,
        }

    iucn_mapping = {"CR": 5, "EN": 4, "VU": 3, "NT": 2, "LC": 1, "DD": 0}

    # Work on a copy so the caller's frame does not gain helper columns.
    ranked = species_data.copy()
    if "iucn_category" in ranked.columns:
        ranked["iucn_score"] = ranked["iucn_category"].map(iucn_mapping)

    score = pd.Series(0.0, index=ranked.index)
    for col, weight in criteria_weights.items():
        if col not in ranked.columns:
            continue
        lowest = ranked[col].min()
        highest = ranked[col].max()
        if not highest > lowest:
            # Constant (or all-missing) column: nothing to discriminate on.
            continue
        normalised = (ranked[col] - lowest) / (highest - lowest)
        ranked[f"{col}_norm"] = normalised
        score = score + weight * normalised.fillna(0.0)

    ranked["priority_score"] = score
    ranked = ranked.sort_values("priority_score", ascending=False,
                                kind="stable")

    print(f" Conservation: {len(ranked)} species ranked")
    return ranked
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## References
|
|
256
|
+
|
|
257
|
+
### Output Files
|
|
258
|
+
|
|
259
|
+
| ファイル | 形式 |
|
|
260
|
+
|---|---|
|
|
261
|
+
| `results/sdm_predictions.tif` | GeoTIFF |
|
|
262
|
+
| `results/biodiversity_indices.csv` | CSV |
|
|
263
|
+
| `results/ordination_scores.csv` | CSV |
|
|
264
|
+
| `results/conservation_priority.csv` | CSV |
|
|
265
|
+
| `figures/sdm_map.png` | PNG |
|
|
266
|
+
| `figures/nmds_plot.png` | PNG |
|
|
267
|
+
| `figures/diversity_comparison.png` | PNG |
|
|
268
|
+
|
|
269
|
+
### 利用可能ツール
|
|
270
|
+
|
|
271
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
272
|
+
|
|
273
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
274
|
+
|---|---|---|
|
|
275
|
+
| OBIS | `OBIS_search_taxa` | 海洋生物分類検索 |
|
|
276
|
+
| OBIS | `OBIS_search_occurrences` | 海洋生物出現データ |
|
|
277
|
+
| GBIF | `GBIF_search_species` | 種名検索 |
|
|
278
|
+
| GBIF | `GBIF_search_occurrences` | 出現記録検索 |
|
|
279
|
+
| Paleobiology | `Paleobiology_get_fossils` | 化石記録データ |
|
|
280
|
+
| OLS | `ols_search_terms` | 生態学オントロジー検索 |
|
|
281
|
+
| PubMed | `PubMed_search_articles` | 生態学文献検索 |
|
|
282
|
+
|
|
283
|
+
### 参照スキル
|
|
284
|
+
|
|
285
|
+
| スキル | 連携内容 |
|
|
286
|
+
|---|---|
|
|
287
|
+
| [scientific-statistical-testing](../scientific-statistical-testing/SKILL.md) | 多様性有意差検定 |
|
|
288
|
+
| [scientific-pca-tsne](../scientific-pca-tsne/SKILL.md) | 次元削減・序列化 |
|
|
289
|
+
| [scientific-ml-classification](../scientific-ml-classification/SKILL.md) | SDM モデル(RF/GBM) |
|
|
290
|
+
| [scientific-image-analysis](../scientific-image-analysis/SKILL.md) | リモートセンシング画像解析 |
|
|
291
|
+
| [scientific-time-series](../scientific-time-series/SKILL.md) | 生態系時系列トレンド |
|
|
292
|
+
|
|
293
|
+
### 依存パッケージ
|
|
294
|
+
|
|
295
|
+
- scikit-bio, rasterio, geopandas, elapid, shapely, pygbif
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-epidemiology-public-health
|
|
3
|
+
description: |
|
|
4
|
+
疫学・公衆衛生解析スキル。観察研究デザイン(コホート/症例対照/横断)・
|
|
5
|
+
リスク指標(RR/OR/HR/NNT)・標準化死亡比(SMR)・年齢調整率・
|
|
6
|
+
空間疫学(GIS / 空間クラスタリング)・因果推論ダイアグラム(DAG)・
|
|
7
|
+
WHO/CDC/EU 公衆衛生データ統合パイプライン。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Epidemiology & Public Health
|
|
11
|
+
|
|
12
|
+
疫学研究と公衆衛生データ解析のパイプラインを提供する。
|
|
13
|
+
研究デザイン設計、リスク指標算出、交絡調整、
|
|
14
|
+
空間疫学、健康格差評価、公衆衛生データベース連携を体系的に扱う。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 観察研究のリスク指標(RR / OR / HR)を算出するとき
|
|
19
|
+
- 年齢調整率・標準化死亡比(SMR)を計算するとき
|
|
20
|
+
- 空間疫学(疾患クラスタリング・GIS マッピング)を行うとき
|
|
21
|
+
- DAG(有向非巡回グラフ)で交絡構造を分析するとき
|
|
22
|
+
- WHO / CDC / EU の公衆衛生データを取得・解析するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. リスク指標算出
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import numpy as np
|
|
32
|
+
import pandas as pd
|
|
33
|
+
from scipy.stats import norm
|
|
34
|
+
|
|
35
|
+
def calculate_risk_measures(a, b, c, d, alpha=0.05):
    """Compute risk measures and confidence intervals from a 2x2 table.

                   Disease+   Disease-
        Exposed+      a          b      -> a+b
        Exposed-      c          d      -> c+d

    Args:
        a, b, c, d: Non-negative cell counts laid out as above.
        alpha: Significance level for the two-sided confidence intervals.

    Returns:
        dict with risk_exposed, risk_unexposed, RR (+RR_CI), OR (+OR_CI),
        RD (+RD_CI), NNT, AF and continuity_corrected.
        - RR: risk ratio R_exposed / R_unexposed (cohort studies)
        - OR: odds ratio (a*d) / (b*c) (case-control studies)
        - RD: risk difference R_exposed - R_unexposed
        - NNT: 1 / |RD| (infinite when RD is 0)
        - AF: attributable fraction (RR - 1) / RR

    Raises:
        ValueError: If a count is negative or an exposure group is empty.

    Notes:
        When any cell is zero the ratio measures (RR, OR) and their standard
        errors are computed after adding 0.5 to every cell (Haldane-Anscombe
        continuity correction); ``continuity_corrected`` is then True. Risks
        and the risk difference always use the raw counts.
    """
    cells = (a, b, c, d)
    if any(n < 0 for n in cells):
        raise ValueError("cell counts must be non-negative")
    if a + b == 0 or c + d == 0:
        raise ValueError("each exposure group needs at least one subject")

    z = norm.ppf(1 - alpha / 2)

    # Risks from the raw counts.
    R1 = a / (a + b)  # exposed
    R0 = c / (c + d)  # unexposed

    # Zero cells would otherwise divide by zero in the ratios and log-SEs.
    corrected = any(n == 0 for n in cells)
    if corrected:
        a_, b_, c_, d_ = (n + 0.5 for n in cells)
    else:
        a_, b_, c_, d_ = cells

    # Risk ratio with a log-scale Wald interval.
    RR = (a_ / (a_ + b_)) / (c_ / (c_ + d_))
    ln_RR_se = np.sqrt(1/a_ - 1/(a_ + b_) + 1/c_ - 1/(c_ + d_))
    RR_ci = (RR * np.exp(-z * ln_RR_se), RR * np.exp(z * ln_RR_se))

    # Odds ratio with a log-scale (Woolf) interval.
    OR = (a_ * d_) / (b_ * c_)
    ln_OR_se = np.sqrt(1/a_ + 1/b_ + 1/c_ + 1/d_)
    OR_ci = (OR * np.exp(-z * ln_OR_se), OR * np.exp(z * ln_OR_se))

    # Risk difference with a Wald interval.
    RD = R1 - R0
    RD_se = np.sqrt(R1*(1-R1)/(a+b) + R0*(1-R0)/(c+d))
    RD_ci = (RD - z * RD_se, RD + z * RD_se)

    NNT = 1 / abs(RD) if RD != 0 else np.inf
    AF = (RR - 1) / RR if RR > 0 else 0

    results = {
        "risk_exposed": round(R1, 4),
        "risk_unexposed": round(R0, 4),
        "RR": round(RR, 4), "RR_CI": [round(x, 4) for x in RR_ci],
        "OR": round(OR, 4), "OR_CI": [round(x, 4) for x in OR_ci],
        "RD": round(RD, 4), "RD_CI": [round(x, 4) for x in RD_ci],
        "NNT": round(NNT, 1),
        "AF": round(AF, 4),
        "continuity_corrected": corrected,
    }

    print(f" RR={RR:.3f} ({RR_ci[0]:.3f}–{RR_ci[1]:.3f}), "
          f"OR={OR:.3f} ({OR_ci[0]:.3f}–{OR_ci[1]:.3f})")
    return results
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## 2. 年齢調整率・SMR
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
def age_standardization(observed_df, standard_pop, method="direct"):
    """Compute an age-standardised rate (direct) or an SMR (indirect).

    Args:
        observed_df: Study population by ``age_group``.
            - direct: needs columns ``rate`` and ``population``
            - indirect: needs columns ``population`` and ``deaths``
        standard_pop: Reference population by ``age_group``.
            - direct: needs column ``std_proportion``
            - indirect: needs column ``rate``
            Extra columns in either frame are ignored.
        method: "direct" or "indirect".

    Returns:
        - direct: dict with ASR, ASR_per_100k, SE and CI_95 (per 100,000),
          where ASR = sum(rate_i * std_proportion_i) and the variance uses a
          binomial approximation.
        - indirect: dict with SMR, CI_95, observed and expected, where
          expected = sum(standard rate_i * study population_i) and
          SMR = observed / expected.

    Raises:
        ValueError: For an unknown ``method`` or when the expected number of
            deaths is not positive.

    The SMR interval uses Byar's approximation with z = 1.96:
        lower = SMR * (1 - 1/(9 O) - z/(3 sqrt(O)))^3      (0 when O = 0)
        upper = (O+1)/E * (1 - 1/(9 (O+1)) + z/(3 sqrt(O+1)))^3
    """
    if method == "direct":
        # Take only the weight column from the standard population so that
        # shared column names (e.g. "population") cannot trigger _x/_y
        # suffixes and break the lookups below.
        weights = standard_pop[["age_group", "std_proportion"]]
        merged = observed_df.merge(weights, on="age_group")
        merged["weighted_rate"] = merged["rate"] * merged["std_proportion"]
        asr = merged["weighted_rate"].sum()

        # Variance (binomial approximation).
        merged["var_component"] = (merged["std_proportion"] ** 2 *
                                   merged["rate"] * (1 - merged["rate"]) /
                                   merged["population"])
        se = np.sqrt(merged["var_component"].sum())

        return {
            "ASR": round(asr, 6),
            "ASR_per_100k": round(asr * 1e5, 2),
            "SE": round(se, 6),
            "CI_95": [round((asr - 1.96*se)*1e5, 2), round((asr + 1.96*se)*1e5, 2)],
        }

    if method == "indirect":
        # Name the columns explicitly; relying on merge suffixes only works
        # when both frames happen to share the same column names.
        study = observed_df[["age_group", "population", "deaths"]].rename(
            columns={"population": "population_obs", "deaths": "deaths_obs"})
        reference = standard_pop[["age_group", "rate"]].rename(
            columns={"rate": "rate_std"})
        merged = study.merge(reference, on="age_group")
        merged["expected"] = merged["rate_std"] * merged["population_obs"]
        O = merged["deaths_obs"].sum()
        E = merged["expected"].sum()
        if not E > 0:
            raise ValueError("expected number of deaths must be positive")

        SMR = O / E
        z = 1.96

        # Byar's approximation; the lower limit is 0 when nothing was observed
        # (the formula itself divides by O).
        if O > 0:
            lower = SMR * (1 - 1/(9*O) - z/(3*np.sqrt(O)))**3
        else:
            lower = 0.0
        upper = ((O+1)/E) * (1 - 1/(9*(O+1)) + z/(3*np.sqrt(O+1)))**3

        print(f" SMR={SMR:.3f} ({lower:.3f}–{upper:.3f}), O={O}, E={E:.1f}")
        return {"SMR": round(SMR, 4), "CI_95": [round(lower, 4), round(upper, 4)],
                "observed": O, "expected": round(E, 1)}

    raise ValueError(
        f"Unknown method {method!r}; expected 'direct' or 'indirect'"
    )
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## 3. 空間疫学・疾患クラスタリング
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
def spatial_cluster_detection(cases_gdf, population_gdf, method="kulldorff"):
    """Detect spatial disease clusters on an areal GeoDataFrame.

    Args:
        cases_gdf: GeoDataFrame with polygon geometries plus ``cases`` and
            ``population`` columns. Result columns are written onto this
            object in place and it is also returned.
        population_gdf: Unused by the implemented methods; kept for interface
            compatibility.
        method:
            - "moran": Local Moran's I (LISA), I_i = z_i * sum_j w_ij z_j,
              on row-standardised Queen contiguity weights. Adds
              ``local_moran_I``, ``local_moran_p`` and ``cluster_type``
              (HH/HL/LH/LL/NS).
            - "getis_ord": Getis-Ord Gi* hot-spot statistic on binary Queen
              contiguity weights. Adds ``gi_star``, ``gi_p`` and ``hotspot``.
            - "kulldorff": Kulldorff's spatial scan statistic (SaTScan) is
              documented as an option but not implemented here.

    Returns:
        ``cases_gdf`` with the result columns added.

    Raises:
        NotImplementedError: For ``method="kulldorff"`` (previously this
            returned the input unchanged without any analysis).
        ValueError: For any other unsupported method name.
    """
    if method == "kulldorff":
        raise NotImplementedError(
            "Kulldorff's spatial scan statistic is not implemented; "
            "use method='moran' or method='getis_ord'"
        )
    if method not in ("moran", "getis_ord"):
        raise ValueError(
            f"Unknown method {method!r}; expected 'moran' or 'getis_ord'"
        )

    from libpysal.weights import Queen

    W = Queen.from_dataframe(cases_gdf)
    rates = cases_gdf["cases"] / cases_gdf["population"]

    if method == "moran":
        from esda.moran import Moran_Local

        W.transform = "r"
        lisa = Moran_Local(rates.values, W)

        cases_gdf["local_moran_I"] = lisa.Is
        cases_gdf["local_moran_p"] = lisa.p_sim
        cases_gdf["cluster_type"] = classify_lisa(lisa)

        n_hotspots = (cases_gdf["cluster_type"] == "HH").sum()
        n_coldspots = (cases_gdf["cluster_type"] == "LL").sum()
        print(f" LISA: {n_hotspots} hotspots, {n_coldspots} coldspots")
    else:
        from esda.getisord import G_Local

        W.transform = "b"
        # G_Local applies its own `transform` argument to the weights and
        # computes plain Gi unless star=True, so both are passed explicitly
        # to get the binary-weight Gi* the column names promise.
        g_local = G_Local(rates.values, W, transform="B", star=True)

        cases_gdf["gi_star"] = g_local.Zs
        cases_gdf["gi_p"] = g_local.p_sim
        cases_gdf["hotspot"] = (g_local.Zs > 1.96) & (g_local.p_sim < 0.05)

    return cases_gdf
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def classify_lisa(lisa, p_threshold=0.05):
    """Label each observation of a LISA result as HH, LH, LL, HL or NS.

    Args:
        lisa: Object exposing ``p_sim`` (pseudo p-values) and ``q`` (quadrant
            codes 1-4), e.g. the result of a Local Moran analysis.
        p_threshold: Observations with ``p_sim`` above this value are "NS"
            (not significant).

    Returns:
        List of labels, exactly one per observation. An unexpected quadrant
        code yields "NS" so the list always stays aligned with the input
        rows (previously such an entry was silently dropped).
    """
    quadrant_labels = {1: "HH", 2: "LH", 3: "LL", 4: "HL"}
    return [
        "NS" if p > p_threshold else quadrant_labels.get(int(quadrant), "NS")
        for p, quadrant in zip(lisa.p_sim, lisa.q)
    ]
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## 4. DAG ベース交絡分析
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
def dag_confounding_analysis(dag_edges, exposure, outcome):
    """Analyse confounding between an exposure and an outcome in a DAG.

    Pipeline:
        1. Build the directed graph from ``dag_edges``.
        2. Enumerate backdoor paths (``find_backdoor_paths``).
        3. Identify adjustment sets (``find_adjustment_sets``).
        4. Report the smallest adjustment set.

    Pearl's backdoor criterion: a variable set Z is admissible when it blocks
    every backdoor path from X to Y and contains no descendant of X.

    Args:
        dag_edges: Iterable of (parent, child) edges.
        exposure: Exposure node X.
        outcome: Outcome node Y.

    Returns:
        dict with n_backdoor_paths, backdoor_paths, adjustment_sets and
        minimal_adjustment (``[]`` when no adjustment set is found).

    Raises:
        ValueError: If the edges contain a cycle or if ``exposure`` /
            ``outcome`` do not appear in any edge.
    """
    import networkx as nx

    G = nx.DiGraph()
    G.add_edges_from(dag_edges)

    # Backdoor reasoning is only defined on acyclic graphs.
    if not nx.is_directed_acyclic_graph(G):
        raise ValueError("dag_edges contain a cycle; a DAG is required")
    missing = [node for node in (exposure, outcome) if node not in G]
    if missing:
        raise ValueError(f"nodes not present in the DAG: {missing}")

    backdoor_paths = find_backdoor_paths(G, exposure, outcome)
    adjustment_sets = find_adjustment_sets(G, exposure, outcome)

    result = {
        "n_backdoor_paths": len(backdoor_paths),
        "backdoor_paths": backdoor_paths,
        "adjustment_sets": adjustment_sets,
        "minimal_adjustment": min(adjustment_sets, key=len) if adjustment_sets else [],
    }

    print(f" DAG: {len(backdoor_paths)} backdoor paths, "
          f"minimal adjustment = {result['minimal_adjustment']}")
    return result
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def find_backdoor_paths(G, source, target):
    """List the backdoor paths (X <- ... -> Y) from ``source`` to ``target``.

    A backdoor path is a simple path in the undirected skeleton of ``G``
    whose first edge points *into* ``source``.

    Args:
        G: Directed graph (networkx ``DiGraph``).
        source: Exposure node X.
        target: Outcome node Y.

    Returns:
        List of paths, each a list of nodes starting at ``source`` and ending
        at ``target``.
    """
    # Imported here because this module-level helper previously relied on an
    # `nx` name that only existed inside dag_confounding_analysis.
    import networkx as nx

    skeleton = G.to_undirected()
    return [
        path
        for path in nx.all_simple_paths(skeleton, source, target)
        if len(path) > 1 and G.has_edge(path[1], source)
    ]
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def find_adjustment_sets(G, exposure, outcome):
    """Enumerate the minimal sufficient adjustment sets (brute force).

    Candidate covariates are all nodes except the exposure, the outcome and
    the descendants of the exposure (the backdoor criterion forbids adjusting
    for those). Subsets are tried in order of increasing size and a subset is
    skipped when it contains an already accepted set, so only minimal sets
    are returned.

    Args:
        G: Directed graph exposing ``nodes()`` and ``successors(node)``.
        exposure: Exposure node X.
        outcome: Outcome node Y.

    Returns:
        List of adjustment sets, each a list of nodes, in deterministic
        order. The search is exponential in the number of candidates and is
        intended for small DAGs only.
    """
    from itertools import combinations

    # Collect every descendant of the exposure with a depth-first walk.
    descendants = set()
    frontier = [exposure]
    while frontier:
        node = frontier.pop()
        for child in G.successors(node):
            if child not in descendants:
                descendants.add(child)
                frontier.append(child)

    # Sorting makes the enumeration order (and thus the output) reproducible.
    candidates = sorted(
        set(G.nodes()) - {exposure, outcome} - descendants, key=str
    )

    accepted = []
    for size in range(len(candidates) + 1):
        for combo in combinations(candidates, size):
            chosen = set(combo)
            if any(smaller <= chosen for smaller in accepted):
                continue  # a subset already suffices, so this is not minimal
            if blocks_all_backdoor(G, exposure, outcome, chosen):
                accepted.append(chosen)
    return [sorted(found, key=str) for found in accepted]
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def blocks_all_backdoor(G, X, Y, Z):
    """Return True when Z satisfies the backdoor criterion for (X, Y).

    Z is admissible when (1) it contains no descendant of X and (2) X and Y
    are d-separated by Z in the graph obtained by deleting every edge that
    leaves X. Condition (2) is checked with the moralised ancestral graph:
    restrict to the ancestors of {X, Y} and Z, connect ("marry") parents that
    share a child, drop edge directions, remove Z, and test whether X can
    still reach Y.

    Args:
        G: Directed acyclic graph exposing ``edges()`` as (parent, child)
            pairs.
        X: Exposure node.
        Y: Outcome node.
        Z: Iterable of candidate adjustment nodes.

    Returns:
        bool: True if Z blocks every backdoor path and is free of
        descendants of X, otherwise False.
    """
    Z = set(Z)
    if X in Z or Y in Z:
        return False

    children = {}
    parents = {}
    for parent, child in G.edges():
        children.setdefault(parent, set()).add(child)
        parents.setdefault(child, set()).add(parent)

    # (1) Z must not contain a descendant of X.
    descendants = set()
    frontier = [X]
    while frontier:
        node = frontier.pop()
        for child in children.get(node, ()):
            if child not in descendants:
                descendants.add(child)
                frontier.append(child)
    if Z & descendants:
        return False

    # Backdoor graph: every edge leaving X is removed, i.e. X is nobody's parent.
    backdoor_parents = {
        child: {p for p in pars if p != X} for child, pars in parents.items()
    }

    # Ancestral closure of {X, Y} and Z inside the backdoor graph.
    relevant = set()
    frontier = [X, Y, *Z]
    while frontier:
        node = frontier.pop()
        if node in relevant:
            continue
        relevant.add(node)
        frontier.extend(backdoor_parents.get(node, ()))

    # Moralise: link each child to its parents and parents of a common child.
    neighbours = {node: set() for node in relevant}
    for child in relevant:
        pars = list(backdoor_parents.get(child, ()))
        for i, first in enumerate(pars):
            neighbours[child].add(first)
            neighbours[first].add(child)
            for second in pars[i + 1:]:
                neighbours[first].add(second)
                neighbours[second].add(first)

    # (2) X must not reach Y once the nodes in Z are removed.
    seen = {X}
    frontier = [X]
    while frontier:
        node = frontier.pop()
        if node == Y:
            return False
        for nxt in neighbours[node]:
            if nxt not in seen and nxt not in Z:
                seen.add(nxt)
                frontier.append(nxt)
    return True
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## References
|
|
289
|
+
|
|
290
|
+
### Output Files
|
|
291
|
+
|
|
292
|
+
| ファイル | 形式 |
|
|
293
|
+
|---|---|
|
|
294
|
+
| `results/risk_measures.json` | JSON |
|
|
295
|
+
| `results/age_standardized_rates.csv` | CSV |
|
|
296
|
+
| `results/spatial_clusters.geojson` | GeoJSON |
|
|
297
|
+
| `results/dag_analysis.json` | JSON |
|
|
298
|
+
| `figures/disease_map.png` | PNG |
|
|
299
|
+
| `figures/dag_diagram.png` | PNG |
|
|
300
|
+
| `figures/forest_plot.png` | PNG |
|
|
301
|
+
|
|
302
|
+
### 利用可能ツール
|
|
303
|
+
|
|
304
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
305
|
+
|
|
306
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
307
|
+
|---|---|---|
|
|
308
|
+
| WHO | `who_gho_get_data` | WHO GHO データ取得 |
|
|
309
|
+
| WHO | `who_gho_query_health_data` | WHO 健康指標クエリ |
|
|
310
|
+
| CDC | `cdc_data_search_datasets` | CDC データセット検索 |
|
|
311
|
+
| CDC | `cdc_data_get_dataset` | CDC データ取得 |
|
|
312
|
+
| EUHealthInfo | `euhealthinfo_search_surveillance_mortality_rates` | 死亡率データ |
|
|
313
|
+
| EUHealthInfo | `euhealthinfo_search_healthcare_expenditure` | 医療費データ |
|
|
314
|
+
| EUHealthInfo | `euhealthinfo_search_population_health_survey` | 健康調査データ |
|
|
315
|
+
| HealthDisparities | `health_disparities_get_svi_info` | 社会脆弱性指標 |
|
|
316
|
+
| HealthDisparities | `health_disparities_get_county_rankings_info` | 地域健康ランキング |
|
|
317
|
+
| ClinicalTrials | `search_clinical_trials` | 臨床試験検索 |
|
|
318
|
+
| PubMed | `PubMed_Guidelines_Search` | 公衆衛生ガイドライン |
|
|
319
|
+
|
|
320
|
+
### 参照スキル
|
|
321
|
+
|
|
322
|
+
| スキル | 連携内容 |
|
|
323
|
+
|---|---|
|
|
324
|
+
| [scientific-causal-inference](../scientific-causal-inference/SKILL.md) | 因果推論・傾向スコア |
|
|
325
|
+
| [scientific-survival-clinical](../scientific-survival-clinical/SKILL.md) | 生存解析・Cox 回帰 |
|
|
326
|
+
| [scientific-meta-analysis](../scientific-meta-analysis/SKILL.md) | メタアナリシス・系統的レビュー |
|
|
327
|
+
| [scientific-infectious-disease](../scientific-infectious-disease/SKILL.md) | 感染症疫学 |
|
|
328
|
+
| [scientific-bayesian-statistics](../scientific-bayesian-statistics/SKILL.md) | ベイズ空間モデル |
|
|
329
|
+
|
|
330
|
+
### 依存パッケージ
|
|
331
|
+
|
|
332
|
+
- geopandas, libpysal, esda, dowhy, lifelines, scipy, statsmodels
|
|
@@ -293,6 +293,18 @@ BUDGET_JUSTIFICATION_TEMPLATE = """
|
|
|
293
293
|
| `grants/research_strategy.md` | Research Strategy(Markdown) | 全セクション完了時 |
|
|
294
294
|
| `grants/budget.json` | 予算計画(JSON) | 予算見積完了時 |
|
|
295
295
|
|
|
296
|
+
### 利用可能ツール
|
|
297
|
+
|
|
298
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
299
|
+
|
|
300
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
301
|
+
|---|---|---|
|
|
302
|
+
| PubMed | `PubMed_search_articles` | 先行研究検索 |
|
|
303
|
+
| PubMed | `PubMed_get_cited_by` | 被引用構造分析 |
|
|
304
|
+
| EuropePMC | `EuropePMC_search_articles` | ヨーロッパ文献検索 |
|
|
305
|
+
| Crossref | `Crossref_search_works` | DOI・出版情報検索 |
|
|
306
|
+
| OpenAlex | `OpenAlex_Guidelines_Search` | オープンアクセス文献検索 |
|
|
307
|
+
|
|
296
308
|
### 参照スキル
|
|
297
309
|
|
|
298
310
|
| スキル | 連携 |
|
|
@@ -370,6 +370,18 @@ def scaffold_split(dataset, train_ratio=0.8, val_ratio=0.1):
|
|
|
370
370
|
| `figures/gnn_training_curve.png` | 学習曲線プロット | トレーニング完了時 |
|
|
371
371
|
| `figures/gnn_explanation.png` | GNNExplainer 可視化 | 解釈性分析時 |
|
|
372
372
|
|
|
373
|
+
### 利用可能ツール
|
|
374
|
+
|
|
375
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
376
|
+
|
|
377
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
378
|
+
|---|---|---|
|
|
379
|
+
| PubChem | `PubChem_get_compound_properties_by_CID` | 分子物性取得 |
|
|
380
|
+
| ChEMBL | `ChEMBL_get_molecule` | 分子情報取得 |
|
|
381
|
+
| ChEMBL | `ChEMBL_get_activity` | バイオアッセイデータ |
|
|
382
|
+
| UniProt | `UniProt_get_entry_by_accession` | タンパク質グラフ構築用 |
|
|
383
|
+
| BindingDB | `BindingDB_get_ligands_by_uniprot` | リガンド-ターゲットデータ |
|
|
384
|
+
|
|
373
385
|
### 参照スキル
|
|
374
386
|
|
|
375
387
|
| スキル | 連携 |
|