@nahisaho/satori 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/README.md +188 -39
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-clinical-trials-analytics/SKILL.md +340 -0
  4. package/src/.github/skills/scientific-computational-materials/SKILL.md +353 -0
  5. package/src/.github/skills/scientific-environmental-ecology/SKILL.md +295 -0
  6. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +332 -0
  7. package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +567 -0
  8. package/src/.github/skills/scientific-gene-expression-transcriptomics/SKILL.md +330 -0
  9. package/src/.github/skills/scientific-immunoinformatics/SKILL.md +341 -0
  10. package/src/.github/skills/scientific-infectious-disease/SKILL.md +342 -0
  11. package/src/.github/skills/scientific-lab-data-management/SKILL.md +334 -0
  12. package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +349 -0
  13. package/src/.github/skills/scientific-neuroscience-electrophysiology/SKILL.md +400 -0
  14. package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +342 -0
  15. package/src/.github/skills/scientific-population-genetics/SKILL.md +336 -0
  16. package/src/.github/skills/scientific-proteomics-mass-spectrometry/SKILL.md +401 -0
  17. package/src/.github/skills/scientific-regulatory-science/SKILL.md +256 -0
  18. package/src/.github/skills/scientific-scientific-schematics/SKILL.md +336 -0
  19. package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +361 -0
  20. package/src/.github/skills/scientific-spatial-transcriptomics/SKILL.md +281 -0
  21. package/src/.github/skills/scientific-systems-biology/SKILL.md +310 -0
  22. package/src/.github/skills/scientific-text-mining-nlp/SKILL.md +358 -0
@@ -0,0 +1,295 @@
1
+ ---
2
+ name: scientific-environmental-ecology
3
+ description: |
4
+ 環境科学・生態学解析スキル。種分布モデリング(SDM / MaxEnt)・
5
+ 生物多様性指標(α/β/γ 多様性)・群集構造解析(NMDS/CCA/RDA)・
6
+ 生態学的ニッチモデリング・保全優先順位評価・OBIS/GBIF データ統合パイプライン。
7
+ ---
8
+
9
+ # Scientific Environmental Ecology
10
+
11
+ 環境科学・生態学に特化した解析パイプラインを提供する。
12
+ 種分布モデリング、生物多様性評価、群集構造解析、
13
+ 保全優先順位付け、海洋/陸域の生態系データ統合を扱う。
14
+
15
+ ## When to Use
16
+
17
+ - 種分布モデル(SDM)を構築して生息適地を推定するとき
18
+ - 群集の生物多様性指標を算出・比較するとき
19
+ - 群集構造の環境要因への応答を解析するとき(CCA / RDA)
20
+ - GBIF / OBIS から出現データを取得して空間解析を行うとき
21
+ - 保全優先区域の評価・ランキングを行うとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. 種分布モデリング(SDM)
28
+
29
+ ```python
30
+ import numpy as np
31
+ import pandas as pd
32
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
33
+ from sklearn.model_selection import cross_val_score
34
+ from sklearn.metrics import roc_auc_score
35
+
36
def _sdm_prediction_matrix(env_layers):
    """Flatten ``env_layers`` into a 2-D ``(n_cells, n_features)`` matrix."""
    if hasattr(env_layers, "read"):
        # Dataset-like object: read() is indexed as [band, row, col] by
        # extract_env_values, so move the band axis last before flattening.
        grid = np.moveaxis(np.asarray(env_layers.read()), 0, -1)
    else:
        # Plain array: the last axis is taken to be the feature axis.
        grid = np.asarray(env_layers)
    return grid.reshape(-1, grid.shape[-1])


def species_distribution_model(occurrences, background, env_layers,
                               method="maxent", n_folds=5):
    """
    Fit a species distribution model (SDM) and predict suitability per cell.

    Supported ``method`` values:
        - "maxent": maximum-entropy model (``elapid.MaxentModel``).
        - "rf": random forest on presence vs. background points.
        - "gbm": gradient boosting on presence vs. background points.

    "ensemble" is not implemented and is rejected with ``ValueError``.

    MaxEnt principle: choose the distribution P(x) over environmental
    variables x that maximises H(P) = -sum P(x) log P(x), subject to the
    feature expectations under P matching those observed in the data.

    Args:
        occurrences: iterable of (lon, lat) presence coordinates.
        background: iterable of (lon, lat) pseudo-absence coordinates.
        env_layers: environmental raster. It is passed to
            ``extract_env_values`` and then flattened for prediction, either
            via ``env_layers.read()`` or as an array whose last axis is the
            feature axis.
        method: one of "maxent", "rf", "gbm".
        n_folds: number of cross-validation folds for the RF/GBM AUC report.

    Returns:
        ``(model, pred)``: the fitted model and a 1-D array with one
        suitability value per flattened raster cell.

    Raises:
        ValueError: if ``method`` is not supported.
    """
    supported = ("maxent", "rf", "gbm")
    if method not in supported:
        raise ValueError(
            f"Unsupported SDM method {method!r}; expected one of {supported}"
        )

    # Environmental values at presence and background points.
    X_pres = extract_env_values(occurrences, env_layers)
    X_bg = extract_env_values(background, env_layers)
    X = np.vstack([X_pres, X_bg])
    y = np.concatenate([np.ones(len(X_pres)), np.zeros(len(X_bg))])

    # NOTE(review): nodata/NaN cells are passed through unchanged; mask them
    # beforehand if the chosen estimator cannot handle them.
    grid = _sdm_prediction_matrix(env_layers)

    if method == "maxent":
        from elapid import MaxentModel
        model = MaxentModel()
        # Fit on the covariate matrix plus 1/0 presence-background labels.
        model.fit(X, y)
        pred = model.predict(grid)
        return model, pred

    if method == "rf":
        label = "RF"
        model = RandomForestClassifier(n_estimators=500, random_state=42)
    else:
        label = "GBM"
        model = GradientBoostingClassifier(n_estimators=300, max_depth=5,
                                           random_state=42)

    model.fit(X, y)
    auc_scores = cross_val_score(model, X, y, cv=n_folds, scoring="roc_auc")
    print(f" {label} AUC: {np.mean(auc_scores):.3f} ± {np.std(auc_scores):.3f}")
    # Probability of the presence class for every raster cell.
    pred = model.predict_proba(grid)[:, 1]
    return model, pred
85
+
86
+
87
def extract_env_values(coords, env_layers):
    """
    Sample every band of ``env_layers`` at the given coordinates.

    Args:
        coords: iterable of (lon, lat) pairs.
        env_layers: object providing ``index(lon, lat) -> (row, col)`` and
            ``read()`` returning an array indexed as [band, row, col].

    Returns:
        Array of shape ``(n_points, n_bands)``; ``(0, n_bands)`` when
        ``coords`` is empty.

    Raises:
        IndexError: if a coordinate maps outside the raster extent.
    """
    # Read the raster once instead of once per coordinate.
    bands = np.asarray(env_layers.read())
    n_rows, n_cols = bands.shape[1], bands.shape[2]

    values = []
    for lon, lat in coords:
        row, col = env_layers.index(lon, lat)
        # Negative indices would silently wrap around in NumPy, so reject
        # every out-of-extent position explicitly.
        if not (0 <= row < n_rows and 0 <= col < n_cols):
            raise IndexError(
                f"coordinate ({lon}, {lat}) falls outside the raster extent"
            )
        values.append(bands[:, row, col])

    if not values:
        return np.empty((0, bands.shape[0]))
    return np.array(values)
95
+ ```
96
+
97
+ ## 2. 生物多様性指標
98
+
99
+ ```python
100
+ from scipy.stats import entropy
101
+
102
def biodiversity_indices(community_matrix, metadata=None):
    """
    Compute per-site alpha diversity and landscape-level gamma/beta diversity.

    Alpha diversity (within a site):
        - richness: S, the number of taxa with abundance > 0
        - shannon: H' = -sum p_i ln(p_i)
        - simpson: D = 1 - sum p_i^2
        - evenness (Pielou): J = H' / ln(S), defined as 0 when S <= 1
        - chao1: S + f1^2 / (2 f2) when f2 > 0, otherwise the bias-corrected
          form S + f1 (f1 - 1) / 2 (f1 = singletons, f2 = doubletons)

    Gamma diversity is the number of taxa present in at least one site.
    Whittaker's beta is gamma / mean(alpha richness) - 1.

    Args:
        community_matrix: DataFrame with sites as rows, taxa as columns and
            abundances as values.
        metadata: accepted for interface compatibility; not used.

    Returns:
        ``(alpha_df, summary)``: ``alpha_df`` is indexed by site, and
        ``summary`` holds ``gamma_diversity``, ``mean_alpha`` and
        ``beta_whittaker``.
    """
    columns = ["site", "richness", "shannon", "simpson", "evenness",
               "chao1", "total_abundance"]
    results = []
    for idx, row in community_matrix.iterrows():
        counts = row[row > 0].values
        S = len(counts)

        if S == 0:
            # A site with no taxa has no frequencies to normalise; report
            # zeros instead of propagating NaN from a 0/0 division.
            results.append({
                "site": idx, "richness": 0, "shannon": 0.0, "simpson": 0.0,
                "evenness": 0.0, "chao1": 0.0, "total_abundance": 0,
            })
            continue

        freqs = counts / counts.sum()
        H = float(entropy(freqs))
        D_simpson = 1 - float(np.sum(freqs ** 2))
        J = H / np.log(S) if S > 1 else 0

        f1 = int(np.sum(counts == 1))
        f2 = int(np.sum(counts == 2))
        if f2 > 0:
            chao1 = S + (f1 ** 2) / (2 * f2)
        else:
            # The classic formula is undefined without doubletons.
            chao1 = S + f1 * (f1 - 1) / 2

        results.append({
            "site": idx,
            "richness": S,
            "shannon": round(H, 4),
            "simpson": round(D_simpson, 4),
            "evenness": round(float(J), 4),
            "chao1": round(chao1, 1),
            "total_abundance": int(counts.sum()),
        })

    # Explicit columns keep set_index working when there are no sites.
    alpha_df = pd.DataFrame(results, columns=columns).set_index("site")

    # Gamma diversity and Whittaker's beta.
    gamma = int((community_matrix > 0).any(axis=0).sum())
    mean_alpha = float(alpha_df["richness"].mean()) if len(alpha_df) else float("nan")
    if mean_alpha > 0:
        beta_whittaker = gamma / mean_alpha - 1
    else:
        # Undefined when no site has any taxa (or there are no sites).
        beta_whittaker = float("nan")

    summary = {
        "gamma_diversity": gamma,
        "mean_alpha": round(mean_alpha, 2),
        "beta_whittaker": round(beta_whittaker, 3),
    }

    print(f" Biodiversity: γ={gamma}, ᾱ={mean_alpha:.1f}, β_w={beta_whittaker:.3f}")
    return alpha_df, summary
161
+ ```
162
+
163
+ ## 3. 群集構造解析(NMDS / CCA / RDA)
164
+
165
+ ```python
166
def community_ordination(community_matrix, env_df=None, method="nmds",
                         n_dims=2, distance="bray"):
    """
    Ordinate sites from a community (site x taxon) abundance matrix.

    Supported ``method`` values:
        - "nmds": non-metric multidimensional scaling (rank based).
        - "pcoa": principal coordinates analysis.

    Constrained methods ("cca", "rda") and "dca" are not implemented and are
    rejected with ``ValueError``.

    NMDS stress rule of thumb: < 0.05 excellent, < 0.10 good,
    < 0.20 acceptable, > 0.20 poor (consider more dimensions).

    Args:
        community_matrix: DataFrame with sites as rows and taxa as columns.
        env_df: accepted for interface compatibility; not used.
        method: "nmds" or "pcoa".
        n_dims: number of ordination axes to return.
        distance: dissimilarity metric name; "bray" is accepted as an alias
            for "braycurtis".

    Returns:
        For "nmds": ``(coords, stress)``.
        For "pcoa": ``(coords, proportion_explained)`` for the first
        ``n_dims`` axes.

    Raises:
        ValueError: if ``method`` is not supported.
    """
    supported = ("nmds", "pcoa")
    if method not in supported:
        raise ValueError(
            f"Unsupported ordination method {method!r}; expected one of {supported}"
        )

    # Third-party imports are deferred until the method is known to be valid.
    from skbio.diversity import beta_diversity

    metric_aliases = {"bray": "braycurtis", "bray-curtis": "braycurtis"}
    metric = metric_aliases.get(distance, distance)
    dm = beta_diversity(metric, community_matrix.values,
                        community_matrix.index)

    if method == "nmds":
        from sklearn.manifold import MDS
        mds = MDS(n_components=n_dims, dissimilarity="precomputed",
                  metric=False, random_state=42, max_iter=500)
        # Precomputed MDS needs the square matrix. squareform() on a square
        # input would collapse it to a condensed 1-D vector.
        coords = mds.fit_transform(dm.data)
        stress = mds.stress_
        print(f" NMDS: stress={stress:.4f} ({n_dims}D)")
        return coords, stress

    from skbio.stats.ordination import pcoa
    result = pcoa(dm)
    return result.samples.values[:, :n_dims], result.proportion_explained[:n_dims]
203
+ ```
204
+
205
+ ## 4. 種の保全優先順位評価
206
+
207
+ ```python
208
def conservation_priority(species_data, criteria_weights=None):
    """
    Rank species by a weighted multi-criteria conservation priority score.

    The IUCN category is mapped to a numeric score (CR=5, EN=4, VU=3, NT=2,
    LC=1, DD=0). Each criterion column named in ``criteria_weights`` that is
    present in the data is min-max normalised to [0, 1], and the priority
    score is the weighted sum of the normalised criteria. A criterion that is
    missing from the data, or constant across all species, contributes 0.

    Args:
        species_data: DataFrame with an ``iucn_category`` column and any of
            the criterion columns.
        criteria_weights: mapping of criterion column to weight. Defaults to
            iucn_score 0.30, evolutionary_distinctiveness 0.20,
            habitat_loss_rate 0.20, endemism 0.15, ecosystem_service 0.15.

    Returns:
        A new DataFrame sorted by ``priority_score`` in descending order.
        The input DataFrame is left unmodified.
    """
    if criteria_weights is None:
        criteria_weights = {
            "iucn_score": 0.30,
            "evolutionary_distinctiveness": 0.20,
            "habitat_loss_rate": 0.20,
            "endemism": 0.15,
            "ecosystem_service": 0.15,
        }

    # Work on a copy so the caller's frame does not gain helper columns.
    ranked = species_data.copy()

    iucn_mapping = {"CR": 5, "EN": 4, "VU": 3, "NT": 2, "LC": 1, "DD": 0}
    # Categories outside the mapping become NaN and propagate to the score.
    ranked["iucn_score"] = ranked["iucn_category"].map(iucn_mapping)

    # Min-max normalisation of every available criterion.
    for col in criteria_weights:
        if col not in ranked.columns:
            continue
        min_v = ranked[col].min()
        max_v = ranked[col].max()
        if max_v > min_v:
            ranked[f"{col}_norm"] = (ranked[col] - min_v) / (max_v - min_v)

    # Composite score
    ranked["priority_score"] = sum(
        w * ranked.get(f"{col}_norm", 0) for col, w in criteria_weights.items()
    )
    ranked = ranked.sort_values("priority_score", ascending=False)

    print(f" Conservation: {len(ranked)} species ranked")
    return ranked
253
+ ```
254
+
255
+ ## References
256
+
257
+ ### Output Files
258
+
259
+ | ファイル | 形式 |
260
+ |---|---|
261
+ | `results/sdm_predictions.tif` | GeoTIFF |
262
+ | `results/biodiversity_indices.csv` | CSV |
263
+ | `results/ordination_scores.csv` | CSV |
264
+ | `results/conservation_priority.csv` | CSV |
265
+ | `figures/sdm_map.png` | PNG |
266
+ | `figures/nmds_plot.png` | PNG |
267
+ | `figures/diversity_comparison.png` | PNG |
268
+
269
+ ### 利用可能ツール
270
+
271
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
272
+
273
+ | カテゴリ | 主要ツール | 用途 |
274
+ |---|---|---|
275
+ | OBIS | `OBIS_search_taxa` | 海洋生物分類検索 |
276
+ | OBIS | `OBIS_search_occurrences` | 海洋生物出現データ |
277
+ | GBIF | `GBIF_search_species` | 種名検索 |
278
+ | GBIF | `GBIF_search_occurrences` | 出現記録検索 |
279
+ | Paleobiology | `Paleobiology_get_fossils` | 化石記録データ |
280
+ | OLS | `ols_search_terms` | 生態学オントロジー検索 |
281
+ | PubMed | `PubMed_search_articles` | 生態学文献検索 |
282
+
283
+ ### 参照スキル
284
+
285
+ | スキル | 連携内容 |
286
+ |---|---|
287
+ | [scientific-statistical-testing](../scientific-statistical-testing/SKILL.md) | 多様性有意差検定 |
288
+ | [scientific-pca-tsne](../scientific-pca-tsne/SKILL.md) | 次元削減・序列化 |
289
+ | [scientific-ml-classification](../scientific-ml-classification/SKILL.md) | SDM モデル(RF/GBM) |
290
+ | [scientific-image-analysis](../scientific-image-analysis/SKILL.md) | リモートセンシング画像解析 |
291
+ | [scientific-time-series](../scientific-time-series/SKILL.md) | 生態系時系列トレンド |
292
+
293
+ ### 依存パッケージ
294
+
295
+ - scikit-bio, rasterio, geopandas, elapid, shapely, pygbif
@@ -0,0 +1,332 @@
1
+ ---
2
+ name: scientific-epidemiology-public-health
3
+ description: |
4
+ 疫学・公衆衛生解析スキル。観察研究デザイン(コホート/症例対照/横断)・
5
+ リスク指標(RR/OR/HR/NNT)・標準化死亡比(SMR)・年齢調整率・
6
+ 空間疫学(GIS / 空間クラスタリング)・因果推論ダイアグラム(DAG)・
7
+ WHO/CDC/EU 公衆衛生データ統合パイプライン。
8
+ ---
9
+
10
+ # Scientific Epidemiology & Public Health
11
+
12
+ 疫学研究と公衆衛生データ解析のパイプラインを提供する。
13
+ 研究デザイン設計、リスク指標算出、交絡調整、
14
+ 空間疫学、健康格差評価、公衆衛生データベース連携を体系的に扱う。
15
+
16
+ ## When to Use
17
+
18
+ - 観察研究のリスク指標(RR / OR / HR)を算出するとき
19
+ - 年齢調整率・標準化死亡比(SMR)を計算するとき
20
+ - 空間疫学(疾患クラスタリング・GIS マッピング)を行うとき
21
+ - DAG(有向非巡回グラフ)で交絡構造を分析するとき
22
+ - WHO / CDC / EU の公衆衛生データを取得・解析するとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. リスク指標算出
29
+
30
+ ```python
31
+ import numpy as np
32
+ import pandas as pd
33
+ from scipy.stats import norm
34
+
35
def calculate_risk_measures(a, b, c, d, alpha=0.05):
    """
    Compute risk measures from a 2x2 contingency table.

                  Disease+   Disease-
        Exposed+     a          b       -> a+b
        Exposed-     c          d       -> c+d

    Measures:
        - risk in each group: cases / group total
        - Risk Ratio (RR): R_exposed / R_unexposed
        - Odds Ratio (OR): (a*d) / (b*c)
        - Risk Difference (RD): R_exposed - R_unexposed
        - NNT: 1 / |RD| (infinite when RD is 0)
        - Attributable Fraction (AF): (RR - 1) / RR

    When any cell is zero, RR and OR (and their standard errors) are
    computed with 0.5 added to every cell (Haldane-Anscombe correction), so
    the ratios stay finite. Risks, RD and NNT always use the raw counts.

    Args:
        a, b, c, d: non-negative cell counts as laid out above.
        alpha: two-sided significance level for the confidence intervals.

    Returns:
        dict with rounded ``risk_exposed``, ``risk_unexposed``, ``RR``,
        ``RR_CI``, ``OR``, ``OR_CI``, ``RD``, ``RD_CI``, ``NNT``, ``AF`` and
        the boolean ``zero_cell_correction``.

    Raises:
        ValueError: on negative counts, an empty exposure group, or an
            ``alpha`` outside (0, 1).
    """
    cells = (a, b, c, d)
    if any(n < 0 for n in cells):
        raise ValueError("cell counts must be non-negative")
    if (a + b) == 0 or (c + d) == 0:
        raise ValueError("both exposure groups must contain at least one subject")
    if not 0 < alpha < 1:
        raise ValueError("alpha must lie strictly between 0 and 1")

    z = norm.ppf(1 - alpha / 2)

    # Risk
    R1 = a / (a + b)  # Exposed
    R0 = c / (c + d)  # Unexposed

    # Ratio measures break down on zero cells; apply the correction only then
    # so tables without zeros give exactly the uncorrected estimates.
    zero_cell = any(n == 0 for n in cells)
    if zero_cell:
        a_r, b_r, c_r, d_r = (n + 0.5 for n in cells)
    else:
        a_r, b_r, c_r, d_r = cells

    # Risk Ratio (CI on the log scale)
    RR = (a_r / (a_r + b_r)) / (c_r / (c_r + d_r))
    ln_RR_se = np.sqrt(1/a_r - 1/(a_r + b_r) + 1/c_r - 1/(c_r + d_r))
    RR_ci = (RR * np.exp(-z * ln_RR_se), RR * np.exp(z * ln_RR_se))

    # Odds Ratio (Woolf CI on the log scale)
    OR = (a_r * d_r) / (b_r * c_r)
    ln_OR_se = np.sqrt(1/a_r + 1/b_r + 1/c_r + 1/d_r)
    OR_ci = (OR * np.exp(-z * ln_OR_se), OR * np.exp(z * ln_OR_se))

    # Risk Difference (Wald CI)
    RD = R1 - R0
    RD_se = np.sqrt(R1*(1-R1)/(a+b) + R0*(1-R0)/(c+d))
    RD_ci = (RD - z * RD_se, RD + z * RD_se)

    # NNT
    NNT = 1 / abs(RD) if RD != 0 else np.inf

    # Attributable fraction
    AF = (RR - 1) / RR if RR > 0 else 0

    results = {
        "risk_exposed": round(R1, 4),
        "risk_unexposed": round(R0, 4),
        "RR": round(RR, 4), "RR_CI": [round(x, 4) for x in RR_ci],
        "OR": round(OR, 4), "OR_CI": [round(x, 4) for x in OR_ci],
        "RD": round(RD, 4), "RD_CI": [round(x, 4) for x in RD_ci],
        "NNT": round(NNT, 1),
        "AF": round(AF, 4),
        "zero_cell_correction": zero_cell,
    }

    print(f" RR={RR:.3f} ({RR_ci[0]:.3f}–{RR_ci[1]:.3f}), "
          f"OR={OR:.3f} ({OR_ci[0]:.3f}–{OR_ci[1]:.3f})")
    return results
92
+ ```
93
+
94
+ ## 2. 年齢調整率・SMR
95
+
96
+ ```python
97
def age_standardization(observed_df, standard_pop, method="direct"):
    """
    Compute an age-standardised rate (direct) or an SMR (indirect).

    method:
        - "direct": ASR = sum(age-specific rate_i * standard proportion_i).
          ``observed_df`` needs ``age_group``, ``rate``, ``population``;
          ``standard_pop`` needs ``age_group``, ``std_proportion``.
        - "indirect": SMR = observed deaths / expected deaths, where
          expected = sum(standard rate_i * study population_i).
          Both frames must carry ``age_group``, ``rate``, ``population`` and
          ``deaths``, because the merge suffixes (``_obs`` / ``_std``) are
          only applied to columns present in both.

    The SMR 95% CI uses Byar's approximation:
        lower = SMR * (1 - 1/(9 O) - z/(3 sqrt(O)))^3
        upper = (O+1)/E * (1 - 1/(9 (O+1)) + z/(3 sqrt(O+1)))^3
    With O = 0 the lower limit is 0.

    Returns:
        direct: dict with ``ASR``, ``ASR_per_100k``, ``SE``, ``CI_95``
        (the CI is expressed per 100,000).
        indirect: dict with ``SMR``, ``CI_95``, ``observed``, ``expected``.

    Raises:
        ValueError: for an unknown ``method`` or non-positive expected deaths.
    """
    if method == "direct":
        # Directly standardised rate.
        merged = observed_df.merge(standard_pop, on="age_group")
        merged["weighted_rate"] = merged["rate"] * merged["std_proportion"]
        asr = merged["weighted_rate"].sum()

        # Variance under a binomial approximation per age group.
        merged["var_component"] = (merged["std_proportion"] ** 2 *
                                   merged["rate"] * (1 - merged["rate"]) /
                                   merged["population"])
        se = np.sqrt(merged["var_component"].sum())

        return {
            "ASR": round(asr, 6),
            "ASR_per_100k": round(asr * 1e5, 2),
            "SE": round(se, 6),
            "CI_95": [round((asr - 1.96*se)*1e5, 2), round((asr + 1.96*se)*1e5, 2)],
        }

    if method == "indirect":
        # Indirect standardisation (SMR).
        merged = observed_df.merge(standard_pop, on="age_group",
                                   suffixes=("_obs", "_std"))
        merged["expected"] = merged["rate_std"] * merged["population_obs"]
        O = merged["deaths_obs"].sum()
        E = merged["expected"].sum()
        if not E > 0:
            raise ValueError("expected deaths must be positive to compute an SMR")

        SMR = O / E
        z = 1.96

        # Byar's approximation; with no observed deaths the lower-limit
        # formula divides by zero, and the limit is 0 by definition.
        if O == 0:
            lower = 0.0
        else:
            lower = SMR * (1 - 1/(9*O) - z/(3*np.sqrt(O)))**3
        upper = ((O+1)/E) * (1 - 1/(9*(O+1)) + z/(3*np.sqrt(O+1)))**3

        print(f" SMR={SMR:.3f} ({lower:.3f}–{upper:.3f}), O={O}, E={E:.1f}")
        return {"SMR": round(SMR, 4), "CI_95": [round(lower, 4), round(upper, 4)],
                "observed": O, "expected": round(E, 1)}

    raise ValueError(
        f"Unknown method {method!r}; expected 'direct' or 'indirect'"
    )
149
+ ```
150
+
151
+ ## 3. 空間疫学・疾患クラスタリング
152
+
153
+ ```python
154
def spatial_cluster_detection(cases_gdf, population_gdf, method="kulldorff"):
    """
    Detect spatial disease clusters on an areal GeoDataFrame.

    method:
        - "moran": Local Moran's I (LISA), I_i = z_i * sum_j w_ij z_j.
          Adds ``local_moran_I``, ``local_moran_p`` and ``cluster_type``
          (HH/HL/LH/LL/NS).
        - "getis_ord": Getis-Ord Gi* hot-spot statistic. Adds ``gi_star``,
          ``gi_p`` and a boolean ``hotspot`` column.
        - "kulldorff": Kulldorff's spatial scan statistic. Not implemented
          here; raises ``NotImplementedError`` rather than returning the
          input untouched as if an analysis had run.

    Args:
        cases_gdf: GeoDataFrame with ``cases`` and ``population`` columns and
            polygon geometries (Queen contiguity weights are built from it).
            It is modified in place and also returned.
        population_gdf: accepted for interface compatibility; not used.
        method: "moran" or "getis_ord".

    Returns:
        ``cases_gdf`` with the result columns added.

    Raises:
        NotImplementedError: for ``method="kulldorff"``.
        ValueError: for any other unsupported method.
    """
    if method == "kulldorff":
        raise NotImplementedError(
            "Kulldorff's spatial scan statistic is not implemented; "
            "use method='moran' or method='getis_ord'"
        )
    if method not in ("moran", "getis_ord"):
        raise ValueError(
            f"Unsupported method {method!r}; expected 'moran' or 'getis_ord'"
        )

    # Third-party imports are deferred until the method is known to be valid.
    from libpysal.weights import Queen

    W = Queen.from_dataframe(cases_gdf)
    rates = cases_gdf["cases"] / cases_gdf["population"]

    if method == "moran":
        from esda.moran import Moran_Local

        W.transform = "r"
        lisa = Moran_Local(rates.values, W)

        cases_gdf["local_moran_I"] = lisa.Is
        cases_gdf["local_moran_p"] = lisa.p_sim
        cases_gdf["cluster_type"] = classify_lisa(lisa)

        n_hotspots = (cases_gdf["cluster_type"] == "HH").sum()
        n_coldspots = (cases_gdf["cluster_type"] == "LL").sum()
        print(f" LISA: {n_hotspots} hotspots, {n_coldspots} coldspots")
    else:
        from esda.getisord import G_Local

        # NOTE(review): G_Local has its own ``transform`` argument that may
        # override this setting; confirm which weighting is intended.
        W.transform = "b"
        # star=True includes the focal unit, i.e. Gi* rather than Gi, which
        # is what the "gi_star" column name promises.
        g_local = G_Local(rates.values, W, star=True)

        cases_gdf["gi_star"] = g_local.Zs
        cases_gdf["gi_p"] = g_local.p_sim
        cases_gdf["hotspot"] = (g_local.Zs > 1.96) & (g_local.p_sim < 0.05)

    return cases_gdf
201
+
202
+
203
def classify_lisa(lisa, p_threshold=0.05):
    """
    Label each observation of a LISA result as HH/LH/LL/HL or NS.

    An observation whose pseudo p-value exceeds ``p_threshold`` is "NS".
    Otherwise the quadrant code in ``lisa.q`` is mapped as 1 -> "HH",
    2 -> "LH", 3 -> "LL", 4 -> "HL". An unrecognised quadrant code is also
    reported as "NS", so the result always has one label per observation.

    Args:
        lisa: object exposing equal-length ``p_sim`` and ``q`` sequences.
        p_threshold: significance cut-off applied to ``p_sim``.

    Returns:
        list of labels, one per observation.
    """
    quadrant_labels = {1: "HH", 2: "LH", 3: "LL", 4: "HL"}
    return [
        "NS" if p > p_threshold else quadrant_labels.get(q, "NS")
        for p, q in zip(lisa.p_sim, lisa.q)
    ]
218
+ ```
219
+
220
+ ## 4. DAG ベース交絡分析
221
+
222
+ ```python
223
def dag_confounding_analysis(dag_edges, exposure, outcome):
    """
    Analyse confounding structure in a causal DAG.

    Pipeline:
        1. Build the directed graph from ``dag_edges``.
        2. Enumerate back-door paths from exposure to outcome.
        3. Enumerate adjustment sets and pick the smallest one.

    Pearl's back-door criterion: a set Z is admissible when it blocks every
    back-door path from X to Y and contains no descendant of X.

    Args:
        dag_edges: iterable of (parent, child) edges.
        exposure: exposure (treatment) node.
        outcome: outcome node.

    Returns:
        dict with ``n_backdoor_paths``, ``backdoor_paths``,
        ``adjustment_sets`` and ``minimal_adjustment`` (an empty list when
        no adjustment set was found).

    Raises:
        ValueError: if the graph contains a cycle or lacks the exposure or
            outcome node.
    """
    import networkx as nx

    G = nx.DiGraph()
    G.add_edges_from(dag_edges)

    # Back-door reasoning is only defined on acyclic graphs.
    if not nx.is_directed_acyclic_graph(G):
        raise ValueError("dag_edges must describe a directed acyclic graph")
    for node in (exposure, outcome):
        if node not in G:
            raise ValueError(f"node {node!r} does not appear in dag_edges")

    # Back-door paths
    backdoor_paths = find_backdoor_paths(G, exposure, outcome)

    # Adjustment sets
    adjustment_sets = find_adjustment_sets(G, exposure, outcome)

    result = {
        "n_backdoor_paths": len(backdoor_paths),
        "backdoor_paths": backdoor_paths,
        "adjustment_sets": adjustment_sets,
        "minimal_adjustment": min(adjustment_sets, key=len) if adjustment_sets else [],
    }

    print(f" DAG: {len(backdoor_paths)} backdoor paths, "
          f"minimal adjustment = {result['minimal_adjustment']}")
    return result
260
+
261
+
262
def find_backdoor_paths(G, source, target):
    """
    List back-door paths (source <- ... target) in a directed graph.

    Every simple path between ``source`` and ``target`` in the undirected
    version of ``G`` is enumerated, and those whose first edge points *into*
    ``source`` are kept. Whether a path is open or blocked is not assessed.

    Args:
        G: ``networkx.DiGraph``.
        source: exposure node.
        target: outcome node.

    Returns:
        list of paths, each a list of nodes starting at ``source``.
    """
    # networkx is imported here because no module-level import is visible.
    import networkx as nx

    undirected = G.to_undirected()
    return [
        path
        for path in nx.all_simple_paths(undirected, source, target)
        if G.has_edge(path[1], source)
    ]
268
+
269
+
270
def find_adjustment_sets(G, exposure, outcome):
    """
    Enumerate adjustment sets that satisfy the back-door check (brute force).

    Candidates are all nodes other than the exposure, the outcome and the
    descendants of the exposure, since the back-door criterion forbids
    adjusting for a descendant of the exposure. Every subset of the
    candidates is tested with ``blocks_all_backdoor``, smallest first, so
    the cost grows exponentially with the number of candidates.

    Args:
        G: ``networkx.DiGraph``.
        exposure: exposure node.
        outcome: outcome node.

    Returns:
        list of adjustment sets (each a list of nodes), ordered by size and
        deterministic for a given graph.
    """
    from itertools import combinations

    import networkx as nx

    excluded = {exposure, outcome} | nx.descendants(G, exposure)
    # Sort so the enumeration order does not depend on set iteration order.
    candidates = sorted(set(G.nodes()) - excluded, key=str)

    sets = []
    for size in range(len(candidates) + 1):
        for combo in combinations(candidates, size):
            if blocks_all_backdoor(G, exposure, outcome, set(combo)):
                sets.append(list(combo))
    return sets
280
+
281
+
282
def blocks_all_backdoor(G, X, Y, Z):
    """
    Return True when Z satisfies Pearl's back-door criterion for (X, Y).

    Two conditions are checked:
        1. Z contains no descendant of X.
        2. Z d-separates X from Y in the graph obtained by deleting every
           edge that leaves X, so only back-door paths remain.

    Args:
        G: ``networkx.DiGraph`` (expected to be acyclic).
        X: exposure node.
        Y: outcome node.
        Z: iterable of candidate adjustment nodes (must not include X or Y).

    Returns:
        bool.
    """
    import networkx as nx

    Z = set(Z)
    if Z & nx.descendants(G, X):
        return False

    # Removing X's outgoing edges leaves only the back-door connections.
    backdoor_graph = G.copy()
    backdoor_graph.remove_edges_from(list(G.out_edges(X)))

    # networkx >= 3.3 names this is_d_separator; older releases d_separated.
    d_separates = getattr(nx, "is_d_separator", None) or nx.d_separated
    return bool(d_separates(backdoor_graph, {X}, {Y}, Z))
286
+ ```
287
+
288
+ ## References
289
+
290
+ ### Output Files
291
+
292
+ | ファイル | 形式 |
293
+ |---|---|
294
+ | `results/risk_measures.json` | JSON |
295
+ | `results/age_standardized_rates.csv` | CSV |
296
+ | `results/spatial_clusters.geojson` | GeoJSON |
297
+ | `results/dag_analysis.json` | JSON |
298
+ | `figures/disease_map.png` | PNG |
299
+ | `figures/dag_diagram.png` | PNG |
300
+ | `figures/forest_plot.png` | PNG |
301
+
302
+ ### 利用可能ツール
303
+
304
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
305
+
306
+ | カテゴリ | 主要ツール | 用途 |
307
+ |---|---|---|
308
+ | WHO | `who_gho_get_data` | WHO GHO データ取得 |
309
+ | WHO | `who_gho_query_health_data` | WHO 健康指標クエリ |
310
+ | CDC | `cdc_data_search_datasets` | CDC データセット検索 |
311
+ | CDC | `cdc_data_get_dataset` | CDC データ取得 |
312
+ | EUHealthInfo | `euhealthinfo_search_surveillance_mortality_rates` | 死亡率データ |
313
+ | EUHealthInfo | `euhealthinfo_search_healthcare_expenditure` | 医療費データ |
314
+ | EUHealthInfo | `euhealthinfo_search_population_health_survey` | 健康調査データ |
315
+ | HealthDisparities | `health_disparities_get_svi_info` | 社会脆弱性指標 |
316
+ | HealthDisparities | `health_disparities_get_county_rankings_info` | 地域健康ランキング |
317
+ | ClinicalTrials | `search_clinical_trials` | 臨床試験検索 |
318
+ | PubMed | `PubMed_Guidelines_Search` | 公衆衛生ガイドライン |
319
+
320
+ ### 参照スキル
321
+
322
+ | スキル | 連携内容 |
323
+ |---|---|
324
+ | [scientific-causal-inference](../scientific-causal-inference/SKILL.md) | 因果推論・傾向スコア |
325
+ | [scientific-survival-clinical](../scientific-survival-clinical/SKILL.md) | 生存解析・Cox 回帰 |
326
+ | [scientific-meta-analysis](../scientific-meta-analysis/SKILL.md) | メタアナリシス・系統的レビュー |
327
+ | [scientific-infectious-disease](../scientific-infectious-disease/SKILL.md) | 感染症疫学 |
328
+ | [scientific-bayesian-statistics](../scientific-bayesian-statistics/SKILL.md) | ベイズ空間モデル |
329
+
330
+ ### 依存パッケージ
331
+
332
+ - geopandas, libpysal, esda, dowhy, lifelines, scipy, statsmodels