@nahisaho/satori 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/README.md +188 -39
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-clinical-trials-analytics/SKILL.md +340 -0
  4. package/src/.github/skills/scientific-computational-materials/SKILL.md +353 -0
  5. package/src/.github/skills/scientific-environmental-ecology/SKILL.md +295 -0
  6. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +332 -0
  7. package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +567 -0
  8. package/src/.github/skills/scientific-gene-expression-transcriptomics/SKILL.md +330 -0
  9. package/src/.github/skills/scientific-immunoinformatics/SKILL.md +341 -0
  10. package/src/.github/skills/scientific-infectious-disease/SKILL.md +342 -0
  11. package/src/.github/skills/scientific-lab-data-management/SKILL.md +334 -0
  12. package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +349 -0
  13. package/src/.github/skills/scientific-neuroscience-electrophysiology/SKILL.md +400 -0
  14. package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +342 -0
  15. package/src/.github/skills/scientific-population-genetics/SKILL.md +336 -0
  16. package/src/.github/skills/scientific-proteomics-mass-spectrometry/SKILL.md +401 -0
  17. package/src/.github/skills/scientific-regulatory-science/SKILL.md +256 -0
  18. package/src/.github/skills/scientific-scientific-schematics/SKILL.md +336 -0
  19. package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +361 -0
  20. package/src/.github/skills/scientific-spatial-transcriptomics/SKILL.md +281 -0
  21. package/src/.github/skills/scientific-systems-biology/SKILL.md +310 -0
  22. package/src/.github/skills/scientific-text-mining-nlp/SKILL.md +358 -0
@@ -0,0 +1,567 @@
1
+ ---
2
+ name: scientific-epigenomics-chromatin
3
+ description: |
4
+ エピゲノミクス・クロマチン生物学解析スキル。ChIP-seq ピーク呼び出し (MACS2/MACS3)、
5
+ ATAC-seq ヌクレオソームフリー領域検出、DNA メチル化パターン解析 (WGBS/RRBS)、
6
+ ヒストン修飾クロマチン状態モデリング (ChromHMM)、Hi-C 接触マップ・TAD 検出、
7
+ 転写因子結合サイト予測 (モチーフ濃縮)、差次結合解析 (DiffBind) を統合した
8
+ 計算エピゲノミクスパイプライン。ChIP-Atlas 43 万+実験との連携対応。
9
+ ---
10
+
11
+ # Scientific Epigenomics & Chromatin Biology
12
+
13
+ ChIP-seq・ATAC-seq・バイサルファイトシーケンシング・Hi-C データを対象に、
14
+ ピーク呼び出し→差次結合解析→クロマチン状態注釈→3D ゲノム構造解析の
15
+ 統合エピゲノミクスパイプラインを提供する。
16
+
17
+ ## When to Use
18
+
19
+ - ChIP-seq データからヒストン修飾・転写因子結合部位を同定するとき
20
+ - ATAC-seq でクロマチンアクセシビリティを評価するとき
21
+ - DNA メチル化(WGBS/RRBS)パターンを解析するとき
22
+ - Hi-C データから TAD/ループ・3D ゲノム構造を推定するとき
23
+ - 複数エピゲノムマークを統合してクロマチン状態を分類するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. ChIP-seq ピーク呼び出し (MACS2/MACS3)
30
+
31
+ ```python
32
+ import subprocess
33
+ import pandas as pd
34
+ import numpy as np
35
+
36
+
37
def chipseq_peak_calling(treatment_bam, control_bam, genome_size="hs",
                         outdir="results/chipseq", name="sample",
                         peak_type="narrow", qvalue=0.05):
    """
    Call ChIP-seq peaks with MACS3 and load the resulting peak table.

    Parameters:
        treatment_bam: BAM file of the ChIP (treatment) sample.
        control_bam: BAM file of the control (Input/IgG). A falsy value
            (e.g. None) runs MACS3 without a control.
        genome_size: Effective genome size (hs/mm/ce/dm or an integer).
        outdir: Directory MACS3 writes to (created if missing).
        name: Prefix of the MACS3 output files.
        peak_type: "narrow" (e.g. transcription factors) or "broad"
            (e.g. histone marks such as H3K27me3).
        qvalue: FDR threshold; also used as --broad-cutoff in broad mode.

    Returns:
        pandas.DataFrame with the narrowPeak/broadPeak columns plus a
        "width" column (end - start).

    Raises:
        ValueError: if peak_type is neither "narrow" nor "broad".
        subprocess.CalledProcessError: if MACS3 exits with a non-zero status.
    """
    import os

    # Validate before touching the filesystem or launching MACS3: any other
    # value would read a 10-column narrowPeak file with only 9 column names.
    if peak_type not in ("narrow", "broad"):
        raise ValueError(
            f"peak_type must be 'narrow' or 'broad', got {peak_type!r}"
        )

    os.makedirs(outdir, exist_ok=True)

    cmd = ["macs3", "callpeak", "-t", treatment_bam]
    if control_bam:
        cmd.extend(["-c", control_bam])
    cmd.extend([
        "-g", str(genome_size),
        "--outdir", outdir,
        "-n", name,
        "-q", str(qvalue),
        "--keep-dup", "auto",
    ])

    if peak_type == "broad":
        cmd.extend(["--broad", "--broad-cutoff", str(qvalue)])
    else:
        # MACS refuses to combine --broad with --call-summits, so summit
        # calling is only requested in narrow mode.
        cmd.append("--call-summits")

    print(f"Running MACS3 peak calling ({peak_type} mode)...")
    subprocess.run(cmd, check=True)

    # Load the peak file MACS3 just wrote.
    suffix = "broadPeak" if peak_type == "broad" else "narrowPeak"
    peak_file = os.path.join(outdir, f"{name}_peaks.{suffix}")
    cols = ["chr", "start", "end", "name", "score", "strand",
            "signalValue", "pValue", "qValue"]
    if peak_type == "narrow":
        # narrowPeak carries a 10th column: summit offset from start.
        cols.append("summit")

    peaks = pd.read_csv(peak_file, sep="\t", header=None, names=cols)
    peaks["width"] = peaks["end"] - peaks["start"]

    print(f" Called {len(peaks):,} {peak_type} peaks (q < {qvalue})")
    print(f" Median peak width: {peaks['width'].median():.0f} bp")
    print(f" Mean signal value: {peaks['signalValue'].mean():.2f}")

    return peaks
87
+
88
+
89
def chipseq_qc_metrics(peaks, frip_bam=None, total_reads=None):
    """
    Summarise a peak table into a few QC numbers.

    Parameters:
        peaks: DataFrame with "width", "signalValue" and "qValue" columns.
        frip_bam, total_reads: accepted but not used by this function;
            FRiP (Fraction of Reads in Peaks) is NOT computed here.

    Returns:
        dict with keys "n_peaks", "median_width_bp", "mean_signal",
        "mean_log10_qvalue" (plain mean of the "qValue" column) and
        "quality_flag" (LOW / MODERATE / HIGH by peak count).
    """
    peak_count = len(peaks)

    # Peak-count tiers: < 500 is LOW, < 10000 is MODERATE, otherwise HIGH.
    if peak_count >= 10000:
        flag = "HIGH"
    elif peak_count >= 500:
        flag = "MODERATE"
    else:
        flag = "LOW — < 500 peaks"

    return {
        "n_peaks": peak_count,
        "median_width_bp": float(peaks["width"].median()),
        "mean_signal": float(peaks["signalValue"].mean()),
        "mean_log10_qvalue": float(peaks["qValue"].mean()),
        "quality_flag": flag,
    }
112
+ ```
113
+
114
+ ## 2. ATAC-seq アクセシビリティ解析
115
+
116
+ ```python
117
+ import numpy as np
118
+ import pandas as pd
119
+
120
+
121
def atacseq_nucleosome_free_regions(fragments_file, output_dir="results/atacseq"):
    """
    Classify ATAC-seq fragments by length into nucleosome-occupancy classes.

    Length classes (left-closed intervals, so the boundaries match the labels):
        - [0, 150) bp:   Nucleosome-Free Region (NFR)
        - [150, 300) bp: Mono-nucleosome
        - [300, 500) bp: Di-nucleosome
        - >= 500 bp:     Tri-nucleosome and larger (no upper cap)

    Parameters:
        fragments_file: Tab-separated file without header whose columns are
            chr, start, end, barcode, count.
        output_dir: Directory that is created if missing. Nothing is written
            into it by this function.

    Returns:
        (fragments, size_dist): the fragment table with added "length" and
        "category" columns, and the per-category fraction of fragments.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    fragments = pd.read_csv(fragments_file, sep="\t", header=None,
                            names=["chr", "start", "end", "barcode", "count"])
    fragments["length"] = fragments["end"] - fragments["start"]

    # right=False makes a 150 bp fragment mono-nucleosomal rather than NFR,
    # and the open-ended last bin keeps fragments longer than 10 kb in the
    # distribution instead of turning them into NaN.
    bins = [0, 150, 300, 500, np.inf]
    labels = ["NFR (<150)", "Mono-nuc (150-300)",
              "Di-nuc (300-500)", "Tri-nuc+ (>500)"]
    fragments["category"] = pd.cut(fragments["length"], bins=bins,
                                   labels=labels, right=False)

    size_dist = fragments["category"].value_counts(normalize=True)
    nfr_ratio = size_dist.get("NFR (<150)", 0)

    print(f" Fragment size distribution:")
    for cat, pct in size_dist.items():
        print(f" {cat}: {pct:.1%}")
    print(f" NFR ratio: {nfr_ratio:.1%} (ENCODE target: >40%)")

    return fragments, size_dist
154
+
155
+
156
def atacseq_tss_enrichment(peaks, gene_gtf, window=2000):
    """
    Convert a peak table to a BedTool as the first step of a TSS-enrichment
    calculation.

    This is a skeleton: the GTF is not parsed and no intersection or score
    is computed. `gene_gtf` is currently unused and `window` only appears in
    the printed message.

    Parameters:
        peaks: DataFrame with "chr", "start", "end", "name", "score" columns.
        gene_gtf: Gene annotation GTF (unused here).
        window: Half-width in bp of the TSS window reported in the log line.

    Returns:
        pybedtools.BedTool built from the five BED columns of `peaks`.
    """
    from pybedtools import BedTool

    bed_columns = ["chr", "start", "end", "name", "score"]
    peak_intervals = BedTool.from_dataframe(peaks[bed_columns])

    # Building TSS +/- window intervals from the GTF and intersecting them
    # with the peaks is intentionally left out of this snippet.
    print(f" TSS enrichment window: ±{window} bp")
    print(f" Total peaks overlapping TSS regions: calculated post-intersection")

    return peak_intervals
173
+ ```
174
+
175
+ ## 3. DNA メチル化パターン解析
176
+
177
+ ```python
178
+ import numpy as np
179
+ import pandas as pd
180
+
181
+
182
def bisulfite_methylation_analysis(methylation_file, min_coverage=10,
                                   output_prefix="results/methylation"):
    """
    Compute per-CpG methylation levels from bisulfite sequencing counts.

    Steps performed here: read counts, compute coverage and beta
    (methylated / total), drop low-coverage sites, print global statistics
    and classify each site as hypo / intermediate / hyper-methylated.
    CpG-island annotation and DMR detection are not part of this function.

    Parameters:
        methylation_file: Tab-separated file without header whose first five
            columns are chr, pos, strand, methylated count, unmethylated
            count. Extra trailing columns are ignored.
            NOTE(review): this matches the layout of a Bismark cytosine
            report, not the Bismark coverage file - confirm the input.
        min_coverage: Minimum total read count for a site to be kept.
        output_prefix: Output prefix; only its parent directory is created.

    Returns:
        pandas.DataFrame of the retained sites with added "coverage",
        "beta" and categorical "status" columns.
    """
    import os

    # os.makedirs("") raises, so skip it for a prefix without a directory.
    out_dir = os.path.dirname(output_prefix)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # usecols pins the five named columns; without it pandas would turn the
    # leading columns of a wider file into the index and shift every name.
    df = pd.read_csv(methylation_file, sep="\t", header=None,
                     usecols=list(range(5)),
                     names=["chr", "pos", "strand", "count_m", "count_u"])
    df["coverage"] = df["count_m"] + df["count_u"]
    df["beta"] = df["count_m"] / df["coverage"]

    n_before = len(df)
    df = df[df["coverage"] >= min_coverage].copy()
    print(f" Coverage filter (≥{min_coverage}x): {n_before:,} → {len(df):,} CpGs")

    mean_beta = df["beta"].mean()
    median_beta = df["beta"].median()
    print(f" Global methylation: mean β = {mean_beta:.3f}, median β = {median_beta:.3f}")

    # Classify with explicit comparisons so the classes are exactly the ones
    # printed below: hypo is beta < 0.2, intermediate is 0.2 <= beta <= 0.8,
    # hyper is beta > 0.8. Fully unmethylated sites (beta == 0) count as
    # hypo; NaN beta (zero coverage) stays unclassified via code -1.
    beta = df["beta"].to_numpy()
    codes = np.select([beta < 0.2, beta <= 0.8, beta > 0.8], [0, 1, 2],
                      default=-1)
    df["status"] = pd.Categorical.from_codes(
        codes, categories=["hypo", "intermediate", "hyper"]
    )
    status_counts = df["status"].value_counts(normalize=True)
    print(f" Hypomethylated (β<0.2): {status_counts.get('hypo', 0):.1%}")
    print(f" Intermediate (0.2≤β≤0.8): {status_counts.get('intermediate', 0):.1%}")
    print(f" Hypermethylated (β>0.8): {status_counts.get('hyper', 0):.1%}")

    return df
223
+
224
+
225
def _benjamini_hochberg(pvalues):
    """Return Benjamini-Hochberg adjusted p-values; NaN inputs stay NaN."""
    p = np.asarray(pvalues, dtype=float)
    adjusted = np.full(p.shape, np.nan)
    valid = np.flatnonzero(~np.isnan(p))
    m = valid.size
    if m == 0:
        return adjusted
    order = valid[np.argsort(p[valid])]
    scaled = p[order] * m / np.arange(1, m + 1)
    # Enforce monotonicity from the largest p-value downwards.
    monotone = np.minimum.accumulate(scaled[::-1])[::-1]
    adjusted[order] = np.minimum(monotone, 1.0)
    return adjusted


def detect_dmrs(group1_betas, group2_betas, positions, min_cpgs=5,
                delta_beta_cutoff=0.2, pvalue_cutoff=0.05):
    """
    Test each CpG for differential methylation between two groups.

    A two-sided Mann-Whitney U test is run per CpG, p-values are
    Benjamini-Hochberg adjusted, and sites are kept when the adjusted p-value
    is below `pvalue_cutoff` and |delta beta| is at least `delta_beta_cutoff`.
    Results are per-site (DMCs); sites are not merged into regions, so
    `min_cpgs` is accepted for interface compatibility but not used.

    Parameters:
        group1_betas, group2_betas: array-like of shape (n_cpgs, n_samples).
        positions: sequence of mappings with "chr" and "pos" keys, one per CpG.
        min_cpgs: minimum CpGs per region (currently unused).
        delta_beta_cutoff: minimum absolute difference of group means.
        pvalue_cutoff: threshold applied to the adjusted p-value.

    Returns:
        (df, sig): all tested sites and the significant subset.

    Raises:
        ValueError: if the matrices and `positions` disagree on the CpG count.
    """
    from scipy.stats import mannwhitneyu

    # np.asarray lets callers pass DataFrames or nested lists as well.
    g1 = np.asarray(group1_betas, dtype=float)
    g2 = np.asarray(group2_betas, dtype=float)
    n_sites = len(positions)
    if g1.shape[0] != n_sites or g2.shape[0] != n_sites:
        raise ValueError(
            "group1_betas, group2_betas and positions must describe the "
            "same number of CpGs"
        )

    columns = ["chr", "pos", "delta_beta", "pvalue",
               "mean_group1", "mean_group2"]
    if n_sites == 0:
        # Nothing to test: return empty, correctly shaped tables.
        empty = pd.DataFrame(columns=columns + ["padj"])
        print(f" Significant DMCs (Δβ≥{delta_beta_cutoff}, FDR<{pvalue_cutoff}): {0:,}")
        return empty, empty.copy()

    mean_g1 = g1.mean(axis=1)
    mean_g2 = g2.mean(axis=1)
    delta_beta = mean_g2 - mean_g1

    results = []
    for i, site in enumerate(positions):
        _, pval = mannwhitneyu(g1[i, :], g2[i, :], alternative="two-sided")
        results.append({
            "chr": site["chr"],
            "pos": site["pos"],
            "delta_beta": float(delta_beta[i]),
            "pvalue": float(pval),
            "mean_group1": float(mean_g1[i]),
            "mean_group2": float(mean_g2[i]),
        })

    df = pd.DataFrame(results, columns=columns)

    # Multiple-testing correction (Benjamini-Hochberg FDR), done with NumPy
    # so no package beyond the declared dependencies is needed.
    df["padj"] = _benjamini_hochberg(df["pvalue"].to_numpy())

    sig = df[(df["padj"] < pvalue_cutoff) &
             (df["delta_beta"].abs() >= delta_beta_cutoff)]
    print(f" Significant DMCs (Δβ≥{delta_beta_cutoff}, FDR<{pvalue_cutoff}): {len(sig):,}")

    return df, sig
267
+ ```
268
+
269
+ ## 4. クロマチン状態モデリング (ChromHMM)
270
+
271
+ ```python
272
+ import subprocess
273
+ import pandas as pd
274
+ import numpy as np
275
+
276
+
277
def chromhmm_learn_model(binarized_dir, output_dir, n_states=15,
                         assembly="hg38", chromhmm_jar="ChromHMM.jar",
                         bin_size=200, java_memory="8G"):
    """
    Run ChromHMM LearnModel and load the learned parameter tables.

    The binarized signal of several histone marks (e.g. H3K4me1/me3,
    H3K27ac, H3K27me3, H3K36me3, H3K9me3) is segmented into `n_states`
    chromatin states.

    Parameters:
        binarized_dir: Directory with ChromHMM binarized input files.
        output_dir: Directory for the ChromHMM output (created if missing).
        n_states: Number of chromatin states to learn.
        assembly: Genome assembly name passed to ChromHMM.
        chromhmm_jar: Path to ChromHMM.jar.
        bin_size: Bin size in bp passed as `-b`.
        java_memory: Maximum Java heap size, e.g. "8G".

    Returns:
        dict with "n_states", "output_dir", and the "transitions" and
        "emissions" tables as DataFrames (None when the file is absent).

    Raises:
        subprocess.CalledProcessError: if ChromHMM exits with a non-zero status.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    cmd = [
        "java", f"-mx{java_memory}", "-jar", chromhmm_jar, "LearnModel",
        "-b", str(bin_size),
        binarized_dir, output_dir, str(n_states), assembly,
    ]
    print(f"Running ChromHMM LearnModel with {n_states} states...")
    subprocess.run(cmd, check=True)

    # Transition probability matrix, kept so callers can use it.
    trans = None
    trans_file = os.path.join(output_dir, f"transitions_{n_states}.txt")
    if os.path.exists(trans_file):
        trans = pd.read_csv(trans_file, sep="\t", index_col=0)
        print(f" Transition matrix: {trans.shape}")

    # Emission probability matrix.
    emit = None
    emit_file = os.path.join(output_dir, f"emissions_{n_states}.txt")
    if os.path.exists(emit_file):
        emit = pd.read_csv(emit_file, sep="\t", index_col=0)
        print(f" Emission matrix: {emit.shape}")

    return {
        "n_states": n_states,
        "output_dir": output_dir,
        "transitions": trans,
        "emissions": emit,
    }
314
+
315
+
316
def annotate_chromatin_states(segments_bed, state_labels=None):
    """
    Label ChromHMM segments and report how much of the genome each state covers.

    Parameters:
        segments_bed: ChromHMM `*_segments.bed` file (chr, start, end, state).
        state_labels: Optional mapping from state to a functional label.
            Keys may be state names ("E1") or state numbers (1). When
            omitted or empty, a built-in 15-state label set is used.

    Returns:
        (segments, state_coverage): the segment table with added "width" and
        "label" columns, and the fraction of total segment length per label.
        States without a label keep their raw state name, so the fractions
        always sum to 1.
    """
    default_labels = {
        "E1": "Active TSS", "E2": "Flanking Active TSS",
        "E3": "Transcription at gene 5'/3'", "E4": "Strong Transcription",
        "E5": "Weak Transcription", "E6": "Genic Enhancers",
        "E7": "Enhancers", "E8": "ZNF genes & Repeats",
        "E9": "Heterochromatin", "E10": "Bivalent/Poised TSS",
        "E11": "Flanking Bivalent TSS/Enh", "E12": "Bivalent Enhancer",
        "E13": "Repressed PolyComb", "E14": "Weak Repressed PolyComb",
        "E15": "Quiescent/Low",
    }
    raw_labels = state_labels or default_labels
    # Segment files name states "E<n>"; accept bare state numbers as keys too.
    labels = {
        (f"E{key}" if isinstance(key, int) else key): value
        for key, value in raw_labels.items()
    }

    segments = pd.read_csv(segments_bed, sep="\t", header=None,
                           usecols=[0, 1, 2, 3],
                           names=["chr", "start", "end", "state"])
    segments["width"] = segments["end"] - segments["start"]
    # Unmapped states would become NaN and silently drop out of the groupby
    # below, so fall back to the raw state name.
    segments["label"] = segments["state"].map(labels).fillna(segments["state"])

    total_bp = segments["width"].sum()
    state_coverage = segments.groupby("label")["width"].sum() / total_bp
    print(" Chromatin state genome coverage:")
    for label, pct in state_coverage.sort_values(ascending=False).items():
        print(f" {label}: {pct:.1%}")

    return segments, state_coverage
349
+ ```
350
+
351
+ ## 5. Hi-C 3D ゲノム構造解析
352
+
353
+ ```python
354
+ import numpy as np
355
+ import pandas as pd
356
+
357
+
358
def hic_contact_matrix_analysis(cool_file, resolution=10000,
                                chromosome="chr1"):
    """
    Load the balanced Hi-C contact matrix of one chromosome from a cooler file.

    Only loading is done here; compartment and TAD calling are handled by
    `ab_compartment_analysis` and `call_tads_insulation_score`.

    Parameters:
        cool_file: Path to a single-resolution `.cool` file, a
            multi-resolution file (e.g. `.mcool`), or a full cooler URI
            containing "::".
        resolution: Bin size in bp used to select the
            `resolutions/<resolution>` group of a multi-resolution file.
        chromosome: Chromosome whose intra-chromosomal matrix is fetched.

    Returns:
        The matrix returned by `Cooler.matrix(balance=True).fetch(chromosome)`.
        NOTE(review): balance=True presumably requires balancing weights to
        be stored in the file - confirm for your inputs.
    """
    import cooler

    path = str(cool_file)
    if "::" in path or path.endswith(".cool"):
        # Already a full URI, or a single-resolution file that has no
        # "resolutions/<n>" group to select.
        uri = path
    else:
        uri = f"{path}::resolutions/{resolution}"

    clr = cooler.Cooler(uri)
    matrix = clr.matrix(balance=True).fetch(chromosome)

    # A single-resolution file may use a bin size different from `resolution`.
    binsize = clr.binsize or resolution

    print(f" Contact matrix shape: {matrix.shape}")
    print(f" Resolution: {binsize:,} bp")
    # nan_to_num turns masked (NaN) bins into 0 so they are not counted as
    # non-zero contacts.
    print(f" Non-zero entries: {np.count_nonzero(np.nan_to_num(matrix)):,}")

    return matrix
378
+
379
+
380
def call_tads_insulation_score(matrix, resolution=10000, window_size=500000,
                               extrema_order=5):
    """
    Call TAD boundaries as local minima of an insulation score.

    For each bin i the score is the mean contact between the `window_bins`
    bins upstream and the `window_bins` bins downstream of i. Scores are
    divided by the mean positive score and log2-transformed; strict local
    minima are reported as boundaries.

    Parameters:
        matrix: Square contact matrix (array-like); NaN entries are ignored.
        resolution: Bin size in bp.
        window_size: Insulation window in bp; must be at least `resolution`.
        extrema_order: Number of neighbouring bins on each side that a
            minimum must be lower than.

    Returns:
        (log_insulation, tad_boundaries): per-bin log2 score and boundary
        positions in bp. Bins closer than one window to either end have no
        defined window; their raw score stays 0, which becomes
        log2(1e-10) after the transform.

    Raises:
        ValueError: if `window_size` is smaller than `resolution`.
    """
    from scipy.signal import argrelextrema

    window_bins = window_size // resolution
    if window_bins < 1:
        raise ValueError("window_size must be at least one bin (>= resolution)")

    matrix = np.asarray(matrix, dtype=float)
    n = matrix.shape[0]
    insulation = np.zeros(n)

    for i in range(window_bins, n - window_bins):
        submat = matrix[i - window_bins:i, i:i + window_bins]
        # An all-NaN window has no defined mean; set NaN directly instead of
        # letting nanmean emit a warning.
        insulation[i] = np.nan if np.isnan(submat).all() else np.nanmean(submat)

    # log2 normalisation against the mean of the positive scores.
    positive = insulation[insulation > 0]
    if positive.size:
        log_insulation = np.log2(insulation / positive.mean() + 1e-10)
    else:
        log_insulation = np.full(n, np.nan)

    # TAD boundaries are the local minima of the insulation score.
    minima = argrelextrema(log_insulation, np.less, order=extrema_order)[0]

    tad_boundaries = minima * resolution
    # k boundaries delimit k - 1 domains; never report a negative count.
    n_tads = max(len(tad_boundaries) - 1, 0)
    if len(tad_boundaries) >= 2:
        mean_tad_mb = np.diff(tad_boundaries).mean() / 1e6
    else:
        mean_tad_mb = float("nan")

    print(f" Found {len(tad_boundaries)} TAD boundaries")
    print(f" Estimated {n_tads} TADs")
    print(f" Mean TAD size: {mean_tad_mb:.2f} Mb")

    return log_insulation, tad_boundaries
412
+
413
+
414
def ab_compartment_analysis(matrix, resolution=100000):
    """
    Assign bins to A/B compartments from a Hi-C contact matrix.

    The matrix is turned into an observed/expected matrix (expected = mean of
    each diagonal), row-correlated, and decomposed with PCA. Bins with
    positive PC1 are labelled "A", all others "B".

    NOTE(review): the sign of PC1 is not oriented against gene density or GC
    content here, so which label corresponds to active chromatin should be
    confirmed by the caller. `resolution` is not used in the computation.

    Returns:
        (pc1, compartment): first principal component per bin and the
        matching array of "A"/"B" labels.
    """
    from sklearn.decomposition import PCA

    contacts = np.nan_to_num(matrix, nan=0.0)
    n_bins = contacts.shape[0]

    # Expected contact at genomic distance d is the mean of the d-th
    # diagonal; broadcast those means onto every (i, j) with |i - j| == d.
    diagonal_means = np.array(
        [np.mean(np.diag(contacts, offset)) for offset in range(n_bins)]
    )
    bin_index = np.arange(n_bins)
    distance = np.abs(bin_index[:, None] - bin_index[None, :])
    expected = diagonal_means[distance].astype(contacts.dtype, copy=False)

    observed_over_expected = contacts / (expected + 1e-10)

    # Correlation matrix -> PCA; undefined correlations are set to 0.
    correlation = np.nan_to_num(np.corrcoef(observed_over_expected))

    pca = PCA(n_components=2)
    pc1 = pca.fit_transform(correlation)[:, 0]

    compartment = np.where(pc1 > 0, "A", "B")
    a_frac = np.mean(compartment == "A")

    print(f" A compartment: {a_frac:.1%}")
    print(f" B compartment: {1 - a_frac:.1%}")
    print(f" PC1 variance explained: {pca.explained_variance_ratio_[0]:.1%}")

    return pc1, compartment
451
+ ```
452
+
453
+ ## 6. 転写因子モチーフ濃縮解析
454
+
455
+ ```python
456
+ import pandas as pd
457
+ import numpy as np
458
+ from scipy.stats import fisher_exact
459
+
460
+
461
def motif_enrichment_analysis(peak_sequences, background_sequences,
                              jaspar_db="JASPAR2024_CORE_vertebrates",
                              pvalue_cutoff=0.01):
    """
    Placeholder for transcription-factor motif enrichment in peak regions.

    No scanning or statistics are performed yet: the function prints what a
    real run would do and returns an empty list. A full implementation would
    scan both sequence sets with an external tool (e.g. FIMO, HOMER or
    MEME-ChIP) and compare peak hits against background hits per motif with
    Fisher's exact test.

    Parameters:
        peak_sequences: FASTA file of peak sequences (peak centre +/- 250 bp).
        background_sequences: FASTA file of random genomic background regions.
        jaspar_db: Name of the JASPAR motif collection (only printed).
        pvalue_cutoff: Intended significance threshold (currently unused).

    Returns:
        list: always empty in this placeholder.
    """
    # No Biopython import here: nothing in this placeholder uses it, and
    # importing it would make the stub fail when biopython is not installed.
    results = []

    print(f" Scanning {jaspar_db} motifs against peak sequences...")
    print(" (Using FIMO from MEME Suite for motif scanning)")

    return results
486
+
487
+
488
def differential_binding_analysis(sample_sheet, peaks_dir,
                                  contrast=("Treatment", "Control"),
                                  fdr_cutoff=0.05, fold_change_cutoff=2):
    """
    Build an R script that runs a DiffBind differential binding analysis.

    The script is only generated and returned; it is not executed here.

    Parameters:
        sample_sheet: Path to the DiffBind sample sheet CSV.
        peaks_dir: Directory with peak files (not referenced by the script).
        contrast: (treatment, control) names; used in the log line only, the
            script contrasts on DBA_CONDITION.
        fdr_cutoff: FDR threshold passed to dba.report as `th`.
        fold_change_cutoff: LINEAR fold-change threshold (must be > 0). It is
            converted to log2 before being passed to dba.report as `fold`,
            so the default 2 means |log2FC| >= 1.

    Returns:
        str: the R script text.

    Raises:
        ValueError: if fold_change_cutoff is not positive.
    """
    if fold_change_cutoff <= 0:
        # log2 of a non-positive number would put -inf/nan into the script.
        raise ValueError("fold_change_cutoff must be a positive linear fold change")

    log2_fold = float(np.log2(fold_change_cutoff))

    # Escape backslashes and quotes so the path stays a valid R string.
    r_path = str(sample_sheet).replace("\\", "\\\\").replace('"', '\\"')

    r_lines = [
        "library(DiffBind)",
        f'samples <- read.csv("{r_path}")',
        "dba <- dba(sampleSheet=samples)",
        "dba <- dba.count(dba)",
        "dba <- dba.contrast(dba, categories=DBA_CONDITION)",
        "dba <- dba.analyze(dba)",
        f"db_sites <- dba.report(dba, th={fdr_cutoff}, fold={log2_fold})",
        # write.csv fails when the target directory does not exist.
        'dir.create("results", showWarnings=FALSE, recursive=TRUE)',
        'write.csv(as.data.frame(db_sites), "results/diffbind_results.csv")',
    ]
    r_script = "\n" + "\n".join(r_lines) + "\n"

    print(f" Running DiffBind: {contrast[0]} vs {contrast[1]}")
    print(f" FDR cutoff: {fdr_cutoff}, log2FC cutoff: ±{log2_fold:.1f}")

    return r_script
517
+ ```
518
+
519
+ ## References
520
+
521
+ ### Output Files
522
+
523
+ | ファイル | 形式 |
524
+ |---|---|
525
+ | `results/chipseq/{name}_peaks.narrowPeak` | BED/narrowPeak |
526
+ | `results/chipseq/{name}_peaks.broadPeak` | BED/broadPeak |
527
+ | `results/atacseq/fragment_size_dist.csv` | CSV |
528
+ | `results/methylation/dmr_results.csv` | CSV |
529
+ | `results/chromhmm/emissions_{n}.txt` | TSV |
530
+ | `results/hic/tad_boundaries.bed` | BED |
531
+ | `results/hic/compartments.csv` | CSV |
532
+ | `results/diffbind_results.csv` | CSV |
533
+ | `figures/chromatin_state_heatmap.png` | PNG |
534
+ | `figures/hic_contact_map.png` | PNG |
535
+
536
+ ### 利用可能ツール
537
+
538
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
539
+
540
+ | カテゴリ | 主要ツール | 用途 |
541
+ |---|---|---|
542
+ | ChIP-Atlas | `ChIPAtlas_enrichment_analysis` | TF/ヒストン修飾エンリッチメント解析 |
543
+ | ChIP-Atlas | `ChIPAtlas_get_experiments` | 実験メタデータ取得 (43 万+実験) |
544
+ | ChIP-Atlas | `ChIPAtlas_get_peak_data` | ピークコールデータ取得 |
545
+ | ChIP-Atlas | `ChIPAtlas_search_datasets` | データセット検索 (抗原/細胞種) |
546
+ | 4DN | `FourDN_search_data` | Hi-C/ChIA-PET 3D ゲノムデータ検索 |
547
+ | JASPAR | `jaspar_search_matrices` | 転写因子結合モチーフ (PWM) 検索 |
548
+ | JASPAR | `jaspar_get_matrix` | PWM (Position Weight Matrix) 取得 |
549
+ | JASPAR | `jaspar_list_collections` | JASPAR コレクション一覧 |
550
+ | SCREEN | `SCREEN_get_regulatory_elements` | cCRE (候補シス調節エレメント) 取得 |
551
+ | ENCODE | `ENCODE_search_experiments` | ENCODE ChIP-seq/ATAC-seq 実験検索 |
552
+ | ENCODE | `ENCODE_get_experiment` | ENCODE 実験詳細取得 |
553
+ | ENCODE | `ENCODE_list_files` | ENCODE ファイル一覧 |
554
+
555
+ ### 参照スキル
556
+
557
+ | スキル | 関連 |
558
+ |---|---|
559
+ | `scientific-single-cell-genomics` | scATAC-seq 連携 |
560
+ | `scientific-sequence-analysis` | ゲノム配列操作 |
561
+ | `scientific-bioinformatics` | BAM/VCF 処理 |
562
+ | `scientific-population-genetics` | eQTL・調節バリアント |
563
+ | `scientific-gene-expression-transcriptomics` | 発現-エピゲノム統合 |
564
+
565
+ ### 依存パッケージ
566
+
567
+ `macs3`, `cooler`, `pybedtools`, `deeptools`, `scikit-learn`, `scipy`, `pandas`, `numpy`, `biopython`