@nahisaho/satori 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-clinical-trials-analytics/SKILL.md +340 -0
- package/src/.github/skills/scientific-computational-materials/SKILL.md +353 -0
- package/src/.github/skills/scientific-environmental-ecology/SKILL.md +295 -0
- package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +332 -0
- package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +567 -0
- package/src/.github/skills/scientific-gene-expression-transcriptomics/SKILL.md +330 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +341 -0
- package/src/.github/skills/scientific-infectious-disease/SKILL.md +342 -0
- package/src/.github/skills/scientific-lab-data-management/SKILL.md +334 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +349 -0
- package/src/.github/skills/scientific-neuroscience-electrophysiology/SKILL.md +400 -0
- package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +342 -0
- package/src/.github/skills/scientific-population-genetics/SKILL.md +336 -0
- package/src/.github/skills/scientific-proteomics-mass-spectrometry/SKILL.md +401 -0
- package/src/.github/skills/scientific-regulatory-science/SKILL.md +256 -0
- package/src/.github/skills/scientific-scientific-schematics/SKILL.md +336 -0
- package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +361 -0
- package/src/.github/skills/scientific-spatial-transcriptomics/SKILL.md +281 -0
- package/src/.github/skills/scientific-systems-biology/SKILL.md +310 -0
- package/src/.github/skills/scientific-text-mining-nlp/SKILL.md +358 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-infectious-disease
|
|
3
|
+
description: |
|
|
4
|
+
感染症ゲノミクス・疫学スキル。病原体ゲノム解析(SNP/系統樹)・
|
|
5
|
+
AMR(薬剤耐性)遺伝子検出・分子疫学(MLST/cgMLST)・
|
|
6
|
+
アウトブレイク調査トレーシング・疫学的 SIR/SEIR コンパートメントモデル・
|
|
7
|
+
伝播ネットワーク推定パイプライン。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Infectious Disease Genomics
|
|
11
|
+
|
|
12
|
+
病原体ゲノミクスと感染症疫学の統合解析パイプラインを提供する。
|
|
13
|
+
病原体配列タイピング、系統解析、薬剤耐性遺伝子検出、
|
|
14
|
+
アウトブレイク伝播推定、数理疫学モデルを体系的に扱う。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- 病原体の全ゲノムシーケンスデータの解析が必要なとき
|
|
19
|
+
- 薬剤耐性(AMR)遺伝子を検出・分類するとき
|
|
20
|
+
- 分子疫学タイピング(MLST, cgMLST, SNP)を行うとき
|
|
21
|
+
- アウトブレイクの伝播経路を推定するとき
|
|
22
|
+
- SIR / SEIR 等のコンパートメントモデルで感染拡大をシミュレーションするとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. 病原体ゲノム前処理
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import numpy as np
|
|
32
|
+
import pandas as pd
|
|
33
|
+
|
|
34
|
+
def pathogen_qc_pipeline(fastq_r1, fastq_r2, reference_genome,
                         min_depth=30, min_coverage=0.95):
    """
    Run the pathogen whole-genome sequencing (WGS) preprocessing pipeline.

    Steps:
        1. fastp - read QC and adapter trimming
        2. BWA-MEM2 - mapping against the reference
        3. samtools fixmate / sort / markdup / index - duplicate marking
        4. FreeBayes - variant calling
        5. samtools depth - mean depth and breadth of coverage

    Quality criteria (reported as a warning, never raised):
        - mean depth >= min_depth (default: 30x)
        - fraction of positions with depth > 0 >= min_coverage (default: 95%)

    Args:
        fastq_r1: Path to the forward-read FASTQ file.
        fastq_r2: Path to the reverse-read FASTQ file.
        reference_genome: Path to the reference FASTA (must already be
            indexed for bwa-mem2).
        min_depth: Minimum acceptable mean depth.
        min_coverage: Minimum acceptable covered fraction of the genome.

    Returns:
        The name of the generated VCF file ("variants.vcf"), written to the
        current working directory together with all intermediate files.

    Raises:
        subprocess.CalledProcessError: If any external tool exits non-zero.
    """
    import shlex
    import subprocess

    # The commands run through a shell (pipes / redirection), so every
    # caller-supplied path is quoted to survive spaces and metacharacters.
    r1 = shlex.quote(str(fastq_r1))
    r2 = shlex.quote(str(fastq_r2))
    ref = shlex.quote(str(reference_genome))

    cmds = [
        # QC + trimming
        f"fastp -i {r1} -I {r2} -o trim_R1.fq.gz -O trim_R2.fq.gz "
        f"--json qc_report.json",
        # Mapping. `samtools markdup` needs the mate-score tags written by
        # `fixmate -m`, which must run while the reads are still grouped by
        # name, i.e. before the coordinate sort.
        f"bwa-mem2 mem -t 8 {ref} trim_R1.fq.gz trim_R2.fq.gz | "
        f"samtools fixmate -m - - | "
        f"samtools sort -@ 4 -o aligned.bam",
        # Mark duplicates
        "samtools markdup aligned.bam dedup.bam",
        "samtools index dedup.bam",
        # Variant calling
        f"freebayes -f {ref} dedup.bam > variants.vcf",
    ]

    for cmd in cmds:
        subprocess.run(cmd, shell=True, check=True)

    # Coverage stats: mean depth and the fraction of positions with depth > 0.
    # The END block guards against an empty depth table (division by zero).
    depth_cmd = (
        "samtools depth -a dedup.bam | "
        "awk '{sum+=$3; if ($3>0) cov++; n++} "
        "END {if (n>0) print sum/n, cov/n; else print 0, 0}'"
    )
    stats = subprocess.run(depth_cmd, shell=True, check=True,
                           capture_output=True, text=True)
    mean_depth, genome_coverage = (float(v) for v in stats.stdout.split())

    print(f" QC: mean depth={mean_depth:.1f}x, "
          f"genome coverage={genome_coverage:.1%}")
    if mean_depth < min_depth or genome_coverage < min_coverage:
        print(f" WARNING: QC thresholds not met "
              f"(min_depth={min_depth}, min_coverage={min_coverage})")

    print(f" Pipeline complete: variants.vcf generated")
    return "variants.vcf"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## 2. AMR 遺伝子検出
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
def detect_amr_genes(assembly_fasta, database="resfinder"):
    """
    Detect antimicrobial resistance (AMR) genes in an assembly.

    Supported databases:
        - "resfinder": ResFinder, acquired resistance genes and point mutations
        - "card": CARD via RGI, comprehensive AMR database

    AMRFinderPlus (NCBI) is a common alternative but is not implemented here.

    Args:
        assembly_fasta: Path to the assembled genome in FASTA format.
        database: Which backend to run ("resfinder" or "card").

    Returns:
        For "resfinder", a list of dicts as produced by
        ``parse_resfinder_output``; for "card", a pandas DataFrame read from
        ``rgi_results.txt``.

    Raises:
        ValueError: If ``database`` is not a supported backend. Raised before
            any external tool is started.
        subprocess.CalledProcessError: If the external tool exits non-zero.
    """
    import shlex
    import subprocess

    supported = ("resfinder", "card")
    if database not in supported:
        raise ValueError(
            f"Unsupported AMR database: {database!r} "
            f"(supported: {', '.join(supported)})"
        )

    # The path is interpolated into a shell command line, so quote it.
    fasta = shlex.quote(str(assembly_fasta))

    if database == "resfinder":
        cmd = (f"python -m resfinder -ifa {fasta} "
               f"--acquired --point -o resfinder_results/")
        subprocess.run(cmd, shell=True, check=True)

        with open("resfinder_results/ResFinder_results_tab.txt") as f:
            results = parse_resfinder_output(f.readlines())
    else:
        # Imported here so the snippet does not depend on a module-level `pd`.
        import pandas as pd

        cmd = f"rgi main -i {fasta} -o rgi_results -t contig -a BLAST"
        subprocess.run(cmd, shell=True, check=True)
        results = pd.read_csv("rgi_results.txt", sep="\t")

    # len() counts list entries and DataFrame rows alike.
    n_genes = len(results)
    print(f" AMR: {n_genes} resistance genes detected ({database})")
    return results
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def parse_resfinder_output(lines):
    """
    Parse the tab-separated ResFinder results table.

    The first line is treated as a header. Columns are located by header
    name ("Resistance gene", "Identity", "Coverage", "Phenotype",
    case-insensitive), so the table is read correctly even when extra columns
    such as "Alignment Length/Gene Length" sit between them. Any column whose
    name is not found falls back to the positional layout
    gene=0, identity=1, coverage=2, phenotype=5.

    Args:
        lines: Lines of the results file, header first.

    Returns:
        A list of dicts with keys "gene", "identity" (float),
        "coverage" (float) and "phenotype". Rows with fewer than six fields
        are skipped. "phenotype" is "Unknown" when the row has no such column.

    Raises:
        ValueError: If an identity or coverage value is not numeric.
    """
    if not lines:
        return []

    header = [name.strip().lower() for name in lines[0].strip().split("\t")]

    def column(name, fallback):
        # Prefer the header-declared position; otherwise use the legacy index.
        return header.index(name) if name in header else fallback

    gene_col = column("resistance gene", 0)
    identity_col = column("identity", 1)
    coverage_col = column("coverage", 2)
    phenotype_col = column("phenotype", 5)
    required = max(gene_col, identity_col, coverage_col)

    results = []
    for line in lines[1:]:
        fields = line.strip().split("\t")
        # Skip blank / truncated rows (fewer than six fields, or missing one
        # of the columns we must read).
        if len(fields) < 6 or len(fields) <= required:
            continue
        results.append({
            "gene": fields[gene_col],
            "identity": float(fields[identity_col]),
            "coverage": float(fields[coverage_col]),
            "phenotype": (fields[phenotype_col]
                          if phenotype_col < len(fields) else "Unknown"),
        })
    return results
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## 3. 分子疫学タイピング
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
def molecular_typing(assembly_fasta, organism, scheme="mlst"):
    """
    Run molecular epidemiological typing on an assembly.

    Supported schemes:
        - "mlst": Multi-Locus Sequence Typing (7 loci). The Sequence Type (ST)
          is determined by the combination of allele numbers at each
          housekeeping locus.
        - "cgmlst": core genome MLST (hundreds to thousands of loci).

    Whole genome MLST ("wgmlst") is not implemented here.

    Args:
        assembly_fasta: Path to the assembled genome in FASTA format.
        organism: Scheme name passed to ``mlst --scheme`` (unused for cgMLST).
        scheme: "mlst" or "cgmlst".

    Returns:
        For "mlst", a dict with keys "file", "scheme", "ST" and "alleles";
        for "cgmlst", a dict with keys "scheme" and "results_dir".

    Raises:
        ValueError: If ``scheme`` is not supported. Raised before any
            external tool is started.
        RuntimeError: If ``mlst`` prints fewer than three tab-separated fields.
        subprocess.CalledProcessError: If the external tool exits non-zero.
    """
    import shlex
    import subprocess

    supported = ("mlst", "cgmlst")
    if scheme not in supported:
        raise ValueError(
            f"Unsupported typing scheme: {scheme!r} "
            f"(supported: {', '.join(supported)})"
        )

    # Values are interpolated into shell command lines, so quote them.
    fasta = shlex.quote(str(assembly_fasta))

    if scheme == "mlst":
        cmd = f"mlst {fasta} --scheme {shlex.quote(str(organism))}"
        result = subprocess.run(cmd, shell=True, capture_output=True,
                                text=True, check=True)
        fields = result.stdout.strip().split("\t")
        # Expected layout: file, scheme, ST, then one field per allele.
        if len(fields) < 3:
            raise RuntimeError(f"Unexpected mlst output: {result.stdout!r}")
        typing_result = {
            "file": fields[0],
            "scheme": fields[1],
            "ST": fields[2],
            "alleles": fields[3:],
        }
    else:
        cmd = f"chewbbaca AlleleCall -i {fasta} -g schema/ -o cgmlst_results/"
        subprocess.run(cmd, shell=True, check=True)
        typing_result = {"scheme": "cgMLST", "results_dir": "cgmlst_results/"}

    print(f" Typing: ST={typing_result.get('ST', 'N/A')} ({scheme})")
    return typing_result
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## 4. 系統解析・伝播推定
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
def phylogenetic_analysis(alignment_fasta, method="iqtree", model="GTR+G"):
    """
    Infer a pathogen phylogeny from a multiple sequence alignment.

    Only ``method="iqtree"`` (IQ-TREE 2, maximum likelihood) is implemented.
    RAxML-NG and BEAST 2 are common alternatives but are rejected here.

    The substitution model is passed to IQ-TREE as given (default "GTR+G").
    NOTE(review): passing ``model="MFP"`` should enable ModelFinder automatic
    model selection; confirm against the installed IQ-TREE version.

    Typical downstream outbreak analyses are an SNP distance matrix with a
    minimum spanning tree, and tMRCA (time to most recent common ancestor)
    estimation.

    Args:
        alignment_fasta: Path to the alignment in FASTA format.
        method: Tree inference backend; must be "iqtree".
        model: Substitution model string for IQ-TREE's ``-m`` option.

    Returns:
        The tree parsed by ``Bio.Phylo.read`` from
        ``<alignment_fasta>.treefile``.

    Raises:
        ValueError: If ``method`` is not "iqtree". Raised before any import
            of Biopython or any external tool is started.
        subprocess.CalledProcessError: If IQ-TREE exits non-zero.
    """
    if method != "iqtree":
        raise ValueError(
            f"Unsupported phylogenetic method: {method!r} (supported: iqtree)"
        )

    import shlex
    import subprocess
    from Bio import Phylo

    # Values are interpolated into a shell command line, so quote them.
    cmd = (f"iqtree2 -s {shlex.quote(str(alignment_fasta))} "
           f"-m {shlex.quote(str(model))} "
           f"-bb 1000 -alrt 1000 -nt AUTO")
    subprocess.run(cmd, shell=True, check=True)

    return Phylo.read(f"{alignment_fasta}.treefile", "newick")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def transmission_network(snp_matrix, max_snp_distance=10):
    """
    Estimate a transmission network from pairwise SNP distances.

    Every unordered pair of samples whose SNP distance is at most
    ``max_snp_distance`` is joined by an edge; a minimum spanning tree and
    the connected components are then derived from that graph.

    Args:
        snp_matrix: Square table of pairwise SNP distances, read positionally
            with ``.iloc`` and labelled through ``.index``
            (presumably a pandas DataFrame; verify against the caller).
        max_snp_distance: Largest SNP distance still treated as a link.

    Returns:
        A tuple ``(graph, mst, clusters)``: the thresholded graph, its
        minimum spanning tree, and the list of connected components.
    """
    import networkx as nx

    sample_ids = snp_matrix.index.tolist()
    n_samples = len(sample_ids)

    graph = nx.Graph()
    graph.add_nodes_from(sample_ids)

    # Walk the strict upper triangle so each pair is examined exactly once.
    for row in range(n_samples):
        for col in range(row + 1, n_samples):
            snps = snp_matrix.iloc[row, col]
            if snps <= max_snp_distance:
                graph.add_edge(sample_ids[row], sample_ids[col],
                               weight=snps, snp_distance=snps)

    clusters = list(nx.connected_components(graph))
    mst = nx.minimum_spanning_tree(graph)

    link_count = graph.number_of_edges()
    print(f" Transmission: {link_count} links, "
          f"{len(clusters)} clusters")
    return graph, mst, clusters
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## 5. SIR / SEIR コンパートメントモデル
|
|
229
|
+
|
|
230
|
+
```python
|
|
231
|
+
from scipy.integrate import odeint
|
|
232
|
+
|
|
233
|
+
def sir_model(y, t, beta, gamma, N):
    """
    Right-hand side of the SIR compartment model.

        dS/dt = -beta * S * I / N
        dI/dt =  beta * S * I / N - gamma * I
        dR/dt =  gamma * I

    The basic reproduction number is R0 = beta / gamma.

    Args:
        y: Current state as a 3-element sequence (S, I, R).
        t: Time (unused; present for the ODE-solver callback signature).
        beta: Transmission rate.
        gamma: Recovery rate.
        N: Total population size.

    Returns:
        The list [dS/dt, dI/dt, dR/dt].
    """
    susceptible, infectious, _recovered = y
    new_infections = beta * susceptible * infectious / N
    recoveries = gamma * infectious
    return [-new_infections, new_infections - recoveries, recoveries]
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def seir_model(y, t, beta, sigma, gamma, N):
    """
    Right-hand side of the SEIR compartment model (with a latent period).

        dS/dt = -beta * S * I / N
        dE/dt =  beta * S * I / N - sigma * E
        dI/dt =  sigma * E - gamma * I
        dR/dt =  gamma * I

    Args:
        y: Current state as a 4-element sequence (S, E, I, R).
        t: Time (unused; present for the ODE-solver callback signature).
        beta: Transmission rate.
        sigma: Inverse of the incubation period (1 / incubation_period).
        gamma: Recovery rate.
        N: Total population size.

    Returns:
        The list [dS/dt, dE/dt, dI/dt, dR/dt].
    """
    susceptible, exposed, infectious, _recovered = y
    new_exposures = beta * susceptible * infectious / N
    onsets = sigma * exposed
    recoveries = gamma * infectious
    return [
        -new_exposures,
        new_exposures - onsets,
        onsets - recoveries,
        recoveries,
    ]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def run_epidemic_simulation(model="SIR", N=1e6, I0=10, R0=2.5,
                            gamma=1/10, sigma=1/5, days=180):
    """
    Simulate the spread of an infectious disease with a compartment model.

    Args:
        model: "SIR" or "SEIR".
        N: Total population size.
        I0: Number of infectious individuals at t = 0.
        R0: Basic reproduction number; the transmission rate is derived as
            beta = R0 * gamma.
        gamma: Recovery rate (1 / infectious period).
        sigma: Onset rate (1 / incubation period; SEIR only).
        days: Number of simulated days; the time grid has
            ``int(days * 10)`` points spanning [0, days].

    Returns:
        A pandas DataFrame with one column per compartment ("S", "I", "R",
        plus "E" for SEIR) and a time column "t".

    Raises:
        ValueError: If ``model`` is neither "SIR" nor "SEIR". Raised before
            any computation.
    """
    if model not in ("SIR", "SEIR"):
        raise ValueError(
            f"Unsupported epidemic model: {model!r} (supported: SIR, SEIR)"
        )

    beta = R0 * gamma
    # np.linspace requires an integer sample count, so cast in case `days`
    # is passed as a float.
    t = np.linspace(0, days, int(days * 10))

    if model == "SIR":
        y0 = [N - I0, I0, 0]
        sol = odeint(sir_model, y0, t, args=(beta, gamma, N))
        df = pd.DataFrame(sol, columns=["S", "I", "R"])
    else:
        # SEIR: the initial cases start in I, with nobody in E.
        y0 = [N - I0, 0, I0, 0]
        sol = odeint(seir_model, y0, t, args=(beta, sigma, gamma, N))
        df = pd.DataFrame(sol, columns=["S", "E", "I", "R"])

    df["t"] = t
    peak_I = df["I"].max()
    peak_day = df.loc[df["I"].idxmax(), "t"]

    print(f" {model}: R₀={R0:.1f}, peak infection={peak_I:.0f} at day {peak_day:.0f}")
    return df
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## References
|
|
301
|
+
|
|
302
|
+
### Output Files
|
|
303
|
+
|
|
304
|
+
| ファイル | 形式 |
|
|
305
|
+
|---|---|
|
|
306
|
+
| `results/amr_genes.csv` | CSV |
|
|
307
|
+
| `results/mlst_typing.json` | JSON |
|
|
308
|
+
| `results/snp_matrix.csv` | CSV |
|
|
309
|
+
| `results/transmission_network.json` | JSON |
|
|
310
|
+
| `results/epidemic_simulation.csv` | CSV |
|
|
311
|
+
| `figures/phylogenetic_tree.png` | PNG |
|
|
312
|
+
| `figures/transmission_network.png` | PNG |
|
|
313
|
+
| `figures/epidemic_curves.png` | PNG |
|
|
314
|
+
|
|
315
|
+
### 利用可能ツール
|
|
316
|
+
|
|
317
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
318
|
+
|
|
319
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
320
|
+
|---|---|---|
|
|
321
|
+
| EUHealthInfo | `euhealthinfo_search_infectious_diseases` | 感染症サーベイランスデータ |
|
|
322
|
+
| EUHealthInfo | `euhealthinfo_search_surveillance` | 疫学サーベイランス |
|
|
323
|
+
| CDC | `cdc_data_search_datasets` | CDC データセット検索 |
|
|
324
|
+
| CDC | `cdc_data_get_dataset` | CDC データ取得 |
|
|
325
|
+
| NCBI | `BLAST_nucleotide_search` | 病原体配列同定 |
|
|
326
|
+
| NCBI | `NCBI_get_sequence` | ゲノム配列取得 |
|
|
327
|
+
| PubMed | `PubMed_search_articles` | 感染症文献検索 |
|
|
328
|
+
| ClinicalTrials | `search_clinical_trials` | 感染症治療臨床試験 |
|
|
329
|
+
|
|
330
|
+
### 参照スキル
|
|
331
|
+
|
|
332
|
+
| スキル | 連携内容 |
|
|
333
|
+
|---|---|
|
|
334
|
+
| [scientific-sequence-analysis](../scientific-sequence-analysis/SKILL.md) | 配列アライメント・BLAST |
|
|
335
|
+
| [scientific-bioinformatics](../scientific-bioinformatics/SKILL.md) | ゲノムアノテーション |
|
|
336
|
+
| [scientific-network-analysis](../scientific-network-analysis/SKILL.md) | 伝播ネットワーク可視化 |
|
|
337
|
+
| [scientific-survival-clinical](../scientific-survival-clinical/SKILL.md) | 感染症アウトカム解析 |
|
|
338
|
+
| [scientific-bayesian-statistics](../scientific-bayesian-statistics/SKILL.md) | ベイズ系統年代学 |
|
|
339
|
+
|
|
340
|
+
### 依存パッケージ
|
|
341
|
+
|
|
342
|
+
- numpy, pandas, biopython, ete3, scipy, networkx, subprocess (fastp, bwa-mem2, samtools, freebayes, resfinder, rgi, mlst, chewbbaca, iqtree2)
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-lab-data-management
|
|
3
|
+
description: |
|
|
4
|
+
ラボデータ管理スキル。Benchling (ELN/DNA 設計/レジストリ)、
|
|
5
|
+
DNAnexus (ゲノミクス PaaS)、LatchBio (ワークフロー)、
|
|
6
|
+
OMERO (バイオイメージング)、Protocols.io (プロトコル共有)
|
|
7
|
+
を統合したウェット・ドライラボデータ管理パイプライン。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Lab Data Management
|
|
11
|
+
|
|
12
|
+
ウェットラボ実験管理からゲノミクスデータ処理まで、
|
|
13
|
+
ラボデータの生成・記録・解析・共有を統合管理するパイプライン。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- 電子実験ノート (ELN) でプロトコル・結果を記録するとき
|
|
18
|
+
- DNA 配列設計・クローニング計画を管理するとき
|
|
19
|
+
- ゲノミクス大規模データを PaaS 上で解析するとき
|
|
20
|
+
- バイオイメージングデータを構造化管理するとき
|
|
21
|
+
- 実験プロトコルを共有・再現するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. Benchling ELN / DNA 設計
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import json
|
|
31
|
+
import requests
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BenchlingClient:
    """
    Benchling API client (request builder).

    Benchling features covered conceptually:
        - ELN (Electronic Lab Notebook): experiment records
        - Molecular Biology: DNA sequence design, primer design, cloning
        - Registry: sample and reagent registry
        - Inventory: stock management

    The methods below only assemble and return request payloads / query
    parameters and print a short summary; they do not send HTTP requests.
    """

    def __init__(self, api_key, tenant_url):
        """
        Store the base URL and default headers for a tenant.

        Args:
            api_key: Benchling API key (the raw key, not pre-encoded).
            tenant_url: Tenant host name, e.g. "example.benchling.com".
        """
        import base64

        self.base_url = f"https://{tenant_url}/api/v2"
        # HTTP Basic auth carries base64("username:password"); the API key is
        # the username and the password is left blank.
        token = base64.b64encode(f"{api_key}:".encode("utf-8")).decode("ascii")
        self.headers = {
            "Authorization": f"Basic {token}",
            "Content-Type": "application/json",
        }

    def create_dna_sequence(self, name, bases, folder_id,
                            annotations=None, is_circular=False):
        """
        Build the payload that registers a DNA sequence.

        Args:
            name: Sequence name.
            bases: Nucleotide sequence (ATCG).
            folder_id: Destination folder.
            annotations: Optional list of annotation dicts
                [{name, start, end, type, strand}].
            is_circular: Whether the sequence is circular (e.g. a plasmid).

        Returns:
            The payload dict with keys "name", "bases", "folderId",
            "isCircular" and "annotations".
        """
        payload = {
            "name": name,
            "bases": bases,
            "folderId": folder_id,
            "isCircular": is_circular,
            "annotations": annotations or [],
        }

        print(f" Benchling DNA sequence: {name}")
        print(f" Length: {len(bases)} bp")
        if annotations:
            print(f" Annotations: {len(annotations)}")

        return payload

    def search_registry(self, query, schema_id=None, page_size=50):
        """
        Build the query parameters for a Benchling Registry search.

        Registry entities include plasmids, strains, antibodies, cell lines
        and compounds.

        Args:
            query: Search text.
            schema_id: Optional schema to restrict the search to.
            page_size: Maximum number of results per page.

        Returns:
            The parameter dict with keys "query", "pageSize" and, when
            ``schema_id`` is truthy, "schemaId".
        """
        params = {
            "query": query,
            "pageSize": page_size,
        }
        if schema_id:
            params["schemaId"] = schema_id

        print(f" Benchling registry search: '{query}'")

        return params

    def create_entry(self, name, folder_id, template_id=None):
        """
        Build the payload that creates an ELN entry (lab notebook page).

        Args:
            name: Entry name.
            folder_id: Destination folder.
            template_id: Optional entry template to instantiate.

        Returns:
            The payload dict with keys "name", "folderId" and, when
            ``template_id`` is truthy, "entryTemplateId".
        """
        payload = {
            "name": name,
            "folderId": folder_id,
        }
        if template_id:
            payload["entryTemplateId"] = template_id

        print(f" Benchling ELN entry: {name}")

        return payload
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## 2. DNAnexus ゲノミクス PaaS
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
import json
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class DNAnexusClient:
    """
    DNAnexus Platform API client (request builder).

    DNAnexus features covered conceptually:
        - Data storage: large files such as FASTQ, BAM, VCF
        - Workflow execution: WDL / CWL / Applet based
        - Compliance: HIPAA, GxP, FedRAMP
        - Collaboration: per-project access management

    The methods print a short summary and return the assembled description;
    they do not contact the platform.
    """

    def __init__(self, token):
        """Remember the API token and the platform base URL."""
        self.base_url = "https://api.dnanexus.com"
        self.token = token

    def upload_file(self, local_path, project_id, folder="/"):
        """
        Describe a file upload.

        Typical formats: FASTQ(.gz), BAM, CRAM, VCF, BED, etc.

        Returns:
            A dict with keys "local_path" and "project_id".
        """
        summary = (
            f" DNAnexus upload: {local_path}",
            f" Project: {project_id}",
            f" Destination: {folder}",
        )
        for line in summary:
            print(line)

        return dict(local_path=local_path, project_id=project_id)

    def run_workflow(self, workflow_id, project_id, inputs):
        """
        Describe a workflow run.

        Example workflows:
            - GATK Best Practices (germline/somatic)
            - RNA-STAR alignment + featureCounts
            - DeepVariant caller
            - Structural variant calling

        Returns:
            A dict with keys "workflow_id", "project_id" and "inputs".
        """
        summary = (
            f" DNAnexus workflow: {workflow_id}",
            f" Project: {project_id}",
            f" Inputs: {len(inputs)} parameters",
        )
        for line in summary:
            print(line)

        return dict(workflow_id=workflow_id, project_id=project_id,
                    inputs=inputs)

    def list_project_files(self, project_id, folder="/", name_glob=None):
        """
        Build the parameters for listing files in a project folder.

        Returns:
            A dict with key "folder" and, when ``name_glob`` is truthy,
            "name" mapped to ``{"glob": name_glob}``.
        """
        name_filter = {"name": {"glob": name_glob}} if name_glob else {}
        params = {"folder": folder, **name_filter}

        print(f" DNAnexus list: {project_id}{folder}")

        return params
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## 3. OMERO バイオイメージング管理
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
import json
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class OMEROClient:
    """
    OMERO (Open Microscopy Environment Remote Objects) client
    (request builder).

    OMERO features covered conceptually:
        - Image data management: 150+ image formats (Bio-Formats)
        - Metadata: key-value pairs, tags, ROIs
        - Analysis integration: ImageJ/Fiji, CellProfiler, napari
        - Access control: project / group permissions

    The methods print a short summary and return the assembled description;
    they do not open a server connection.
    """

    def __init__(self, host, port=4064):
        """Remember the server host and port."""
        self.host, self.port = host, port

    def import_images(self, file_paths, dataset_id):
        """
        Describe an image import into a dataset.

        Typical Bio-Formats inputs:
            - OME-TIFF, ND2 (Nikon), CZI (Zeiss), LIF (Leica)
            - VSI (Olympus), SVS (Aperio), DICOM

        Returns:
            A dict with keys "files" and "dataset_id".
        """
        image_count = len(file_paths)
        print(f" OMERO import: {image_count} images → Dataset {dataset_id}")

        return dict(files=file_paths, dataset_id=dataset_id)

    def create_roi(self, image_id, shapes):
        """
        Describe a region of interest (ROI) on an image.

        Shape types:
            - Rectangle, Ellipse, Polygon
            - Line, Polyline, Point
            - Mask (binary mask)

        Returns:
            A dict with keys "image_id" and "shapes".
        """
        shape_count = len(shapes)
        print(f" OMERO ROI: Image {image_id}, {shape_count} shapes")

        return dict(image_id=image_id, shapes=shapes)

    def query_images(self, project=None, dataset=None,
                     key_value_pairs=None):
        """
        Describe a metadata-based image search.

        Possible filters:
            - project / dataset hierarchy
            - key-value annotations
            - tags
            - acquisition date, instrument name

        Returns:
            A dict with keys "project" and "dataset"
            (``key_value_pairs`` is only printed, not returned).
        """
        report = [" OMERO query:"]
        if project:
            report.append(f" Project: {project}")
        if key_value_pairs:
            report.append(f" Key-Value: {key_value_pairs}")
        for line in report:
            print(line)

        return dict(project=project, dataset=dataset)
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## 4. Protocols.io プロトコル共有
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
import json
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def create_protocol(title, description, steps, reagents=None,
                    doi_prefix="dx.doi.org/10.17504"):
    """
    Assemble a Protocols.io-style protocol record.

    Protocols.io offers citable protocols with DOIs, version control,
    forking / modification / derivation, and links to JOVE and
    Nature Protocol Exchange.

    Args:
        title: Protocol title.
        description: Protocol description.
        steps: Sequence of dicts; the keys "description", "duration",
            "temperature", "critical" and "expected_result" are read.
        reagents: Optional list of reagents.
        doi_prefix: Prefix used only in the printed DOI placeholder.

    Returns:
        A dict with keys "title", "description", "steps" (numbered from 1)
        and "reagents".
    """
    numbered_steps = [
        {
            "step_number": number,
            "description": entry.get("description", ""),
            "duration": entry.get("duration"),
            "temperature": entry.get("temperature"),
            "critical_step": entry.get("critical", False),
            "expected_result": entry.get("expected_result"),
        }
        for number, entry in enumerate(steps, 1)
    ]

    protocol = {
        "title": title,
        "description": description,
        "steps": numbered_steps,
        "reagents": reagents if reagents else [],
    }

    print(f" Protocol: {title}")
    print(f" Steps: {len(steps)}")
    if reagents:
        print(f" Reagents: {len(reagents)}")
    print(f" DOI: {doi_prefix}/protocols.io...")

    return protocol
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def fork_protocol(original_protocol_id, modifications):
    """
    Describe a fork of an existing protocol together with its changes.

    Intended to support change tracking, a link back to the original
    protocol, and automatic version numbering on the platform side.

    Args:
        original_protocol_id: Identifier of the protocol being forked.
        modifications: Sized collection of modifications to apply.

    Returns:
        A dict with keys "forked_from" and "modifications".
    """
    change_count = len(modifications)
    print(f" Forking protocol: {original_protocol_id}")
    print(f" Modifications: {change_count}")

    return dict(forked_from=original_protocol_id,
                modifications=modifications)
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## References
|
|
305
|
+
|
|
306
|
+
### Output Files
|
|
307
|
+
|
|
308
|
+
| ファイル | 形式 |
|
|
309
|
+
|---|---|
|
|
310
|
+
| `results/benchling_sequences.json` | JSON |
|
|
311
|
+
| `results/benchling_registry.json` | JSON |
|
|
312
|
+
| `results/dnanexus_workflow_output.json` | JSON |
|
|
313
|
+
| `results/omero_image_metadata.json` | JSON |
|
|
314
|
+
| `results/protocol.json` | JSON |
|
|
315
|
+
|
|
316
|
+
### 利用可能ツール
|
|
317
|
+
|
|
318
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
319
|
+
|
|
320
|
+
なし — 各プラットフォームの REST API を直接利用。
|
|
321
|
+
|
|
322
|
+
### 参照スキル
|
|
323
|
+
|
|
324
|
+
| スキル | 関連 |
|
|
325
|
+
|---|---|
|
|
326
|
+
| `scientific-bioinformatics` | ゲノミクスデータ解析 |
|
|
327
|
+
| `scientific-imaging-analysis` | 顕微鏡画像解析 |
|
|
328
|
+
| `scientific-gene-expression-transcriptomics` | RNA-seq データ管理 |
|
|
329
|
+
| `scientific-single-cell-genomics` | scRNA-seq データ管理 |
|
|
330
|
+
| `scientific-data-analysis` | データ前処理 |
|
|
331
|
+
|
|
332
|
+
### 依存パッケージ
|
|
333
|
+
|
|
334
|
+
`requests`, `json`, `pandas` (各プラットフォーム SDK: `benchling-sdk`, `dxpy`, `omero-py`)
|