@nahisaho/satori 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -43
- package/package.json +1 -1
- package/src/.github/skills/scientific-clinical-trials-analytics/SKILL.md +340 -0
- package/src/.github/skills/scientific-computational-materials/SKILL.md +353 -0
- package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +567 -0
- package/src/.github/skills/scientific-gene-expression-transcriptomics/SKILL.md +330 -0
- package/src/.github/skills/scientific-lab-data-management/SKILL.md +334 -0
- package/src/.github/skills/scientific-neuroscience-electrophysiology/SKILL.md +400 -0
- package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +342 -0
- package/src/.github/skills/scientific-proteomics-mass-spectrometry/SKILL.md +401 -0
- package/src/.github/skills/scientific-regulatory-science/SKILL.md +256 -0
- package/src/.github/skills/scientific-scientific-schematics/SKILL.md +336 -0
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-proteomics-mass-spectrometry
|
|
3
|
+
description: |
|
|
4
|
+
プロテオミクス・質量分析解析スキル。LC-MS/MS データ前処理、ペプチド同定 (PSM/FDR 制御)、
|
|
5
|
+
蛋白質定量 (LFQ/TMT/SILAC/iBAQ)、翻訳後修飾 (PTM) マッピング、
|
|
6
|
+
スペクトル類似度スコアリング (コサイン/修正コサイン)、分子ネットワーキング (GNPS)、
|
|
7
|
+
化合物アノテーション (HMDB/MassBank) を統合した質量分析パイプライン。
|
|
8
|
+
pyOpenMS / matchms ベースの包括的ワークフロー。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Proteomics & Mass Spectrometry
|
|
12
|
+
|
|
13
|
+
LC-MS/MS ベースのプロテオミクス・メタボロミクス質量分析データを対象に、
|
|
14
|
+
スペクトル前処理→ペプチド/化合物同定→定量→差次的解析の
|
|
15
|
+
標準パイプラインを提供する。
|
|
16
|
+
|
|
17
|
+
## When to Use
|
|
18
|
+
|
|
19
|
+
- LC-MS/MS プロテオミクスデータのペプチド同定・蛋白質定量が必要なとき
|
|
20
|
+
- TMT/SILAC/LFQ による差次的蛋白質発現解析を行うとき
|
|
21
|
+
- 翻訳後修飾 (リン酸化, ユビキチン化, アセチル化) マッピングが必要なとき
|
|
22
|
+
- スペクトルライブラリ検索・分子ネットワーキングを行うとき
|
|
23
|
+
- 化合物同定 (HMDB/MassBank/GNPS) が必要なとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. MS データ前処理 (pyOpenMS)
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import numpy as np
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def ms_data_preprocessing(mzml_file, noise_threshold=1000,
                          peak_picking_method="centroid"):
    """
    Load an mzML file and optionally centroid its spectra with pyOpenMS.

    Only loading and peak picking happen in this function. Conversion of
    vendor raw files to mzML is expected to have been done beforehand
    (e.g. with msconvert); baseline correction, noise removal and RT
    alignment are not performed here.

    Parameters:
        mzml_file: path of the mzML file to load.
        noise_threshold: accepted for interface compatibility; the current
            implementation does not read it.
        peak_picking_method: "centroid" runs PeakPickerHiRes over the whole
            experiment; any other value returns the data exactly as loaded.

    Returns:
        The peak-picked MSExperiment for "centroid", otherwise the loaded
        MSExperiment.
    """
    from pyopenms import MSExperiment, MzMLFile, PeakPickerHiRes

    # Read the complete run into memory.
    loaded = MSExperiment()
    MzMLFile().load(mzml_file, loaded)

    # One pass over the spectra is enough to report both MS levels.
    ms_levels = [spectrum.getMSLevel() for spectrum in loaded]
    print(f" Loaded {loaded.getNrSpectra()} spectra from {mzml_file}")
    print(f" MS1 scans: {ms_levels.count(1)}")
    print(f" MS2 scans: {ms_levels.count(2)}")

    if peak_picking_method != "centroid":
        return loaded

    # Peak picking writes its result into a fresh experiment object.
    centroided = MSExperiment()
    PeakPickerHiRes().pickExperiment(loaded, centroided)
    print(f" After peak picking: {centroided.getNrSpectra()} spectra")
    return centroided
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def feature_detection(exp, mass_error_ppm=10, noise_threshold=1000):
    """
    Detect LC-MS features (m/z x RT x intensity) with pyOpenMS.

    Runs the FeatureFinder "centroided" algorithm on ``exp`` and returns one
    row per detected feature.

    Parameters:
        exp: MSExperiment holding centroided spectra. Its ranges are
            refreshed in place (``updateRanges``) before the run.
        mass_error_ppm: value written to the "mass_trace:mz_tolerance"
            setting. NOTE(review): the argument name says ppm, but the unit
            expected by that setting is not visible here - confirm.
        noise_threshold: value written to "noise_threshold_int" when the
            selected algorithm defines that setting; otherwise it is skipped
            and a notice is printed.

    Returns:
        pandas.DataFrame with columns mz, rt, intensity, charge, quality.
        The columns are present even when no feature was found.
    """
    from pyopenms import FeatureFinder, FeatureMap

    algorithm = "centroided"
    columns = ["mz", "rt", "intensity", "charge", "quality"]

    finder = FeatureFinder()
    features = FeatureMap()
    seeds = FeatureMap()  # no user-provided seeds

    # Defaults are looked up per algorithm name, then overridden selectively.
    params = finder.getParameters(algorithm)
    overrides = {
        "mass_trace:mz_tolerance": float(mass_error_ppm),
        "noise_threshold_int": float(noise_threshold),
    }
    for key, value in overrides.items():
        # Only touch settings the algorithm actually defines, so an unknown
        # key cannot invalidate the whole parameter set.
        if params.exists(key):
            params.setValue(key, value)
        else:
            print(f" Skipping unsupported parameter: {key}")

    # NOTE(review): the pyOpenMS feature-detection example refreshes the
    # ranges and loads MS1 spectra only before calling run() - confirm that
    # callers pass an MS1-only experiment.
    exp.updateRanges()
    finder.run(algorithm, exp, features, params, seeds)

    print(f" Detected {features.size()} features")

    rows = [
        {
            "mz": feature.getMZ(),
            "rt": feature.getRT(),
            "intensity": feature.getIntensity(),
            "charge": feature.getCharge(),
            "quality": feature.getOverallQuality(),
        }
        for feature in features
    ]
    return pd.DataFrame(rows, columns=columns)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## 2. ペプチド同定 (データベース検索)
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
import numpy as np
|
|
103
|
+
import pandas as pd
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def peptide_identification(mzml_file, fasta_db, enzyme="Trypsin",
                           missed_cleavages=2, precursor_mass_tol=10,
                           fragment_mass_tol=0.02, fdr_cutoff=0.01):
    """
    Assemble and report the settings for an MS/MS database search.

    This function only builds and prints the parameter set; it does not run
    a search engine, score PSMs, or perform protein inference. The intended
    downstream workflow is: database search (X!Tandem / Comet / MSGF+),
    PSM scoring, target-decoy FDR control, protein inference.

    Parameters:
        mzml_file: spectra file the search is meant for (not read here).
        fasta_db: protein sequence database, stored under "database".
        enzyme: digestion enzyme name.
        missed_cleavages: allowed number of missed cleavages.
        precursor_mass_tol: precursor tolerance, rendered as "<value> ppm".
        fragment_mass_tol: fragment tolerance, rendered as "<value> Da".
        fdr_cutoff: PSM-level FDR threshold as a fraction (0.01 = 1%).

    Returns:
        dict of search parameters, including fixed and variable
        modifications.
    """
    search_params = {
        "database": fasta_db,
        "enzyme": enzyme,
        "missed_cleavages": missed_cleavages,
        "precursor_mass_tolerance": f"{precursor_mass_tol} ppm",
        "fragment_mass_tolerance": f"{fragment_mass_tol} Da",
        "fixed_modifications": ["Carbamidomethyl (C)"],
        "variable_modifications": ["Oxidation (M)", "Acetyl (Protein N-term)"],
    }

    print(" Database search parameters:")
    for key, value in search_params.items():
        print(f" {key}: {value}")

    # Target-decoy approach: concatenate reversed sequences.
    # The percentage is derived from fdr_cutoff so the message stays correct
    # for thresholds other than the 1% default.
    print(f" FDR cutoff: {fdr_cutoff} ({fdr_cutoff * 100:g}% at PSM level)")
    print(" Method: Target-Decoy competition (TDC)")

    return search_params
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def protein_quantification(psm_results, method="LFQ",
                           min_peptides=2, min_ratio_count=2):
    """
    Report the protein quantification strategy for the chosen method.

    Recognised methods and what is printed for them:
        - LFQ   (label-free, MS1 intensity based): MaxLFQ normalisation and
          MinDet imputation notes
        - TMT   (reporter-ion intensities): channel, extraction and
          normalisation notes
        - SILAC (heavy/light ratios): label and ratio notes
        - iBAQ  (intensity-based absolute quantification): formula

    Any other method name prints only the two header lines.

    Parameters:
        psm_results: accepted for interface compatibility; not read here.
        method: quantification method name.
        min_peptides: minimum number of peptides per protein.
        min_ratio_count: accepted for interface compatibility; not read here.

    Returns:
        dict with the keys "method" and "min_peptides".
    """
    # Method-specific report lines, looked up instead of branching.
    notes_by_method = {
        "LFQ": (
            " Normalization: MaxLFQ (median of peptide ratios)",
            " Missing value imputation: MinDet (minimum deterministic)",
        ),
        "TMT": (
            " TMT channels: 126-134N (TMTpro 18-plex)",
            " Reporter ion extraction: ±10 ppm",
            " Normalization: Median centering → IRS (Internal Reference Scaling)",
        ),
        "SILAC": (
            " Labels: Light (K0R0) vs Heavy (K8R10)",
            " Ratio calculation: median of peptide ratios",
        ),
        "iBAQ": (
            " iBAQ = Σ(peptide intensities) / n_observable_peptides",
        ),
    }

    print(f" Quantification method: {method}")
    print(f" Minimum peptides per protein: {min_peptides}")
    for note in notes_by_method.get(method, ()):
        print(note)

    return dict(method=method, min_peptides=min_peptides)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## 3. 翻訳後修飾 (PTM) 解析
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
import pandas as pd
|
|
183
|
+
import numpy as np
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def ptm_site_localization(psm_results, ptm_types=None,
                          localization_prob_cutoff=0.75):
    """
    Report the settings for post-translational modification site localization.

    Common PTM types this workflow is meant for:
        - Phosphorylation (S/T/Y)
        - Ubiquitination (K, diGly remnant)
        - Acetylation (K)
        - Methylation (K/R)
        - Glycosylation (N/S/T)
        - SUMOylation (K)

    Parameters:
        psm_results: accepted for interface compatibility; not read here.
        ptm_types: modification names to analyse. ``None`` or an empty
            value selects the three phospho modifications.
        localization_prob_cutoff: minimum site localization probability.

    Returns:
        dict with the keys "ptm_types" and "cutoff".
    """
    if not ptm_types:
        ptm_types = ["Phospho (S)", "Phospho (T)", "Phospho (Y)"]

    report = (
        f" PTM types analyzed: {ptm_types}",
        f" Localization probability cutoff: {localization_prob_cutoff}",
        " Methods: phosphoRS / Ascore / ptmRS",
        # PTM motif enrichment (Motif-X / pLogo)
        " Motif enrichment: Motif-X algorithm",
        " Window: ±7 residues around modification site",
        " Significance: p < 1e-6 (binomial test)",
    )
    for line in report:
        print(line)

    return dict(ptm_types=ptm_types, cutoff=localization_prob_cutoff)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def phosphoproteomics_kinase_activity(phosphosites_df,
                                      kinase_substrate_db="PhosphoSitePlus"):
    """
    Report the setup for kinase activity inference from phosphoproteomics (KSEA).

    Kinase-Substrate Enrichment Analysis as described by this skill:
        - uses kinase-substrate relations from PhosphoSitePlus / NetworKIN
        - scores each kinase by the mean log2FC of its substrates
        - assesses significance with a z-test

    Parameters:
        phosphosites_df: accepted for interface compatibility; not read here.
        kinase_substrate_db: name of the kinase-substrate resource.

    Returns:
        dict with the keys "database" and "method" (always "KSEA").
    """
    summary = [
        f" Kinase-substrate database: {kinase_substrate_db}",
        " Algorithm: KSEA (Kinase-Substrate Enrichment Analysis)",
        " Score: mean(log2FC of substrates) × sqrt(n_substrates)",
    ]
    print("\n".join(summary))

    return dict(database=kinase_substrate_db, method="KSEA")
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## 4. スペクトルマッチング・分子ネットワーキング
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
import numpy as np
|
|
234
|
+
import pandas as pd
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def spectral_similarity_scoring(query_spectrum, library_spectrum,
                                method="modified_cosine",
                                mz_tolerance=0.02):
    """
    Score the similarity of two MS/MS spectra with matchms.

    Methods:
        - "modified_cosine": ModifiedCosine (shift-aware matching that
          takes the precursor mass difference into account)
        - any other value: CosineGreedy (standard cosine similarity)

    Note that only these two scorers are implemented; a name such as
    "spec2vec" is reported as given but scored with CosineGreedy.

    Parameters:
        query_spectrum: spectrum to identify.
        library_spectrum: reference spectrum.
        method: similarity method name (see above).
        mz_tolerance: fragment matching tolerance passed to the scorer.

    Returns:
        The result of the scorer's ``pair`` call, indexable by "score" and
        "matches".
    """
    from matchms import Spectrum, calculate_scores
    from matchms.similarity import ModifiedCosine, CosineGreedy

    scorer_cls = ModifiedCosine if method == "modified_cosine" else CosineGreedy
    scorer = scorer_cls(tolerance=mz_tolerance)

    result = scorer.pair(query_spectrum, library_spectrum)

    print(f" Similarity method: {method}")
    print(f" Score: {result['score']:.4f}")
    print(f" Matched peaks: {result['matches']}")

    return result
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def molecular_networking(spectra_list, min_cosine=0.7,
                         min_matched_peaks=6, max_neighbors=10,
                         mz_tolerance=0.02):
    """
    Build a GNPS-style molecular network from MS/MS spectra.

    Every pair of spectra is scored with modified cosine similarity; pairs
    that reach both thresholds are connected. Structurally related
    compounds tend to form clusters, which helps to annotate unknowns.

    Parameters:
        spectra_list: sequence of matchms spectra; node ids are the
            positions in this sequence.
        min_cosine: minimum modified cosine score for an edge.
        min_matched_peaks: minimum number of matched peaks for an edge.
        max_neighbors: accepted for interface compatibility; the current
            implementation does not limit the number of edges per node.
        mz_tolerance: fragment matching tolerance passed to ModifiedCosine
            (default 0.02, the value that was previously fixed).

    Returns:
        networkx.Graph with one node per spectrum and edge attributes
        "weight" (score) and "matches" (matched peak count).
    """
    from matchms.similarity import ModifiedCosine
    import networkx as nx

    sim_func = ModifiedCosine(tolerance=mz_tolerance)

    G = nx.Graph()
    n = len(spectra_list)

    for i in range(n):
        # Add the node up front so unconnected spectra stay in the graph.
        G.add_node(i)
        for j in range(i + 1, n):
            score = sim_func.pair(spectra_list[i], spectra_list[j])
            if (score["score"] >= min_cosine and
                    score["matches"] >= min_matched_peaks):
                # Store plain Python numbers rather than NumPy scalars.
                G.add_edge(i, j, weight=float(score["score"]),
                           matches=int(score["matches"]))

    print(f" Molecular network: {n} nodes, {G.number_of_edges()} edges")
    print(f" Connected components: {nx.number_connected_components(G)}")
    print(f" Parameters: min_cosine={min_cosine}, min_matched={min_matched_peaks}")

    return G
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## 5. 差次的蛋白質発現 + 機能濃縮
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
import pandas as pd
|
|
304
|
+
import numpy as np
|
|
305
|
+
from scipy.stats import ttest_ind
|
|
306
|
+
from statsmodels.stats.multitest import multipletests
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def differential_protein_expression(intensity_matrix, groups,
                                    log2fc_cutoff=1.0, fdr_cutoff=0.05,
                                    imputation="MinDet"):
    """
    Two-group differential protein expression analysis.

    For every protein with at least two non-missing values in each group,
    computes log2FC (mean of group 2 minus mean of group 1) and a Welch
    t-test p-value, then applies Benjamini-Hochberg correction.

    Parameters:
        intensity_matrix: DataFrame, proteins x samples (log2 intensities).
        groups: group label per sample column, in column order
            (e.g. ["Treatment", "Control", ...]). The two groups compared
            are the first two labels in sorted order; further labels are
            ignored.
        log2fc_cutoff: absolute log2FC threshold used in the printed summary.
        fdr_cutoff: adjusted p-value threshold used in the printed summary.
        imputation: accepted for interface compatibility; no imputation is
            performed here, missing values are dropped per protein.

    Returns:
        DataFrame with columns protein, log2FC, pvalue, mean_g1, mean_g2,
        n_g1, n_g2, padj. Empty (but with these columns) when no protein
        has enough observations.

    Raises:
        ValueError: if ``groups`` contains fewer than two distinct labels.
    """
    group_names = sorted(set(groups))
    if len(group_names) < 2:
        raise ValueError(
            "differential_protein_expression needs at least two distinct "
            f"groups, got {group_names}"
        )
    g1, g2 = group_names[0], group_names[1]
    g1_idx = [i for i, g in enumerate(groups) if g == g1]
    g2_idx = [i for i, g in enumerate(groups) if g == g2]

    # Select each group's columns once, by position, so duplicate protein
    # labels in the index cannot turn a row lookup into a 2-D result.
    g1_block = intensity_matrix.iloc[:, g1_idx].to_numpy(dtype=float, na_value=np.nan)
    g2_block = intensity_matrix.iloc[:, g2_idx].to_numpy(dtype=float, na_value=np.nan)

    results = []
    for protein, row1, row2 in zip(intensity_matrix.index, g1_block, g2_block):
        vals1 = row1[~np.isnan(row1)]
        vals2 = row2[~np.isnan(row2)]

        # A t-test needs at least two observations per group.
        if len(vals1) < 2 or len(vals2) < 2:
            continue

        mean1, mean2 = vals1.mean(), vals2.mean()
        _, pval = ttest_ind(vals1, vals2, equal_var=False)

        results.append({
            "protein": protein,
            "log2FC": mean2 - mean1,
            "pvalue": pval,
            "mean_g1": mean1,
            "mean_g2": mean2,
            "n_g1": len(vals1),
            "n_g2": len(vals2),
        })

    columns = ["protein", "log2FC", "pvalue", "mean_g1", "mean_g2",
               "n_g1", "n_g2"]
    df = pd.DataFrame(results, columns=columns)

    # Correct only defined p-values; undefined ones (e.g. zero variance in
    # both groups) keep a NaN padj instead of disturbing the correction.
    df["padj"] = np.nan
    testable = df["pvalue"].notna()
    if testable.any():
        df.loc[testable, "padj"] = multipletests(
            df.loc[testable, "pvalue"], method="fdr_bh")[1]

    sig_up = df[(df["padj"] < fdr_cutoff) & (df["log2FC"] > log2fc_cutoff)]
    sig_down = df[(df["padj"] < fdr_cutoff) & (df["log2FC"] < -log2fc_cutoff)]

    print(f" {g2} vs {g1}:")
    print(f" Total proteins tested: {len(df)}")
    print(f" Significant UP: {len(sig_up)} (log2FC > {log2fc_cutoff}, FDR < {fdr_cutoff})")
    print(f" Significant DOWN: {len(sig_down)} (log2FC < -{log2fc_cutoff}, FDR < {fdr_cutoff})")

    return df
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
## References
|
|
361
|
+
|
|
362
|
+
### Output Files
|
|
363
|
+
|
|
364
|
+
| ファイル | 形式 |
|
|
365
|
+
|---|---|
|
|
366
|
+
| `results/features_detected.csv` | CSV |
|
|
367
|
+
| `results/psm_results.csv` | CSV |
|
|
368
|
+
| `results/protein_quant.csv` | CSV |
|
|
369
|
+
| `results/ptm_sites.csv` | CSV |
|
|
370
|
+
| `results/differential_proteins.csv` | CSV |
|
|
371
|
+
| `results/molecular_network.graphml` | GraphML |
|
|
372
|
+
| `figures/volcano_proteomics.png` | PNG |
|
|
373
|
+
| `figures/molecular_network.png` | PNG |
|
|
374
|
+
|
|
375
|
+
### 利用可能ツール
|
|
376
|
+
|
|
377
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
378
|
+
|
|
379
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
380
|
+
|---|---|---|
|
|
381
|
+
| PRIDE | `PRIDE_search_proteomics` | プロテオミクスプロジェクト検索 |
|
|
382
|
+
| PRIDE | `PRIDE_get_project` | プロジェクト詳細取得 |
|
|
383
|
+
| PRIDE | `PRIDE_get_project_files` | プロテオミクスデータファイル取得 |
|
|
384
|
+
| UniProt | `search_uniprot_by_name` | 蛋白質検索 |
|
|
385
|
+
| UniProt | `get_uniprot_entry` | 蛋白質エントリ詳細 |
|
|
386
|
+
| KEGG | `kegg_get_pathway_info` | 蛋白質パスウェイ情報 |
|
|
387
|
+
| Reactome | `reactome_pathway_analysis` | パスウェイ濃縮解析 |
|
|
388
|
+
|
|
389
|
+
### 参照スキル
|
|
390
|
+
|
|
391
|
+
| スキル | 関連 |
|
|
392
|
+
|---|---|
|
|
393
|
+
| `scientific-metabolomics` | 代謝物 MS 解析 |
|
|
394
|
+
| `scientific-spectral-signal` | スペクトル解析基盤 |
|
|
395
|
+
| `scientific-bioinformatics` | 配列・蛋白質データベース |
|
|
396
|
+
| `scientific-network-analysis` | 分子ネットワーク可視化 |
|
|
397
|
+
| `scientific-multi-omics` | マルチオミクス統合 |
|
|
398
|
+
|
|
399
|
+
### 依存パッケージ
|
|
400
|
+
|
|
401
|
+
`pyopenms`, `matchms`, `pandas`, `numpy`, `scipy`, `scikit-learn`, `networkx`, `statsmodels`
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-regulatory-science
|
|
3
|
+
description: |
|
|
4
|
+
規制科学パイプラインスキル。FDA (医薬品/医療機器/食品)・EMA・PMDA 規制データベース横断照会、
|
|
5
|
+
Orange Book 承認履歴・特許・排他性情報、510(k) デバイスクリアランス、
|
|
6
|
+
ISO 13485 品質管理システム (設計管理/CAPA/リスク管理)、
|
|
7
|
+
USPTO 特許検索を統合した薬事・規制情報パイプライン。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Regulatory Science
|
|
11
|
+
|
|
12
|
+
FDA・EMA・PMDA 等の規制当局データベースを横断照会し、
|
|
13
|
+
医薬品・医療機器・食品の承認情報・安全性データ・品質管理要件・知的財産情報を
|
|
14
|
+
体系的に取得するパイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- FDA 医薬品の承認履歴・ジェネリック状況を調査するとき
|
|
19
|
+
- 医療機器の 510(k) クリアランス・規制分類を確認するとき
|
|
20
|
+
- 食品安全性 (FDA リコール, 有害事象) データを取得するとき
|
|
21
|
+
- ISO 13485 品質管理システムの設計管理・CAPA を計画するとき
|
|
22
|
+
- 競合特許のランドスケープ調査が必要なとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. FDA Orange Book 照会
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import json
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def query_fda_orange_book(drug_name=None, nda_number=None,
                          active_ingredient=None):
    """
    Describe a query against the FDA Orange Book.

    The Orange Book ("Approved Drug Products with Therapeutic Equivalence
    Evaluations") covers:
        - approval history (NDA/ANDA, approval date, indication)
        - therapeutic equivalence (TE) codes (AB, BX, etc.)
        - patent information (expiry date, patent number, use code)
        - exclusivity information (NCE, Orphan, Pediatric)
        - generic approval status

    This function prints the search terms and echoes them back; it does
    not contact any service.

    Parameters:
        drug_name: product name; printed when truthy.
        nda_number: application number; returned but not printed.
        active_ingredient: ingredient name; printed when truthy but not
            part of the returned dict.

    Returns:
        dict with the keys "drug_name" and "nda_number".
    """
    report = [" Querying FDA Orange Book:"]
    if drug_name:
        report.append(f" Drug: {drug_name}")
    if active_ingredient:
        report.append(f" Active ingredient: {active_ingredient}")
    print("\n".join(report))

    return dict(drug_name=drug_name, nda_number=nda_number)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def analyze_patent_landscape(drug_name):
    """
    Describe a patent landscape analysis for a drug.

    Topics the analysis is meant to cover:
        - expiry timeline of Orange Book listed patents
        - presence of Paragraph IV challenges
        - status of 180-day exclusivity
        - forecast of generic entry

    This function prints what will be analysed and echoes the drug name;
    it performs no lookup itself.

    Parameters:
        drug_name: name of the drug to analyse.

    Returns:
        dict with the single key "drug_name".
    """
    headline = f" Patent landscape for: {drug_name}"
    scope = " Analysis: Patent expiry timeline + exclusivity + generic entry"
    print(headline)
    print(scope)

    return dict(drug_name=drug_name)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## 2. FDA 医療機器規制データ
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
import pandas as pd
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def query_fda_device_classification(device_name=None, product_code=None,
                                    regulation_number=None):
    """
    Describe an FDA medical device classification query.

    Device classes:
        - Class I: General Controls
        - Class II: Special Controls, 510(k) required
        - Class III: Premarket Approval (PMA) required

    This function prints the search terms and echoes them back; it does
    not contact any service.

    Parameters:
        device_name: device name; printed when truthy.
        product_code: FDA product code; returned but not printed.
        regulation_number: accepted for interface compatibility; not read
            here.

    Returns:
        dict with the keys "device_name" and "product_code".
    """
    report = [" FDA Device Classification:"]
    if device_name:
        report.append(f" Device: {device_name}")
    report.append(" Regulatory pathways: 510(k), De Novo, PMA, HDE")
    print("\n".join(report))

    return dict(device_name=device_name, product_code=product_code)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def query_510k_clearance(device_name=None, applicant=None,
                         decision_date_from=None):
    """
    Describe an FDA 510(k) clearance search.

    510(k) = Premarket Notification:
        - demonstrates substantial equivalence (SE) to a predicate device
        - 90-day review (Traditional) / 30-day (Special/Abbreviated)

    This function prints the search terms and echoes the device name; it
    does not contact any service.

    Parameters:
        device_name: device name; printed when truthy.
        applicant: accepted for interface compatibility; not read here.
        decision_date_from: accepted for interface compatibility; not read
            here.

    Returns:
        dict with the single key "device_name".
    """
    report = [" 510(k) Clearance search:"]
    if device_name:
        report.append(f" Device: {device_name}")
    print("\n".join(report))

    return dict(device_name=device_name)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## 3. ISO 13485 品質管理システム
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
import json
|
|
115
|
+
from datetime import datetime
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def iso13485_design_control_checklist(product_name, risk_class="II"):
    """
    Build an ISO 13485 design control checklist for a product.

    Covers the design control process of ISO 13485:2016 clause 7.3:
        - 7.3.2 design and development planning
        - 7.3.3 design inputs
        - 7.3.4 design outputs
        - 7.3.5 design review
        - 7.3.6 design verification
        - 7.3.7 design validation
        - 7.3.8 design transfer
        - 7.3.9 control of design changes

    Parameters:
        product_name: name of the product, stored under "product".
        risk_class: device risk class label, stored under "risk_class".

    Returns:
        dict with "product", "risk_class" and "design_phases"; each phase
        entry has the keys "phase", "ref" and "deliverables". A one-line
        summary per phase is printed as a side effect.
    """
    # (phase title, clause reference, deliverables) in process order.
    phase_table = (
        ("Design Planning", "§7.3.2",
         ["DHF 設計履歴ファイル", "プロジェクト計画書", "リスク管理計画"]),
        ("Design Input", "§7.3.3",
         ["ユーザーニーズ", "設計要求仕様 (DRS)", "規制要求"]),
        ("Design Output", "§7.3.4",
         ["設計仕様書", "図面", "DMR (Device Master Record)"]),
        ("Design Review", "§7.3.5",
         ["設計レビュー議事録", "アクションアイテム"]),
        ("Design Verification", "§7.3.6",
         ["検証プロトコル/レポート", "トレーサビリティマトリクス"]),
        ("Design Validation", "§7.3.7",
         ["バリデーションプロトコル/レポート", "臨床評価 (必要時)"]),
        ("Design Transfer", "§7.3.8",
         ["製造移管文書", "製造仕様"]),
        ("Design Changes", "§7.3.9",
         ["変更管理 (ECO)", "影響分析"]),
    )

    checklist = {
        "product": product_name,
        "risk_class": risk_class,
        "design_phases": [
            {"phase": title, "ref": clause, "deliverables": items}
            for title, clause, items in phase_table
        ],
    }

    print(f" ISO 13485 Design Control for: {product_name} (Class {risk_class})")
    for entry in checklist["design_phases"]:
        count = len(entry["deliverables"])
        print(f" {entry['ref']} {entry['phase']}: {count} deliverables")

    return checklist
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def capa_process(nonconformity_description, severity="Major"):
    """
    Open a CAPA (Corrective and Preventive Action) record.

    Relates to ISO 13485 clause 8.5.2 (corrective action) and clause 8.5.3
    (preventive action).

    Parameters:
        nonconformity_description: text describing the nonconformity.
        severity: severity label stored on the record.

    Returns:
        dict with "id" ("CAPA-" followed by the current local time
        formatted as YYYYMMDD-HHMM), "nonconformity", "severity" and the
        ordered list of process "steps". The id and severity are printed
        as a side effect.
    """
    # The id has minute resolution, taken from the local clock.
    stamp = datetime.now().strftime("%Y%m%d-%H%M")
    workflow = [
        "1. 問題の特定と文書化",
        "2. 即時是正処置 (Containment)",
        "3. 根本原因分析 (5-Why / Fishbone)",
        "4. 是正処置の計画と実施",
        "5. 有効性検証",
        "6. 予防処置の展開",
        "7. CAPA クローズ",
    ]
    capa_record = dict(
        id="CAPA-" + stamp,
        nonconformity=nonconformity_description,
        severity=severity,
        steps=workflow,
    )

    print(f" CAPA initiated: {capa_record['id']}")
    print(f" Severity: {severity}")

    return capa_record
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## 4. USPTO 特許検索
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
import pandas as pd
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def search_patents(query, start_date=None, end_date=None,
                   assignee=None, max_results=50):
    """
    Describe a USPTO patent search (PatentsView API).

    Searchable fields in the intended workflow:
        - patent title, abstract and claims
        - applicant / assignee
        - filing date / grant date
        - CPC (Cooperative Patent Classification) codes

    This function prints the search terms and echoes them back; it does
    not contact any service.

    Parameters:
        query: free-text search expression.
        start_date: start of the date range; the range line is printed
            only when this is truthy.
        end_date: end of the date range; shown as "present" when falsy.
        assignee: assignee filter; printed when truthy.
        max_results: maximum number of results requested.

    Returns:
        dict with the keys "query" and "max_results".
    """
    report = [f" USPTO Patent search: '{query}'"]
    if assignee:
        report.append(f" Assignee: {assignee}")
    if start_date:
        upper = end_date if end_date else "present"
        report.append(f" Date range: {start_date} → {upper}")
    print("\n".join(report))

    return dict(query=query, max_results=max_results)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## References
|
|
216
|
+
|
|
217
|
+
### Output Files
|
|
218
|
+
|
|
219
|
+
| ファイル | 形式 |
|
|
220
|
+
|---|---|
|
|
221
|
+
| `results/fda_orange_book.json` | JSON |
|
|
222
|
+
| `results/device_classification.csv` | CSV |
|
|
223
|
+
| `results/510k_clearances.csv` | CSV |
|
|
224
|
+
| `results/iso13485_checklist.json` | JSON |
|
|
225
|
+
| `results/capa_record.json` | JSON |
|
|
226
|
+
| `results/patent_search.csv` | CSV |
|
|
227
|
+
| `figures/patent_timeline.png` | PNG |
|
|
228
|
+
|
|
229
|
+
### 利用可能ツール
|
|
230
|
+
|
|
231
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
232
|
+
|
|
233
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
234
|
+
|---|---|---|
|
|
235
|
+
| FDA Orange Book | `FDA_OrangeBook_search_drug` | 医薬品検索 |
|
|
236
|
+
| FDA Orange Book | `FDA_OrangeBook_get_approval_history` | 承認履歴取得 |
|
|
237
|
+
| FDA Orange Book | `FDA_OrangeBook_get_patent_info` | 特許情報取得 |
|
|
238
|
+
| FDA Orange Book | `FDA_OrangeBook_get_exclusivity` | 排他性情報 |
|
|
239
|
+
| FDA Orange Book | `FDA_OrangeBook_check_generic_availability` | ジェネリック承認状況 |
|
|
240
|
+
| FDA Orange Book | `FDA_OrangeBook_get_te_code` | 治療的同等性コード |
|
|
241
|
+
| FAERS | `FAERS_search_adverse_event_reports` | 有害事象レポート検索 |
|
|
242
|
+
| FAERS | `FAERS_calculate_disproportionality` | 不均衡分析 (ROR/PRR/IC) |
|
|
243
|
+
|
|
244
|
+
### 参照スキル
|
|
245
|
+
|
|
246
|
+
| スキル | 関連 |
|
|
247
|
+
|---|---|
|
|
248
|
+
| `scientific-pharmacovigilance` | 市販後安全性 |
|
|
249
|
+
| `scientific-pharmacogenomics` | FDA PGx バイオマーカー |
|
|
250
|
+
| `scientific-clinical-trials-analytics` | 臨床試験レジストリ |
|
|
251
|
+
| `scientific-admet-pharmacokinetics` | 前臨床 ADMET |
|
|
252
|
+
| `scientific-grant-writing` | 規制戦略セクション |
|
|
253
|
+
|
|
254
|
+
### 依存パッケージ
|
|
255
|
+
|
|
256
|
+
`pandas`, `numpy`, `requests` (`json` は Python 標準ライブラリのため追加インストール不要)
|