@nahisaho/satori 0.25.3 → 0.25.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/satori.js
CHANGED
|
@@ -4,6 +4,7 @@ const fs = require('fs');
|
|
|
4
4
|
const path = require('path');
|
|
5
5
|
|
|
6
6
|
const COMMAND = process.argv[2];
|
|
7
|
+
const SUBCOMMAND = process.argv[3];
|
|
7
8
|
const FLAGS = process.argv.slice(3);
|
|
8
9
|
|
|
9
10
|
const PACKAGE_ROOT = path.resolve(__dirname, '..');
|
|
@@ -70,6 +71,8 @@ SATORI — Agent Skills for Science
|
|
|
70
71
|
|
|
71
72
|
Usage:
|
|
72
73
|
satori init [--force] [--dry-run] Install .github/ skills into current directory
|
|
74
|
+
satori pipeline suggest Interactive pipeline recommendation
|
|
75
|
+
satori pipeline list List all available pipelines
|
|
73
76
|
satori help Show this help message
|
|
74
77
|
satori --version, -v Show version number
|
|
75
78
|
|
|
@@ -79,6 +82,103 @@ Options:
|
|
|
79
82
|
`);
|
|
80
83
|
}
|
|
81
84
|
|
|
85
|
+
// ── Pipeline Suggest ──
|
|
86
|
+
|
|
87
|
+
/**
 * Catalogue of domain pipelines offered by `satori pipeline suggest` and
 * `satori pipeline list`.
 *
 * Each entry has:
 *   - id:       number shown to the user as "Pipeline #<id>"
 *   - name:     display name
 *   - domain:   coarse domain tag
 *   - keywords: terms compared (case-insensitively) against the user's query
 *   - skills:   the skill chain, printed verbatim
 */
const PIPELINES = [
  {
    id: 1,
    name: '仮説検証→論文化',
    domain: 'general',
    keywords: ['仮説', '統計', '論文', 'hypothesis'],
    skills: 'hypothesis-engine → data-preprocessing → statistical-testing → ml-classification → publication-figures → academic-writing → critical-review',
  },
  {
    id: 2,
    name: 'バリアント→臨床',
    domain: 'genomics',
    keywords: ['バリアント', 'variant', 'VCF', 'WGS', 'WES'],
    skills: 'variant-interpretation → pharmacogenomics → precision-oncology → clinical-decision-support → clinical-reporting',
  },
  {
    id: 3,
    name: 'トランスクリプトーム',
    domain: 'genomics',
    keywords: ['RNA-seq', 'トランスクリプトーム', 'DEG', '発現'],
    skills: 'rnaseq-analysis → pathway-enrichment → network-analysis → publication-figures',
  },
  {
    id: 4,
    name: 'エピジェネティクス',
    domain: 'genomics',
    keywords: ['エピゲノム', 'ChIP-seq', 'ATAC-seq', 'メチル化'],
    skills: 'epigenomics-chromatin → regulatory-genomics → noncoding-rna → gene-regulation',
  },
  {
    id: 5,
    name: 'AlphaFold 構造解析',
    domain: 'structural',
    keywords: ['AlphaFold', 'タンパク質構造', '3D', 'protein structure'],
    skills: 'alphafold-structures → protein-structure-analysis → molecular-docking',
  },
  {
    id: 6,
    name: 'エビデンス合成',
    domain: 'literature',
    keywords: ['メタアナリシス', 'systematic review', '文献', 'エビデンス'],
    skills: 'deep-research → literature-search → meta-analysis → evidence-synthesis → academic-writing → critical-review',
  },
  {
    id: 7,
    name: '創薬パイプライン',
    domain: 'pharma',
    keywords: ['創薬', 'drug discovery', 'ADMET', 'ドッキング'],
    skills: 'drug-target-profiling → compound-screening → molecular-docking → admet-pharmacokinetics → drug-repurposing',
  },
  {
    id: 8,
    name: 'ML/XAI パイプライン',
    domain: 'ml',
    keywords: ['機械学習', 'ML', 'SHAP', 'XAI', '予測モデル'],
    skills: 'data-preprocessing → ml-classification → ml-regression → explainable-ai → fairness-bias → publication-figures',
  },
  {
    id: 9,
    name: '環境・生態学',
    domain: 'ecology',
    keywords: ['生態', '生物多様性', 'SDM', '環境', 'ecology'],
    skills: 'environmental-ecology → biodiversity-conservation → species-distribution → time-series-forecasting',
  },
  {
    id: 10,
    name: '計算材料科学',
    domain: 'materials',
    keywords: ['材料', 'materials', 'DFT', '物性'],
    skills: 'computational-materials → cheminformatics → molecular-dynamics → ml-regression',
  },
  {
    id: 11,
    name: '医薬品安全性',
    domain: 'pharma',
    keywords: ['有害事象', 'ファーマコビジランス', '安全性', 'adverse'],
    skills: 'pharmacovigilance → pharmacogenomics → regulatory-science',
  },
  {
    id: 12,
    name: '希少疾患',
    domain: 'clinical',
    keywords: ['希少疾患', 'rare disease', 'Orphanet'],
    skills: 'rare-disease-genetics → gene-panel-design → variant-interpretation → clinical-reporting',
  },
  {
    id: 13,
    name: 'がんゲノミクス',
    domain: 'oncology',
    keywords: ['がん', 'cancer', 'TMB', '体細胞変異'],
    skills: 'cancer-genomics → precision-oncology → biomarker-discovery → clinical-reporting',
  },
  {
    id: 14,
    name: 'GWAS・集団遺伝学',
    domain: 'genomics',
    keywords: ['GWAS', '集団遺伝学', 'population genetics', 'biobank'],
    skills: 'biobank-cohort → population-genetics → statistical-testing → publication-figures',
  },
  {
    id: 15,
    name: 'シングルセル',
    domain: 'genomics',
    keywords: ['シングルセル', 'single-cell', 'scRNA-seq', '空間トランスクリプトーム'],
    skills: 'cellxgene-census → scvi-integration → spatial-transcriptomics → gene-regulation',
  },
  {
    id: 16,
    name: 'プロテオミクス',
    domain: 'omics',
    keywords: ['プロテオミクス', 'proteomics', '質量分析'],
    skills: 'proteomics → protein-structure-analysis → network-analysis',
  },
  {
    id: 17,
    name: 'メタボロミクス',
    domain: 'omics',
    keywords: ['メタボロミクス', 'metabolomics', '代謝物', '脂質'],
    skills: 'metabolomics → lipidomics → systems-biology → network-analysis',
  },
  {
    id: 18,
    name: 'マイクロバイオーム',
    domain: 'ecology',
    keywords: ['マイクロバイオーム', 'metagenome', '16S', '腸内細菌'],
    skills: 'microbiome-metagenomics → metagenome-assembled-genomes → phylogenetics → environmental-ecology',
  },
  {
    id: 19,
    name: 'パスウェイ・KG',
    domain: 'systems',
    keywords: ['パスウェイ', 'ナレッジグラフ', 'knowledge graph', 'pathway'],
    skills: 'gene-id-mapping → pathway-enrichment → ontology-integration → network-analysis → knowledge-graph',
  },
  {
    id: 20,
    name: '農業・食品',
    domain: 'agriculture',
    keywords: ['農業', '食品', 'agriculture', 'food safety'],
    skills: 'agricultural-science → food-science-nutrition → environmental-ecology',
  },
  {
    id: 21,
    name: '臨床情報学',
    domain: 'clinical',
    keywords: ['臨床', 'EHR', 'FHIR', 'OMOP', '電子カルテ'],
    skills: 'clinical-standards → clinical-nlp → clinical-reporting → healthcare-ai → survival-clinical',
  },
  {
    id: 22,
    name: 'ロボティクス・IoT',
    domain: 'engineering',
    keywords: ['ロボティクス', 'IoT', 'ロボット', 'robotics'],
    skills: 'robotics-automation → lab-automation → lab-data-management → interactive-dashboard',
  },
  {
    id: 23,
    name: '実験計画・統計',
    domain: 'general',
    keywords: ['実験計画', 'DOE', '検出力', 'サンプルサイズ'],
    skills: 'experimental-design → statistical-testing → reproducibility-assessment → publication-figures',
  },
  {
    id: 24,
    name: '科学的可視化',
    domain: 'general',
    keywords: ['可視化', 'visualization', 'ダッシュボード', 'dashboard'],
    skills: 'publication-figures → interactive-dashboard',
  },
  {
    id: 25,
    name: '学術出版',
    domain: 'literature',
    keywords: ['論文投稿', 'journal', 'グラント', 'grant'],
    skills: 'academic-writing → critical-review → citation-network',
  },
  {
    id: 26,
    name: '科学教育',
    domain: 'education',
    keywords: ['教育', 'education', 'カリキュラム'],
    skills: 'science-education → reproducibility-assessment',
  },
];
|
|
115
|
+
|
|
116
|
+
function pipelineSuggest() {
|
|
117
|
+
const readline = require('readline');
|
|
118
|
+
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
|
|
119
|
+
|
|
120
|
+
const ask = (q) => new Promise((resolve) => rl.question(q, resolve));
|
|
121
|
+
|
|
122
|
+
(async () => {
|
|
123
|
+
console.log('\n🔬 SATORI Pipeline Suggest — インタラクティブパイプライン推薦\n');
|
|
124
|
+
console.log('研究内容を入力すると、最適なパイプラインを提案します。');
|
|
125
|
+
console.log('(Ctrl+C で終了)\n');
|
|
126
|
+
|
|
127
|
+
const input = await ask('何を解析しますか? キーワードや研究テーマを入力してください:\n> ');
|
|
128
|
+
const query = input.toLowerCase();
|
|
129
|
+
|
|
130
|
+
// Score each pipeline by keyword match
|
|
131
|
+
const scored = PIPELINES.map((p) => {
|
|
132
|
+
let score = 0;
|
|
133
|
+
for (const kw of p.keywords) {
|
|
134
|
+
if (query.includes(kw.toLowerCase())) score += 2;
|
|
135
|
+
}
|
|
136
|
+
// Partial match on name
|
|
137
|
+
if (query.includes(p.name.toLowerCase()) || p.name.toLowerCase().includes(query)) score += 1;
|
|
138
|
+
return { ...p, score };
|
|
139
|
+
})
|
|
140
|
+
.filter((p) => p.score > 0)
|
|
141
|
+
.sort((a, b) => b.score - a.score);
|
|
142
|
+
|
|
143
|
+
console.log('');
|
|
144
|
+
if (scored.length === 0) {
|
|
145
|
+
console.log('❌ 該当するパイプラインが見つかりませんでした。');
|
|
146
|
+
console.log('');
|
|
147
|
+
console.log('利用可能なキーワード例:');
|
|
148
|
+
console.log(' 遺伝子/バリアント, 創薬/ADMET, RNA-seq, がん, 機械学習/ML,');
|
|
149
|
+
console.log(' メタボロミクス, マイクロバイオーム, 環境/生態, 材料, 臨床/EHR,');
|
|
150
|
+
console.log(' 文献/メタアナリシス, 可視化, 論文, AlphaFold, シングルセル');
|
|
151
|
+
console.log('');
|
|
152
|
+
console.log('全パイプライン一覧は `satori pipeline list` で確認できます。');
|
|
153
|
+
} else {
|
|
154
|
+
console.log(`✅ ${scored.length} 件のパイプラインが見つかりました:\n`);
|
|
155
|
+
const top = scored.slice(0, 5);
|
|
156
|
+
for (const p of top) {
|
|
157
|
+
console.log(` 📋 Pipeline #${p.id}: ${p.name}`);
|
|
158
|
+
console.log(` スキル連鎖: ${p.skills}`);
|
|
159
|
+
console.log('');
|
|
160
|
+
}
|
|
161
|
+
if (scored.length > 5) {
|
|
162
|
+
console.log(` ... 他 ${scored.length - 5} 件`);
|
|
163
|
+
}
|
|
164
|
+
console.log('詳細は docs/SATORI_PIPELINE_EXAMPLES.md を参照してください。');
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
rl.close();
|
|
168
|
+
})();
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
 * `satori pipeline list`: print every entry of PIPELINES (id, name and skill
 * chain), followed by a pointer to the documentation for the remaining
 * pipeline categories.
 *
 * @returns {void}
 */
function pipelineList() {
  // The count is derived from the table so the header cannot drift out of
  // sync when pipelines are added or removed.
  console.log(`\n📋 SATORI パイプライン一覧 (${PIPELINES.length} ドメインパイプライン)\n`);

  for (const { id, name, skills } of PIPELINES) {
    // Right-align ids to two characters so single- and double-digit ids line up.
    console.log(` #${String(id).padStart(2, ' ')} ${name}`);
    console.log(` ${skills}`);
    console.log('');
  }

  console.log('クロスドメイン (15), 産業特化 (5), 方法論特化 (4) パイプラインは');
  console.log('docs/SATORI_PIPELINE_EXAMPLES.md を参照してください。');
}
|
|
181
|
+
|
|
82
182
|
function showVersion() {
|
|
83
183
|
const pkg = require(path.join(PACKAGE_ROOT, 'package.json'));
|
|
84
184
|
console.log(pkg.version);
|
|
@@ -88,6 +188,17 @@ switch (COMMAND) {
|
|
|
88
188
|
case 'init':
|
|
89
189
|
init();
|
|
90
190
|
break;
|
|
191
|
+
case 'pipeline':
|
|
192
|
+
if (SUBCOMMAND === 'suggest') {
|
|
193
|
+
pipelineSuggest();
|
|
194
|
+
} else if (SUBCOMMAND === 'list') {
|
|
195
|
+
pipelineList();
|
|
196
|
+
} else {
|
|
197
|
+
console.error(`Unknown pipeline subcommand: ${SUBCOMMAND || '(none)'}`);
|
|
198
|
+
console.log('Usage: satori pipeline suggest | satori pipeline list');
|
|
199
|
+
process.exit(1);
|
|
200
|
+
}
|
|
201
|
+
break;
|
|
91
202
|
case 'help':
|
|
92
203
|
case '--help':
|
|
93
204
|
case '-h':
|
package/package.json
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nahisaho/satori",
|
|
3
|
-
"version": "0.25.3",
|
|
3
|
+
"version": "0.25.5",
|
|
4
4
|
"description": "SATORI — Agent Skills for Science. GitHub Copilot Agent Skills collection for scientific data analysis.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
7
|
-
"satori-skills": "bin/satori.js",
|
|
8
7
|
"satori": "bin/satori.js"
|
|
9
8
|
},
|
|
10
9
|
"files": [
|
|
@@ -399,6 +399,28 @@ def preprocessing_pipeline(df, target_col=None, config=None):
|
|
|
399
399
|
|
|
400
400
|
print(f" Preprocessing complete: {df.shape}")
|
|
401
401
|
|
|
402
|
+
# チェックポイント: 前処理済みデータを永続化(パイプライン連携用)
|
|
403
|
+
from pathlib import Path
|
|
404
|
+
results_dir = Path("results")
|
|
405
|
+
results_dir.mkdir(parents=True, exist_ok=True)
|
|
406
|
+
df.to_csv(results_dir / "preprocessed_data.csv", index=False)
|
|
407
|
+
print(f" ✔ Preprocessed data saved: results/preprocessed_data.csv ({df.shape})")
|
|
408
|
+
|
|
409
|
+
# 前処理サマリーをJSONに保存
|
|
410
|
+
import json
|
|
411
|
+
summary = {
|
|
412
|
+
"original_shape": [n_original, len(df.columns)],
|
|
413
|
+
"processed_shape": list(df.shape),
|
|
414
|
+
"duplicates_removed": n_original - len(df) if config.get("drop_duplicates", True) else 0,
|
|
415
|
+
"numeric_columns": len(df.select_dtypes(include=[np.number]).columns),
|
|
416
|
+
"categorical_columns": len(df.select_dtypes(include=["object", "category"]).columns),
|
|
417
|
+
"scaling_method": config.get("scaling_method", "standard"),
|
|
418
|
+
"missing_strategy": config.get("missing_strategy", "auto"),
|
|
419
|
+
}
|
|
420
|
+
with open(results_dir / "preprocessing_summary.json", "w") as f:
|
|
421
|
+
json.dump(summary, f, indent=2, ensure_ascii=False)
|
|
422
|
+
print(f" ✔ Preprocessing summary saved: results/preprocessing_summary.json")
|
|
423
|
+
|
|
402
424
|
return df, {"encoders": encoders, "scaler": scaler}
|
|
403
425
|
```
|
|
404
426
|
|
|
@@ -268,6 +268,46 @@ def mag_pipeline(assembly_fasta, bam_file,
|
|
|
268
268
|
f"{len(quality)} QC passed → "
|
|
269
269
|
f"{len(derep)} dereplicated")
|
|
270
270
|
|
|
271
|
+
# === パイプライン連携用の構造化出力 ===
|
|
272
|
+
results = Path(output_dir) / "results"
|
|
273
|
+
results.mkdir(parents=True, exist_ok=True)
|
|
274
|
+
|
|
275
|
+
# 1) MAG品質サマリーCSV (→ phylogenetics, environmental-ecology)
|
|
276
|
+
quality.to_csv(results / "mag_quality_summary.csv", index=False)
|
|
277
|
+
print(f" ✔ MAG quality summary: {results / 'mag_quality_summary.csv'}")
|
|
278
|
+
|
|
279
|
+
# 2) 分類学サマリーCSV (→ phylogenetics)
|
|
280
|
+
if not taxonomy.empty:
|
|
281
|
+
taxonomy.to_csv(results / "mag_taxonomy.csv", index=False)
|
|
282
|
+
print(f" ✔ MAG taxonomy: {results / 'mag_taxonomy.csv'}")
|
|
283
|
+
|
|
284
|
+
# 3) 代表MAGをFASTAに統合 (→ phylogenetics, annotation)
|
|
285
|
+
representative_fasta = results / "representative_mags.fasta"
|
|
286
|
+
with open(representative_fasta, "w") as f:
|
|
287
|
+
for mag_path in derep:
|
|
288
|
+
mag_name = Path(mag_path).stem
|
|
289
|
+
with open(mag_path) as mag_f:
|
|
290
|
+
for line in mag_f:
|
|
291
|
+
if line.startswith(">"):
|
|
292
|
+
f.write(f">{mag_name}_{line[1:]}")
|
|
293
|
+
else:
|
|
294
|
+
f.write(line)
|
|
295
|
+
print(f" ✔ Representative MAGs FASTA: {representative_fasta}")
|
|
296
|
+
|
|
297
|
+
# 4) パイプラインサマリーJSON
|
|
298
|
+
import json
|
|
299
|
+
pipeline_summary = {
|
|
300
|
+
"total_bins": len(bins),
|
|
301
|
+
"quality_passed": len(quality),
|
|
302
|
+
"high_quality": int((quality["quality"] == "high").sum()) if "quality" in quality.columns else 0,
|
|
303
|
+
"medium_quality": int((quality["quality"] == "medium").sum()) if "quality" in quality.columns else 0,
|
|
304
|
+
"dereplicated": len(derep),
|
|
305
|
+
"classified": len(taxonomy) if not taxonomy.empty else 0,
|
|
306
|
+
}
|
|
307
|
+
with open(results / "mag_pipeline_summary.json", "w") as f:
|
|
308
|
+
json.dump(pipeline_summary, f, indent=2)
|
|
309
|
+
print(f" ✔ Pipeline summary: {results / 'mag_pipeline_summary.json'}")
|
|
310
|
+
|
|
271
311
|
return {
|
|
272
312
|
"bins": bins,
|
|
273
313
|
"checkm": checkm,
|