@nahisaho/satori 0.25.4 → 0.25.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/satori.js CHANGED
@@ -4,6 +4,7 @@ const fs = require('fs');
4
4
  const path = require('path');
5
5
 
6
6
  const COMMAND = process.argv[2];
7
+ const SUBCOMMAND = process.argv[3];
7
8
  const FLAGS = process.argv.slice(3);
8
9
 
9
10
  const PACKAGE_ROOT = path.resolve(__dirname, '..');
@@ -70,6 +71,8 @@ SATORI — Agent Skills for Science
70
71
 
71
72
  Usage:
72
73
  satori init [--force] [--dry-run] Install .github/ skills into current directory
74
+ satori pipeline suggest Interactive pipeline recommendation
75
+ satori pipeline list List all available pipelines
73
76
  satori help Show this help message
74
77
  satori --version, -v Show version number
75
78
 
@@ -79,6 +82,103 @@ Options:
79
82
  `);
80
83
  }
81
84
 
85
+ // ── Pipeline Suggest ──
86
+
87
// PIPELINES: the built-in catalog of pipelines used by the `satori pipeline` subcommands.
// Each entry has:
//   id       - number shown to the user as "Pipeline #<id>" (suggest) or "#<id>" (list)
//   name     - display name; pipelineSuggest() also compares it with the query
//              (both lowercased, substring match in either direction)
//   domain   - category label; not read by pipelineSuggest() or pipelineList() in the code shown here
//   keywords - terms pipelineSuggest() looks for, lowercased, as substrings of the user's query
//   skills   - the skill chain as one "a → b → c" string, printed verbatim
+ const PIPELINES = [
88
+ { id: 1, name: '仮説検証→論文化', domain: 'general', keywords: ['仮説', '統計', '論文', 'hypothesis'], skills: 'hypothesis-engine → data-preprocessing → statistical-testing → ml-classification → publication-figures → academic-writing → critical-review' },
89
+ { id: 2, name: 'バリアント→臨床', domain: 'genomics', keywords: ['バリアント', 'variant', 'VCF', 'WGS', 'WES'], skills: 'variant-interpretation → pharmacogenomics → precision-oncology → clinical-decision-support → clinical-reporting' },
90
+ { id: 3, name: 'トランスクリプトーム', domain: 'genomics', keywords: ['RNA-seq', 'トランスクリプトーム', 'DEG', '発現'], skills: 'rnaseq-analysis → pathway-enrichment → network-analysis → publication-figures' },
91
+ { id: 4, name: 'エピジェネティクス', domain: 'genomics', keywords: ['エピゲノム', 'ChIP-seq', 'ATAC-seq', 'メチル化'], skills: 'epigenomics-chromatin → regulatory-genomics → noncoding-rna → gene-regulation' },
92
+ { id: 5, name: 'AlphaFold 構造解析', domain: 'structural', keywords: ['AlphaFold', 'タンパク質構造', '3D', 'protein structure'], skills: 'alphafold-structures → protein-structure-analysis → molecular-docking' },
93
+ { id: 6, name: 'エビデンス合成', domain: 'literature', keywords: ['メタアナリシス', 'systematic review', '文献', 'エビデンス'], skills: 'deep-research → literature-search → meta-analysis → evidence-synthesis → academic-writing → critical-review' },
94
+ { id: 7, name: '創薬パイプライン', domain: 'pharma', keywords: ['創薬', 'drug discovery', 'ADMET', 'ドッキング'], skills: 'drug-target-profiling → compound-screening → molecular-docking → admet-pharmacokinetics → drug-repurposing' },
95
+ { id: 8, name: 'ML/XAI パイプライン', domain: 'ml', keywords: ['機械学習', 'ML', 'SHAP', 'XAI', '予測モデル'], skills: 'data-preprocessing → ml-classification → ml-regression → explainable-ai → fairness-bias → publication-figures' },
96
+ { id: 9, name: '環境・生態学', domain: 'ecology', keywords: ['生態', '生物多様性', 'SDM', '環境', 'ecology'], skills: 'environmental-ecology → biodiversity-conservation → species-distribution → time-series-forecasting' },
97
+ { id: 10, name: '計算材料科学', domain: 'materials', keywords: ['材料', 'materials', 'DFT', '物性'], skills: 'computational-materials → cheminformatics → molecular-dynamics → ml-regression' },
98
+ { id: 11, name: '医薬品安全性', domain: 'pharma', keywords: ['有害事象', 'ファーマコビジランス', '安全性', 'adverse'], skills: 'pharmacovigilance → pharmacogenomics → regulatory-science' },
99
+ { id: 12, name: '希少疾患', domain: 'clinical', keywords: ['希少疾患', 'rare disease', 'Orphanet'], skills: 'rare-disease-genetics → gene-panel-design → variant-interpretation → clinical-reporting' },
100
+ { id: 13, name: 'がんゲノミクス', domain: 'oncology', keywords: ['がん', 'cancer', 'TMB', '体細胞変異'], skills: 'cancer-genomics → precision-oncology → biomarker-discovery → clinical-reporting' },
101
+ { id: 14, name: 'GWAS・集団遺伝学', domain: 'genomics', keywords: ['GWAS', '集団遺伝学', 'population genetics', 'biobank'], skills: 'biobank-cohort → population-genetics → statistical-testing → publication-figures' },
102
+ { id: 15, name: 'シングルセル', domain: 'genomics', keywords: ['シングルセル', 'single-cell', 'scRNA-seq', '空間トランスクリプトーム'], skills: 'cellxgene-census → scvi-integration → spatial-transcriptomics → gene-regulation' },
103
+ { id: 16, name: 'プロテオミクス', domain: 'omics', keywords: ['プロテオミクス', 'proteomics', '質量分析'], skills: 'proteomics → protein-structure-analysis → network-analysis' },
104
+ { id: 17, name: 'メタボロミクス', domain: 'omics', keywords: ['メタボロミクス', 'metabolomics', '代謝物', '脂質'], skills: 'metabolomics → lipidomics → systems-biology → network-analysis' },
105
+ { id: 18, name: 'マイクロバイオーム', domain: 'ecology', keywords: ['マイクロバイオーム', 'metagenome', '16S', '腸内細菌'], skills: 'microbiome-metagenomics → metagenome-assembled-genomes → phylogenetics → environmental-ecology' },
106
+ { id: 19, name: 'パスウェイ・KG', domain: 'systems', keywords: ['パスウェイ', 'ナレッジグラフ', 'knowledge graph', 'pathway'], skills: 'gene-id-mapping → pathway-enrichment → ontology-integration → network-analysis → knowledge-graph' },
107
+ { id: 20, name: '農業・食品', domain: 'agriculture', keywords: ['農業', '食品', 'agriculture', 'food safety'], skills: 'agricultural-science → food-science-nutrition → environmental-ecology' },
108
+ { id: 21, name: '臨床情報学', domain: 'clinical', keywords: ['臨床', 'EHR', 'FHIR', 'OMOP', '電子カルテ'], skills: 'clinical-standards → clinical-nlp → clinical-reporting → healthcare-ai → survival-clinical' },
109
+ { id: 22, name: 'ロボティクス・IoT', domain: 'engineering', keywords: ['ロボティクス', 'IoT', 'ロボット', 'robotics'], skills: 'robotics-automation → lab-automation → lab-data-management → interactive-dashboard' },
110
+ { id: 23, name: '実験計画・統計', domain: 'general', keywords: ['実験計画', 'DOE', '検出力', 'サンプルサイズ'], skills: 'experimental-design → statistical-testing → reproducibility-assessment → publication-figures' },
111
+ { id: 24, name: '科学的可視化', domain: 'general', keywords: ['可視化', 'visualization', 'ダッシュボード', 'dashboard'], skills: 'publication-figures → interactive-dashboard' },
112
+ { id: 25, name: '学術出版', domain: 'literature', keywords: ['論文投稿', 'journal', 'グラント', 'grant'], skills: 'academic-writing → critical-review → citation-network' },
113
+ { id: 26, name: '科学教育', domain: 'education', keywords: ['教育', 'education', 'カリキュラム'], skills: 'science-education → reproducibility-assessment' },
114
+ ];
115
+
116
/**
 * Interactive `satori pipeline suggest` command.
 *
 * Prompts once on stdin for a free-text research theme, scores every entry of
 * PIPELINES against it, and prints up to five of the best matches. When
 * nothing matches (including an empty or whitespace-only answer) it prints a
 * list of example keywords instead.
 *
 * Scoring, all case-insensitive substring checks:
 *   +2 for each pipeline keyword contained in the query
 *   +1 when the query contains the pipeline name or the name contains the query
 *
 * The prompt runs asynchronously, so this function returns before the user
 * has answered. If the prompt fails, the error is printed and
 * `process.exitCode` is set to 1.
 *
 * @returns {void}
 */
function pipelineSuggest() {
  const readline = require('readline');
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

  // Promise wrapper around the callback-style rl.question().
  const ask = (q) => new Promise((resolve) => rl.question(q, resolve));

  (async () => {
    try {
      console.log('\n🔬 SATORI Pipeline Suggest — インタラクティブパイプライン推薦\n');
      console.log('研究内容を入力すると、最適なパイプラインを提案します。');
      console.log('(Ctrl+C で終了)\n');

      const input = await ask('何を解析しますか? キーワードや研究テーマを入力してください:\n> ');
      // Trim so surrounding spaces do not defeat the "name contains query" check.
      const query = input.trim().toLowerCase();

      // The empty string is a substring of every string, so an empty query
      // would otherwise "match" every pipeline name. Treat it as no match.
      const scored = query === ''
        ? []
        : PIPELINES.map((p) => {
          let score = 0;
          for (const kw of p.keywords) {
            if (query.includes(kw.toLowerCase())) score += 2;
          }
          // Partial match on name, in either direction.
          const name = p.name.toLowerCase();
          if (query.includes(name) || name.includes(query)) score += 1;
          return { ...p, score };
        })
          .filter((p) => p.score > 0)
          .sort((a, b) => b.score - a.score);

      console.log('');
      if (scored.length === 0) {
        console.log('❌ 該当するパイプラインが見つかりませんでした。');
        console.log('');
        console.log('利用可能なキーワード例:');
        console.log(' 遺伝子/バリアント, 創薬/ADMET, RNA-seq, がん, 機械学習/ML,');
        console.log(' メタボロミクス, マイクロバイオーム, 環境/生態, 材料, 臨床/EHR,');
        console.log(' 文献/メタアナリシス, 可視化, 論文, AlphaFold, シングルセル');
        console.log('');
        console.log('全パイプライン一覧は `satori pipeline list` で確認できます。');
      } else {
        console.log(`✅ ${scored.length} 件のパイプラインが見つかりました:\n`);
        const top = scored.slice(0, 5);
        for (const p of top) {
          console.log(` 📋 Pipeline #${p.id}: ${p.name}`);
          console.log(` スキル連鎖: ${p.skills}`);
          console.log('');
        }
        if (scored.length > 5) {
          console.log(` ... 他 ${scored.length - 5} 件`);
        }
        console.log('詳細は docs/SATORI_PIPELINE_EXAMPLES.md を参照してください。');
      }
    } finally {
      // Always release stdin, even when something above throws.
      rl.close();
    }
  })().catch((err) => {
    // Avoid an unhandled rejection; report the failure and signal it to the shell.
    console.error(`Pipeline suggest failed: ${err && err.message ? err.message : err}`);
    process.exitCode = 1;
  });
}
170
+
171
/**
 * `satori pipeline list` command: prints every entry of PIPELINES to stdout.
 *
 * For each pipeline it prints the id (left-padded with a space to two
 * characters), the name, and the skill chain on the following line, then
 * points the user at the docs for the pipelines that are not in the table.
 *
 * @returns {void}
 */
function pipelineList() {
  // Derive the count from the table so the header cannot drift when
  // pipelines are added or removed.
  console.log(`\n📋 SATORI パイプライン一覧 (${PIPELINES.length} ドメインパイプライン)\n`);
  for (const p of PIPELINES) {
    console.log(` #${String(p.id).padStart(2, ' ')} ${p.name}`);
    console.log(` ${p.skills}`);
    console.log('');
  }
  console.log('クロスドメイン (15), 産業特化 (5), 方法論特化 (4) パイプラインは');
  console.log('docs/SATORI_PIPELINE_EXAMPLES.md を参照してください。');
}
181
+
82
182
  function showVersion() {
83
183
  const pkg = require(path.join(PACKAGE_ROOT, 'package.json'));
84
184
  console.log(pkg.version);
@@ -88,6 +188,17 @@ switch (COMMAND) {
88
188
  case 'init':
89
189
  init();
90
190
  break;
191
+ case 'pipeline':
192
+ if (SUBCOMMAND === 'suggest') {
193
+ pipelineSuggest();
194
+ } else if (SUBCOMMAND === 'list') {
195
+ pipelineList();
196
+ } else {
197
+ console.error(`Unknown pipeline subcommand: ${SUBCOMMAND || '(none)'}`);
198
+ console.log('Usage: satori pipeline suggest | satori pipeline list');
199
+ process.exit(1);
200
+ }
201
+ break;
91
202
  case 'help':
92
203
  case '--help':
93
204
  case '-h':
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nahisaho/satori",
3
- "version": "0.25.4",
3
+ "version": "0.25.5",
4
4
  "description": "SATORI — Agent Skills for Science. GitHub Copilot Agent Skills collection for scientific data analysis.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -399,6 +399,28 @@ def preprocessing_pipeline(df, target_col=None, config=None):
399
399
 
400
400
  print(f" Preprocessing complete: {df.shape}")
401
401
 
402
+ # チェックポイント: 前処理済みデータを永続化(パイプライン連携用)
403
+ from pathlib import Path
404
+ results_dir = Path("results")
405
+ results_dir.mkdir(parents=True, exist_ok=True)
406
+ df.to_csv(results_dir / "preprocessed_data.csv", index=False)
407
+ print(f" ✔ Preprocessed data saved: results/preprocessed_data.csv ({df.shape})")
408
+
409
+ # 前処理サマリーをJSONに保存
410
+ import json
411
+ summary = {
412
+ "original_shape": [n_original, len(df.columns)],
413
+ "processed_shape": list(df.shape),
414
+ "duplicates_removed": n_original - len(df) if config.get("drop_duplicates", True) else 0,
415
+ "numeric_columns": len(df.select_dtypes(include=[np.number]).columns),
416
+ "categorical_columns": len(df.select_dtypes(include=["object", "category"]).columns),
417
+ "scaling_method": config.get("scaling_method", "standard"),
418
+ "missing_strategy": config.get("missing_strategy", "auto"),
419
+ }
420
+ with open(results_dir / "preprocessing_summary.json", "w") as f:
421
+ json.dump(summary, f, indent=2, ensure_ascii=False)
422
+ print(f" ✔ Preprocessing summary saved: results/preprocessing_summary.json")
423
+
402
424
  return df, {"encoders": encoders, "scaler": scaler}
403
425
  ```
404
426
 
@@ -268,6 +268,46 @@ def mag_pipeline(assembly_fasta, bam_file,
268
268
  f"{len(quality)} QC passed → "
269
269
  f"{len(derep)} dereplicated")
270
270
 
271
+ # === パイプライン連携用の構造化出力 ===
272
+ results = Path(output_dir) / "results"
273
+ results.mkdir(parents=True, exist_ok=True)
274
+
275
+ # 1) MAG品質サマリーCSV (→ phylogenetics, environmental-ecology)
276
+ quality.to_csv(results / "mag_quality_summary.csv", index=False)
277
+ print(f" ✔ MAG quality summary: {results / 'mag_quality_summary.csv'}")
278
+
279
+ # 2) 分類学サマリーCSV (→ phylogenetics)
280
+ if not taxonomy.empty:
281
+ taxonomy.to_csv(results / "mag_taxonomy.csv", index=False)
282
+ print(f" ✔ MAG taxonomy: {results / 'mag_taxonomy.csv'}")
283
+
284
+ # 3) 代表MAGをFASTAに統合 (→ phylogenetics, annotation)
285
+ representative_fasta = results / "representative_mags.fasta"
286
+ with open(representative_fasta, "w") as f:
287
+ for mag_path in derep:
288
+ mag_name = Path(mag_path).stem
289
+ with open(mag_path) as mag_f:
290
+ for line in mag_f:
291
+ if line.startswith(">"):
292
+ f.write(f">{mag_name}_{line[1:]}")
293
+ else:
294
+ f.write(line)
295
+ print(f" ✔ Representative MAGs FASTA: {representative_fasta}")
296
+
297
+ # 4) パイプラインサマリーJSON
298
+ import json
299
+ pipeline_summary = {
300
+ "total_bins": len(bins),
301
+ "quality_passed": len(quality),
302
+ "high_quality": int((quality["quality"] == "high").sum()) if "quality" in quality.columns else 0,
303
+ "medium_quality": int((quality["quality"] == "medium").sum()) if "quality" in quality.columns else 0,
304
+ "dereplicated": len(derep),
305
+ "classified": len(taxonomy) if not taxonomy.empty else 0,
306
+ }
307
+ with open(results / "mag_pipeline_summary.json", "w") as f:
308
+ json.dump(pipeline_summary, f, indent=2)
309
+ print(f" ✔ Pipeline summary: {results / 'mag_pipeline_summary.json'}")
310
+
271
311
  return {
272
312
  "bins": bins,
273
313
  "checkm": checkm,