create-verifiable-agent 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ 'use strict';
2
+
3
+ const yaml = require('js-yaml');
4
+
5
/**
 * Render a Markdown "human-AI collaboration card" describing agent roles,
 * human review gates, trust boundaries and the verification outcome.
 *
 * @param {object} context - Codebase analysis; only repoName is read here.
 * @param {string} recipeYaml - YAML recipe text, parsed with js-yaml.
 * @param {object} verification - { passed: boolean, results: Array<{name, passed, details}> }.
 * @returns {Promise<string>} The rendered Markdown document.
 */
async function generateCollabCard(context, recipeYaml, verification) {
  const recipe = yaml.load(recipeYaml);
  const agents = recipe.agents || [];
  const now = new Date().toISOString();
  // NOTE: a lookup of the 'verifier' agent used to happen here, but its
  // result was never referenced — the dead local has been removed.

  return `# Human-AI Collaboration Card

> **Project:** ${context.repoName}
> **Generated:** ${now}
> **Verification status:** ${verification.passed ? '✅ PASSED' : '❌ NEEDS REVIEW'}

---

## What the AI does

${agents.map(a => `- **${a.role}** (\`${a.id}\`): ${(a.responsibilities || []).slice(0, 2).join('; ')}`).join('\n')}

---

## What the human does

| Gate | Human Action Required | Why |
|------|-----------------------|-----|
| Before planning | Review codebase summary | Catch mis-detections early |
| Before execution | Approve task plan | Prevent unintended changes |
| After verification | Sign off on report | Legal/compliance ownership |
| On failure | Investigate and override | AI may not understand context |

---

## Trust boundaries

| Capability | AI autonomy | Human required |
|-----------|-------------|----------------|
| Read files | ✅ Full | |
| Write/edit files | ⚠️ Sandbox only | ✋ Approval needed |
| Run shell commands | ⚠️ Sandbox only | ✋ Approval needed |
| Computer Use (UI) | ⚠️ Sandboxed browser | ✋ Review screenshots |
| External API calls | ❌ Blocked in sandbox | Must enable explicitly |
| Git push | ❌ Never automatic | ✋ Human initiates |

---

## Verification summary

${verification.results.map(r => {
    const icon = r.passed ? '✅' : '❌';
    return `- ${icon} **${r.name}**: ${r.details?.slice(0, 100) || ''}`;
  }).join('\n')}

---

## Model card

| Field | Value |
|-------|-------|
| Model | \`claude-sonnet-4-6\` |
| Computer Use | Enabled (sandboxed) |
| Plan mode | ON (Pro default) |
| Sandbox | ${recipe.safety?.sandbox_mode ? 'ON ✅' : 'OFF ⚠️'} |
| Guardrails | ${(recipe.safety?.guardrails || []).length} active |

---

## Known limitations

- AI may misidentify tech stack for highly custom setups
- Self-consistency scoring requires ≥2 API samples (costs tokens)
- Computer Use screenshots may lag behind fast UIs
- Provenance hashes do not survive file renames

---

## How to escalate

If the AI produces unexpected output:
1. Check \`verification-report.yaml\` for specific failures
2. Re-run with \`--sandbox\` and inspect the recipe before live execution
3. Open an issue at https://github.com/kju4q/verifiable-agent-recipe/issues

---

*This card is auto-generated. Human reviewer must sign before production use.*

**Reviewed by:** _____________________________ **Date:** _____________
`;
}
93
+
94
+ module.exports = { generateCollabCard };
@@ -0,0 +1,17 @@
1
+ 'use strict';
2
+
3
+ const path = require('path');
4
+ const fs = require('fs');
5
+
6
/**
 * Load a bundled demo context module by name.
 *
 * @param {string} name - Demo identifier; must be the basename of a `.js`
 *   file in the package's `demo/` directory.
 * @returns {Promise<object>} The demo module's exports.
 * @throws {Error} If the name is unsafe or no matching demo file exists.
 */
async function loadDemo(name) {
  const demoDir = path.join(__dirname, '..', 'demo');

  // The name is CLI input and ends up in a require() path, so reject anything
  // that is not a plain basename (blocks path traversal like '../../x').
  if (name !== path.basename(name)) {
    throw new Error(`Invalid demo name '${name}'.`);
  }

  const demoFile = path.join(demoDir, `${name}.js`);

  if (!fs.existsSync(demoFile)) {
    // Enumerate what actually ships in demo/ so the hint stays accurate as
    // demos are added, instead of the previously hard-coded "mythos".
    let available = 'mythos';
    try {
      const names = fs.readdirSync(demoDir)
        .filter((f) => f.endsWith('.js'))
        .map((f) => path.basename(f, '.js'));
      if (names.length) available = names.join(', ');
    } catch {
      // demo directory missing entirely — keep the default hint
    }
    throw new Error(`Demo '${name}' not found. Available demos: ${available}`);
  }

  return require(demoFile);
}
16
+
17
+ module.exports = { loadDemo };
@@ -0,0 +1,190 @@
1
+ 'use strict';
2
+
3
+ const Anthropic = require('@anthropic-ai/sdk');
4
+ const yaml = require('js-yaml');
5
+ const fs = require('fs');
6
+ const path = require('path');
7
+
8
// System prompt for the recipe-generation call in generateRecipe(). The model
// is told to emit raw YAML (no ``` fences) so the response can be fed to
// yaml.load() directly; generateRecipe() falls back to a static recipe when
// the output fails to parse.
const RECIPE_SYSTEM_PROMPT = `You are an expert AI systems architect. Given a codebase analysis, produce a multi-agent YAML recipe.

The recipe must contain:
1. metadata: name, version, description, source_repo, generated_at
2. agents: list of specialized agents (analyzer, planner, executor, verifier, reporter)
   Each agent has: id, role, model, tools, responsibilities, inputs, outputs
3. workflow: ordered steps with agent assignments and data flow
4. verification: self_consistency checks and provenance tracking
5. safety: sandbox_mode flag and guardrails
6. computer_use: whether any agent uses the Computer Use API

Output ONLY valid YAML. No markdown fences.`;
20
+
21
/**
 * Generate a multi-agent YAML recipe for the analyzed codebase.
 *
 * In sandbox mode — or when no API key is supplied — a deterministic static
 * recipe is returned and no network call is made.
 *
 * @param {object} context - Codebase analysis (summary, keyFiles, files, ...).
 * @param {object} [opts]
 * @param {string} [opts.model] - Model id to request.
 * @param {string} [opts.apiKey] - Anthropic API key; absence forces fallback.
 * @param {boolean} [opts.sandbox] - Skip all API calls when true.
 * @returns {Promise<string>} YAML recipe text.
 */
async function generateRecipe(context, { model, apiKey, sandbox } = {}) {
  if (sandbox || !apiKey) {
    return buildStaticRecipe(context);
  }

  const client = new Anthropic({ apiKey });

  const userMessage = `Analyze this codebase and produce the multi-agent YAML recipe:

${context.summary}

Key files:
${Object.entries(context.keyFiles).map(([k, v]) => `=== ${k} ===\n${v}`).join('\n\n')}

Top files by path:
${context.files.slice(0, 20).map(f => `- ${f.path} (${f.ext})`).join('\n')}

Requirements:
- Create 5 specialized agents tailored to this codebase
- Include Computer Use agent for UI/browser tasks if relevant
- Add self-consistency verification loops
- Add provenance tracking for each agent output
- Safety: default sandbox_mode: true
- Use model: ${model}`;

  const message = await client.messages.create({
    model,
    max_tokens: 4096,
    system: RECIPE_SYSTEM_PROMPT,
    messages: [{ role: 'user', content: userMessage }],
  });

  // The response may contain several content blocks (not all of type 'text');
  // reading content[0].text blindly could crash or pick a non-text block.
  // Concatenate only the text blocks instead.
  let rawText = message.content
    .filter((block) => block.type === 'text')
    .map((block) => block.text)
    .join('')
    .trim();

  if (!rawText) {
    console.warn('\n Warning: Model returned no text content, using fallback recipe.');
    return buildStaticRecipe(context);
  }

  // Defensive: models occasionally wrap output in ``` fences despite the
  // system prompt forbidding them. Strip a single enclosing fence pair.
  const fenced = rawText.match(/^```(?:ya?ml)?\s*\n([\s\S]*?)\n```$/);
  if (fenced) rawText = fenced[1].trim();

  // Validate the YAML before returning; fall back to the static recipe on
  // malformed output rather than failing the whole run.
  try {
    yaml.load(rawText);
  } catch (e) {
    console.warn('\n Warning: Generated YAML had syntax issues, using fallback recipe.');
    return buildStaticRecipe(context);
  }

  return rawText;
}
65
+
66
/**
 * Deterministic fallback recipe used in sandbox mode, when no API key is
 * available, or when the model's YAML fails to parse in generateRecipe().
 *
 * @param {object} context - Codebase analysis; only repoName and stack are read.
 * @returns {string} YAML-serialized recipe.
 */
function buildStaticRecipe(context) {
  const recipe = {
    metadata: {
      name: `${context.repoName}-verifiable-agent`,
      version: '1.0.0',
      description: `Multi-agent recipe for ${context.repoName}`,
      source_repo: context.repoName,
      generated_at: new Date().toISOString(),
      model: 'claude-sonnet-4-6',
      computer_use_enabled: true,
    },
    // Conservative defaults: sandboxed, plan mode on, edits require approval.
    safety: {
      sandbox_mode: true,
      guardrails: [
        'no_destructive_writes',
        'no_external_api_calls_in_sandbox',
        'human_approval_required_for_mutations',
        'rate_limit_api_calls',
      ],
      plan_mode: true,
      accept_edits: false,
    },
    // Five specialist agents; executor and computer_use_agent carry extra
    // per-agent safety constraints on top of the global safety block.
    agents: [
      {
        id: 'analyzer',
        role: 'Codebase Analyzer',
        model: 'claude-sonnet-4-6',
        tools: ['read_file', 'list_files', 'grep', 'glob'],
        responsibilities: [
          'Scan repository structure and detect tech stack',
          'Identify entry points, key modules, and dependencies',
          'Extract architecture patterns and data flows',
        ],
        inputs: ['source_repo_path'],
        outputs: ['codebase_summary', 'file_index', 'stack_report'],
      },
      {
        id: 'planner',
        role: 'Task Planner',
        model: 'claude-sonnet-4-6',
        tools: ['read_file', 'write_file'],
        responsibilities: [
          'Decompose high-level goal into verifiable sub-tasks',
          'Assign sub-tasks to appropriate specialist agents',
          'Define success criteria for each task',
        ],
        inputs: ['codebase_summary', 'user_goal'],
        outputs: ['task_plan', 'agent_assignments'],
      },
      {
        id: 'executor',
        role: 'Code Executor',
        model: 'claude-sonnet-4-6',
        tools: ['bash', 'write_file', 'edit_file'],
        responsibilities: [
          'Implement planned changes with minimal blast radius',
          'Run tests after each change',
          'Rollback on failure',
        ],
        inputs: ['task_plan', 'codebase_summary'],
        outputs: ['code_changes', 'test_results'],
        safety: { require_sandbox: true, require_approval: true },
      },
      {
        id: 'computer_use_agent',
        role: 'Computer Use Agent',
        model: 'claude-sonnet-4-6',
        computer_use: true,
        tools: ['screenshot', 'mouse_move', 'left_click', 'type', 'key'],
        responsibilities: [
          'Perform UI interactions for browser-based tasks',
          'Capture screenshots as provenance evidence',
          'Validate visual outputs',
        ],
        inputs: ['ui_task_spec'],
        outputs: ['screenshots', 'interaction_log', 'visual_validation'],
        safety: { require_sandbox: true, no_real_purchases: true },
      },
      {
        id: 'verifier',
        role: 'Output Verifier',
        model: 'claude-sonnet-4-6',
        tools: ['read_file', 'bash'],
        responsibilities: [
          'Run self-consistency checks across agent outputs',
          'Validate provenance chain',
          'Flag hallucinations or contradictions',
          'Score confidence for each claim',
        ],
        inputs: ['code_changes', 'task_plan', 'test_results'],
        outputs: ['verification_report', 'confidence_scores', 'provenance_chain'],
      },
    ],
    // Linear pipeline: step 4 is optional and step 3 requires human approval.
    workflow: [
      { step: 1, agent: 'analyzer', action: 'scan_and_summarize', outputs_to: ['planner', 'verifier'] },
      { step: 2, agent: 'planner', action: 'create_task_plan', outputs_to: ['executor'] },
      { step: 3, agent: 'executor', action: 'implement_changes', outputs_to: ['verifier'], requires_approval: true },
      { step: 4, agent: 'computer_use_agent', action: 'validate_ui', outputs_to: ['verifier'], optional: true },
      { step: 5, agent: 'verifier', action: 'verify_all_outputs', outputs_to: null },
    ],
    verification: {
      self_consistency: {
        enabled: true,
        method: 'multi_sample',
        samples: 3,
        threshold: 0.8,
        description: 'Run each critical task 3 times, flag if results diverge > 20%',
      },
      provenance: {
        enabled: true,
        track_inputs: true,
        track_model_version: true,
        track_timestamps: true,
        hash_outputs: true,
        description: 'Every agent output is hashed and linked to its inputs',
      },
      human_review_gates: ['after_planner', 'before_executor', 'after_verifier'],
    },
    stack: context.stack,
  };

  // lineWidth keeps the longer description strings from being line-folded.
  return yaml.dump(recipe, { lineWidth: 120 });
}
189
+
190
+ module.exports = { generateRecipe };
@@ -0,0 +1,262 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * html-extractor.js
5
+ * Parses HTML/blog-post leak documents and extracts:
6
+ * - Page title and meta description
7
+ * - Key quoted claims (pull-quotes, blockquotes, <em>/<strong> emphasis)
8
+ * - Structured FAQ entries (JSON-LD)
9
+ * - Capability claims (coding, reasoning, cybersecurity)
10
+ * - Cyber-risk warnings
11
+ * - Timeline / provenance signals
12
+ *
13
+ * No external dependencies — pure regex + stdlib.
14
+ */
15
+
16
// Phrases that signal high-value claims worth surfacing.
// NOTE: all entries must be lowercase — findSignalMatches() and
// extractKeyQuotes() compare them with String.includes() against lowercased
// text, so partial stems like 'cyber capabilit' deliberately match both
// singular and plural forms.
const SIGNAL_PHRASES = [
  'step change',
  'most capable',
  'dramatically higher',
  'far ahead',
  'cyber capabilit',
  'exploit vulnerabilit',
  'outpace',
  'defenders',
  'presages',
  'wave of models',
  'new tier',
  'above opus',
  'leaked',
  'human error',
  'cms misconfiguration',
  '3,000',
  'fortune',
];
36
+
37
/**
 * Parse an HTML document and return a structured extraction result.
 * @param {string} html Raw HTML string
 * @param {string} filename Original filename (for context)
 * @returns {LeakExtraction}
 */
function extractFromHtml(html, filename = 'document.html') {
  // Quote-like extractions are computed once since they feed both their own
  // fields and the combined allNotableQuotes list.
  const blockquotes = extractBlockquotes(html);
  const keyQuotes = extractKeyQuotes(html);
  const cyberRiskWarnings = extractCyberRisk(html);
  const plainText = htmlToText(html);

  return {
    filename,
    title: extractTitle(html),
    metaDescription: extractMeta(html, 'description'),
    faqEntries: extractJsonLdFaq(html),
    blockquotes,
    keyQuotes,
    capabilities: extractCapabilities(html),
    cyberRiskWarnings,
    timeline: extractTimeline(html),
    signalMatches: findSignalMatches(plainText),
    plainText,
    // Convenience: every notable quote, flattened and de-duplicated.
    allNotableQuotes: dedupeQuotes([...blockquotes, ...keyQuotes, ...cyberRiskWarnings]),
  };
}
75
+
76
+ // ── Internal extractors ──────────────────────────────────────────────────────
77
+
78
// Return the text of the first <title> element, or '' when absent.
// The `s` flag lets titles span newlines.
function extractTitle(html) {
  const match = /<title[^>]*>(.*?)<\/title>/is.exec(html);
  if (!match) {
    return '';
  }
  return cleanText(match[1]);
}
82
+
83
// Read a <meta name="..."> tag's content attribute, tolerating either
// attribute order (name-then-content or content-then-name).
function extractMeta(html, name) {
  const nameFirst = new RegExp(`<meta[^>]+name=["']${name}["'][^>]+content=["']([^"']+)["']`, 'i');
  const contentFirst = new RegExp(`<meta[^>]+content=["']([^"']+)["'][^>]+name=["']${name}["']`, 'i');
  const match = nameFirst.exec(html) || contentFirst.exec(html);
  return match ? cleanText(match[1]) : '';
}
88
+
89
// Collect FAQ question/answer pairs from JSON-LD <script> blocks whose
// @type is FAQPage. Malformed blocks are skipped silently.
function extractJsonLdFaq(html) {
  const entries = [];
  const scriptRe = /<script\s+type="application\/ld\+json">([\s\S]*?)<\/script>/gi;
  let match;
  while ((match = scriptRe.exec(html)) !== null) {
    try {
      const data = JSON.parse(match[1]);
      if (data['@type'] !== 'FAQPage') continue;
      for (const item of data.mainEntity || []) {
        entries.push({
          question: item.name || '',
          answer: item.acceptedAnswer?.text || '',
        });
      }
    } catch { /* malformed JSON-LD — skip this block */ }
  }
  return entries;
}
108
+
109
// Collect the plain-text body of every <blockquote> longer than 20 chars.
function extractBlockquotes(html) {
  const found = [];
  const quoteRe = /<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi;
  for (let match = quoteRe.exec(html); match !== null; match = quoteRe.exec(html)) {
    const quote = cleanText(htmlToText(match[1]));
    if (quote.length > 20) {
      found.push(quote);
    }
  }
  return found;
}
119
+
120
// Surface notable quotes via two passes: (1) elements whose class names
// suggest emphasis (pull-quotes, callouts, stats), and (2) whole sentences
// of the visible text that contain a signal phrase.
function extractKeyQuotes(html) {
  const collected = new Set();

  // Pass 1 — length bounds inside the patterns keep matches to sane sizes.
  const emphasisPatterns = [
    /class="[^"]*(?:quote|pull-quote|blockquote|highlight|callout)[^"]*"[^>]*>([\s\S]{20,300}?)</gi,
    /class="[^"]*(?:stat|metric|claim|key-claim)[^"]*"[^>]*>([\s\S]{10,200}?)</gi,
  ];
  for (const re of emphasisPatterns) {
    for (let match = re.exec(html); match !== null; match = re.exec(html)) {
      const candidate = cleanText(htmlToText(match[1]));
      if (candidate.length > 15) collected.add(candidate);
    }
  }

  // Pass 2 — split visible text into sentences, keep those carrying a
  // signal phrase within a 30–500 char length window.
  const visible = htmlToText(html);
  for (const sentence of visible.split(/(?<=[.!?])\s+/)) {
    const lowered = sentence.toLowerCase();
    const hasSignal = SIGNAL_PHRASES.some((phrase) => lowered.includes(phrase));
    if (hasSignal && sentence.length > 30 && sentence.length < 500) {
      collected.add(cleanText(sentence));
    }
  }

  return [...collected].filter((q) => q.length > 20);
}
149
+
150
// For each capability area, capture up to three distinct text snippets
// around its keyword. Areas with no matches are omitted from the result.
function extractCapabilities(html) {
  const text = htmlToText(html);
  const found = {};

  const areaPatterns = {
    coding: /coding[^.!?\n]{0,200}/gi,
    reasoning: /reasoning[^.!?\n]{0,200}/gi,
    cybersecurity: /cyber(?:security|[- ]capabilit)[^.!?\n]{0,300}/gi,
    overall: /(?:step change|most capable|new tier)[^.!?\n]{0,200}/gi,
  };

  for (const [area, re] of Object.entries(areaPatterns)) {
    const snippets = [];
    let match;
    while ((match = re.exec(text)) !== null) {
      const snippet = cleanText(match[0]);
      if (snippet.length > 20 && !snippets.includes(snippet)) {
        snippets.push(snippet);
        if (snippets.length >= 3) break;
      }
    }
    if (snippets.length) found[area] = snippets;
  }

  return found;
}
176
+
177
// Gather bounded text snippets around high-signal cyber-risk phrases.
// A Set de-duplicates snippets matched by more than one pattern.
function extractCyberRisk(html) {
  const text = htmlToText(html);
  const found = new Set();

  const riskPatterns = [
    /far ahead of any other[^.!?]{0,150}/gi,
    /exploit vulnerabilit[^.!?]{0,200}/gi,
    /outpace[^.!?]{0,150}/gi,
    /presages[^.!?]{0,200}/gi,
    /wave of models[^.!?]{0,200}/gi,
    /defenders can[^.!?]{0,150}/gi,
    /faster than[^.!?]{0,150}/gi,
  ];

  for (const re of riskPatterns) {
    for (let match = re.exec(text); match !== null; match = re.exec(text)) {
      const snippet = cleanText(match[0]);
      if (snippet.length > 20) found.add(snippet);
    }
  }

  return [...found];
}
202
+
203
// Pull up to ten dated snippets ("March 26, 2026 ..." / "February 2026 ...")
// from the visible text, with fuzzy de-duplication.
function extractTimeline(html) {
  const text = htmlToText(html);
  const events = [];
  const dateRe = /(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+(?:\d{1,2},\s+)?\d{4}[^\n.!?]{0,200}/g;
  let match;
  while ((match = dateRe.exec(text)) !== null) {
    const event = cleanText(match[0]);
    // Fuzzy dedupe: treat the first 20 chars as the event's identity.
    const isDuplicate = events.some((e) => e.includes(event.slice(0, 20)));
    if (event.length > 10 && !isDuplicate) {
      events.push(event);
    }
  }
  return events.slice(0, 10);
}
217
+
218
// Return every signal phrase present in the (lowercased) text.
function findSignalMatches(text) {
  const haystack = text.toLowerCase();
  return SIGNAL_PHRASES.filter((phrase) => haystack.includes(phrase));
}
222
+
223
+ // ── Utilities ────────────────────────────────────────────────────────────────
224
+
225
/**
 * Strip an HTML string down to readable plain text.
 *
 * Script/style elements are removed wholesale, block-level closers become
 * newlines, remaining tags are dropped, common entities are decoded, and
 * whitespace is normalized.
 *
 * @param {string} html
 * @returns {string}
 */
function htmlToText(html) {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/p>/gi, '\n')
    .replace(/<\/div>/gi, '\n')
    .replace(/<\/li>/gi, '\n')
    .replace(/<[^>]+>/g, ' ')
    // Decode entities. &amp; must be decoded LAST: the original decoded it
    // first, which double-decoded escaped sequences (e.g. "&amp;lt;" became
    // "&lt;" and then "<").
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&nbsp;/g, ' ')
    .replace(/&amp;/g, '&')
    .replace(/[ \t]+/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
244
+
245
// Collapse whitespace runs, trim the ends, and strip any wrapping
// quote characters.
function cleanText(s) {
  const collapsed = s.replace(/\s+/g, ' ');
  const trimmed = collapsed.replace(/^\s+|\s+$/g, '');
  return trimmed.replace(/^["'\s]+|["'\s]+$/g, '');
}
251
+
252
// Remove near-duplicate quotes: two entries sharing the same first 60
// characters (case-insensitive) are treated as the same quote; the first
// occurrence wins.
function dedupeQuotes(arr) {
  const seenKeys = new Set();
  const unique = [];
  for (const quote of arr) {
    const key = quote.toLowerCase().slice(0, 60);
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(quote);
    }
  }
  return unique;
}
261
+
262
+ module.exports = { extractFromHtml, htmlToText };
package/src/index.js ADDED
@@ -0,0 +1,107 @@
1
+ 'use strict';
2
+
3
+ const chalk = require('chalk');
4
+ const ora = require('ora');
5
+ const fs = require('fs');
6
+ const path = require('path');
7
+ const { analyzeSource } = require('./analyzer');
8
+ const { generateRecipe } = require('./generator');
9
+ const { runVerification } = require('./verifier');
10
+ const { generateNotebook } = require('./notebook');
11
+ const { generateCollabCard } = require('./collab-card');
12
+ const { loadDemo } = require('./demo-loader');
13
+ const { planMode } = require('./plan');
14
+
15
/**
 * End-to-end CLI pipeline: analyze a source repo (or load a demo context),
 * optionally gate on plan-mode approval, then write the recipe, verification
 * report, and optional notebook / collaboration card into `outputDir`.
 *
 * Exits the process directly (1 on missing API key, 0 when the plan is
 * rejected) rather than throwing, since this is the CLI entry point.
 *
 * @param {object} opts - Parsed CLI options (see destructuring below).
 */
async function run(opts) {
  const {
    source,
    outputDir,
    sandbox,
    planMode: usePlanMode,
    acceptEdits,
    demo,
    notebook,
    collabCard,
    model,
    maxFiles,
    apiKey,
  } = opts;

  // ── Safety check ────────────────────────────────────────────────────────────
  if (sandbox) {
    console.log(chalk.yellow.bold(' [SANDBOX MODE] No real API calls or mutations will occur.\n'));
  }

  // An API key is only mandatory for live runs; sandbox runs use static output.
  if (!apiKey && !sandbox) {
    console.error(chalk.red(' Error: ANTHROPIC_API_KEY not set. Use --api-key or export ANTHROPIC_API_KEY=...'));
    console.error(chalk.gray(' Tip: run with --sandbox to test without an API key.\n'));
    process.exit(1);
  }

  // ── Ensure output dir ────────────────────────────────────────────────────────
  fs.mkdirSync(outputDir, { recursive: true });

  // ── Step 1: Ingest source ────────────────────────────────────────────────────
  let context;

  if (demo || source === '__demo__') {
    const spinner = ora('Loading Mythos demo context...').start();
    context = await loadDemo(demo || 'mythos');
    spinner.succeed('Mythos cyber-risk simulation loaded');
  } else {
    const spinner = ora(`Analyzing ${source}...`).start();
    context = await analyzeSource(source, { maxFiles, sandbox });
    spinner.succeed(`Analyzed ${context.files.length} files from ${context.repoName}`);
  }

  // ── Step 2: Plan mode (show before executing) ────────────────────────────────
  // --accept-edits bypasses the interactive confirmation gate.
  if (usePlanMode && !acceptEdits) {
    const approved = await planMode(context, opts);
    if (!approved) {
      console.log(chalk.yellow('\n Aborted. Re-run with --accept-edits to skip confirmation.\n'));
      process.exit(0);
    }
  }

  // ── Step 3: Generate outputs ─────────────────────────────────────────────────
  console.log(chalk.cyan('\n Generating outputs...\n'));

  const spinner2 = ora('Building multi-agent YAML recipe...').start();
  const recipe = await generateRecipe(context, { model, apiKey, sandbox });
  const recipeFile = path.join(outputDir, 'recipe.yaml');
  fs.writeFileSync(recipeFile, recipe);
  spinner2.succeed(`Recipe → ${path.relative(process.cwd(), recipeFile)}`);

  // Verification runs against the recipe text just written above.
  const spinner3 = ora('Running verification loops...').start();
  const verification = await runVerification(context, recipe, { model, apiKey, sandbox });
  const verifyFile = path.join(outputDir, 'verification-report.yaml');
  fs.writeFileSync(verifyFile, verification.report);
  const statusIcon = verification.passed ? chalk.green('✔') : chalk.red('✖');
  spinner3.succeed(`Verification ${statusIcon} → ${path.relative(process.cwd(), verifyFile)}`);

  if (notebook) {
    const spinner4 = ora('Building interactive Markdown notebook...').start();
    const nb = await generateNotebook(context, recipe, verification);
    const nbFile = path.join(outputDir, 'notebook.md');
    fs.writeFileSync(nbFile, nb);
    spinner4.succeed(`Notebook → ${path.relative(process.cwd(), nbFile)}`);
  }

  if (collabCard) {
    const spinner5 = ora('Creating human-AI collaboration card...').start();
    const card = await generateCollabCard(context, recipe, verification);
    const cardFile = path.join(outputDir, 'collab-card.md');
    fs.writeFileSync(cardFile, card);
    spinner5.succeed(`Collab card → ${path.relative(process.cwd(), cardFile)}`);
  }

  // ── Done ─────────────────────────────────────────────────────────────────────
  console.log(chalk.green.bold('\n All outputs written to: ') + chalk.white(outputDir));
  console.log(chalk.gray('\n Files generated:'));
  for (const f of fs.readdirSync(outputDir)) {
    console.log(chalk.gray(`   • ${f}`));
  }
  console.log('');
}
106
+
107
+ module.exports = { run };