create-verifiable-agent 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +222 -0
- package/bin/create-verifiable-agent.js +51 -0
- package/demo/mythos-recipe.yaml +183 -0
- package/demo/mythos.js +337 -0
- package/package.json +49 -0
- package/src/analyzer.js +216 -0
- package/src/collab-card.js +94 -0
- package/src/demo-loader.js +17 -0
- package/src/generator.js +190 -0
- package/src/html-extractor.js +262 -0
- package/src/index.js +107 -0
- package/src/notebook.js +277 -0
- package/src/plan.js +49 -0
- package/src/verifier.js +320 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const yaml = require('js-yaml');
|
|
4
|
+
|
|
5
|
+
/**
 * Render a human-readable "collaboration card" (Markdown) describing the
 * division of labour between the AI agents and the human reviewer.
 *
 * @param {object} context - Codebase analysis; only `repoName` is read here.
 * @param {string} recipeYaml - Generated recipe as a YAML string.
 * @param {object} verification - Verification outcome:
 *   `{ passed: boolean, results: [{ name, passed, details }] }`.
 * @returns {Promise<string>} Markdown document for human sign-off.
 */
async function generateCollabCard(context, recipeYaml, verification) {
  const recipe = yaml.load(recipeYaml);
  const agents = recipe.agents || [];
  const now = new Date().toISOString();
  // NOTE: the previous revision also looked up the 'verifier' agent here,
  // but the value was never used — removed as dead code.

  return `# Human-AI Collaboration Card

> **Project:** ${context.repoName}
> **Generated:** ${now}
> **Verification status:** ${verification.passed ? '✅ PASSED' : '❌ NEEDS REVIEW'}

---

## What the AI does

${agents.map(a => `- **${a.role}** (\`${a.id}\`): ${(a.responsibilities || []).slice(0, 2).join('; ')}`).join('\n')}

---

## What the human does

| Gate | Human Action Required | Why |
|------|-----------------------|-----|
| Before planning | Review codebase summary | Catch mis-detections early |
| Before execution | Approve task plan | Prevent unintended changes |
| After verification | Sign off on report | Legal/compliance ownership |
| On failure | Investigate and override | AI may not understand context |

---

## Trust boundaries

| Capability | AI autonomy | Human required |
|-----------|-------------|----------------|
| Read files | ✅ Full | |
| Write/edit files | ⚠️ Sandbox only | ✋ Approval needed |
| Run shell commands | ⚠️ Sandbox only | ✋ Approval needed |
| Computer Use (UI) | ⚠️ Sandboxed browser | ✋ Review screenshots |
| External API calls | ❌ Blocked in sandbox | Must enable explicitly |
| Git push | ❌ Never automatic | ✋ Human initiates |

---

## Verification summary

${verification.results.map(r => {
    const icon = r.passed ? '✅' : '❌';
    return `- ${icon} **${r.name}**: ${r.details?.slice(0, 100) || ''}`;
  }).join('\n')}

---

## Model card

| Field | Value |
|-------|-------|
| Model | \`claude-sonnet-4-6\` |
| Computer Use | Enabled (sandboxed) |
| Plan mode | ON (Pro default) |
| Sandbox | ${recipe.safety?.sandbox_mode ? 'ON ✅' : 'OFF ⚠️'} |
| Guardrails | ${(recipe.safety?.guardrails || []).length} active |

---

## Known limitations

- AI may misidentify tech stack for highly custom setups
- Self-consistency scoring requires ≥2 API samples (costs tokens)
- Computer Use screenshots may lag behind fast UIs
- Provenance hashes do not survive file renames

---

## How to escalate

If the AI produces unexpected output:
1. Check \`verification-report.yaml\` for specific failures
2. Re-run with \`--sandbox\` and inspect the recipe before live execution
3. Open an issue at https://github.com/kju4q/verifiable-agent-recipe/issues

---

*This card is auto-generated. Human reviewer must sign before production use.*

**Reviewed by:** _____________________________ **Date:** _____________
`;
}
|
|
93
|
+
|
|
94
|
+
module.exports = { generateCollabCard };
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
|
|
6
|
+
/**
 * Load a bundled demo context module by name from the package's demo/ dir.
 *
 * @param {string} name - Demo identifier (e.g. 'mythos'); resolved to
 *   `demo/<name>.js` relative to this package.
 * @returns {Promise<object>} The demo module's exports (a context object).
 * @throws {Error} If no demo with that name exists; the message lists the
 *   demos actually present on disk.
 */
async function loadDemo(name) {
  const demoDir = path.join(__dirname, '..', 'demo');
  const demoFile = path.join(demoDir, `${name}.js`);

  if (!fs.existsSync(demoFile)) {
    // Enumerate what actually ships in demo/ instead of a hard-coded name,
    // so the error stays accurate as demos are added or removed.
    let available = 'mythos';
    try {
      const listed = fs.readdirSync(demoDir)
        .filter(f => f.endsWith('.js'))
        .map(f => f.slice(0, -3))
        .join(', ');
      if (listed) available = listed;
    } catch { /* demo dir missing — keep the default hint */ }
    throw new Error(`Demo '${name}' not found. Available demos: ${available}`);
  }

  return require(demoFile);
}
|
|
16
|
+
|
|
17
|
+
module.exports = { loadDemo };
|
package/src/generator.js
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const Anthropic = require('@anthropic-ai/sdk');
|
|
4
|
+
const yaml = require('js-yaml');
|
|
5
|
+
const fs = require('fs');
|
|
6
|
+
const path = require('path');
|
|
7
|
+
|
|
8
|
+
const RECIPE_SYSTEM_PROMPT = `You are an expert AI systems architect. Given a codebase analysis, produce a multi-agent YAML recipe.
|
|
9
|
+
|
|
10
|
+
The recipe must contain:
|
|
11
|
+
1. metadata: name, version, description, source_repo, generated_at
|
|
12
|
+
2. agents: list of specialized agents (analyzer, planner, executor, verifier, reporter)
|
|
13
|
+
Each agent has: id, role, model, tools, responsibilities, inputs, outputs
|
|
14
|
+
3. workflow: ordered steps with agent assignments and data flow
|
|
15
|
+
4. verification: self_consistency checks and provenance tracking
|
|
16
|
+
5. safety: sandbox_mode flag and guardrails
|
|
17
|
+
6. computer_use: whether any agent uses the Computer Use API
|
|
18
|
+
|
|
19
|
+
Output ONLY valid YAML. No markdown fences.`;
|
|
20
|
+
|
|
21
|
+
/**
 * Produce a multi-agent YAML recipe for the analyzed codebase.
 *
 * With no API key, or in sandbox mode, a deterministic static recipe is
 * returned instead of calling the Anthropic API. If the model returns
 * YAML that does not parse, the static recipe is used as a fallback.
 *
 * @param {object} context - Codebase analysis (summary, keyFiles, files, ...).
 * @param {{model?: string, apiKey?: string, sandbox?: boolean}} [options]
 * @returns {Promise<string>} Recipe as a YAML string.
 */
async function generateRecipe(context, { model, apiKey, sandbox } = {}) {
  // Offline path: sandbox runs and keyless runs never hit the network.
  if (sandbox || !apiKey) {
    return buildStaticRecipe(context);
  }

  const client = new Anthropic({ apiKey });

  const userMessage = `Analyze this codebase and produce the multi-agent YAML recipe:

${context.summary}

Key files:
${Object.entries(context.keyFiles).map(([name, body]) => `=== ${name} ===\n${body}`).join('\n\n')}

Top files by path:
${context.files.slice(0, 20).map(f => `- ${f.path} (${f.ext})`).join('\n')}

Requirements:
- Create 5 specialized agents tailored to this codebase
- Include Computer Use agent for UI/browser tasks if relevant
- Add self-consistency verification loops
- Add provenance tracking for each agent output
- Safety: default sandbox_mode: true
- Use model: ${model}`;

  const response = await client.messages.create({
    model,
    max_tokens: 4096,
    system: RECIPE_SYSTEM_PROMPT,
    messages: [{ role: 'user', content: userMessage }],
  });

  const recipeText = response.content[0].text.trim();

  // Guard: the model may emit malformed YAML — fall back rather than
  // writing a broken recipe file.
  try {
    yaml.load(recipeText);
    return recipeText;
  } catch (err) {
    console.warn('\n Warning: Generated YAML had syntax issues, using fallback recipe.');
    return buildStaticRecipe(context);
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Build a deterministic fallback recipe without any API call.
 * Used in sandbox mode, when no API key is present, or when the model's
 * generated YAML fails to parse.
 *
 * @param {object} context - Codebase analysis (reads `repoName` and `stack`).
 * @param {string} [model] - Model id stamped into the recipe metadata and
 *   each agent. Defaults to the package's standard model, so existing
 *   callers are unchanged; callers may now pass the CLI's `--model` value.
 * @returns {string} YAML document describing the multi-agent recipe.
 */
function buildStaticRecipe(context, model = 'claude-sonnet-4-6') {
  const recipe = {
    metadata: {
      name: `${context.repoName}-verifiable-agent`,
      version: '1.0.0',
      description: `Multi-agent recipe for ${context.repoName}`,
      source_repo: context.repoName,
      generated_at: new Date().toISOString(),
      model,
      computer_use_enabled: true,
    },
    // Conservative defaults: everything sandboxed, mutations gated on humans.
    safety: {
      sandbox_mode: true,
      guardrails: [
        'no_destructive_writes',
        'no_external_api_calls_in_sandbox',
        'human_approval_required_for_mutations',
        'rate_limit_api_calls',
      ],
      plan_mode: true,
      accept_edits: false,
    },
    agents: [
      {
        id: 'analyzer',
        role: 'Codebase Analyzer',
        model,
        tools: ['read_file', 'list_files', 'grep', 'glob'],
        responsibilities: [
          'Scan repository structure and detect tech stack',
          'Identify entry points, key modules, and dependencies',
          'Extract architecture patterns and data flows',
        ],
        inputs: ['source_repo_path'],
        outputs: ['codebase_summary', 'file_index', 'stack_report'],
      },
      {
        id: 'planner',
        role: 'Task Planner',
        model,
        tools: ['read_file', 'write_file'],
        responsibilities: [
          'Decompose high-level goal into verifiable sub-tasks',
          'Assign sub-tasks to appropriate specialist agents',
          'Define success criteria for each task',
        ],
        inputs: ['codebase_summary', 'user_goal'],
        outputs: ['task_plan', 'agent_assignments'],
      },
      {
        id: 'executor',
        role: 'Code Executor',
        model,
        tools: ['bash', 'write_file', 'edit_file'],
        responsibilities: [
          'Implement planned changes with minimal blast radius',
          'Run tests after each change',
          'Rollback on failure',
        ],
        inputs: ['task_plan', 'codebase_summary'],
        outputs: ['code_changes', 'test_results'],
        // Executor can mutate files — always sandboxed + human-approved.
        safety: { require_sandbox: true, require_approval: true },
      },
      {
        id: 'computer_use_agent',
        role: 'Computer Use Agent',
        model,
        computer_use: true,
        tools: ['screenshot', 'mouse_move', 'left_click', 'type', 'key'],
        responsibilities: [
          'Perform UI interactions for browser-based tasks',
          'Capture screenshots as provenance evidence',
          'Validate visual outputs',
        ],
        inputs: ['ui_task_spec'],
        outputs: ['screenshots', 'interaction_log', 'visual_validation'],
        safety: { require_sandbox: true, no_real_purchases: true },
      },
      {
        id: 'verifier',
        role: 'Output Verifier',
        model,
        tools: ['read_file', 'bash'],
        responsibilities: [
          'Run self-consistency checks across agent outputs',
          'Validate provenance chain',
          'Flag hallucinations or contradictions',
          'Score confidence for each claim',
        ],
        inputs: ['code_changes', 'task_plan', 'test_results'],
        outputs: ['verification_report', 'confidence_scores', 'provenance_chain'],
      },
    ],
    // Linear pipeline; UI validation is optional, verifier is terminal.
    workflow: [
      { step: 1, agent: 'analyzer', action: 'scan_and_summarize', outputs_to: ['planner', 'verifier'] },
      { step: 2, agent: 'planner', action: 'create_task_plan', outputs_to: ['executor'] },
      { step: 3, agent: 'executor', action: 'implement_changes', outputs_to: ['verifier'], requires_approval: true },
      { step: 4, agent: 'computer_use_agent', action: 'validate_ui', outputs_to: ['verifier'], optional: true },
      { step: 5, agent: 'verifier', action: 'verify_all_outputs', outputs_to: null },
    ],
    verification: {
      self_consistency: {
        enabled: true,
        method: 'multi_sample',
        samples: 3,
        threshold: 0.8,
        description: 'Run each critical task 3 times, flag if results diverge > 20%',
      },
      provenance: {
        enabled: true,
        track_inputs: true,
        track_model_version: true,
        track_timestamps: true,
        hash_outputs: true,
        description: 'Every agent output is hashed and linked to its inputs',
      },
      human_review_gates: ['after_planner', 'before_executor', 'after_verifier'],
    },
    stack: context.stack,
  };

  return yaml.dump(recipe, { lineWidth: 120 });
}
|
|
189
|
+
|
|
190
|
+
module.exports = { generateRecipe };
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* html-extractor.js
|
|
5
|
+
* Parses HTML/blog-post leak documents and extracts:
|
|
6
|
+
* - Page title and meta description
|
|
7
|
+
* - Key quoted claims (pull-quotes, blockquotes, <em>/<strong> emphasis)
|
|
8
|
+
* - Structured FAQ entries (JSON-LD)
|
|
9
|
+
* - Capability claims (coding, reasoning, cybersecurity)
|
|
10
|
+
* - Cyber-risk warnings
|
|
11
|
+
* - Timeline / provenance signals
|
|
12
|
+
*
|
|
13
|
+
* No external dependencies — pure regex + stdlib.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// Phrases that signal high-value claims worth surfacing
|
|
17
|
+
// Substrings that flag high-value claims worth surfacing. Matching is done
// against lowercased text (see extractKeyQuotes / findSignalMatches), so
// every entry here must be lowercase. Truncated stems like 'capabilit'
// deliberately match both singular and plural forms.
const SIGNAL_PHRASES = [
  'step change',
  'most capable',
  'dramatically higher',
  'far ahead',
  'cyber capabilit',
  'exploit vulnerabilit',
  'outpace',
  'defenders',
  'presages',
  'wave of models',
  'new tier',
  'above opus',
  'leaked',
  'human error',
  'cms misconfiguration',
  '3,000',
  'fortune',
];
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Parse an HTML document and return a structured extraction result.
|
|
39
|
+
* @param {string} html Raw HTML string
|
|
40
|
+
* @param {string} filename Original filename (for context)
|
|
41
|
+
* @returns {LeakExtraction}
|
|
42
|
+
*/
|
|
43
|
+
/**
 * Parse an HTML document and return a structured extraction result.
 *
 * @param {string} html Raw HTML string
 * @param {string} filename Original filename (for context)
 * @returns {LeakExtraction}
 */
function extractFromHtml(html, filename = 'document.html') {
  const plainText = htmlToText(html);
  const blockquotes = extractBlockquotes(html);
  const keyQuotes = extractKeyQuotes(html);
  const cyberRiskWarnings = extractCyberRisk(html);

  return {
    filename,
    title: extractTitle(html),
    metaDescription: extractMeta(html, 'description'),
    faqEntries: extractJsonLdFaq(html),
    blockquotes,
    keyQuotes,
    capabilities: extractCapabilities(html),
    cyberRiskWarnings,
    timeline: extractTimeline(html),
    signalMatches: findSignalMatches(plainText),
    plainText,
    // Convenience: every notable quote, deduplicated, in one flat array.
    allNotableQuotes: dedupeQuotes([...blockquotes, ...keyQuotes, ...cyberRiskWarnings]),
  };
}
|
|
75
|
+
|
|
76
|
+
// ── Internal extractors ──────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
// Return the document's <title> text (whitespace-normalized), or '' if absent.
function extractTitle(html) {
  const match = /<title[^>]*>(.*?)<\/title>/is.exec(html);
  if (!match) {
    return '';
  }
  return cleanText(match[1]);
}
|
|
82
|
+
|
|
83
|
+
// Return the content of <meta name="..."> for the given name, handling both
// attribute orders (name-before-content and content-before-name), or ''.
function extractMeta(html, name) {
  const nameFirst = new RegExp(`<meta[^>]+name=["']${name}["'][^>]+content=["']([^"']+)["']`, 'i');
  const contentFirst = new RegExp(`<meta[^>]+content=["']([^"']+)["'][^>]+name=["']${name}["']`, 'i');
  const match = nameFirst.exec(html) || contentFirst.exec(html);
  return match ? cleanText(match[1]) : '';
}
|
|
88
|
+
|
|
89
|
+
// Extract question/answer pairs from JSON-LD <script> blocks of @type
// FAQPage. Malformed JSON blocks are skipped silently.
function extractJsonLdFaq(html) {
  const entries = [];
  const scriptRe = /<script\s+type="application\/ld\+json">([\s\S]*?)<\/script>/gi;

  for (const [, body] of html.matchAll(scriptRe)) {
    let data;
    try {
      data = JSON.parse(body);
    } catch {
      continue; // skip malformed JSON-LD
    }
    if (data['@type'] !== 'FAQPage') {
      continue;
    }
    for (const item of data.mainEntity || []) {
      entries.push({
        question: item.name || '',
        answer: item.acceptedAnswer?.text || '',
      });
    }
  }

  return entries;
}
|
|
108
|
+
|
|
109
|
+
// Collect the plain-text contents of every <blockquote>, dropping anything
// 20 characters or shorter (too short to be a meaningful quote).
function extractBlockquotes(html) {
  const found = [];
  for (const [, inner] of html.matchAll(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi)) {
    const text = cleanText(htmlToText(inner));
    if (text.length > 20) {
      found.push(text);
    }
  }
  return found;
}
|
|
119
|
+
|
|
120
|
+
// Surface emphasized claims two ways: (1) elements whose class names mark
// them as quotes/stats/callouts, (2) visible-text sentences containing a
// SIGNAL_PHRASES entry. Results are deduplicated via a Set.
function extractKeyQuotes(html) {
  const quotes = new Set();

  // Pass 1: content inside "pull-quote"/"quote"/"stat"-style class divs.
  const classPatterns = [
    /class="[^"]*(?:quote|pull-quote|blockquote|highlight|callout)[^"]*"[^>]*>([\s\S]{20,300}?)</gi,
    /class="[^"]*(?:stat|metric|claim|key-claim)[^"]*"[^>]*>([\s\S]{10,200}?)</gi,
  ];
  for (const pattern of classPatterns) {
    for (const [, captured] of html.matchAll(pattern)) {
      const cleaned = cleanText(htmlToText(captured));
      if (cleaned.length > 15) {
        quotes.add(cleaned);
      }
    }
  }

  // Pass 2: sentences in the visible text that contain a signal phrase.
  const visible = htmlToText(html);
  for (const sentence of visible.split(/(?<=[.!?])\s+/)) {
    const lower = sentence.toLowerCase();
    const hasSignal = SIGNAL_PHRASES.some(p => lower.includes(p));
    if (hasSignal && sentence.length > 30 && sentence.length < 500) {
      quotes.add(cleanText(sentence));
    }
  }

  return [...quotes].filter(q => q.length > 20);
}
|
|
149
|
+
|
|
150
|
+
// Pull capability-claim snippets (coding / reasoning / cybersecurity /
// overall) from the visible text, keeping at most three distinct matches
// per category. Categories with no matches are omitted from the result.
function extractCapabilities(html) {
  const text = htmlToText(html);
  const result = {};

  const patterns = {
    coding: /coding[^.!?\n]{0,200}/gi,
    reasoning: /reasoning[^.!?\n]{0,200}/gi,
    cybersecurity: /cyber(?:security|[- ]capabilit)[^.!?\n]{0,300}/gi,
    overall: /(?:step change|most capable|new tier)[^.!?\n]{0,200}/gi,
  };

  for (const [capability, regex] of Object.entries(patterns)) {
    const hits = [];
    for (const match of text.matchAll(regex)) {
      const cleaned = cleanText(match[0]);
      if (cleaned.length > 20 && !hits.includes(cleaned)) {
        hits.push(cleaned);
        if (hits.length >= 3) {
          break; // cap at three examples per capability
        }
      }
    }
    if (hits.length > 0) {
      result[capability] = hits;
    }
  }

  return result;
}
|
|
176
|
+
|
|
177
|
+
// Collect cyber-risk warning snippets from the visible text using a fixed
// set of high-signal patterns; duplicates are removed via a Set.
function extractCyberRisk(html) {
  const text = htmlToText(html);
  const warnings = new Set();

  const riskPatterns = [
    /far ahead of any other[^.!?]{0,150}/gi,
    /exploit vulnerabilit[^.!?]{0,200}/gi,
    /outpace[^.!?]{0,150}/gi,
    /presages[^.!?]{0,200}/gi,
    /wave of models[^.!?]{0,200}/gi,
    /defenders can[^.!?]{0,150}/gi,
    /faster than[^.!?]{0,150}/gi,
  ];

  for (const pattern of riskPatterns) {
    for (const match of text.matchAll(pattern)) {
      const cleaned = cleanText(match[0]);
      if (cleaned.length > 20) {
        warnings.add(cleaned);
      }
    }
  }

  return [...warnings];
}
|
|
202
|
+
|
|
203
|
+
// Find up to ten date-anchored snippets ("March 26, 2026 ..." or
// "February 2026 ...") in the visible text, skipping near-duplicates
// (same first 20 characters already seen).
function extractTimeline(html) {
  const datePattern = /(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+(?:\d{1,2},\s+)?\d{4}[^\n.!?]{0,200}/g;
  const events = [];

  for (const match of htmlToText(html).matchAll(datePattern)) {
    const cleaned = cleanText(match[0]);
    const alreadySeen = events.some(e => e.includes(cleaned.slice(0, 20)));
    if (cleaned.length > 10 && !alreadySeen) {
      events.push(cleaned);
    }
  }

  return events.slice(0, 10);
}
|
|
217
|
+
|
|
218
|
+
// Return the subset of SIGNAL_PHRASES present in the text (case-insensitive).
function findSignalMatches(text) {
  const haystack = text.toLowerCase();
  return SIGNAL_PHRASES.filter(phrase => haystack.includes(phrase));
}
|
|
222
|
+
|
|
223
|
+
// ── Utilities ────────────────────────────────────────────────────────────────
|
|
224
|
+
|
|
225
|
+
/**
 * Convert an HTML fragment to plain text: strip script/style blocks, turn
 * block-level closers into newlines, drop remaining tags, decode the common
 * HTML entities, and normalize whitespace.
 *
 * Bug fix: the entity-decoding chain had lost its entity references — it
 * contained no-op self-replacements like `.replace(/&/g, '&')` and
 * `.replace(/ /g, ' ')`, so `&amp;`, `&lt;`, `&nbsp;`, etc. were never
 * decoded. Restored the entity patterns; `&amp;` is decoded last so that
 * double-escaped text (e.g. `&amp;lt;`) resolves to `&lt;`, not `<`.
 *
 * @param {string} html Raw HTML string.
 * @returns {string} Whitespace-normalized plain text.
 */
function htmlToText(html) {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
    .replace(/<br\s*\/?>/gi, '\n')   // line breaks and block closers → newlines
    .replace(/<\/p>/gi, '\n')
    .replace(/<\/div>/gi, '\n')
    .replace(/<\/li>/gi, '\n')
    .replace(/<[^>]+>/g, ' ')        // drop any remaining tags
    .replace(/&lt;/g, '<')           // decode common entities (&amp; last)
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&nbsp;/g, ' ')
    .replace(/&amp;/g, '&')
    .replace(/[ \t]+/g, ' ')         // collapse runs of spaces/tabs
    .replace(/\n{3,}/g, '\n\n')      // cap consecutive blank lines at one
    .trim();
}
|
|
244
|
+
|
|
245
|
+
// Normalize a snippet: collapse all whitespace to single spaces, trim the
// ends, then strip any leading/trailing quote characters.
function cleanText(s) {
  const collapsed = s.replace(/\s+/g, ' ');
  const trimmed = collapsed.replace(/^\s+|\s+$/g, '');
  return trimmed.replace(/^["'\s]+|["'\s]+$/g, '');
}
|
|
251
|
+
|
|
252
|
+
// Remove near-duplicate quotes, keeping first occurrences. Two quotes are
// considered duplicates when their lowercased first 60 characters match.
function dedupeQuotes(arr) {
  const seen = new Set();
  const unique = [];
  for (const quote of arr) {
    const key = quote.toLowerCase().slice(0, 60);
    if (!seen.has(key)) {
      seen.add(key);
      unique.push(quote);
    }
  }
  return unique;
}
|
|
261
|
+
|
|
262
|
+
module.exports = { extractFromHtml, htmlToText };
|
package/src/index.js
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const chalk = require('chalk');
|
|
4
|
+
const ora = require('ora');
|
|
5
|
+
const fs = require('fs');
|
|
6
|
+
const path = require('path');
|
|
7
|
+
const { analyzeSource } = require('./analyzer');
|
|
8
|
+
const { generateRecipe } = require('./generator');
|
|
9
|
+
const { runVerification } = require('./verifier');
|
|
10
|
+
const { generateNotebook } = require('./notebook');
|
|
11
|
+
const { generateCollabCard } = require('./collab-card');
|
|
12
|
+
const { loadDemo } = require('./demo-loader');
|
|
13
|
+
const { planMode } = require('./plan');
|
|
14
|
+
|
|
15
|
+
/**
 * CLI entry pipeline: analyze a source repo (or load a bundled demo),
 * optionally gate on plan-mode approval, then write recipe.yaml, a
 * verification report, and optional notebook / collab-card files into
 * `outputDir`. Exits the process on missing API key (code 1) or on a
 * declined plan (code 0).
 *
 * @param {object} opts - Parsed CLI options (source, outputDir, sandbox,
 *   planMode, acceptEdits, demo, notebook, collabCard, model, maxFiles,
 *   apiKey).
 */
async function run(opts) {
  const {
    source,
    outputDir,
    sandbox,
    planMode: usePlanMode, // renamed: `planMode` collides with the imported function
    acceptEdits,
    demo,
    notebook,
    collabCard,
    model,
    maxFiles,
    apiKey,
  } = opts;

  // ── Safety check ────────────────────────────────────────────────────────────
  if (sandbox) {
    console.log(chalk.yellow.bold(' [SANDBOX MODE] No real API calls or mutations will occur.\n'));
  }

  // Sandbox mode is the only way to run without a key (generator/verifier
  // fall back to static outputs there).
  if (!apiKey && !sandbox) {
    console.error(chalk.red(' Error: ANTHROPIC_API_KEY not set. Use --api-key or export ANTHROPIC_API_KEY=...'));
    console.error(chalk.gray(' Tip: run with --sandbox to test without an API key.\n'));
    process.exit(1);
  }

  // ── Ensure output dir ────────────────────────────────────────────────────────
  fs.mkdirSync(outputDir, { recursive: true });

  // ── Step 1: Ingest source ────────────────────────────────────────────────────
  let context;

  if (demo || source === '__demo__') {
    const spinner = ora('Loading Mythos demo context...').start();
    context = await loadDemo(demo || 'mythos');
    spinner.succeed('Mythos cyber-risk simulation loaded');
  } else {
    const spinner = ora(`Analyzing ${source}...`).start();
    context = await analyzeSource(source, { maxFiles, sandbox });
    spinner.succeed(`Analyzed ${context.files.length} files from ${context.repoName}`);
  }

  // ── Step 2: Plan mode (show before executing) ────────────────────────────────
  // --accept-edits skips the interactive confirmation entirely.
  if (usePlanMode && !acceptEdits) {
    const approved = await planMode(context, opts);
    if (!approved) {
      console.log(chalk.yellow('\n Aborted. Re-run with --accept-edits to skip confirmation.\n'));
      process.exit(0);
    }
  }

  // ── Step 3: Generate outputs ─────────────────────────────────────────────────
  console.log(chalk.cyan('\n Generating outputs...\n'));

  // Recipe is generated first; the verifier consumes it below.
  const spinner2 = ora('Building multi-agent YAML recipe...').start();
  const recipe = await generateRecipe(context, { model, apiKey, sandbox });
  const recipeFile = path.join(outputDir, 'recipe.yaml');
  fs.writeFileSync(recipeFile, recipe);
  spinner2.succeed(`Recipe → ${path.relative(process.cwd(), recipeFile)}`);

  const spinner3 = ora('Running verification loops...').start();
  const verification = await runVerification(context, recipe, { model, apiKey, sandbox });
  const verifyFile = path.join(outputDir, 'verification-report.yaml');
  fs.writeFileSync(verifyFile, verification.report);
  const statusIcon = verification.passed ? chalk.green('✔') : chalk.red('✖');
  spinner3.succeed(`Verification ${statusIcon} → ${path.relative(process.cwd(), verifyFile)}`);

  if (notebook) {
    const spinner4 = ora('Building interactive Markdown notebook...').start();
    const nb = await generateNotebook(context, recipe, verification);
    const nbFile = path.join(outputDir, 'notebook.md');
    fs.writeFileSync(nbFile, nb);
    spinner4.succeed(`Notebook → ${path.relative(process.cwd(), nbFile)}`);
  }

  if (collabCard) {
    const spinner5 = ora('Creating human-AI collaboration card...').start();
    const card = await generateCollabCard(context, recipe, verification);
    const cardFile = path.join(outputDir, 'collab-card.md');
    fs.writeFileSync(cardFile, card);
    spinner5.succeed(`Collab card → ${path.relative(process.cwd(), cardFile)}`);
  }

  // ── Done ─────────────────────────────────────────────────────────────────────
  // Listing the directory (rather than tracking written files) also shows
  // any pre-existing files in outputDir.
  console.log(chalk.green.bold('\n All outputs written to: ') + chalk.white(outputDir));
  console.log(chalk.gray('\n Files generated:'));
  for (const f of fs.readdirSync(outputDir)) {
    console.log(chalk.gray(`  • ${f}`));
  }
  console.log('');
}
|
|
106
|
+
|
|
107
|
+
module.exports = { run };
|