npm - @jigyasudham/veto - Versions diffs - 0.8.0 - Mend

@jigyasudham/veto 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (198) hide show

package/.claude/settings.local.json +9 -0
package/README.md +190 -0
package/dist/adapters/claude.js +57 -0
package/dist/adapters/codex.js +58 -0
package/dist/adapters/gemini.js +58 -0
package/dist/adapters/index.js +156 -0
package/dist/agents/development/api.js +116 -0
package/dist/agents/development/backend.js +82 -0
package/dist/agents/development/coder.js +207 -0
package/dist/agents/development/database.js +81 -0
package/dist/agents/development/debugger.js +234 -0
package/dist/agents/development/devops.js +84 -0
package/dist/agents/development/frontend.js +83 -0
package/dist/agents/development/migration.js +141 -0
package/dist/agents/development/performance.js +142 -0
package/dist/agents/development/refactor.js +85 -0
package/dist/agents/development/reviewer.js +260 -0
package/dist/agents/development/tester.js +143 -0
package/dist/agents/executor.js +144 -0
package/dist/agents/memory/context-manager.js +167 -0
package/dist/agents/memory/decision-logger.js +157 -0
package/dist/agents/memory/knowledge-base.js +120 -0
package/dist/agents/memory/pattern-learner.js +140 -0
package/dist/agents/memory/project-mapper.js +114 -0
package/dist/agents/quality/accessibility.js +89 -0
package/dist/agents/quality/code-quality.js +109 -0
package/dist/agents/quality/compatibility.js +55 -0
package/dist/agents/quality/documentation.js +95 -0
package/dist/agents/quality/error-handling.js +87 -0
package/dist/agents/research/competitor-analyzer.js +44 -0
package/dist/agents/research/cost-analyzer.js +51 -0
package/dist/agents/research/estimator.js +57 -0
package/dist/agents/research/ethics-bias.js +111 -0
package/dist/agents/research/researcher.js +112 -0
package/dist/agents/research/risk-assessor.js +61 -0
package/dist/agents/research/tech-advisor.js +52 -0
package/dist/agents/security/auth.js +269 -0
package/dist/agents/security/dependency-audit.js +273 -0
package/dist/agents/security/penetration.js +245 -0
package/dist/agents/security/privacy.js +259 -0
package/dist/agents/security/scanner.js +288 -0
package/dist/agents/security/secrets.js +212 -0
package/dist/agents/types.js +2 -0
package/dist/agents/workflow/automation.js +56 -0
package/dist/agents/workflow/file-manager.js +49 -0
package/dist/agents/workflow/git-agent.js +52 -0
package/dist/agents/workflow/reporter.js +48 -0
package/dist/agents/workflow/search-agent.js +39 -0
package/dist/agents/workflow/task-coordinator.js +40 -0
package/dist/agents/workflow/task-planner.js +46 -0
package/dist/cli.js +132 -0
package/dist/council/decision-engine.js +136 -0
package/dist/council/devil-advocate.js +106 -0
package/dist/council/index.js +37 -0
package/dist/council/lead-developer.js +108 -0
package/dist/council/legal-compliance.js +142 -0
package/dist/council/product-manager.js +92 -0
package/dist/council/security.js +162 -0
package/dist/council/system-architect.js +122 -0
package/dist/council/types.js +2 -0
package/dist/council/ux-designer.js +109 -0
package/dist/memory/local.js +182 -0
package/dist/memory/schema.js +116 -0
package/dist/memory/sync.js +199 -0
package/dist/router/complexity-scorer.js +78 -0
package/dist/router/context-compressor.js +58 -0
package/dist/router/index.js +29 -0
package/dist/router/learning-updater.js +186 -0
package/dist/router/model-selector.js +51 -0
package/dist/router/rate-monitor.js +73 -0
package/dist/server.js +949 -0
package/dist/skills/development/skill-api-design.js +313 -0
package/dist/skills/development/skill-auth.js +255 -0
package/dist/skills/development/skill-ci-cd.js +2 -0
package/dist/skills/development/skill-crud.js +193 -0
package/dist/skills/development/skill-db-schema.js +2 -0
package/dist/skills/development/skill-docker.js +2 -0
package/dist/skills/development/skill-env-setup.js +2 -0
package/dist/skills/development/skill-scaffold.js +299 -0
package/dist/skills/intelligence/skill-complexity-score.js +66 -0
package/dist/skills/intelligence/skill-cost-track.js +36 -0
package/dist/skills/intelligence/skill-learning-loop.js +66 -0
package/dist/skills/intelligence/skill-pattern-detect.js +35 -0
package/dist/skills/intelligence/skill-rate-watch.js +58 -0
package/dist/skills/memory/skill-context-compress.js +82 -0
package/dist/skills/memory/skill-cross-sync.js +88 -0
package/dist/skills/memory/skill-decision-log.js +103 -0
package/dist/skills/memory/skill-session-restore.js +44 -0
package/dist/skills/memory/skill-session-save.js +78 -0
package/dist/skills/quality/skill-accessibility.js +2 -0
package/dist/skills/quality/skill-code-review.js +60 -0
package/dist/skills/quality/skill-docs-gen.js +2 -0
package/dist/skills/quality/skill-perf-audit.js +2 -0
package/dist/skills/quality/skill-security-scan.js +67 -0
package/dist/skills/quality/skill-test-suite.js +274 -0
package/dist/skills/workflow/skill-deploy.js +2 -0
package/dist/skills/workflow/skill-git-workflow.js +2 -0
package/dist/skills/workflow/skill-rollback.js +2 -0
package/dist/skills/workflow/skill-task-breakdown.js +2 -0
package/package.json +30 -0
package/src/adapters/claude.ts +70 -0
package/src/adapters/codex.ts +71 -0
package/src/adapters/gemini.ts +71 -0
package/src/adapters/index.ts +217 -0
package/src/agents/development/api.ts +120 -0
package/src/agents/development/backend.ts +85 -0
package/src/agents/development/coder.ts +213 -0
package/src/agents/development/database.ts +83 -0
package/src/agents/development/debugger.ts +238 -0
package/src/agents/development/devops.ts +86 -0
package/src/agents/development/frontend.ts +85 -0
package/src/agents/development/migration.ts +144 -0
package/src/agents/development/performance.ts +144 -0
package/src/agents/development/refactor.ts +86 -0
package/src/agents/development/reviewer.ts +268 -0
package/src/agents/development/tester.ts +151 -0
package/src/agents/executor.ts +158 -0
package/src/agents/memory/context-manager.ts +171 -0
package/src/agents/memory/decision-logger.ts +160 -0
package/src/agents/memory/knowledge-base.ts +124 -0
package/src/agents/memory/pattern-learner.ts +143 -0
package/src/agents/memory/project-mapper.ts +118 -0
package/src/agents/quality/accessibility.ts +99 -0
package/src/agents/quality/code-quality.ts +115 -0
package/src/agents/quality/compatibility.ts +58 -0
package/src/agents/quality/documentation.ts +105 -0
package/src/agents/quality/error-handling.ts +96 -0
package/src/agents/research/competitor-analyzer.ts +45 -0
package/src/agents/research/cost-analyzer.ts +54 -0
package/src/agents/research/estimator.ts +60 -0
package/src/agents/research/ethics-bias.ts +113 -0
package/src/agents/research/researcher.ts +114 -0
package/src/agents/research/risk-assessor.ts +63 -0
package/src/agents/research/tech-advisor.ts +55 -0
package/src/agents/security/auth.ts +287 -0
package/src/agents/security/dependency-audit.ts +337 -0
package/src/agents/security/penetration.ts +262 -0
package/src/agents/security/privacy.ts +285 -0
package/src/agents/security/scanner.ts +322 -0
package/src/agents/security/secrets.ts +249 -0
package/src/agents/types.ts +66 -0
package/src/agents/workflow/automation.ts +59 -0
package/src/agents/workflow/file-manager.ts +52 -0
package/src/agents/workflow/git-agent.ts +55 -0
package/src/agents/workflow/reporter.ts +51 -0
package/src/agents/workflow/search-agent.ts +40 -0
package/src/agents/workflow/task-coordinator.ts +41 -0
package/src/agents/workflow/task-planner.ts +47 -0
package/src/cli.ts +143 -0
package/src/council/decision-engine.ts +171 -0
package/src/council/devil-advocate.ts +116 -0
package/src/council/index.ts +44 -0
package/src/council/lead-developer.ts +118 -0
package/src/council/legal-compliance.ts +152 -0
package/src/council/product-manager.ts +102 -0
package/src/council/security.ts +172 -0
package/src/council/system-architect.ts +132 -0
package/src/council/types.ts +33 -0
package/src/council/ux-designer.ts +121 -0
package/src/memory/local.ts +305 -0
package/src/memory/schema.ts +174 -0
package/src/memory/sync.ts +274 -0
package/src/router/complexity-scorer.ts +96 -0
package/src/router/context-compressor.ts +74 -0
package/src/router/index.ts +60 -0
package/src/router/learning-updater.ts +271 -0
package/src/router/model-selector.ts +83 -0
package/src/router/rate-monitor.ts +103 -0
package/src/server.ts +1038 -0
package/src/skills/development/skill-api-design.ts +329 -0
package/src/skills/development/skill-auth.ts +271 -0
package/src/skills/development/skill-ci-cd.ts +0 -0
package/src/skills/development/skill-crud.ts +209 -0
package/src/skills/development/skill-db-schema.ts +0 -0
package/src/skills/development/skill-docker.ts +0 -0
package/src/skills/development/skill-env-setup.ts +0 -0
package/src/skills/development/skill-scaffold.ts +323 -0
package/src/skills/intelligence/skill-complexity-score.ts +69 -0
package/src/skills/intelligence/skill-cost-track.ts +39 -0
package/src/skills/intelligence/skill-learning-loop.ts +69 -0
package/src/skills/intelligence/skill-pattern-detect.ts +38 -0
package/src/skills/intelligence/skill-rate-watch.ts +61 -0
package/src/skills/memory/skill-context-compress.ts +98 -0
package/src/skills/memory/skill-cross-sync.ts +104 -0
package/src/skills/memory/skill-decision-log.ts +119 -0
package/src/skills/memory/skill-session-restore.ts +59 -0
package/src/skills/memory/skill-session-save.ts +94 -0
package/src/skills/quality/skill-accessibility.ts +0 -0
package/src/skills/quality/skill-code-review.ts +84 -0
package/src/skills/quality/skill-docs-gen.ts +0 -0
package/src/skills/quality/skill-perf-audit.ts +0 -0
package/src/skills/quality/skill-security-scan.ts +91 -0
package/src/skills/quality/skill-test-suite.ts +290 -0
package/src/skills/workflow/skill-deploy.ts +0 -0
package/src/skills/workflow/skill-git-workflow.ts +0 -0
package/src/skills/workflow/skill-rollback.ts +0 -0
package/src/skills/workflow/skill-task-breakdown.ts +0 -0
package/tsconfig.json +20 -0

package/src/agents/quality/error-handling.ts ADDED Viewed

@@ -0,0 +1,96 @@
+import { AgentPlan, AgentAnalysis, AgentFinding, WorkerAgentType } from '../types.js';
+/**
+ * Builds the fixed audit plan returned by the error-handling agent.
+ *
+ * @param task - Task description; copied unchanged into the returned plan.
+ * @param context - Optional extra context. Not read by this function.
+ * @returns A tier-2 plan with approach, steps, checklist, pitfalls, patterns and a duration estimate.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  // Overall strategy: one treatment per failure class.
+  const approach = 'Audit error handling for the three failure classes: expected failures (validation, not found, auth), unexpected failures (bugs, network failures, DB errors), and catastrophic failures (process crash, OOM). Each class needs a different treatment. Expected failures return structured error responses. Unexpected failures are logged with context and surfaced as generic errors to users. Catastrophic failures are captured by a top-level handler and alert.';
+
+  // Ordered audit procedure.
+  const steps = [
+    'Identify all async operations: DB calls, HTTP requests, file I/O, external APIs',
+    'Verify every async call has a try/catch or .catch() — no floating promises',
+    'Check that catch blocks do not silently swallow errors — log OR rethrow, never neither',
+    'Verify error messages to users do not leak stack traces or internal details in production',
+    'Check that typed errors are used — no throw new Error("raw string") for domain errors',
+    'Verify HTTP error codes are semantically correct: 400 vs 422, 401 vs 403, 404 vs 410',
+    'Check that errors include enough context to diagnose: which record, which operation, what input',
+    'Verify the top-level process error handler is in place: uncaughtException, unhandledRejection',
+    'Check that external API timeouts are configured — no indefinite hangs',
+    'Test each error path: does the system recover gracefully or leave state corrupted?',
+  ];
+
+  // Sign-off items to tick once the audit is done.
+  const checklist = [
+    '[ ] No floating promises — every async call awaited or .catch() chained',
+    '[ ] No empty or silent catch blocks',
+    '[ ] Stack traces never exposed to users in production',
+    '[ ] Typed domain errors — not raw Error strings',
+    '[ ] HTTP status codes semantically correct',
+    '[ ] Error log includes context (what failed, on what input)',
+    '[ ] process.on("uncaughtException") and ("unhandledRejection") registered',
+    '[ ] All external HTTP calls have a timeout configured',
+    '[ ] Error paths tested — system recovers without corrupting state',
+  ];
+
+  // Mistakes to watch for.
+  const pitfalls = [
+    'Catch-and-log without rethrowing — the caller assumes success when the operation failed',
+    'Using the same error type for expected and unexpected failures — callers cannot distinguish',
+    'Not including the original error in a re-thrown wrapper — stack trace lost',
+    'Logging only the error message without the input that caused it — impossible to reproduce',
+    'Catching Error but not checking error.name — all errors look the same',
+  ];
+
+  // Recommended practices.
+  const patterns = [
+    'Error taxonomy: expected (return structured error) | unexpected (log + generic response) | catastrophic (alert)',
+    'Context-rich errors: every thrown error includes the operation, input, and correlation ID',
+    'Fail-fast on programming errors: throw immediately on invalid arguments — do not try to recover',
+    'Structured error types: one class per error category with a code field for programmatic handling',
+  ];
+
+  return {
+    agent: 'error-handling' as WorkerAgentType,
+    task,
+    tier: 2,
+    approach,
+    steps,
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '2-4 hours',
+  };
+}
+/**
+ * Heuristically scans source text for common error-handling problems.
+ *
+ * All checks are regular-expression matches on the raw text, so they can miss cases
+ * (for example catch bodies containing nested braces) or report false positives.
+ *
+ * Scoring starts at 100 and subtracts 30 per critical, 20 per high and 10 per medium
+ * finding, floored at 0. The verdict is derived from the score:
+ * >= 85 approved, >= 65 approved_with_warnings, >= 40 needs_revision, otherwise rejected.
+ *
+ * @param code - Source text to inspect.
+ * @param context - Optional label for what is being analysed; used as `subject` (defaults to 'code').
+ * @returns The findings, score, verdict, a one-line summary and the critical/high counts.
+ */
+export function analyze(code: string, context?: string): AgentAnalysis {
+  const findings: AgentFinding[] = [];
+  // Empty catch blocks. The `(error)` binding is optional so that ES2019
+  // optional-catch-binding blocks (`catch {}`) are detected as well.
+  const emptyCatch = (code.match(/catch\s*(?:\([^)]*\))?\s*\{\s*\}/g) ?? []).length;
+  if (emptyCatch > 0) {
+    findings.push({ severity: 'critical', category: 'silent-failure', description: `${emptyCatch} empty catch block(s) — exceptions silently swallowed`, fix: 'Log the error and/or rethrow. Never swallow exceptions silently.' });
+  }
+  // Catch blocks that call console.* but contain no `throw` (binding optional, as above).
+  const logOnlyCatch = (code.match(/catch\s*(?:\([^)]*\))?\s*\{[^}]*console\.[a-z]+[^}]*\}/g) ?? [])
+    .filter(block => !block.includes('throw')).length;
+  if (logOnlyCatch > 0) {
+    findings.push({ severity: 'high', category: 'swallowed-error', description: `${logOnlyCatch} catch block(s) log but do not rethrow — caller assumes success`, fix: 'After logging, either rethrow the error or return a typed error result to the caller.' });
+  }
+  // Floating promises: `call(...).then(` not directly preceded by `await ` or `return `.
+  // Reported only when more than one such chain is found.
+  const floatingPromise = (code.match(/(?<!\bawait\s)(?<!\breturn\s)\b\w+\([^)]*\)\.then\(/g) ?? []).length;
+  if (floatingPromise > 1) {
+    findings.push({ severity: 'high', category: 'floating-promise', description: `Potential floating promise(s) detected`, fix: 'Ensure all Promise chains are awaited or have a .catch() handler.' });
+  }
+  // Raw Error strings: short literal messages passed to `new Error`. Up to two are tolerated.
+  const rawThrows = (code.match(/throw new Error\(['"`][^'"`]{1,50}['"`]\)/g) ?? []).length;
+  if (rawThrows > 2) {
+    findings.push({ severity: 'medium', category: 'untyped-errors', description: `${rawThrows} raw Error string throws — callers cannot distinguish error types`, fix: 'Create typed error classes (class NotFoundError extends Error) for each distinct error category.' });
+  }
+  // No timeout on fetch/axios: an HTTP client call appears but the word "timeout" appears nowhere.
+  if (/fetch\(|axios\.|got\(|request\(/.test(code) && !/timeout/.test(code)) {
+    findings.push({ severity: 'medium', category: 'missing-timeout', description: 'External HTTP call detected without apparent timeout configuration', fix: 'Add a timeout to all external HTTP calls. Unconfigured calls hang indefinitely on network issues.' });
+  }
+  const critCount = findings.filter(f => f.severity === 'critical').length;
+  const highCount = findings.filter(f => f.severity === 'high').length;
+  const mediumCount = findings.filter(f => f.severity === 'medium').length;
+  const score = Math.max(0, 100 - critCount * 30 - highCount * 20 - mediumCount * 10);
+  return {
+    agent: 'error-handling' as WorkerAgentType,
+    subject: context ?? 'code',
+    findings,
+    score,
+    verdict: score >= 85 ? 'approved' : score >= 65 ? 'approved_with_warnings' : score >= 40 ? 'needs_revision' : 'rejected',
+    summary: findings.length === 0 ? 'Error handling looks solid.' : `${findings.length} error handling issue(s). Top: ${findings[0].description}`,
+    critical_count: critCount,
+    high_count: highCount,
+  };
+}

package/src/agents/research/competitor-analyzer.ts ADDED Viewed

@@ -0,0 +1,45 @@
+import { AgentPlan, WorkerAgentType } from '../types.js';
+/**
+ * Builds the fixed competitor-analysis plan.
+ *
+ * @param task - Task description; copied unchanged into the returned plan.
+ * @param context - Optional extra context. Not read by this function.
+ * @returns A tier-2 plan with approach, steps, checklist, pitfalls, patterns and a duration estimate.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  // Overall strategy: compare on feature, differentiation and positioning gaps.
+  const approach = 'Analyse competitors on three dimensions: feature gap (what they have that you don\'t), differentiation gap (what you have that they don\'t), and positioning gap (where they own user perception). The goal is not to copy — it is to identify where building a feature would be table-stakes vs. where it would be genuinely differentiating.';
+
+  // Ordered analysis procedure.
+  const steps = [
+    'List the top 3–5 direct competitors and 2–3 indirect alternatives',
+    'For each competitor: document their core value proposition in one sentence',
+    'Build a feature matrix: rows = features, columns = products, cells = yes/no/partial',
+    'Identify table-stakes features: present in all competitors (building these is hygiene, not differentiation)',
+    'Identify differentiating features: present in 0–1 competitors (building these creates moat)',
+    'Identify features your product has that no competitor has — this is your current moat',
+    'Research competitor pricing to understand the value they signal for each tier',
+    'Read user reviews of competitors to find the most common complaints (these are opportunities)',
+    'Identify the positioning gaps: what perception do users have of each competitor?',
+    'Recommend the top 3 features by differentiation potential × build cost',
+    'Store analysis in veto_memory_store (type="reference", tags=["competitive", "strategy"])',
+  ];
+
+  // Sign-off items to tick once the analysis is done.
+  const checklist = [
+    '[ ] Top 5 direct competitors identified',
+    '[ ] Feature matrix built with table-stakes vs. differentiating features separated',
+    '[ ] Current moat (unique features) identified',
+    '[ ] User review analysis for top 2 complaints per competitor',
+    '[ ] Differentiation opportunities ranked by potential × cost',
+    '[ ] Analysis stored in veto_memory_store',
+  ];
+
+  // Mistakes to watch for.
+  const pitfalls = [
+    'Copying competitor features without understanding why they built them — builds technical debt, not differentiation',
+    'Ignoring indirect alternatives — users often switch to a different category entirely, not a direct competitor',
+    'Treating all feature gaps as problems — some are intentional product decisions',
+    'Not reading user reviews — documentation says what a product does; reviews say what it fails to do',
+    'Analysing features in isolation from pricing — a feature only matters if the target segment can afford it',
+  ];
+
+  // Recommended practices.
+  const patterns = [
+    'Table-stakes separation: hygiene features vs. moat features require different prioritisation logic',
+    'Complaint mining: competitor weaknesses in reviews are your clearest product opportunities',
+    'Moat-first thinking: understand your current differentiation before adding more features',
+    'Indirect competitor awareness: the biggest threat is often a product in a different category',
+  ];
+
+  return {
+    agent: 'competitor-analyzer' as WorkerAgentType,
+    task,
+    tier: 2,
+    approach,
+    steps,
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '2-4 hours',
+  };
+}

package/src/agents/research/cost-analyzer.ts ADDED Viewed

@@ -0,0 +1,54 @@
+import { AgentPlan, WorkerAgentType } from '../types.js';
+/**
+ * Builds the cost-analysis plan, picking the approach text from keywords in the request.
+ *
+ * The task and optional context are joined and lower-cased. Token/LLM keywords take
+ * priority over infrastructure keywords; if neither group matches, a general approach
+ * covering all three cost categories is used. Every other field is fixed.
+ *
+ * @param task - Task description; copied unchanged into the returned plan.
+ * @param context - Optional extra text that is also searched for keywords.
+ * @returns A tier-1 plan with approach, steps, checklist, pitfalls, patterns and a duration estimate.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  const haystack = `${task} ${context ?? ''}`.toLowerCase();
+  const mentions = (...keywords: string[]): boolean => keywords.some(keyword => haystack.includes(keyword));
+
+  let approach: string;
+  if (mentions('token', 'ai cost', 'api cost', 'llm')) {
+    approach = 'Analyse AI token costs across all three platforms. Track tokens-per-task, cost-per-tier, and cost trajectory over time. Identify the top 3 cost drivers and recommend routing adjustments. A 20% reduction in average tier without quality loss is typically achievable via better router threshold calibration.';
+  } else if (mentions('hosting', 'server', 'cloud', 'infra')) {
+    approach = 'Analyse infrastructure costs by service. Identify idle resources, over-provisioned instances, and services that could be consolidated. Compare reserved vs. on-demand pricing for predictable workloads. Document the cost-per-feature to identify which features have disproportionate infrastructure cost.';
+  } else {
+    approach = 'Track the three cost categories in this project: AI token spend, infrastructure, and developer time. Developer time is almost always the largest cost and the most under-tracked. Quantify each category, identify the top driver, and recommend the highest-ROI reduction.';
+  }
+
+  // Ordered analysis procedure.
+  const steps = [
+    'Identify which cost category to analyse: AI tokens, infrastructure, or developer time',
+    'For token costs: call veto_rate_status and veto_learning_stats to get tier distribution',
+    'Calculate average cost per task by tier (Tier 1 ≈ $0.001, Tier 2 ≈ $0.01, Tier 3 ≈ $0.05)',
+    'Identify tasks that are over-tiered: Tier 3 tasks with output quality achievable at Tier 2',
+    'For infrastructure: list monthly costs per service and idle/utilisation %',
+    'For developer time: estimate hours lost to context switches, re-explanation, and tool friction',
+    'Rank cost drivers by monthly impact ($ or hours)',
+    'Propose the top 3 changes with estimated savings and implementation cost',
+    'Calculate ROI for each proposal: savings per month / implementation cost in days',
+    'Store cost analysis in veto_memory_store (type="reference", tags=["cost", "performance"])',
+  ];
+
+  // Sign-off items to tick once the analysis is done.
+  const checklist = [
+    '[ ] Cost category identified and scoped',
+    '[ ] Current monthly cost baseline established',
+    '[ ] Top 3 cost drivers identified and ranked by impact',
+    '[ ] At least one reduction proposal with estimated ROI',
+    '[ ] Developer time cost estimated (not just infrastructure)',
+    '[ ] Cost reduction proposals ranked by ROI, not just absolute savings',
+  ];
+
+  // Mistakes to watch for.
+  const pitfalls = [
+    'Optimising infrastructure cost while ignoring developer time — wrong priority',
+    'Treating all Tier 3 tasks as necessary — many can be downgraded with better descriptions',
+    'Not establishing a baseline before optimising — you cannot measure improvement without it',
+    'Optimising for the cheapest option instead of the best cost/quality ratio',
+    'Ignoring the cost of the optimisation itself — a 5% savings that takes 2 weeks is not worth it',
+  ];
+
+  // Recommended practices.
+  const patterns = [
+    'Baseline-first: measure before optimising',
+    'ROI ranking: sort proposals by monthly savings / implementation cost, not just savings',
+    'Developer time inclusion: add estimated developer friction hours to every cost analysis',
+    'Tier distribution analysis: the ratio of T1/T2/T3 tasks directly predicts AI spend',
+  ];
+
+  return {
+    agent: 'cost-analyzer' as WorkerAgentType,
+    task,
+    tier: 1,
+    approach,
+    steps,
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '1-2 hours',
+  };
+}

package/src/agents/research/estimator.ts ADDED Viewed

@@ -0,0 +1,60 @@
+import { AgentPlan, WorkerAgentType } from '../types.js';
+/**
+ * Builds the estimation plan, picking the approach text from keywords in the request.
+ *
+ * The task and optional context are joined and lower-cased. Keyword groups are checked
+ * in priority order: project-wide, then refactor/migration, then feature work. If none
+ * matches, a generic small-task decomposition approach is used. Every other field is fixed.
+ *
+ * @param task - Task description; copied unchanged into the returned plan.
+ * @param context - Optional extra text that is also searched for keywords.
+ * @returns A tier-2 plan with approach, steps, checklist, pitfalls, patterns and a duration estimate.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  const haystack = `${task} ${context ?? ''}`.toLowerCase();
+  const mentions = (...keywords: string[]): boolean => keywords.some(keyword => haystack.includes(keyword));
+
+  let approach: string;
+  if (mentions('project', 'entire', 'full', 'phase')) {
+    approach = 'Estimate the full project using bottom-up decomposition: break into phases, phases into milestones, milestones into tasks. Estimate each task independently, then add a calibration factor based on team history. Never estimate a project in one number — give P50 (likely), P80 (likely with normal friction), and P95 (likely with major unexpected issue).';
+  } else if (mentions('refactor', 'migrat', 'rewrite', 'restructur')) {
+    approach = 'Refactors and migrations consistently take 2–3x the original estimate because hidden coupling is discovered mid-work. Use a base estimate of "how long to implement from scratch" × 2, plus a separate estimate for integration testing and rollback preparation. Do not use the refactor estimate as a deadline — use it as a budget check.';
+  } else if (mentions('feature', 'implement', 'build', 'add')) {
+    approach = 'Estimate the feature in four parts: core implementation, edge cases and error handling, tests, and integration. Core implementation is what engineers quote; the other three are what makes estimates wrong. A realistic feature estimate is roughly: core × 2.5.';
+  } else {
+    approach = 'Break the work into tasks of 1–4 hours each. Anything larger than 4 hours contains hidden complexity that will surface mid-execution. Estimate each small task, sum them, add 30% for integration and unexpected issues.';
+  }
+
+  // Ordered estimation procedure.
+  const steps = [
+    'Decompose the work into tasks of 1–4 hours each — nothing larger',
+    'Estimate each task independently (avoid anchoring on the total)',
+    'For each estimate: document the main assumption that could make it wrong',
+    'Identify which tasks have the highest uncertainty — these drive the range',
+    'Sum the task estimates to get the P50 (50% chance of hitting)',
+    'Add 30% to get P80 (accounting for normal integration friction)',
+    'Add another 30% to P80 to get P95 (accounting for one major unexpected issue)',
+    'Identify the critical path: which tasks block all other work?',
+    'Identify tasks that can be parallelised if more people are added',
+    'Check against historical estimates: what was the last similar task\'s actual vs. estimate ratio?',
+    'Store the estimate in veto_memory_store (type="reference") with actual time tracked when complete',
+  ];
+
+  // Sign-off items to tick once the estimate is done.
+  const checklist = [
+    '[ ] Work decomposed into tasks ≤ 4 hours each',
+    '[ ] Each task estimated independently',
+    '[ ] Main assumption per task documented',
+    '[ ] P50, P80, P95 range calculated',
+    '[ ] Critical path identified',
+    '[ ] Historical ratio checked (if prior estimates exist)',
+    '[ ] Estimate stored for calibration tracking',
+  ];
+
+  // Mistakes to watch for.
+  const pitfalls = [
+    'Estimating top-down from a desired deadline — the estimate becomes a negotiation, not a forecast',
+    'Not separating core implementation from edge cases, tests, and integration — this is where estimates consistently fail',
+    'Giving a single number instead of a range — single-number estimates communicate false precision',
+    'Not documenting the assumptions behind the estimate — when reality diverges, nobody knows why',
+    'Forgetting that refactors and migrations have a 2–3x hidden complexity multiplier',
+    'Not tracking actuals — without actuals, estimates never improve',
+  ];
+
+  // Recommended practices.
+  const patterns = [
+    'Bottom-up decomposition: sum of small estimates beats one large estimate every time',
+    'P50/P80/P95 range: communicates uncertainty honestly rather than false precision',
+    'Assumption documentation: the assumption is the estimate — when it is wrong, the estimate is wrong',
+    'Estimate-to-actual tracking: store both estimate and actual in veto_memory_store to calibrate future estimates',
+  ];
+
+  return {
+    agent: 'estimator' as WorkerAgentType,
+    task,
+    tier: 2,
+    approach,
+    steps,
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '30-60 minutes',
+  };
+}

package/src/agents/research/ethics-bias.ts ADDED Viewed

@@ -0,0 +1,113 @@
+import { AgentPlan, WorkerAgentType } from '../types.js';
+/** Ethics focus areas; the detected one selects the approach text and step list used by `plan`. */
+type EthicsCategory = 'ai-ml' | 'dark-patterns' | 'fairness' | 'privacy' | 'general';
+/**
+ * Classifies a task description into an ethics category by keyword.
+ *
+ * Categories are tested in a fixed order (ai-ml, dark-patterns, fairness,
+ * privacy) and the first hit wins; text with no keyword is 'general'.
+ *
+ * The short tokens "ai", "ml" and "ux" are matched as whole words only. As
+ * plain substrings they fire inside ordinary words ("fair", "email", "html",
+ * "linux"), which sent every "fair..." task to 'ai-ml' before the fairness
+ * check could run.
+ *
+ * @param task Free-form task text; matching is case-insensitive.
+ * @returns The first matching category, or 'general'.
+ */
+function detectCategory(task: string): EthicsCategory {
+  const t = task.toLowerCase();
+  const hasAny = (keywords: string[]): boolean => keywords.some((k) => t.includes(k));
+  if (/\b(?:ai|ml)\b/.test(t) || hasAny(['model', 'predict', 'recommend'])) return 'ai-ml';
+  if (/\bux\b/.test(t) || hasAny(['dark pattern', 'onboarding', 'notification', 'subscription'])) return 'dark-patterns';
+  if (hasAny(['fair', 'bias', 'demographic', 'discriminat', 'equit'])) return 'fairness';
+  if (hasAny(['privac', 'data', 'tracking', 'surveillance'])) return 'privacy';
+  return 'general';
+}
+const categoryApproach: Record<EthicsCategory, string> = { // Approach paragraph per ethics category; plan() returns the detected category's entry as `approach`.
+  'ai-ml': 'Audit the AI/ML system for bias in three places: training data (is it representative?), model outputs (does it perform differently across demographic groups?), and system design (does the framing of the problem embed assumptions that harm certain users?). Bias at the data level is fixable. Bias at the framing level may require rethinking the feature.',
+  'dark-patterns': 'Review the UX flow for patterns that extract value from users through confusion or manipulation rather than genuine usefulness. Dark patterns degrade trust at a rate users cannot articulate — they just stop using the product. The test: would a user feel tricked if they understood exactly what the product was doing?',
+  'fairness': 'Analyse whether the feature treats all user groups equitably. Start with the groups most likely to be harmed by the design assumptions. Document which groups benefit, which are neutral, and which may be disadvantaged. Disproportionate harm does not have to be intentional to be real.',
+  'privacy': 'Evaluate the data collection and usage against the principle of data minimisation: collect only what is necessary, use only for the stated purpose, retain only as long as needed. Identify the worst-case use of each data point if it were leaked or misused. Privacy harms are irreversible — the standard should be higher than legal compliance.',
+  'general': 'Evaluate the feature against three ethical dimensions: autonomy (does it respect user choice?), harm (could it cause disproportionate harm to specific groups?), and transparency (do users understand what the system is doing on their behalf?). A feature that scores poorly on all three should not ship as designed.', // used when no category keyword matched
+};
+const categorySteps: Record<EthicsCategory, string[]> = { // Ordered step list per ethics category; plan() returns the detected category's array (by reference, not a copy) as `steps`.
+  'ai-ml': [ // bias audit: affected groups, training data, proxy variables, per-group output quality, monitoring
+    'Identify all groups who will be affected by the AI/ML system\'s outputs',
+    'Check training data: is it representative of all affected groups?',
+    'Identify proxy variables: features that correlate with protected characteristics (zip code → race, etc.)',
+    'Measure output quality separately for each demographic group — aggregate accuracy hides disparities',
+    'Test edge cases for groups likely to be underrepresented in training data',
+    'Evaluate the feedback loop: does poor performance for group X reduce their usage, which reduces training data for X, which worsens performance? (Feedback loop bias)',
+    'Define the acceptable fairness metric: demographic parity, equalised odds, or calibration — and document the tradeoff',
+    'Establish a bias monitoring plan for post-launch',
+    'Document findings in veto_memory_store (type="decision", tags=["ethics", "ai-bias"])',
+  ],
+  'dark-patterns': [ // UX flow review from signup through cancellation, ending with honest alternatives
+    'Map the full user flow from signup through cancellation — cancellation is where dark patterns concentrate',
+    'Check for: confirmshaming (guilt-inducing opt-out language), hidden costs, roach motel (easy in, hard out), misdirection, disguised ads',
+    'Apply the "would a user feel tricked" test to each step',
+    'Check notification permission requests for false urgency or misleading framing',
+    'Review subscription cancellation flow: how many steps, how many confirmation screens?',
+    'Review default settings: are they set in the user\'s interest or the product\'s interest?',
+    'Identify patterns that are legal but erode trust — these cause churn, not just complaints',
+    'Recommend the honest alternative for each dark pattern found',
+    'Store findings in veto_memory_store (type="decision", tags=["ethics", "ux"])',
+  ],
+  'fairness': [ // per-group benefit/harm analysis plus accessibility, literacy and economic-access checks
+    'Identify which user groups are affected by this feature',
+    'For each group: document the expected benefit and expected harm',
+    'Identify the group most likely to be disadvantaged by the current design assumptions',
+    'Check if the feature was designed with a default user in mind — who is that user, and who is excluded?',
+    'Evaluate accessibility: does the feature work for users with disabilities?',
+    'Evaluate language/literacy: does the feature assume a reading level or language that excludes users?',
+    'Evaluate economic access: does the feature require hardware, bandwidth, or time that not all users have?',
+    'Recommend design changes that reduce disproportionate harm without removing value for majority users',
+    'Store fairness analysis in veto_memory_store (type="decision", tags=["ethics", "fairness"])',
+  ],
+  'privacy': [ // data inventory, minimisation, retention, misuse scenarios, user rights, third-party sharing
+    'List all data points collected by this feature',
+    'For each data point: is it strictly necessary for the stated purpose?',
+    'Apply data minimisation: remove every data point that is not strictly necessary',
+    'Identify retention: how long is each data point kept? Is there an automatic deletion policy?',
+    'Identify worst-case misuse for each data point (leaked, sold, subpoenaed, used for a different purpose)',
+    'Check if users can access, correct, and delete their data (GDPR Art. 15-17)',
+    'Check if the privacy policy accurately describes this data collection',
+    'Evaluate third-party data sharing: does each third-party SDK or API receive PII?',
+    'Store findings in veto_memory_store (type="decision", tags=["ethics", "privacy"])',
+  ],
+  'general': [ // fallback list: autonomy, harm and transparency checks plus two framing tests
+    'Identify all affected user groups — not just the primary user',
+    'Evaluate autonomy: does the feature respect user choice, or does it nudge/coerce?',
+    'Evaluate harm: which groups could be harmed, and is the harm proportionate to the benefit?',
+    'Evaluate transparency: do users understand what the system is doing on their behalf?',
+    'Apply the "front page test": would this design choice be described negatively in a tech journalism article?',
+    'Apply the "worst user" test: what does the most vulnerable user experience when using this feature?',
+    'Identify the single highest-risk ethical dimension and make a concrete recommendation',
+    'Store findings in veto_memory_store (type="decision", tags=["ethics"])',
+  ],
+};
+/**
+ * Builds the ethics-bias agent plan for a task.
+ *
+ * The task and the optional context are joined with a single space and
+ * classified by `detectCategory`. The category picks the `approach` and
+ * `steps` entries; the checklist, pitfalls and patterns are the same for
+ * every category.
+ *
+ * @param task Task description; copied unchanged into the returned plan.
+ * @param context Optional extra text, used only for category detection.
+ * @returns A tier-3 plan for the 'ethics-bias' agent.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  const searchText = `${task} ${context ?? ''}`;
+  const detected = detectCategory(searchText);
+
+  // Category-independent sections.
+  const checklist = [
+    '[ ] All affected user groups identified — not just the primary user',
+    '[ ] AI/ML systems: output quality measured separately per demographic group',
+    '[ ] Dark patterns: "would a user feel tricked" test applied',
+    '[ ] Fairness: group most likely to be disadvantaged explicitly identified',
+    '[ ] Privacy: data minimisation applied — unnecessary data points removed',
+    '[ ] Feedback loops identified for AI/ML features',
+    '[ ] Post-launch monitoring plan defined for AI/ML outputs',
+    '[ ] Findings stored in veto_memory_store with tags=["ethics"]',
+  ];
+  const pitfalls = [
+    'Treating legal compliance as an ethical ceiling — GDPR compliance is the floor, not the standard',
+    'Evaluating bias only on aggregate accuracy — group-level disparities are invisible in aggregate metrics',
+    'Assuming good intent eliminates harm — disproportionate harm is real regardless of intent',
+    'Treating ethics as a one-time check — AI/ML bias drifts as usage patterns change',
+    'Ignoring indirect harm — the people harmed most are often not the users in the room when the product is designed',
+    'Conflating user preference with user interest — users may prefer a dark pattern in the moment but it harms them long-term',
+  ];
+  const patterns = [
+    'Affected-group enumeration: start every ethics analysis by listing all affected groups, especially non-users',
+    'Worst-user test: evaluate through the lens of the most vulnerable person likely to use this feature',
+    'Front-page test: would this design choice be described negatively in a tech journalism article?',
+    'Disparity detection: always measure AI/ML performance per group, not just in aggregate',
+  ];
+
+  return {
+    agent: 'ethics-bias' as WorkerAgentType,
+    task,
+    tier: 3,
+    approach: categoryApproach[detected],
+    steps: categorySteps[detected],
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '1-3 hours',
+  };
+}

package/src/agents/research/researcher.ts ADDED Viewed

@@ -0,0 +1,114 @@
+import { AgentPlan, WorkerAgentType } from '../types.js';
+/** Research focus areas; the detected one selects the approach text and step list used by `plan`. */
+type ResearchCategory = 'technology' | 'approach' | 'library' | 'pattern' | 'general';
+/**
+ * Classifies a research task by case-insensitive keyword substring.
+ *
+ * @param task Free-form task text.
+ * @returns The category of the first rule with a keyword hit, or 'general'.
+ */
+function detectCategory(task: string): ResearchCategory {
+  const lowered = task.toLowerCase();
+  // Rule order is the match priority: the first rule with a hit decides.
+  const rules: Array<[ResearchCategory, string[]]> = [
+    ['library', ['library', 'package', 'npm', 'dependency']],
+    ['approach', ['how to', 'approach', 'best way', 'strategy']],
+    ['technology', ['tech', 'framework', 'language', 'tool']],
+    ['pattern', ['pattern', 'design', 'architecture']],
+  ];
+  for (const [category, keywords] of rules) {
+    if (keywords.some((keyword) => lowered.includes(keyword))) {
+      return category;
+    }
+  }
+  return 'general';
+}
+const categoryApproach: Record<ResearchCategory, string> = { // Approach paragraph per research category; plan() returns the detected category's entry as `approach`.
+  'technology': 'Evaluate the technology against four criteria: maturity (production usage at scale), maintenance (commit activity, issue response time), ecosystem (plugins, integrations, community), and fit (does it solve the actual problem or require workarounds). Compare the top 3 options side-by-side. Recommend one with clear reasoning.',
+  'approach': 'Research the dominant approaches to this problem. For each approach: document the tradeoffs, who uses it at scale, what problems it introduces, and when it is the wrong choice. Identify the approach that minimises future regret for this specific context.',
+  'library': 'Audit the library before committing: weekly downloads, last publish date, open issues, license, bundle size, TypeScript support, and peer dependency conflicts. Check for known CVEs. Find the 2–3 best alternatives and compare. Recommend based on the actual requirements, not just stars.',
+  'pattern': 'Research the established patterns for this problem domain. Identify which pattern major codebases (open source projects similar to this one) use. Document the tradeoffs and known failure modes of each. Recommend the pattern with the best track record for this scale and team size.',
+  'general': 'Frame the research question precisely before searching. Identify: what is actually being asked, what evidence would constitute an answer, what sources are authoritative for this topic. Synthesise findings into a recommendation with clear reasoning.', // used when no category keyword matched
+};
+const categorySteps: Record<ResearchCategory, string[]> = { // Ordered step list per research category; plan() returns the detected category's array (by reference, not a copy) as `steps`.
+  'technology': [ // criteria, candidates, maintenance/ecosystem checks, proof-of-concept, recommendation
+    'Define the evaluation criteria specific to this project (performance, bundle size, licensing, etc.)',
+    'Identify the top 3–5 candidate technologies via npm/GitHub/documentation',
+    'For each candidate: check production usage at scale (case studies, "used by" lists)',
+    'Check maintenance: last commit, release cadence, open issues, response time',
+    'Check ecosystem: number of plugins, integrations, community size',
+    'Run a minimal proof-of-concept for the top 2 candidates',
+    'Document the tradeoffs in a comparison table',
+    'Identify deal-breakers for each option',
+    'Make a recommendation with explicit reasoning and known risks',
+    'Store the research outcome in veto_memory_store (type="reference") for future retrieval',
+  ],
+  'approach': [ // problem definition, constraints, dominant approaches, tail risks, rejected options
+    'Define the problem precisely — what outcome are we optimising for?',
+    'List the constraints that eliminate certain approaches immediately',
+    'Research the 3–5 dominant approaches used in production',
+    'For each approach: document implementation complexity, operational burden, and failure modes',
+    'Find real-world examples of each approach at similar scale',
+    'Identify which approach has the worst tail risks (what happens when it fails?)',
+    'Test the recommended approach against the stated constraints',
+    'Document the approach not chosen and why — prevents relitigating the decision',
+    'Store the recommendation in veto_memory_store (type="decision")',
+  ],
+  'library': [ // npm/GitHub health, license, CVEs, bundle size, types, peer deps, alternatives
+    'Check npm: weekly downloads, version history, last publish date',
+    'Check GitHub: star count is vanity; check commit frequency, issue close rate, PR merge speed',
+    'Check the license: MIT/Apache/BSD = fine. GPL/AGPL = requires legal review.',
+    'Run: npm audit on the package to check for known CVEs',
+    'Check bundle size via bundlephobia.com (for frontend dependencies)',
+    'Check TypeScript support: first-party types or @types/ package',
+    'Check peer dependency compatibility with the current project',
+    'Find the 2 best alternatives and compare on the same criteria',
+    'Recommend with specific version to pin and known issues to watch',
+    'Store decision in veto_memory_store with tags=["dependency", package name]',
+  ],
+  'pattern': [ // name the domain, find canonical patterns and implementations, weigh complexity and consistency
+    'Name the problem domain clearly (e.g. "managing shared state in multi-step forms")',
+    'Search for the canonical pattern name in this domain',
+    'Find 3–5 well-regarded open-source implementations of each pattern',
+    'Document: what the pattern solves, what it cannot solve, and its failure modes',
+    'Identify the complexity cost: what does adopting this pattern require developers to understand?',
+    'Check if this codebase already uses a related pattern — consistency matters more than perfection',
+    'Recommend the pattern that fits the team\'s current knowledge and codebase conventions',
+    'Store the pattern recommendation in veto_memory_store (type="reference")',
+  ],
+  'general': [ // fallback list: frame the question, gather and reconcile sources, recommend with a confidence level
+    'Frame the research question: what specific information is needed?',
+    'Identify authoritative sources for this topic',
+    'Gather evidence from at least 3 independent sources',
+    'Identify where sources disagree and why',
+    'Synthesise findings: what is well-established vs. contested?',
+    'Apply findings to the specific context of this project',
+    'Make a recommendation with confidence level (high/medium/low)',
+    'Document sources and store in veto_memory_store (type="reference")',
+  ],
+};
+/**
+ * Builds the researcher agent plan for a task.
+ *
+ * The task and the optional context are joined with a single space and
+ * classified by `detectCategory`. The category picks the `approach` and
+ * `steps` entries; the checklist, pitfalls and patterns do not vary.
+ *
+ * @param task Task description; copied unchanged into the returned plan.
+ * @param context Optional extra text, used only for category detection.
+ * @returns A tier-2 plan for the 'researcher' agent.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  const searchText = `${task} ${context ?? ''}`;
+  const detected = detectCategory(searchText);
+
+  // Category-independent sections.
+  const checklist = [
+    '[ ] Research question defined precisely before searching',
+    '[ ] At least 3 independent sources consulted',
+    '[ ] Top 2–3 options compared side-by-side',
+    '[ ] Tradeoffs and failure modes documented for each option',
+    '[ ] License, maintenance, and security checked (for libraries)',
+    '[ ] Recommendation made with clear reasoning',
+    '[ ] Decision stored in veto_memory_store for future retrieval',
+    '[ ] Sources documented',
+  ];
+  const pitfalls = [
+    'Researching without a precise question — "research authentication" is not researchable',
+    'Using star count as a quality signal — stars measure marketing, not quality',
+    'Not checking maintenance status — a popular abandoned library is worse than an obscure active one',
+    'Recommending the newest option instead of the most proven one',
+    'Not documenting the options that were rejected — the decision gets relitigated next month',
+    'Forgetting to check license compatibility for commercial or open-source projects',
+  ];
+  const patterns = [
+    'Evidence-based recommendation: recommendation follows from documented evidence, not preference',
+    'Rejection documentation: record what was rejected and why as strongly as what was chosen',
+    'Source triangulation: any single source can be wrong — find 3 independent confirmations',
+    'Fit-first evaluation: the best option for someone else\'s context may be wrong for yours',
+  ];
+
+  return {
+    agent: 'researcher' as WorkerAgentType,
+    task,
+    tier: 2,
+    approach: categoryApproach[detected],
+    steps: categorySteps[detected],
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '30-90 minutes',
+  };
+}

package/src/agents/research/risk-assessor.ts ADDED Viewed

@@ -0,0 +1,63 @@
+import { AgentPlan, WorkerAgentType } from '../types.js';
+/** Risk domains; the detected one selects the approach text used by `plan`. */
+type RiskCategory = 'technical' | 'security' | 'business' | 'general';
+/**
+ * Classifies a risk-assessment task by case-insensitive keyword substring.
+ * Security is checked first, then business, then technical.
+ *
+ * @param task Free-form task text.
+ * @returns The first category with a keyword hit, or 'general'.
+ */
+function detectCategory(task: string): RiskCategory {
+  const text = task.toLowerCase();
+  const mentions = (...words: string[]): boolean => words.some((word) => text.includes(word));
+
+  if (mentions('security', 'vulnerability', 'attack', 'breach')) {
+    return 'security';
+  }
+  if (mentions('business', 'market', 'revenue', 'legal', 'regulatory')) {
+    return 'business';
+  }
+  if (mentions('technical', 'architecture', 'system', 'infra', 'deploy')) {
+    return 'technical';
+  }
+  return 'general';
+}
+const approaches: Record<RiskCategory, string> = { // Approach paragraph per risk category; plan() returns the detected category's entry as `approach`.
+  'technical': 'Assess technical risks using a failure mode analysis. For each component: what happens when it fails? What is the blast radius? What is the recovery path? Assign probability (1–5) and impact (1–5), sort by P×I score. Focus on the top 3 risks — those are the ones worth mitigating now.',
+  'security': 'Perform a threat model using STRIDE: Spoofing, Tampering, Repudiation, Information Disclosure, Denial of Service, Elevation of Privilege. For each threat: document the attack vector, current controls, and residual risk. Prioritise by attacker motivation × exploitability × impact.',
+  'business': 'Assess business risks across four categories: market (demand disappears), operational (team/tooling fails), financial (runway runs out), and regulatory (compliance changes). For each: what is the early warning signal? What is the mitigation before it becomes a crisis?',
+  'general': 'Enumerate risks across technical, business, and operational dimensions. For each risk: probability (low/medium/high), impact (low/medium/high), detectability (would we know before it causes damage?), and mitigation. Prioritise by P×I with a tie-break on detectability.', // used when no category keyword matched
+};
+/**
+ * Builds the risk-assessor agent plan for a task.
+ *
+ * The task and the optional context are joined with a single space and
+ * classified by `detectCategory`. Only the `approach` text depends on the
+ * category; steps, checklist, pitfalls and patterns are fixed.
+ *
+ * @param task Task description; copied unchanged into the returned plan.
+ * @param context Optional extra text, used only for category detection.
+ * @returns A tier-3 plan for the 'risk-assessor' agent.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  const searchText = `${task} ${context ?? ''}`;
+  const detected = detectCategory(searchText);
+
+  const steps = [
+    'Define the scope: what system, feature, or decision is being assessed?',
+    'Enumerate all risks — do not filter yet, just list everything that could go wrong',
+    'For each risk: assign probability (1–5) and impact (1–5)',
+    'Calculate risk score: P × I (max 25)',
+    'Sort risks by score, descending',
+    'For the top 5 risks: document the early warning signal and mitigation',
+    'Identify which risks can be eliminated vs. mitigated vs. accepted',
+    'For accepted risks: document the acceptance rationale and review date',
+    'Assign ownership: who is responsible for monitoring each top-5 risk?',
+    'Store risk register in veto_memory_store (type="reference", tags=["risk", domain])',
+  ];
+  const checklist = [
+    '[ ] Scope defined clearly',
+    '[ ] At least 10 risks enumerated before filtering',
+    '[ ] All risks scored on P × I scale',
+    '[ ] Top 5 risks have early warning signals documented',
+    '[ ] Top 5 risks have mitigation steps documented',
+    '[ ] Accepted risks have rationale and review date',
+    '[ ] Risk register stored in veto_memory_store',
+  ];
+  const pitfalls = [
+    'Listing only obvious risks — the dangerous ones are the ones nobody thinks of',
+    'Scoring risks on gut feel without separating probability from impact',
+    'Documenting risks without assigning owners — nobody monitors an ownerless risk',
+    'Treating mitigation as risk elimination — most mitigations only reduce probability or impact, not to zero',
+    'Not setting a review date — risk registers become stale within 3 months',
+  ];
+  const patterns = [
+    'P × I scoring: separates probability from impact to avoid conflating "likely but minor" with "unlikely but catastrophic"',
+    'Early warning signal: each top risk gets a concrete signal that would trigger the mitigation',
+    'Accepted risk documentation: explicit acceptance is better than implicit ignorance',
+    'Risk ownership: every top-5 risk has a named owner who is responsible for the early warning signal',
+  ];
+
+  return {
+    agent: 'risk-assessor' as WorkerAgentType,
+    task,
+    tier: 3,
+    approach: approaches[detected],
+    steps,
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '1-3 hours',
+  };
+}

package/src/agents/research/tech-advisor.ts ADDED Viewed

@@ -0,0 +1,55 @@
+import { AgentPlan, WorkerAgentType } from '../types.js';
+/**
+ * Builds the tech-advisor agent plan for a task.
+ *
+ * The lower-cased task plus optional context is scanned for greenfield and
+ * migration keywords. Greenfield wins when both kinds are present; with
+ * neither, a default "stay with the known stack" approach is used. Steps,
+ * checklist, pitfalls and patterns are fixed.
+ *
+ * @param task Task description; copied unchanged into the returned plan.
+ * @param context Optional extra text, used only for keyword detection.
+ * @returns A tier-3 plan for the 'tech-advisor' agent.
+ */
+export function plan(task: string, context?: string): AgentPlan {
+  const haystack = `${task} ${context ?? ''}`.toLowerCase();
+  const containsAny = (needles: string[]): boolean => needles.some((needle) => haystack.includes(needle));
+  const greenfield = containsAny(['new project', 'start', 'greenfield', 'choose']);
+  const migration = containsAny(['migrat', 'replac', 'switch from', 'move from']);
+
+  let approach: string;
+  if (greenfield) {
+    approach = 'Evaluate the full stack for a greenfield project. Prioritise developer velocity, operational simplicity, and escape hatches. Recommend a stack with a 2-year horizon, not a 10-year one.';
+  } else if (migration) {
+    approach = 'Evaluate the migration path. A technology switch is only worth it if the current choice is actively blocking progress. Quantify: what does the current stack cost in monthly developer friction? What is the migration cost in weeks? Recommend migrate only if ROI is positive within 6 months.';
+  } else {
+    approach = 'The best technology is usually the one the team already knows, unless there is a specific capability gap. Document the gap first, then evaluate if a technology switch is the minimal solution.';
+  }
+
+  const steps = [
+    'Identify the actual problem: what specific capability is missing or causing friction?',
+    'Check if the current stack solves it with a library/plugin before considering a switch',
+    'If a new technology is needed: define evaluation criteria for this project specifically',
+    'List the top 3 candidates that meet the baseline criteria',
+    'Evaluate each: team learning curve, operational complexity, ecosystem maturity, licensing',
+    'Identify hidden costs: CI/CD changes, deployment changes, team training, tooling updates',
+    'Run a time-boxed spike (2–4 hours) with the top candidate to validate the fit',
+    'Document the decision as an ADR with alternatives and rejection reasons',
+    'Identify the first sign the chosen technology was wrong — what triggers re-evaluation?',
+    'Store in veto_memory_store (type="decision", tags=["tech-stack"])',
+  ];
+  const checklist = [
+    '[ ] Problem statement defined before evaluating solutions',
+    '[ ] Current stack\'s ability to solve the problem checked first',
+    '[ ] Top 3 candidates evaluated against project-specific criteria',
+    '[ ] Hidden migration costs estimated',
+    '[ ] Time-boxed spike completed for top candidate',
+    '[ ] ADR written with alternatives and rejection reasons',
+    '[ ] Re-evaluation trigger defined',
+  ];
+  const pitfalls = [
+    'Choosing a technology because it is popular industry-wide — the industry\'s context is not yours',
+    'Underestimating hidden migration costs (CI/CD, docs, engineer onboarding)',
+    'Skipping the spike — looks good in docs often fails on real requirements',
+    'Not defining a re-evaluation trigger — you end up committed to a wrong choice indefinitely',
+    'Recommending a full rewrite when an adapter would solve the problem',
+  ];
+  const patterns = [
+    'Capability-gap-first: identify the gap before evaluating technologies',
+    'Time-boxed spike: validate with real code before committing',
+    'ADR on adoption: document the decision the day it is made',
+    'Re-evaluation trigger: define upfront what would make you revisit this decision',
+  ];
+
+  return {
+    agent: 'tech-advisor' as WorkerAgentType,
+    task,
+    tier: 3,
+    approach,
+    steps,
+    checklist,
+    pitfalls,
+    patterns,
+    duration_estimate: '2-4 hours',
+  };
+}