npm - thumbgate - Versions diffs - 1.26.7 → 1.27.2 - Mend

thumbgate 1.26.7 → 1.27.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/agentic-verify.txt +1 -0
package/.well-known/llms.txt +2 -0
package/.well-known/mcp/server-card.json +1 -1
package/README.md +20 -9
package/adapters/claude/.mcp.json +2 -2
package/adapters/gcp/dfcx-webhook-gate.js +295 -0
package/adapters/mcp/server-stdio.js +28 -1
package/adapters/opencode/opencode.json +1 -1
package/bench/thumbgate-bench.json +2 -2
package/bin/cli.js +147 -10
package/bin/dashboard-cli.js +7 -0
package/config/gate-classifier-routing.json +98 -0
package/config/gate-templates.json +60 -0
package/config/mcp-allowlists.json +8 -7
package/config/model-candidates.json +71 -6
package/package.json +26 -10
package/public/chatgpt-app.html +330 -0
package/public/codex-plugin.html +66 -14
package/public/dashboard.html +203 -17
package/public/index.html +79 -4
package/public/learn.html +70 -0
package/public/lessons.html +129 -6
package/public/numbers.html +2 -2
package/public/pricing.html +20 -2
package/scripts/agent-operations-planner.js +621 -0
package/scripts/agent-reward-model.js +53 -1
package/scripts/ai-component-inventory.js +367 -0
package/scripts/classifier-routing.js +130 -0
package/scripts/cli-schema.js +26 -0
package/scripts/dashboard-chat.js +64 -17
package/scripts/feedback-sanitizer.js +105 -0
package/scripts/gates-engine.js +258 -61
package/scripts/hybrid-feedback-context.js +141 -7
package/scripts/memory-scope-readiness.js +159 -0
package/scripts/parallel-workflow-orchestrator.js +293 -0
package/scripts/plausible-domain-config.js +86 -0
package/scripts/plausible-server-events.js +4 -2
package/scripts/proxy-pointer-rag-guardrails.js +42 -1
package/scripts/qa-scenario-planner.js +136 -0
package/scripts/repeat-metric.js +28 -12
package/scripts/secret-fixture-tokens.js +61 -0
package/scripts/secret-scanner.js +44 -5
package/scripts/security-scanner.js +80 -0
package/scripts/seo-gsd.js +53 -0
package/scripts/thumbgate-bench.js +16 -1
package/scripts/tool-registry.js +37 -0
package/scripts/workflow-sentinel.js +189 -4
package/src/api/server.js +276 -10

package/scripts/hybrid-feedback-context.js CHANGED Viewed

@@ -18,6 +18,11 @@ const fs = require('fs');
 const path = require('path');
 const { resolveFeedbackDir } = require('./feedback-paths');
 const { readJsonl } = require('./fs-utils');
+const {
+  TRANSPORT_WORDS,
+  sanitizeFeedbackText,
+  transportWordsOnly,
+} = require('./feedback-sanitizer');
 // ---------------------------------------------------------------------------
 // Paths
@@ -51,6 +56,7 @@ const STOPWORDS = new Set([
   'has', 'had', 'not', 'but', 'they', 'you', 'can', 'will', 'all', 'any',
   'one', 'its', 'our', 'also', 'more', 'very', 'just', 'into', 'been',
   'bash', 'edit', 'write', 'tool', 'hook', 'clear',
+  ...TRANSPORT_WORDS,
 ]);
 const NEG = new Set([
@@ -74,7 +80,7 @@ const HYBRID_JSONL_READ_LIMIT = 400;
  */
 function normalize(text) {
   if (!text || typeof text !== 'string') return '';
-  return text
+  return sanitizeFeedbackText(text)
     .replace(/\/Users\/[^\s/]+/g, '/Users/redacted')
     .replace(/:\d{4,5}\b/g, ':PORT')
     .toLowerCase()
@@ -97,7 +103,9 @@ function stripFeedbackPrefix(text) {
  * Compose normalize + stripFeedbackPrefix.
  */
 function normalizePatternText(text) {
-  return normalize(stripFeedbackPrefix(text));
+  const normalized = normalize(stripFeedbackPrefix(text));
+  if (transportWordsOnly(normalized)) return '';
+  return normalized;
 }
 /**
@@ -125,6 +133,104 @@ function classify(entry) {
   return 'neutral';
 }
+/**
+ * Detect a raw hook payload that was stored verbatim as feedback context.
+ *
+ * A context counts as an envelope when it is a JSON object string with a
+ * truthy `prompt` plus at least one truthy hook/session field (camelCase or
+ * snake_case spelling).
+ *
+ * @param {*} context - Candidate context; only non-empty strings are inspected.
+ * @returns {boolean} True when the string parses to a hook prompt envelope.
+ */
+function isHookPromptEnvelope(context) {
+  if (!context || typeof context !== 'string') return false;
+  let parsed;
+  try {
+    parsed = JSON.parse(context);
+  } catch (_) {
+    // Free-form text is not JSON, so it cannot be an envelope.
+    return false;
+  }
+  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return false;
+  const envelopeFields = [
+    'hookEventName',
+    'hook_event_name',
+    'workspaceRoot',
+    'workspace_root',
+    'session_id',
+    'sessionId',
+    'transcript_path',
+    'transcriptPath',
+  ];
+  return Boolean(parsed.prompt) && envelopeFields.some((field) => Boolean(parsed[field]));
+}
+/**
+ * Return the context text that is allowed to feed pattern extraction.
+ *
+ * Hook prompt envelopes are transport payloads, not feedback, so they yield
+ * an empty string whether or not the entry also carries explicit feedback
+ * fields; any other context is returned as a string.
+ *
+ * @param {object} entry - Feedback entry; may be null/undefined.
+ * @returns {string} Context text, or '' when absent or an envelope.
+ */
+function patternContext(entry) {
+  const context = entry && entry.context ? String(entry.context) : '';
+  if (!context) return '';
+  // Parse once: the envelope verdict does not depend on the other entry fields.
+  return isHookPromptEnvelope(context) ? '' : context;
+}
+/**
+ * Check if the feedback entry is an automated enforcement log (e.g. from gates engine)
+ * rather than real developer/user feedback.
+ *
+ * @param {object} entry - Feedback entry; non-objects are treated as not automated.
+ * @returns {boolean} True when a known automation tag or gate-log phrase is present.
+ */
+function isAutomatedFeedback(entry) {
+  if (!entry || typeof entry !== 'object') return false;
+  // Only array tags are honoured, matching the Array.isArray checks used elsewhere in this file.
+  const tags = Array.isArray(entry.tags) ? entry.tags : [];
+  const automationTags = ['auto-capture', 'gates-engine', 'audit-trail'];
+  if (automationTags.some((tag) => tags.includes(tag))) {
+    return true;
+  }
+  // Fall back through both spellings of the "what went wrong" field.
+  const context = String(entry.context || entry.whatWentWrong || entry.what_went_wrong || '').toLowerCase();
+  return context.includes('gate "') || context.includes('blocked tool') || context.includes('warned tool');
+}
 /**
  * Extract ms from a timestamp value. Returns 0 on failure.
  */
@@ -212,13 +318,15 @@ function buildHybridState(opts) {
     if (cls === 'positive') positive++;
     if (cls === 'negative') {
       negative++;
-      // Track tool-level negative counts
-      const toolName = inferToolName(entry.toolName || entry.tool_name || 'unknown', entry.context || '');
-      toolNegatives[toolName] = (toolNegatives[toolName] || 0) + 1;
+      // Track tool-level negative counts (exclude automated gate logs)
+      if (!isAutomatedFeedback(entry)) {
+        const toolName = inferToolName(entry.toolName || entry.tool_name || 'unknown', entry.context || '');
+        toolNegatives[toolName] = (toolNegatives[toolName] || 0) + 1;
+      }
       // Build pattern from context / whatWentWrong / what_went_wrong
       const rawText = [
-        entry.context || '',
+        patternContext(entry),
         entry.whatWentWrong || entry.what_went_wrong || '',
         entry.whatToChange || entry.what_to_change || '',
         entry.failureType || '',
@@ -254,11 +362,13 @@ function buildHybridState(opts) {
   // Process attributed feedback separately to track attributed tool counts
   for (const entry of attributedEntries) {
+    if (classify(entry) !== 'negative') continue; // skip pruned/positive
+    if (isAutomatedFeedback(entry)) continue; // skip automated gate blocks
     const toolName = inferToolName(entry.toolName || entry.tool_name || entry.attributed_tool || 'unknown', entry.context || '');
     toolNegativesAttributed[toolName] = (toolNegativesAttributed[toolName] || 0) + 1;
     const rawText = [
-      entry.context || '',
+      patternContext(entry),
       entry.whatWentWrong || entry.what_went_wrong || '',
       ...(Array.isArray(entry.tags) ? entry.tags : []),
       ...(entry.richContext && Array.isArray(entry.richContext.filePaths) ? entry.richContext.filePaths : []),
@@ -626,6 +736,29 @@ function evaluatePretool(toolName, toolInput, opts) {
   return evaluatePretoolFromState(state, toolName, toolInput);
 }
+// Claw-style agent support (high-ROI for EnterpriseClaw / OpenShell agents from Automation Anywhere / Nvidia)
+// Extends hybrid context for claw_action_type (file, screen, dynamic-tool, orchestration), agent_identity, hybrid_route.
+// Call from claw-aware MCP/hooks with the claw metadata as the third argument, e.g.
+//   evaluateClawPretool(tool, input, { actionType: 'dynamic-tool-creation', agentId: '...', hybridRoute: 'local/cloud' }, opts)
+// For compatibility with the earlier documented shape, `opts.clawContext` is used when the third
+// argument is omitted, and `route` is accepted as an alias of `hybridRoute`.
+/**
+ * Evaluate a tool call with claw metadata merged into the tool input.
+ *
+ * @param {string} toolName - Tool being evaluated.
+ * @param {*} toolInput - Tool input; plain objects are merged, anything else is wrapped as `{ raw }`.
+ * @param {object} [clawContext] - Claw metadata (actionType, agentId, hybridRoute/route, screenInteraction, fileAccess).
+ * @param {object} [opts] - Options forwarded to evaluatePretool.
+ * @returns {*} The evaluatePretool result, tagged with `clawContext` when it is an object.
+ */
+function evaluateClawPretool(toolName, toolInput, clawContext, opts) {
+  const o = opts || {};
+  const claw = clawContext || o.clawContext || {};
+  // Arrays would otherwise be spread into index keys; null/undefined contribute no fields.
+  const isMergeable = toolInput !== null && typeof toolInput === 'object' && !Array.isArray(toolInput);
+  let baseInput;
+  if (toolInput === null || toolInput === undefined) {
+    baseInput = {};
+  } else if (isMergeable) {
+    baseInput = toolInput;
+  } else {
+    baseInput = { raw: toolInput };
+  }
+  // Merge claw metadata into toolInput for gate evaluation (so templates like block-dynamic-tool-creation can match)
+  const enrichedInput = {
+    ...baseInput,
+    _claw: {
+      actionType: claw.actionType || 'unknown',
+      agentId: claw.agentId || 'unknown',
+      hybridRoute: claw.hybridRoute || claw.route || 'unknown',
+      screenInteraction: !!claw.screenInteraction,
+      fileAccess: !!claw.fileAccess,
+    },
+  };
+  const result = evaluatePretool(toolName, JSON.stringify(enrichedInput), o);
+  // Tag result with claw metadata for logging/feedback; skip when there is no object to tag.
+  if (result && typeof result === 'object') {
+    result.clawContext = claw;
+  }
+  return result;
+}
 // ---------------------------------------------------------------------------
 // CLI main()
 // ---------------------------------------------------------------------------
@@ -674,6 +807,7 @@ function main() {
 module.exports = {
   buildHybridState,
   evaluatePretool,
+  evaluateClawPretool,
   compileGuardArtifact,
   writeGuardArtifact,
   readGuardArtifact,

package/scripts/memory-scope-readiness.js CHANGED Viewed

@@ -2,6 +2,38 @@
 'use strict';
 const REQUIRED_SCOPE_FIELDS = ['entityId', 'projectId', 'processId', 'sessionId'];
+// Ordered catalogue of the memory layers scored by buildMemoryOsLayerReport.
+// Each layer object is frozen as well as the array, because Object.freeze is shallow
+// and this constant is exported for other modules to read.
+const MEMORY_OS_LAYERS = Object.freeze([
+  {
+    id: 'file_layer',
+    name: 'File Layer',
+    purpose: 'Raw feedback, tool receipts, sessions, and memory rows are durably stored before interpretation.',
+  },
+  {
+    id: 'vector_db_layer',
+    name: 'Vector DB Layer',
+    purpose: 'Semantic retrieval can find related lessons without stuffing every raw memory into context.',
+  },
+  {
+    id: 'structured_facts_layer',
+    name: 'Structured Facts Layer',
+    purpose: 'Confirmed account, project, policy, and budget facts are typed separately from fuzzy memories.',
+  },
+  {
+    id: 'auto_curation_layer',
+    name: 'Auto Curation Layer',
+    purpose: 'Duplicate, stale, contradictory, and unscoped memories are consolidated before retrieval quality decays.',
+  },
+  {
+    id: 'context_layer',
+    name: 'Context Layer',
+    purpose: 'Only relevant scoped memories enter a given tool call, PR, deployment, or support session.',
+  },
+  {
+    id: 'interface_layer',
+    name: 'Interface Layer',
+    purpose: 'The memory contract is exposed through CLI, MCP, hooks, dashboards, and agent adapters without model lock-in.',
+  },
+].map((layer) => Object.freeze(layer)));
 const FIELD_ALIASES = {
   entityId: [
@@ -228,6 +260,128 @@ function buildRecommendations({ unscopedRecords, crossScopeDuplicates }) {
   return recommendations;
 }
+/**
+ * Report whether a record carries any embedding / semantic-index marker,
+ * either at the top level or under `metadata`.
+ *
+ * @param {object} [record] - Memory record to inspect.
+ * @returns {boolean} True when at least one marker field is truthy.
+ */
+function hasEmbeddingEvidence(record = {}) {
+  const meta = record.metadata;
+  const markers = [
+    record.embedding,
+    record.vector,
+    record.embeddingId,
+    meta?.embedding,
+    meta?.embeddingId,
+    meta?.vectorId,
+    record.semanticKey,
+    meta?.semanticKey,
+  ];
+  return markers.some(Boolean);
+}
+/**
+ * Report whether a record is a typed fact: its declared type is `fact` or
+ * `structured_fact` (case-insensitive), or it carries a fact key/value at
+ * the top level or under `metadata`.
+ *
+ * @param {object} [record] - Memory record to inspect.
+ * @returns {boolean} True when the record looks like a structured fact.
+ */
+function hasStructuredFactEvidence(record = {}) {
+  const meta = record.metadata;
+  const declaredType = record.type || record.kind || record.memoryType || meta?.type || '';
+  const type = String(declaredType).toLowerCase();
+  if (type === 'fact' || type === 'structured_fact') {
+    return true;
+  }
+  const factMarkers = [record.factKey, record.fact, meta?.factKey, meta?.fact];
+  return factMarkers.some(Boolean);
+}
+/**
+ * Report whether a record references a context pack or a retrieval query,
+ * either at the top level or under `metadata`.
+ *
+ * @param {object} [record] - Memory record to inspect.
+ * @returns {boolean} True when at least one context marker is truthy.
+ */
+function hasContextEvidence(record = {}) {
+  const meta = record.metadata;
+  const markers = [
+    record.contextPackId,
+    record.contextPack,
+    meta?.contextPackId,
+    meta?.contextPack,
+    record.retrievalQuery,
+    meta?.retrievalQuery,
+  ];
+  return markers.some(Boolean);
+}
+/**
+ * True when any of the named capability flags is exactly `true`
+ * (truthy non-boolean values do not count).
+ *
+ * @param {object} [capabilities] - Capability flag map.
+ * @param {...string} keys - Flag names to test.
+ * @returns {boolean} Whether at least one flag is strictly true.
+ */
+function boolCapability(capabilities = {}, ...keys) {
+  for (const key of keys) {
+    if (capabilities[key] === true) {
+      return true;
+    }
+  }
+  return false;
+}
+/**
+ * Score a set of memory records and declared capabilities against every
+ * entry of MEMORY_OS_LAYERS.
+ *
+ * Most layers pass on record evidence OR a capability flag that is strictly
+ * `true`; the auto-curation layer needs both a clean scope report and a
+ * capability flag, and the interface layer is capability-only.
+ *
+ * @param {object[]} [records] - Memory records to inspect.
+ * @param {object} [capabilities] - Capability flag map.
+ * @returns {{ready: boolean, riskLevel: string, layers: object[], missingLayers: string[], scopeReport: object, recommendations: string[]}}
+ */
+function buildMemoryOsLayerReport(records = [], capabilities = {}) {
+  const scopeReport = buildMemoryScopeReadinessReport(records);
+  const semanticCount = records.filter(hasEmbeddingEvidence).length;
+  const structuredFactCount = records.filter(hasStructuredFactEvidence).length;
+  const contextCount = records.filter(hasContextEvidence).length;
+  const duplicateCount = scopeReport.crossScopeDuplicates.length;
+  const hasRecords = records.length > 0;
+  const curationReady = scopeReport.unscopedRecords === 0 && duplicateCount === 0;
+  // Pass/fail uses strict flags; the evidence booleans below deliberately keep truthy semantics.
+  const declares = (...keys) => boolCapability(capabilities, ...keys);
+  const layerChecks = [
+    {
+      id: 'file_layer',
+      ok: hasRecords || declares('rawStorage', 'fileLayer'),
+      evidence: {
+        records: records.length,
+        durableStore: Boolean(hasRecords || capabilities.rawStorage || capabilities.fileLayer),
+      },
+      recommendation: 'Capture raw feedback, action receipts, and tool outcomes before promoting memories.',
+    },
+    {
+      id: 'vector_db_layer',
+      ok: semanticCount > 0 || declares('semanticSearch', 'vectorDbLayer'),
+      evidence: {
+        semanticRecords: semanticCount,
+        semanticSearch: Boolean(capabilities.semanticSearch || capabilities.vectorDbLayer),
+      },
+      recommendation: 'Index lessons with semantic keys or embeddings so related failures are retrieved before action.',
+    },
+    {
+      id: 'structured_facts_layer',
+      ok: structuredFactCount > 0 || declares('structuredFacts', 'structuredFactsLayer'),
+      evidence: {
+        structuredFactRecords: structuredFactCount,
+        structuredFacts: Boolean(capabilities.structuredFacts || capabilities.structuredFactsLayer),
+      },
+      recommendation: 'Store confirmed customer, project, policy, and budget facts as typed records, not just prose.',
+    },
+    {
+      id: 'auto_curation_layer',
+      ok: curationReady && declares('autoCuration', 'dedupe', 'autoCurationLayer'),
+      evidence: {
+        unscopedRecords: scopeReport.unscopedRecords,
+        crossScopeDuplicates: duplicateCount,
+        autoCuration: Boolean(capabilities.autoCuration || capabilities.dedupe || capabilities.autoCurationLayer),
+      },
+      recommendation: 'Run dedupe, contradiction, stale-memory, and scope-isolation checks before memories can become gates.',
+    },
+    {
+      id: 'context_layer',
+      ok: contextCount > 0 || declares('contextPacks', 'contextLayer', 'scopedRetrieval'),
+      evidence: {
+        contextRecords: contextCount,
+        scopedRetrieval: Boolean(capabilities.contextPacks || capabilities.contextLayer || capabilities.scopedRetrieval),
+      },
+      recommendation: 'Inject scoped context packs per task instead of loading every memory into the model window.',
+    },
+    {
+      id: 'interface_layer',
+      ok: declares('mcp', 'cli', 'hooks', 'dashboard', 'interfaceLayer'),
+      evidence: {
+        cli: Boolean(capabilities.cli),
+        mcp: Boolean(capabilities.mcp),
+        hooks: Boolean(capabilities.hooks),
+        dashboard: Boolean(capabilities.dashboard),
+      },
+      recommendation: 'Expose the same memory contract through CLI, MCP, hooks, dashboard, and agent adapters.',
+    },
+  ];
+  // Layer metadata (name, purpose) comes first so the check's own fields win on key clashes.
+  const layers = layerChecks.map((check) => ({
+    ...MEMORY_OS_LAYERS.find((layer) => layer.id === check.id),
+    ...check,
+  }));
+  const failing = layers.filter((layer) => !layer.ok);
+  const missingLayers = failing.map((layer) => layer.id);
+  let riskLevel = 'high';
+  if (missingLayers.length === 0) {
+    riskLevel = 'low';
+  } else if (missingLayers.length <= 2) {
+    riskLevel = 'medium';
+  }
+  return {
+    ready: missingLayers.length === 0,
+    riskLevel,
+    layers,
+    missingLayers,
+    scopeReport,
+    recommendations: failing.map((layer) => layer.recommendation),
+  };
+}
 function selectRecordsForScope(records = [], requestedScope = {}, options = {}) {
   const requested = normalizeScope(requestedScope);
   const requestedKey = memoryScopeKey(requested);
@@ -265,6 +419,7 @@ function buildMemoriStyleBenchmarkRecords() {
       projectId: 'thumbgate',
       processId: 'agent-a',
       sessionId: 'session-1',
+      metadata: { semanticKey: 'checkout-readiness', contextPackId: 'checkout-pro' },
       content: 'Use the paid sprint checklist before changing checkout code.',
     },
     {
@@ -298,14 +453,18 @@ function buildMemoriStyleBenchmarkRecords() {
       processId: 'agent-a',
       sessionId: 'session-1',
       visibility: 'shared',
+      type: 'fact',
+      factKey: 'checkout.mutation_policy',
       content: 'Shared rule: checkout mutations require audit evidence.',
     },
   ];
 }
 module.exports = {
+  MEMORY_OS_LAYERS,
   REQUIRED_SCOPE_FIELDS,
   buildMemoriStyleBenchmarkRecords,
+  buildMemoryOsLayerReport,
   buildMemoryScopeReadinessReport,
   isSharedMemory,
   memoryScopeKey,

package/scripts/parallel-workflow-orchestrator.js ADDED Viewed

@@ -0,0 +1,293 @@
+'use strict';
+const fs = require('fs');
+const path = require('path');
+const { getFeedbackPaths } = require('./feedback-loop');
+const { ensureDir } = require('./fs-utils');
+const { loadOptionalModule } = require('./private-core-boundary');
+// Hosted-job launcher, obtained through loadOptionalModule with a fallback factory.
+// NOTE(review): presumably the fallback object is used when ./hosted-job-launcher cannot be
+// loaded — confirm against private-core-boundary.
+const launcher = loadOptionalModule(path.join(__dirname, 'hosted-job-launcher'), () => ({
+  // Fallback stubs throw, so callers get an explicit "requires ThumbGate-Core" error.
+  launchManagedJob: () => {
+    throw new Error('Managed jobs require ThumbGate-Core.');
+  },
+  resumeHostedJob: () => {
+    throw new Error('Resuming hosted jobs requires ThumbGate-Core.');
+  },
+}));
+// Job-state reader, obtained the same way; the fallback stubs report no job state and no jobs.
+const runner = loadOptionalModule(path.join(__dirname, 'async-job-runner'), () => ({
+  readJobState: () => null,
+  listJobStates: () => [],
+}));
+// NOTE(review): resumeHostedJob and listJobStates are not referenced in the code visible here.
+const { launchManagedJob, resumeHostedJob } = launcher;
+const { readJobState, listJobStates } = runner;
+// Number of subtasks run at once when options.concurrency is not usable.
+const DEFAULT_CONCURRENCY = 3;
+// Delay between job-state polls in executeWorkflow, in milliseconds.
+const POLL_INTERVAL_MS = 200;
+/** Current time as an ISO-8601 string, as produced by Date#toISOString. */
+function nowIso() {
+  const current = new Date();
+  return current.toISOString();
+}
+/** Build a subtask that runs a single shell stage. */
+function buildSingleStageSubtask(name, tags, stageName, command) {
+  return {
+    name,
+    tags,
+    stages: [
+      {
+        name: stageName,
+        command,
+      },
+    ],
+  };
+}
+/**
+ * Dynamically decompose a high-level objective into parallel, specialized subtasks.
+ * Supports rule-based fallback and can be extended to use LLM planning.
+ *
+ * Matching is a case-insensitive substring test: security keywords are checked
+ * first, then performance keywords, otherwise a general-purpose plan is used.
+ *
+ * @param {*} objective - Free-text objective. Falsy values plan the default
+ *   workflow; truthy non-strings are coerced with String() instead of throwing.
+ * @returns {{objective: *, plannedAt: string, subtasks: object[]}} The plan; `objective`
+ *   is echoed back unchanged and every subtask gets a generated `id`.
+ */
+function planWorkflow(objective) {
+  const obj = (objective ? String(objective) : '').toLowerCase().trim();
+  const mentionsAny = (keywords) => keywords.some((keyword) => obj.includes(keyword));
+  let subtasks;
+  if (mentionsAny(['security', 'audit', 'leak', 'secret'])) {
+    subtasks = [
+      buildSingleStageSubtask(
+        'scan_secrets',
+        ['security', 'secret-scanner'],
+        'secret_scan',
+        'node scripts/secret-scanner.js --json || true'
+      ),
+      buildSingleStageSubtask(
+        'audit_dependencies',
+        ['security', 'dependencies'],
+        'npm_audit',
+        'npm audit --json || true'
+      ),
+      buildSingleStageSubtask(
+        'check_permissions',
+        ['security', 'credentials'],
+        'credential_gate_check',
+        'node scripts/single-use-credential-gate.js plan || true'
+      ),
+    ];
+  } else if (mentionsAny(['performance', 'benchmark', 'bench'])) {
+    subtasks = [
+      buildSingleStageSubtask(
+        'benchmark_candidates',
+        ['performance', 'bench'],
+        'run_bench',
+        'npx thumbgate bench --json --min-score=90 || true'
+      ),
+      buildSingleStageSubtask(
+        'check_budget',
+        ['performance', 'budget'],
+        'budget_status',
+        'node scripts/budget-guard.js --status || true'
+      ),
+    ];
+  } else {
+    // Default general-purpose fallback workflow: code search and check integrity
+    subtasks = [
+      buildSingleStageSubtask(
+        'code_search',
+        ['exploration'],
+        'search_fs',
+        'node scripts/filesystem-search.js --query="pretool" --limit=5 || true'
+      ),
+      buildSingleStageSubtask(
+        'check_integrity',
+        ['integrity'],
+        'ops_integrity',
+        'node scripts/operational-integrity.js --ci || true'
+      ),
+    ];
+  }
+  return {
+    objective,
+    plannedAt: nowIso(),
+    subtasks: subtasks.map((task, idx) => ({
+      ...task,
+      // Timestamp + index + random suffix keeps ids distinct within and across plans.
+      id: `subtask_${Date.now()}_${idx}_${Math.random().toString(36).slice(2, 6)}`,
+      autoImprove: false,
+      verificationMode: 'none',
+      recordFeedback: false,
+    })),
+  };
+}
+/**
+ * Execute a list of planned subtasks in parallel, respecting a concurrency limit.
+ * Polls active jobs until all complete, then consolidates the results.
+ *
+ * Writes `report.md` and `results.json` under `<FEEDBACK_DIR>/workflows/<workflowId>`.
+ *
+ * @param {*} objective - Objective passed to planWorkflow.
+ * @param {object} [options]
+ * @param {number} [options.concurrency] - Max jobs in flight; values below 1 or
+ *   non-numeric values fall back to DEFAULT_CONCURRENCY.
+ * @param {number} [options.timeoutMs] - Overall deadline in ms; non-positive or
+ *   non-numeric values fall back to 60000.
+ * @param {string} [options.cwd] - Working directory forwarded to launchManagedJob.
+ * @returns {Promise<{workflowId: string, objective: *, durationMs: number, reportPath: string, results: object[]}>}
+ *   Rejects when launching or polling a job throws.
+ */
+async function executeWorkflow(objective, options = {}) {
+  const opts = options || {};
+  const plan = planWorkflow(objective);
+  // A limit below 1 would never launch anything, so treat it as "use the default".
+  const requestedConcurrency = Math.floor(Number(opts.concurrency));
+  const concurrency = requestedConcurrency >= 1 ? requestedConcurrency : DEFAULT_CONCURRENCY;
+  const requestedTimeout = Number(opts.timeoutMs);
+  const timeoutMs = requestedTimeout > 0 ? requestedTimeout : 60000; // 60s timeout safety
+  const { FEEDBACK_DIR } = getFeedbackPaths();
+  const workflowId = `wf_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
+  const workflowDir = path.join(FEEDBACK_DIR, 'workflows', workflowId);
+  ensureDir(workflowDir);
+  const activeJobs = new Map();
+  const queue = [...plan.subtasks];
+  const results = [];
+  const start = Date.now();
+  const runNext = () => {
+    while (activeJobs.size < concurrency && queue.length > 0) {
+      const task = queue.shift();
+      const launched = launchManagedJob(task, { cwd: opts.cwd });
+      activeJobs.set(task.id, {
+        jobId: launched.jobId,
+        taskName: task.name,
+        launchedAt: Date.now(),
+      });
+    }
+  };
+  runNext();
+  // Polling loop
+  await new Promise((resolve, reject) => {
+    const interval = setInterval(() => {
+      try {
+        // Iterate a snapshot: jobs launched during this tick are polled on the next one.
+        for (const [taskId, info] of [...activeJobs]) {
+          const jobState = readJobState(info.jobId);
+          if (!jobState) continue; // state not written yet
+          if (!['completed', 'failed', 'cancelled'].includes(jobState.status)) continue;
+          results.push({
+            taskId,
+            taskName: info.taskName,
+            jobId: info.jobId,
+            status: jobState.status,
+            context: jobState.currentContext,
+            stageHistory: jobState.stageHistory,
+            lastError: jobState.lastError,
+          });
+          activeJobs.delete(taskId);
+          runNext();
+        }
+        if (activeJobs.size === 0 && queue.length === 0) {
+          clearInterval(interval);
+          resolve();
+          return;
+        }
+        if (Date.now() - start < timeoutMs) return;
+        clearInterval(interval);
+        // Timeout remaining active tasks
+        for (const [taskId, info] of activeJobs.entries()) {
+          results.push({
+            taskId,
+            taskName: info.taskName,
+            jobId: info.jobId,
+            status: 'timeout',
+            lastError: { message: `Subtask timed out after ${timeoutMs}ms`, code: 'TIMEOUT' },
+          });
+        }
+        // Subtasks that never got a slot must still appear in the results.
+        for (const task of queue) {
+          results.push({
+            taskId: task.id,
+            taskName: task.name,
+            jobId: null,
+            status: 'timeout',
+            lastError: {
+              message: `Subtask was not started before the workflow timed out after ${timeoutMs}ms`,
+              code: 'TIMEOUT',
+            },
+          });
+        }
+        resolve();
+      } catch (err) {
+        // A throw inside a timer callback would otherwise be uncaught and leave the promise pending.
+        clearInterval(interval);
+        reject(err);
+      }
+    }, POLL_INTERVAL_MS);
+  });
+  const durationMs = Date.now() - start;
+  // Compile final markdown report
+  const reportPath = path.join(workflowDir, 'report.md');
+  const reportContent = compileWorkflowReport(plan, results, durationMs, workflowId);
+  fs.writeFileSync(reportPath, reportContent, 'utf8');
+  // Also save the raw execution results JSON
+  const resultsJsonPath = path.join(workflowDir, 'results.json');
+  fs.writeFileSync(resultsJsonPath, JSON.stringify({
+    workflowId,
+    objective,
+    durationMs,
+    plan,
+    results,
+  }, null, 2) + '\n', 'utf8');
+  return {
+    workflowId,
+    objective,
+    durationMs,
+    reportPath,
+    results,
+  };
+}
+/**
+ * Render a subtask's output context for the report's fenced JSON preview.
+ * Strings that parse as JSON are pretty-printed, other strings are cut to
+ * 1000 characters, and non-string values are serialized directly.
+ */
+function formatContextPreview(context) {
+  if (typeof context !== 'string') {
+    let serialized;
+    try {
+      serialized = JSON.stringify(context, null, 2);
+    } catch {
+      serialized = undefined; // e.g. circular structures or BigInt values
+    }
+    return serialized === undefined ? String(context) : serialized;
+  }
+  try {
+    // Try parsing output context as JSON for clean formatting
+    return JSON.stringify(JSON.parse(context), null, 2);
+  } catch {
+    return context.slice(0, 1000) + (context.length > 1000 ? '\n... (truncated)' : '');
+  }
+}
+/**
+ * Build the markdown report for one workflow run.
+ *
+ * @param {{objective: *, subtasks: object[]}} plan - Plan that was executed.
+ * @param {object[]} results - One entry per reported subtask (taskId, taskName, jobId,
+ *   status, and optionally context and lastError).
+ * @param {number} durationMs - Wall-clock duration of the run in milliseconds.
+ * @param {string} workflowId - Identifier shown in the report title.
+ * @returns {string} Markdown text.
+ */
+function compileWorkflowReport(plan, results, durationMs, workflowId) {
+  const timestamp = nowIso();
+  const totalSubtasks = plan.subtasks.length;
+  const completed = results.filter((r) => r.status === 'completed').length;
+  const failed = results.filter((r) => r.status === 'failed' || r.status === 'timeout').length;
+  let report = `# Dynamic Workflow Execution Report: ${workflowId}\n\n`;
+  report += `**Objective:** ${plan.objective}\n`;
+  report += `**Executed At:** ${timestamp}\n`;
+  report += `**Duration:** ${(durationMs / 1000).toFixed(2)}s\n`;
+  report += `**Status:** ${completed === totalSubtasks ? '✅ SUCCESS' : '⚠️ COMPLETED WITH FAILURES'}\n\n`;
+  report += `## Summary\n`;
+  report += `- Total planned subtasks: ${totalSubtasks}\n`;
+  report += `- Completed successfully: ${completed}\n`;
+  report += `- Failed/Timed out: ${failed}\n\n`;
+  report += `## Subtask Breakdown\n\n`;
+  for (const res of results) {
+    const taskPlan = plan.subtasks.find((t) => t.id === res.taskId) || {};
+    const commandUsed = taskPlan.stages && taskPlan.stages[0] ? taskPlan.stages[0].command : 'N/A';
+    // A result without a status is reported as UNKNOWN instead of crashing the whole report.
+    const statusLabel = res.status === 'completed'
+      ? '✅ COMPLETED'
+      : '❌ ' + String(res.status || 'unknown').toUpperCase();
+    report += `### ✦ Subtask: \`${res.taskName}\`\n`;
+    report += `- **Job ID:** \`${res.jobId}\`\n`;
+    report += `- **Status:** ${statusLabel}\n`;
+    report += `- **Command Run:** \`${commandUsed}\`\n`;
+    if (res.lastError) {
+      // Only mention the code when the error actually carries one.
+      const codeSuffix = res.lastError.code ? ` (Code: \`${res.lastError.code}\`)` : '';
+      report += `- **Error:** \`${res.lastError.message}\`${codeSuffix}\n`;
+    }
+    if (res.context) {
+      report += `\n**Output Context Preview:**\n\`\`\`json\n`;
+      report += formatContextPreview(res.context);
+      report += `\n\`\`\`\n`;
+    }
+    report += `\n---\n\n`;
+  }
+  return report;
+}
+module.exports = {
+  planWorkflow,
+  executeWorkflow,
+  compileWorkflowReport,
+};