thumbgate 1.26.7 → 1.27.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.well-known/agentic-verify.txt +1 -0
  4. package/.well-known/llms.txt +2 -0
  5. package/.well-known/mcp/server-card.json +1 -1
  6. package/README.md +20 -9
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/gcp/dfcx-webhook-gate.js +295 -0
  9. package/adapters/mcp/server-stdio.js +28 -1
  10. package/adapters/opencode/opencode.json +1 -1
  11. package/bench/thumbgate-bench.json +2 -2
  12. package/bin/cli.js +147 -10
  13. package/bin/dashboard-cli.js +7 -0
  14. package/config/gate-classifier-routing.json +98 -0
  15. package/config/gate-templates.json +60 -0
  16. package/config/mcp-allowlists.json +8 -7
  17. package/config/model-candidates.json +71 -6
  18. package/package.json +26 -10
  19. package/public/chatgpt-app.html +330 -0
  20. package/public/codex-plugin.html +66 -14
  21. package/public/dashboard.html +203 -17
  22. package/public/index.html +79 -4
  23. package/public/learn.html +70 -0
  24. package/public/lessons.html +129 -6
  25. package/public/numbers.html +2 -2
  26. package/public/pricing.html +20 -2
  27. package/scripts/agent-operations-planner.js +621 -0
  28. package/scripts/agent-reward-model.js +53 -1
  29. package/scripts/ai-component-inventory.js +367 -0
  30. package/scripts/classifier-routing.js +130 -0
  31. package/scripts/cli-schema.js +26 -0
  32. package/scripts/dashboard-chat.js +64 -17
  33. package/scripts/feedback-sanitizer.js +105 -0
  34. package/scripts/gates-engine.js +258 -61
  35. package/scripts/hybrid-feedback-context.js +141 -7
  36. package/scripts/memory-scope-readiness.js +159 -0
  37. package/scripts/parallel-workflow-orchestrator.js +293 -0
  38. package/scripts/plausible-domain-config.js +86 -0
  39. package/scripts/plausible-server-events.js +4 -2
  40. package/scripts/proxy-pointer-rag-guardrails.js +42 -1
  41. package/scripts/qa-scenario-planner.js +136 -0
  42. package/scripts/repeat-metric.js +28 -12
  43. package/scripts/secret-fixture-tokens.js +61 -0
  44. package/scripts/secret-scanner.js +44 -5
  45. package/scripts/security-scanner.js +80 -0
  46. package/scripts/seo-gsd.js +53 -0
  47. package/scripts/thumbgate-bench.js +16 -1
  48. package/scripts/tool-registry.js +37 -0
  49. package/scripts/workflow-sentinel.js +189 -4
  50. package/src/api/server.js +276 -10
@@ -18,6 +18,11 @@ const fs = require('fs');
18
18
  const path = require('path');
19
19
  const { resolveFeedbackDir } = require('./feedback-paths');
20
20
  const { readJsonl } = require('./fs-utils');
21
+ const {
22
+ TRANSPORT_WORDS,
23
+ sanitizeFeedbackText,
24
+ transportWordsOnly,
25
+ } = require('./feedback-sanitizer');
21
26
 
22
27
  // ---------------------------------------------------------------------------
23
28
  // Paths
@@ -51,6 +56,7 @@ const STOPWORDS = new Set([
51
56
  'has', 'had', 'not', 'but', 'they', 'you', 'can', 'will', 'all', 'any',
52
57
  'one', 'its', 'our', 'also', 'more', 'very', 'just', 'into', 'been',
53
58
  'bash', 'edit', 'write', 'tool', 'hook', 'clear',
59
+ ...TRANSPORT_WORDS,
54
60
  ]);
55
61
 
56
62
  const NEG = new Set([
@@ -74,7 +80,7 @@ const HYBRID_JSONL_READ_LIMIT = 400;
74
80
  */
75
81
  function normalize(text) {
76
82
  if (!text || typeof text !== 'string') return '';
77
- return text
83
+ return sanitizeFeedbackText(text)
78
84
  .replace(/\/Users\/[^\s/]+/g, '/Users/redacted')
79
85
  .replace(/:\d{4,5}\b/g, ':PORT')
80
86
  .toLowerCase()
@@ -97,7 +103,9 @@ function stripFeedbackPrefix(text) {
97
103
  * Compose normalize + stripFeedbackPrefix.
98
104
  */
99
105
  function normalizePatternText(text) {
100
- return normalize(stripFeedbackPrefix(text));
106
+ const normalized = normalize(stripFeedbackPrefix(text));
107
+ if (transportWordsOnly(normalized)) return '';
108
+ return normalized;
101
109
  }
102
110
 
103
111
  /**
@@ -125,6 +133,104 @@ function classify(entry) {
125
133
  return 'neutral';
126
134
  }
127
135
 
136
/**
 * Detect whether a feedback `context` string is a serialized hook prompt
 * envelope rather than free-form feedback text.
 *
 * A context counts as an envelope when it parses as a JSON object (not an
 * array) that has a truthy `prompt` plus at least one truthy hook/session
 * marker key, in either camelCase or snake_case spelling.
 *
 * @param {*} context - Candidate context value; non-strings are never envelopes.
 * @returns {boolean} True when the string is a JSON hook prompt envelope.
 */
function isHookPromptEnvelope(context) {
  if (!context || typeof context !== 'string') return false;

  let parsed;
  try {
    parsed = JSON.parse(context);
  } catch (_) {
    // Anything that is not valid JSON cannot be an envelope.
    return false;
  }
  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return false;
  if (!parsed.prompt) return false;

  const markerKeys = [
    'hookEventName',
    'hook_event_name',
    'workspaceRoot',
    'workspace_root',
    'session_id',
    'sessionId',
    'transcript_path',
    'transcriptPath',
  ];
  return markerKeys.some((key) => Boolean(parsed[key]));
}

/**
 * Return the part of `entry.context` that may be used when building patterns.
 *
 * A context that is a hook prompt envelope is always dropped (returns ''),
 * whether or not the entry also carries explicit feedback fields; any other
 * context is returned as a string.
 *
 * @param {object|null|undefined} entry - Feedback entry.
 * @returns {string} The usable context text, or '' when there is none.
 */
function patternContext(entry) {
  const context = entry && entry.context ? String(entry.context) : '';
  if (!context) return '';
  return isHookPromptEnvelope(context) ? '' : context;
}

/**
 * Check if the feedback entry is an automated enforcement log (e.g. from gates engine)
 * rather than real developer/user feedback.
 *
 * An entry is automated when it carries one of the automation tags, or when
 * its text mentions a gate / blocked tool / warned tool. The text inspected is
 * the first non-empty of `context`, `whatWentWrong`, `what_went_wrong`.
 *
 * @param {object|null|undefined} entry - Feedback entry.
 * @returns {boolean} True for automated entries; false for missing entries.
 */
function isAutomatedFeedback(entry) {
  if (!entry || typeof entry !== 'object') return false;

  // Tags are compared as whole tokens. A string value is split on commas and
  // whitespace so "gates-engine" still matches but "my-gates-engine-notes"
  // does not; any other non-array shape is treated as "no tags".
  let tags = [];
  if (Array.isArray(entry.tags)) {
    tags = entry.tags;
  } else if (typeof entry.tags === 'string') {
    tags = entry.tags.split(/[\s,]+/);
  }
  const automatedTags = ['auto-capture', 'gates-engine', 'audit-trail'];
  if (automatedTags.some((tag) => tags.includes(tag))) return true;

  const text = String(
    entry.context || entry.whatWentWrong || entry.what_went_wrong || ''
  ).toLowerCase();
  return text.includes('gate "') || text.includes('blocked tool') || text.includes('warned tool');
}
233
+
128
234
  /**
129
235
  * Extract ms from a timestamp value. Returns 0 on failure.
130
236
  */
@@ -212,13 +318,15 @@ function buildHybridState(opts) {
212
318
  if (cls === 'positive') positive++;
213
319
  if (cls === 'negative') {
214
320
  negative++;
215
- // Track tool-level negative counts
216
- const toolName = inferToolName(entry.toolName || entry.tool_name || 'unknown', entry.context || '');
217
- toolNegatives[toolName] = (toolNegatives[toolName] || 0) + 1;
321
+ // Track tool-level negative counts (exclude automated gate logs)
322
+ if (!isAutomatedFeedback(entry)) {
323
+ const toolName = inferToolName(entry.toolName || entry.tool_name || 'unknown', entry.context || '');
324
+ toolNegatives[toolName] = (toolNegatives[toolName] || 0) + 1;
325
+ }
218
326
 
219
327
  // Build pattern from context / whatWentWrong / what_went_wrong
220
328
  const rawText = [
221
- entry.context || '',
329
+ patternContext(entry),
222
330
  entry.whatWentWrong || entry.what_went_wrong || '',
223
331
  entry.whatToChange || entry.what_to_change || '',
224
332
  entry.failureType || '',
@@ -254,11 +362,13 @@ function buildHybridState(opts) {
254
362
 
255
363
  // Process attributed feedback separately to track attributed tool counts
256
364
  for (const entry of attributedEntries) {
365
+ if (classify(entry) !== 'negative') continue; // skip pruned/positive
366
+ if (isAutomatedFeedback(entry)) continue; // skip automated gate blocks
257
367
  const toolName = inferToolName(entry.toolName || entry.tool_name || entry.attributed_tool || 'unknown', entry.context || '');
258
368
  toolNegativesAttributed[toolName] = (toolNegativesAttributed[toolName] || 0) + 1;
259
369
 
260
370
  const rawText = [
261
- entry.context || '',
371
+ patternContext(entry),
262
372
  entry.whatWentWrong || entry.what_went_wrong || '',
263
373
  ...(Array.isArray(entry.tags) ? entry.tags : []),
264
374
  ...(entry.richContext && Array.isArray(entry.richContext.filePaths) ? entry.richContext.filePaths : []),
@@ -626,6 +736,29 @@ function evaluatePretool(toolName, toolInput, opts) {
626
736
  return evaluatePretoolFromState(state, toolName, toolInput);
627
737
  }
628
738
 
739
// Claw-style agent support (EnterpriseClaw / OpenShell style agents).
/**
 * Evaluate a pre-tool decision for a claw-style agent action.
 *
 * The claw metadata is merged into the tool input under the `_claw` key and
 * the enriched input is handed to `evaluatePretool` as a JSON string, so gate
 * templates (e.g. block-dynamic-tool-creation) can match on it.
 *
 * @param {string} toolName - Name of the tool about to run.
 * @param {object|*} toolInput - Tool input. Objects are spread into the
 *   enriched input; any non-object value is wrapped as `{ raw: toolInput }`.
 * @param {object} [clawContext] - Claw metadata, passed positionally (not via
 *   `opts`). Recognized keys: `actionType`, `agentId`, `hybridRoute` (or the
 *   alias `route`), `screenInteraction`, `fileAccess`.
 * @param {object} [opts] - Options forwarded unchanged to `evaluatePretool`.
 * @returns {*} The `evaluatePretool` result; when it is an object it is tagged
 *   with `clawContext` for logging/feedback.
 */
function evaluateClawPretool(toolName, toolInput, clawContext, opts) {
  const claw = clawContext || {};
  // `typeof null` is 'object'; spreading null contributes no keys, which
  // keeps a null input equivalent to an empty object.
  const baseInput = typeof toolInput === 'object' ? toolInput : { raw: toolInput };
  const enrichedInput = {
    ...baseInput,
    _claw: {
      actionType: claw.actionType || 'unknown',
      agentId: claw.agentId || 'unknown',
      // `route` is accepted as an alias so callers following the documented
      // `{ route: 'local/cloud' }` shape are not reported as 'unknown'.
      hybridRoute: claw.hybridRoute || claw.route || 'unknown',
      screenInteraction: Boolean(claw.screenInteraction),
      fileAccess: Boolean(claw.fileAccess),
    },
  };

  const result = evaluatePretool(toolName, JSON.stringify(enrichedInput), opts || {});
  if (result && typeof result === 'object') {
    result.clawContext = claw;
  }
  return result;
}
761
+
629
762
  // ---------------------------------------------------------------------------
630
763
  // CLI main()
631
764
  // ---------------------------------------------------------------------------
@@ -674,6 +807,7 @@ function main() {
674
807
  module.exports = {
675
808
  buildHybridState,
676
809
  evaluatePretool,
810
+ evaluateClawPretool,
677
811
  compileGuardArtifact,
678
812
  writeGuardArtifact,
679
813
  readGuardArtifact,
@@ -2,6 +2,38 @@
2
2
  'use strict';
3
3
 
4
4
  const REQUIRED_SCOPE_FIELDS = ['entityId', 'projectId', 'processId', 'sessionId'];
5
// Ordered registry of the memory layers a deployment is checked against.
// Each descriptor has a stable `id`, a display `name`, and a one-sentence
// `purpose`. Both the list and every descriptor are frozen so a consumer of
// this exported constant cannot alter the shared registry in place.
const MEMORY_OS_LAYERS = Object.freeze([
  {
    id: 'file_layer',
    name: 'File Layer',
    purpose: 'Raw feedback, tool receipts, sessions, and memory rows are durably stored before interpretation.',
  },
  {
    id: 'vector_db_layer',
    name: 'Vector DB Layer',
    purpose: 'Semantic retrieval can find related lessons without stuffing every raw memory into context.',
  },
  {
    id: 'structured_facts_layer',
    name: 'Structured Facts Layer',
    purpose: 'Confirmed account, project, policy, and budget facts are typed separately from fuzzy memories.',
  },
  {
    id: 'auto_curation_layer',
    name: 'Auto Curation Layer',
    purpose: 'Duplicate, stale, contradictory, and unscoped memories are consolidated before retrieval quality decays.',
  },
  {
    id: 'context_layer',
    name: 'Context Layer',
    purpose: 'Only relevant scoped memories enter a given tool call, PR, deployment, or support session.',
  },
  {
    id: 'interface_layer',
    name: 'Interface Layer',
    purpose: 'The memory contract is exposed through CLI, MCP, hooks, dashboards, and agent adapters without model lock-in.',
  },
].map((layer) => Object.freeze(layer)));
5
37
 
6
38
  const FIELD_ALIASES = {
7
39
  entityId: [
@@ -228,6 +260,128 @@ function buildRecommendations({ unscopedRecords, crossScopeDuplicates }) {
228
260
  return recommendations;
229
261
  }
230
262
 
263
/**
 * Report whether a memory record carries any semantic-index marker.
 *
 * Checks `embedding`, `vector`, `embeddingId`, `semanticKey` on the record and
 * `embedding`, `embeddingId`, `vectorId`, `semanticKey` on `record.metadata`.
 *
 * @param {object|null} [record] - Memory record; null/undefined yields false.
 * @returns {boolean} True when at least one marker is truthy.
 */
function hasEmbeddingEvidence(record = {}) {
  // The default parameter only covers `undefined`; guard `null` explicitly so
  // a sparse record list does not throw.
  if (record == null) return false;

  const recordKeys = ['embedding', 'vector', 'embeddingId', 'semanticKey'];
  const metadataKeys = ['embedding', 'embeddingId', 'vectorId', 'semanticKey'];
  return recordKeys.some((key) => Boolean(record[key]))
    || metadataKeys.some((key) => Boolean(record.metadata?.[key]));
}
275
+
276
/**
 * Report whether a memory record is a typed fact.
 *
 * A record qualifies when its type (first truthy of `type`, `kind`,
 * `memoryType`, `metadata.type`, compared case-insensitively) is `fact` or
 * `structured_fact`, or when it has a truthy `factKey` / `fact` on the record
 * or its metadata.
 *
 * @param {object|null} [record] - Memory record; null/undefined yields false.
 * @returns {boolean} True when the record carries structured-fact evidence.
 */
function hasStructuredFactEvidence(record = {}) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  if (record == null) return false;

  const metadata = record.metadata;
  const type = String(
    record.type || record.kind || record.memoryType || metadata?.type || ''
  ).toLowerCase();
  if (type === 'fact' || type === 'structured_fact') return true;

  return Boolean(record.factKey || record.fact || metadata?.factKey || metadata?.fact);
}
282
+
283
/**
 * Report whether a memory record shows it took part in scoped context
 * assembly: a truthy `contextPackId`, `contextPack`, or `retrievalQuery` on
 * the record or on `record.metadata`.
 *
 * @param {object|null} [record] - Memory record; null/undefined yields false.
 * @returns {boolean} True when at least one context marker is truthy.
 */
function hasContextEvidence(record = {}) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  if (record == null) return false;

  const markerKeys = ['contextPackId', 'contextPack', 'retrievalQuery'];
  return markerKeys.some((key) => Boolean(record[key] || record.metadata?.[key]));
}
293
+
294
/**
 * Report whether any of the named capability flags is explicitly enabled.
 *
 * Only the boolean `true` counts; truthy values such as `1` or `'yes'` do not.
 *
 * @param {object|null} [capabilities] - Capability flag map; null/undefined
 *   means nothing is enabled.
 * @param {...string} keys - Flag names to test.
 * @returns {boolean} True when at least one named flag is exactly `true`.
 */
function boolCapability(capabilities = {}, ...keys) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  if (capabilities == null) return false;
  return keys.some((key) => capabilities[key] === true);
}
297
+
298
/**
 * Build a readiness report for the layers listed in MEMORY_OS_LAYERS.
 *
 * Each layer is marked `ok` from evidence found in `records` and/or from
 * capability flags that are exactly `true`. The capability values reported
 * under `evidence` are derived with the same strict test as `ok`, so the
 * evidence can never contradict the verdict.
 *
 * @param {object[]} [records] - Memory records; a non-array is treated as empty.
 * @param {object} [capabilities] - Capability flag map; null is treated as empty.
 * @returns {{ready: boolean, riskLevel: string, layers: object[],
 *   missingLayers: string[], scopeReport: object, recommendations: string[]}}
 *   `riskLevel` is 'low' with no missing layers, 'medium' with one or two,
 *   and 'high' otherwise.
 */
function buildMemoryOsLayerReport(records = [], capabilities = {}) {
  const recordList = Array.isArray(records) ? records : [];
  const caps = capabilities == null ? {} : capabilities;
  const enabled = (...keys) => boolCapability(caps, ...keys);

  const scopeReport = buildMemoryScopeReadinessReport(recordList);
  const semanticRecords = recordList.filter(hasEmbeddingEvidence);
  const structuredFactRecords = recordList.filter(hasStructuredFactEvidence);
  const contextRecords = recordList.filter(hasContextEvidence);
  const curationReady = scopeReport.unscopedRecords === 0
    && scopeReport.crossScopeDuplicates.length === 0;

  // Each capability group is evaluated once and reused for `ok` and `evidence`.
  const durableStore = recordList.length > 0 || enabled('rawStorage', 'fileLayer');
  const semanticSearch = enabled('semanticSearch', 'vectorDbLayer');
  const structuredFacts = enabled('structuredFacts', 'structuredFactsLayer');
  const autoCuration = enabled('autoCuration', 'dedupe', 'autoCurationLayer');
  const scopedRetrieval = enabled('contextPacks', 'contextLayer', 'scopedRetrieval');

  const layerChecks = [
    {
      id: 'file_layer',
      ok: durableStore,
      evidence: {
        records: recordList.length,
        durableStore,
      },
      recommendation: 'Capture raw feedback, action receipts, and tool outcomes before promoting memories.',
    },
    {
      id: 'vector_db_layer',
      ok: semanticRecords.length > 0 || semanticSearch,
      evidence: {
        semanticRecords: semanticRecords.length,
        semanticSearch,
      },
      recommendation: 'Index lessons with semantic keys or embeddings so related failures are retrieved before action.',
    },
    {
      id: 'structured_facts_layer',
      ok: structuredFactRecords.length > 0 || structuredFacts,
      evidence: {
        structuredFactRecords: structuredFactRecords.length,
        structuredFacts,
      },
      recommendation: 'Store confirmed customer, project, policy, and budget facts as typed records, not just prose.',
    },
    {
      id: 'auto_curation_layer',
      // Unlike the other layers this one needs both clean data and the
      // capability flag: records alone cannot prove curation is running.
      ok: curationReady && autoCuration,
      evidence: {
        unscopedRecords: scopeReport.unscopedRecords,
        crossScopeDuplicates: scopeReport.crossScopeDuplicates.length,
        autoCuration,
      },
      recommendation: 'Run dedupe, contradiction, stale-memory, and scope-isolation checks before memories can become gates.',
    },
    {
      id: 'context_layer',
      ok: contextRecords.length > 0 || scopedRetrieval,
      evidence: {
        contextRecords: contextRecords.length,
        scopedRetrieval,
      },
      recommendation: 'Inject scoped context packs per task instead of loading every memory into the model window.',
    },
    {
      id: 'interface_layer',
      ok: enabled('mcp', 'cli', 'hooks', 'dashboard', 'interfaceLayer'),
      evidence: {
        cli: enabled('cli'),
        mcp: enabled('mcp'),
        hooks: enabled('hooks'),
        dashboard: enabled('dashboard'),
      },
      recommendation: 'Expose the same memory contract through CLI, MCP, hooks, dashboard, and agent adapters.',
    },
  ];

  // Index the layer descriptors once; the first descriptor for an id wins.
  const layerById = new Map();
  for (const layer of MEMORY_OS_LAYERS) {
    if (!layerById.has(layer.id)) layerById.set(layer.id, layer);
  }
  const layers = layerChecks.map((check) => ({ ...layerById.get(check.id), ...check }));

  const failing = layers.filter((layer) => !layer.ok);
  const missingLayers = failing.map((layer) => layer.id);

  let riskLevel = 'high';
  if (missingLayers.length === 0) {
    riskLevel = 'low';
  } else if (missingLayers.length <= 2) {
    riskLevel = 'medium';
  }

  return {
    ready: missingLayers.length === 0,
    riskLevel,
    layers,
    missingLayers,
    scopeReport,
    recommendations: failing.map((layer) => layer.recommendation),
  };
}
384
+
231
385
  function selectRecordsForScope(records = [], requestedScope = {}, options = {}) {
232
386
  const requested = normalizeScope(requestedScope);
233
387
  const requestedKey = memoryScopeKey(requested);
@@ -265,6 +419,7 @@ function buildMemoriStyleBenchmarkRecords() {
265
419
  projectId: 'thumbgate',
266
420
  processId: 'agent-a',
267
421
  sessionId: 'session-1',
422
+ metadata: { semanticKey: 'checkout-readiness', contextPackId: 'checkout-pro' },
268
423
  content: 'Use the paid sprint checklist before changing checkout code.',
269
424
  },
270
425
  {
@@ -298,14 +453,18 @@ function buildMemoriStyleBenchmarkRecords() {
298
453
  processId: 'agent-a',
299
454
  sessionId: 'session-1',
300
455
  visibility: 'shared',
456
+ type: 'fact',
457
+ factKey: 'checkout.mutation_policy',
301
458
  content: 'Shared rule: checkout mutations require audit evidence.',
302
459
  },
303
460
  ];
304
461
  }
305
462
 
306
463
  module.exports = {
464
+ MEMORY_OS_LAYERS,
307
465
  REQUIRED_SCOPE_FIELDS,
308
466
  buildMemoriStyleBenchmarkRecords,
467
+ buildMemoryOsLayerReport,
309
468
  buildMemoryScopeReadinessReport,
310
469
  isSharedMemory,
311
470
  memoryScopeKey,
@@ -0,0 +1,293 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const { getFeedbackPaths } = require('./feedback-loop');
6
+ const { ensureDir } = require('./fs-utils');
7
+ const { loadOptionalModule } = require('./private-core-boundary');
8
+
9
// Stand-in for the optional hosted-job launcher module: every entry point
// throws, naming the missing dependency.
function unavailableLauncher() {
  return {
    launchManagedJob() {
      throw new Error('Managed jobs require ThumbGate-Core.');
    },
    resumeHostedJob() {
      throw new Error('Resuming hosted jobs requires ThumbGate-Core.');
    },
  };
}

// Stand-in for the optional async job runner module: reports no job state.
function unavailableRunner() {
  return {
    readJobState() {
      return null;
    },
    listJobStates() {
      return [];
    },
  };
}

// NOTE(review): presumably loadOptionalModule returns the module when it can
// be loaded and otherwise the value produced by the fallback factory — confirm
// against private-core-boundary.
const launcher = loadOptionalModule(
  path.join(__dirname, 'hosted-job-launcher'),
  unavailableLauncher
);
const runner = loadOptionalModule(
  path.join(__dirname, 'async-job-runner'),
  unavailableRunner
);

const launchManagedJob = launcher.launchManagedJob;
const resumeHostedJob = launcher.resumeHostedJob;
const readJobState = runner.readJobState;
const listJobStates = runner.listJobStates;

// Default number of subtasks allowed to run at the same time.
const DEFAULT_CONCURRENCY = 3;
// Delay between job-state polls, in milliseconds.
const POLL_INTERVAL_MS = 200;
28
+
29
/**
 * Current time as an ISO-8601 string (the `Date.prototype.toISOString` form).
 *
 * @returns {string} Timestamp for "now".
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
32
+
33
/**
 * Decompose a high-level objective into parallel, specialized subtasks.
 *
 * Planning is rule-based on keywords in the lower-cased objective:
 *   - security / audit / leak / secret  -> secret scan, npm audit, credential gate check
 *   - performance / benchmark / bench   -> benchmark run, budget status
 *   - anything else (including empty)   -> code search, operational integrity check
 *
 * Every subtask has a single stage and is returned with a fresh unique `id`
 * and with `autoImprove`, `verificationMode`, and `recordFeedback` disabled.
 *
 * @param {*} objective - Objective text. Non-string values are coerced with
 *   `String()` for keyword matching; the value is echoed back unchanged.
 * @returns {{objective: *, plannedAt: string, subtasks: object[]}} The plan.
 */
function planWorkflow(objective) {
  // Coerce defensively: a non-string objective (number, object) must fall
  // through to the default workflow rather than throw on `.toLowerCase()`.
  const normalized = (objective ? String(objective) : '').toLowerCase().trim();
  const mentions = (...keywords) => keywords.some((keyword) => normalized.includes(keyword));

  // Builds a fresh single-stage subtask on every call so plans never share
  // mutable objects.
  const singleStage = (name, tags, stageName, command) => ({
    name,
    tags,
    stages: [
      {
        name: stageName,
        command,
      },
    ],
  });

  let subtasks;
  if (mentions('security', 'audit', 'leak', 'secret')) {
    subtasks = [
      singleStage('scan_secrets', ['security', 'secret-scanner'],
        'secret_scan', 'node scripts/secret-scanner.js --json || true'),
      singleStage('audit_dependencies', ['security', 'dependencies'],
        'npm_audit', 'npm audit --json || true'),
      singleStage('check_permissions', ['security', 'credentials'],
        'credential_gate_check', 'node scripts/single-use-credential-gate.js plan || true'),
    ];
  } else if (mentions('performance', 'benchmark', 'bench')) {
    subtasks = [
      singleStage('benchmark_candidates', ['performance', 'bench'],
        'run_bench', 'npx thumbgate bench --json --min-score=90 || true'),
      singleStage('check_budget', ['performance', 'budget'],
        'budget_status', 'node scripts/budget-guard.js --status || true'),
    ];
  } else {
    // Default general-purpose fallback workflow: code search and check integrity
    subtasks = [
      singleStage('code_search', ['exploration'],
        'search_fs', 'node scripts/filesystem-search.js --query="pretool" --limit=5 || true'),
      singleStage('check_integrity', ['integrity'],
        'ops_integrity', 'node scripts/operational-integrity.js --ci || true'),
    ];
  }

  return {
    objective,
    plannedAt: nowIso(),
    subtasks: subtasks.map((task, idx) => ({
      ...task,
      // Timestamp + index + random suffix keeps ids distinct within a plan.
      id: `subtask_${Date.now()}_${idx}_${Math.random().toString(36).slice(2, 6)}`,
      autoImprove: false,
      verificationMode: 'none',
      recordFeedback: false,
    })),
  };
}
129
+
130
/**
 * Plan an objective and run its subtasks in parallel, respecting a
 * concurrency limit.
 *
 * Jobs are launched through `launchManagedJob` and polled with `readJobState`
 * every POLL_INTERVAL_MS until each reaches a terminal status ('completed',
 * 'failed', 'cancelled') or the overall timeout elapses. On timeout, jobs
 * still running and subtasks that were never launched are both recorded with
 * status 'timeout', so every planned subtask appears in the results. A
 * markdown report and a raw JSON dump are then written under
 * `<FEEDBACK_DIR>/workflows/<workflowId>/`.
 *
 * @param {*} objective - Objective handed to `planWorkflow`.
 * @param {object} [options]
 * @param {number} [options.concurrency] - Max simultaneous jobs; values below
 *   1 or non-numeric fall back to DEFAULT_CONCURRENCY.
 * @param {number} [options.timeoutMs] - Overall timeout in ms; non-positive
 *   or non-numeric values fall back to 60000.
 * @param {string} [options.cwd] - Working directory forwarded to the launcher.
 * @returns {Promise<{workflowId: string, objective: *, durationMs: number,
 *   reportPath: string, results: object[]}>}
 * @throws Rejects with whatever the launcher or the job-state reader throws,
 *   whether on the initial launch or during polling.
 */
async function executeWorkflow(objective, options = {}) {
  const opts = options || {};
  const plan = planWorkflow(objective);

  // A concurrency below 1 would never launch anything and would spin until
  // the timeout, so it falls back to the default.
  const requestedConcurrency = Math.floor(Number(opts.concurrency));
  const concurrency = requestedConcurrency >= 1 ? requestedConcurrency : DEFAULT_CONCURRENCY;
  const requestedTimeout = Number(opts.timeoutMs);
  const timeoutMs = requestedTimeout > 0 ? requestedTimeout : 60000; // 60s timeout safety

  const { FEEDBACK_DIR } = getFeedbackPaths();
  const workflowId = `wf_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  const workflowDir = path.join(FEEDBACK_DIR, 'workflows', workflowId);
  ensureDir(workflowDir);

  const activeJobs = new Map();
  const queue = [...plan.subtasks];
  const results = [];
  const start = Date.now();
  const terminalStatuses = ['completed', 'failed', 'cancelled'];

  // Launch queued subtasks until the concurrency limit is reached.
  const runNext = () => {
    while (activeJobs.size < concurrency && queue.length > 0) {
      const task = queue.shift();
      const launched = launchManagedJob(task, { cwd: opts.cwd });
      activeJobs.set(task.id, {
        jobId: launched.jobId,
        taskName: task.name,
        launchedAt: Date.now(),
      });
    }
  };

  runNext();

  // Polling loop
  await new Promise((resolve, reject) => {
    const interval = setInterval(() => {
      try {
        // Iterate over a snapshot: entries are deleted and new jobs launched
        // while the pass is in progress.
        for (const [taskId, info] of [...activeJobs]) {
          const jobState = readJobState(info.jobId);
          // No state yet means the job is still pending.
          if (!jobState || !terminalStatuses.includes(jobState.status)) continue;

          results.push({
            taskId,
            taskName: info.taskName,
            jobId: info.jobId,
            status: jobState.status,
            context: jobState.currentContext,
            stageHistory: jobState.stageHistory,
            lastError: jobState.lastError,
          });
          activeJobs.delete(taskId);
          runNext();
        }

        if (activeJobs.size === 0 && queue.length === 0) {
          clearInterval(interval);
          resolve();
          return;
        }

        if (Date.now() - start >= timeoutMs) {
          clearInterval(interval);
          // Timeout remaining active tasks
          for (const [taskId, info] of activeJobs.entries()) {
            results.push({
              taskId,
              taskName: info.taskName,
              jobId: info.jobId,
              status: 'timeout',
              lastError: { message: `Subtask timed out after ${timeoutMs}ms`, code: 'TIMEOUT' },
            });
          }
          // Subtasks still queued were never launched; record them too so the
          // report accounts for every planned subtask.
          for (const task of queue.splice(0)) {
            results.push({
              taskId: task.id,
              taskName: task.name,
              jobId: null,
              status: 'timeout',
              lastError: {
                message: `Subtask was not launched before the workflow timed out after ${timeoutMs}ms`,
                code: 'TIMEOUT',
              },
            });
          }
          resolve();
        }
      } catch (err) {
        // An exception inside a timer callback would otherwise leave the
        // interval running and this promise unsettled.
        clearInterval(interval);
        reject(err);
      }
    }, POLL_INTERVAL_MS);
  });

  const durationMs = Date.now() - start;

  // Compile final markdown report
  const reportPath = path.join(workflowDir, 'report.md');
  const reportContent = compileWorkflowReport(plan, results, durationMs, workflowId);
  fs.writeFileSync(reportPath, reportContent, 'utf8');

  // Also save the raw execution results JSON
  const resultsJsonPath = path.join(workflowDir, 'results.json');
  const rawDump = {
    workflowId,
    objective,
    durationMs,
    plan,
    results,
  };
  fs.writeFileSync(resultsJsonPath, `${JSON.stringify(rawDump, null, 2)}\n`, 'utf8');

  return {
    workflowId,
    objective,
    durationMs,
    reportPath,
    results,
  };
}
239
+
240
/**
 * Render a markdown report for an executed workflow.
 *
 * The report has a header (objective, timestamp, duration, overall status), a
 * summary of counts, and one section per result with the job id, status, the
 * first stage's command, any error, and a preview of the output context.
 *
 * Context preview rules:
 *   - a string that parses as JSON is pretty-printed;
 *   - any other string is shown raw, truncated to 1000 characters;
 *   - a non-string value (e.g. an object) is serialized with JSON.stringify.
 *
 * @param {{objective: *, subtasks: object[]}} plan - Plan as returned by planWorkflow.
 * @param {object[]} results - Per-subtask results (`taskId`, `taskName`,
 *   `jobId`, `status`, optional `lastError`, optional `context`).
 * @param {number} durationMs - Total wall-clock duration in milliseconds.
 * @param {string} workflowId - Workflow identifier shown in the title.
 * @returns {string} The markdown document.
 */
function compileWorkflowReport(plan, results, durationMs, workflowId) {
  const timestamp = nowIso();
  const totalSubtasks = plan.subtasks.length;
  const completed = results.filter((r) => r.status === 'completed').length;
  const failed = results.filter((r) => r.status === 'failed' || r.status === 'timeout').length;

  // Index the plan once instead of searching it for every result; the first
  // subtask registered for an id wins.
  const planById = new Map();
  for (const task of plan.subtasks) {
    if (!planById.has(task.id)) planById.set(task.id, task);
  }

  // Turn a job's output context into preview text without ever throwing.
  const renderContext = (context) => {
    if (typeof context !== 'string') {
      // Structured contexts have no `.slice`; serialize them directly.
      try {
        return JSON.stringify(context, null, 2) || String(context);
      } catch {
        return String(context);
      }
    }
    try {
      // Try parsing output context as JSON for clean formatting
      return JSON.stringify(JSON.parse(context), null, 2);
    } catch {
      return context.slice(0, 1000) + (context.length > 1000 ? '\n... (truncated)' : '');
    }
  };

  const parts = [
    `# Dynamic Workflow Execution Report: ${workflowId}\n\n`,
    `**Objective:** ${plan.objective}\n`,
    `**Executed At:** ${timestamp}\n`,
    `**Duration:** ${(durationMs / 1000).toFixed(2)}s\n`,
    `**Status:** ${completed === totalSubtasks ? '✅ SUCCESS' : '⚠️ COMPLETED WITH FAILURES'}\n\n`,
    `## Summary\n`,
    `- Total planned subtasks: ${totalSubtasks}\n`,
    `- Completed successfully: ${completed}\n`,
    `- Failed/Timed out: ${failed}\n\n`,
    `## Subtask Breakdown\n\n`,
  ];

  for (const res of results) {
    const taskPlan = planById.get(res.taskId) || {};
    const commandUsed = taskPlan.stages && taskPlan.stages[0] ? taskPlan.stages[0].command : 'N/A';
    // Coerce so a result without a status cannot crash report generation.
    const statusLabel = res.status === 'completed'
      ? '✅ COMPLETED'
      : `❌ ${String(res.status).toUpperCase()}`;

    parts.push(`### ✦ Subtask: \`${res.taskName}\`\n`);
    parts.push(`- **Job ID:** \`${res.jobId}\`\n`);
    parts.push(`- **Status:** ${statusLabel}\n`);
    parts.push(`- **Command Run:** \`${commandUsed}\`\n`);

    if (res.lastError) {
      parts.push(`- **Error:** \`${res.lastError.message}\` (Code: \`${res.lastError.code}\`)\n`);
    }

    if (res.context) {
      parts.push(`\n**Output Context Preview:**\n\`\`\`json\n`);
      parts.push(renderContext(res.context));
      parts.push(`\n\`\`\`\n`);
    }
    parts.push(`\n---\n\n`);
  }

  return parts.join('');
}
288
+
289
+ module.exports = {
290
+ planWorkflow,
291
+ executeWorkflow,
292
+ compileWorkflowReport,
293
+ };