thumbgate 1.26.7 → 1.27.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/agentic-verify.txt +1 -0
- package/.well-known/llms.txt +2 -0
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +20 -9
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/gcp/dfcx-webhook-gate.js +295 -0
- package/adapters/mcp/server-stdio.js +28 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bench/thumbgate-bench.json +2 -2
- package/bin/cli.js +147 -10
- package/bin/dashboard-cli.js +7 -0
- package/config/gate-classifier-routing.json +98 -0
- package/config/gate-templates.json +60 -0
- package/config/mcp-allowlists.json +8 -7
- package/config/model-candidates.json +71 -6
- package/package.json +26 -10
- package/public/chatgpt-app.html +330 -0
- package/public/codex-plugin.html +66 -14
- package/public/dashboard.html +203 -17
- package/public/index.html +79 -4
- package/public/learn.html +70 -0
- package/public/lessons.html +129 -6
- package/public/numbers.html +2 -2
- package/public/pricing.html +20 -2
- package/scripts/agent-operations-planner.js +621 -0
- package/scripts/agent-reward-model.js +53 -1
- package/scripts/ai-component-inventory.js +367 -0
- package/scripts/classifier-routing.js +130 -0
- package/scripts/cli-schema.js +26 -0
- package/scripts/dashboard-chat.js +64 -17
- package/scripts/feedback-sanitizer.js +105 -0
- package/scripts/gates-engine.js +258 -61
- package/scripts/hybrid-feedback-context.js +141 -7
- package/scripts/memory-scope-readiness.js +159 -0
- package/scripts/parallel-workflow-orchestrator.js +293 -0
- package/scripts/plausible-domain-config.js +86 -0
- package/scripts/plausible-server-events.js +4 -2
- package/scripts/proxy-pointer-rag-guardrails.js +42 -1
- package/scripts/qa-scenario-planner.js +136 -0
- package/scripts/repeat-metric.js +28 -12
- package/scripts/secret-fixture-tokens.js +61 -0
- package/scripts/secret-scanner.js +44 -5
- package/scripts/security-scanner.js +80 -0
- package/scripts/seo-gsd.js +53 -0
- package/scripts/thumbgate-bench.js +16 -1
- package/scripts/tool-registry.js +37 -0
- package/scripts/workflow-sentinel.js +189 -4
- package/src/api/server.js +276 -10
|
@@ -18,6 +18,11 @@ const fs = require('fs');
|
|
|
18
18
|
const path = require('path');
|
|
19
19
|
const { resolveFeedbackDir } = require('./feedback-paths');
|
|
20
20
|
const { readJsonl } = require('./fs-utils');
|
|
21
|
+
const {
|
|
22
|
+
TRANSPORT_WORDS,
|
|
23
|
+
sanitizeFeedbackText,
|
|
24
|
+
transportWordsOnly,
|
|
25
|
+
} = require('./feedback-sanitizer');
|
|
21
26
|
|
|
22
27
|
// ---------------------------------------------------------------------------
|
|
23
28
|
// Paths
|
|
@@ -51,6 +56,7 @@ const STOPWORDS = new Set([
|
|
|
51
56
|
'has', 'had', 'not', 'but', 'they', 'you', 'can', 'will', 'all', 'any',
|
|
52
57
|
'one', 'its', 'our', 'also', 'more', 'very', 'just', 'into', 'been',
|
|
53
58
|
'bash', 'edit', 'write', 'tool', 'hook', 'clear',
|
|
59
|
+
...TRANSPORT_WORDS,
|
|
54
60
|
]);
|
|
55
61
|
|
|
56
62
|
const NEG = new Set([
|
|
@@ -74,7 +80,7 @@ const HYBRID_JSONL_READ_LIMIT = 400;
|
|
|
74
80
|
*/
|
|
75
81
|
function normalize(text) {
|
|
76
82
|
if (!text || typeof text !== 'string') return '';
|
|
77
|
-
return text
|
|
83
|
+
return sanitizeFeedbackText(text)
|
|
78
84
|
.replace(/\/Users\/[^\s/]+/g, '/Users/redacted')
|
|
79
85
|
.replace(/:\d{4,5}\b/g, ':PORT')
|
|
80
86
|
.toLowerCase()
|
|
@@ -97,7 +103,9 @@ function stripFeedbackPrefix(text) {
|
|
|
97
103
|
* Compose normalize + stripFeedbackPrefix.
|
|
98
104
|
*/
|
|
99
105
|
function normalizePatternText(text) {
  // Drop the feedback prefix first, then apply the shared normalizer.
  const cleaned = normalize(stripFeedbackPrefix(text));
  // When transportWordsOnly() reports that nothing else is left, the text
  // contributes no pattern at all.
  return transportWordsOnly(cleaned) ? '' : cleaned;
}
|
|
102
110
|
|
|
103
111
|
/**
|
|
@@ -125,6 +133,104 @@ function classify(entry) {
|
|
|
125
133
|
return 'neutral';
|
|
126
134
|
}
|
|
127
135
|
|
|
136
|
+
// Keys whose presence (next to `prompt`) marks a JSON context as a hook envelope.
const HOOK_ENVELOPE_MARKER_KEYS = [
  'hookEventName',
  'hook_event_name',
  'workspaceRoot',
  'workspace_root',
  'session_id',
  'sessionId',
  'transcript_path',
  'transcriptPath',
];

// Tags that identify an entry as machine-generated rather than human feedback.
const AUTOMATED_FEEDBACK_TAGS = ['auto-capture', 'gates-engine', 'audit-trail'];

/**
 * Tell whether a context string is a serialized hook prompt envelope.
 *
 * A context qualifies when it parses as a JSON object (not an array) that has
 * a truthy `prompt` plus at least one truthy hook/session/workspace/transcript
 * marker key.
 *
 * @param {*} context - Candidate context value; non-strings never qualify.
 * @returns {boolean} True only for a JSON object envelope as described above.
 */
function isHookPromptEnvelope(context) {
  if (!context || typeof context !== 'string') return false;

  let parsed;
  try {
    parsed = JSON.parse(context);
  } catch (_) {
    // Plain prose (or any non-JSON text) is ordinary context, not an envelope.
    return false;
  }

  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return false;
  if (!parsed.prompt) return false;
  return HOOK_ENVELOPE_MARKER_KEYS.some((key) => Boolean(parsed[key]));
}

/**
 * Return the context text that may be used when building patterns.
 *
 * Hook prompt envelopes are always dropped (empty string). The previous
 * implementation computed whether the entry carried explicit feedback fields,
 * but both of its envelope branches returned '' — so that flag never affected
 * the result and has been removed.
 *
 * @param {object|null|undefined} entry - Feedback entry; only `context` is read.
 * @returns {string} The stringified context, or '' when absent or an envelope.
 */
function patternContext(entry) {
  const context = entry && entry.context ? String(entry.context) : '';
  if (!context) return '';
  return isHookPromptEnvelope(context) ? '' : context;
}

/**
 * Check if the feedback entry is an automated enforcement log (e.g. from gates engine)
 * rather than real developer/user feedback.
 *
 * @param {object|null|undefined} entry - Feedback entry; reads `tags`, `context`, `whatWentWrong`.
 * @returns {boolean} True when a known automation tag or gate-log phrase is present.
 */
function isAutomatedFeedback(entry) {
  if (!entry) return false;

  // Arrays and strings both expose includes(); anything else is treated as
  // "no tags" instead of throwing.
  const rawTags = entry.tags;
  const tags = Array.isArray(rawTags) || typeof rawTags === 'string' ? rawTags : [];
  if (AUTOMATED_FEEDBACK_TAGS.some((tag) => tags.includes(tag))) {
    return true;
  }

  const context = String(entry.context || entry.whatWentWrong || '').toLowerCase();
  return context.includes('gate "') || context.includes('blocked tool') || context.includes('warned tool');
}
|
|
233
|
+
|
|
128
234
|
/**
|
|
129
235
|
* Extract ms from a timestamp value. Returns 0 on failure.
|
|
130
236
|
*/
|
|
@@ -212,13 +318,15 @@ function buildHybridState(opts) {
|
|
|
212
318
|
if (cls === 'positive') positive++;
|
|
213
319
|
if (cls === 'negative') {
|
|
214
320
|
negative++;
|
|
215
|
-
// Track tool-level negative counts
|
|
216
|
-
|
|
217
|
-
|
|
321
|
+
// Track tool-level negative counts (exclude automated gate logs)
|
|
322
|
+
if (!isAutomatedFeedback(entry)) {
|
|
323
|
+
const toolName = inferToolName(entry.toolName || entry.tool_name || 'unknown', entry.context || '');
|
|
324
|
+
toolNegatives[toolName] = (toolNegatives[toolName] || 0) + 1;
|
|
325
|
+
}
|
|
218
326
|
|
|
219
327
|
// Build pattern from context / whatWentWrong / what_went_wrong
|
|
220
328
|
const rawText = [
|
|
221
|
-
entry
|
|
329
|
+
patternContext(entry),
|
|
222
330
|
entry.whatWentWrong || entry.what_went_wrong || '',
|
|
223
331
|
entry.whatToChange || entry.what_to_change || '',
|
|
224
332
|
entry.failureType || '',
|
|
@@ -254,11 +362,13 @@ function buildHybridState(opts) {
|
|
|
254
362
|
|
|
255
363
|
// Process attributed feedback separately to track attributed tool counts
|
|
256
364
|
for (const entry of attributedEntries) {
|
|
365
|
+
if (classify(entry) !== 'negative') continue; // skip pruned/positive
|
|
366
|
+
if (isAutomatedFeedback(entry)) continue; // skip automated gate blocks
|
|
257
367
|
const toolName = inferToolName(entry.toolName || entry.tool_name || entry.attributed_tool || 'unknown', entry.context || '');
|
|
258
368
|
toolNegativesAttributed[toolName] = (toolNegativesAttributed[toolName] || 0) + 1;
|
|
259
369
|
|
|
260
370
|
const rawText = [
|
|
261
|
-
entry
|
|
371
|
+
patternContext(entry),
|
|
262
372
|
entry.whatWentWrong || entry.what_went_wrong || '',
|
|
263
373
|
...(Array.isArray(entry.tags) ? entry.tags : []),
|
|
264
374
|
...(entry.richContext && Array.isArray(entry.richContext.filePaths) ? entry.richContext.filePaths : []),
|
|
@@ -626,6 +736,29 @@ function evaluatePretool(toolName, toolInput, opts) {
|
|
|
626
736
|
return evaluatePretoolFromState(state, toolName, toolInput);
|
|
627
737
|
}
|
|
628
738
|
|
|
739
|
+
// Claw-style agent support (high-ROI for EnterpriseClaw / OpenShell agents from Automation Anywhere / Nvidia)
// Extends hybrid context for claw_action_type (file, screen, dynamic-tool, orchestration), agent_identity, hybrid_route.
// Use from claw-aware MCP/hooks: pass the claw metadata as the third argument, e.g.
// {actionType: 'dynamic-tool-creation', agentId: '...', hybridRoute: 'local/cloud'}.
// For compatibility with the earlier documentation, `route` is accepted as an alias of
// `hybridRoute`, and `opts.clawContext` is used when the third argument is omitted.
/**
 * Evaluate a pre-tool gate with claw metadata merged into the tool input.
 *
 * @param {string} toolName - Tool being invoked; forwarded to evaluatePretool.
 * @param {*} toolInput - Plain objects are merged as-is; any other non-null
 *   value (strings, arrays, numbers) is wrapped as `{ raw: toolInput }`.
 * @param {object} [clawContext] - Claw metadata: actionType, agentId,
 *   hybridRoute (or route), screenInteraction, fileAccess.
 * @param {object} [opts] - Options forwarded to evaluatePretool; `opts.clawContext`
 *   is the fallback source of claw metadata.
 * @returns {*} The evaluatePretool result, tagged with `clawContext` when it is an object.
 */
function evaluateClawPretool(toolName, toolInput, clawContext, opts) {
  const o = opts || {};
  const claw = clawContext || o.clawContext || {};

  // Only spread real key/value objects. Spreading an array would leak its
  // indices ("0", "1", ...) into the gate input as top-level keys.
  const isPlainObject = toolInput !== null && typeof toolInput === 'object' && !Array.isArray(toolInput);
  let baseInput;
  if (toolInput == null) {
    baseInput = {};
  } else if (isPlainObject) {
    baseInput = toolInput;
  } else {
    baseInput = { raw: toolInput };
  }

  // Merge claw metadata into toolInput for gate evaluation (so templates like block-dynamic-tool-creation can match)
  const enrichedInput = {
    ...baseInput,
    _claw: {
      actionType: claw.actionType || 'unknown',
      agentId: claw.agentId || 'unknown',
      hybridRoute: claw.hybridRoute || claw.route || 'unknown',
      screenInteraction: !!claw.screenInteraction,
      fileAccess: !!claw.fileAccess,
    },
  };

  const result = evaluatePretool(toolName, JSON.stringify(enrichedInput), o);

  // Tag result with claw metadata for logging/feedback. Guarded so that a
  // null/primitive result is returned untouched instead of throwing.
  if (result && typeof result === 'object') {
    result.clawContext = claw;
  }
  return result;
}
|
|
761
|
+
|
|
629
762
|
// ---------------------------------------------------------------------------
|
|
630
763
|
// CLI main()
|
|
631
764
|
// ---------------------------------------------------------------------------
|
|
@@ -674,6 +807,7 @@ function main() {
|
|
|
674
807
|
module.exports = {
|
|
675
808
|
buildHybridState,
|
|
676
809
|
evaluatePretool,
|
|
810
|
+
evaluateClawPretool,
|
|
677
811
|
compileGuardArtifact,
|
|
678
812
|
writeGuardArtifact,
|
|
679
813
|
readGuardArtifact,
|
|
@@ -2,6 +2,38 @@
|
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
const REQUIRED_SCOPE_FIELDS = ['entityId', 'projectId', 'processId', 'sessionId'];
|
|
5
|
+
// Ordered catalogue of the memory layers, each with a stable id, a display
// name, and a one-sentence purpose. Object.freeze is shallow, so every layer
// descriptor is frozen individually as well as the list itself; this keeps the
// exported constant from being altered by a consumer.
const MEMORY_OS_LAYERS = Object.freeze([
  ['file_layer', 'File Layer', 'Raw feedback, tool receipts, sessions, and memory rows are durably stored before interpretation.'],
  ['vector_db_layer', 'Vector DB Layer', 'Semantic retrieval can find related lessons without stuffing every raw memory into context.'],
  ['structured_facts_layer', 'Structured Facts Layer', 'Confirmed account, project, policy, and budget facts are typed separately from fuzzy memories.'],
  ['auto_curation_layer', 'Auto Curation Layer', 'Duplicate, stale, contradictory, and unscoped memories are consolidated before retrieval quality decays.'],
  ['context_layer', 'Context Layer', 'Only relevant scoped memories enter a given tool call, PR, deployment, or support session.'],
  ['interface_layer', 'Interface Layer', 'The memory contract is exposed through CLI, MCP, hooks, dashboards, and agent adapters without model lock-in.'],
].map(([id, name, purpose]) => Object.freeze({ id, name, purpose })));
|
|
5
37
|
|
|
6
38
|
const FIELD_ALIASES = {
|
|
7
39
|
entityId: [
|
|
@@ -228,6 +260,128 @@ function buildRecommendations({ unscopedRecords, crossScopeDuplicates }) {
|
|
|
228
260
|
return recommendations;
|
|
229
261
|
}
|
|
230
262
|
|
|
263
|
+
/**
 * Report whether a record carries any embedding/semantic-index marker.
 *
 * Checks `embedding`, `vector`, `embeddingId`, `semanticKey` on the record and
 * `embedding`, `embeddingId`, `vectorId`, `semanticKey` on `record.metadata`.
 *
 * @param {object|null} [record] - Memory record; null/undefined yields false
 *   (the default parameter alone does not cover an explicit null).
 * @returns {boolean} True when at least one marker is truthy.
 */
function hasEmbeddingEvidence(record = {}) {
  if (record == null) return false;
  const { metadata } = record;
  return Boolean(
    record.embedding
    || record.vector
    || record.embeddingId
    || record.semanticKey
    || metadata?.embedding
    || metadata?.embeddingId
    || metadata?.vectorId
    || metadata?.semanticKey
  );
}
|
|
275
|
+
|
|
276
|
+
/**
 * Report whether a record is marked as a structured fact.
 *
 * A record qualifies when its type (first truthy of `type`, `kind`,
 * `memoryType`, `metadata.type`, compared case-insensitively) is `fact` or
 * `structured_fact`, or when it carries `factKey`/`fact` directly or under
 * `metadata`.
 *
 * @param {object|null} [record] - Memory record; null/undefined yields false.
 * @returns {boolean} True when the record is typed or keyed as a fact.
 */
function hasStructuredFactEvidence(record = {}) {
  if (record == null) return false;
  const { metadata } = record;
  const type = String(record.type || record.kind || record.memoryType || metadata?.type || '').toLowerCase();
  if (type === 'fact' || type === 'structured_fact') return true;
  return Boolean(record.factKey || record.fact || metadata?.factKey || metadata?.fact);
}
|
|
282
|
+
|
|
283
|
+
/**
 * Report whether a record carries a context-pack or retrieval marker.
 *
 * Checks `contextPackId`, `contextPack`, `retrievalQuery` on the record and
 * the same three keys under `record.metadata`.
 *
 * @param {object|null} [record] - Memory record; null/undefined yields false.
 * @returns {boolean} True when at least one marker is truthy.
 */
function hasContextEvidence(record = {}) {
  if (record == null) return false;
  const { metadata } = record;
  return Boolean(
    record.contextPackId
    || record.contextPack
    || record.retrievalQuery
    || metadata?.contextPackId
    || metadata?.contextPack
    || metadata?.retrievalQuery
  );
}
|
|
293
|
+
|
|
294
|
+
/**
 * Test whether any of the named capability flags is switched on.
 *
 * Only the literal boolean `true` counts; truthy strings or numbers do not.
 *
 * @param {object|null} [capabilities] - Capability flags; null/undefined means none.
 * @param {...string} keys - Flag names to look up.
 * @returns {boolean} True when at least one named flag is exactly `true`.
 */
function boolCapability(capabilities = {}, ...keys) {
  if (capabilities == null) return false;
  return keys.some((key) => capabilities[key] === true);
}
|
|
297
|
+
|
|
298
|
+
/**
 * Build a readiness report covering every entry of MEMORY_OS_LAYERS.
 *
 * Each layer is marked `ok` from record evidence and/or declared capability
 * flags. Capability flags are read through boolCapability() for BOTH the `ok`
 * verdict and the `evidence` payload, so the two can no longer disagree (the
 * earlier version used strict `=== true` for `ok` but plain truthiness for
 * `evidence`).
 *
 * @param {Array<object>} [records] - Memory records; a non-array is treated as empty.
 * @param {object} [capabilities] - Capability flags; null is treated as none.
 * @returns {{ready: boolean, riskLevel: string, layers: Array<object>,
 *   missingLayers: string[], scopeReport: object, recommendations: string[]}}
 *   `riskLevel` is 'low' with no missing layers, 'medium' with one or two, else 'high'.
 */
function buildMemoryOsLayerReport(records = [], capabilities = {}) {
  const recordList = Array.isArray(records) ? records : [];
  const caps = capabilities || {};

  const scopeReport = buildMemoryScopeReadinessReport(recordList);
  const semanticRecords = recordList.filter(hasEmbeddingEvidence);
  const structuredFactRecords = recordList.filter(hasStructuredFactEvidence);
  const contextRecords = recordList.filter(hasContextEvidence);
  const curationReady = scopeReport.unscopedRecords === 0 && scopeReport.crossScopeDuplicates.length === 0;

  // Resolve each capability group once; `ok` and `evidence` share the result.
  const hasRecords = recordList.length > 0;
  const rawStorage = boolCapability(caps, 'rawStorage', 'fileLayer');
  const semanticSearch = boolCapability(caps, 'semanticSearch', 'vectorDbLayer');
  const structuredFacts = boolCapability(caps, 'structuredFacts', 'structuredFactsLayer');
  const autoCuration = boolCapability(caps, 'autoCuration', 'dedupe', 'autoCurationLayer');
  const scopedRetrieval = boolCapability(caps, 'contextPacks', 'contextLayer', 'scopedRetrieval');

  const layerById = new Map(MEMORY_OS_LAYERS.map((layer) => [layer.id, layer]));

  const checks = [
    {
      id: 'file_layer',
      ok: hasRecords || rawStorage,
      evidence: {
        records: recordList.length,
        durableStore: hasRecords || rawStorage,
      },
      recommendation: 'Capture raw feedback, action receipts, and tool outcomes before promoting memories.',
    },
    {
      id: 'vector_db_layer',
      ok: semanticRecords.length > 0 || semanticSearch,
      evidence: {
        semanticRecords: semanticRecords.length,
        semanticSearch,
      },
      recommendation: 'Index lessons with semantic keys or embeddings so related failures are retrieved before action.',
    },
    {
      id: 'structured_facts_layer',
      ok: structuredFactRecords.length > 0 || structuredFacts,
      evidence: {
        structuredFactRecords: structuredFactRecords.length,
        structuredFacts,
      },
      recommendation: 'Store confirmed customer, project, policy, and budget facts as typed records, not just prose.',
    },
    {
      id: 'auto_curation_layer',
      // Unlike the other layers this one needs BOTH clean scope data and a declared capability.
      ok: curationReady && autoCuration,
      evidence: {
        unscopedRecords: scopeReport.unscopedRecords,
        crossScopeDuplicates: scopeReport.crossScopeDuplicates.length,
        autoCuration,
      },
      recommendation: 'Run dedupe, contradiction, stale-memory, and scope-isolation checks before memories can become gates.',
    },
    {
      id: 'context_layer',
      ok: contextRecords.length > 0 || scopedRetrieval,
      evidence: {
        contextRecords: contextRecords.length,
        scopedRetrieval,
      },
      recommendation: 'Inject scoped context packs per task instead of loading every memory into the model window.',
    },
    {
      id: 'interface_layer',
      ok: boolCapability(caps, 'mcp', 'cli', 'hooks', 'dashboard', 'interfaceLayer'),
      evidence: {
        cli: boolCapability(caps, 'cli'),
        mcp: boolCapability(caps, 'mcp'),
        hooks: boolCapability(caps, 'hooks'),
        dashboard: boolCapability(caps, 'dashboard'),
      },
      recommendation: 'Expose the same memory contract through CLI, MCP, hooks, dashboard, and agent adapters.',
    },
  ].map((check) => ({
    // Static layer metadata first so the computed check fields win on any key clash.
    ...layerById.get(check.id),
    ...check,
  }));

  const failing = checks.filter((check) => !check.ok);
  const missingLayers = failing.map((check) => check.id);

  let riskLevel = 'high';
  if (missingLayers.length === 0) {
    riskLevel = 'low';
  } else if (missingLayers.length <= 2) {
    riskLevel = 'medium';
  }

  return {
    ready: missingLayers.length === 0,
    riskLevel,
    layers: checks,
    missingLayers,
    scopeReport,
    recommendations: failing.map((check) => check.recommendation),
  };
}
|
|
384
|
+
|
|
231
385
|
function selectRecordsForScope(records = [], requestedScope = {}, options = {}) {
|
|
232
386
|
const requested = normalizeScope(requestedScope);
|
|
233
387
|
const requestedKey = memoryScopeKey(requested);
|
|
@@ -265,6 +419,7 @@ function buildMemoriStyleBenchmarkRecords() {
|
|
|
265
419
|
projectId: 'thumbgate',
|
|
266
420
|
processId: 'agent-a',
|
|
267
421
|
sessionId: 'session-1',
|
|
422
|
+
metadata: { semanticKey: 'checkout-readiness', contextPackId: 'checkout-pro' },
|
|
268
423
|
content: 'Use the paid sprint checklist before changing checkout code.',
|
|
269
424
|
},
|
|
270
425
|
{
|
|
@@ -298,14 +453,18 @@ function buildMemoriStyleBenchmarkRecords() {
|
|
|
298
453
|
processId: 'agent-a',
|
|
299
454
|
sessionId: 'session-1',
|
|
300
455
|
visibility: 'shared',
|
|
456
|
+
type: 'fact',
|
|
457
|
+
factKey: 'checkout.mutation_policy',
|
|
301
458
|
content: 'Shared rule: checkout mutations require audit evidence.',
|
|
302
459
|
},
|
|
303
460
|
];
|
|
304
461
|
}
|
|
305
462
|
|
|
306
463
|
module.exports = {
|
|
464
|
+
MEMORY_OS_LAYERS,
|
|
307
465
|
REQUIRED_SCOPE_FIELDS,
|
|
308
466
|
buildMemoriStyleBenchmarkRecords,
|
|
467
|
+
buildMemoryOsLayerReport,
|
|
309
468
|
buildMemoryScopeReadinessReport,
|
|
310
469
|
isSharedMemory,
|
|
311
470
|
memoryScopeKey,
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const { getFeedbackPaths } = require('./feedback-loop');
|
|
6
|
+
const { ensureDir } = require('./fs-utils');
|
|
7
|
+
const { loadOptionalModule } = require('./private-core-boundary');
|
|
8
|
+
|
|
9
|
+
// Stand-in launcher API handed to loadOptionalModule for 'hosted-job-launcher'.
// Both operations fail loudly with the same messages a caller would otherwise see.
// NOTE(review): presumably used only when the optional module cannot be loaded —
// confirm against ./private-core-boundary.
const buildLauncherFallback = () => {
  const launchManagedJob = () => {
    throw new Error('Managed jobs require ThumbGate-Core.');
  };
  const resumeHostedJob = () => {
    throw new Error('Resuming hosted jobs requires ThumbGate-Core.');
  };
  return { launchManagedJob, resumeHostedJob };
};

// Stand-in runner API for 'async-job-runner': reports no job state and no jobs.
const buildRunnerFallback = () => {
  const readJobState = () => null;
  const listJobStates = () => [];
  return { readJobState, listJobStates };
};

const launcher = loadOptionalModule(path.join(__dirname, 'hosted-job-launcher'), buildLauncherFallback);

const runner = loadOptionalModule(path.join(__dirname, 'async-job-runner'), buildRunnerFallback);

const { launchManagedJob, resumeHostedJob } = launcher;
const { readJobState, listJobStates } = runner;

// Subtasks allowed in flight at once when the caller gives no concurrency.
const DEFAULT_CONCURRENCY = 3;
// Delay between job-state polls, in milliseconds.
const POLL_INTERVAL_MS = 200;
|
|
28
|
+
|
|
29
|
+
/**
 * Current wall-clock time as an ISO-8601 string.
 *
 * @returns {string} Result of Date#toISOString() for "now".
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
|
|
32
|
+
|
|
33
|
+
// Build a subtask that consists of exactly one shell stage.
function singleStageSubtask(name, tags, stageName, command) {
  return {
    name,
    tags,
    stages: [
      {
        name: stageName,
        command,
      },
    ],
  };
}

/**
 * Dynamically decompose a high-level objective into parallel, specialized subtasks.
 * Supports rule-based fallback and can be extended to use LLM planning.
 *
 * Matching is keyword based and exclusive: security keywords win over
 * performance keywords, and anything else gets the general-purpose plan.
 *
 * @param {*} objective - Objective text. Non-string values are stringified for
 *   keyword matching instead of throwing; null/undefined match nothing.
 * @returns {{objective: *, plannedAt: string, subtasks: Array<object>}} The
 *   original objective, the planning timestamp, and the subtasks, each with a
 *   generated `id` and autoImprove/verificationMode/recordFeedback switched off.
 */
function planWorkflow(objective) {
  // `(objective || '').toLowerCase()` threw for truthy non-strings (numbers, objects).
  const obj = (objective ? String(objective) : '').toLowerCase().trim();
  const mentions = (...keywords) => keywords.some((keyword) => obj.includes(keyword));

  let subtasks;
  if (mentions('security', 'audit', 'leak', 'secret')) {
    subtasks = [
      singleStageSubtask('scan_secrets', ['security', 'secret-scanner'], 'secret_scan', 'node scripts/secret-scanner.js --json || true'),
      singleStageSubtask('audit_dependencies', ['security', 'dependencies'], 'npm_audit', 'npm audit --json || true'),
      singleStageSubtask('check_permissions', ['security', 'credentials'], 'credential_gate_check', 'node scripts/single-use-credential-gate.js plan || true'),
    ];
  } else if (mentions('performance', 'benchmark', 'bench')) {
    subtasks = [
      singleStageSubtask('benchmark_candidates', ['performance', 'bench'], 'run_bench', 'npx thumbgate bench --json --min-score=90 || true'),
      singleStageSubtask('check_budget', ['performance', 'budget'], 'budget_status', 'node scripts/budget-guard.js --status || true'),
    ];
  } else {
    // Default general-purpose fallback workflow: code search and check integrity
    subtasks = [
      singleStageSubtask('code_search', ['exploration'], 'search_fs', 'node scripts/filesystem-search.js --query="pretool" --limit=5 || true'),
      singleStageSubtask('check_integrity', ['integrity'], 'ops_integrity', 'node scripts/operational-integrity.js --ci || true'),
    ];
  }

  return {
    objective,
    plannedAt: nowIso(),
    subtasks: subtasks.map((task, idx) => ({
      ...task,
      // Timestamp + position + random suffix keeps ids distinct within and across plans.
      id: `subtask_${Date.now()}_${idx}_${Math.random().toString(36).slice(2, 6)}`,
      autoImprove: false,
      verificationMode: 'none',
      recordFeedback: false,
    })),
  };
}
|
|
129
|
+
|
|
130
|
+
/**
 * Execute a list of planned subtasks in parallel, respecting a concurrency limit.
 * Polls active jobs until all complete, then consolidates the results.
 *
 * Writes `report.md` and `results.json` under
 * `<FEEDBACK_DIR>/workflows/<workflowId>/`.
 *
 * @param {*} objective - Objective handed to planWorkflow().
 * @param {object} [options]
 * @param {number} [options.concurrency] - Max subtasks in flight; any value
 *   that is not a positive number falls back to DEFAULT_CONCURRENCY.
 * @param {number} [options.timeoutMs] - Overall deadline in ms (default 60000).
 * @param {string} [options.cwd] - Working directory forwarded to launchManagedJob.
 * @returns {Promise<{workflowId: string, objective: *, durationMs: number,
 *   reportPath: string, results: Array<object>}>}
 * @throws Rejects when launching a job or reading job state throws.
 */
async function executeWorkflow(objective, options = {}) {
  const opts = options || {};
  const plan = planWorkflow(objective);

  // A zero/negative limit could never launch anything and would idle until the
  // timeout, so only positive values are honoured.
  const requestedConcurrency = Number(opts.concurrency);
  const concurrency = requestedConcurrency > 0 ? requestedConcurrency : DEFAULT_CONCURRENCY;
  const timeoutMs = Number(opts.timeoutMs) || 60000; // 60s timeout safety

  const { FEEDBACK_DIR } = getFeedbackPaths();
  const workflowId = `wf_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  const workflowDir = path.join(FEEDBACK_DIR, 'workflows', workflowId);
  ensureDir(workflowDir);

  const activeJobs = new Map();
  const queue = [...plan.subtasks];
  const results = [];
  const start = Date.now();

  // Launch queued subtasks until the concurrency limit is reached.
  const runNext = () => {
    while (activeJobs.size < concurrency && queue.length > 0) {
      const task = queue.shift();
      const launched = launchManagedJob(task, { cwd: opts.cwd });
      activeJobs.set(task.id, {
        jobId: launched.jobId,
        taskName: task.name,
        launchedAt: Date.now(),
      });
    }
  };

  runNext();

  // Polling loop
  await new Promise((resolve, reject) => {
    const interval = setInterval(() => {
      try {
        let allDone = true;

        // Entries added by runNext() during this loop are visited in the same pass.
        for (const [taskId, info] of activeJobs.entries()) {
          const jobState = readJobState(info.jobId);
          if (!jobState) {
            // State not readable yet: keep waiting for it.
            allDone = false;
            continue;
          }

          const isTerminal = ['completed', 'failed', 'cancelled'].includes(jobState.status);
          if (!isTerminal) {
            allDone = false;
            continue;
          }

          results.push({
            taskId,
            taskName: info.taskName,
            jobId: info.jobId,
            status: jobState.status,
            context: jobState.currentContext,
            stageHistory: jobState.stageHistory,
            lastError: jobState.lastError,
          });
          activeJobs.delete(taskId);
          runNext();
        }

        if (allDone && queue.length === 0) {
          clearInterval(interval);
          resolve();
          return;
        }

        if (Date.now() - start >= timeoutMs) {
          clearInterval(interval);
          // Timeout remaining active tasks
          for (const [taskId, info] of activeJobs.entries()) {
            results.push({
              taskId,
              taskName: info.taskName,
              jobId: info.jobId,
              status: 'timeout',
              lastError: { message: `Subtask timed out after ${timeoutMs}ms`, code: 'TIMEOUT' },
            });
          }
          // Subtasks that never left the queue are reported too; otherwise they
          // would vanish from both the results and the failure count.
          for (const task of queue) {
            results.push({
              taskId: task.id,
              taskName: task.name,
              jobId: null,
              status: 'timeout',
              lastError: {
                message: `Subtask was never launched; workflow timed out after ${timeoutMs}ms`,
                code: 'TIMEOUT',
              },
            });
          }
          resolve();
        }
      } catch (err) {
        // A throw inside a timer callback would otherwise surface as an uncaught
        // exception while this promise stayed pending.
        clearInterval(interval);
        reject(err);
      }
    }, POLL_INTERVAL_MS);
  });

  const durationMs = Date.now() - start;

  // Compile final markdown report
  const reportPath = path.join(workflowDir, 'report.md');
  const reportContent = compileWorkflowReport(plan, results, durationMs, workflowId);
  fs.writeFileSync(reportPath, reportContent, 'utf8');

  // Also save the raw execution results JSON
  const resultsJsonPath = path.join(workflowDir, 'results.json');
  fs.writeFileSync(resultsJsonPath, JSON.stringify({
    workflowId,
    objective,
    durationMs,
    plan,
    results,
  }, null, 2) + '\n', 'utf8');

  return {
    workflowId,
    objective,
    durationMs,
    reportPath,
    results,
  };
}
|
|
239
|
+
|
|
240
|
+
// Render a subtask's output context for the report's fenced JSON block.
function formatContextPreview(context) {
  if (typeof context !== 'string') {
    // Job contexts may be objects; JSON.parse(object) throws and objects have
    // no slice(), which used to crash the whole report.
    try {
      const rendered = JSON.stringify(context, null, 2);
      return rendered === undefined ? String(context) : rendered;
    } catch {
      return String(context);
    }
  }
  try {
    // Try parsing output context as JSON for clean formatting
    return JSON.stringify(JSON.parse(context), null, 2);
  } catch {
    return context.slice(0, 1000) + (context.length > 1000 ? '\n... (truncated)' : '');
  }
}

/**
 * Render the markdown report for one workflow run.
 *
 * @param {{objective: *, subtasks: Array<object>}} plan - Plan as returned by planWorkflow().
 * @param {Array<object>} results - Per-subtask results (taskId, taskName, jobId,
 *   status, optional context and lastError).
 * @param {number} durationMs - Total run time in milliseconds.
 * @param {string} workflowId - Identifier printed in the report title.
 * @returns {string} Markdown text: header, summary counts, one section per result.
 */
function compileWorkflowReport(plan, results, durationMs, workflowId) {
  const timestamp = nowIso();
  const totalSubtasks = plan.subtasks.length;
  const completed = results.filter((r) => r.status === 'completed').length;
  const failed = results.filter((r) => r.status === 'failed' || r.status === 'timeout').length;

  const parts = [
    `# Dynamic Workflow Execution Report: ${workflowId}\n\n`,
    `**Objective:** ${plan.objective}\n`,
    `**Executed At:** ${timestamp}\n`,
    `**Duration:** ${(durationMs / 1000).toFixed(2)}s\n`,
    `**Status:** ${completed === totalSubtasks ? '✅ SUCCESS' : '⚠️ COMPLETED WITH FAILURES'}\n\n`,
    `## Summary\n`,
    `- Total planned subtasks: ${totalSubtasks}\n`,
    `- Completed successfully: ${completed}\n`,
    `- Failed/Timed out: ${failed}\n\n`,
    `## Subtask Breakdown\n\n`,
  ];

  for (const res of results) {
    const taskPlan = plan.subtasks.find((t) => t.id === res.taskId) || {};
    // Only the first stage's command is shown.
    const commandUsed = taskPlan.stages && taskPlan.stages[0] ? taskPlan.stages[0].command : 'N/A';
    const statusLabel = res.status === 'completed'
      ? '✅ COMPLETED'
      : '❌ ' + String(res.status).toUpperCase();

    parts.push(`### ✦ Subtask: \`${res.taskName}\`\n`);
    parts.push(`- **Job ID:** \`${res.jobId}\`\n`);
    parts.push(`- **Status:** ${statusLabel}\n`);
    parts.push(`- **Command Run:** \`${commandUsed}\`\n`);

    if (res.lastError) {
      parts.push(`- **Error:** \`${res.lastError.message}\` (Code: \`${res.lastError.code}\`)\n`);
    }

    if (res.context) {
      parts.push(`\n**Output Context Preview:**\n\`\`\`json\n`);
      parts.push(formatContextPreview(res.context));
      parts.push(`\n\`\`\`\n`);
    }
    parts.push(`\n---\n\n`);
  }

  return parts.join('');
}
|
|
288
|
+
|
|
289
|
+
module.exports = {
|
|
290
|
+
planWorkflow,
|
|
291
|
+
executeWorkflow,
|
|
292
|
+
compileWorkflowReport,
|
|
293
|
+
};
|