thumbgate 1.26.7 → 1.27.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/agentic-verify.txt +1 -0
- package/.well-known/llms.txt +2 -0
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +20 -9
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/gcp/dfcx-webhook-gate.js +295 -0
- package/adapters/mcp/server-stdio.js +28 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bench/thumbgate-bench.json +2 -2
- package/bin/cli.js +147 -10
- package/bin/dashboard-cli.js +7 -0
- package/config/gate-classifier-routing.json +98 -0
- package/config/gate-templates.json +60 -0
- package/config/mcp-allowlists.json +8 -7
- package/config/model-candidates.json +71 -6
- package/package.json +26 -10
- package/public/chatgpt-app.html +330 -0
- package/public/codex-plugin.html +66 -14
- package/public/dashboard.html +203 -17
- package/public/index.html +79 -4
- package/public/learn.html +70 -0
- package/public/lessons.html +129 -6
- package/public/numbers.html +2 -2
- package/public/pricing.html +20 -2
- package/scripts/agent-operations-planner.js +621 -0
- package/scripts/agent-reward-model.js +53 -1
- package/scripts/ai-component-inventory.js +367 -0
- package/scripts/classifier-routing.js +130 -0
- package/scripts/cli-schema.js +26 -0
- package/scripts/dashboard-chat.js +64 -17
- package/scripts/feedback-sanitizer.js +105 -0
- package/scripts/gates-engine.js +258 -61
- package/scripts/hybrid-feedback-context.js +141 -7
- package/scripts/memory-scope-readiness.js +159 -0
- package/scripts/parallel-workflow-orchestrator.js +293 -0
- package/scripts/plausible-domain-config.js +86 -0
- package/scripts/plausible-server-events.js +4 -2
- package/scripts/proxy-pointer-rag-guardrails.js +42 -1
- package/scripts/qa-scenario-planner.js +136 -0
- package/scripts/repeat-metric.js +28 -12
- package/scripts/secret-fixture-tokens.js +61 -0
- package/scripts/secret-scanner.js +44 -5
- package/scripts/security-scanner.js +80 -0
- package/scripts/seo-gsd.js +53 -0
- package/scripts/thumbgate-bench.js +16 -1
- package/scripts/tool-registry.js +37 -0
- package/scripts/workflow-sentinel.js +189 -4
- package/src/api/server.js +276 -10
package/scripts/seo-gsd.js
CHANGED
|
@@ -394,6 +394,58 @@ function buildSemanticPseoGuide() {
|
|
|
394
394
|
});
|
|
395
395
|
}
|
|
396
396
|
|
|
397
|
+
// Declarative content spec for the "zero trust for AI coding agents" guide page.
// buildZeroTrustGuide() reads this object and passes it to preActionGuide():
//   - slug: first argument to preActionGuide
//   - meta: spread into the guide options (query, title, heroTitle, heroSummary)
//   - sections: [kind, heading, entries] tuples, each fed to buildSectionFromSpec
//   - faq: [question, answerText] tuples, each fed to answer()
// NOTE: Object.freeze is shallow — only the top-level keys are locked; the nested
// meta object and the arrays below remain mutable.
const ZERO_TRUST_GUIDE_SPEC = Object.freeze({
  slug: 'ai-coding-agent-zero-trust',
  meta: {
    query: 'zero trust for ai coding agents',
    title: 'Zero Trust for AI Coding Agents | Enforce It at the Tool Call',
    heroTitle: 'Zero Trust for AI Coding Agents, Enforced at the Tool Call',
    heroSummary: 'Zero trust for agents means never trust, always verify; least privilege; assume breach. ThumbGate is the local-first way to enforce those principles for Claude Code, Cursor, and Codex — blocking dangerous tool calls before they run, and turning every thumbs-down into a prevention rule so the same mistake never repeats.',
  },
  takeaways: [
    'Zero trust for agents means verifying every action at the boundary where it executes — the tool call — instead of trusting the model’s stated intent.',
    'ThumbGate runs in the PreToolUse hook on your machine: rm -rf, secret writes, off-scope edits, and bad git push are blocked before execution (assume breach, least privilege).',
    'Unlike static DIY hooks, ThumbGate learns — a thumbs-down becomes an auto-promoted prevention rule that holds across every session, model, and agent.',
  ],
  // Each section is a [kind, heading, entries] tuple; kinds used here are 'paragraphs' and 'bullets'.
  sections: [
    ['paragraphs', 'Why AI coding agents need zero trust at the tool call', [
      'A coding agent reads files, runs shell commands, calls APIs, and pushes code with minimal human approval at each step. If it is manipulated, misconfigured, or simply wrong, the blast radius is whatever it can execute — and unlike a human, it does not pause to question a suspicious request.',
      'Zero-trust security for agents adapts three principles to this reality: never trust, always verify; least privilege; and assume breach. The practical place to apply them is the action boundary — the moment before a tool call runs — not the model’s prompt or its good intentions.',
    ]],
    ['bullets', 'ThumbGate vs. rolling your own Claude Code hooks', [
      'Static hooks and community repos do pattern-matching you write and maintain by hand, per machine, per project. ThumbGate ships the same blocking and adds a learning layer on top.',
      'A thumbs-down on a bad action becomes an auto-promoted prevention rule — the repeat is blocked automatically next time, on every session and every agent, with zero extra config.',
      'Local-first: enforcement runs in the PreToolUse hook on the developer machine, not a server-side gateway, so it works the moment you npx thumbgate init.',
      'Works across Claude Code, Cursor, Codex, Gemini, Amp, Cline, and OpenCode — one rule set, every MCP-compatible agent.',
    ]],
    ['paragraphs', 'How ThumbGate maps to the zero-trust principles', [
      'Never trust, always verify: every high-risk tool call is checked against prevention rules and workflow shape before it executes. Least privilege: task scope and approval gates keep an agent inside its declared blast radius. Assume breach: dangerous commands are blocked before they touch the disk, so a compromised or confused agent cannot do damage on the way to being caught.',
      'This is enforcement, not observability. ThumbGate decides at the tool call whether the action runs — which is exactly where zero-trust controls have to live for autonomous agents.',
    ]],
  ],
  // Each FAQ entry is a [question, answerText] tuple.
  faq: [
    [
      'Isn’t this just Claude Code’s built-in hooks?',
      'Native hooks and community repos do static pattern-matching that you author and maintain per machine. ThumbGate adds the learning layer: a thumbs-down becomes a prevention rule that blocks the repeat automatically, across sessions and agents — the part static hooks cannot do.',
    ],
    [
      'How does ThumbGate enforce zero trust for AI agents?',
      'It applies the core principles at the tool-call boundary on your machine: never trust, always verify (every risky action is checked before it runs), least privilege (task scope and approval gates), and assume breach (dangerous calls are blocked before they touch disk).',
    ],
  ],
  relatedPaths: ['/guides/pre-action-checks', '/guides/agent-harness-optimization'],
});
|
|
438
|
+
|
|
439
|
+
/**
 * Build the zero-trust guide page blueprint from ZERO_TRUST_GUIDE_SPEC.
 *
 * Section tuples are converted with buildSectionFromSpec and FAQ tuples with
 * answer(); the result is handed to preActionGuide together with the spec's
 * meta fields, takeaways, and related paths.
 *
 * @returns {*} Whatever preActionGuide returns for this slug and options.
 */
function buildZeroTrustGuide() {
  const {
    slug,
    meta,
    takeaways,
    sections: sectionSpecs,
    faq: faqSpecs,
    relatedPaths,
  } = ZERO_TRUST_GUIDE_SPEC;

  const sections = sectionSpecs.map(([kind, heading, entries]) => buildSectionFromSpec(kind, heading, entries));
  const faq = faqSpecs.map(([question, text]) => answer(question, text));

  return preActionGuide(slug, {
    ...meta,
    takeaways,
    sections,
    faq,
    relatedPaths,
  });
}
|
|
448
|
+
|
|
397
449
|
const PROXY_POINTER_RAG_GUARDRAILS_SPEC = Object.freeze({
|
|
398
450
|
slug: 'proxy-pointer-rag-guardrails',
|
|
399
451
|
meta: {
|
|
@@ -1536,6 +1588,7 @@ const PAGE_BLUEPRINTS = [
|
|
|
1536
1588
|
relatedPaths: ['/compare/speclock', '/guides/claude-code-feedback'],
|
|
1537
1589
|
},
|
|
1538
1590
|
buildSemanticPseoGuide(),
|
|
1591
|
+
buildZeroTrustGuide(),
|
|
1539
1592
|
buildProxyPointerRagGuide(),
|
|
1540
1593
|
buildRagPrecisionTuningGuide(),
|
|
1541
1594
|
buildAiEngineeringStackGuide(),
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
const fs = require('node:fs');
|
|
5
5
|
const os = require('node:os');
|
|
6
6
|
const path = require('node:path');
|
|
7
|
+
const { expandFixturePlaceholders } = require('./secret-fixture-tokens');
|
|
7
8
|
|
|
8
9
|
const ROOT = path.join(__dirname, '..');
|
|
9
10
|
const DEFAULT_SUITE_PATH = path.join(ROOT, 'bench', 'thumbgate-bench.json');
|
|
@@ -180,6 +181,20 @@ function assertObject(value, label) {
|
|
|
180
181
|
}
|
|
181
182
|
}
|
|
182
183
|
|
|
184
|
+
/**
 * Recursively expand fixture placeholders in every string of a scenario value.
 *
 * Strings are passed through expandFixturePlaceholders; arrays and plain
 * objects are rebuilt with each element/property value expanded; every other
 * value (numbers, booleans, null, undefined, ...) is returned untouched.
 * Object keys are never rewritten — only values.
 *
 * @param {*} value - Scenario, or any nested piece of one.
 * @returns {*} A new structure with strings expanded; non-container,
 *   non-string inputs are returned as-is.
 */
function expandScenarioFixturePlaceholders(value) {
  if (typeof value === 'string') {
    return expandFixturePlaceholders(value);
  }
  // Primitives, null, and undefined have nothing to expand.
  if (value === null || typeof value !== 'object') {
    return value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => expandScenarioFixturePlaceholders(item));
  }
  const expandedPairs = Object.entries(value).map(([key, nestedValue]) => [
    key,
    expandScenarioFixturePlaceholders(nestedValue),
  ]);
  return Object.fromEntries(expandedPairs);
}
|
|
197
|
+
|
|
183
198
|
function loadScenarioSuite(filePath = DEFAULT_SUITE_PATH) {
|
|
184
199
|
const suite = readJson(filePath);
|
|
185
200
|
assertObject(suite, 'Scenario suite');
|
|
@@ -202,7 +217,7 @@ function loadScenarioSuite(filePath = DEFAULT_SUITE_PATH) {
|
|
|
202
217
|
throw new Error(`Scenario ${id} has invalid expectedDecision`);
|
|
203
218
|
}
|
|
204
219
|
return {
|
|
205
|
-
...scenario,
|
|
220
|
+
...expandScenarioFixturePlaceholders(scenario),
|
|
206
221
|
id,
|
|
207
222
|
unsafe: Boolean(scenario.unsafe),
|
|
208
223
|
positivePattern: Boolean(scenario.positivePattern),
|
package/scripts/tool-registry.js
CHANGED
|
@@ -161,6 +161,19 @@ const TOOLS = [
|
|
|
161
161
|
required: ['toolName'],
|
|
162
162
|
},
|
|
163
163
|
}),
|
|
164
|
+
readOnlyTool({
|
|
165
|
+
name: 'ai_component_inventory',
|
|
166
|
+
description: 'Scan a project for AI/ML provider SDKs, agent frameworks, vector databases, Vertex/Gemini/Dialogflow CX usage, and model artifacts. Returns evidence suitable for enterprise AI inventory and ML-BOM review.',
|
|
167
|
+
inputSchema: {
|
|
168
|
+
type: 'object',
|
|
169
|
+
properties: {
|
|
170
|
+
rootDir: { type: 'string', description: 'Project root to scan. Defaults to the current process working directory.' },
|
|
171
|
+
format: { type: 'string', enum: ['summary', 'json', 'cyclonedx'], description: 'Response format. summary is compact text; json returns ThumbGate inventory; cyclonedx returns ML-BOM JSON.' },
|
|
172
|
+
maxFiles: { type: 'number', description: 'Maximum files to scan (default 2500).' },
|
|
173
|
+
includeSnippets: { type: 'boolean', description: 'Include matched source snippets in evidence. Defaults true.' },
|
|
174
|
+
},
|
|
175
|
+
},
|
|
176
|
+
}),
|
|
164
177
|
readOnlyTool({
|
|
165
178
|
name: 'search_thumbgate',
|
|
166
179
|
description: 'Search raw ThumbGate state across feedback logs, ContextFS memory, prevention rules, and imported policy documents.',
|
|
@@ -818,6 +831,17 @@ const TOOLS = [
|
|
|
818
831
|
items: { type: 'string' },
|
|
819
832
|
description: 'Optional protected-file globs that require explicit approval before editing or publishing',
|
|
820
833
|
},
|
|
834
|
+
workflowContract: {
|
|
835
|
+
type: 'object',
|
|
836
|
+
description: 'Optional deterministic workflow run contract. Supports workflowId, allowedBranches, blockedActions, requiredEvidence, and completionGate.',
|
|
837
|
+
properties: {
|
|
838
|
+
workflowId: { type: 'string' },
|
|
839
|
+
allowedBranches: { type: 'array', items: { type: 'string' } },
|
|
840
|
+
blockedActions: { type: 'array', items: { type: 'string' } },
|
|
841
|
+
requiredEvidence: { type: 'array', items: { type: 'string' } },
|
|
842
|
+
completionGate: { type: 'string' },
|
|
843
|
+
},
|
|
844
|
+
},
|
|
821
845
|
repoPath: { type: 'string', description: 'Optional repo root used when evaluating git diff scope' },
|
|
822
846
|
localOnly: { type: 'boolean', description: 'When true, also marks the task as local-only' },
|
|
823
847
|
clear: { type: 'boolean', description: 'Clear the current task scope instead of setting one' },
|
|
@@ -1460,6 +1484,19 @@ const TOOLS = [
|
|
|
1460
1484
|
},
|
|
1461
1485
|
},
|
|
1462
1486
|
}),
|
|
1487
|
+
destructiveTool({
|
|
1488
|
+
name: 'parallel_workflow',
|
|
1489
|
+
description: 'Execute a parallel, multi-step subtask workflow to resolve an objective like a security audit, performance benchmark, or repository inspection.',
|
|
1490
|
+
inputSchema: {
|
|
1491
|
+
type: 'object',
|
|
1492
|
+
required: ['objective'],
|
|
1493
|
+
properties: {
|
|
1494
|
+
objective: { type: 'string', description: 'The objective to plan and execute (e.g. security audit, performance benchmark)' },
|
|
1495
|
+
concurrency: { type: 'number', description: 'Maximum parallel subtasks (default 3)' },
|
|
1496
|
+
timeoutMs: { type: 'number', description: 'Timeout in milliseconds (default 60000)' },
|
|
1497
|
+
},
|
|
1498
|
+
},
|
|
1499
|
+
}),
|
|
1463
1500
|
];
|
|
1464
1501
|
|
|
1465
1502
|
// Normalize at export: guarantee EVERY tool carries a human-readable title and a
|
|
@@ -69,6 +69,9 @@ function loadGovernanceState() {
|
|
|
69
69
|
branchGovernance: raw && raw.branchGovernance && typeof raw.branchGovernance === 'object'
|
|
70
70
|
? raw.branchGovernance
|
|
71
71
|
: null,
|
|
72
|
+
workflowContract: raw && raw.workflowContract && typeof raw.workflowContract === 'object'
|
|
73
|
+
? raw.workflowContract
|
|
74
|
+
: null,
|
|
72
75
|
};
|
|
73
76
|
}
|
|
74
77
|
|
|
@@ -361,6 +364,116 @@ function formatFileList(files, limit = 5) {
|
|
|
361
364
|
return `${items.slice(0, limit).join(', ')} (+${items.length - limit} more)`;
|
|
362
365
|
}
|
|
363
366
|
|
|
367
|
+
/**
 * Coerce a list of arbitrary values into unique, trimmed, non-empty strings.
 *
 * Falsy entries (null, undefined, 0, false, '') collapse to the empty string
 * and are dropped. First-occurrence order is preserved.
 *
 * @param {*} values - Candidate list; anything that is not an array yields [].
 * @returns {string[]} De-duplicated, trimmed strings.
 */
function normalizeStringList(values) {
  if (!Array.isArray(values)) return [];
  const unique = new Set();
  for (const entry of values) {
    const text = String(entry || '').trim();
    if (text) unique.add(text);
  }
  return [...unique];
}
|
|
371
|
+
|
|
372
|
+
/**
 * Normalize a raw workflow contract into a canonical camelCase shape.
 *
 * Both camelCase and snake_case keys are accepted (camelCase wins when it is
 * truthy). String fields are trimmed and become null when empty; list fields
 * are normalized with normalizeStringList.
 *
 * @param {*} contract - Raw contract from tool input, options, or stored state.
 * @returns {{workflowId: (string|null), allowedBranches: string[],
 *   blockedActions: string[], requiredEvidence: string[],
 *   completionGate: (string|null)}|null} Normalized contract, or null when the
 *   input is not a plain contract object.
 */
function normalizeWorkflowContract(contract) {
  // Arrays also report typeof 'object'; without this guard an array would be
  // normalized into an all-empty contract that callers then treat as active.
  if (!contract || typeof contract !== 'object' || Array.isArray(contract)) return null;
  return {
    workflowId: String(contract.workflowId || contract.workflow_id || '').trim() || null,
    allowedBranches: normalizeStringList(contract.allowedBranches || contract.allowed_branches),
    blockedActions: normalizeStringList(contract.blockedActions || contract.blocked_actions),
    requiredEvidence: normalizeStringList(contract.requiredEvidence || contract.required_evidence),
    completionGate: String(contract.completionGate || contract.completion_gate || '').trim() || null,
  };
}
|
|
382
|
+
|
|
383
|
+
/**
 * Test whether a command matches a contract pattern.
 *
 * The pattern is first tried as a case-insensitive regular expression. If it
 * is not valid regex syntax, it is treated as a plain case-insensitive
 * substring instead.
 *
 * @param {*} command - Command text; coerced to a string.
 * @param {*} pattern - Regex source or literal text; coerced and trimmed.
 * @returns {boolean} True on a match; false when either side is empty.
 */
function commandMatchesPattern(command, pattern) {
  const haystack = String(command || '');
  const needle = String(pattern || '').trim();
  if (haystack === '' || needle === '') return false;

  let matcher;
  try {
    matcher = new RegExp(needle, 'i');
  } catch {
    // Not a valid regex: fall back to a literal, case-insensitive containment check.
    return haystack.toLowerCase().includes(needle.toLowerCase());
  }
  return matcher.test(haystack);
}
|
|
393
|
+
|
|
394
|
+
/**
 * Report whether a command looks like a "completion" step: git commit/push,
 * gh pr create/merge, gh release create, or an npm/yarn/pnpm publish.
 *
 * @param {*} command - Command text; coerced to a string (falsy becomes '').
 * @returns {boolean} True when the command contains one of those invocations.
 */
function isCompletionLikeAction(command) {
  const completionPattern = /\b(?:git\s+(?:commit|push)|gh\s+pr\s+(?:create|merge)|gh\s+release\s+create|npm\s+publish|yarn\s+publish|pnpm\s+publish)\b/i;
  const text = String(command || '');
  return completionPattern.test(text);
}
|
|
398
|
+
|
|
399
|
+
/**
 * Gather evidence labels supplied with a tool call, lower-cased and de-duplicated.
 *
 * Labels are read from several fields on the tool input and on the options.
 * Each field may be:
 *   - an array: its elements are taken as labels;
 *   - an object: every key whose value is truthy is taken as a label;
 *   - a string: split on commas, semicolons, and newlines.
 * Any other value is ignored.
 *
 * @param {object} [toolInput={}] - Tool input; evidence, evidenceLabels, proof,
 *   proofArtifacts, and requiredEvidenceSatisfied are consulted.
 * @param {object} [options={}] - Evaluation options; evidence, evidenceLabels,
 *   and proofArtifacts are consulted.
 * @returns {string[]} Normalized, lower-cased labels.
 */
function collectEvidenceLabels(toolInput = {}, options = {}) {
  const sources = [
    toolInput.evidence,
    toolInput.evidenceLabels,
    toolInput.proof,
    toolInput.proofArtifacts,
    toolInput.requiredEvidenceSatisfied,
    options.evidence,
    options.evidenceLabels,
    options.proofArtifacts,
  ];

  const rawLabels = sources.flatMap((source) => {
    if (Array.isArray(source)) return source;
    if (source && typeof source === 'object') {
      return Object.keys(source).filter((key) => source[key]);
    }
    if (typeof source === 'string') return source.split(/[,;\n]/);
    return [];
  });

  return normalizeStringList(rawLabels).map((label) => label.toLowerCase());
}
|
|
417
|
+
|
|
418
|
+
/**
 * Evaluate a pending action against a workflow run contract.
 *
 * Three independent checks run, in this order:
 *   1. blocked_action (block): the command matches one of the contract's
 *      blockedActions patterns (first match is reported).
 *   2. branch_outside_contract (warn): allowedBranches is non-empty, the
 *      current branch is known, and it matches none of the allowed globs.
 *      The check is skipped when no current branch is supplied.
 *   3. missing_required_evidence (block): the command looks like a completion
 *      step and at least one requiredEvidence label (compared
 *      case-insensitively) was not supplied.
 *
 * @param {*} contractInput - Raw contract; normalized via normalizeWorkflowContract.
 * @param {object} [context={}] - Reads command, currentBranch, toolInput, options.
 * @returns {{active: boolean, contract: (object|null), violations: object[],
 *   mode: ('allow'|'warn'|'block')}} Inactive/allow when there is no contract;
 *   otherwise 'block' if any violation blocks, 'warn' if any violation exists,
 *   else 'allow'.
 */
function evaluateWorkflowContract(contractInput, context = {}) {
  const contract = normalizeWorkflowContract(contractInput);
  if (!contract) {
    return {
      active: false,
      contract: null,
      violations: [],
      mode: 'allow',
    };
  }

  const { blockedActions, allowedBranches, requiredEvidence } = contract;
  const command = String(context.command || '');
  const currentBranch = String(context.currentBranch || '').trim();
  const violations = [];

  // 1. Explicitly blocked actions.
  const blockedAction = blockedActions.find((pattern) => commandMatchesPattern(command, pattern));
  if (blockedAction) {
    violations.push({
      code: 'blocked_action',
      severity: 'block',
      message: `Workflow contract blocks this action: ${blockedAction}`,
      pattern: blockedAction,
    });
  }

  // 2. Branch allow-list; only enforced when the current branch is known.
  if (allowedBranches.length > 0 && currentBranch) {
    const outsideContract = allowedBranches.every((glob) => !matchesAnyGlob(currentBranch, [glob]));
    if (outsideContract) {
      violations.push({
        code: 'branch_outside_contract',
        severity: 'warn',
        message: `Current branch ${currentBranch} is outside the workflow contract.`,
        currentBranch,
        allowedBranches,
      });
    }
  }

  // 3. Required evidence; only checked for completion-like commands.
  if (requiredEvidence.length > 0 && isCompletionLikeAction(command)) {
    const supplied = new Set(collectEvidenceLabels(context.toolInput || {}, context.options || {}));
    const missing = requiredEvidence.filter((label) => !supplied.has(label.toLowerCase()));
    if (missing.length > 0) {
      violations.push({
        code: 'missing_required_evidence',
        severity: 'block',
        message: `Workflow completion is missing required evidence: ${missing.join(', ')}`,
        missingEvidence: missing,
      });
    }
  }

  let mode = 'allow';
  if (violations.some((violation) => violation.severity === 'block')) {
    mode = 'block';
  } else if (violations.length > 0) {
    mode = 'warn';
  }

  return {
    active: true,
    contract,
    violations,
    mode,
  };
}
|
|
476
|
+
|
|
364
477
|
function severityFromScore(score) {
|
|
365
478
|
if (score >= 0.8) return 'critical';
|
|
366
479
|
if (score >= 0.55) return 'high';
|
|
@@ -434,6 +547,7 @@ function scoreRisk({
|
|
|
434
547
|
protectedSurface,
|
|
435
548
|
costControl,
|
|
436
549
|
workflowControl,
|
|
550
|
+
workflowContract,
|
|
437
551
|
actionProfile,
|
|
438
552
|
}) {
|
|
439
553
|
const drivers = [];
|
|
@@ -590,6 +704,17 @@ function scoreRisk({
|
|
|
590
704
|
);
|
|
591
705
|
}
|
|
592
706
|
}
|
|
707
|
+
if (workflowContract && workflowContract.active && workflowContract.violations.length > 0) {
|
|
708
|
+
for (const violation of workflowContract.violations) {
|
|
709
|
+
addDriver(
|
|
710
|
+
drivers,
|
|
711
|
+
`workflow_contract_${violation.code}`,
|
|
712
|
+
violation.severity === 'block' ? 0.38 : 0.18,
|
|
713
|
+
violation.message,
|
|
714
|
+
{ workflowId: workflowContract.contract && workflowContract.contract.workflowId }
|
|
715
|
+
);
|
|
716
|
+
}
|
|
717
|
+
}
|
|
593
718
|
if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
|
|
594
719
|
addDriver(
|
|
595
720
|
drivers,
|
|
@@ -663,6 +788,7 @@ function buildEvidence({
|
|
|
663
788
|
normalizedAction,
|
|
664
789
|
costControl,
|
|
665
790
|
workflowControl,
|
|
791
|
+
workflowContract,
|
|
666
792
|
actionProfile,
|
|
667
793
|
}) {
|
|
668
794
|
const evidence = [];
|
|
@@ -683,6 +809,15 @@ function buildEvidence({
|
|
|
683
809
|
evidence.push(`Workflow control ${workflowControl.mode}: ${workflowControl.reasons.join(' ')}`);
|
|
684
810
|
}
|
|
685
811
|
}
|
|
812
|
+
if (workflowContract && workflowContract.active) {
|
|
813
|
+
const workflowId = workflowContract.contract && workflowContract.contract.workflowId
|
|
814
|
+
? workflowContract.contract.workflowId
|
|
815
|
+
: 'unnamed';
|
|
816
|
+
evidence.push(`Workflow contract active: ${workflowId}.`);
|
|
817
|
+
for (const violation of workflowContract.violations.slice(0, 3)) {
|
|
818
|
+
evidence.push(`Workflow contract ${violation.severity}: ${violation.message}`);
|
|
819
|
+
}
|
|
820
|
+
}
|
|
686
821
|
if (actionProfile && actionProfile.backgroundAgent) {
|
|
687
822
|
evidence.push('Background or scheduled agent context detected for this action.');
|
|
688
823
|
}
|
|
@@ -801,6 +936,7 @@ function buildRemediations({
|
|
|
801
936
|
executionSurface,
|
|
802
937
|
costControl,
|
|
803
938
|
workflowControl,
|
|
939
|
+
workflowContract,
|
|
804
940
|
actionProfile,
|
|
805
941
|
}) {
|
|
806
942
|
const remediations = [];
|
|
@@ -903,6 +1039,33 @@ function buildRemediations({
|
|
|
903
1039
|
'High token or cost estimates should be reviewed before the model/tool loop continues.'
|
|
904
1040
|
);
|
|
905
1041
|
}
|
|
1042
|
+
if (workflowContract && workflowContract.active && workflowContract.violations.length > 0) {
|
|
1043
|
+
const codes = new Set(workflowContract.violations.map((violation) => violation.code));
|
|
1044
|
+
if (codes.has('missing_required_evidence')) {
|
|
1045
|
+
push(
|
|
1046
|
+
'attach_workflow_evidence',
|
|
1047
|
+
'Attach workflow evidence before completion',
|
|
1048
|
+
'Attach every required evidence label from the workflow contract before commit, push, PR, merge, release, or publish.',
|
|
1049
|
+
'Deterministic workflows should not claim completion until the run contract is proven.'
|
|
1050
|
+
);
|
|
1051
|
+
}
|
|
1052
|
+
if (codes.has('branch_outside_contract')) {
|
|
1053
|
+
push(
|
|
1054
|
+
'switch_to_contract_branch',
|
|
1055
|
+
'Move to an allowed workflow branch',
|
|
1056
|
+
'Switch to a branch allowed by the workflow contract or update the contract before retrying.',
|
|
1057
|
+
'Workflow contracts define where repeatable agent runs are allowed to mutate state.'
|
|
1058
|
+
);
|
|
1059
|
+
}
|
|
1060
|
+
if (codes.has('blocked_action')) {
|
|
1061
|
+
push(
|
|
1062
|
+
'remove_blocked_workflow_action',
|
|
1063
|
+
'Remove blocked workflow action',
|
|
1064
|
+
'Change the workflow step or request an explicit contract update before retrying this action.',
|
|
1065
|
+
'The run contract blocks this action before execution.'
|
|
1066
|
+
);
|
|
1067
|
+
}
|
|
1068
|
+
}
|
|
906
1069
|
if (workflowControl && workflowControl.mode && workflowControl.mode !== 'allow') {
|
|
907
1070
|
push(
|
|
908
1071
|
'add_environment_inspection',
|
|
@@ -1068,6 +1231,7 @@ function buildDecisionControl({
|
|
|
1068
1231
|
protectedSurface,
|
|
1069
1232
|
costControl,
|
|
1070
1233
|
workflowControl,
|
|
1234
|
+
workflowContract,
|
|
1071
1235
|
actionProfile,
|
|
1072
1236
|
}) {
|
|
1073
1237
|
const reversibility = classifyReversibility({
|
|
@@ -1082,11 +1246,14 @@ function buildDecisionControl({
|
|
|
1082
1246
|
const hasCostBlock = Boolean(costControl && costControl.mode === 'block');
|
|
1083
1247
|
const hasWorkflowWarning = Boolean(workflowControl && workflowControl.mode === 'warn');
|
|
1084
1248
|
const hasWorkflowBlock = Boolean(workflowControl && workflowControl.mode === 'block');
|
|
1249
|
+
const hasContractWarning = Boolean(workflowContract && workflowContract.mode === 'warn');
|
|
1250
|
+
const hasContractBlock = Boolean(workflowContract && workflowContract.mode === 'block');
|
|
1085
1251
|
const requiresCheckpoint = decision === 'warn'
|
|
1086
|
-
|| (decision === 'allow' && (reversibility !== 'two_way_door' || hasOperationalBlockers || hasCostWarning || hasWorkflowWarning));
|
|
1252
|
+
|| (decision === 'allow' && (reversibility !== 'two_way_door' || hasOperationalBlockers || hasCostWarning || hasWorkflowWarning || hasContractWarning));
|
|
1087
1253
|
const executionMode = decision === 'deny'
|
|
1088
1254
|
|| hasCostBlock
|
|
1089
1255
|
|| hasWorkflowBlock
|
|
1256
|
+
|| hasContractBlock
|
|
1090
1257
|
? 'blocked'
|
|
1091
1258
|
: requiresCheckpoint
|
|
1092
1259
|
? 'checkpoint_required'
|
|
@@ -1110,7 +1277,7 @@ function buildDecisionControl({
|
|
|
1110
1277
|
decisionOwner,
|
|
1111
1278
|
reversibility,
|
|
1112
1279
|
deliberation,
|
|
1113
|
-
requiresHumanApproval: (executionMode === 'checkpoint_required' && decisionOwner !== 'agent') || hasCostBlock || hasWorkflowBlock,
|
|
1280
|
+
requiresHumanApproval: (executionMode === 'checkpoint_required' && decisionOwner !== 'agent') || hasCostBlock || hasWorkflowBlock || hasContractBlock,
|
|
1114
1281
|
recommendedAction: executionMode === 'blocked'
|
|
1115
1282
|
? 'halt'
|
|
1116
1283
|
: executionMode === 'checkpoint_required'
|
|
@@ -1124,7 +1291,7 @@ function buildDecisionControl({
|
|
|
1124
1291
|
};
|
|
1125
1292
|
}
|
|
1126
1293
|
|
|
1127
|
-
function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blastRadius, command, costControl, workflowControl, actionProfile }) {
|
|
1294
|
+
function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blastRadius, command, costControl, workflowControl, workflowContract, actionProfile }) {
|
|
1128
1295
|
const hasOperationalBlockers = Boolean(integrity && Array.isArray(integrity.blockers) && integrity.blockers.length > 0);
|
|
1129
1296
|
if (costControl && costControl.mode === 'block') {
|
|
1130
1297
|
return 'deny';
|
|
@@ -1132,6 +1299,9 @@ function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blas
|
|
|
1132
1299
|
if (workflowControl && workflowControl.mode === 'block') {
|
|
1133
1300
|
return 'deny';
|
|
1134
1301
|
}
|
|
1302
|
+
if (workflowContract && workflowContract.mode === 'block') {
|
|
1303
|
+
return 'deny';
|
|
1304
|
+
}
|
|
1135
1305
|
const destructiveBypass = /\bgit\s+push\b.*(?:--force|-f)\b/i.test(command) || /\bgh\s+pr\s+merge\b.*--admin\b/i.test(command);
|
|
1136
1306
|
const learnedPrediction = learnedPolicy && learnedPolicy.enabled ? learnedPolicy.prediction : null;
|
|
1137
1307
|
const learnedHardStop = Boolean(
|
|
@@ -1189,7 +1359,7 @@ function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blas
|
|
|
1189
1359
|
if (economicAction || (backgroundAgent && riskScore >= 0.3)) {
|
|
1190
1360
|
return 'warn';
|
|
1191
1361
|
}
|
|
1192
|
-
if ((workflowControl && workflowControl.mode === 'warn') || (costControl && costControl.mode === 'warn') || riskScore >= 0.45 || (learnedWarning && riskScore >= 0.3) || (learnedRecall && riskScore >= 0.34)) {
|
|
1362
|
+
if ((workflowContract && workflowContract.mode === 'warn') || (workflowControl && workflowControl.mode === 'warn') || (costControl && costControl.mode === 'warn') || riskScore >= 0.45 || (learnedWarning && riskScore >= 0.3) || (learnedRecall && riskScore >= 0.34)) {
|
|
1193
1363
|
return 'warn';
|
|
1194
1364
|
}
|
|
1195
1365
|
return 'allow';
|
|
@@ -1243,6 +1413,15 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
1243
1413
|
requireVersionNotBehindBase: options.requireVersionNotBehindBase === true,
|
|
1244
1414
|
branchGovernance: governanceState.branchGovernance,
|
|
1245
1415
|
});
|
|
1416
|
+
const workflowContract = evaluateWorkflowContract(
|
|
1417
|
+
normalizedToolInput.workflowContract || options.workflowContract || governanceState.workflowContract,
|
|
1418
|
+
{
|
|
1419
|
+
command: normalizedToolInput.command || '',
|
|
1420
|
+
currentBranch: integrity.currentBranch,
|
|
1421
|
+
toolInput: normalizedToolInput,
|
|
1422
|
+
options,
|
|
1423
|
+
}
|
|
1424
|
+
);
|
|
1246
1425
|
const taskScopeViolation = buildTaskScopeViolation(governanceState.taskScope, affectedFiles);
|
|
1247
1426
|
const protectedSurface = buildProtectedSurface(governanceState, affectedFiles);
|
|
1248
1427
|
const protectedSurfaceForRisk = isProtectedApprovalRelevant(normalizedToolName, normalizedToolInput)
|
|
@@ -1290,6 +1469,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
1290
1469
|
protectedSurface: protectedSurfaceForRisk,
|
|
1291
1470
|
costControl,
|
|
1292
1471
|
workflowControl,
|
|
1472
|
+
workflowContract,
|
|
1293
1473
|
actionProfile,
|
|
1294
1474
|
});
|
|
1295
1475
|
const executionSurface = buildDockerSandboxPlan({
|
|
@@ -1316,6 +1496,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
1316
1496
|
command: normalizedToolInput.command || '',
|
|
1317
1497
|
costControl,
|
|
1318
1498
|
workflowControl,
|
|
1499
|
+
workflowContract,
|
|
1319
1500
|
actionProfile,
|
|
1320
1501
|
});
|
|
1321
1502
|
const evidence = buildEvidence({
|
|
@@ -1328,6 +1509,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
1328
1509
|
normalizedAction,
|
|
1329
1510
|
costControl,
|
|
1330
1511
|
workflowControl,
|
|
1512
|
+
workflowContract,
|
|
1331
1513
|
actionProfile,
|
|
1332
1514
|
});
|
|
1333
1515
|
const remediations = buildRemediations({
|
|
@@ -1340,6 +1522,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
1340
1522
|
executionSurface,
|
|
1341
1523
|
costControl,
|
|
1342
1524
|
workflowControl,
|
|
1525
|
+
workflowContract,
|
|
1343
1526
|
actionProfile,
|
|
1344
1527
|
});
|
|
1345
1528
|
const summary = decision === 'allow'
|
|
@@ -1353,6 +1536,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
1353
1536
|
normalizedAction,
|
|
1354
1537
|
costControl,
|
|
1355
1538
|
workflowControl,
|
|
1539
|
+
workflowContract,
|
|
1356
1540
|
decision,
|
|
1357
1541
|
riskScore: risk.score,
|
|
1358
1542
|
band: risk.band,
|
|
@@ -1388,6 +1572,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
1388
1572
|
protectedSurface: protectedSurfaceForRisk,
|
|
1389
1573
|
costControl,
|
|
1390
1574
|
workflowControl,
|
|
1575
|
+
workflowContract,
|
|
1391
1576
|
actionProfile,
|
|
1392
1577
|
});
|
|
1393
1578
|
report.reasoning = buildReasoning(report);
|