thumbgate 1.14.1 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +60 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +217 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +211 -8
- package/config/enforcement.json +59 -7
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/default.json +33 -0
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/mcp-allowlists.json +4 -0
- package/config/merge-quality-checks.json +2 -1
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +70 -51
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +13 -7
- package/public/compare.html +29 -23
- package/public/dashboard.html +105 -12
- package/public/guide.html +28 -28
- package/public/index.html +233 -97
- package/public/learn.html +87 -20
- package/public/lessons.html +26 -2
- package/public/numbers.html +271 -0
- package/public/pro.html +89 -19
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-feedback.js +2 -1
- package/scripts/cli-schema.js +60 -5
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/commercial-offer.js +1 -1
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/contextfs.js +214 -32
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +141 -9
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +145 -1
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-canonical.js +181 -0
- package/scripts/lesson-db.js +71 -10
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/lesson-synthesis.js +23 -2
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/native-messaging-audit.js +514 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/pr-manager.js +47 -7
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/profile-router.js +16 -1
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/rule-validator.js +285 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +400 -43
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +75 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +434 -120
- package/.claude-plugin/README.md +0 -170
- package/adapters/README.md +0 -12
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
- package/skills/agent-memory/SKILL.md +0 -97
- package/skills/solve-architecture-autonomy/SKILL.md +0 -17
- package/skills/solve-architecture-autonomy/tool.js +0 -33
- package/skills/thumbgate-feedback/SKILL.md +0 -49
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Build the static plan object describing a governed document workflow.
 *
 * @param {object} [options]
 * @param {string} [options.provider] - Provider label; falls back to
 *   'secure_content_layer' when falsy.
 * @param {string} [options.workflow] - Workflow label; falls back to
 *   'document_intake_routing' when falsy.
 * @returns {{workflow: string, provider: string, steps: string[],
 *   zeroTrust: object, requiredEvidence: string[], gates: string[]}}
 *   A freshly allocated plan; the lists and flags are fixed literals.
 */
function buildDocumentWorkflowPlan(options = {}) {
  const { provider: requestedProvider, workflow: requestedWorkflow } = options;

  // Ordered pipeline the workflow is expected to follow.
  const steps = [
    'discover eligible folders through approved connector scope',
    'extract document metadata inside sandbox',
    'classify document type with structured output',
    'route document to approved destination',
    'write audit event for every read, extraction, decision, and route',
  ];

  const zeroTrust = {
    leastPrivilegeScopes: true,
    credentialsOutsideSandbox: true,
    noRawDocumentExportByDefault: true,
    perFolderApproval: true,
  };

  const requiredEvidence = [
    'connector scope',
    'source document id',
    'classification result',
    'route destination',
    'audit event id',
    'sandbox manifest',
  ];

  const gates = [
    'block routing when connector scope is missing',
    'block raw content export unless explicitly approved',
    'block completion claims without audit event id',
    'require human review for legal, financial, medical, or HR documents',
  ];

  return {
    workflow: requestedWorkflow || 'document_intake_routing',
    provider: requestedProvider || 'secure_content_layer',
    steps,
    zeroTrust,
    requiredEvidence,
    gates,
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Evaluate a document-workflow run record against the evidence and
 * approval gates and decide whether it may proceed.
 *
 * @param {object} [run]
 * @param {*} [run.connectorScope] - Required evidence (any truthy value).
 * @param {*} [run.sourceDocumentId] - Required evidence.
 * @param {*} [run.classification] - Required evidence; also compared
 *   (trimmed, case-insensitively) against the sensitive categories.
 * @param {*} [run.routeDestination] - Required evidence.
 * @param {*} [run.auditEventId] - Required evidence.
 * @param {*} [run.sandboxManifest] - Required evidence.
 * @param {boolean} [run.rawExport] - Whether raw content was exported.
 * @param {boolean} [run.rawExportApproved] - Approval for the raw export.
 * @param {boolean} [run.humanReviewed] - Whether a human reviewed the run.
 * @returns {{decision: 'allow'|'deny', issues: string[]}} `deny` when at
 *   least one issue was recorded; issues appear in check order.
 */
function evaluateDocumentWorkflowRun(run = {}) {
  // Evidence field -> issue code, in the order the issues are reported.
  const requiredEvidence = [
    ['connectorScope', 'missing_connector_scope'],
    ['sourceDocumentId', 'missing_source_document_id'],
    ['classification', 'missing_classification'],
    ['routeDestination', 'missing_route_destination'],
    ['auditEventId', 'missing_audit_event_id'],
    ['sandboxManifest', 'missing_sandbox_manifest'],
  ];
  const sensitiveClassifications = ['legal', 'financial', 'medical', 'hr'];

  const issues = [];
  for (const [field, issueCode] of requiredEvidence) {
    if (!run[field]) issues.push(issueCode);
  }

  if (run.rawExport && !run.rawExportApproved) {
    issues.push('raw_export_requires_approval');
  }

  // Trim as well as lowercase: a padded label such as ' Legal ' must not
  // slip past the human-review gate just because of stray whitespace.
  const classification = String(run.classification).trim().toLowerCase();
  if (sensitiveClassifications.includes(classification) && !run.humanReviewed) {
    issues.push('sensitive_document_human_review_required');
  }

  return {
    decision: issues.length ? 'deny' : 'allow',
    issues,
  };
}
|
|
58
|
+
|
|
59
|
+
module.exports = {
|
|
60
|
+
buildDocumentWorkflowPlan,
|
|
61
|
+
evaluateDocumentWorkflowRun,
|
|
62
|
+
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Build the static description of the enterprise agent rollout program.
 *
 * @param {object} [input]
 * @param {string} [input.industry] - Target industry label; falls back to
 *   'enterprise software' when falsy.
 * @returns {{program: string, industry: string, operatingModel: object,
 *   phases: Array<{id: string, outcome: string}>, governance: string[],
 *   metrics: string[]}} A freshly allocated rollout description.
 */
function buildEnterpriseAgentRollout(input = {}) {
  // [phase id, expected outcome] pairs, in rollout order.
  const phaseOutcomes = [
    ['discover', 'rank workflows by measurable business value and risk'],
    ['prototype', 'ship one governed agent with evidence and rollback'],
    ['scale', 'publish reusable agent catalog and approval policies'],
    ['operate', 'review traces, ROI, incidents, and policy drift weekly'],
  ];

  const rollout = {
    program: 'ThumbGate Enterprise Agent Acceleration',
    industry: input.industry || 'enterprise software',
    operatingModel: {
      forwardDeployedEngineer: true,
      humanInTheLead: true,
      domainExpertsRequired: true,
      sovereignDeploymentOption: true,
    },
    phases: phaseOutcomes.map(([id, outcome]) => ({ id, outcome })),
    governance: [
      'human oversight for high-stakes recommendations',
      'sovereign data boundary when required',
      'agent catalog with owner and allowed tools',
      'decision journal for every business-critical action',
      'measurable outcome before expansion',
    ],
    metrics: [
      'cycle_time_saved',
      'blocked_risky_actions',
      'approved_agent_runs',
      'business_value_cents',
      'incident_rate',
    ],
  };

  return rollout;
}
|
|
31
|
+
|
|
32
|
+
module.exports = {
|
|
33
|
+
buildEnterpriseAgentRollout,
|
|
34
|
+
};
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Build the experience-replay policy object, applying defaults for any
 * tunable that is not a finite number.
 *
 * @param {object} [options]
 * @param {number} [options.maxStalenessHours=24] - Used only when it is a
 *   finite number (per Number.isFinite, so numeric strings are ignored).
 * @param {number} [options.replayRatio=0.25] - Same finite-number rule.
 * @param {number} [options.minEntropy=0.65] - Same finite-number rule.
 * @returns {{policyId: string, purpose: string, buffer: object,
 *   filters: string[], monitors: object}} A freshly allocated policy.
 */
function buildExperienceReplayPolicy(options = {}) {
  // Accept the caller's value only when it is a genuine finite number.
  const finiteOr = (value, fallback) => (Number.isFinite(value) ? value : fallback);

  const maxStalenessHours = finiteOr(options.maxStalenessHours, 24);
  const replayRatio = finiteOr(options.replayRatio, 0.25);
  const minEntropy = finiteOr(options.minEntropy, 0.65);

  const buffer = {
    strategy: 'fifo_with_quality_filters',
    maxStalenessHours,
    replayRatio,
    sampleWithoutRemoval: true,
  };

  const monitors = {
    maxStalenessHours,
    minEntropy,
    compareAgainstFreshOnly: true,
    metrics: [
      'gate_precision',
      'gate_recall',
      'unsupported_claim_rate',
      'policy_entropy',
      'compute_saved_percent',
    ],
  };

  return {
    policyId: 'feedback_experience_replay',
    purpose: 'Reuse high-signal feedback trajectories without letting stale lessons dominate training.',
    buffer,
    filters: [
      'redacted',
      'source_feedback_id_present',
      'outcome_evidence_present',
      'not_contradicted_by_newer_lesson',
      'not_low_confidence_or_vague_feedback',
    ],
    monitors,
  };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Decide whether a feedback trajectory may enter the replay buffer and,
 * if so, with what weight.
 *
 * @param {object} [candidate]
 * @param {*} [candidate.sourceFeedbackId] - Must be truthy.
 * @param {*} [candidate.redacted] - Must be truthy.
 * @param {*} [candidate.outcomeEvidence] - Must be truthy.
 * @param {*} [candidate.contradictedByNewerLesson] - Rejects when truthy.
 * @param {*} [candidate.vagueFeedback] - Rejects when truthy.
 * @param {number} [candidate.ageHours] - Age of the sample; a missing or
 *   falsy value is treated as 0 hours.
 * @param {number} [candidate.qualityScore] - Quality of the sample; treated
 *   as 1 only when absent (undefined/null). An explicit 0 yields weight 0.
 * @param {object} [policy] - Policy providing `buffer.maxStalenessHours`
 *   and `buffer.replayRatio`; defaults to buildExperienceReplayPolicy().
 * @returns {{decision: 'accept'|'reject', issues: string[],
 *   replayWeight: number}} `replayWeight` is 0 for rejected candidates,
 *   otherwise the quality score clamped to the range [0, replayRatio].
 */
function evaluateReplayCandidate(candidate = {}, policy = buildExperienceReplayPolicy()) {
  const issues = [];
  if (!candidate.sourceFeedbackId) issues.push('missing_source_feedback_id');
  if (!candidate.redacted) issues.push('redaction_required');
  if (!candidate.outcomeEvidence) issues.push('missing_outcome_evidence');
  if (candidate.contradictedByNewerLesson) issues.push('contradicted_by_newer_lesson');
  if (candidate.vagueFeedback) issues.push('vague_feedback_not_replayable');

  const ageHours = Number(candidate.ageHours || 0);
  if (ageHours > policy.buffer.maxStalenessHours) issues.push('stale_replay_sample');

  // Default to 1 only when no score was supplied. The previous
  // `qualityScore || 1` also replaced an explicit 0 with 1, handing
  // zero-quality samples the full replay ratio.
  const rawQuality = candidate.qualityScore;
  const quality = rawQuality === undefined || rawQuality === null ? 1 : Number(rawQuality);

  // A non-numeric score carries no usable signal, so it earns no weight;
  // negative scores are floored at 0 so the weight is never negative.
  const acceptedWeight = Number.isFinite(quality)
    ? Math.max(0, Math.min(policy.buffer.replayRatio, quality))
    : 0;

  return {
    decision: issues.length ? 'reject' : 'accept',
    issues,
    replayWeight: issues.length ? 0 : acceptedWeight,
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Check a replay training run against the policy monitors.
 *
 * @param {object} [run]
 * @param {number} [run.replayRatio] - Flagged when above 0.5.
 * @param {number} [run.policyEntropy] - Flagged when below
 *   `policy.monitors.minEntropy`; missing/falsy counts as 0.
 * @param {*} [run.freshOnlyBaseline] - Must be truthy.
 * @param {number} [run.computeSavedPercent] - Must be present; 0 counts as
 *   present.
 * @param {object} [policy] - Policy providing `monitors.minEntropy`;
 *   defaults to buildExperienceReplayPolicy().
 * @returns {{decision: 'warn'|'allow', issues: string[],
 *   computeEfficient: boolean}} `computeEfficient` is true only when
 *   compute was saved and entropy meets the policy minimum.
 */
function evaluateReplayRun(run = {}, policy = buildExperienceReplayPolicy()) {
  const entropyFloor = policy.monitors.minEntropy;
  const observedRatio = Number(run.replayRatio || 0);
  const observedEntropy = Number(run.policyEntropy || 0);
  const savedPercent = Number(run.computeSavedPercent || 0);
  // A literal 0 is a reported metric; any other falsy value means missing.
  const hasSavedMetric = Boolean(run.computeSavedPercent) || run.computeSavedPercent === 0;

  const issues = [];
  if (observedRatio > 0.5) {
    issues.push('replay_ratio_too_high');
  }
  if (observedEntropy < entropyFloor) {
    issues.push('policy_entropy_too_low');
  }
  if (!run.freshOnlyBaseline) {
    issues.push('missing_fresh_only_baseline');
  }
  if (!hasSavedMetric) {
    issues.push('missing_compute_saved_metric');
  }

  const decision = issues.length ? 'warn' : 'allow';
  const computeEfficient = savedPercent > 0 && observedEntropy >= entropyFloor;

  return { decision, issues, computeEfficient };
}
|
|
64
|
+
|
|
65
|
+
module.exports = {
|
|
66
|
+
buildExperienceReplayPolicy,
|
|
67
|
+
evaluateReplayCandidate,
|
|
68
|
+
evaluateReplayRun,
|
|
69
|
+
};
|
|
@@ -123,7 +123,7 @@ function buildPreferenceRow(pair, index) {
|
|
|
123
123
|
function buildDatasetInfo({ traceCount, preferenceCount, exportedAt }) {
|
|
124
124
|
return {
|
|
125
125
|
dataset_info: {
|
|
126
|
-
description: 'Agent traces and DPO preference pairs from ThumbGate — pre-action
|
|
126
|
+
description: 'Agent traces and DPO preference pairs from ThumbGate — pre-action checks for AI coding agents. Contains real-world tool call feedback, failure patterns, and learned corrections.',
|
|
127
127
|
citation: '',
|
|
128
128
|
homepage: 'https://github.com/IgorGanapolsky/ThumbGate',
|
|
129
129
|
license: 'MIT',
|
package/scripts/feedback-loop.js
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
const fs = require('fs');
|
|
11
11
|
const path = require('path');
|
|
12
|
+
const { loadOptionalModule } = require('./private-core-boundary');
|
|
12
13
|
const {
|
|
13
14
|
resolveFeedbackAction,
|
|
14
15
|
prepareForStorage,
|
|
@@ -25,7 +26,13 @@ const {
|
|
|
25
26
|
const { recordAction, attributeFeedback } = require('./feedback-attribution');
|
|
26
27
|
const {
|
|
27
28
|
distillFeedbackHistory,
|
|
28
|
-
} =
|
|
29
|
+
} = loadOptionalModule('./feedback-history-distiller', () => ({
|
|
30
|
+
distillFeedbackHistory: () => ({
|
|
31
|
+
inferredFields: {},
|
|
32
|
+
conversationWindow: [],
|
|
33
|
+
source: 'public-shell-fallback',
|
|
34
|
+
}),
|
|
35
|
+
}));
|
|
29
36
|
const {
|
|
30
37
|
extractFilePaths: extractConversationPaths,
|
|
31
38
|
extractErrors: extractConversationErrors,
|
|
@@ -41,6 +48,11 @@ const {
|
|
|
41
48
|
buildFeedbackPathsFromDir,
|
|
42
49
|
getFeedbackPaths: resolveFeedbackPaths,
|
|
43
50
|
} = require('./feedback-paths');
|
|
51
|
+
const {
|
|
52
|
+
reflect,
|
|
53
|
+
} = loadOptionalModule('./reflector-agent', () => ({
|
|
54
|
+
reflect: () => null,
|
|
55
|
+
}));
|
|
44
56
|
|
|
45
57
|
const AUDIT_TRAIL_TAG = 'audit-trail';
|
|
46
58
|
|
|
@@ -106,6 +118,12 @@ const DOMAIN_CATEGORIES = [
|
|
|
106
118
|
const HOME = process.env.HOME || process.env.USERPROFILE || '';
|
|
107
119
|
const pendingBackgroundSideEffects = new Set();
|
|
108
120
|
|
|
121
|
+
/**
 * Load the sibling `reflector-agent.js` module if it exists on disk.
 *
 * @returns {*} Whatever `require` yields for the module, or `null` when
 *   the file is not present next to this script. Errors raised while
 *   loading an existing file are not caught here.
 */
function loadReflectorAgentModule() {
  const reflectorPath = path.resolve(__dirname, 'reflector-agent.js');
  // Probe the filesystem first so a missing file yields null, not a throw.
  return fs.existsSync(reflectorPath) ? require(reflectorPath) : null;
}
|
|
126
|
+
|
|
109
127
|
/**
|
|
110
128
|
* Update the statusline cache with latest lesson info after feedback capture.
|
|
111
129
|
* The statusline.sh script reads this cache to display lesson context in Claude Code's status bar.
|
|
@@ -197,7 +215,9 @@ function getSelfAuditModule() {
|
|
|
197
215
|
|
|
198
216
|
function getDelegationRuntimeModule() {
|
|
199
217
|
try {
|
|
200
|
-
|
|
218
|
+
const modulePath = path.resolve(__dirname, 'delegation-runtime.js');
|
|
219
|
+
if (!fs.existsSync(modulePath)) return null;
|
|
220
|
+
return require(modulePath);
|
|
201
221
|
} catch {
|
|
202
222
|
return null;
|
|
203
223
|
}
|
|
@@ -261,6 +281,7 @@ function normalizeAnalysisShape(analysis = {}) {
|
|
|
261
281
|
delegation: analysis.delegation || null,
|
|
262
282
|
boostedRisk: analysis.boostedRisk || null,
|
|
263
283
|
recommendations: Array.isArray(analysis.recommendations) ? analysis.recommendations : [],
|
|
284
|
+
actionableRemediations: Array.isArray(analysis.actionableRemediations) ? analysis.actionableRemediations : [],
|
|
264
285
|
source: analysis.source,
|
|
265
286
|
byDomain: Array.isArray(analysis.byDomain) ? analysis.byDomain : [],
|
|
266
287
|
byImportance: Array.isArray(analysis.byImportance) ? analysis.byImportance : [],
|
|
@@ -564,7 +585,7 @@ function saveSummary(summary) {
|
|
|
564
585
|
|
|
565
586
|
// ============================================================
|
|
566
587
|
// ML Side-Effect Helpers — Sequence Tracking (ML-03) and
|
|
567
|
-
// Diversity Tracking (ML-04). Inline
|
|
588
|
+
// Diversity Tracking (ML-04). Inline feedback-loop implementation.
|
|
568
589
|
// ============================================================
|
|
569
590
|
|
|
570
591
|
function inferDomain(tags, context) {
|
|
@@ -943,7 +964,6 @@ function captureFeedback(params) {
|
|
|
943
964
|
let reflection = null;
|
|
944
965
|
if (signal === 'negative' && Array.isArray(params.conversationWindow) && params.conversationWindow.length >= 2) {
|
|
945
966
|
try {
|
|
946
|
-
const { reflect } = require('./reflector-agent');
|
|
947
967
|
reflection = reflect({
|
|
948
968
|
conversationWindow: params.conversationWindow,
|
|
949
969
|
context: inferredContext,
|
|
@@ -1167,6 +1187,15 @@ function captureFeedback(params) {
|
|
|
1167
1187
|
timestamp: now,
|
|
1168
1188
|
};
|
|
1169
1189
|
|
|
1190
|
+
// Stamp a cross-session canonical hash on every memory record so future
|
|
1191
|
+
// captures can short-circuit dedup without re-canonicalizing legacy entries.
|
|
1192
|
+
// See scripts/lesson-canonical.js for the normalization contract.
|
|
1193
|
+
try {
|
|
1194
|
+
const { canonicalHash } = require('./lesson-canonical');
|
|
1195
|
+
const hash = canonicalHash(memoryRecord);
|
|
1196
|
+
if (hash) memoryRecord.canonicalHash = hash;
|
|
1197
|
+
} catch (_canonErr) { /* canonical hashing is non-blocking */ }
|
|
1198
|
+
|
|
1170
1199
|
// Bayesian Belief Update (Project Bayes)
|
|
1171
1200
|
try {
|
|
1172
1201
|
const { updateBelief, shouldPrune } = require('./belief-update');
|
|
@@ -1210,14 +1239,49 @@ function captureFeedback(params) {
|
|
|
1210
1239
|
const merged = mergeIntoExisting(MEMORY_LOG_PATH, similar.match, memoryRecord, feedbackEvent);
|
|
1211
1240
|
synthesisResult = { action: 'merged', existingId: similar.match.id, similarity: similar.similarity, occurrences: merged.occurrences };
|
|
1212
1241
|
|
|
1213
|
-
// Auto-promote if threshold reached
|
|
1242
|
+
// Auto-promote if threshold reached, but only after the rule
|
|
1243
|
+
// validator (scripts/rule-validator.js) confirms the proposed trigger
|
|
1244
|
+
// matches the seed lesson and has acceptable precision on recent
|
|
1245
|
+
// overlapping-tag events. This plugs the Autogenesis "validate
|
|
1246
|
+
// before integrate" phase that was missing from the original
|
|
1247
|
+
// promotion path — previously every threshold-crossing lesson
|
|
1248
|
+
// shipped a rule regardless of whether it would over-block positives.
|
|
1214
1249
|
if (shouldAutoPromote(merged)) {
|
|
1215
1250
|
const rule = synthesizePreventionRule(merged);
|
|
1216
|
-
|
|
1251
|
+
let validation = null;
|
|
1252
|
+
try {
|
|
1253
|
+
const { validateProposedRule } = require('./rule-validator');
|
|
1254
|
+
// Sample the last 50 memory events across both signals. Using
|
|
1255
|
+
// memory-log rather than feedback-log because memory records
|
|
1256
|
+
// carry the richer title/content fields the validator scores
|
|
1257
|
+
// against, and findSimilarLesson already reads this file.
|
|
1258
|
+
const recentEvents = readJSONL(MEMORY_LOG_PATH).slice(-50);
|
|
1259
|
+
validation = validateProposedRule(rule, {
|
|
1260
|
+
seedLesson: merged,
|
|
1261
|
+
recentEvents,
|
|
1262
|
+
});
|
|
1263
|
+
rule.validation = validation;
|
|
1264
|
+
} catch (_valErr) {
|
|
1265
|
+
// Validator failure must not block the existing pipeline; fall
|
|
1266
|
+
// back to the legacy "promote unconditionally" behavior.
|
|
1267
|
+
validation = { shouldPromote: true, reason: 'validator_error', error: _valErr.message };
|
|
1268
|
+
rule.validation = validation;
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1217
1271
|
synthesisResult.preventionRule = rule;
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1272
|
+
synthesisResult.validation = validation;
|
|
1273
|
+
if (validation.shouldPromote) {
|
|
1274
|
+
synthesisResult.autoPromoted = true;
|
|
1275
|
+
// Store the synthesized rule
|
|
1276
|
+
const rulesPath = path.join(path.dirname(MEMORY_LOG_PATH), 'synthesized-rules.jsonl');
|
|
1277
|
+
appendJSONLLocal(rulesPath, rule);
|
|
1278
|
+
} else {
|
|
1279
|
+
// Park rejected rules in a side log so operators can audit them.
|
|
1280
|
+
synthesisResult.autoPromoted = false;
|
|
1281
|
+
synthesisResult.rejectionReason = validation.reason;
|
|
1282
|
+
const rejectedPath = path.join(path.dirname(MEMORY_LOG_PATH), 'rejected-rules.jsonl');
|
|
1283
|
+
appendJSONLLocal(rejectedPath, rule);
|
|
1284
|
+
}
|
|
1221
1285
|
}
|
|
1222
1286
|
} else {
|
|
1223
1287
|
// No similar lesson — check exact duplicate, then store
|
|
@@ -1469,11 +1533,22 @@ function analyzeFeedback(logPath) {
|
|
|
1469
1533
|
};
|
|
1470
1534
|
|
|
1471
1535
|
const recommendations = [];
|
|
1536
|
+
// Structured counterpart to `recommendations` — machine-actionable shape so
|
|
1537
|
+
// hooks/agents can act on each item without regex-parsing prose strings.
|
|
1538
|
+
// Each entry: { type, target, evidence, action, rationale }.
|
|
1539
|
+
const actionableRemediations = [];
|
|
1472
1540
|
|
|
1473
1541
|
for (const [skill, stat] of Object.entries(skills)) {
|
|
1474
1542
|
const negRate = stat.total > 0 ? stat.negative / stat.total : 0;
|
|
1475
1543
|
if (stat.total >= 3 && negRate >= 0.5) {
|
|
1476
1544
|
recommendations.push(`IMPROVE skill '${skill}' (${stat.negative}/${stat.total} negative)`);
|
|
1545
|
+
actionableRemediations.push({
|
|
1546
|
+
type: 'skill-improve',
|
|
1547
|
+
target: skill,
|
|
1548
|
+
evidence: { positive: stat.positive, negative: stat.negative, total: stat.total, negativeRate: Math.round(negRate * 1000) / 1000 },
|
|
1549
|
+
action: 'review-and-update-skill',
|
|
1550
|
+
rationale: `Skill '${skill}' has ${stat.negative}/${stat.total} negative feedback events (${Math.round(negRate * 100)}% negative rate).`,
|
|
1551
|
+
});
|
|
1477
1552
|
}
|
|
1478
1553
|
}
|
|
1479
1554
|
|
|
@@ -1481,14 +1556,35 @@ function analyzeFeedback(logPath) {
|
|
|
1481
1556
|
const posRate = stat.total > 0 ? stat.positive / stat.total : 0;
|
|
1482
1557
|
if (stat.total >= 3 && posRate >= 0.8) {
|
|
1483
1558
|
recommendations.push(`REUSE pattern '${tag}' (${stat.positive}/${stat.total} positive)`);
|
|
1559
|
+
actionableRemediations.push({
|
|
1560
|
+
type: 'pattern-reuse',
|
|
1561
|
+
target: tag,
|
|
1562
|
+
evidence: { positive: stat.positive, negative: stat.negative, total: stat.total, positiveRate: Math.round(posRate * 1000) / 1000 },
|
|
1563
|
+
action: 'replicate-pattern',
|
|
1564
|
+
rationale: `Pattern '${tag}' has ${stat.positive}/${stat.total} positive feedback events (${Math.round(posRate * 100)}% positive rate).`,
|
|
1565
|
+
});
|
|
1484
1566
|
}
|
|
1485
1567
|
}
|
|
1486
1568
|
|
|
1487
1569
|
if (recent.length >= 10 && recentRate < approvalRate - 0.1) {
|
|
1488
1570
|
recommendations.push('DECLINING trend in last 20 signals; tighten verification before response.');
|
|
1571
|
+
actionableRemediations.push({
|
|
1572
|
+
type: 'trend-declining',
|
|
1573
|
+
target: 'recent-signals',
|
|
1574
|
+
evidence: { recentRate, approvalRate, sampleSize: recent.length },
|
|
1575
|
+
action: 'tighten-verification-before-response',
|
|
1576
|
+
rationale: `Recent approval rate (${Math.round(recentRate * 100)}%) has dropped ≥10pp below lifetime (${Math.round(approvalRate * 100)}%).`,
|
|
1577
|
+
});
|
|
1489
1578
|
}
|
|
1490
1579
|
if (trend === 'degrading') {
|
|
1491
1580
|
recommendations.push(`DEGRADING 7d trend (${rate7d}) vs 30d (${rate30d}); increase prevention rule injection.`);
|
|
1581
|
+
actionableRemediations.push({
|
|
1582
|
+
type: 'trend-degrading',
|
|
1583
|
+
target: '7d-window',
|
|
1584
|
+
evidence: { rate7d, rate30d, delta: Math.round((rate7d - rate30d) * 1000) / 1000 },
|
|
1585
|
+
action: 'increase-prevention-rule-injection',
|
|
1586
|
+
rationale: `7d rate (${rate7d}) is below 30d rate (${rate30d}) by more than threshold.`,
|
|
1587
|
+
});
|
|
1492
1588
|
}
|
|
1493
1589
|
|
|
1494
1590
|
let boostedRisk = null;
|
|
@@ -1499,9 +1595,23 @@ function analyzeFeedback(logPath) {
|
|
|
1499
1595
|
if (boostedRisk) {
|
|
1500
1596
|
boostedRisk.highRiskDomains.slice(0, 2).forEach((bucket) => {
|
|
1501
1597
|
recommendations.push(`CHECK high-risk domain '${bucket.key}' (${bucket.highRisk}/${bucket.total} high-risk)`);
|
|
1598
|
+
actionableRemediations.push({
|
|
1599
|
+
type: 'high-risk-domain',
|
|
1600
|
+
target: bucket.key,
|
|
1601
|
+
evidence: { highRisk: bucket.highRisk, total: bucket.total, riskRate: bucket.riskRate },
|
|
1602
|
+
action: 'audit-domain-failures',
|
|
1603
|
+
rationale: `Domain '${bucket.key}' has ${bucket.highRisk}/${bucket.total} high-risk events (${Math.round((bucket.riskRate || 0) * 100)}% risk rate).`,
|
|
1604
|
+
});
|
|
1502
1605
|
});
|
|
1503
1606
|
boostedRisk.highRiskTags.slice(0, 2).forEach((bucket) => {
|
|
1504
1607
|
recommendations.push(`CHECK high-risk tag '${bucket.key}' (${bucket.highRisk}/${bucket.total} high-risk)`);
|
|
1608
|
+
actionableRemediations.push({
|
|
1609
|
+
type: 'high-risk-tag',
|
|
1610
|
+
target: bucket.key,
|
|
1611
|
+
evidence: { highRisk: bucket.highRisk, total: bucket.total, riskRate: bucket.riskRate },
|
|
1612
|
+
action: 'audit-tag-failures',
|
|
1613
|
+
rationale: `Tag '${bucket.key}' has ${bucket.highRisk}/${bucket.total} high-risk events (${Math.round((bucket.riskRate || 0) * 100)}% risk rate).`,
|
|
1614
|
+
});
|
|
1505
1615
|
});
|
|
1506
1616
|
}
|
|
1507
1617
|
}
|
|
@@ -1516,9 +1626,23 @@ function analyzeFeedback(logPath) {
|
|
|
1516
1626
|
delegation = delegationRuntime.summarizeDelegation(paths.FEEDBACK_DIR);
|
|
1517
1627
|
if (delegation.attemptCount >= 3 && delegation.verificationFailureRate >= 0.5) {
|
|
1518
1628
|
recommendations.push(`REDUCE delegation: verification failure rate is ${Math.round(delegation.verificationFailureRate * 100)}%`);
|
|
1629
|
+
actionableRemediations.push({
|
|
1630
|
+
type: 'delegation-reduce',
|
|
1631
|
+
target: 'verification-failure-rate',
|
|
1632
|
+
evidence: { verificationFailureRate: delegation.verificationFailureRate, attemptCount: delegation.attemptCount },
|
|
1633
|
+
action: 'reduce-delegation-use',
|
|
1634
|
+
rationale: `Delegation verification failure rate is ${Math.round(delegation.verificationFailureRate * 100)}% across ${delegation.attemptCount} attempts.`,
|
|
1635
|
+
});
|
|
1519
1636
|
}
|
|
1520
1637
|
if (delegation.avoidedDelegationCount >= 3) {
|
|
1521
1638
|
recommendations.push(`REVIEW delegation policy: ${delegation.avoidedDelegationCount} handoff starts were blocked before execution`);
|
|
1639
|
+
actionableRemediations.push({
|
|
1640
|
+
type: 'delegation-policy-review',
|
|
1641
|
+
target: 'handoff-blocks',
|
|
1642
|
+
evidence: { avoidedDelegationCount: delegation.avoidedDelegationCount },
|
|
1643
|
+
action: 'review-delegation-policy',
|
|
1644
|
+
rationale: `${delegation.avoidedDelegationCount} handoff starts were blocked before execution.`,
|
|
1645
|
+
});
|
|
1522
1646
|
}
|
|
1523
1647
|
}
|
|
1524
1648
|
} catch {
|
|
@@ -1526,6 +1650,13 @@ function analyzeFeedback(logPath) {
|
|
|
1526
1650
|
}
|
|
1527
1651
|
diagnostics.categories.slice(0, 2).forEach((bucket) => {
|
|
1528
1652
|
recommendations.push(`DIAGNOSE '${bucket.key}' failures (${bucket.count})`);
|
|
1653
|
+
actionableRemediations.push({
|
|
1654
|
+
type: 'diagnose-failure-category',
|
|
1655
|
+
target: bucket.key,
|
|
1656
|
+
evidence: { count: bucket.count },
|
|
1657
|
+
action: 'investigate-failure-category',
|
|
1658
|
+
rationale: `Failure category '${bucket.key}' has ${bucket.count} diagnosed events.`,
|
|
1659
|
+
});
|
|
1529
1660
|
});
|
|
1530
1661
|
|
|
1531
1662
|
return normalizeAnalysisShape({
|
|
@@ -1547,6 +1678,7 @@ function analyzeFeedback(logPath) {
|
|
|
1547
1678
|
delegation,
|
|
1548
1679
|
boostedRisk,
|
|
1549
1680
|
recommendations,
|
|
1681
|
+
actionableRemediations,
|
|
1550
1682
|
});
|
|
1551
1683
|
}
|
|
1552
1684
|
|
|
@@ -253,7 +253,7 @@ Constraints:
|
|
|
253
253
|
- Return ONLY the JSON array — no markdown, no explanation outside the array.`;
|
|
254
254
|
|
|
255
255
|
async function analyzeWithLLM(entries) {
|
|
256
|
-
const { isAvailable,
|
|
256
|
+
const { isAvailable, callClaudeJson, MODELS } = require('./llm-client');
|
|
257
257
|
if (!isAvailable()) return null;
|
|
258
258
|
|
|
259
259
|
const negativeEntries = entries
|
|
@@ -276,34 +276,28 @@ async function analyzeWithLLM(entries) {
|
|
|
276
276
|
return entry;
|
|
277
277
|
}).join('\n\n');
|
|
278
278
|
|
|
279
|
-
const
|
|
279
|
+
const parsed = await callClaudeJson({
|
|
280
280
|
systemPrompt: LLM_RULES_SYSTEM_PROMPT,
|
|
281
281
|
userPrompt: `Analyze these ${negativeEntries.length} negative feedback entries and generate prevention rules:\n\n${batch}`,
|
|
282
282
|
model: MODELS.SMART,
|
|
283
283
|
maxTokens: 2048,
|
|
284
|
+
cache: true,
|
|
284
285
|
});
|
|
285
286
|
|
|
286
|
-
if (!
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
.
|
|
295
|
-
.
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
suggestedRule: r.message,
|
|
301
|
-
reasoning: r.reasoning || '',
|
|
302
|
-
source: 'llm-analysis',
|
|
303
|
-
}));
|
|
304
|
-
} catch {
|
|
305
|
-
return null;
|
|
306
|
-
}
|
|
287
|
+
if (!Array.isArray(parsed)) return null;
|
|
288
|
+
|
|
289
|
+
return parsed
|
|
290
|
+
.filter((r) => r.pattern && r.action && r.message && r.severity)
|
|
291
|
+
.slice(0, 10)
|
|
292
|
+
.map((r) => ({
|
|
293
|
+
pattern: r.pattern,
|
|
294
|
+
count: negativeEntries.length,
|
|
295
|
+
severity: ['critical', 'high', 'medium'].includes(r.severity) ? r.severity : 'medium',
|
|
296
|
+
hasHighRisk: r.severity === 'critical',
|
|
297
|
+
suggestedRule: r.message,
|
|
298
|
+
reasoning: r.reasoning || '',
|
|
299
|
+
source: 'llm-analysis',
|
|
300
|
+
}));
|
|
307
301
|
}
|
|
308
302
|
|
|
309
303
|
if (require.main === module) {
|
package/scripts/gates-engine.js
CHANGED
|
@@ -5,6 +5,7 @@ const fs = require('fs');
|
|
|
5
5
|
const path = require('path');
|
|
6
6
|
const crypto = require('crypto');
|
|
7
7
|
const { execSync, execFileSync } = require('child_process');
|
|
8
|
+
const { loadOptionalModule } = require('./private-core-boundary');
|
|
8
9
|
|
|
9
10
|
const { isProTier, FREE_TIER_MAX_GATES } = require('./rate-limiter');
|
|
10
11
|
const {
|
|
@@ -1976,12 +1977,9 @@ function buildRecentCorrectiveActionsContext(options = {}) {
|
|
|
1976
1977
|
function buildRelevantLessonContext(toolName, toolInput) {
|
|
1977
1978
|
if (!toolName) return null;
|
|
1978
1979
|
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
} catch {
|
|
1983
|
-
return null;
|
|
1984
|
-
}
|
|
1980
|
+
const { retrieveRelevantLessons } = loadOptionalModule('./lesson-retrieval', () => ({
|
|
1981
|
+
retrieveRelevantLessons: () => [],
|
|
1982
|
+
}));
|
|
1985
1983
|
|
|
1986
1984
|
// Extract a searchable action context from the tool input
|
|
1987
1985
|
const actionContext = extractActionContext(toolName, toolInput);
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Build the creator/webinar growth campaign description, including the
 * call-to-action link that carries the campaign tracking parameters.
 *
 * @param {object} [input]
 * @param {string} [input.appUrl] - Base URL of the app; falls back to the
 *   production URL when falsy. Trailing slashes are ignored.
 * @param {string} [input.webinarTitle] - Webinar title; falls back to the
 *   default title when falsy.
 * @param {string} [input.offerCode] - Offer code appended to the CTA link;
 *   falls back to 'AGENTGATES' when falsy.
 * @returns {{campaignId: string, channelFit: string[], audience: string,
 *   webinar: object, paywall: object, posts: object[]}} A freshly
 *   allocated campaign description.
 */
function buildCreatorGrowthCampaign(input = {}) {
  const appUrl = input.appUrl || 'https://thumbgate-production.up.railway.app';
  const webinarTitle = input.webinarTitle || 'Stop AI Agents From Repeating Expensive Mistakes';
  const offerCode = input.offerCode || 'AGENTGATES';

  // In a URL the query must come before the fragment. The previous form
  // ("/#workflow-sprint-intake?utm_source=...") made the tracking parameters
  // part of the fragment, so they never reached the query string and the
  // fragment no longer matched the "workflow-sprint-intake" anchor.
  const baseUrl = String(appUrl).replace(/\/+$/, '');
  const query = [
    'utm_source=beehiiv',
    'utm_campaign=creator_webinar_agent_governance',
    // Encode so offer codes with reserved characters cannot break the query.
    `offer=${encodeURIComponent(offerCode)}`,
  ].join('&');
  const cta = `${baseUrl}/?${query}#workflow-sprint-intake`;

  return {
    campaignId: 'creator_webinar_agent_governance',
    channelFit: ['beehiiv', 'linkedin', 'newsletter', 'webinar', 'youtube'],
    audience: 'founders, engineering managers, AI automators, and creator-operators shipping with coding agents',
    webinar: {
      title: webinarTitle,
      promise: 'In 30 minutes, see how a thumbs-down turns into a pre-action gate that blocks the same agent mistake next time.',
      demoFlow: [
        'Show a risky agent action before ThumbGate.',
        'Capture corrective feedback with context.',
        'Regenerate the prevention rule.',
        'Replay the action and show the gate blocking it.',
        'Export the decision journal and proof report.',
      ],
      cta,
    },
    paywall: {
      freeMeter: 2,
      paidTrial: '$1 for 14 days',
      paidContent: [
        'Routine-ready security audit prompt',
        'CRE prompt review checklist',
        'Data Table Agent schema planner template',
        'Workspace Agent approval-policy checklist',
      ],
    },
    posts: [
      {
        platform: 'linkedin',
        text: 'AI agents are becoming scheduled coworkers. The missing layer is enforcement: approvals, evidence, rollback, and memory that blocks repeat mistakes. ThumbGate turns feedback into pre-action gates.',
      },
      {
        platform: 'newsletter',
        text: 'This week: how to stop prompting and hoping. Treat prompts as runtime programs, require evidence before tool actions, and use ThumbGate to block known-bad agent patterns.',
      },
    ],
  };
}
|
|
46
|
+
|
|
47
|
+
module.exports = {
|
|
48
|
+
buildCreatorGrowthCampaign,
|
|
49
|
+
};
|