thumbgate 1.15.0 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +59 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +210 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +157 -8
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +57 -49
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +6 -6
- package/public/compare.html +29 -23
- package/public/dashboard.html +82 -10
- package/public/guide.html +28 -28
- package/public/index.html +216 -98
- package/public/learn.html +50 -22
- package/public/lessons.html +1 -1
- package/public/numbers.html +17 -17
- package/public/pro.html +82 -18
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-schema.js +18 -2
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +92 -4
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +16 -4
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +232 -55
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +63 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/src/api/server.js +381 -120
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
package/scripts/feedback-loop.js
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
const fs = require('fs');
|
|
11
11
|
const path = require('path');
|
|
12
|
+
const { loadOptionalModule } = require('./private-core-boundary');
|
|
12
13
|
const {
|
|
13
14
|
resolveFeedbackAction,
|
|
14
15
|
prepareForStorage,
|
|
@@ -25,7 +26,13 @@ const {
|
|
|
25
26
|
const { recordAction, attributeFeedback } = require('./feedback-attribution');
|
|
26
27
|
const {
|
|
27
28
|
distillFeedbackHistory,
|
|
28
|
-
} =
|
|
29
|
+
} = loadOptionalModule('./feedback-history-distiller', () => ({
|
|
30
|
+
distillFeedbackHistory: () => ({
|
|
31
|
+
inferredFields: {},
|
|
32
|
+
conversationWindow: [],
|
|
33
|
+
source: 'public-shell-fallback',
|
|
34
|
+
}),
|
|
35
|
+
}));
|
|
29
36
|
const {
|
|
30
37
|
extractFilePaths: extractConversationPaths,
|
|
31
38
|
extractErrors: extractConversationErrors,
|
|
@@ -41,6 +48,11 @@ const {
|
|
|
41
48
|
buildFeedbackPathsFromDir,
|
|
42
49
|
getFeedbackPaths: resolveFeedbackPaths,
|
|
43
50
|
} = require('./feedback-paths');
|
|
51
|
+
const {
|
|
52
|
+
reflect,
|
|
53
|
+
} = loadOptionalModule('./reflector-agent', () => ({
|
|
54
|
+
reflect: () => null,
|
|
55
|
+
}));
|
|
44
56
|
|
|
45
57
|
const AUDIT_TRAIL_TAG = 'audit-trail';
|
|
46
58
|
|
|
@@ -106,6 +118,12 @@ const DOMAIN_CATEGORIES = [
|
|
|
106
118
|
const HOME = process.env.HOME || process.env.USERPROFILE || '';
|
|
107
119
|
const pendingBackgroundSideEffects = new Set();
|
|
108
120
|
|
|
121
|
+
/**
 * Load the sibling `reflector-agent.js` module when it is present on disk.
 *
 * @returns {*} The module's exports, or `null` when the file does not exist
 *   next to this script.
 */
function loadReflectorAgentModule() {
  const reflectorFile = path.resolve(__dirname, 'reflector-agent.js');
  // Probe the filesystem first so an absent file yields null instead of
  // reaching require() at all.
  return fs.existsSync(reflectorFile) ? require(reflectorFile) : null;
}
|
|
126
|
+
|
|
109
127
|
/**
|
|
110
128
|
* Update the statusline cache with latest lesson info after feedback capture.
|
|
111
129
|
* The statusline.sh script reads this cache to display lesson context in Claude Code's status bar.
|
|
@@ -197,7 +215,9 @@ function getSelfAuditModule() {
|
|
|
197
215
|
|
|
198
216
|
function getDelegationRuntimeModule() {
|
|
199
217
|
try {
|
|
200
|
-
|
|
218
|
+
const modulePath = path.resolve(__dirname, 'delegation-runtime.js');
|
|
219
|
+
if (!fs.existsSync(modulePath)) return null;
|
|
220
|
+
return require(modulePath);
|
|
201
221
|
} catch {
|
|
202
222
|
return null;
|
|
203
223
|
}
|
|
@@ -261,6 +281,7 @@ function normalizeAnalysisShape(analysis = {}) {
|
|
|
261
281
|
delegation: analysis.delegation || null,
|
|
262
282
|
boostedRisk: analysis.boostedRisk || null,
|
|
263
283
|
recommendations: Array.isArray(analysis.recommendations) ? analysis.recommendations : [],
|
|
284
|
+
actionableRemediations: Array.isArray(analysis.actionableRemediations) ? analysis.actionableRemediations : [],
|
|
264
285
|
source: analysis.source,
|
|
265
286
|
byDomain: Array.isArray(analysis.byDomain) ? analysis.byDomain : [],
|
|
266
287
|
byImportance: Array.isArray(analysis.byImportance) ? analysis.byImportance : [],
|
|
@@ -564,7 +585,7 @@ function saveSummary(summary) {
|
|
|
564
585
|
|
|
565
586
|
// ============================================================
|
|
566
587
|
// ML Side-Effect Helpers — Sequence Tracking (ML-03) and
|
|
567
|
-
// Diversity Tracking (ML-04). Inline
|
|
588
|
+
// Diversity Tracking (ML-04). Inline feedback-loop implementation.
|
|
568
589
|
// ============================================================
|
|
569
590
|
|
|
570
591
|
function inferDomain(tags, context) {
|
|
@@ -943,7 +964,6 @@ function captureFeedback(params) {
|
|
|
943
964
|
let reflection = null;
|
|
944
965
|
if (signal === 'negative' && Array.isArray(params.conversationWindow) && params.conversationWindow.length >= 2) {
|
|
945
966
|
try {
|
|
946
|
-
const { reflect } = require('./reflector-agent');
|
|
947
967
|
reflection = reflect({
|
|
948
968
|
conversationWindow: params.conversationWindow,
|
|
949
969
|
context: inferredContext,
|
|
@@ -1513,11 +1533,22 @@ function analyzeFeedback(logPath) {
|
|
|
1513
1533
|
};
|
|
1514
1534
|
|
|
1515
1535
|
const recommendations = [];
|
|
1536
|
+
// Structured counterpart to `recommendations` — machine-actionable shape so
|
|
1537
|
+
// hooks/agents can act on each item without regex-parsing prose strings.
|
|
1538
|
+
// Each entry: { type, target, evidence, action, rationale }.
|
|
1539
|
+
const actionableRemediations = [];
|
|
1516
1540
|
|
|
1517
1541
|
for (const [skill, stat] of Object.entries(skills)) {
|
|
1518
1542
|
const negRate = stat.total > 0 ? stat.negative / stat.total : 0;
|
|
1519
1543
|
if (stat.total >= 3 && negRate >= 0.5) {
|
|
1520
1544
|
recommendations.push(`IMPROVE skill '${skill}' (${stat.negative}/${stat.total} negative)`);
|
|
1545
|
+
actionableRemediations.push({
|
|
1546
|
+
type: 'skill-improve',
|
|
1547
|
+
target: skill,
|
|
1548
|
+
evidence: { positive: stat.positive, negative: stat.negative, total: stat.total, negativeRate: Math.round(negRate * 1000) / 1000 },
|
|
1549
|
+
action: 'review-and-update-skill',
|
|
1550
|
+
rationale: `Skill '${skill}' has ${stat.negative}/${stat.total} negative feedback events (${Math.round(negRate * 100)}% negative rate).`,
|
|
1551
|
+
});
|
|
1521
1552
|
}
|
|
1522
1553
|
}
|
|
1523
1554
|
|
|
@@ -1525,14 +1556,35 @@ function analyzeFeedback(logPath) {
|
|
|
1525
1556
|
const posRate = stat.total > 0 ? stat.positive / stat.total : 0;
|
|
1526
1557
|
if (stat.total >= 3 && posRate >= 0.8) {
|
|
1527
1558
|
recommendations.push(`REUSE pattern '${tag}' (${stat.positive}/${stat.total} positive)`);
|
|
1559
|
+
actionableRemediations.push({
|
|
1560
|
+
type: 'pattern-reuse',
|
|
1561
|
+
target: tag,
|
|
1562
|
+
evidence: { positive: stat.positive, negative: stat.negative, total: stat.total, positiveRate: Math.round(posRate * 1000) / 1000 },
|
|
1563
|
+
action: 'replicate-pattern',
|
|
1564
|
+
rationale: `Pattern '${tag}' has ${stat.positive}/${stat.total} positive feedback events (${Math.round(posRate * 100)}% positive rate).`,
|
|
1565
|
+
});
|
|
1528
1566
|
}
|
|
1529
1567
|
}
|
|
1530
1568
|
|
|
1531
1569
|
if (recent.length >= 10 && recentRate < approvalRate - 0.1) {
|
|
1532
1570
|
recommendations.push('DECLINING trend in last 20 signals; tighten verification before response.');
|
|
1571
|
+
actionableRemediations.push({
|
|
1572
|
+
type: 'trend-declining',
|
|
1573
|
+
target: 'recent-signals',
|
|
1574
|
+
evidence: { recentRate, approvalRate, sampleSize: recent.length },
|
|
1575
|
+
action: 'tighten-verification-before-response',
|
|
1576
|
+
rationale: `Recent approval rate (${Math.round(recentRate * 100)}%) has dropped ≥10pp below lifetime (${Math.round(approvalRate * 100)}%).`,
|
|
1577
|
+
});
|
|
1533
1578
|
}
|
|
1534
1579
|
if (trend === 'degrading') {
|
|
1535
1580
|
recommendations.push(`DEGRADING 7d trend (${rate7d}) vs 30d (${rate30d}); increase prevention rule injection.`);
|
|
1581
|
+
actionableRemediations.push({
|
|
1582
|
+
type: 'trend-degrading',
|
|
1583
|
+
target: '7d-window',
|
|
1584
|
+
evidence: { rate7d, rate30d, delta: Math.round((rate7d - rate30d) * 1000) / 1000 },
|
|
1585
|
+
action: 'increase-prevention-rule-injection',
|
|
1586
|
+
rationale: `7d rate (${rate7d}) is below 30d rate (${rate30d}) by more than threshold.`,
|
|
1587
|
+
});
|
|
1536
1588
|
}
|
|
1537
1589
|
|
|
1538
1590
|
let boostedRisk = null;
|
|
@@ -1543,9 +1595,23 @@ function analyzeFeedback(logPath) {
|
|
|
1543
1595
|
if (boostedRisk) {
|
|
1544
1596
|
boostedRisk.highRiskDomains.slice(0, 2).forEach((bucket) => {
|
|
1545
1597
|
recommendations.push(`CHECK high-risk domain '${bucket.key}' (${bucket.highRisk}/${bucket.total} high-risk)`);
|
|
1598
|
+
actionableRemediations.push({
|
|
1599
|
+
type: 'high-risk-domain',
|
|
1600
|
+
target: bucket.key,
|
|
1601
|
+
evidence: { highRisk: bucket.highRisk, total: bucket.total, riskRate: bucket.riskRate },
|
|
1602
|
+
action: 'audit-domain-failures',
|
|
1603
|
+
rationale: `Domain '${bucket.key}' has ${bucket.highRisk}/${bucket.total} high-risk events (${Math.round((bucket.riskRate || 0) * 100)}% risk rate).`,
|
|
1604
|
+
});
|
|
1546
1605
|
});
|
|
1547
1606
|
boostedRisk.highRiskTags.slice(0, 2).forEach((bucket) => {
|
|
1548
1607
|
recommendations.push(`CHECK high-risk tag '${bucket.key}' (${bucket.highRisk}/${bucket.total} high-risk)`);
|
|
1608
|
+
actionableRemediations.push({
|
|
1609
|
+
type: 'high-risk-tag',
|
|
1610
|
+
target: bucket.key,
|
|
1611
|
+
evidence: { highRisk: bucket.highRisk, total: bucket.total, riskRate: bucket.riskRate },
|
|
1612
|
+
action: 'audit-tag-failures',
|
|
1613
|
+
rationale: `Tag '${bucket.key}' has ${bucket.highRisk}/${bucket.total} high-risk events (${Math.round((bucket.riskRate || 0) * 100)}% risk rate).`,
|
|
1614
|
+
});
|
|
1549
1615
|
});
|
|
1550
1616
|
}
|
|
1551
1617
|
}
|
|
@@ -1560,9 +1626,23 @@ function analyzeFeedback(logPath) {
|
|
|
1560
1626
|
delegation = delegationRuntime.summarizeDelegation(paths.FEEDBACK_DIR);
|
|
1561
1627
|
if (delegation.attemptCount >= 3 && delegation.verificationFailureRate >= 0.5) {
|
|
1562
1628
|
recommendations.push(`REDUCE delegation: verification failure rate is ${Math.round(delegation.verificationFailureRate * 100)}%`);
|
|
1629
|
+
actionableRemediations.push({
|
|
1630
|
+
type: 'delegation-reduce',
|
|
1631
|
+
target: 'verification-failure-rate',
|
|
1632
|
+
evidence: { verificationFailureRate: delegation.verificationFailureRate, attemptCount: delegation.attemptCount },
|
|
1633
|
+
action: 'reduce-delegation-use',
|
|
1634
|
+
rationale: `Delegation verification failure rate is ${Math.round(delegation.verificationFailureRate * 100)}% across ${delegation.attemptCount} attempts.`,
|
|
1635
|
+
});
|
|
1563
1636
|
}
|
|
1564
1637
|
if (delegation.avoidedDelegationCount >= 3) {
|
|
1565
1638
|
recommendations.push(`REVIEW delegation policy: ${delegation.avoidedDelegationCount} handoff starts were blocked before execution`);
|
|
1639
|
+
actionableRemediations.push({
|
|
1640
|
+
type: 'delegation-policy-review',
|
|
1641
|
+
target: 'handoff-blocks',
|
|
1642
|
+
evidence: { avoidedDelegationCount: delegation.avoidedDelegationCount },
|
|
1643
|
+
action: 'review-delegation-policy',
|
|
1644
|
+
rationale: `${delegation.avoidedDelegationCount} handoff starts were blocked before execution.`,
|
|
1645
|
+
});
|
|
1566
1646
|
}
|
|
1567
1647
|
}
|
|
1568
1648
|
} catch {
|
|
@@ -1570,6 +1650,13 @@ function analyzeFeedback(logPath) {
|
|
|
1570
1650
|
}
|
|
1571
1651
|
diagnostics.categories.slice(0, 2).forEach((bucket) => {
|
|
1572
1652
|
recommendations.push(`DIAGNOSE '${bucket.key}' failures (${bucket.count})`);
|
|
1653
|
+
actionableRemediations.push({
|
|
1654
|
+
type: 'diagnose-failure-category',
|
|
1655
|
+
target: bucket.key,
|
|
1656
|
+
evidence: { count: bucket.count },
|
|
1657
|
+
action: 'investigate-failure-category',
|
|
1658
|
+
rationale: `Failure category '${bucket.key}' has ${bucket.count} diagnosed events.`,
|
|
1659
|
+
});
|
|
1573
1660
|
});
|
|
1574
1661
|
|
|
1575
1662
|
return normalizeAnalysisShape({
|
|
@@ -1591,6 +1678,7 @@ function analyzeFeedback(logPath) {
|
|
|
1591
1678
|
delegation,
|
|
1592
1679
|
boostedRisk,
|
|
1593
1680
|
recommendations,
|
|
1681
|
+
actionableRemediations,
|
|
1594
1682
|
});
|
|
1595
1683
|
}
|
|
1596
1684
|
|
|
@@ -253,7 +253,7 @@ Constraints:
|
|
|
253
253
|
- Return ONLY the JSON array — no markdown, no explanation outside the array.`;
|
|
254
254
|
|
|
255
255
|
async function analyzeWithLLM(entries) {
|
|
256
|
-
const { isAvailable,
|
|
256
|
+
const { isAvailable, callClaudeJson, MODELS } = require('./llm-client');
|
|
257
257
|
if (!isAvailable()) return null;
|
|
258
258
|
|
|
259
259
|
const negativeEntries = entries
|
|
@@ -276,34 +276,28 @@ async function analyzeWithLLM(entries) {
|
|
|
276
276
|
return entry;
|
|
277
277
|
}).join('\n\n');
|
|
278
278
|
|
|
279
|
-
const
|
|
279
|
+
const parsed = await callClaudeJson({
|
|
280
280
|
systemPrompt: LLM_RULES_SYSTEM_PROMPT,
|
|
281
281
|
userPrompt: `Analyze these ${negativeEntries.length} negative feedback entries and generate prevention rules:\n\n${batch}`,
|
|
282
282
|
model: MODELS.SMART,
|
|
283
283
|
maxTokens: 2048,
|
|
284
|
+
cache: true,
|
|
284
285
|
});
|
|
285
286
|
|
|
286
|
-
if (!
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
.
|
|
295
|
-
.
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
suggestedRule: r.message,
|
|
301
|
-
reasoning: r.reasoning || '',
|
|
302
|
-
source: 'llm-analysis',
|
|
303
|
-
}));
|
|
304
|
-
} catch {
|
|
305
|
-
return null;
|
|
306
|
-
}
|
|
287
|
+
if (!Array.isArray(parsed)) return null;
|
|
288
|
+
|
|
289
|
+
return parsed
|
|
290
|
+
.filter((r) => r.pattern && r.action && r.message && r.severity)
|
|
291
|
+
.slice(0, 10)
|
|
292
|
+
.map((r) => ({
|
|
293
|
+
pattern: r.pattern,
|
|
294
|
+
count: negativeEntries.length,
|
|
295
|
+
severity: ['critical', 'high', 'medium'].includes(r.severity) ? r.severity : 'medium',
|
|
296
|
+
hasHighRisk: r.severity === 'critical',
|
|
297
|
+
suggestedRule: r.message,
|
|
298
|
+
reasoning: r.reasoning || '',
|
|
299
|
+
source: 'llm-analysis',
|
|
300
|
+
}));
|
|
307
301
|
}
|
|
308
302
|
|
|
309
303
|
if (require.main === module) {
|
package/scripts/gates-engine.js
CHANGED
|
@@ -5,6 +5,7 @@ const fs = require('fs');
|
|
|
5
5
|
const path = require('path');
|
|
6
6
|
const crypto = require('crypto');
|
|
7
7
|
const { execSync, execFileSync } = require('child_process');
|
|
8
|
+
const { loadOptionalModule } = require('./private-core-boundary');
|
|
8
9
|
|
|
9
10
|
const { isProTier, FREE_TIER_MAX_GATES } = require('./rate-limiter');
|
|
10
11
|
const {
|
|
@@ -1976,12 +1977,9 @@ function buildRecentCorrectiveActionsContext(options = {}) {
|
|
|
1976
1977
|
function buildRelevantLessonContext(toolName, toolInput) {
|
|
1977
1978
|
if (!toolName) return null;
|
|
1978
1979
|
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
} catch {
|
|
1983
|
-
return null;
|
|
1984
|
-
}
|
|
1980
|
+
const { retrieveRelevantLessons } = loadOptionalModule('./lesson-retrieval', () => ({
|
|
1981
|
+
retrieveRelevantLessons: () => [],
|
|
1982
|
+
}));
|
|
1985
1983
|
|
|
1986
1984
|
// Extract a searchable action context from the tool input
|
|
1987
1985
|
const actionContext = extractActionContext(toolName, toolInput);
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Build the static "creator webinar" growth-campaign descriptor.
 *
 * @param {object} [input] - Optional overrides.
 * @param {string} [input.appUrl] - Base URL used for the webinar call-to-action link.
 * @param {string} [input.webinarTitle] - Title shown for the webinar.
 * @param {string} [input.offerCode] - Offer code appended to the call-to-action link.
 * @returns {object} Campaign descriptor with `campaignId`, `channelFit`,
 *   `audience`, `webinar`, `paywall` and `posts`.
 */
function buildCreatorGrowthCampaign(input = {}) {
  // A default parameter only covers `undefined`; treat an explicit null the same way.
  const settings = input || {};
  // Drop trailing slashes so a base such as "https://host/" does not produce "https://host//#…".
  const appUrl = String(settings.appUrl || 'https://thumbgate-production.up.railway.app').replace(/\/+$/, '');
  const webinarTitle = settings.webinarTitle || 'Stop AI Agents From Repeating Expensive Mistakes';
  // Percent-encode the caller-supplied code so characters like "&", "#" or spaces
  // cannot break or inject parameters into the link.
  const offerCode = encodeURIComponent(String(settings.offerCode || 'AGENTGATES'));
  return {
    campaignId: 'creator_webinar_agent_governance',
    channelFit: ['beehiiv', 'linkedin', 'newsletter', 'webinar', 'youtube'],
    audience: 'founders, engineering managers, AI automators, and creator-operators shipping with coding agents',
    webinar: {
      title: webinarTitle,
      promise: 'In 30 minutes, see how a thumbs-down turns into a pre-action gate that blocks the same agent mistake next time.',
      demoFlow: [
        'Show a risky agent action before ThumbGate.',
        'Capture corrective feedback with context.',
        'Regenerate the prevention rule.',
        'Replay the action and show the gate blocking it.',
        'Export the decision journal and proof report.',
      ],
      // NOTE(review): the "?utm_…" parameters come after the "#", so they are part of
      // the URL fragment rather than the query string — confirm the landing page
      // reads them from the hash.
      cta: `${appUrl}/#workflow-sprint-intake?utm_source=beehiiv&utm_campaign=creator_webinar_agent_governance&offer=${offerCode}`,
    },
    paywall: {
      freeMeter: 2,
      paidTrial: '$1 for 14 days',
      paidContent: [
        'Routine-ready security audit prompt',
        'CRE prompt review checklist',
        'Data Table Agent schema planner template',
        'Workspace Agent approval-policy checklist',
      ],
    },
    posts: [
      {
        platform: 'linkedin',
        text: 'AI agents are becoming scheduled coworkers. The missing layer is enforcement: approvals, evidence, rollback, and memory that blocks repeat mistakes. ThumbGate turns feedback into pre-action gates.',
      },
      {
        platform: 'newsletter',
        text: 'This week: how to stop prompting and hoping. Treat prompts as runtime programs, require evidence before tool actions, and use ThumbGate to block known-bad agent patterns.',
      },
    ],
  };
}
|
|
46
|
+
|
|
47
|
+
module.exports = {
|
|
48
|
+
buildCreatorGrowthCampaign,
|
|
49
|
+
};
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* Detection priority (first match wins):
|
|
10
10
|
* 1. THUMBGATE_HARNESS env var — explicit override
|
|
11
11
|
* 2. Tool-name heuristic (Edit/Write/MultiEdit → code-edit)
|
|
12
|
-
* 3. Command-text heuristic (deploy keywords → deploy, SQL keywords → db-write)
|
|
12
|
+
* 3. Command-text heuristic (deploy keywords → deploy, SQL keywords → db-write, routines → routine)
|
|
13
13
|
* 4. null → load only default.json + auto-promoted gates
|
|
14
14
|
*
|
|
15
15
|
* Each harness is ADDITIVE — default.json gates always load first.
|
|
@@ -25,6 +25,7 @@ const HARNESSES = Object.freeze({
|
|
|
25
25
|
deploy: path.join(HARNESS_DIR, 'deploy.json'),
|
|
26
26
|
'code-edit': path.join(HARNESS_DIR, 'code-edit.json'),
|
|
27
27
|
'db-write': path.join(HARNESS_DIR, 'db-write.json'),
|
|
28
|
+
routine: path.join(HARNESS_DIR, 'routine.json'),
|
|
28
29
|
});
|
|
29
30
|
|
|
30
31
|
// ---------------------------------------------------------------------------
|
|
@@ -50,6 +51,14 @@ const DB_WRITE_PATTERNS = [
|
|
|
50
51
|
/\.db\.exec\(|\.db\.prepare\(/i,
|
|
51
52
|
];
|
|
52
53
|
|
|
54
|
+
// Command-text patterns that select the `routine` harness. All are
// case-insensitive; a command matches when any single pattern matches.
//
// NOTE(review): the last pattern matches bare words such as "context" and
// "role", and selectHarness tests these patterns before DEPLOY_PATTERNS —
// confirm that commands like `kubectl --context …` are meant to resolve to the
// routine harness rather than deploy.
const ROUTINE_PATTERNS = [
  // Scheduling / trigger vocabulary.
  /\b(routine|scheduled agent|workspace agent|webhook trigger|post[-\s]?merge|nightly|daily audit)\b/i,

  // Prompt and model-control settings.
  /\b(reasoning_effort|system prompt|developer message|verbosity|length limits)\b/i,

  // Specific model names and effort keywords.
  /\b(gpt-5\.5|gpt-5\.5-pro|xhigh|ultrathink)\b/i,

  // A named SaaS product followed, later in the same line, by a mutating verb.
  /\b(slack|salesforce|gmail|google drive|notion|jira|linear|atlassian)\b.*\b(send|post|write|update|delete|create)\b/i,

  // Prompt-engineering vocabulary.
  /\b(context|role|expectations|few[-\s]?shot|zero[-\s]?shot|prompt template|prompt library)\b/i,
];
|
|
61
|
+
|
|
53
62
|
const CODE_EDIT_TOOL_NAMES = new Set(['Edit', 'Write', 'MultiEdit', 'NotebookEdit']);
|
|
54
63
|
|
|
55
64
|
// ---------------------------------------------------------------------------
|
|
@@ -84,6 +93,9 @@ function selectHarness(toolName, toolInput) {
|
|
|
84
93
|
if (DB_WRITE_PATTERNS.some((p) => p.test(commandText))) {
|
|
85
94
|
return HARNESSES['db-write'];
|
|
86
95
|
}
|
|
96
|
+
if (ROUTINE_PATTERNS.some((p) => p.test(commandText))) {
|
|
97
|
+
return HARNESSES.routine;
|
|
98
|
+
}
|
|
87
99
|
if (DEPLOY_PATTERNS.some((p) => p.test(commandText))) {
|
|
88
100
|
return HARNESSES['deploy'];
|
|
89
101
|
}
|
|
@@ -168,7 +180,7 @@ function scoreHarnessAudit(inputs = {}, options = {}) {
|
|
|
168
180
|
const mcpToolCount = Number(inputs.mcpToolCount || 0);
|
|
169
181
|
const progressiveToolIndexPresent = Boolean(inputs.progressiveToolIndexPresent);
|
|
170
182
|
const specializedHarnesses = Array.isArray(inputs.specializedHarnesses) ? inputs.specializedHarnesses : [];
|
|
171
|
-
const hasSpecializedHarnesses = specializedHarnesses.length >=
|
|
183
|
+
const hasSpecializedHarnesses = specializedHarnesses.length >= 4;
|
|
172
184
|
const missingDocs = globalDocs.filter((doc) => doc.exists === false).map((doc) => doc.name);
|
|
173
185
|
const observations = [];
|
|
174
186
|
const recommendations = [];
|
|
@@ -193,8 +205,8 @@ function scoreHarnessAudit(inputs = {}, options = {}) {
|
|
|
193
205
|
|
|
194
206
|
if (!hasSpecializedHarnesses) {
|
|
195
207
|
score -= 18;
|
|
196
|
-
observations.push('Fewer than
|
|
197
|
-
recommendations.push('Add workflow-specific harnesses for deploy, code-edit,
|
|
208
|
+
observations.push('Fewer than four specialized gate harnesses are available for risky workflows.');
|
|
209
|
+
recommendations.push('Add workflow-specific harnesses for deploy, code-edit, database-write, and unattended routine actions so default gates stay lean.');
|
|
198
210
|
} else {
|
|
199
211
|
observations.push(`Specialized harnesses are available: ${specializedHarnesses.join(', ')}.`);
|
|
200
212
|
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Build the static plan descriptor for the multi-step hybrid supervisor pattern.
 *
 * @param {object} [options] - Optional overrides.
 * @param {Array<object>} [options.sources] - Source descriptors to use instead
 *   of the three built-in defaults. Anything that is not an array is ignored.
 * @returns {{pattern: string, sources: Array<object>, sourceCount: number,
 *   steps: string[], gates: string[]}} The plan descriptor.
 */
function buildHybridSupervisorPlan(options = {}) {
  // A default parameter only covers `undefined`; an explicit null must not throw.
  const requested = options ? options.sources : undefined;
  // Fall back to the defaults unless a real array was supplied, so `sourceCount`
  // is always a meaningful number.
  const sources = Array.isArray(requested)
    ? requested
    : [
      { id: 'feedback_log', type: 'jsonl', description: 'User thumbs-up/down and correction events.' },
      { id: 'gate_metrics', type: 'sql', description: 'Gate blocks, warnings, pass rates, and timestamps.' },
      { id: 'docs', type: 'vector', description: 'Public docs and operational guides.' },
    ];

  return {
    pattern: 'multi_step_hybrid_supervisor',
    sources,
    sourceCount: sources.length,
    steps: [
      'classify query into structured, unstructured, graph, or mixed',
      'decompose mixed queries into native-source subqueries',
      'run complementary SQL, graph, and vector searches in parallel',
      'join or reconcile result sets',
      'self-correct with a different query path when overlap is empty',
      'verify final answer against source-specific evidence',
    ],
    gates: [
      'prefer native source queries over flattening everything into embeddings',
      'limit initial deployments to 5-10 curated complementary sources',
      'require plain-language source descriptions at ingestion',
      'block final answers when structured and unstructured evidence conflict',
    ],
  };
}
|
|
30
|
+
|
|
31
|
+
/**
 * Classify a query by the kind of data source its wording calls for.
 *
 * @param {*} [query=''] - Query text; coerced to a string and lower-cased.
 * @returns {'hybrid'|'structured'|'graph'|'unstructured'|'general'} `'hybrid'`
 *   when at least two of the three keyword groups match, otherwise the single
 *   matching group (structured, then graph, then unstructured), or `'general'`
 *   when none match.
 */
function classifyHybridQuery(query = '') {
  const text = String(query).toLowerCase();
  // `declin` is a word stem: it needs `\w*` so the closing `\b` can land after
  // "decline", "declining" or "declined". Without it the stem never matches.
  const needsStructured = /\b(count|sum|trend|declin\w*|increase|revenue|sales|rate|over time|sql|table)\b/.test(text);
  const needsUnstructured = /\b(reviews?|feedback|reason|complaints?|docs?|semantic|why|quote|citation)\b/.test(text);
  const needsGraph = /\b(similar|related|path|relationship|because|profile|like you)\b/.test(text);
  if ([needsStructured, needsUnstructured, needsGraph].filter(Boolean).length >= 2) return 'hybrid';
  if (needsStructured) return 'structured';
  if (needsGraph) return 'graph';
  if (needsUnstructured) return 'unstructured';
  return 'general';
}
|
|
42
|
+
|
|
43
|
+
/**
 * Check a hybrid-supervisor run record against the plan's rules.
 *
 * @param {object} [run] - Run record. Fields read: `query`, `decomposed`,
 *   `parallelNativeQueries`, `sourceDescriptionsPresent`, `sourceCount`,
 *   `incrementalRollout`, `emptyOverlap`, `selfCorrected`, `evidenceConflict`,
 *   `escalated`.
 * @returns {{decision: 'warn'|'allow', issues: string[], queryType: string}}
 *   `'warn'` when any rule is violated, with the violated rule codes in order.
 */
function evaluateHybridSupervisorRun(run = {}) {
  const queryType = classifyHybridQuery(run.query || '');
  const isHybrid = queryType === 'hybrid';

  // Each row pairs "is this rule violated?" with the issue code it reports.
  const ruleTable = [
    [isHybrid && !run.decomposed, 'hybrid_query_not_decomposed'],
    [isHybrid && !run.parallelNativeQueries, 'parallel_native_queries_required'],
    [!run.sourceDescriptionsPresent, 'missing_source_descriptions'],
    [(run.sourceCount || 0) > 10 && !run.incrementalRollout, 'too_many_sources_without_incremental_rollout'],
    [run.emptyOverlap && !run.selfCorrected, 'self_correction_required'],
    [run.evidenceConflict && !run.escalated, 'conflicting_evidence_requires_escalation'],
  ];

  const issues = [];
  for (const [violated, code] of ruleTable) {
    if (violated) issues.push(code);
  }

  const decision = issues.length ? 'warn' : 'allow';
  return { decision, issues, queryType };
}
|
|
59
|
+
|
|
60
|
+
module.exports = {
|
|
61
|
+
buildHybridSupervisorPlan,
|
|
62
|
+
classifyHybridQuery,
|
|
63
|
+
evaluateHybridSupervisorRun,
|
|
64
|
+
};
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Describe the three-layer LLM inference cache policy.
 *
 * @param {object} [options] - Optional toggles.
 * @param {boolean} [options.prefixCache] - The prefix layer is enabled unless
 *   this is exactly `false`.
 * @param {*} [options.semanticCache] - The semantic layer is enabled only when
 *   this is truthy.
 * @returns {{policyId: string, layers: Array<object>, promptRules: string[],
 *   invalidation: {prefix: string[], semantic: string[]}}} The policy descriptor.
 */
function buildInferenceCachePolicy(options = {}) {
  const { prefixCache, semanticCache } = options;

  // The KV cache belongs to the model runtime and is always reported as enabled.
  const kvLayer = {
    id: 'kv_cache',
    owner: 'model_runtime',
    enabled: true,
    action: 'no app changes; rely on inference runtime',
  };
  const prefixLayer = {
    id: 'prefix_cache',
    owner: 'agent_harness',
    enabled: prefixCache !== false,
    action: 'place stable system prompt, docs, and examples before dynamic fields',
  };
  const semanticLayer = {
    id: 'semantic_cache',
    owner: 'application',
    enabled: Boolean(semanticCache),
    action: 'cache complete input/output pairs when paraphrased repeat volume is high',
  };

  const promptRules = [
    'static content first',
    'dynamic user/session/date fields last',
    'deterministic JSON key order',
    'no generated timestamps inside cached prefix',
    'version cache keys with prompt and policy versions',
  ];

  const invalidation = {
    prefix: ['system_prompt_version', 'doc_version', 'tool_policy_version'],
    semantic: ['answer_ttl', 'source_doc_version', 'safety_policy_version'],
  };

  return {
    policyId: 'llm_inference_cache',
    layers: [kvLayer, prefixLayer, semanticLayer],
    promptRules,
    invalidation,
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Evaluate a workload's cache configuration and recommend cache layers.
 *
 * @param {object} [candidate] - Workload description. Fields read:
 *   `repeatedPrefixTokens`, `requestsPerDay`, `semanticRepeatRate` (coerced
 *   with `Number`, missing/falsy treated as 0), `dynamicFieldsBeforeStatic`,
 *   `deterministicSerialization`, `prefixCacheEnabled`, `semanticCacheEnabled`,
 *   `ttl`.
 * @returns {{decision: 'warn'|'allow', issues: string[], recommendedLayers: string[]}}
 *   `'warn'` when any issue was recorded; `recommendedLayers` always starts
 *   with `'kv_cache'`.
 */
function evaluateCacheCandidate(candidate = {}) {
  const prefixTokens = Number(candidate.repeatedPrefixTokens || 0);
  const dailyRequests = Number(candidate.requestsPerDay || 0);
  const repeatRate = Number(candidate.semanticRepeatRate || 0);

  // A prefix cache is recommended at >= 1024 repeated tokens and >= 10 requests/day.
  const prefixPaysOff = prefixTokens >= 1024 && dailyRequests >= 10;
  const semanticOn = candidate.semanticCacheEnabled;

  const issues = [];
  if (candidate.dynamicFieldsBeforeStatic) {
    issues.push('dynamic_fields_break_prefix_cache');
  }
  if (!candidate.deterministicSerialization) {
    issues.push('deterministic_serialization_required');
  }
  if (prefixPaysOff && !candidate.prefixCacheEnabled) {
    issues.push('prefix_cache_high_roi_not_enabled');
  }
  // Kept as `< 0.15` (not the negation of `>= 0.15`) so a NaN rate triggers
  // neither this issue nor the recommendation below.
  if (semanticOn && repeatRate < 0.15) {
    issues.push('semantic_cache_overhead_not_justified');
  }
  if (semanticOn && !candidate.ttl) {
    issues.push('semantic_cache_ttl_required');
  }

  const recommendedLayers = ['kv_cache'];
  if (prefixPaysOff) recommendedLayers.push('prefix_cache');
  if (repeatRate >= 0.15) recommendedLayers.push('semantic_cache');

  return {
    decision: issues.length ? 'warn' : 'allow',
    issues,
    recommendedLayers,
  };
}
|
|
68
|
+
|
|
69
|
+
module.exports = {
|
|
70
|
+
buildInferenceCachePolicy,
|
|
71
|
+
evaluateCacheCandidate,
|
|
72
|
+
};
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Heuristically score how demanding a task is, on a 0-100 scale.
 *
 * Each matching signal adds a fixed weight:
 *   - task/prompt text longer than 1200 characters: +20
 *   - text mentions a high-risk keyword (case-insensitive): +25
 *   - `dollarImpact` of at least 1000: +25
 *   - more than 5 entries in `files`: +15
 *   - `requiresHumanApproval` is exactly `true`: +15
 *
 * @param {object} [input]
 * @param {*} [input.task] - Preferred text source; `input.prompt` is used when this is falsy.
 * @param {*} [input.prompt]
 * @param {*} [input.dollarImpact]
 * @param {Array} [input.files]
 * @param {boolean} [input.requiresHumanApproval]
 * @returns {number} Score clamped to the range 0..100.
 */
function estimateDifficulty(input = {}) {
  const body = String(input.task || input.prompt || '');
  const riskyTopic = /ambiguous|research|architecture|security|production|migration|legal|financial/i;

  // [signal present, weight] pairs, summed below.
  const signals = [
    [body.length > 1200, 20],
    [riskyTopic.test(body), 25],
    [Number(input.dollarImpact || 0) >= 1000, 25],
    [Array.isArray(input.files) && input.files.length > 5, 15],
    [input.requiresHumanApproval === true, 15],
  ];

  const total = signals.reduce((sum, [present, weight]) => (present ? sum + weight : sum), 0);
  return Math.min(100, Math.max(0, total));
}
|
|
14
|
+
|
|
15
|
+
/**
 * Choose reasoning depth, effort, expert count and human handoff for a task.
 *
 * Tiers by difficulty:
 *   - >= 70: deep / high effort / 4 experts / human handoff
 *   - >= 35: standard / medium effort / 2 experts
 *   - otherwise: shallow / low effort / 1 expert
 *
 * A budget under 20 cents downgrades a deep plan to standard depth and medium
 * effort; the expert count and handoff flag are left as chosen by the tier.
 *
 * @param {object} [input]
 * @param {number} [input.difficulty] - Used as-is when it is a finite number;
 *   otherwise the score comes from `estimateDifficulty(input)`.
 * @param {number|string|null} [input.maxCostCents] - Cost ceiling in cents.
 *   Defaults to 50 when missing, `null`, blank, or not a finite number.
 * @returns {{difficulty: number, maxCostCents: number, depth: string,
 *   reasoningEffort: string, activeExperts: number, humanHandoff: boolean,
 *   telemetry: string[]}}
 */
function planInferenceBudget(input = {}) {
  const DEFAULT_MAX_COST_CENTS = 50;

  const difficulty = Number.isFinite(input.difficulty) ? input.difficulty : estimateDifficulty(input);

  // `Number(null)` and `Number('')` are both 0, which would silently turn an
  // absent budget into a zero-cent budget. Treat those inputs as "not provided".
  const rawBudget = input.maxCostCents;
  const budgetMissing = rawBudget == null
    || (typeof rawBudget === 'string' && rawBudget.trim() === '');
  const parsedBudget = budgetMissing ? NaN : Number(rawBudget);
  const maxCostCents = Number.isFinite(parsedBudget) ? parsedBudget : DEFAULT_MAX_COST_CENTS;

  let depth = 'shallow';
  let reasoningEffort = 'low';
  let expertCount = 1;
  let humanHandoff = false;

  if (difficulty >= 70) {
    depth = 'deep';
    reasoningEffort = 'high';
    expertCount = 4;
    humanHandoff = true;
  } else if (difficulty >= 35) {
    depth = 'standard';
    reasoningEffort = 'medium';
    expertCount = 2;
  }

  // A tight budget cannot afford deep reasoning; fall back one tier.
  if (maxCostCents < 20 && depth === 'deep') {
    depth = 'standard';
    reasoningEffort = 'medium';
  }

  return {
    difficulty,
    maxCostCents,
    depth,
    reasoningEffort,
    activeExperts: expertCount,
    humanHandoff,
    telemetry: ['difficulty', 'depth', 'reasoningEffort', 'activeExperts', 'latencyMs', 'costCents', 'outcome'],
  };
}
|
|
49
|
+
|
|
50
|
+
module.exports = {
|
|
51
|
+
estimateDifficulty,
|
|
52
|
+
planInferenceBudget,
|
|
53
|
+
};
|
|
@@ -12,13 +12,18 @@ const {
|
|
|
12
12
|
constructContextPack,
|
|
13
13
|
recordProvenance,
|
|
14
14
|
} = require('./contextfs');
|
|
15
|
-
const { planIntent } = require('./intent-router');
|
|
16
15
|
const { formatCodeGraphRecallSection } = require('./codegraph-context');
|
|
17
16
|
|
|
18
17
|
const KNOWN_SOURCES = new Set(['github', 'slack', 'linear', 'api', 'cli']);
|
|
19
18
|
const DEFAULT_SOURCE = 'api';
|
|
20
19
|
const DEFAULT_SANDBOX_ROOT = path.join(os.tmpdir(), 'thumbgate-internal-agent-sandboxes');
|
|
21
20
|
|
|
21
|
+
/**
 * Load the sibling `intent-router.js` module if it exists on disk.
 *
 * @returns {object|null} The result of requiring the module, or `null` when
 *   no `intent-router.js` file exists next to this script.
 */
function loadIntentRouterModule() {
  const routerFile = path.resolve(__dirname, 'intent-router.js');
  // Check for the file first so a missing router yields null instead of a throw.
  return fs.existsSync(routerFile) ? require(routerFile) : null;
}
|
|
26
|
+
|
|
22
27
|
function normalizeText(value) {
|
|
23
28
|
if (value === undefined || value === null) return '';
|
|
24
29
|
return String(value).trim();
|
|
@@ -422,7 +427,12 @@ function bootstrapInternalAgent(options = {}) {
|
|
|
422
427
|
baseRef: null,
|
|
423
428
|
};
|
|
424
429
|
|
|
425
|
-
const
|
|
430
|
+
const intentRouter = loadIntentRouterModule();
|
|
431
|
+
if (!intentRouter) {
|
|
432
|
+
throw new Error('Internal agent bootstrap requires the ThumbGate private intent router runtime.');
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
const plan = intentRouter.planIntent({
|
|
426
436
|
intentId: invocation.intentId,
|
|
427
437
|
context: startupContext.text,
|
|
428
438
|
mcpProfile: invocation.mcpProfile,
|