thumbgate 1.15.0 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +59 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +210 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +157 -8
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +55 -48
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +6 -6
- package/public/compare.html +29 -23
- package/public/dashboard.html +82 -10
- package/public/guide.html +28 -28
- package/public/index.html +216 -98
- package/public/learn.html +50 -22
- package/public/lessons.html +1 -1
- package/public/numbers.html +17 -17
- package/public/pro.html +82 -18
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-schema.js +18 -2
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +92 -4
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +16 -4
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +232 -55
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +63 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/src/api/server.js +381 -120
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
|
@@ -339,7 +339,7 @@ function renderTrialWelcomeBodies({ licenseKey, customerId, customerName, trialE
|
|
|
339
339
|
const headline = 'Your ThumbGate Pro trial is live.';
|
|
340
340
|
const subhead = `You have 7 days of Pro access. Trial ends ${trialEndLabel}.`;
|
|
341
341
|
const description =
|
|
342
|
-
'ThumbGate turns thumbs up/down feedback into Pre-Action
|
|
342
|
+
'ThumbGate turns thumbs up/down feedback into Pre-Action Checks that stop repeated AI coding mistakes ' +
|
|
343
343
|
'before the next tool call. Lessons stay on your machine. Repeated failures become Reliability Gateway blocks.';
|
|
344
344
|
const exampleFeedback =
|
|
345
345
|
'thumbs down: the answer skipped exact files and tests; next time include paths, commands, and verification evidence.';
|
|
@@ -503,10 +503,121 @@ async function sendTrialWelcomeEmail({ to, licenseKey, customerId, customerName,
|
|
|
503
503
|
return sendEmail({ to, subject, html, text, replyTo: getReplyTo(), fetchImpl, dnsResolver });
|
|
504
504
|
}
|
|
505
505
|
|
|
506
|
+
/**
 * Render the newsletter welcome email in both HTML and plain-text form.
 *
 * Contact and business details come from the module's getter helpers
 * (getSupportEmail, getUnsubscribeEmail, getBusinessName, getBusinessAddress).
 * Every one of those values is passed through escapeHtml before it is placed
 * in the HTML body; the plain-text body uses the raw values.
 *
 * @returns {{ html: string, text: string }} The two bodies of the email.
 */
function renderNewsletterWelcomeBodies() {
  const supportEmail = getSupportEmail();
  const unsubscribeEmail = getUnsubscribeEmail();
  const businessName = getBusinessName();
  const businessAddress = getBusinessAddress();
  const unsubscribeMailto = `mailto:${unsubscribeEmail}?subject=unsubscribe&body=Please%20remove%20me%20from%20ThumbGate%20emails.`;

  const headline = 'Welcome to ThumbGate.';
  const subhead = 'One concrete AI coding failure prevented per email. No theory, no fluff.';
  const firstLesson = [
    'First lesson: the most expensive AI mistake is the one it repeats. ',
    'ThumbGate turns thumbs up/down signals into Pre-Action Checks that stop ',
    'the next recurrence before the tool call runs.',
  ].join('');
  const ctaLink = 'https://thumbgate.ai/pro';

  // Plain text: paragraphs separated by a blank line, then a compact footer.
  const paragraphs = [
    headline,
    subhead,
    firstLesson,
    `Want the full stop-repeating-mistakes loop locally? ${ctaLink}`,
    `Questions? Reply to this email or write ${supportEmail}.`,
    '— Igor, founder of ThumbGate',
  ];
  const footer = [
    '---',
    `You're getting this because you signed up on thumbgate.ai. Unsubscribe: ${unsubscribeEmail}`,
    `${businessName} · ${businessAddress}`,
  ];
  const text = `${paragraphs.join('\n\n')}\n\n${footer.join('\n')}`;

  // HTML-escaped copies of everything interpolated into the markup below.
  const safe = {
    headline: escapeHtml(headline),
    subhead: escapeHtml(subhead),
    firstLesson: escapeHtml(firstLesson),
    supportEmail: escapeHtml(supportEmail),
    businessName: escapeHtml(businessName),
    businessAddress: escapeHtml(businessAddress),
    unsubscribeEmail: escapeHtml(unsubscribeEmail),
    unsubscribeMailto: escapeHtml(unsubscribeMailto),
  };

  const html = `<!doctype html>
<html>
<body style="margin:0;background:#f5f7fb;padding:28px 12px;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,Arial,sans-serif;color:#17212b;">
<table role="presentation" width="100%" cellspacing="0" cellpadding="0" style="border-collapse:collapse;">
<tr>
<td align="center">
<table role="presentation" width="100%" cellspacing="0" cellpadding="0" style="border-collapse:collapse;max-width:640px;background:#ffffff;border:1px solid #d8e2ea;border-radius:10px;overflow:hidden;">
<tr>
<td style="background:#071115;padding:24px 28px;color:#e7fbff;">
<div style="font-size:13px;font-weight:700;letter-spacing:0.02em;text-transform:uppercase;color:#73d4e9;">ThumbGate</div>
<h1 style="margin:10px 0 6px;font-size:24px;line-height:1.25;color:#ffffff;">${safe.headline}</h1>
<p style="margin:0;font-size:14px;line-height:1.5;color:#9cbac4;">${safe.subhead}</p>
</td>
</tr>
<tr>
<td style="padding:24px 28px 10px;">
<p style="margin:0 0 18px;font-size:15px;line-height:1.6;color:#344451;">${safe.firstLesson}</p>
<p style="margin:0 0 22px;">
<a href="${ctaLink}" style="display:inline-block;background:#45bfd8;color:#061015;text-decoration:none;font-weight:700;padding:12px 22px;border-radius:6px;font-size:15px;">See the full Pro loop</a>
</p>
</td>
</tr>
<tr>
<td style="padding:0 28px 22px;">
<p style="margin:0 0 4px;font-size:14px;line-height:1.6;color:#17212b;">— Igor, founder of ThumbGate</p>
<p style="margin:0;font-size:13px;line-height:1.55;color:#526273;">
Questions? Reply or write
<a href="mailto:${safe.supportEmail}" style="color:#087a91;">${safe.supportEmail}</a>.
</p>
</td>
</tr>
<tr>
<td style="padding:16px 28px 22px;border-top:1px solid #e2e8ec;background:#fafbfc;">
<p style="margin:0 0 6px;font-size:12px;line-height:1.5;color:#7a8790;">
You signed up on thumbgate.ai.
<a href="${safe.unsubscribeMailto}" style="color:#7a8790;text-decoration:underline;">Unsubscribe</a>
(${safe.unsubscribeEmail}).
</p>
<p style="margin:0;font-size:12px;line-height:1.5;color:#7a8790;">
${safe.businessName} · ${safe.businessAddress}
</p>
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>`;

  return { html, text };
}
|
|
600
|
+
|
|
601
|
+
/**
 * Send the newsletter welcome email to one recipient.
 *
 * @param {object} [params]
 * @param {string} params.to - Recipient address; must pass isNonEmptyString.
 * @param {*} [params.fetchImpl] - Forwarded unchanged to sendEmail.
 * @param {*} [params.dnsResolver] - Forwarded unchanged to sendEmail.
 * @returns {Promise<*>} Whatever sendEmail resolves to.
 * @throws {Error} When `to` is missing or empty (surfaces as a rejected promise).
 */
async function sendNewsletterWelcomeEmail({ to, fetchImpl, dnsResolver } = {}) {
  if (!isNonEmptyString(to)) {
    throw new Error('sendNewsletterWelcomeEmail: `to` is required');
  }

  const bodies = renderNewsletterWelcomeBodies();
  const message = {
    to,
    subject: 'Welcome to ThumbGate — one AI mistake prevented per email',
    html: bodies.html,
    text: bodies.text,
    replyTo: getReplyTo(),
    fetchImpl,
    dnsResolver,
  };
  return sendEmail(message);
}
|
|
614
|
+
|
|
506
615
|
module.exports = {
|
|
507
616
|
sendEmail,
|
|
508
617
|
sendTrialWelcomeEmail,
|
|
618
|
+
sendNewsletterWelcomeEmail,
|
|
509
619
|
renderTrialWelcomeBodies,
|
|
620
|
+
renderNewsletterWelcomeBodies,
|
|
510
621
|
_resolveSenderAddress: resolveSenderAddress,
|
|
511
622
|
_hasResendSenderDns: hasResendSenderDns,
|
|
512
623
|
_recordsHaveResendDns: recordsHaveResendDns,
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Signals that raise (positive weight) or lower (negative weight) the case for
 * moving an MCP service from JSON-RPC over HTTP to gRPC.
 *
 * Scoring and the human-readable reasons are both derived from this one table
 * so a threshold can never be changed in one place and forgotten in the other.
 * Order matters: it is the order in which reasons are reported.
 */
const TRANSPORT_SIGNALS = Object.freeze([
  {
    reason: 'high_frequency_tool_calls',
    weight: 30,
    applies: (service) => (service.callsPerMinute || 0) >= 120,
  },
  {
    reason: 'many_concurrent_agents',
    weight: 20,
    applies: (service) => (service.concurrentAgents || 0) >= 10,
  },
  {
    reason: 'streaming_or_long_running_flow',
    weight: 20,
    applies: (service) => Boolean(service.streaming),
  },
  {
    reason: 'backend_already_grpc',
    weight: 15,
    applies: (service) => Boolean(service.existingGrpc),
  },
  {
    reason: 'json_shim_exists_only_for_agents',
    weight: 15,
    applies: (service) => Boolean(service.doubleStackedJsonShim),
  },
  {
    reason: 'llm_latency_dominates_transport',
    weight: -25,
    applies: (service) => Boolean(service.inferenceDominated),
  },
]);

/**
 * Return the signals that apply to a service profile.
 * A null or undefined profile is treated as an empty one instead of throwing.
 */
function activeTransportSignals(service) {
  const profile = service ?? {};
  return TRANSPORT_SIGNALS.filter((signal) => signal.applies(profile));
}

/**
 * Score how strongly a service would benefit from a gRPC transport.
 *
 * @param {object} [service] - Service profile (callsPerMinute, concurrentAgents,
 *   streaming, existingGrpc, doubleStackedJsonShim, inferenceDominated).
 * @returns {number} Sum of the applicable signal weights, clamped to 0..100.
 */
function scoreTransportNeed(service = {}) {
  const total = activeTransportSignals(service)
    .reduce((sum, signal) => sum + signal.weight, 0);
  return Math.max(0, Math.min(100, total));
}

/**
 * Recommend a transport for one service and explain why.
 *
 * @param {object} [service] - Service profile; `name` labels the result.
 * @returns {{service: string, score: number, transport: string, reasons: string[], rollout: string[]}}
 *   `transport` is 'grpc' when the score is at least 50, otherwise 'json_rpc_http'.
 */
function recommendMcpTransport(service = {}) {
  const profile = service ?? {};
  const score = scoreTransportNeed(profile);
  const reasons = activeTransportSignals(profile).map((signal) => signal.reason);
  const transport = score >= 50 ? 'grpc' : 'json_rpc_http';

  // Fresh arrays on every call so callers may mutate their copy safely.
  const rollout = transport === 'grpc'
    ? [
      'pilot pluggable transport behind config',
      'reuse protobuf contracts where present',
      'add contract tests and stream retry policy',
      'compare latency throughput and error rate against JSON-RPC baseline',
      'deprecate redundant JSON shim only after soak evidence',
    ]
    : [
      'keep JSON-RPC over HTTP',
      'avoid transport churn until tool-call volume or streaming pressure changes',
      'continue validating payloads at MCP boundary',
    ];

  return {
    service: profile.name || 'unnamed-service',
    score,
    transport,
    reasons,
    rollout,
  };
}

/**
 * Build a migration plan across several services.
 *
 * @param {object[]} [services] - Service profiles; a non-array value is treated
 *   as an empty list, and null entries are scored as empty profiles.
 * @returns {{recommendations: object[], pilots: object[], guardrails: string[], metrics: string[]}}
 *   `pilots` holds at most one entry: the first service recommended for gRPC.
 */
function buildMcpTransportMigrationPlan(services = []) {
  const profiles = Array.isArray(services) ? services : [];
  const recommendations = profiles.map((service) => recommendMcpTransport(service));
  return {
    recommendations,
    pilots: recommendations.filter((item) => item.transport === 'grpc').slice(0, 1),
    guardrails: [
      'tool definitions remain transport agnostic',
      'wire protocol lives behind adapters',
      'semantic tool descriptions stay available to the LLM',
      'no JSON shim removal before contract tests and soak metrics pass',
    ],
    metrics: ['p95_tool_latency_ms', 'tool_error_rate', 'stream_reconnects', 'payload_validation_failures'],
  };
}
|
|
61
|
+
|
|
62
|
+
module.exports = {
|
|
63
|
+
buildMcpTransportMigrationPlan,
|
|
64
|
+
recommendMcpTransport,
|
|
65
|
+
scoreTransportNeed,
|
|
66
|
+
};
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Coerce any value to a trimmed string.
 *
 * @param {*} value - Value to normalize.
 * @returns {string} '' for null/undefined, otherwise String(value) with
 *   surrounding whitespace removed.
 */
function normalizeText(value) {
  return value == null ? '' : String(value).trim();
}
|
|
8
|
+
|
|
9
|
+
/**
 * Classify a memory file by keywords in its path.
 *
 * The secret check runs first on purpose: a path that mentions both a secret
 * keyword and a softer keyword (for example "api-token-format.md") must be
 * blocked, not treated as a promotable preference. The remaining checks keep
 * their original order.
 *
 * @param {*} filePath - Path to classify; null/undefined is treated as ''.
 * @returns {'blocked_secret'|'preference'|'workflow_state'|'sensitive_context'|'general'}
 */
function classifyMemoryFile(filePath) {
  // Same normalization as the module's normalizeText, then case-folded.
  const normalized = String(filePath ?? '').trim().toLowerCase();
  if (/credential|token|secret|password|key/.test(normalized)) return 'blocked_secret';
  if (/preference|style|tone|format/.test(normalized)) return 'preference';
  if (/task|completed|todo|draft/.test(normalized)) return 'workflow_state';
  if (/account|customer|user|contact/.test(normalized)) return 'sensitive_context';
  return 'general';
}
|
|
17
|
+
|
|
18
|
+
/**
 * Map a memory-file classification to the governance action taken on it.
 *
 * @param {string} classification - Value produced by classifyMemoryFile.
 * @returns {'block'|'redact_before_export'|'allow_reviewed_promotion'}
 *   Anything other than the two restricted classes is allowed after review.
 */
function actionForClassification(classification) {
  switch (classification) {
    case 'blocked_secret':
      return 'block';
    case 'sensitive_context':
      return 'redact_before_export';
    default:
      return 'allow_reviewed_promotion';
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Build a governance report for a file-backed agent memory store.
 *
 * @param {object} [input]
 * @param {Array<string|{path: string}>} [input.files] - Memory files, given as
 *   path strings or objects with a `path`. Entries that are null/undefined or
 *   whose path is empty after trimming are skipped rather than throwing.
 * @param {*} [input.generatedAt] - Timestamp label; defaults to the current
 *   time as an ISO string when missing or blank.
 * @returns {{generatedAt: string, storeKind: string, records: object[], policy: object, summary: object}}
 */
function buildMemoryStoreGovernance(input = {}) {
  // `= {}` only covers undefined; an explicit null must not crash either.
  const options = input ?? {};
  const files = Array.isArray(options.files) ? options.files : [];

  const records = [];
  for (const file of files) {
    // Optional chaining tolerates null/undefined entries in the list.
    const rawPath = typeof file === 'string' ? file : file?.path;
    const path = normalizeText(rawPath);
    if (!path) continue;

    const classification = classifyMemoryFile(rawPath);
    records.push({
      path,
      classification,
      promotable: !['blocked_secret', 'sensitive_context'].includes(classification),
      action: actionForClassification(classification),
    });
  }

  const countWhere = (predicate) => records.filter(predicate).length;

  return {
    generatedAt: normalizeText(options.generatedAt) || new Date().toISOString(),
    storeKind: 'file_backed_agent_memory',
    records,
    policy: {
      export: 'allowed_after_redaction',
      import: 'requires_schema_validation',
      promotion: 'requires_review_and_actionable_context',
      deletion: 'append_decision_journal_entry',
    },
    summary: {
      totalFiles: records.length,
      blocked: countWhere((record) => record.action === 'block'),
      redactBeforeExport: countWhere((record) => record.action === 'redact_before_export'),
      promotable: countWhere((record) => record.promotable),
    },
  };
}
|
|
55
|
+
|
|
56
|
+
module.exports = {
|
|
57
|
+
actionForClassification,
|
|
58
|
+
buildMemoryStoreGovernance,
|
|
59
|
+
classifyMemoryFile,
|
|
60
|
+
};
|
|
@@ -36,7 +36,7 @@ const { resolveFeedbackDir } = require('./feedback-paths');
|
|
|
36
36
|
const { parseFeedbackFile, classifySignal, promoteToGates } = require('./feedback-to-rules');
|
|
37
37
|
const { loadAutoGates, saveAutoGates, getAutoGatesPath, patternToGateId } = require('./auto-promote-gates');
|
|
38
38
|
const { readEvolutionState, writeEvolutionState, captureEvolutionSnapshot, applyAcceptedMutation } = require('./evolution-state');
|
|
39
|
-
const { isAvailable,
|
|
39
|
+
const { isAvailable, callClaudeJson, MODELS } = require('./llm-client');
|
|
40
40
|
const { ensureParentDir } = require('./fs-utils');
|
|
41
41
|
|
|
42
42
|
// ---------------------------------------------------------------------------
|
|
@@ -165,24 +165,18 @@ async function generateCandidatesViaLLM(failures, successDef, blockPatterns) {
|
|
|
165
165
|
`Generate ${CANDIDATES_PER_RUN} candidate prevention rules that would catch these failures.`,
|
|
166
166
|
].join('\n\n');
|
|
167
167
|
|
|
168
|
-
const
|
|
168
|
+
const parsed = await callClaudeJson({
|
|
169
169
|
systemPrompt: CANDIDATE_SYSTEM_PROMPT,
|
|
170
170
|
userPrompt,
|
|
171
171
|
model: MODELS.FAST,
|
|
172
172
|
maxTokens: 1200,
|
|
173
|
+
cache: true,
|
|
173
174
|
});
|
|
174
175
|
|
|
175
|
-
if (!
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
if (!Array.isArray(parsed)) return null;
|
|
180
|
-
return parsed
|
|
181
|
-
.filter((r) => r.pattern && r.action && r.message && r.severity)
|
|
182
|
-
.slice(0, CANDIDATES_PER_RUN);
|
|
183
|
-
} catch {
|
|
184
|
-
return null;
|
|
185
|
-
}
|
|
176
|
+
if (!Array.isArray(parsed)) return null;
|
|
177
|
+
return parsed
|
|
178
|
+
.filter((r) => r.pattern && r.action && r.message && r.severity)
|
|
179
|
+
.slice(0, CANDIDATES_PER_RUN);
|
|
186
180
|
}
|
|
187
181
|
|
|
188
182
|
function generateCandidatesHeuristic(failures, blockPatterns) {
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Turn any value into a trimmed string, mapping null and undefined to ''.
 *
 * @param {*} value - Value to normalize.
 * @returns {string} The trimmed string form of `value`.
 */
function normalizeText(value) {
  const text = String(value ?? '');
  return text.trim();
}
|
|
8
|
+
|
|
9
|
+
/**
 * Check whether a model can be used, given how access to it is granted.
 *
 * A model counts as gated when its name or access type contains one of the
 * gating keywords below. Gated models without approval produce warnings and a
 * fallback suggestion; this function never returns a hard block.
 *
 * @param {object} [input]
 * @param {*} [input.model] - Model name; defaults to 'unknown'.
 * @param {*} [input.accessType] - Access type; defaults to 'public'.
 * @param {boolean} [input.approved] - Any of approved / invited / allowListed
 *   being exactly `true` counts as approval.
 * @param {boolean} [input.invited]
 * @param {boolean} [input.allowListed]
 * @param {boolean} [input.openSourceMaintainer] - Adds a note that the
 *   maintainer path is possible but not guaranteed.
 * @param {*} [input.platform] - Cloud platform name, matched by keyword.
 * @returns {{model: string, accessType: string, decision: 'allow'|'warn', issues: string[], fallback: string|null}}
 */
function evaluateModelAccessEligibility(input = {}) {
  const model = normalizeText(input.model) || 'unknown';
  const accessType = normalizeText(input.accessType) || 'public';

  const hasApproval = [input.approved, input.invited, input.allowListed]
    .some((flag) => flag === true);
  const isMaintainer = input.openSourceMaintainer === true;
  const isGated = /mythos|preview|research|private|invite|glasswing/i.test(`${model} ${accessType}`);
  const needsApproval = isGated && !hasApproval;

  const issues = [];
  if (needsApproval) {
    issues.push('approval_required_before_platform_setup');
  }
  if (needsApproval && isMaintainer) {
    issues.push('maintainer_path_is_possible_not_guaranteed');
  }
  // The platform is only inspected for gated models.
  const onCloudPlatform = isGated
    && /aws|bedrock|vertex|foundry|azure|gcp/i.test(normalizeText(input.platform));
  if (onCloudPlatform && !hasApproval) {
    issues.push('platform_docs_do_not_create_model_access');
  }

  const decision = issues.length === 0 ? 'allow' : 'warn';
  const fallback = needsApproval ? 'Use a public model route until approval exists.' : null;
  return { model, accessType, decision, issues, fallback };
}
|
|
35
|
+
|
|
36
|
+
module.exports = {
|
|
37
|
+
evaluateModelAccessEligibility,
|
|
38
|
+
};
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Describe what must be measured and proven before switching models.
 *
 * @param {object} [options]
 * @param {string} [options.targetModel] - Model being adopted; falls back to
 *   'gpt-5.5' when falsy.
 * @param {string} [options.currentModel] - Model being replaced; falls back to
 *   'current-codex-default' when falsy.
 * @param {string} [options.migrationReason] - Why the migration is wanted;
 *   falls back to a generic reason when falsy.
 * @returns {{targetModel: string, currentModel: string, migrationReason: string,
 *   benchmarkSuites: string[], evalDimensions: string[], routingPolicy: object}}
 */
function buildModelMigrationPlan(options = {}) {
  const { targetModel, currentModel, migrationReason } = options;

  const benchmarkSuites = [
    'npm run test:high-roi',
    'npm run prove:adapters',
    'npm run prove:automation',
    'npm run self-heal:check',
  ];
  const evalDimensions = [
    'unsupported_completion_claim_rate',
    'tool_call_accuracy',
    'token_cost_per_verified_task',
    'regression_rate',
    'computer_use_error_rate',
    'research_loop_persistence',
  ];
  const routingPolicy = {
    lowRisk: 'allow_after_smoke_pass',
    highRisk: 'allow_after_holdout_and_proof_pass',
    destructiveActions: 'human_review_plus_evidence_gate',
  };

  return {
    targetModel: targetModel || 'gpt-5.5',
    currentModel: currentModel || 'current-codex-default',
    migrationReason: migrationReason || 'better agentic coding, lower token use, and longer research loops',
    benchmarkSuites,
    evalDimensions,
    routingPolicy,
  };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Judge a completed model-migration trial against the required evidence.
 *
 * @param {object} [result] - Trial outcome: targetModel, baselineModel, the
 *   four *Pass flags, tokenDeltaPercent (must be a finite number),
 *   regressionCount, routeHighRisk and holdoutEvalPass.
 * @returns {{decision: 'allow'|'warn', issues: string[], canRouteHighRisk: boolean}}
 *   `canRouteHighRisk` is true only when there are no issues and high-risk
 *   routing was requested.
 */
function evaluateModelMigrationResult(result = {}) {
  // Each row pairs "is this check failing?" with the issue code it reports.
  const checks = [
    [!result.targetModel, 'missing_target_model'],
    [!result.baselineModel, 'missing_baseline_model'],
    [!result.highRoiTestsPass, 'high_roi_tests_must_pass'],
    [!result.adapterProofPass, 'adapter_proof_must_pass'],
    [!result.automationProofPass, 'automation_proof_must_pass'],
    [!result.selfHealPass, 'self_heal_must_pass'],
    [!Number.isFinite(result.tokenDeltaPercent), 'missing_token_delta'],
    [Number(result.regressionCount || 0) > 0, 'model_regressions_present'],
    [result.routeHighRisk && !result.holdoutEvalPass, 'holdout_required_for_high_risk_routing'],
  ];

  const issues = [];
  for (const [failed, code] of checks) {
    if (failed) issues.push(code);
  }

  const clean = issues.length === 0;
  return {
    decision: clean ? 'allow' : 'warn',
    issues,
    canRouteHighRisk: clean && Boolean(result.routeHighRisk),
  };
}
|
|
51
|
+
|
|
52
|
+
module.exports = {
|
|
53
|
+
buildModelMigrationPlan,
|
|
54
|
+
evaluateModelMigrationResult,
|
|
55
|
+
};
|
|
@@ -603,10 +603,18 @@ function findOpenPrForBranch({ branchName, runner = runGh, env = process.env } =
|
|
|
603
603
|
|
|
604
604
|
function classifyCommand(command) {
|
|
605
605
|
const text = String(command || '').trim();
|
|
606
|
+
const workflowRunMatch = text.match(/\bgh\s+workflow\s+run\s+([^\s]+)/i);
|
|
607
|
+
const refMatch = text.match(/(?:--ref|-r)\s+([^\s]+)/i);
|
|
608
|
+
const fieldArgs = [...text.matchAll(/(?:--field|-f)\s+([A-Za-z0-9_.-]+)=([^\s]+)/gi)]
|
|
609
|
+
.map((match) => ({ name: match[1], value: match[2] }));
|
|
606
610
|
return {
|
|
607
611
|
text,
|
|
608
612
|
isPrCreate: /\bgh\s+pr\s+create\b/i.test(text),
|
|
609
613
|
isPrMerge: /\bgh\s+pr\s+merge\b/i.test(text),
|
|
614
|
+
isWorkflowRun: /\bgh\s+workflow\s+run\b/i.test(text),
|
|
615
|
+
workflowName: workflowRunMatch ? workflowRunMatch[1] : null,
|
|
616
|
+
workflowRef: refMatch ? refMatch[1] : null,
|
|
617
|
+
workflowFields: fieldArgs,
|
|
610
618
|
isPublish: /\b(?:npm|yarn|pnpm)\s+publish\b/i.test(text),
|
|
611
619
|
isReleaseCreate: /\bgh\s+release\s+create\b/i.test(text),
|
|
612
620
|
isTagCreate: /\bgit\s+tag\b/i.test(text),
|
|
@@ -648,23 +656,108 @@ function evaluateOperationalIntegrity(options = {}) {
|
|
|
648
656
|
const commandInfo = classifyCommand(options.command || '');
|
|
649
657
|
const blockers = [];
|
|
650
658
|
|
|
651
|
-
const requiresGovernance = commandInfo.isPrCreate
|
|
659
|
+
const requiresGovernance = commandInfo.isPrCreate
|
|
660
|
+
|| commandInfo.isPrMerge
|
|
661
|
+
|| commandInfo.isWorkflowRun
|
|
662
|
+
|| commandInfo.isPublish
|
|
663
|
+
|| commandInfo.isReleaseCreate
|
|
664
|
+
|| commandInfo.isTagCreate;
|
|
652
665
|
const isPublishLike = commandInfo.isPublish || commandInfo.isReleaseCreate || commandInfo.isTagCreate;
|
|
653
666
|
|
|
654
667
|
if (requiresGovernance && !branchGovernance) {
|
|
655
668
|
blockers.push(buildBlocker(
|
|
656
669
|
'missing_branch_governance',
|
|
657
|
-
'PR, merge, release, and publish actions require explicit branch governance.'
|
|
670
|
+
'PR, workflow dispatch, merge, release, and publish actions require explicit branch governance.'
|
|
658
671
|
));
|
|
659
672
|
}
|
|
660
673
|
|
|
661
674
|
if (branchGovernance && branchGovernance.localOnly === true && requiresGovernance) {
|
|
662
675
|
blockers.push(buildBlocker(
|
|
663
676
|
'local_only_branch',
|
|
664
|
-
'This task is marked local-only. PR, merge, release, and publish actions are blocked.'
|
|
677
|
+
'This task is marked local-only. PR, workflow dispatch, merge, release, and publish actions are blocked.'
|
|
665
678
|
));
|
|
666
679
|
}
|
|
667
680
|
|
|
681
|
+
if (commandInfo.isWorkflowRun) {
|
|
682
|
+
const workflowEvidence = branchGovernance && branchGovernance.workflowDispatch
|
|
683
|
+
&& typeof branchGovernance.workflowDispatch === 'object'
|
|
684
|
+
? branchGovernance.workflowDispatch
|
|
685
|
+
: null;
|
|
686
|
+
const requestedEnvironment = workflowEvidence && workflowEvidence.environment
|
|
687
|
+
? String(workflowEvidence.environment).trim()
|
|
688
|
+
: '';
|
|
689
|
+
const expectedWorkflow = workflowEvidence && workflowEvidence.workflow
|
|
690
|
+
? String(workflowEvidence.workflow).trim()
|
|
691
|
+
: '';
|
|
692
|
+
const expectedRef = workflowEvidence && workflowEvidence.ref
|
|
693
|
+
? String(workflowEvidence.ref).trim()
|
|
694
|
+
: '';
|
|
695
|
+
const expectedSha = workflowEvidence && workflowEvidence.sha
|
|
696
|
+
? String(workflowEvidence.sha).trim()
|
|
697
|
+
: '';
|
|
698
|
+
const expectedJob = workflowEvidence && workflowEvidence.job
|
|
699
|
+
? String(workflowEvidence.job).trim()
|
|
700
|
+
: '';
|
|
701
|
+
|
|
702
|
+
if (!workflowEvidence) {
|
|
703
|
+
blockers.push(buildBlocker(
|
|
704
|
+
'missing_workflow_dispatch_evidence',
|
|
705
|
+
'GitHub Actions workflow dispatch requires explicit workflowDispatch evidence: environment, workflow, ref, sha, and job.'
|
|
706
|
+
));
|
|
707
|
+
}
|
|
708
|
+
if (workflowEvidence && !requestedEnvironment) {
|
|
709
|
+
blockers.push(buildBlocker(
|
|
710
|
+
'missing_workflow_environment',
|
|
711
|
+
'Workflow dispatch requires the requested environment, such as dev, staging, beta, or release.'
|
|
712
|
+
));
|
|
713
|
+
}
|
|
714
|
+
if (workflowEvidence && !expectedWorkflow) {
|
|
715
|
+
blockers.push(buildBlocker(
|
|
716
|
+
'missing_workflow_name',
|
|
717
|
+
'Workflow dispatch requires the expected workflow file name before execution.'
|
|
718
|
+
));
|
|
719
|
+
}
|
|
720
|
+
if (workflowEvidence && expectedWorkflow && commandInfo.workflowName !== expectedWorkflow) {
|
|
721
|
+
blockers.push(buildBlocker(
|
|
722
|
+
'workflow_name_mismatch',
|
|
723
|
+
`Requested ${requestedEnvironment || 'workflow'} dispatch expects ${expectedWorkflow}, but command runs ${commandInfo.workflowName || 'unknown workflow'}.`,
|
|
724
|
+
{ expectedWorkflow, actualWorkflow: commandInfo.workflowName }
|
|
725
|
+
));
|
|
726
|
+
}
|
|
727
|
+
if (workflowEvidence && !expectedRef) {
|
|
728
|
+
blockers.push(buildBlocker(
|
|
729
|
+
'missing_workflow_ref',
|
|
730
|
+
'Workflow dispatch requires an explicit branch/ref before execution.'
|
|
731
|
+
));
|
|
732
|
+
}
|
|
733
|
+
if (workflowEvidence && expectedRef && commandInfo.workflowRef !== expectedRef) {
|
|
734
|
+
blockers.push(buildBlocker(
|
|
735
|
+
'workflow_ref_mismatch',
|
|
736
|
+
`Workflow dispatch expects ref ${expectedRef}, but command uses ${commandInfo.workflowRef || 'no --ref value'}.`,
|
|
737
|
+
{ expectedRef, actualRef: commandInfo.workflowRef }
|
|
738
|
+
));
|
|
739
|
+
}
|
|
740
|
+
if (workflowEvidence && !expectedSha) {
|
|
741
|
+
blockers.push(buildBlocker(
|
|
742
|
+
'missing_workflow_sha',
|
|
743
|
+
'Workflow dispatch requires the HEAD SHA that will be verified after dispatch.'
|
|
744
|
+
));
|
|
745
|
+
}
|
|
746
|
+
if (workflowEvidence && expectedSha && headSha && expectedSha !== headSha) {
|
|
747
|
+
blockers.push(buildBlocker(
|
|
748
|
+
'workflow_sha_mismatch',
|
|
749
|
+
`Workflow dispatch expects SHA ${expectedSha}, but repository HEAD is ${headSha}.`,
|
|
750
|
+
{ expectedSha, headSha }
|
|
751
|
+
));
|
|
752
|
+
}
|
|
753
|
+
if (workflowEvidence && !expectedJob) {
|
|
754
|
+
blockers.push(buildBlocker(
|
|
755
|
+
'missing_workflow_job',
|
|
756
|
+
'Workflow dispatch requires the expected job name to verify before reporting the run URL.'
|
|
757
|
+
));
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
|
|
668
761
|
if (commandInfo.isPrMerge && /--admin\b/i.test(commandInfo.text)) {
|
|
669
762
|
blockers.push(buildBlocker(
|
|
670
763
|
'admin_merge_bypass_forbidden',
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Read a numeric setting without the `Number(null) === 0` / `Number('') === 0`
 * trap: only numbers and non-blank strings are converted, everything else
 * yields NaN so the caller falls back to its default.
 */
function parseNumericSetting(value) {
  if (typeof value === 'number') return value;
  if (typeof value === 'string' && value.trim() !== '') return Number(value);
  return Number.NaN;
}

/**
 * Build the declarative OpenTelemetry configuration descriptor.
 *
 * @param {object} [input]
 * @param {string} [input.serviceName] - Defaults to 'thumbgate-agent-harness'.
 * @param {string} [input.environment] - Defaults to 'production'.
 * @param {string} [input.sampler] - Defaults to 'parentbased_traceidratio'.
 * @param {number|string} [input.ratio] - Sampling ratio. Missing, null, blank
 *   or non-numeric values use 0.25; numeric values are clamped to 0..1.
 * @param {number|string} [input.exportIntervalMs] - Metric export interval.
 *   Missing, null, blank, non-numeric or non-positive values use 60000.
 * @param {boolean} [input.dynamicReloadAllowed] - Only exactly `true` enables it.
 * @returns {{file: string, envVar: string, config: object, policy: object}}
 */
function buildOtelDeclarativeConfig(input = {}) {
  // `= {}` only covers undefined; an explicit null must not crash either.
  const options = input ?? {};
  const serviceName = options.serviceName || 'thumbgate-agent-harness';
  const environment = options.environment || 'production';

  const ratio = parseNumericSetting(options.ratio);
  const exportIntervalMs = parseNumericSetting(options.exportIntervalMs);

  return {
    file: 'otel.yaml',
    envVar: 'OTEL_CONFIG_FILE',
    config: {
      resource: {
        attributes: {
          'service.name': serviceName,
          'deployment.environment': environment,
        },
      },
      traces: {
        sampler: options.sampler || 'parentbased_traceidratio',
        // A sampling ratio is a probability, so keep it inside 0..1.
        ratio: Number.isFinite(ratio) ? Math.min(1, Math.max(0, ratio)) : 0.25,
        dropAttributes: ['authorization', 'cookie', 'x-api-key'],
      },
      metrics: {
        // A zero or negative interval is not a usable schedule.
        exportIntervalMs: Number.isFinite(exportIntervalMs) && exportIntervalMs > 0
          ? exportIntervalMs
          : 60000,
      },
      logs: {
        redactAttributes: ['prompt', 'toolInput', 'secret', 'token'],
      },
    },
    policy: {
      versionControlled: true,
      reviewedBeforeProduction: true,
      dynamicReloadAllowed: options.dynamicReloadAllowed === true,
    },
  };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Check an OpenTelemetry config for the pieces this project requires.
 *
 * Accepts either the descriptor returned by buildOtelDeclarativeConfig (its
 * `config` property is inspected) or a bare config payload.
 *
 * @param {object} [config] - Descriptor or payload to inspect.
 * @returns {{decision: 'allow'|'warn', issues: string[]}} 'allow' only when a
 *   service name, all three pipelines, and an 'authorization' entry in
 *   traces.dropAttributes are present.
 */
function evaluateOtelConfig(config = {}) {
  const payload = config.config || config;
  const { resource, traces, metrics, logs } = payload;

  const issues = [];
  if (!resource?.attributes?.['service.name']) {
    issues.push('missing_service_name');
  }
  if (!traces) {
    issues.push('missing_trace_pipeline');
  }
  if (!metrics) {
    issues.push('missing_metric_pipeline');
  }
  if (!logs) {
    issues.push('missing_log_pipeline');
  }

  const dropped = traces?.dropAttributes;
  const dropsAuthorization = Array.isArray(dropped) && dropped.includes('authorization');
  if (!dropsAuthorization) {
    issues.push('missing_sensitive_trace_attribute_drop');
  }

  const decision = issues.length === 0 ? 'allow' : 'warn';
  return { decision, issues };
}
|
|
52
|
+
|
|
53
|
+
module.exports = {
|
|
54
|
+
buildOtelDeclarativeConfig,
|
|
55
|
+
evaluateOtelConfig,
|
|
56
|
+
};
|
|
@@ -110,7 +110,7 @@ function extractCitations(response) {
|
|
|
110
110
|
|
|
111
111
|
class PerplexityClient {
|
|
112
112
|
constructor(options = {}) {
|
|
113
|
-
this.apiKey = options.apiKey
|
|
113
|
+
this.apiKey = options.apiKey ?? process.env.PERPLEXITY_API_KEY ?? '';
|
|
114
114
|
this.baseUrl = options.baseUrl || process.env.PERPLEXITY_BASE_URL || DEFAULT_BASE_URL;
|
|
115
115
|
this.fetchFn = options.fetchFn || globalThis.fetch;
|
|
116
116
|
this.timeoutMs = Number(options.timeoutMs || process.env.PERPLEXITY_TIMEOUT_MS || DEFAULT_TIMEOUT_MS);
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Check a post-training plan for the evidence required before it may run.
 *
 * @param {object} [input]
 * @param {string} [input.mode] - One of 'sft', 'rl', 'grpo', 'gspo'
 *   (case-insensitive).
 * @param {*} [input.dataset] - Must be truthy.
 * @param {*} [input.baseCheckpoint] - Must be truthy.
 * @param {boolean} [input.piiRedacted] - Must be exactly `true`.
 * @param {boolean} [input.holdoutEval] - Must be exactly `true`.
 * @param {*} [input.rewardSpec] - Required for the reward-based modes unless
 *   `rewardSpecRequired` is exactly `false`.
 * @param {boolean} [input.rewardSpecRequired]
 * @param {number|string} [input.maxSpendCents] - Spend cap. Must be a finite,
 *   non-negative number (or a numeric string); null, NaN, blank and negative
 *   values are reported as a missing cap.
 * @returns {{mode: string, decision: 'allow'|'warn', issues: string[], requiredArtifacts: string[]}}
 */
function evaluatePostTrainingPlan(input = {}) {
  // `= {}` only covers undefined; an explicit null must not crash either.
  const plan = input ?? {};
  const mode = String(plan.mode || '').toLowerCase();
  const usesReward = ['rl', 'grpo', 'gspo'].includes(mode);

  // A cap only limits spend if it is an actual non-negative amount.
  const rawCap = plan.maxSpendCents;
  const cap = typeof rawCap === 'string' && rawCap.trim() !== '' ? Number(rawCap) : rawCap;
  const hasSpendCap = typeof cap === 'number' && Number.isFinite(cap) && cap >= 0;

  const issues = [];
  if (mode !== 'sft' && !usesReward) issues.push('unsupported_post_training_mode');
  if (!plan.dataset) issues.push('missing_dataset');
  if (!plan.baseCheckpoint) issues.push('missing_base_checkpoint');
  if (plan.piiRedacted !== true) issues.push('pii_redaction_required');
  if (plan.holdoutEval !== true) issues.push('holdout_eval_required');
  if (plan.rewardSpecRequired !== false && usesReward && !plan.rewardSpec) {
    issues.push('missing_reward_spec');
  }
  if (!hasSpendCap) issues.push('missing_spend_cap');

  const requiredArtifacts = [
    'dataset manifest',
    'PII redaction report',
    'base checkpoint',
    'holdout eval report',
    'spend cap',
  ];
  if (usesReward) requiredArtifacts.push('reward specification');

  return {
    mode,
    decision: issues.length === 0 ? 'allow' : 'warn',
    issues,
    requiredArtifacts,
  };
}
|
|
31
|
+
|
|
32
|
+
module.exports = {
|
|
33
|
+
evaluatePostTrainingPlan,
|
|
34
|
+
};
|