thumbgate 0.9.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/README.md +134 -0
- package/.claude-plugin/bundle/icon.png +0 -0
- package/.claude-plugin/bundle/icon.svg +18 -0
- package/.claude-plugin/bundle/server/index.js +24 -0
- package/.claude-plugin/marketplace.json +36 -0
- package/.claude-plugin/plugin.json +21 -0
- package/.well-known/mcp/server-card.json +231 -0
- package/LICENSE +21 -0
- package/README.md +375 -0
- package/adapters/README.md +9 -0
- package/adapters/amp/skills/thumbgate-feedback/SKILL.md +22 -0
- package/adapters/chatgpt/INSTALL.md +83 -0
- package/adapters/chatgpt/openapi.yaml +1281 -0
- package/adapters/claude/.mcp.json +14 -0
- package/adapters/codex/config.toml +9 -0
- package/adapters/gemini/function-declarations.json +224 -0
- package/adapters/mcp/server-stdio.js +788 -0
- package/adapters/opencode/opencode.json +15 -0
- package/bin/cli.js +1484 -0
- package/bin/memory.sh +64 -0
- package/bin/obsidian-sync.sh +20 -0
- package/bin/postinstall.js +37 -0
- package/config/build-metadata.json +4 -0
- package/config/e2e-critical-flows.json +45 -0
- package/config/gate-templates.json +77 -0
- package/config/gates/claim-verification.json +29 -0
- package/config/gates/computer-use.json +39 -0
- package/config/gates/default.json +117 -0
- package/config/github-about.json +25 -0
- package/config/mcp-allowlists.json +135 -0
- package/config/model-tiers.json +33 -0
- package/config/partner-routing.json +132 -0
- package/config/policy-bundles/constrained-v1.json +64 -0
- package/config/policy-bundles/default-v1.json +91 -0
- package/config/rubrics/default-v1.json +52 -0
- package/config/skill-packs/react-testing.json +23 -0
- package/config/skill-packs/stripe-integration/references/api-spec.json +1 -0
- package/config/skill-packs/stripe-integration/references/webhook-guide.md +3 -0
- package/config/skill-specs/pr-reviewer.json +9 -0
- package/config/skill-specs/release-status.json +9 -0
- package/config/skill-specs/ticket-triage.json +9 -0
- package/config/subagent-profiles.json +32 -0
- package/config/tessl-tiles.json +29 -0
- package/config/thumbgate-settings.managed.json +12 -0
- package/openapi/openapi.yaml +1281 -0
- package/package.json +283 -0
- package/plugins/amp-skill/INSTALL.md +52 -0
- package/plugins/amp-skill/SKILL.md +64 -0
- package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +22 -0
- package/plugins/claude-codex-bridge/.mcp.json +12 -0
- package/plugins/claude-codex-bridge/INSTALL.md +43 -0
- package/plugins/claude-codex-bridge/README.md +46 -0
- package/plugins/claude-codex-bridge/scripts/codex-bridge.js +288 -0
- package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +24 -0
- package/plugins/claude-codex-bridge/skills/result/SKILL.md +22 -0
- package/plugins/claude-codex-bridge/skills/review/SKILL.md +28 -0
- package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +27 -0
- package/plugins/claude-codex-bridge/skills/setup/SKILL.md +21 -0
- package/plugins/claude-codex-bridge/skills/status/SKILL.md +19 -0
- package/plugins/claude-skill/INSTALL.md +55 -0
- package/plugins/claude-skill/SKILL.md +46 -0
- package/plugins/codex-profile/.codex-plugin/plugin.json +43 -0
- package/plugins/codex-profile/.mcp.json +12 -0
- package/plugins/codex-profile/AGENTS.md +20 -0
- package/plugins/codex-profile/INSTALL.md +66 -0
- package/plugins/codex-profile/README.md +37 -0
- package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +23 -0
- package/plugins/cursor-marketplace/CHANGELOG.md +30 -0
- package/plugins/cursor-marketplace/LICENSE +21 -0
- package/plugins/cursor-marketplace/README.md +124 -0
- package/plugins/cursor-marketplace/agents/reliability-reviewer.md +31 -0
- package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
- package/plugins/cursor-marketplace/commands/capture-feedback.md +33 -0
- package/plugins/cursor-marketplace/commands/check-gates.md +25 -0
- package/plugins/cursor-marketplace/commands/show-lessons.md +27 -0
- package/plugins/cursor-marketplace/hooks/hooks.json +10 -0
- package/plugins/cursor-marketplace/mcp.json +12 -0
- package/plugins/cursor-marketplace/rules/feedback-capture.mdc +34 -0
- package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +30 -0
- package/plugins/cursor-marketplace/rules/session-continuity.mdc +28 -0
- package/plugins/cursor-marketplace/scripts/gate-check.sh +11 -0
- package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +47 -0
- package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +31 -0
- package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +30 -0
- package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +33 -0
- package/plugins/gemini-extension/INSTALL.md +92 -0
- package/plugins/gemini-extension/gemini_prompt.txt +14 -0
- package/plugins/gemini-extension/tool_contract.json +45 -0
- package/plugins/opencode-profile/INSTALL.md +57 -0
- package/public/assets/instagram-card.png +0 -0
- package/public/assets/tiktok-agent-memory.mp4 +0 -0
- package/public/blog.html +400 -0
- package/public/dashboard.html +1093 -0
- package/public/guide.html +317 -0
- package/public/index.html +1014 -0
- package/public/learn/agent-harness-pattern.html +180 -0
- package/public/learn/ai-agent-persistent-memory.html +202 -0
- package/public/learn/learn.css +45 -0
- package/public/learn/mcp-pre-action-gates-explained.html +172 -0
- package/public/learn/stop-ai-agent-force-push.html +134 -0
- package/public/learn/vibe-coding-safety-net.html +142 -0
- package/public/learn.html +213 -0
- package/public/lessons.html +650 -0
- package/public/vercel.json +8 -0
- package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
- package/scripts/a2ui-engine.js +73 -0
- package/scripts/access-anomaly-detector.js +12 -0
- package/scripts/adk-consolidator.js +266 -0
- package/scripts/agent-readiness.js +220 -0
- package/scripts/agent-security-hardening.js +227 -0
- package/scripts/agentic-data-pipeline.js +847 -0
- package/scripts/analytics-report.js +328 -0
- package/scripts/analytics-window.js +158 -0
- package/scripts/async-job-runner.js +1001 -0
- package/scripts/audit-trail.js +398 -0
- package/scripts/auto-promote-gates.js +299 -0
- package/scripts/auto-wire-hooks.js +312 -0
- package/scripts/autonomous-sales-agent.js +39 -0
- package/scripts/autoresearch-runner.js +216 -0
- package/scripts/background-agent-governance.js +237 -0
- package/scripts/behavioral-extraction.js +97 -0
- package/scripts/belief-update.js +84 -0
- package/scripts/billing.js +2438 -0
- package/scripts/bot-detector.js +50 -0
- package/scripts/budget-guard.js +173 -0
- package/scripts/build-claude-mcpb.js +189 -0
- package/scripts/build-metadata.js +97 -0
- package/scripts/check-congruence.js +322 -0
- package/scripts/cli-feedback.js +135 -0
- package/scripts/cli-telemetry.js +87 -0
- package/scripts/cloudflare-dynamic-sandbox.js +315 -0
- package/scripts/code-reasoning.js +350 -0
- package/scripts/codegraph-context.js +466 -0
- package/scripts/commercial-offer.js +56 -0
- package/scripts/computer-use-firewall.js +250 -0
- package/scripts/context-engine.js +694 -0
- package/scripts/contextfs.js +1287 -0
- package/scripts/conversation-context.js +119 -0
- package/scripts/creator-campaigns.js +239 -0
- package/scripts/daemon-manager.js +108 -0
- package/scripts/daily-digest.js +11 -0
- package/scripts/dashboard-render-spec.js +395 -0
- package/scripts/dashboard.js +1058 -0
- package/scripts/data-governance.js +173 -0
- package/scripts/delegation-runtime.js +900 -0
- package/scripts/deploy-gcp.sh +44 -0
- package/scripts/deploy-policy.js +263 -0
- package/scripts/disagreement-mining.js +315 -0
- package/scripts/dispatch-brief.js +159 -0
- package/scripts/distribution-surfaces.js +44 -0
- package/scripts/dpo-optimizer.js +209 -0
- package/scripts/ephemeral-agent-store.js +219 -0
- package/scripts/eval-harness.js +56 -0
- package/scripts/evolution-state.js +241 -0
- package/scripts/experiment-tracker.js +267 -0
- package/scripts/export-databricks-bundle.js +242 -0
- package/scripts/export-dpo-pairs.js +345 -0
- package/scripts/export-kto-pairs.js +310 -0
- package/scripts/export-training.js +448 -0
- package/scripts/failure-diagnostics.js +558 -0
- package/scripts/feedback-attribution.js +313 -0
- package/scripts/feedback-fallback.js +111 -0
- package/scripts/feedback-history-distiller.js +391 -0
- package/scripts/feedback-inbox-read.js +162 -0
- package/scripts/feedback-loop.js +1887 -0
- package/scripts/feedback-paths.js +145 -0
- package/scripts/feedback-quality.js +139 -0
- package/scripts/feedback-root-consolidator.js +238 -0
- package/scripts/feedback-schema.js +426 -0
- package/scripts/feedback-session.js +286 -0
- package/scripts/feedback-to-memory.js +185 -0
- package/scripts/feedback-to-rules.js +163 -0
- package/scripts/filesystem-search.js +404 -0
- package/scripts/funnel-analytics.js +35 -0
- package/scripts/gate-satisfy.js +42 -0
- package/scripts/gate-stats.js +116 -0
- package/scripts/gate-templates.js +70 -0
- package/scripts/gates-engine.js +816 -0
- package/scripts/generate-paperbanana-diagrams.sh +99 -0
- package/scripts/generate-pretool-hook.sh +40 -0
- package/scripts/github-about.js +350 -0
- package/scripts/github-outreach.js +65 -0
- package/scripts/gtm-revenue-loop.js +520 -0
- package/scripts/hallucination-detector.js +226 -0
- package/scripts/hf-papers.js +317 -0
- package/scripts/history-distiller.js +200 -0
- package/scripts/hook-auto-capture.sh +95 -0
- package/scripts/hook-stop-pr-thread-check.sh +68 -0
- package/scripts/hook-stop-self-score.sh +51 -0
- package/scripts/hook-stop-verify-deploy.sh +31 -0
- package/scripts/hook-thumbgate-cache-updater.js +48 -0
- package/scripts/hook-verify-before-done.sh +20 -0
- package/scripts/hosted-config.js +170 -0
- package/scripts/hybrid-feedback-context.js +676 -0
- package/scripts/install-mcp.js +159 -0
- package/scripts/intent-router.js +392 -0
- package/scripts/internal-agent-bootstrap.js +490 -0
- package/scripts/jsonl-watcher.js +155 -0
- package/scripts/lesson-db.js +613 -0
- package/scripts/lesson-inference.js +315 -0
- package/scripts/lesson-retrieval.js +95 -0
- package/scripts/lesson-rotation.js +137 -0
- package/scripts/lesson-search.js +644 -0
- package/scripts/lesson-synthesis.js +196 -0
- package/scripts/license.js +50 -0
- package/scripts/local-model-profile.js +383 -0
- package/scripts/markdown-escape.js +12 -0
- package/scripts/marketing-experiment.js +671 -0
- package/scripts/mcp-config.js +149 -0
- package/scripts/mcp-policy.js +99 -0
- package/scripts/memalign-recall.js +111 -0
- package/scripts/memory-firewall.js +222 -0
- package/scripts/memory-migration.js +296 -0
- package/scripts/meta-policy.js +194 -0
- package/scripts/metered-billing.js +16 -0
- package/scripts/model-tier-router.js +301 -0
- package/scripts/money-watcher.js +71 -0
- package/scripts/multi-hop-recall.js +240 -0
- package/scripts/natural-language-harness.js +330 -0
- package/scripts/obsidian-export.js +712 -0
- package/scripts/operational-dashboard.js +103 -0
- package/scripts/operational-summary.js +93 -0
- package/scripts/optimize-context.js +17 -0
- package/scripts/org-dashboard.js +201 -0
- package/scripts/partner-orchestration.js +146 -0
- package/scripts/per-step-scoring.js +165 -0
- package/scripts/perplexity-marketing.js +466 -0
- package/scripts/pii-scanner.js +153 -0
- package/scripts/plan-gate.js +154 -0
- package/scripts/post-everywhere.js +308 -0
- package/scripts/post-to-x-retry.sh +22 -0
- package/scripts/post-to-x.js +369 -0
- package/scripts/pr-manager.js +236 -0
- package/scripts/predictive-insights.js +356 -0
- package/scripts/principle-extractor.js +162 -0
- package/scripts/pro-features.js +40 -0
- package/scripts/pro-local-dashboard.js +174 -0
- package/scripts/problem-detail.js +53 -0
- package/scripts/product-feedback.js +134 -0
- package/scripts/profile-router.js +245 -0
- package/scripts/prompt-dlp.js +221 -0
- package/scripts/prompt-guard.js +83 -0
- package/scripts/prove-adapters.js +863 -0
- package/scripts/prove-attribution.js +365 -0
- package/scripts/prove-automation.js +653 -0
- package/scripts/prove-autoresearch.js +304 -0
- package/scripts/prove-claim-verification.js +277 -0
- package/scripts/prove-cloudflare-sandbox.js +163 -0
- package/scripts/prove-data-pipeline.js +410 -0
- package/scripts/prove-data-quality.js +227 -0
- package/scripts/prove-evolution.js +352 -0
- package/scripts/prove-harnesses.js +287 -0
- package/scripts/prove-intelligence.js +259 -0
- package/scripts/prove-lancedb.js +371 -0
- package/scripts/prove-local-intelligence.js +342 -0
- package/scripts/prove-loop-closure.js +263 -0
- package/scripts/prove-predictive-insights.js +357 -0
- package/scripts/prove-runtime.js +350 -0
- package/scripts/prove-seo-gsd.js +234 -0
- package/scripts/prove-settings.js +279 -0
- package/scripts/prove-subway-upgrades.js +277 -0
- package/scripts/prove-tessl.js +229 -0
- package/scripts/prove-training-export.js +327 -0
- package/scripts/prove-workflow-contract.js +116 -0
- package/scripts/prove-xmemory.js +332 -0
- package/scripts/publish-decision.js +133 -0
- package/scripts/pulse.js +80 -0
- package/scripts/rate-limiter.js +125 -0
- package/scripts/reddit-dm-outreach.js +182 -0
- package/scripts/reddit-monitor-cron.sh +26 -0
- package/scripts/reflector-agent.js +221 -0
- package/scripts/reminder-engine.js +132 -0
- package/scripts/revenue-status.js +472 -0
- package/scripts/risk-scorer.js +458 -0
- package/scripts/rlaif-self-audit.js +129 -0
- package/scripts/rubric-engine.js +230 -0
- package/scripts/schedule-manager.js +251 -0
- package/scripts/secret-scanner.js +414 -0
- package/scripts/self-heal.js +147 -0
- package/scripts/self-healing-check.js +188 -0
- package/scripts/semantic-layer.js +98 -0
- package/scripts/seo-gsd.js +1153 -0
- package/scripts/settings-hierarchy.js +214 -0
- package/scripts/shieldcortex-memory-firewall-runner.mjs +53 -0
- package/scripts/skill-exporter.js +262 -0
- package/scripts/skill-generator.js +446 -0
- package/scripts/skill-materializer.js +134 -0
- package/scripts/skill-packs.js +136 -0
- package/scripts/skill-proposer.js +99 -0
- package/scripts/skill-quality-tracker.js +284 -0
- package/scripts/slo-alert-engine.js +14 -0
- package/scripts/slow-loop.js +72 -0
- package/scripts/social-analytics/db/schema.sql +32 -0
- package/scripts/social-analytics/digest.js +256 -0
- package/scripts/social-analytics/generate-instagram-card.js +97 -0
- package/scripts/social-analytics/instagram-thumbgate-post.js +73 -0
- package/scripts/social-analytics/mcp-server.js +289 -0
- package/scripts/social-analytics/normalizer.js +580 -0
- package/scripts/social-analytics/notify.js +162 -0
- package/scripts/social-analytics/poll-all.js +107 -0
- package/scripts/social-analytics/pollers/github.js +195 -0
- package/scripts/social-analytics/pollers/instagram.js +253 -0
- package/scripts/social-analytics/pollers/linkedin.js +330 -0
- package/scripts/social-analytics/pollers/plausible.js +247 -0
- package/scripts/social-analytics/pollers/reddit.js +306 -0
- package/scripts/social-analytics/pollers/threads.js +233 -0
- package/scripts/social-analytics/pollers/tiktok.js +203 -0
- package/scripts/social-analytics/pollers/x.js +227 -0
- package/scripts/social-analytics/pollers/youtube.js +304 -0
- package/scripts/social-analytics/pollers/zernio.js +180 -0
- package/scripts/social-analytics/publish-instagram-thumbgate.js +85 -0
- package/scripts/social-analytics/publishers/devto.js +122 -0
- package/scripts/social-analytics/publishers/instagram.js +317 -0
- package/scripts/social-analytics/publishers/linkedin.js +294 -0
- package/scripts/social-analytics/publishers/reddit.js +390 -0
- package/scripts/social-analytics/publishers/threads.js +275 -0
- package/scripts/social-analytics/publishers/tiktok.js +217 -0
- package/scripts/social-analytics/publishers/x.js +259 -0
- package/scripts/social-analytics/publishers/youtube.js +223 -0
- package/scripts/social-analytics/publishers/zernio.js +209 -0
- package/scripts/social-analytics/run-digest.js +34 -0
- package/scripts/social-analytics/store.js +257 -0
- package/scripts/social-analytics/utm.js +143 -0
- package/scripts/social-pipeline.js +2628 -0
- package/scripts/social-quality-gate.js +18 -0
- package/scripts/social-reply-monitor.js +445 -0
- package/scripts/status-dashboard.js +155 -0
- package/scripts/statusline-lesson.js +16 -0
- package/scripts/statusline-tower.js +8 -0
- package/scripts/statusline.sh +116 -0
- package/scripts/stripe-live-status.js +115 -0
- package/scripts/subagent-profiles.js +79 -0
- package/scripts/sync-gh-secrets-from-env.sh +70 -0
- package/scripts/sync-github-about.js +52 -0
- package/scripts/sync-version.js +451 -0
- package/scripts/synthetic-dpo.js +234 -0
- package/scripts/telemetry-analytics.js +821 -0
- package/scripts/tessl-export.js +371 -0
- package/scripts/test-coverage.js +120 -0
- package/scripts/thompson-sampling.js +417 -0
- package/scripts/thumbgate-search.js +189 -0
- package/scripts/tool-kpi-tracker.js +12 -0
- package/scripts/tool-registry.js +811 -0
- package/scripts/train_from_feedback.py +910 -0
- package/scripts/user-profile.js +78 -0
- package/scripts/validate-feedback.js +580 -0
- package/scripts/validate-workflow-contract.js +287 -0
- package/scripts/vector-store.js +198 -0
- package/scripts/verification-loop.js +291 -0
- package/scripts/verify-obsidian-setup.sh +269 -0
- package/scripts/verify-run.js +269 -0
- package/scripts/webhook-delivery.js +62 -0
- package/scripts/weekly-auto-post.js +124 -0
- package/scripts/workflow-runs.js +154 -0
- package/scripts/workflow-sprint-intake.js +475 -0
- package/scripts/workspace-evolver.js +374 -0
- package/scripts/x-autonomous-marketing.js +139 -0
- package/scripts/xmemory-lite.js +405 -0
- package/skills/agent-memory/SKILL.md +97 -0
- package/skills/solve-architecture-autonomy/SKILL.md +17 -0
- package/skills/solve-architecture-autonomy/tool.js +33 -0
- package/skills/thumbgate/SKILL.md +114 -0
- package/skills/thumbgate-feedback/SKILL.md +49 -0
- package/src/api/server.js +4208 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const crypto = require('crypto');
|
|
5
|
+
|
|
6
|
+
const { bootstrapInternalAgent } = require('./internal-agent-bootstrap');
|
|
7
|
+
|
|
8
|
+
// Workload types that, on their own, qualify a request for the Cloudflare
// dynamic worker in classifyHostedExecution (a Railway pin or a repo-access
// requirement is checked first and takes precedence).
const DYNAMIC_WORKLOADS = new Set([
  'analytics_transform',
  'code_mode',
  'creator_analytics',
  'history_distillation',
  'lesson_synthesis',
  'workflow_triage',
]);

// Default tolerance between an envelope timestamp and "now" when verifying a
// dispatch signature: five minutes, expressed in milliseconds.
const DEFAULT_MAX_SKEW_MS = 5 * 60 * 1000;

// Route placed in a sandbox plan when the caller does not supply options.route.
const DEFAULT_SANDBOX_ROUTE = '/sandbox/execute';
|
|
19
|
+
|
|
20
|
+
/**
 * Coerce any value to a trimmed string.
 *
 * @param {*} value - Value to coerce.
 * @returns {string} '' for null/undefined, otherwise String(value) with
 *   surrounding whitespace removed.
 */
function normalizeText(value) {
  const isMissing = value === null || value === undefined;
  return isMissing ? '' : String(value).trim();
}
|
|
24
|
+
|
|
25
|
+
/**
 * Interpret a loosely-typed flag as a boolean.
 *
 * @param {*} value - Boolean, string, number, or missing value.
 * @returns {boolean} Real booleans are returned untouched; null/undefined are
 *   false; anything else is true only when its trimmed, lower-cased text is
 *   one of '1', 'true', 'yes', 'on'.
 */
function normalizeBoolean(value) {
  if (value === true || value === false) {
    return value;
  }
  if (value === undefined || value === null) {
    return false;
  }
  switch (normalizeText(value).toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true;
    default:
      return false;
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Coerce a value to a finite number, falling back when that is not possible.
 *
 * Missing input (null/undefined) and blank strings yield the fallback instead
 * of being coerced: Number(null) and Number('') are both 0, which would
 * otherwise mask "no value supplied" and ignore a non-zero fallback.
 *
 * @param {*} value - Value to convert.
 * @param {number} [fallback=0] - Returned when value is missing, blank, or
 *   does not convert to a finite number.
 * @returns {number} The finite numeric value, or fallback.
 */
function normalizeNumber(value, fallback = 0) {
  if (value === undefined || value === null) return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Turn a list of arbitrary values into unique, lower-cased, non-empty strings.
 *
 * @param {*} [values=[]] - Candidate list; anything that is not an array
 *   yields an empty result.
 * @returns {string[]} Trimmed, lower-cased entries in first-seen order with
 *   duplicates and empty strings removed.
 */
function normalizeStringArray(values = []) {
  const unique = new Set();
  if (Array.isArray(values)) {
    for (const entry of values) {
      const text = normalizeText(entry).toLowerCase();
      if (text) {
        unique.add(text);
      }
    }
  }
  return [...unique];
}
|
|
45
|
+
|
|
46
|
+
/**
 * Map a tier label to one of the recognised plan tiers.
 *
 * @param {*} value - Tier label in any casing, possibly padded or missing.
 * @returns {'free'|'pro'|'team'|'enterprise'} The matching tier; any
 *   unrecognised or missing value resolves to 'pro'.
 */
function normalizeTier(value) {
  const tier = normalizeText(value).toLowerCase();
  switch (tier) {
    case 'free':
    case 'pro':
    case 'team':
    case 'enterprise':
      return tier;
    default:
      return 'pro';
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Serialize a value to JSON text with object keys in sorted order, so equal
 * data always yields the same string (used as the text that gets signed).
 *
 * The output matches what JSON.stringify would keep, so the canonical form
 * survives a JSON round trip:
 *   - object properties whose value has no JSON form (undefined, functions,
 *     symbols) are omitted instead of being rendered as the text "undefined";
 *   - such values inside arrays (and array holes) become null;
 *   - objects exposing toJSON() (e.g. Date) are serialized via that method.
 *
 * Cyclic structures are not detected and will overflow the stack; BigInt
 * values make JSON.stringify throw.
 *
 * @param {*} value - Value to serialize.
 * @returns {string|undefined} Canonical JSON text; undefined when the
 *   top-level value itself has no JSON representation.
 */
function stableStringify(value) {
  let current = value;
  if (current && typeof current === 'object' && typeof current.toJSON === 'function') {
    current = current.toJSON();
  }

  if (Array.isArray(current)) {
    // Array.from visits holes too, so sparse arrays serialize like JSON.stringify.
    const items = Array.from(current, (entry) => {
      const encoded = stableStringify(entry);
      return encoded === undefined ? 'null' : encoded;
    });
    return `[${items.join(',')}]`;
  }

  if (current && typeof current === 'object') {
    const members = [];
    for (const key of Object.keys(current).sort()) {
      const encoded = stableStringify(current[key]);
      // Skip values JSON cannot represent; emitting them would produce
      // invalid JSON that no receiver could reproduce after parsing.
      if (encoded !== undefined) {
        members.push(`${JSON.stringify(key)}:${encoded}`);
      }
    }
    return `{${members.join(',')}}`;
  }

  return JSON.stringify(current);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Create a fresh sandbox execution identifier.
 *
 * @returns {string} 'cfw_' + current time in base 36 + '_' + 8 random hex
 *   characters (4 random bytes).
 */
function buildExecutionId() {
  const timePart = Date.now().toString(36);
  const randomPart = crypto.randomBytes(4).toString('hex');
  return ['cfw', timePart, randomPart].join('_');
}
|
|
66
|
+
|
|
67
|
+
/**
 * Derive the sandbox network policy for a request.
 *
 * @param {Object} request - Request with `requiresNetwork` and `allowedHosts`.
 * @returns {{mode: string, allowedHosts: string[]}} 'deny_all' with no hosts
 *   when the request does not need network access; otherwise 'allow_list'
 *   when at least one host is listed, or 'egress_enabled' when none are.
 */
function buildNetworkPolicy(request) {
  const hosts = normalizeStringArray(request.allowedHosts);
  if (!request.requiresNetwork) {
    return { mode: 'deny_all', allowedHosts: [] };
  }
  const mode = hosts.length === 0 ? 'egress_enabled' : 'allow_list';
  return { mode, allowedHosts: hosts };
}
|
|
80
|
+
|
|
81
|
+
/**
 * List the binding names a sandbox execution is given.
 *
 * @param {Object} request - Request carrying `workloadType`.
 * @returns {string[]} Always 'MEMORY_KV'; 'GATES_KV' is appended for the
 *   history_distillation, lesson_synthesis and workflow_triage workloads.
 */
function buildBindings(request) {
  const gatedWorkloads = ['history_distillation', 'lesson_synthesis', 'workflow_triage'];
  return gatedWorkloads.includes(request.workloadType)
    ? ['MEMORY_KV', 'GATES_KV']
    : ['MEMORY_KV'];
}
|
|
92
|
+
|
|
93
|
+
/**
 * Reduce a normalized request to the routing-relevant fields that are echoed
 * back in a plan (free-form fields such as context or messages are left out).
 *
 * @param {Object} request - Normalized request.
 * @returns {Object} Copy holding workloadType, tier, tenantId,
 *   requiresIsolation, requiresNetwork, requiresRepoAccess and contextTokens.
 */
function summarizeRequest(request) {
  const {
    workloadType,
    tier,
    tenantId,
    requiresIsolation,
    requiresNetwork,
    requiresRepoAccess,
    contextTokens,
  } = request;

  return {
    workloadType,
    tier,
    tenantId,
    requiresIsolation,
    requiresNetwork,
    requiresRepoAccess,
    contextTokens,
  };
}
|
|
104
|
+
|
|
105
|
+
/**
 * Normalize a raw execution request into the canonical shape used for routing.
 *
 * Accepts aliases for several fields (workloadType/taskType/kind,
 * tenantId/teamId/customerId, allowedHosts/egressAllowlist) and derives:
 *   - requiresRepoAccess: explicit flag, `localFileAccess`, or any repoPath;
 *   - requiresIsolation: explicit flag, untrusted code, a team/enterprise
 *     tier, or the presence of a tenant id.
 *
 * @param {Object|null} [input={}] - Raw request. null is treated like an
 *   empty request instead of throwing (the default parameter alone only
 *   covers undefined).
 * @returns {Object} Normalized request. `workloadType` is a snake_case slug
 *   defaulting to 'generic_automation'; `approved` is true only for a literal
 *   `true`; `comments` / `messages` are kept only when they are arrays.
 */
function normalizeRequest(input = {}) {
  const raw = input === null ? {} : input;

  const workloadType = normalizeText(raw.workloadType || raw.taskType || raw.kind)
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '_')
    .replace(/^_+|_+$/g, '') || 'generic_automation';
  const providerPreference = normalizeText(raw.providerPreference).toLowerCase() || 'auto';
  const repoPath = normalizeText(raw.repoPath);
  const requiresRepoAccess = normalizeBoolean(raw.requiresRepoAccess)
    || normalizeBoolean(raw.localFileAccess)
    || Boolean(repoPath);
  const untrustedCode = normalizeBoolean(raw.untrustedCode);
  const tier = normalizeTier(raw.tier);
  const tenantId = normalizeText(raw.tenantId || raw.teamId || raw.customerId) || null;
  const requiresIsolation = normalizeBoolean(raw.requiresIsolation)
    || untrustedCode
    || tier === 'team'
    || tier === 'enterprise'
    || Boolean(tenantId);
  const contextTokens = normalizeNumber(raw.contextTokens, 0);

  return {
    source: normalizeText(raw.source) || 'api',
    workloadType,
    providerPreference,
    tier,
    tenantId,
    repoPath,
    requiresRepoAccess,
    requiresIsolation,
    requiresNetwork: normalizeBoolean(raw.requiresNetwork),
    untrustedCode,
    contextTokens,
    allowedHosts: normalizeStringArray(raw.allowedHosts || raw.egressAllowlist),
    traceId: normalizeText(raw.traceId) || null,
    context: normalizeText(raw.context),
    intentId: normalizeText(raw.intentId),
    mcpProfile: normalizeText(raw.mcpProfile) || undefined,
    partnerProfile: normalizeText(raw.partnerProfile) || undefined,
    delegationMode: normalizeText(raw.delegationMode) || 'auto',
    // Approval must be an explicit boolean true; truthy strings do not count.
    approved: raw.approved === true,
    trigger: raw.trigger || undefined,
    thread: raw.thread || undefined,
    task: raw.task || undefined,
    comments: Array.isArray(raw.comments) ? raw.comments : undefined,
    messages: Array.isArray(raw.messages) ? raw.messages : undefined,
  };
}
|
|
152
|
+
|
|
153
|
+
/**
 * Decide which hosted provider should run a request.
 *
 * Rules are evaluated in order:
 *   1. providerPreference 'railway'            -> Railway control plane.
 *   2. request needs repo / local file access  -> Railway control plane.
 *   3. providerPreference 'cloudflare', isolation required, a workload listed
 *      in DYNAMIC_WORKLOADS, or contextTokens >= 120000
 *                                              -> Cloudflare dynamic worker.
 *   4. anything else                           -> Railway control plane.
 *
 * @param {Object} [input={}] - Raw request; passed through normalizeRequest.
 * @returns {{provider: string, reason: string, request: Object}} The chosen
 *   provider, a human-readable reason, and the normalized request.
 */
function classifyHostedExecution(input = {}) {
  const request = normalizeRequest(input);
  const decide = (provider, reason) => ({ provider, reason, request });
  const preference = request.providerPreference;

  if (preference === 'railway') {
    return decide('railway_control_plane', 'provider preference pinned to Railway control plane');
  }
  if (request.requiresRepoAccess) {
    return decide('railway_control_plane', 'task requires repo or local filesystem access');
  }

  const pinnedToCloudflare = preference === 'cloudflare';
  const sandboxEligible = pinnedToCloudflare
    || request.requiresIsolation
    || DYNAMIC_WORKLOADS.has(request.workloadType)
    || request.contextTokens >= 120000;

  if (!sandboxEligible) {
    return decide('railway_control_plane', 'standard hosted workload remains on the Railway control plane');
  }

  const reason = pinnedToCloudflare
    ? 'provider preference explicitly requested Cloudflare dynamic workers'
    : 'hosted isolated workload benefits from edge sandbox execution';
  return decide('cloudflare_dynamic_worker', reason);
}
|
|
193
|
+
|
|
194
|
+
/**
 * Compute the HMAC-SHA256 signature for a dispatch envelope.
 *
 * The signed message is `<timestamp>.<bodyText>`, so a signature is bound to
 * the timestamp it was issued with.
 *
 * @param {string} bodyText - Serialized envelope body.
 * @param {string} secret - Shared secret; a missing value is signed with ''.
 * @param {string} timestamp - Timestamp string sent alongside the signature.
 * @returns {string} Lower-case hex digest.
 */
function signDispatchEnvelope(bodyText, secret, timestamp) {
  const key = String(secret || '');
  const message = `${timestamp}.${bodyText}`;
  const hmac = crypto.createHmac('sha256', key);
  hmac.update(message);
  return hmac.digest('hex');
}
|
|
200
|
+
|
|
201
|
+
/**
 * Verify a signed dispatch envelope.
 *
 * @param {Object} params
 * @param {string|Object} params.body - Envelope body; non-strings are
 *   serialized with stableStringify before signing.
 * @param {string} params.secret - Shared secret.
 * @param {string} params.timestamp - Timestamp header; must be parseable by
 *   Date.parse.
 * @param {string} params.signature - Hex signature header.
 * @param {number|Date} [params.now=Date.now()] - Current time, coerced with
 *   Number().
 * @param {number} [params.maxSkewMs=DEFAULT_MAX_SKEW_MS] - Largest accepted
 *   distance between `now` and the timestamp, in milliseconds.
 * @returns {boolean} true only when all inputs are present, the timestamp is
 *   within the allowed skew, and the signature matches.
 */
function verifyDispatchEnvelope({
  body,
  secret,
  timestamp,
  signature,
  now = Date.now(),
  maxSkewMs = DEFAULT_MAX_SKEW_MS,
}) {
  if (!secret || !timestamp || !signature) return false;

  const issuedAt = Date.parse(timestamp);
  if (!Number.isFinite(issuedAt)) return false;

  // Written as a negated "<=" so that a NaN on either side (unparseable `now`
  // or `maxSkewMs`) fails closed; `skew > max` is false for NaN and would
  // silently skip the freshness check.
  const skewMs = Math.abs(Number(now) - issuedAt);
  if (!(skewMs <= maxSkewMs)) return false;

  // Buffer.from(text, 'hex') stops at the first invalid character and drops
  // an odd trailing nibble, so "<valid signature>zz" would decode to the valid
  // bytes. Require well-formed hex before decoding.
  const signatureText = String(signature).trim();
  if (signatureText.length % 2 !== 0 || !/^[0-9a-f]+$/i.test(signatureText)) return false;

  const bodyText = typeof body === 'string' ? body : stableStringify(body);
  const expected = signDispatchEnvelope(bodyText, secret, timestamp);
  const expectedBuffer = Buffer.from(expected, 'hex');
  const actualBuffer = Buffer.from(signatureText, 'hex');
  // timingSafeEqual throws on length mismatch, so compare lengths first.
  if (expectedBuffer.length !== actualBuffer.length) return false;
  return crypto.timingSafeEqual(expectedBuffer, actualBuffer);
}
|
|
220
|
+
|
|
221
|
+
/**
 * Build the dispatch plan for running a request in the Cloudflare sandbox.
 *
 * The request is classified first. When it does not resolve to the Cloudflare
 * dynamic worker, a non-dispatch plan pointing at the Railway control plane is
 * returned. Otherwise an envelope is assembled and signed (when a secret is
 * available) over stableStringify(envelope) together with the timestamp.
 *
 * @param {Object} [input={}] - Raw execution request.
 * @param {Object} [options={}]
 * @param {Date|number|string} [options.now] - Timestamp for the envelope: a
 *   Date, epoch milliseconds, or a ready-made timestamp string. Numbers are
 *   converted to ISO 8601; previously they were stringified to bare digits,
 *   which verifyDispatchEnvelope cannot parse with Date.parse. Defaults to
 *   the current time.
 * @param {string} [options.executionId] - Overrides the generated id.
 * @param {string} [options.sharedSecret] - Signing secret; falls back to
 *   CLOUDFLARE_SANDBOX_SHARED_SECRET, then THUMBGATE_CLOUDFLARE_SANDBOX_SECRET.
 * @param {boolean} [options.includeBootstrap] - Pass false to skip calling
 *   bootstrapInternalAgent and leave envelope.bootstrap null.
 * @param {string} [options.route] - Overrides DEFAULT_SANDBOX_ROUTE.
 * @returns {Object} Plan with provider, shouldDispatch, reason, route and a
 *   request summary; dispatch plans add executionId, envelope, headers and
 *   signatureReady (false when no secret was available, in which case the
 *   signature header is an empty string).
 */
function buildCloudflareSandboxPlan(input = {}, options = {}) {
  const classification = classifyHostedExecution(input);
  const request = classification.request;

  if (classification.provider !== 'cloudflare_dynamic_worker') {
    return {
      provider: 'railway_control_plane',
      shouldDispatch: false,
      reason: classification.reason,
      route: null,
      request: summarizeRequest(request),
    };
  }

  let timestamp;
  if (options.now instanceof Date) {
    timestamp = options.now.toISOString();
  } else if (typeof options.now === 'number' && Number.isFinite(options.now)) {
    // Epoch milliseconds: emit the same ISO form used for Date inputs so the
    // receiving side can parse it.
    timestamp = new Date(options.now).toISOString();
  } else {
    timestamp = normalizeText(options.now) || new Date().toISOString();
  }

  const executionId = normalizeText(options.executionId) || buildExecutionId();
  const secret = options.sharedSecret
    || process.env.CLOUDFLARE_SANDBOX_SHARED_SECRET
    || process.env.THUMBGATE_CLOUDFLARE_SANDBOX_SECRET
    || '';
  const networkPolicy = buildNetworkPolicy(request);
  const bindings = buildBindings(request);
  const bootstrap = options.includeBootstrap === false
    ? null
    : bootstrapInternalAgent({
      source: request.source,
      prepareSandbox: false,
      intentId: request.intentId,
      context: request.context,
      mcpProfile: request.mcpProfile,
      partnerProfile: request.partnerProfile,
      delegationMode: request.delegationMode,
      approved: request.approved,
      trigger: request.trigger,
      thread: request.thread,
      task: request.task,
      comments: request.comments,
      messages: request.messages,
    });

  const envelope = {
    executionId,
    provider: 'cloudflare_dynamic_worker',
    workloadType: request.workloadType,
    tier: request.tier,
    tenantId: request.tenantId,
    // Without a caller-supplied trace id the execution id doubles as one.
    traceId: request.traceId || executionId,
    requestedAt: timestamp,
    networkPolicy,
    bindings,
    limits: {
      maxRuntimeMs: request.requiresNetwork ? 60000 : 30000,
      maxContextTokens: request.contextTokens || null,
    },
    bootstrap: bootstrap ? {
      invocation: bootstrap.invocation,
      startupContext: bootstrap.startupContext,
      reviewerLane: bootstrap.reviewerLane,
      middlewarePlan: bootstrap.middlewarePlan,
      intentPlan: bootstrap.intentPlan,
    } : null,
  };

  const bodyText = stableStringify(envelope);
  const signature = secret ? signDispatchEnvelope(bodyText, secret, timestamp) : '';

  return {
    provider: 'cloudflare_dynamic_worker',
    shouldDispatch: true,
    reason: classification.reason,
    route: normalizeText(options.route) || DEFAULT_SANDBOX_ROUTE,
    request: summarizeRequest(request),
    executionId,
    envelope,
    headers: {
      'x-thumbgate-sandbox-timestamp': timestamp,
      'x-thumbgate-sandbox-signature': signature,
    },
    signatureReady: Boolean(signature),
  };
}
|
|
304
|
+
|
|
305
|
+
module.exports = {
|
|
306
|
+
DEFAULT_MAX_SKEW_MS,
|
|
307
|
+
DEFAULT_SANDBOX_ROUTE,
|
|
308
|
+
DYNAMIC_WORKLOADS,
|
|
309
|
+
stableStringify,
|
|
310
|
+
normalizeRequest,
|
|
311
|
+
classifyHostedExecution,
|
|
312
|
+
buildCloudflareSandboxPlan,
|
|
313
|
+
signDispatchEnvelope,
|
|
314
|
+
verifyDispatchEnvelope,
|
|
315
|
+
};
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Agentic Code Reasoning — Structured Trace Engine
|
|
4
|
+
*
|
|
5
|
+
* Based on Meta's "Agentic Code Reasoning" paper (arxiv 2603.01896).
|
|
6
|
+
* Forces structured line-level reasoning instead of pattern-matching guesses.
|
|
7
|
+
*
|
|
8
|
+
* Produces a verification trace for every code change claim, self-heal fix,
|
|
9
|
+
* or DPO pair, requiring explicit evidence for each assertion.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const crypto = require('node:crypto');
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* @typedef {Object} TraceStep
|
|
16
|
+
* @property {string} location - File path and line range (e.g. "scripts/self-heal.js:49-69")
|
|
17
|
+
* @property {string} claim - What this step asserts about correctness
|
|
18
|
+
* @property {string} evidence - Concrete evidence supporting the claim
|
|
19
|
+
* @property {'verified'|'unverified'|'refuted'} verdict - Assessment of the claim
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* @typedef {Object} ReasoningTrace
|
|
24
|
+
* @property {string} traceId - Unique identifier for this trace
|
|
25
|
+
* @property {string} timestamp - ISO 8601 timestamp
|
|
26
|
+
* @property {string} type - Trace type: 'self-heal' | 'dpo-pair' | 'proof-gate' | 'verification'
|
|
27
|
+
* @property {string} subject - What is being verified (script name, pair ID, etc.)
|
|
28
|
+
* @property {TraceStep[]} steps - Ordered reasoning steps
|
|
29
|
+
* @property {string[]} edgeCases - Edge cases explicitly addressed or ruled out
|
|
30
|
+
* @property {Object} summary - Aggregated verdict
|
|
31
|
+
* @property {number} summary.totalSteps - Total reasoning steps
|
|
32
|
+
* @property {number} summary.verified - Steps with verified verdict
|
|
33
|
+
* @property {number} summary.unverified - Steps with unverified verdict
|
|
34
|
+
* @property {number} summary.refuted - Steps with refuted verdict
|
|
35
|
+
* @property {number} summary.confidence - Ratio of verified to total (0-1)
|
|
36
|
+
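* @property {boolean} summary.passed - True if confidence >= threshold and refuted === 0
* @property {number} summary.controllability - Share (0-1) of the four suspicious-pattern flags raised by computeControllability
* @property {string[]} summary.controllabilityFlags - Names of the flags that were raised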
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
const DEFAULT_CONFIDENCE_THRESHOLD = 0.7;
|
|
40
|
+
|
|
41
|
+
/**
 * Produce a fresh identifier for a reasoning trace.
 *
 * @returns {string} The literal prefix "trace-" followed by 6 random bytes
 *   rendered as 12 lowercase hex characters.
 */
function generateTraceId() {
  const randomSuffix = crypto.randomBytes(6).toString('hex');
  return ['trace', randomSuffix].join('-');
}
|
|
44
|
+
|
|
45
|
+
/**
 * Start an empty reasoning trace.
 *
 * The returned object has no steps or edge cases yet and its `summary` is
 * `null` until the trace is finalized.
 *
 * @param {string} type - Trace category (stored as-is, not validated here).
 * @param {string} subject - What the trace is about (stored as-is).
 * @returns {ReasoningTrace} A new trace stamped with a generated id and the
 *   current time in ISO 8601 form.
 */
function createTrace(type, subject) {
  const traceId = generateTraceId();
  const timestamp = new Date().toISOString();

  return {
    traceId,
    timestamp,
    type,
    subject,
    steps: [],
    edgeCases: [],
    summary: null,
  };
}
|
|
56
|
+
|
|
57
|
+
/**
 * Append one reasoning step to a trace.
 *
 * @param {ReasoningTrace} trace - Trace to extend; its `steps` array is mutated.
 * @param {Object} step - Step fields.
 * @param {string} step.location - Where the claim applies (required).
 * @param {string} step.claim - The assertion being made (required).
 * @param {string} [step.evidence] - Supporting evidence; any falsy value is stored as ''.
 * @param {'verified'|'unverified'|'refuted'} [step.verdict='unverified'] - Assessment of the claim.
 * @returns {ReasoningTrace} The same trace instance, to allow chaining.
 * @throws {Error} If `location` or `claim` is falsy, or `verdict` is not one of the three allowed values.
 */
function addStep(trace, { location, claim, evidence, verdict = 'unverified' }) {
  const hasRequiredFields = Boolean(location) && Boolean(claim);
  if (!hasRequiredFields) {
    throw new Error('TraceStep requires location and claim');
  }

  const allowedVerdicts = ['verified', 'unverified', 'refuted'];
  const isKnownVerdict = allowedVerdicts.some((candidate) => candidate === verdict);
  if (!isKnownVerdict) {
    throw new Error(`Invalid verdict: ${verdict}. Must be one of: ${allowedVerdicts.join(', ')}`);
  }

  // Normalize absent evidence to an empty string so later length checks are safe.
  const step = { location, claim, evidence: evidence || '', verdict };
  trace.steps.push(step);
  return trace;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Record an edge case on a trace.
 *
 * @param {ReasoningTrace} trace - Trace to extend; its `edgeCases` array is mutated.
 * @param {string} description - Edge-case text; falsy values are ignored.
 * @returns {ReasoningTrace} The same trace instance, to allow chaining.
 */
function addEdgeCase(trace, description) {
  if (!description) {
    return trace;
  }
  trace.edgeCases.push(description);
  return trace;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Score how "suspiciously easy" a trace looks.
 *
 * Four heuristics are evaluated; the score is the fraction of them that fire,
 * rounded to three decimals:
 *   - all_verified:       every step is 'verified' and there are more than 2 steps
 *   - identical_evidence: more than 1 step and all steps share one evidence value
 *   - thin_evidence:      more than half the steps have evidence shorter than 10 chars
 *   - no_edge_cases:      more than 1 step and no edge cases recorded
 *
 * @param {ReasoningTrace} trace - Trace whose `steps` and `edgeCases` are inspected.
 * @returns {{score: number, flags: string[]}} A trace with no steps yields
 *   `{ score: 0, flags: ['empty_trace'] }`.
 */
function computeControllability(trace) {
  const { steps, edgeCases } = trace;
  const stepCount = steps.length;
  if (stepCount === 0) {
    return { score: 0, flags: ['empty_trace'] };
  }

  const everyStepVerified = !steps.some((step) => step.verdict !== 'verified');

  const distinctEvidence = new Set();
  let thinCount = 0;
  for (const step of steps) {
    distinctEvidence.add(step.evidence);
    if (step.evidence.length < 10) {
      thinCount += 1;
    }
  }
  const edgeCaseCount = edgeCases.length;

  // Order matters: flags are reported in this sequence.
  const heuristics = [
    ['all_verified', everyStepVerified && stepCount > 2],
    ['identical_evidence', distinctEvidence.size === 1 && stepCount > 1],
    ['thin_evidence', thinCount > stepCount / 2],
    ['no_edge_cases', edgeCaseCount === 0 && stepCount > 1],
  ];

  const flags = heuristics
    .filter(([, raised]) => raised)
    .map(([flagName]) => flagName);
  const score = Math.round((flags.length / 4) * 1000) / 1000;
  return { score, flags };
}
|
|
93
|
+
|
|
94
|
+
/**
 * Compute and attach the summary for a trace.
 *
 * Counts steps per verdict, derives confidence as verified / total (rounded to
 * three decimals, 0 for an empty trace) and marks the trace as passed only when
 * confidence reaches the threshold and no step is refuted. The controllability
 * score and flags are included in the summary as well.
 *
 * @param {ReasoningTrace} trace - Trace to finalize; its `summary` is overwritten.
 * @param {Object} [options]
 * @param {number} [options.confidenceThreshold=DEFAULT_CONFIDENCE_THRESHOLD] - Minimum confidence to pass.
 * @returns {ReasoningTrace} The same trace instance with `summary` populated.
 */
function finalizeTrace(trace, { confidenceThreshold = DEFAULT_CONFIDENCE_THRESHOLD } = {}) {
  let verified = 0;
  let unverified = 0;
  let refuted = 0;
  for (const step of trace.steps) {
    if (step.verdict === 'verified') {
      verified += 1;
    } else if (step.verdict === 'unverified') {
      unverified += 1;
    } else if (step.verdict === 'refuted') {
      refuted += 1;
    }
  }

  const totalSteps = trace.steps.length;
  let confidence = 0;
  if (totalSteps > 0) {
    confidence = Math.round((verified / totalSteps) * 1000) / 1000;
  }

  const { score, flags } = computeControllability(trace);
  const meetsThreshold = confidence >= confidenceThreshold;

  trace.summary = {
    totalSteps,
    verified,
    unverified,
    refuted,
    confidence,
    passed: meetsThreshold && refuted === 0,
    controllability: score,
    controllabilityFlags: flags,
  };

  return trace;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Build a reasoning trace for a self-heal fix execution.
 *
 * The trace always contains three steps: whether the script exited
 * successfully, what it changed on disk, and whether its output tail contains
 * error keywords. It is finalized with the default confidence threshold.
 *
 * @param {Object} fixResult - Result from runFixPlan for a single script
 * @param {string} fixResult.script - Script name
 * @param {string} fixResult.status - 'success' | 'failed'
 * @param {number} fixResult.exitCode
 * @param {number} [fixResult.durationMs] - Run time; left out of the evidence when not reported
 * @param {string} [fixResult.error] - Failure description; 'non-zero exit' is used when absent
 * @param {string} [fixResult.outputTail] - Last 2000 chars of output
 * @param {string[]} [changedFiles=[]] - Files changed by this fix (null is treated as none)
 * @returns {ReasoningTrace}
 */
function traceForSelfHealFix(fixResult, changedFiles = []) {
  // The default parameter only covers `undefined`; tolerate an explicit null too.
  const files = changedFiles || [];
  const succeeded = fixResult.status === 'success';
  const command = `npm run ${fixResult.script}`;
  const trace = createTrace('self-heal', fixResult.script);

  // Only mention the duration when one was reported, so the evidence never
  // reads "completed in undefinedms".
  const durationNote = fixResult.durationMs == null
    ? ''
    : `, completed in ${fixResult.durationMs}ms`;

  addStep(trace, {
    location: command,
    claim: `Fix script "${fixResult.script}" executes without error`,
    evidence: succeeded
      ? `Exit code ${fixResult.exitCode}${durationNote}`
      : `Exit code ${fixResult.exitCode}, error: ${fixResult.error || 'non-zero exit'}`,
    verdict: succeeded ? 'verified' : 'refuted',
  });

  if (files.length > 0) {
    addStep(trace, {
      location: files.join(', '),
      claim: `Fix modified ${files.length} file(s) — changes are intentional`,
      evidence: `Changed: ${files.join(', ')}`,
      verdict: 'verified',
    });
  } else {
    addStep(trace, {
      location: command,
      claim: 'Fix produced no file changes (idempotent or no-op)',
      evidence: 'git diff --name-only returned empty after execution',
      verdict: 'verified',
    });
  }

  // Keyword scan is a substring heuristic on the lowercased tail, so text such
  // as "0 errors" also counts as a hit; that is why a hit on a successful run
  // is only downgraded to 'unverified' rather than 'refuted'.
  const outputTail = (fixResult.outputTail || '').toLowerCase();
  const hasErrors = /error|fail|exception|fatal/.test(outputTail);
  let outputVerdict = 'verified';
  if (hasErrors) {
    outputVerdict = succeeded ? 'unverified' : 'refuted';
  }
  addStep(trace, {
    location: `${command} (output)`,
    claim: 'Script output contains no error indicators',
    evidence: hasErrors
      ? `Output contains error keywords: ${outputTail.slice(-200)}`
      : 'No error keywords in output tail',
    verdict: outputVerdict,
  });

  addEdgeCase(trace, 'Script timeout not triggered (completed within deadline)');
  if (files.length === 0) {
    addEdgeCase(trace, 'No files changed — fix may already be applied or script is no-op');
  }

  return finalizeTrace(trace);
}
|
|
173
|
+
|
|
174
|
+
/**
 * Build a reasoning trace for a DPO preference pair.
 *
 * Steps cover: the rejected side (error title present), the chosen side
 * (learning title present), shared domain keys, the rubric delta (plus failing
 * criteria when listed), and the inferred prompt. Two standing edge cases are
 * recorded and the trace is finalized with the default confidence threshold.
 *
 * @param {Object} pair - The DPO pair with prompt, chosen, rejected, metadata
 * @param {string} pair.prompt - Inferred prompt; counted as verified only when longer than 10 characters
 * @param {Object} pair.metadata - Must be present; supplies errorId, learningId,
 *   errorTitle, learningTitle, and optionally matchedKeys, overlapScore and rubric
 * @returns {ReasoningTrace}
 */
function traceForDpoPair(pair) {
  const { metadata } = pair;
  const trace = createTrace('dpo-pair', `${metadata.errorId}->${metadata.learningId}`);

  addStep(trace, {
    location: `error:${metadata.errorId}`,
    claim: 'Rejected response represents a genuine mistake',
    evidence: `Error title: "${metadata.errorTitle}"`,
    verdict: metadata.errorTitle ? 'verified' : 'unverified',
  });

  addStep(trace, {
    location: `learning:${metadata.learningId}`,
    claim: 'Chosen response represents a correct approach',
    evidence: `Learning title: "${metadata.learningTitle}"`,
    verdict: metadata.learningTitle ? 'verified' : 'unverified',
  });

  const matchedKeys = metadata.matchedKeys || [];
  addStep(trace, {
    location: 'domain-matching',
    claim: `Error and learning share domain context (${matchedKeys.length} key(s))`,
    evidence: `Matched keys: [${matchedKeys.join(', ')}], overlap score: ${metadata.overlapScore}`,
    verdict: matchedKeys.length > 0 ? 'verified' : 'refuted',
  });

  const rubric = metadata.rubric;
  if (rubric) {
    const delta = rubric.weightedDelta;
    // A missing delta is absence of evidence, not evidence against the pair:
    // report it as 'unverified' (same as a zero delta) instead of 'refuted',
    // which would force the whole trace to fail.
    let deltaVerdict;
    if (delta == null || delta === 0) {
      deltaVerdict = 'unverified';
    } else {
      deltaVerdict = delta > 0 ? 'verified' : 'refuted';
    }
    addStep(trace, {
      location: 'rubric-delta',
      claim: 'Learning scores higher than error on rubric (positive delta)',
      evidence: `Learning: ${rubric.learningWeightedScore}, Error: ${rubric.errorWeightedScore}, Delta: ${rubric.weightedDelta}`,
      verdict: deltaVerdict,
    });

    const failingCriteria = rubric.errorFailingCriteria || rubric.failingCriteria || [];
    if (failingCriteria.length > 0) {
      addStep(trace, {
        location: 'rubric-failures',
        claim: `Error had ${failingCriteria.length} failing rubric criteria`,
        evidence: `Failing: [${failingCriteria.join(', ')}]`,
        verdict: 'verified',
      });
    }
  } else {
    addStep(trace, {
      location: 'rubric-delta',
      claim: 'Rubric scores provide quantitative quality signal',
      evidence: 'No rubric data available for this pair',
      verdict: 'unverified',
    });
  }

  addStep(trace, {
    location: 'prompt-inference',
    claim: 'Inferred prompt captures the shared scenario correctly',
    evidence: `Prompt: "${pair.prompt}"`,
    verdict: pair.prompt && pair.prompt.length > 10 ? 'verified' : 'unverified',
  });

  addEdgeCase(trace, 'Pair may lack temporal proximity — error and learning from different sessions');
  addEdgeCase(trace, 'Domain overlap is keyword-based — semantic similarity not verified');

  return finalizeTrace(trace);
}
|
|
245
|
+
|
|
246
|
+
/**
 * Turn one proof-harness check into a reasoning trace.
 *
 * The first step records the pass/fail outcome with the JSON-serialized
 * details as evidence. When details are present, extra steps are added for a
 * defined `status` (verdict follows the check outcome) and a defined
 * `accepted` value (always recorded as verified).
 *
 * @param {Object} checkResult - A check from the proof report
 * @param {string} checkResult.name - Check name
 * @param {boolean} checkResult.passed - Whether the check passed
 * @param {Object} checkResult.details - Check-specific details
 * @returns {ReasoningTrace} Finalized trace of type 'proof-gate'
 */
function traceForProofCheck(checkResult) {
  const { name, passed, details } = checkResult;
  const trace = createTrace('proof-gate', name);
  const outcomeVerdict = passed ? 'verified' : 'refuted';

  const serializedDetails = JSON.stringify(details);
  const headlineEvidence = passed
    ? `Passed with details: ${serializedDetails}`
    : `Failed: ${serializedDetails}`;
  addStep(trace, {
    location: `check:${name}`,
    claim: `Proof check "${name}" passes`,
    evidence: headlineEvidence,
    verdict: outcomeVerdict,
  });

  if (!details) {
    return finalizeTrace(trace);
  }

  if (details.status !== undefined) {
    addStep(trace, {
      location: `check:${name}/status`,
      claim: 'HTTP/response status is expected value',
      evidence: `Status: ${details.status}`,
      verdict: outcomeVerdict,
    });
  }

  if (details.accepted !== undefined) {
    addStep(trace, {
      location: `check:${name}/accepted`,
      claim: `Acceptance state is ${details.accepted}`,
      evidence: `accepted=${details.accepted}`,
      verdict: 'verified',
    });
  }

  return finalizeTrace(trace);
}
|
|
289
|
+
|
|
290
|
+
/**
 * Roll several traces up into one verification summary.
 *
 * Trace-level figures (passed, flagged, average confidence) come from each
 * trace's `summary`; traces without a summary count as not passed and
 * contribute 0 confidence. Step-level figures are tallied across every step of
 * every trace, with anything that is neither verified nor refuted reported as
 * unverified. Note that an empty input yields `allPassed: true`.
 *
 * @param {ReasoningTrace[]} traces
 * @returns {Object} Aggregated summary with totalTraces, passedTraces,
 *   failedTraces, totalSteps, verified, unverified, refuted,
 *   averageConfidence, allPassed and flaggedTraces (controllability > 0.5).
 */
function aggregateTraces(traces) {
  const totalTraces = traces.length;

  let passedTraces = 0;
  let flaggedTraces = 0;
  let confidenceSum = 0;
  traces.forEach((trace) => {
    const summary = trace.summary;
    if (!summary) {
      return;
    }
    confidenceSum += summary.confidence;
    if (summary.passed) {
      passedTraces += 1;
    }
    if (summary.controllability > 0.5) {
      flaggedTraces += 1;
    }
  });

  const stepTally = traces
    .flatMap((trace) => trace.steps)
    .reduce(
      (tally, step) => {
        tally.total += 1;
        if (step.verdict === 'verified') {
          tally.verified += 1;
        }
        if (step.verdict === 'refuted') {
          tally.refuted += 1;
        }
        return tally;
      },
      { total: 0, verified: 0, refuted: 0 },
    );

  let averageConfidence = 0;
  if (totalTraces > 0) {
    averageConfidence = Math.round((confidenceSum / totalTraces) * 1000) / 1000;
  }

  return {
    totalTraces,
    passedTraces,
    failedTraces: totalTraces - passedTraces,
    totalSteps: stepTally.total,
    verified: stepTally.verified,
    unverified: stepTally.total - stepTally.verified - stepTally.refuted,
    refuted: stepTally.refuted,
    averageConfidence,
    allPassed: passedTraces === totalTraces,
    flaggedTraces,
  };
}
|
|
320
|
+
|
|
321
|
+
module.exports = {
|
|
322
|
+
createTrace,
|
|
323
|
+
addStep,
|
|
324
|
+
addEdgeCase,
|
|
325
|
+
computeControllability,
|
|
326
|
+
finalizeTrace,
|
|
327
|
+
traceForSelfHealFix,
|
|
328
|
+
traceForDpoPair,
|
|
329
|
+
traceForProofCheck,
|
|
330
|
+
aggregateTraces,
|
|
331
|
+
DEFAULT_CONFIDENCE_THRESHOLD,
|
|
332
|
+
/**
|
|
333
|
+
* Wraps a prompt with private reasoning instructions to maximize accuracy.
|
|
334
|
+
*/
|
|
335
|
+
withReasoningPrompt: (userPrompt, role = 'expert senior software engineer') => {
|
|
336
|
+
return `
|
|
337
|
+
You are a ${role} in March 2026.
|
|
338
|
+
|
|
339
|
+
INSTRUCTION:
|
|
340
|
+
1. First, think through the problem step-by-step privately.
|
|
341
|
+
2. Do not reveal your reasoning process in the final output.
|
|
342
|
+
3. Perform an internal self-correction check for logic gaps or hallucinations.
|
|
343
|
+
4. When you are done, provide ONLY the final, concise, and technically accurate answer or code.
|
|
344
|
+
|
|
345
|
+
USER PROMPT:
|
|
346
|
+
${userPrompt}
|
|
347
|
+
`.trim();
|
|
348
|
+
}
|
|
349
|
+
};
|
|
350
|
+
// Tests cover this module through the node:test suite; avoid hardcoding counts here.
|