thumbgate 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/README.md +134 -0
- package/.claude-plugin/bundle/icon.png +0 -0
- package/.claude-plugin/bundle/icon.svg +18 -0
- package/.claude-plugin/bundle/server/index.js +24 -0
- package/.claude-plugin/marketplace.json +36 -0
- package/.claude-plugin/plugin.json +21 -0
- package/.well-known/mcp/server-card.json +231 -0
- package/LICENSE +21 -0
- package/README.md +375 -0
- package/adapters/README.md +9 -0
- package/adapters/amp/skills/rlhf-feedback/SKILL.md +22 -0
- package/adapters/chatgpt/INSTALL.md +83 -0
- package/adapters/chatgpt/openapi.yaml +1281 -0
- package/adapters/claude/.mcp.json +14 -0
- package/adapters/codex/config.toml +9 -0
- package/adapters/gemini/function-declarations.json +224 -0
- package/adapters/mcp/server-stdio.js +788 -0
- package/adapters/opencode/opencode.json +15 -0
- package/bin/cli.js +1483 -0
- package/bin/memory.sh +64 -0
- package/bin/obsidian-sync.sh +20 -0
- package/bin/postinstall.js +37 -0
- package/config/build-metadata.json +4 -0
- package/config/e2e-critical-flows.json +45 -0
- package/config/gate-templates.json +77 -0
- package/config/gates/claim-verification.json +29 -0
- package/config/gates/computer-use.json +39 -0
- package/config/gates/default.json +117 -0
- package/config/github-about.json +25 -0
- package/config/mcp-allowlists.json +135 -0
- package/config/model-tiers.json +33 -0
- package/config/partner-routing.json +132 -0
- package/config/policy-bundles/constrained-v1.json +64 -0
- package/config/policy-bundles/default-v1.json +91 -0
- package/config/rubrics/default-v1.json +52 -0
- package/config/skill-packs/react-testing.json +23 -0
- package/config/skill-packs/stripe-integration/references/api-spec.json +1 -0
- package/config/skill-packs/stripe-integration/references/webhook-guide.md +3 -0
- package/config/skill-specs/pr-reviewer.json +9 -0
- package/config/skill-specs/release-status.json +9 -0
- package/config/skill-specs/ticket-triage.json +9 -0
- package/config/subagent-profiles.json +32 -0
- package/config/tessl-tiles.json +29 -0
- package/config/thumbgate-settings.managed.json +12 -0
- package/openapi/openapi.yaml +1281 -0
- package/package.json +286 -0
- package/plugins/amp-skill/INSTALL.md +52 -0
- package/plugins/amp-skill/SKILL.md +64 -0
- package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +22 -0
- package/plugins/claude-codex-bridge/.mcp.json +12 -0
- package/plugins/claude-codex-bridge/INSTALL.md +43 -0
- package/plugins/claude-codex-bridge/README.md +46 -0
- package/plugins/claude-codex-bridge/scripts/codex-bridge.js +288 -0
- package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +24 -0
- package/plugins/claude-codex-bridge/skills/result/SKILL.md +22 -0
- package/plugins/claude-codex-bridge/skills/review/SKILL.md +28 -0
- package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +27 -0
- package/plugins/claude-codex-bridge/skills/setup/SKILL.md +21 -0
- package/plugins/claude-codex-bridge/skills/status/SKILL.md +19 -0
- package/plugins/claude-skill/INSTALL.md +55 -0
- package/plugins/claude-skill/SKILL.md +46 -0
- package/plugins/codex-profile/.codex-plugin/plugin.json +43 -0
- package/plugins/codex-profile/.mcp.json +12 -0
- package/plugins/codex-profile/AGENTS.md +20 -0
- package/plugins/codex-profile/INSTALL.md +66 -0
- package/plugins/codex-profile/README.md +37 -0
- package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +23 -0
- package/plugins/cursor-marketplace/CHANGELOG.md +30 -0
- package/plugins/cursor-marketplace/LICENSE +21 -0
- package/plugins/cursor-marketplace/README.md +124 -0
- package/plugins/cursor-marketplace/agents/reliability-reviewer.md +31 -0
- package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
- package/plugins/cursor-marketplace/commands/capture-feedback.md +33 -0
- package/plugins/cursor-marketplace/commands/check-gates.md +25 -0
- package/plugins/cursor-marketplace/commands/show-lessons.md +27 -0
- package/plugins/cursor-marketplace/hooks/hooks.json +10 -0
- package/plugins/cursor-marketplace/mcp.json +12 -0
- package/plugins/cursor-marketplace/rules/feedback-capture.mdc +34 -0
- package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +30 -0
- package/plugins/cursor-marketplace/rules/session-continuity.mdc +28 -0
- package/plugins/cursor-marketplace/scripts/gate-check.sh +11 -0
- package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +47 -0
- package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +31 -0
- package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +30 -0
- package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +33 -0
- package/plugins/gemini-extension/INSTALL.md +92 -0
- package/plugins/gemini-extension/gemini_prompt.txt +14 -0
- package/plugins/gemini-extension/tool_contract.json +45 -0
- package/plugins/opencode-profile/INSTALL.md +57 -0
- package/public/assets/instagram-card.png +0 -0
- package/public/assets/tiktok-agent-memory.mp4 +0 -0
- package/public/blog.html +400 -0
- package/public/dashboard.html +1093 -0
- package/public/guide.html +317 -0
- package/public/index.html +1195 -0
- package/public/learn/agent-harness-pattern.html +180 -0
- package/public/learn/ai-agent-persistent-memory.html +202 -0
- package/public/learn/learn.css +45 -0
- package/public/learn/mcp-pre-action-gates-explained.html +172 -0
- package/public/learn/stop-ai-agent-force-push.html +134 -0
- package/public/learn/vibe-coding-safety-net.html +142 -0
- package/public/learn.html +213 -0
- package/public/lessons.html +650 -0
- package/public/vercel.json +8 -0
- package/scripts/__pycache__/train_from_feedback.cpython-314.pyc +0 -0
- package/scripts/a2ui-engine.js +73 -0
- package/scripts/access-anomaly-detector.js +12 -0
- package/scripts/adk-consolidator.js +266 -0
- package/scripts/agent-readiness.js +220 -0
- package/scripts/agent-security-hardening.js +227 -0
- package/scripts/agentic-data-pipeline.js +847 -0
- package/scripts/analytics-report.js +328 -0
- package/scripts/analytics-window.js +158 -0
- package/scripts/async-job-runner.js +1001 -0
- package/scripts/audit-trail.js +398 -0
- package/scripts/auto-promote-gates.js +293 -0
- package/scripts/auto-wire-hooks.js +316 -0
- package/scripts/autonomous-sales-agent.js +39 -0
- package/scripts/autoresearch-runner.js +216 -0
- package/scripts/background-agent-governance.js +237 -0
- package/scripts/behavioral-extraction.js +93 -0
- package/scripts/belief-update.js +84 -0
- package/scripts/billing.js +2438 -0
- package/scripts/bot-detector.js +50 -0
- package/scripts/budget-guard.js +173 -0
- package/scripts/build-claude-mcpb.js +189 -0
- package/scripts/build-metadata.js +97 -0
- package/scripts/check-congruence.js +322 -0
- package/scripts/cli-feedback.js +135 -0
- package/scripts/cli-telemetry.js +87 -0
- package/scripts/cloudflare-dynamic-sandbox.js +315 -0
- package/scripts/code-reasoning.js +350 -0
- package/scripts/codegraph-context.js +466 -0
- package/scripts/commercial-offer.js +56 -0
- package/scripts/computer-use-firewall.js +250 -0
- package/scripts/context-engine.js +694 -0
- package/scripts/contextfs.js +1287 -0
- package/scripts/conversation-context.js +119 -0
- package/scripts/creator-campaigns.js +239 -0
- package/scripts/daemon-manager.js +108 -0
- package/scripts/daily-digest.js +11 -0
- package/scripts/dashboard-render-spec.js +395 -0
- package/scripts/dashboard.js +1058 -0
- package/scripts/data-governance.js +173 -0
- package/scripts/delegation-runtime.js +900 -0
- package/scripts/deploy-gcp.sh +44 -0
- package/scripts/deploy-policy.js +231 -0
- package/scripts/disagreement-mining.js +315 -0
- package/scripts/dispatch-brief.js +159 -0
- package/scripts/distribution-surfaces.js +44 -0
- package/scripts/dpo-optimizer.js +206 -0
- package/scripts/ensure-repo-bootstrap.js +129 -0
- package/scripts/ephemeral-agent-store.js +219 -0
- package/scripts/eval-harness.js +56 -0
- package/scripts/evolution-state.js +241 -0
- package/scripts/experiment-tracker.js +267 -0
- package/scripts/export-databricks-bundle.js +242 -0
- package/scripts/export-dpo-pairs.js +344 -0
- package/scripts/export-kto-pairs.js +309 -0
- package/scripts/export-training.js +450 -0
- package/scripts/failure-diagnostics.js +558 -0
- package/scripts/feedback-attribution.js +313 -0
- package/scripts/feedback-fallback.js +110 -0
- package/scripts/feedback-history-distiller.js +391 -0
- package/scripts/feedback-inbox-read.js +162 -0
- package/scripts/feedback-loop.js +1887 -0
- package/scripts/feedback-paths.js +145 -0
- package/scripts/feedback-quality.js +139 -0
- package/scripts/feedback-root-consolidator.js +238 -0
- package/scripts/feedback-schema.js +426 -0
- package/scripts/feedback-session.js +286 -0
- package/scripts/feedback-to-memory.js +185 -0
- package/scripts/feedback-to-rules.js +164 -0
- package/scripts/filesystem-search.js +405 -0
- package/scripts/funnel-analytics.js +35 -0
- package/scripts/gate-satisfy.js +42 -0
- package/scripts/gate-stats.js +116 -0
- package/scripts/gate-templates.js +70 -0
- package/scripts/gates-engine.js +816 -0
- package/scripts/generate-paperbanana-diagrams.sh +99 -0
- package/scripts/generate-pretool-hook.sh +40 -0
- package/scripts/github-about.js +350 -0
- package/scripts/github-outreach.js +65 -0
- package/scripts/gtm-revenue-loop.js +520 -0
- package/scripts/hallucination-detector.js +226 -0
- package/scripts/hf-papers.js +317 -0
- package/scripts/history-distiller.js +200 -0
- package/scripts/hook-auto-capture.sh +100 -0
- package/scripts/hook-stop-pr-thread-check.sh +68 -0
- package/scripts/hook-stop-self-score.sh +51 -0
- package/scripts/hook-stop-verify-deploy.sh +31 -0
- package/scripts/hook-thumbgate-cache-updater.js +48 -0
- package/scripts/hook-verify-before-done.sh +20 -0
- package/scripts/hosted-config.js +156 -0
- package/scripts/hybrid-feedback-context.js +675 -0
- package/scripts/install-mcp.js +159 -0
- package/scripts/intent-router.js +392 -0
- package/scripts/internal-agent-bootstrap.js +490 -0
- package/scripts/jsonl-watcher.js +155 -0
- package/scripts/lesson-db.js +613 -0
- package/scripts/lesson-inference.js +310 -0
- package/scripts/lesson-retrieval.js +95 -0
- package/scripts/lesson-rotation.js +137 -0
- package/scripts/lesson-search.js +644 -0
- package/scripts/lesson-synthesis.js +196 -0
- package/scripts/license.js +50 -0
- package/scripts/local-model-profile.js +384 -0
- package/scripts/markdown-escape.js +12 -0
- package/scripts/marketing-experiment.js +671 -0
- package/scripts/mcp-config.js +149 -0
- package/scripts/mcp-policy.js +99 -0
- package/scripts/memalign-recall.js +111 -0
- package/scripts/memory-firewall.js +222 -0
- package/scripts/memory-migration.js +296 -0
- package/scripts/meta-policy.js +190 -0
- package/scripts/metered-billing.js +16 -0
- package/scripts/model-tier-router.js +301 -0
- package/scripts/money-watcher.js +71 -0
- package/scripts/multi-hop-recall.js +240 -0
- package/scripts/natural-language-harness.js +330 -0
- package/scripts/obsidian-export.js +713 -0
- package/scripts/operational-dashboard.js +103 -0
- package/scripts/operational-summary.js +93 -0
- package/scripts/optimize-context.js +17 -0
- package/scripts/org-dashboard.js +201 -0
- package/scripts/partner-orchestration.js +146 -0
- package/scripts/per-step-scoring.js +165 -0
- package/scripts/perplexity-marketing.js +466 -0
- package/scripts/pii-scanner.js +153 -0
- package/scripts/plan-gate.js +154 -0
- package/scripts/post-everywhere.js +308 -0
- package/scripts/post-to-x-retry.sh +22 -0
- package/scripts/post-to-x.js +369 -0
- package/scripts/pr-manager.js +236 -0
- package/scripts/predictive-insights.js +356 -0
- package/scripts/principle-extractor.js +162 -0
- package/scripts/pro-features.js +40 -0
- package/scripts/pro-local-dashboard.js +174 -0
- package/scripts/problem-detail.js +53 -0
- package/scripts/product-feedback.js +134 -0
- package/scripts/profile-router.js +245 -0
- package/scripts/prompt-dlp.js +221 -0
- package/scripts/prompt-guard.js +83 -0
- package/scripts/prove-adapters.js +863 -0
- package/scripts/prove-attribution.js +365 -0
- package/scripts/prove-automation.js +653 -0
- package/scripts/prove-autoresearch.js +304 -0
- package/scripts/prove-claim-verification.js +277 -0
- package/scripts/prove-cloudflare-sandbox.js +163 -0
- package/scripts/prove-data-pipeline.js +410 -0
- package/scripts/prove-data-quality.js +227 -0
- package/scripts/prove-evolution.js +352 -0
- package/scripts/prove-harnesses.js +287 -0
- package/scripts/prove-intelligence.js +259 -0
- package/scripts/prove-lancedb.js +371 -0
- package/scripts/prove-local-intelligence.js +342 -0
- package/scripts/prove-loop-closure.js +263 -0
- package/scripts/prove-predictive-insights.js +357 -0
- package/scripts/prove-runtime.js +350 -0
- package/scripts/prove-seo-gsd.js +234 -0
- package/scripts/prove-settings.js +279 -0
- package/scripts/prove-subway-upgrades.js +277 -0
- package/scripts/prove-tessl.js +229 -0
- package/scripts/prove-training-export.js +327 -0
- package/scripts/prove-workflow-contract.js +116 -0
- package/scripts/prove-xmemory.js +332 -0
- package/scripts/publish-decision.js +133 -0
- package/scripts/pulse.js +80 -0
- package/scripts/rate-limiter.js +125 -0
- package/scripts/reddit-dm-outreach.js +182 -0
- package/scripts/reddit-monitor-cron.sh +26 -0
- package/scripts/reflector-agent.js +221 -0
- package/scripts/reminder-engine.js +132 -0
- package/scripts/revenue-status.js +472 -0
- package/scripts/risk-scorer.js +459 -0
- package/scripts/rlaif-self-audit.js +129 -0
- package/scripts/rlhf_session_start.sh +32 -0
- package/scripts/rubric-engine.js +230 -0
- package/scripts/schedule-manager.js +251 -0
- package/scripts/secret-scanner.js +414 -0
- package/scripts/self-heal.js +147 -0
- package/scripts/self-healing-check.js +188 -0
- package/scripts/semantic-layer.js +98 -0
- package/scripts/seo-gsd.js +1153 -0
- package/scripts/settings-hierarchy.js +214 -0
- package/scripts/shieldcortex-memory-firewall-runner.mjs +53 -0
- package/scripts/skill-exporter.js +262 -0
- package/scripts/skill-generator.js +446 -0
- package/scripts/skill-materializer.js +134 -0
- package/scripts/skill-packs.js +136 -0
- package/scripts/skill-proposer.js +99 -0
- package/scripts/skill-quality-tracker.js +282 -0
- package/scripts/slo-alert-engine.js +14 -0
- package/scripts/slow-loop.js +72 -0
- package/scripts/social-analytics/db/schema.sql +32 -0
- package/scripts/social-analytics/db/social-analytics.db +0 -0
- package/scripts/social-analytics/digest.js +256 -0
- package/scripts/social-analytics/generate-instagram-card.js +97 -0
- package/scripts/social-analytics/instagram-thumbgate-post.js +107 -0
- package/scripts/social-analytics/load-env.js +46 -0
- package/scripts/social-analytics/mcp-server.js +289 -0
- package/scripts/social-analytics/normalizer.js +580 -0
- package/scripts/social-analytics/notify.js +162 -0
- package/scripts/social-analytics/poll-all.js +92 -0
- package/scripts/social-analytics/pollers/github.js +195 -0
- package/scripts/social-analytics/pollers/instagram.js +253 -0
- package/scripts/social-analytics/pollers/linkedin.js +330 -0
- package/scripts/social-analytics/pollers/plausible.js +247 -0
- package/scripts/social-analytics/pollers/reddit.js +306 -0
- package/scripts/social-analytics/pollers/threads.js +233 -0
- package/scripts/social-analytics/pollers/tiktok.js +203 -0
- package/scripts/social-analytics/pollers/x.js +227 -0
- package/scripts/social-analytics/pollers/youtube.js +304 -0
- package/scripts/social-analytics/pollers/zernio.js +183 -0
- package/scripts/social-analytics/publish-instagram-thumbgate.js +98 -0
- package/scripts/social-analytics/publish-thumbgate-launch.js +316 -0
- package/scripts/social-analytics/publishers/devto.js +122 -0
- package/scripts/social-analytics/publishers/instagram.js +317 -0
- package/scripts/social-analytics/publishers/linkedin.js +294 -0
- package/scripts/social-analytics/publishers/reddit.js +390 -0
- package/scripts/social-analytics/publishers/threads.js +275 -0
- package/scripts/social-analytics/publishers/tiktok.js +217 -0
- package/scripts/social-analytics/publishers/x.js +259 -0
- package/scripts/social-analytics/publishers/youtube.js +223 -0
- package/scripts/social-analytics/publishers/zernio.js +378 -0
- package/scripts/social-analytics/run-digest.js +34 -0
- package/scripts/social-analytics/store.js +257 -0
- package/scripts/social-analytics/utm.js +143 -0
- package/scripts/social-pipeline.js +2628 -0
- package/scripts/social-quality-gate.js +18 -0
- package/scripts/social-reply-monitor.js +445 -0
- package/scripts/status-dashboard.js +155 -0
- package/scripts/statusline-lesson.js +16 -0
- package/scripts/statusline-tower.js +8 -0
- package/scripts/statusline.sh +116 -0
- package/scripts/stripe-live-status.js +115 -0
- package/scripts/subagent-profiles.js +79 -0
- package/scripts/sync-gh-secrets-from-env.sh +70 -0
- package/scripts/sync-github-about.js +52 -0
- package/scripts/sync-version.js +447 -0
- package/scripts/synthetic-dpo.js +234 -0
- package/scripts/telemetry-analytics.js +821 -0
- package/scripts/tessl-export.js +371 -0
- package/scripts/test-coverage.js +120 -0
- package/scripts/thompson-sampling.js +417 -0
- package/scripts/thumbgate-search.js +189 -0
- package/scripts/tool-kpi-tracker.js +12 -0
- package/scripts/tool-registry.js +811 -0
- package/scripts/train_from_feedback.py +933 -0
- package/scripts/user-profile.js +78 -0
- package/scripts/validate-feedback.js +581 -0
- package/scripts/validate-workflow-contract.js +287 -0
- package/scripts/vector-store.js +197 -0
- package/scripts/verification-loop.js +291 -0
- package/scripts/verify-obsidian-setup.sh +269 -0
- package/scripts/verify-run.js +269 -0
- package/scripts/webhook-delivery.js +62 -0
- package/scripts/weekly-auto-post.js +124 -0
- package/scripts/workflow-runs.js +154 -0
- package/scripts/workflow-sprint-intake.js +475 -0
- package/scripts/workspace-evolver.js +374 -0
- package/scripts/x-autonomous-marketing.js +139 -0
- package/scripts/xmemory-lite.js +405 -0
- package/skills/agent-memory/SKILL.md +97 -0
- package/skills/rlhf-feedback/SKILL.md +49 -0
- package/skills/solve-architecture-autonomy/SKILL.md +17 -0
- package/skills/solve-architecture-autonomy/tool.js +33 -0
- package/skills/thumbgate/SKILL.md +114 -0
- package/src/api/server.js +4206 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* DPO Preference Pair Exporter
|
|
4
|
+
*
|
|
5
|
+
* Transforms error + learning memories into DPO JSONL triples.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const fs = require('fs');
|
|
9
|
+
const path = require('path');
|
|
10
|
+
const { traceForDpoPair, aggregateTraces } = require('./code-reasoning');
|
|
11
|
+
const { resolveFeedbackDir } = require('./feedback-paths');
|
|
12
|
+
|
|
13
|
+
const DEFAULT_LOCAL_MEMORY_LOG = path.join(resolveFeedbackDir(), 'memory-log.jsonl');
|
|
14
|
+
|
|
15
|
+
/**
 * Load a JSON Lines file into an array of parsed records.
 *
 * A missing file, or a file containing only whitespace, yields an empty
 * array. Lines that cannot be parsed as JSON are skipped, and so are lines
 * that parse to a falsy value (null, 0, false, "").
 *
 * @param {string} filePath - Path of the .jsonl file to read.
 * @returns {Array<*>} Parsed records, in file order.
 */
function readJSONL(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }

  const text = fs.readFileSync(filePath, 'utf-8').trim();
  if (text === '') {
    return [];
  }

  const records = [];
  for (const row of text.split('\n')) {
    let value;
    try {
      value = JSON.parse(row);
    } catch {
      // Malformed line: ignore it and keep reading the rest of the log.
      continue;
    }
    if (value) {
      records.push(value);
    }
  }
  return records;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Derive the list of domain keys used to match memories against each other.
 *
 * Keys are, in order and de-duplicated:
 *   1. every tag except the generic ones ('feedback', 'positive', 'negative');
 *   2. up to three lower-cased title words longer than three characters,
 *      taken after stripping a leading "MISTAKE:" / "SUCCESS:" / "ERROR:" /
 *      "LEARNING:" / "PREFERENCE:" prefix (case-insensitive).
 *
 * @param {{tags?: *, title?: string}} memory - Memory record.
 * @returns {string[]} Unique domain keys.
 */
function extractDomainKeys(memory) {
  const ignoredTags = ['feedback', 'positive', 'negative'];

  const tagKeys = Array.isArray(memory.tags)
    ? memory.tags.filter((tag) => !ignoredTags.includes(tag))
    : [];

  const bareTitle = (memory.title || '').replace(
    /^(MISTAKE|SUCCESS|ERROR|LEARNING|PREFERENCE):\s*/i,
    ''
  );
  const titleKeys = bareTitle
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length > 3)
    .slice(0, 3);

  // A Set keeps first-seen order while removing duplicates.
  return [...new Set([...tagKeys, ...titleKeys])];
}
|
|
52
|
+
|
|
53
|
+
/**
 * Count how many entries of `keysA` also appear in `keysB`.
 *
 * Duplicates in `keysA` are counted once per occurrence.
 *
 * @param {Iterable<string>} keysA - Keys to test.
 * @param {Iterable<string>} keysB - Keys to test against.
 * @returns {number} Number of entries of `keysA` present in `keysB`.
 */
function domainOverlap(keysA, keysB) {
  const lookup = new Set(keysB);
  return Array.from(keysA).filter((key) => lookup.has(key)).length;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Build the prompt text for a DPO pair from an error and its matched learning.
 *
 * Preference order:
 *   1. tags present on both memories -> "Task domain: ..." prompt;
 *   2. the error title without its "MISTAKE:" / "ERROR:" prefix -> "Scenario: ..." prompt;
 *   3. a generic fallback question.
 *
 * @param {{tags?: string[], title?: string}} error - Error memory.
 * @param {{tags?: string[]}} learning - Learning memory.
 * @returns {string} Prompt text.
 */
function inferPrompt(error, learning) {
  const commonTags = (error.tags || []).filter((tag) => (learning.tags || []).includes(tag));
  if (commonTags.length > 0) {
    return `Task domain: ${commonTags.join(', ')}. How should the agent handle this scenario?`;
  }

  const scenario = (error.title || '').replace(/^(MISTAKE|ERROR):\s*/i, '').trim();
  return scenario
    ? `Scenario: ${scenario}. What is the better response?`
    : 'How should the agent respond in this situation?';
}
|
|
72
|
+
|
|
73
|
+
/**
 * Read the rubric weighted score of a memory.
 *
 * Only finite numbers and non-blank numeric strings count as a score.
 * Anything else (null, undefined, '', booleans, arrays, NaN, Infinity) is
 * reported as "no score". A plain `Number(...)` coercion would turn null, ''
 * and [] into 0 and `true` into 1, making a missing score look like a real
 * one and distorting the rubric delta computed from it.
 *
 * @param {?{rubricSummary?: {weightedScore?: *}}} memory - Memory record.
 * @returns {?number} The finite weighted score, or null when unavailable.
 */
function getRubricWeightedScore(memory) {
  if (!memory || !memory.rubricSummary) return null;

  const raw = memory.rubricSummary.weightedScore;

  if (typeof raw === 'number') {
    return Number.isFinite(raw) ? raw : null;
  }

  if (typeof raw === 'string' && raw.trim() !== '') {
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Summarise how the rubric scores of an error and a learning compare.
 *
 * Returns null when neither memory has a usable weighted score. Otherwise
 * returns both scores, their difference (learning minus error, rounded to
 * three decimals; null unless both scores exist) and each memory's failing
 * criteria (an empty array when absent).
 *
 * @param {?object} error - Error memory.
 * @param {?object} learning - Learning memory.
 * @returns {?{learningWeightedScore: ?number, errorWeightedScore: ?number,
 *   weightedDelta: ?number, errorFailingCriteria: Array, learningFailingCriteria: Array}}
 */
function buildRubricDelta(error, learning) {
  const errorScore = getRubricWeightedScore(error);
  const learningScore = getRubricWeightedScore(learning);

  const hasErrorScore = errorScore != null;
  const hasLearningScore = learningScore != null;
  if (!hasErrorScore && !hasLearningScore) {
    return null;
  }

  let weightedDelta = null;
  if (hasLearningScore && hasErrorScore) {
    weightedDelta = Math.round((learningScore - errorScore) * 1000) / 1000;
  }

  const failingCriteriaOf = (memory) => {
    if (memory && memory.rubricSummary) {
      return memory.rubricSummary.failingCriteria || [];
    }
    return [];
  };

  return {
    learningWeightedScore: learningScore,
    errorWeightedScore: errorScore,
    weightedDelta,
    errorFailingCriteria: failingCriteriaOf(error),
    learningFailingCriteria: failingCriteriaOf(learning),
  };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Collect distractor errors for a learning: other errors that share at least
 * one domain key with it. These act as harder negatives next to the paired
 * error.
 *
 * Candidates are ranked by key overlap only (highest first); rubric scores
 * are not consulted here. The error whose id equals `usedErrorId` is left
 * out, as is any error with no overlapping key.
 *
 * @param {object} learning - Learning memory the distractors are for.
 * @param {Array<{memory: object, keys: string[]}>} errorKeys - Errors with their precomputed domain keys.
 * @param {*} usedErrorId - Id of the error already paired with `learning`.
 * @param {number} maxDistractors - Maximum number of distractors to return.
 * @returns {Array<{id: *, title: *, content: *, overlap: number}>} Distractors, best overlap first.
 */
function findDistractors(learning, errorKeys, usedErrorId, maxDistractors) {
  const learningKeys = extractDomainKeys(learning);

  const candidates = [];
  for (const entry of errorKeys) {
    if (entry.memory.id === usedErrorId) continue;
    const overlap = domainOverlap(entry.keys, learningKeys);
    if (overlap > 0) {
      candidates.push({ memory: entry.memory, overlap });
    }
  }

  // Array.prototype.sort is stable, so ties keep their input order.
  candidates.sort((left, right) => right.overlap - left.overlap);

  return candidates.slice(0, maxDistractors).map(({ memory, overlap }) => ({
    id: memory.id,
    title: memory.title,
    content: memory.content,
    overlap,
  }));
}
|
|
119
|
+
|
|
120
|
+
/**
 * Greedily pair each error memory with the best still-unused learning memory
 * and emit one DPO record (prompt / chosen / rejected) per pair.
 *
 * A learning is a candidate for an error only when the two share at least one
 * domain key. Among candidates the score is `overlap + max(0, rubricDelta)`,
 * and the first candidate with the strictly highest score wins. Each learning
 * is used at most once.
 *
 * Candidates with zero overlap are skipped up front. If they were scored, a
 * large rubric delta on an unrelated learning could outrank (or tie with) a
 * genuinely overlapping one and then be rejected for having no overlap,
 * leaving the error unpaired although a valid match existed.
 *
 * "Already used" bookkeeping keys on `memory.id` when it is present and on
 * the object itself otherwise, so records without an id do not all collide on
 * `undefined`.
 *
 * @param {object[]} errors - Error memories (become `rejected`).
 * @param {object[]} learnings - Learning memories (become `chosen`).
 * @param {{maxDistractors?: number}} [options] - `maxDistractors` caps the
 *   distractor errors attached to each pair (default 2).
 * @returns {{pairs: object[], unpairedErrors: object[], unpairedLearnings: object[]}}
 */
function buildDpoPairs(errors, learnings, { maxDistractors = 2 } = {}) {
  const usageKey = (memory) => (memory.id != null ? memory.id : memory);

  const pairs = [];
  const usedErrors = new Set();
  const usedLearnings = new Set();

  const errorKeys = errors.map((e) => ({ memory: e, keys: extractDomainKeys(e) }));
  const learningKeys = learnings.map((l) => ({ memory: l, keys: extractDomainKeys(l) }));

  for (const err of errorKeys) {
    let best = null;
    let bestScore = 0;
    let bestOverlap = 0;
    let bestRubric = null;

    for (const learn of learningKeys) {
      if (usedLearnings.has(usageKey(learn.memory))) continue;

      const overlap = domainOverlap(err.keys, learn.keys);
      // No shared domain key: never a valid match, whatever the rubric says.
      if (overlap <= 0) continue;

      const rubric = buildRubricDelta(err.memory, learn.memory);
      const rubricDelta = rubric && rubric.weightedDelta != null ? rubric.weightedDelta : 0;
      // Only a positive delta (learning scored higher) is rewarded.
      const score = overlap + Math.max(0, rubricDelta);
      if (score > bestScore) {
        best = learn;
        bestScore = score;
        bestOverlap = overlap;
        bestRubric = rubric;
      }
    }

    if (!best) continue;

    const bestKeySet = new Set(best.keys);
    const distractors = findDistractors(best.memory, errorKeys, err.memory.id, maxDistractors);

    pairs.push({
      prompt: inferPrompt(err.memory, best.memory),
      chosen: best.memory.content,
      rejected: err.memory.content,
      // Left undefined when empty so JSON.stringify omits the key.
      distractors: distractors.length > 0 ? distractors : undefined,
      metadata: {
        errorId: err.memory.id,
        learningId: best.memory.id,
        matchScore: bestScore,
        overlapScore: bestOverlap,
        matchedKeys: err.keys.filter((k) => bestKeySet.has(k)),
        errorTitle: err.memory.title,
        learningTitle: best.memory.title,
        rubric: bestRubric,
        distractorCount: distractors.length,
      },
    });
    usedErrors.add(usageKey(err.memory));
    usedLearnings.add(usageKey(best.memory));
  }

  return {
    pairs,
    unpairedErrors: errors.filter((e) => !usedErrors.has(usageKey(e))),
    unpairedLearnings: learnings.filter((l) => !usedLearnings.has(usageKey(l))),
  };
}
|
|
176
|
+
|
|
177
|
+
/**
 * Serialise records as JSON Lines: one JSON document per line, with a
 * trailing newline after the last one. An empty list yields a single newline.
 *
 * @param {Array<*>} pairs - Records to serialise.
 * @returns {string} JSONL text.
 */
function toJSONL(pairs) {
  const lines = pairs.map((pair) => JSON.stringify(pair));
  return lines.join('\n').concat('\n');
}
|
|
180
|
+
|
|
181
|
+
/**
 * Run the full DPO export over a list of memories.
 *
 * Memories are split by `category` ('error' / 'learning'), paired with
 * buildDpoPairs, and each pair is passed to traceForDpoPair. Selected fields
 * of the returned trace are attached as `metadata.reasoningTrace`.
 *
 * @param {object[]} memories - Memory records of any category.
 * @returns {{pairs: object[], unpairedErrors: object[], unpairedLearnings: object[],
 *   errors: object[], learnings: object[], reasoning: *, jsonl: string}}
 *   `reasoning` is whatever aggregateTraces returns for all traces; `jsonl`
 *   is the JSONL serialisation of the trace-annotated pairs.
 */
function exportDpoFromMemories(memories) {
  const ofCategory = (category) => memories.filter((memory) => memory.category === category);
  const errors = ofCategory('error');
  const learnings = ofCategory('learning');

  const { pairs, unpairedErrors, unpairedLearnings } = buildDpoPairs(errors, learnings);

  const traces = pairs.map((pair) => traceForDpoPair(pair));
  const reasoning = aggregateTraces(traces);

  const pairsWithTraces = pairs.map((pair, index) => {
    const trace = traces[index];
    const reasoningTrace = {
      traceId: trace.traceId,
      confidence: trace.summary.confidence,
      passed: trace.summary.passed,
      verified: trace.summary.verified,
      refuted: trace.summary.refuted,
      edgeCases: trace.edgeCases,
    };
    return { ...pair, metadata: { ...pair.metadata, reasoningTrace } };
  });

  return {
    pairs: pairsWithTraces,
    unpairedErrors,
    unpairedLearnings,
    errors,
    learnings,
    reasoning,
    jsonl: toJSONL(pairsWithTraces),
  };
}
|
|
214
|
+
|
|
215
|
+
/**
 * Parse `--key` and `--key=value` command-line tokens into a plain object.
 *
 * Tokens that do not start with `--` are ignored. A flag without `=` maps to
 * `true`; otherwise the value is everything after the first `=` (it may be
 * empty and may itself contain `=`). Later occurrences overwrite earlier ones.
 *
 * @param {string[]} argv - Raw arguments (without the node/script entries).
 * @returns {Object<string, string|boolean>} Parsed flags.
 */
function parseArgs(argv) {
  const args = {};
  for (const token of argv) {
    if (!token.startsWith('--')) continue;

    const body = token.slice(2);
    const separator = body.indexOf('=');
    if (separator === -1) {
      args[body] = true;
    } else {
      args[body.slice(0, separator)] = body.slice(separator + 1);
    }
  }
  return args;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Command-line entry point.
 *
 * Flags:
 *   --test             run the built-in self tests and exit
 *   --input=<path>     read memories from a JSON file (an array, or an object
 *                      with a `memories` array)
 *   --from-local       read memories from DEFAULT_LOCAL_MEMORY_LOG (JSONL)
 *   --output=<path>    write the JSONL there instead of stdout
 *
 * Diagnostics and the summary go to stderr so stdout carries only JSONL.
 * Usage and input errors print a one-line message and exit with status 1.
 */
function runCli() {
  const args = parseArgs(process.argv.slice(2));

  if (args.test) {
    runTests();
    return;
  }

  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };

  // A bare `--input` / `--output` parses to boolean true. Passing that to fs
  // would fail with an opaque TypeError (for --output, only after all the
  // work is done), so reject it up front with a usage message.
  if (args.input === true) {
    fail('--input requires a value: --input=<path-to-json>');
    return;
  }
  if (args.output === true) {
    fail('--output requires a value: --output=<path-to-jsonl>');
    return;
  }

  let memories = [];

  if (args.input) {
    let parsed;
    try {
      parsed = JSON.parse(fs.readFileSync(args.input, 'utf-8'));
    } catch (err) {
      fail(`Failed to read memories from ${args.input}: ${err.message}`);
      return;
    }
    if (Array.isArray(parsed)) {
      memories = parsed;
    } else if (parsed && Array.isArray(parsed.memories)) {
      memories = parsed.memories;
    }
    // Any other payload (null, scalar, object without a memories array)
    // is treated as "no memories".
  } else if (args['from-local']) {
    memories = readJSONL(DEFAULT_LOCAL_MEMORY_LOG);
  } else {
    fail('Provide --input=<path-to-json> or --from-local');
    return;
  }

  const result = exportDpoFromMemories(memories);
  const jsonl = result.jsonl;

  if (args.output) {
    fs.writeFileSync(args.output, jsonl);
    console.error(`Wrote ${result.pairs.length} DPO pairs to ${args.output}`);
  } else {
    process.stdout.write(jsonl);
  }

  console.error(`Errors=${result.errors.length} Learnings=${result.learnings.length} Pairs=${result.pairs.length}`);
  console.error(`Unpaired errors=${result.unpairedErrors.length} Unpaired learnings=${result.unpairedLearnings.length}`);
}
|
|
259
|
+
|
|
260
|
+
/**
 * Built-in self test, run via `--test`.
 *
 * Builds pairs from a small fixture (one matching error/learning couple and
 * one error with no counterpart), prints a PASS/FAIL line per check, and
 * exits the process with status 1 if any check failed, 0 otherwise.
 */
function runTests() {
  const tally = { passed: 0, failed: 0 };

  const assert = (condition, name) => {
    if (!condition) {
      tally.failed++;
      console.log(` FAIL ${name}`);
      return;
    }
    tally.passed++;
    console.log(` PASS ${name}`);
  };

  console.log('\nexport-dpo-pairs.js tests\n');

  const matchingError = {
    id: 1,
    title: 'MISTAKE: Claimed done with no test proof',
    content: 'Claimed completion without running tests.',
    category: 'error',
    tags: ['verification', 'feedback'],
    rubricSummary: {
      weightedScore: 0.32,
      failingCriteria: ['verification_evidence'],
      failingGuardrails: ['testsPassed'],
    },
  };
  const orphanError = {
    id: 2,
    title: 'MISTAKE: Generic mismatch',
    content: 'No matching learning memory for this domain.',
    category: 'error',
    tags: ['unique-tag'],
  };
  const matchingLearning = {
    id: 10,
    title: 'SUCCESS: Always run tests before completion claims',
    content: 'Run tests and include output before saying complete.',
    category: 'learning',
    tags: ['verification', 'feedback'],
    rubricSummary: {
      weightedScore: 0.89,
      failingCriteria: [],
      failingGuardrails: [],
    },
  };

  const result = buildDpoPairs([matchingError, orphanError], [matchingLearning]);
  assert(result.pairs.length === 1, 'one pair built from overlapping domain keys');
  assert(result.unpairedErrors.length === 1, 'unpaired error left when no match exists');
  assert(result.unpairedLearnings.length === 0, 'no unpaired learnings');

  const jsonl = toJSONL(result.pairs);
  assert(jsonl.endsWith('\n'), 'JSONL output ends with newline');

  // Exactly one pair is expected, so the trimmed output is a single JSON document.
  const parsed = JSON.parse(jsonl.trim());
  assert(parsed.prompt.includes('verification'), 'inferred prompt includes shared domain');
  assert(parsed.metadata.rubric.weightedDelta > 0, 'rubric delta metadata is attached');

  console.log(`\nResults: ${tally.passed} passed, ${tally.failed} failed\n`);
  process.exit(tally.failed > 0 ? 1 : 0);
}
|
|
328
|
+
|
|
329
|
+
module.exports = {
|
|
330
|
+
readJSONL,
|
|
331
|
+
extractDomainKeys,
|
|
332
|
+
domainOverlap,
|
|
333
|
+
inferPrompt,
|
|
334
|
+
buildRubricDelta,
|
|
335
|
+
findDistractors,
|
|
336
|
+
buildDpoPairs,
|
|
337
|
+
toJSONL,
|
|
338
|
+
exportDpoFromMemories,
|
|
339
|
+
DEFAULT_LOCAL_MEMORY_LOG,
|
|
340
|
+
};
|
|
341
|
+
|
|
342
|
+
if (require.main === module) {
|
|
343
|
+
runCli();
|
|
344
|
+
}
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* KTO (Kahneman-Tversky Optimization) Exporter
|
|
4
|
+
*
|
|
5
|
+
* Transforms binary up/down feedback into KTO JSONL records.
|
|
6
|
+
* Unlike DPO (which needs paired preferences), KTO works with
|
|
7
|
+
* individual binary signals — a natural fit for thumbs-up/down data.
|
|
8
|
+
*
|
|
9
|
+
* Output format per line:
|
|
10
|
+
* {"prompt": "...", "completion": "...", "label": true/false, "metadata": {...}}
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const { resolveFeedbackDir } = require('./feedback-paths');
|
|
16
|
+
|
|
17
|
+
const DEFAULT_FEEDBACK_LOG = path.join(resolveFeedbackDir(), 'feedback-log.jsonl');
|
|
18
|
+
const DEFAULT_MEMORY_LOG = path.join(resolveFeedbackDir(), 'memory-log.jsonl');
|
|
19
|
+
|
|
20
|
+
/**
 * Load a JSON Lines file into an array of parsed values.
 *
 * A missing file, or one containing only whitespace, yields an empty array.
 * Lines that fail to parse are skipped, as are lines whose parsed value is
 * falsy (for example `null`, `0`, `false`).
 *
 * @param {string} filePath - Path of the JSONL file to read.
 * @returns {Array} Parsed, truthy values in file order.
 */
function readJSONL(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }

  const text = fs.readFileSync(filePath, 'utf-8').trim();
  if (!text) {
    return [];
  }

  const parsedEntries = [];
  for (const row of text.split('\n')) {
    let value;
    try {
      value = JSON.parse(row);
    } catch {
      // Best-effort reader: an unparsable line is dropped, not fatal.
      continue;
    }
    if (value) {
      parsedEntries.push(value);
    }
  }
  return parsedEntries;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Infer a prompt string from a feedback entry.
 *
 * Fallback order:
 *   1. `entry.context`, trimmed, when it is a non-blank string
 *   2. `Task domain: <richContext.domain>` when a domain is present
 *   3. `Task: <tags joined by ", ">` when `tags` is a non-empty array
 *   4. the literal `General coding task`
 *
 * @param {object} entry - Feedback or memory entry.
 * @returns {string} The inferred prompt; never empty.
 */
function inferPrompt(entry) {
  // Only strings can be trimmed; a non-string context (number, object, ...)
  // is treated as absent instead of throwing a TypeError.
  const context = typeof entry.context === 'string' ? entry.context.trim() : '';
  if (context) {
    return context;
  }
  if (entry.richContext && entry.richContext.domain) {
    return `Task domain: ${entry.richContext.domain}`;
  }
  if (Array.isArray(entry.tags) && entry.tags.length > 0) {
    return `Task: ${entry.tags.join(', ')}`;
  }
  return 'General coding task';
}
|
|
52
|
+
|
|
53
|
+
/**
 * Infer a completion string from a feedback entry.
 *
 * When the entry's signal normalizes to 'positive', the first non-blank
 * string among `whatWorked`, `content` is used. For every other signal
 * (including unrecognized ones) the first non-blank string among
 * `whatWentWrong`, `whatToChange`, `content` is used. A fixed fallback
 * sentence is returned when none of the fields qualifies.
 *
 * @param {object} entry - Feedback or memory entry.
 * @returns {string} The trimmed completion text or a fallback sentence.
 */
function inferCompletion(entry) {
  // Non-string field values are ignored rather than trimmed, so a malformed
  // field cannot raise a TypeError.
  const firstText = (fields, fallback) => {
    for (const field of fields) {
      const value = entry[field];
      if (typeof value === 'string' && value.trim()) {
        return value.trim();
      }
    }
    return fallback;
  };

  if (normalizeSignal(entry.signal) === 'positive') {
    return firstText(['whatWorked', 'content'], 'Completed task successfully');
  }
  return firstText(
    ['whatWentWrong', 'whatToChange', 'content'],
    'Failed to complete task correctly',
  );
}
|
|
70
|
+
|
|
71
|
+
/**
 * Map a raw feedback signal onto 'positive' or 'negative'.
 *
 * The input is stringified, trimmed and lower-cased before matching, so
 * casing and surrounding whitespace do not matter.
 *
 * @param {*} signal - Raw signal value from a feedback entry.
 * @returns {'positive'|'negative'|null} Normalized signal, or null when the
 *   value is falsy or not one of the recognized spellings.
 */
function normalizeSignal(signal) {
  const token = String(signal || '').trim().toLowerCase();
  switch (token) {
    case 'up':
    case 'thumbsup':
    case 'thumbs-up':
    case 'thumbs_up':
    case 'positive':
    case 'good':
      return 'positive';
    case 'down':
    case 'thumbsdown':
    case 'thumbs-down':
    case 'thumbs_down':
    case 'negative':
    case 'bad':
      return 'negative';
    default:
      return null;
  }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Build a single KTO record from a feedback or memory entry.
 *
 * @param {object} entry - Feedback or memory entry.
 * @returns {object|null} `{ prompt, completion, label, metadata }`, or null
 *   when the entry is not an object or its signal cannot be normalized.
 */
function buildKtoRecord(entry) {
  // Malformed rows (null, primitives) are reported as "no record" so callers
  // can count them as skipped instead of crashing on a property access.
  if (!entry || typeof entry !== 'object') return null;

  const signal = normalizeSignal(entry.signal);
  if (!signal) return null;

  const richContext = entry.richContext || {};
  const hasRubricScore = entry.rubric && entry.rubric.weightedScore != null;

  return {
    prompt: inferPrompt(entry),
    completion: inferCompletion(entry),
    // KTO label: true for a positive signal, false for a negative one.
    label: signal === 'positive',
    metadata: {
      sourceId: entry.id || null,
      signal,
      // Entries carrying a `sourceFeedbackId` are attributed to the memory log.
      signalSource: entry.sourceFeedbackId ? 'memory-log' : 'feedback-log',
      tags: entry.tags || [],
      domain: richContext.domain || null,
      outcomeCategory: richContext.outcomeCategory || null,
      timestamp: entry.timestamp || null,
      // `!= null` keeps a legitimate score of 0.
      rubricScore: hasRubricScore ? entry.rubric.weightedScore : null,
    },
  };
}
|
|
108
|
+
|
|
109
|
+
/**
 * Convert a list of feedback/memory entries into KTO records.
 *
 * @param {Iterable<object>} entries - Entries to convert.
 * @returns {{records: object[], skipped: object[]}} `records` holds one KTO
 *   record per convertible entry; `skipped` holds the original entries for
 *   which `buildKtoRecord` produced nothing.
 */
function buildKtoPairs(entries) {
  const kept = [];
  const rejected = [];

  for (const candidate of entries) {
    const built = buildKtoRecord(candidate);
    if (!built) {
      rejected.push(candidate);
      continue;
    }
    kept.push(built);
  }

  return { records: kept, skipped: rejected };
}
|
|
125
|
+
|
|
126
|
+
/**
 * Serialize records as JSON Lines text.
 *
 * @param {Array} records - Values to serialize, one per output line.
 * @returns {string} Newline-terminated JSONL, or '' for an empty list.
 */
function toJSONL(records) {
  if (records.length === 0) {
    return '';
  }
  const serializedLines = records.map((record) => JSON.stringify(record));
  return `${serializedLines.join('\n')}\n`;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Merge feedback and memory entries, drop duplicates, and export KTO records.
 *
 * Entries are de-duplicated by `id`; entries without a truthy `id` are
 * compared by their JSON serialization. Feedback entries are visited first,
 * so on a collision the feedback entry is the one that is kept.
 *
 * @param {object[]} [feedbackEntries=[]] - Entries from the feedback log.
 * @param {object[]} [memoryEntries=[]] - Entries from the memory log.
 * @returns {{records: object[], skipped: object[], totalInput: number, jsonl: string}}
 *   `totalInput` is the number of entries remaining after de-duplication.
 */
function exportKtoFromFeedback(feedbackEntries = [], memoryEntries = []) {
  // Defaults let callers pass a single source without the spread throwing.
  const seen = new Set();
  const unique = [];

  for (const entry of [...feedbackEntries, ...memoryEntries]) {
    const key = entry.id || JSON.stringify(entry);
    if (seen.has(key)) continue;
    seen.add(key);
    unique.push(entry);
  }

  const { records, skipped } = buildKtoPairs(unique);
  return {
    records,
    skipped,
    totalInput: unique.length,
    jsonl: toJSONL(records),
  };
}
|
|
151
|
+
|
|
152
|
+
/**
 * Parse `--key` and `--key=value` command-line tokens into an options object.
 *
 * Tokens that do not start with `--` are ignored. A flag without `=` maps to
 * `true`; otherwise everything after the first `=` is the (string) value.
 *
 * @param {string[]} argv - Command-line tokens.
 * @returns {Object<string, string|boolean>} Parsed options.
 */
function parseArgs(argv) {
  const options = {};

  argv
    .filter((token) => token.startsWith('--'))
    .forEach((token) => {
      const body = token.slice(2);
      const separatorAt = body.indexOf('=');
      if (separatorAt === -1) {
        options[body] = true;
      } else {
        options[body.slice(0, separatorAt)] = body.slice(separatorAt + 1);
      }
    });

  return options;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Command-line entry point.
 *
 * Flags:
 *   --test            run the built-in self tests and exit
 *   --input=<path>    read entries from a JSON file (an array, or an object
 *                     with an `entries` array)
 *   --from-local      read the default feedback and memory JSONL logs
 *   --output=<path>   write JSONL to a file instead of stdout
 *
 * The JSONL payload goes to stdout (or the output file); status and summary
 * lines go to stderr so they never mix with the payload.
 */
function runCli() {
  const args = parseArgs(process.argv.slice(2));

  if (args.test) {
    runTests();
    return;
  }

  // A bare `--input` / `--output` parses to `true`; passing that to fs would
  // fail with an opaque type error, so reject it with a usage message.
  for (const flag of ['input', 'output']) {
    if (args[flag] === true) {
      console.error(`--${flag} requires a value: --${flag}=<path>`);
      process.exit(1);
    }
  }

  let feedbackEntries = [];
  let memoryEntries = [];

  if (args.input) {
    const parsed = JSON.parse(fs.readFileSync(args.input, 'utf-8'));
    if (Array.isArray(parsed)) {
      feedbackEntries = parsed;
    } else if (parsed && Array.isArray(parsed.entries)) {
      feedbackEntries = parsed.entries;
    }
    // Any other document shape (null, scalar, object without an `entries`
    // array) leaves the entry list empty.
  } else if (args['from-local']) {
    feedbackEntries = readJSONL(DEFAULT_FEEDBACK_LOG);
    memoryEntries = readJSONL(DEFAULT_MEMORY_LOG);
  } else {
    console.error('Provide --input=<path-to-json> or --from-local');
    process.exit(1);
  }

  const result = exportKtoFromFeedback(feedbackEntries, memoryEntries);

  if (args.output) {
    fs.writeFileSync(args.output, result.jsonl);
    console.error(`Wrote ${result.records.length} KTO records to ${args.output}`);
  } else {
    process.stdout.write(result.jsonl);
  }

  const positiveCount = result.records.filter((r) => r.label === true).length;
  const negativeCount = result.records.filter((r) => r.label === false).length;
  console.error(`Total=${result.totalInput} Exported=${result.records.length} Positive=${positiveCount} Negative=${negativeCount} Skipped=${result.skipped.length}`);
}
|
|
198
|
+
|
|
199
|
+
/**
 * Built-in self tests, run via `--test`.
 *
 * Prints one PASS/FAIL line per check, then a summary, and exits the process
 * with status 1 when any check failed (0 otherwise).
 *
 * Checks that inspect a built record are guarded with `!= null`, so a record
 * that failed to build is reported as FAIL instead of throwing before the
 * summary and exit code are produced.
 */
function runTests() {
  let passed = 0;
  let failed = 0;

  function assert(condition, name) {
    if (condition) {
      passed++;
      console.log(` PASS ${name}`);
    } else {
      failed++;
      console.log(` FAIL ${name}`);
    }
  }

  console.log('\nexport-kto-pairs.js tests\n');

  // Test 1: positive signal produces label true
  const pos = buildKtoRecord({
    id: 'fb_1',
    signal: 'positive',
    context: 'Implemented auth',
    whatWorked: 'JWT tokens with refresh rotation',
    tags: ['auth'],
    timestamp: '2025-01-01T00:00:00Z',
  });
  assert(pos != null, 'positive signal produces a record');
  assert(pos != null && pos.label === true, 'positive signal produces label: true');

  // Test 2: negative signal produces label false
  const neg = buildKtoRecord({
    id: 'fb_2',
    signal: 'negative',
    context: 'Tried to deploy',
    whatWentWrong: 'Missing env vars',
    tags: ['deploy'],
    timestamp: '2025-01-01T00:00:00Z',
  });
  assert(neg != null, 'negative signal produces a record');
  assert(neg != null && neg.label === false, 'negative signal produces label: false');

  // Test 3: missing context handled gracefully
  const noCtx = buildKtoRecord({
    id: 'fb_3',
    signal: 'up',
    tags: ['testing'],
  });
  assert(noCtx != null, 'entry with missing context still produces record');
  assert(noCtx != null && noCtx.prompt === 'Task: testing', 'missing context falls back to tags');

  // Test 4: invalid signal returns null
  const invalid = buildKtoRecord({ id: 'fb_4', signal: 'maybe' });
  assert(invalid === null, 'invalid signal returns null');

  // Test 5: JSONL output is valid
  const records = [pos, neg];
  const jsonl = toJSONL(records);
  const lines = jsonl.trim().split('\n');
  let allValid = true;
  for (const line of lines) {
    try {
      JSON.parse(line);
    } catch {
      allValid = false;
    }
  }
  assert(allValid, 'JSONL output is valid JSON per line');
  assert(jsonl.endsWith('\n'), 'JSONL output ends with newline');

  // Test 6: metadata includes signal source and timestamp
  const posMeta = pos != null ? pos.metadata : null;
  assert(posMeta != null && posMeta.signalSource === 'feedback-log', 'metadata includes signal source');
  assert(posMeta != null && posMeta.timestamp === '2025-01-01T00:00:00Z', 'metadata includes timestamp');
  assert(posMeta != null && posMeta.signal === 'positive', 'metadata includes normalized signal');

  // Test 7: empty context with richContext domain
  const richCtx = buildKtoRecord({
    id: 'fb_5',
    signal: 'up',
    richContext: { domain: 'security', outcomeCategory: 'quick-success' },
  });
  assert(richCtx != null && richCtx.prompt === 'Task domain: security', 'richContext domain used as prompt fallback');
  assert(richCtx != null && richCtx.metadata.domain === 'security', 'metadata captures domain');

  // Test 8: buildKtoPairs filters bad entries
  const result = buildKtoPairs([
    { id: 'a', signal: 'up', context: 'good' },
    { id: 'b', signal: 'invalid' },
    { id: 'c', signal: 'down', context: 'bad', whatWentWrong: 'broke it' },
  ]);
  assert(result.records.length === 2, 'buildKtoPairs keeps valid entries');
  assert(result.skipped.length === 1, 'buildKtoPairs tracks skipped entries');

  console.log(`\nResults: ${passed} passed, ${failed} failed\n`);
  process.exit(failed > 0 ? 1 : 0);
}
|
|
293
|
+
|
|
294
|
+
module.exports = {
|
|
295
|
+
readJSONL,
|
|
296
|
+
normalizeSignal,
|
|
297
|
+
inferPrompt,
|
|
298
|
+
inferCompletion,
|
|
299
|
+
buildKtoRecord,
|
|
300
|
+
buildKtoPairs,
|
|
301
|
+
toJSONL,
|
|
302
|
+
exportKtoFromFeedback,
|
|
303
|
+
DEFAULT_FEEDBACK_LOG,
|
|
304
|
+
DEFAULT_MEMORY_LOG,
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
if (require.main === module) {
|
|
308
|
+
runCli();
|
|
309
|
+
}
|