thumbgate 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/README.md +134 -0
- package/.claude-plugin/bundle/icon.png +0 -0
- package/.claude-plugin/bundle/icon.svg +18 -0
- package/.claude-plugin/bundle/server/index.js +24 -0
- package/.claude-plugin/marketplace.json +36 -0
- package/.claude-plugin/plugin.json +21 -0
- package/.well-known/mcp/server-card.json +231 -0
- package/LICENSE +21 -0
- package/README.md +375 -0
- package/adapters/README.md +9 -0
- package/adapters/amp/skills/rlhf-feedback/SKILL.md +22 -0
- package/adapters/chatgpt/INSTALL.md +83 -0
- package/adapters/chatgpt/openapi.yaml +1281 -0
- package/adapters/claude/.mcp.json +14 -0
- package/adapters/codex/config.toml +9 -0
- package/adapters/gemini/function-declarations.json +224 -0
- package/adapters/mcp/server-stdio.js +788 -0
- package/adapters/opencode/opencode.json +15 -0
- package/bin/cli.js +1483 -0
- package/bin/memory.sh +64 -0
- package/bin/obsidian-sync.sh +20 -0
- package/bin/postinstall.js +37 -0
- package/config/build-metadata.json +4 -0
- package/config/e2e-critical-flows.json +45 -0
- package/config/gate-templates.json +77 -0
- package/config/gates/claim-verification.json +29 -0
- package/config/gates/computer-use.json +39 -0
- package/config/gates/default.json +117 -0
- package/config/github-about.json +25 -0
- package/config/mcp-allowlists.json +135 -0
- package/config/model-tiers.json +33 -0
- package/config/partner-routing.json +132 -0
- package/config/policy-bundles/constrained-v1.json +64 -0
- package/config/policy-bundles/default-v1.json +91 -0
- package/config/rubrics/default-v1.json +52 -0
- package/config/skill-packs/react-testing.json +23 -0
- package/config/skill-packs/stripe-integration/references/api-spec.json +1 -0
- package/config/skill-packs/stripe-integration/references/webhook-guide.md +3 -0
- package/config/skill-specs/pr-reviewer.json +9 -0
- package/config/skill-specs/release-status.json +9 -0
- package/config/skill-specs/ticket-triage.json +9 -0
- package/config/subagent-profiles.json +32 -0
- package/config/tessl-tiles.json +29 -0
- package/config/thumbgate-settings.managed.json +12 -0
- package/openapi/openapi.yaml +1281 -0
- package/package.json +286 -0
- package/plugins/amp-skill/INSTALL.md +52 -0
- package/plugins/amp-skill/SKILL.md +64 -0
- package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +22 -0
- package/plugins/claude-codex-bridge/.mcp.json +12 -0
- package/plugins/claude-codex-bridge/INSTALL.md +43 -0
- package/plugins/claude-codex-bridge/README.md +46 -0
- package/plugins/claude-codex-bridge/scripts/codex-bridge.js +288 -0
- package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +24 -0
- package/plugins/claude-codex-bridge/skills/result/SKILL.md +22 -0
- package/plugins/claude-codex-bridge/skills/review/SKILL.md +28 -0
- package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +27 -0
- package/plugins/claude-codex-bridge/skills/setup/SKILL.md +21 -0
- package/plugins/claude-codex-bridge/skills/status/SKILL.md +19 -0
- package/plugins/claude-skill/INSTALL.md +55 -0
- package/plugins/claude-skill/SKILL.md +46 -0
- package/plugins/codex-profile/.codex-plugin/plugin.json +43 -0
- package/plugins/codex-profile/.mcp.json +12 -0
- package/plugins/codex-profile/AGENTS.md +20 -0
- package/plugins/codex-profile/INSTALL.md +66 -0
- package/plugins/codex-profile/README.md +37 -0
- package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +23 -0
- package/plugins/cursor-marketplace/CHANGELOG.md +30 -0
- package/plugins/cursor-marketplace/LICENSE +21 -0
- package/plugins/cursor-marketplace/README.md +124 -0
- package/plugins/cursor-marketplace/agents/reliability-reviewer.md +31 -0
- package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
- package/plugins/cursor-marketplace/commands/capture-feedback.md +33 -0
- package/plugins/cursor-marketplace/commands/check-gates.md +25 -0
- package/plugins/cursor-marketplace/commands/show-lessons.md +27 -0
- package/plugins/cursor-marketplace/hooks/hooks.json +10 -0
- package/plugins/cursor-marketplace/mcp.json +12 -0
- package/plugins/cursor-marketplace/rules/feedback-capture.mdc +34 -0
- package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +30 -0
- package/plugins/cursor-marketplace/rules/session-continuity.mdc +28 -0
- package/plugins/cursor-marketplace/scripts/gate-check.sh +11 -0
- package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +47 -0
- package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +31 -0
- package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +30 -0
- package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +33 -0
- package/plugins/gemini-extension/INSTALL.md +92 -0
- package/plugins/gemini-extension/gemini_prompt.txt +14 -0
- package/plugins/gemini-extension/tool_contract.json +45 -0
- package/plugins/opencode-profile/INSTALL.md +57 -0
- package/public/assets/instagram-card.png +0 -0
- package/public/assets/tiktok-agent-memory.mp4 +0 -0
- package/public/blog.html +400 -0
- package/public/dashboard.html +1093 -0
- package/public/guide.html +317 -0
- package/public/index.html +1195 -0
- package/public/learn/agent-harness-pattern.html +180 -0
- package/public/learn/ai-agent-persistent-memory.html +202 -0
- package/public/learn/learn.css +45 -0
- package/public/learn/mcp-pre-action-gates-explained.html +172 -0
- package/public/learn/stop-ai-agent-force-push.html +134 -0
- package/public/learn/vibe-coding-safety-net.html +142 -0
- package/public/learn.html +213 -0
- package/public/lessons.html +650 -0
- package/public/vercel.json +8 -0
- package/scripts/__pycache__/train_from_feedback.cpython-314.pyc +0 -0
- package/scripts/a2ui-engine.js +73 -0
- package/scripts/access-anomaly-detector.js +12 -0
- package/scripts/adk-consolidator.js +266 -0
- package/scripts/agent-readiness.js +220 -0
- package/scripts/agent-security-hardening.js +227 -0
- package/scripts/agentic-data-pipeline.js +847 -0
- package/scripts/analytics-report.js +328 -0
- package/scripts/analytics-window.js +158 -0
- package/scripts/async-job-runner.js +1001 -0
- package/scripts/audit-trail.js +398 -0
- package/scripts/auto-promote-gates.js +293 -0
- package/scripts/auto-wire-hooks.js +316 -0
- package/scripts/autonomous-sales-agent.js +39 -0
- package/scripts/autoresearch-runner.js +216 -0
- package/scripts/background-agent-governance.js +237 -0
- package/scripts/behavioral-extraction.js +93 -0
- package/scripts/belief-update.js +84 -0
- package/scripts/billing.js +2438 -0
- package/scripts/bot-detector.js +50 -0
- package/scripts/budget-guard.js +173 -0
- package/scripts/build-claude-mcpb.js +189 -0
- package/scripts/build-metadata.js +97 -0
- package/scripts/check-congruence.js +322 -0
- package/scripts/cli-feedback.js +135 -0
- package/scripts/cli-telemetry.js +87 -0
- package/scripts/cloudflare-dynamic-sandbox.js +315 -0
- package/scripts/code-reasoning.js +350 -0
- package/scripts/codegraph-context.js +466 -0
- package/scripts/commercial-offer.js +56 -0
- package/scripts/computer-use-firewall.js +250 -0
- package/scripts/context-engine.js +694 -0
- package/scripts/contextfs.js +1287 -0
- package/scripts/conversation-context.js +119 -0
- package/scripts/creator-campaigns.js +239 -0
- package/scripts/daemon-manager.js +108 -0
- package/scripts/daily-digest.js +11 -0
- package/scripts/dashboard-render-spec.js +395 -0
- package/scripts/dashboard.js +1058 -0
- package/scripts/data-governance.js +173 -0
- package/scripts/delegation-runtime.js +900 -0
- package/scripts/deploy-gcp.sh +44 -0
- package/scripts/deploy-policy.js +231 -0
- package/scripts/disagreement-mining.js +315 -0
- package/scripts/dispatch-brief.js +159 -0
- package/scripts/distribution-surfaces.js +44 -0
- package/scripts/dpo-optimizer.js +206 -0
- package/scripts/ensure-repo-bootstrap.js +129 -0
- package/scripts/ephemeral-agent-store.js +219 -0
- package/scripts/eval-harness.js +56 -0
- package/scripts/evolution-state.js +241 -0
- package/scripts/experiment-tracker.js +267 -0
- package/scripts/export-databricks-bundle.js +242 -0
- package/scripts/export-dpo-pairs.js +344 -0
- package/scripts/export-kto-pairs.js +309 -0
- package/scripts/export-training.js +450 -0
- package/scripts/failure-diagnostics.js +558 -0
- package/scripts/feedback-attribution.js +313 -0
- package/scripts/feedback-fallback.js +110 -0
- package/scripts/feedback-history-distiller.js +391 -0
- package/scripts/feedback-inbox-read.js +162 -0
- package/scripts/feedback-loop.js +1887 -0
- package/scripts/feedback-paths.js +145 -0
- package/scripts/feedback-quality.js +139 -0
- package/scripts/feedback-root-consolidator.js +238 -0
- package/scripts/feedback-schema.js +426 -0
- package/scripts/feedback-session.js +286 -0
- package/scripts/feedback-to-memory.js +185 -0
- package/scripts/feedback-to-rules.js +164 -0
- package/scripts/filesystem-search.js +405 -0
- package/scripts/funnel-analytics.js +35 -0
- package/scripts/gate-satisfy.js +42 -0
- package/scripts/gate-stats.js +116 -0
- package/scripts/gate-templates.js +70 -0
- package/scripts/gates-engine.js +816 -0
- package/scripts/generate-paperbanana-diagrams.sh +99 -0
- package/scripts/generate-pretool-hook.sh +40 -0
- package/scripts/github-about.js +350 -0
- package/scripts/github-outreach.js +65 -0
- package/scripts/gtm-revenue-loop.js +520 -0
- package/scripts/hallucination-detector.js +226 -0
- package/scripts/hf-papers.js +317 -0
- package/scripts/history-distiller.js +200 -0
- package/scripts/hook-auto-capture.sh +100 -0
- package/scripts/hook-stop-pr-thread-check.sh +68 -0
- package/scripts/hook-stop-self-score.sh +51 -0
- package/scripts/hook-stop-verify-deploy.sh +31 -0
- package/scripts/hook-thumbgate-cache-updater.js +48 -0
- package/scripts/hook-verify-before-done.sh +20 -0
- package/scripts/hosted-config.js +156 -0
- package/scripts/hybrid-feedback-context.js +675 -0
- package/scripts/install-mcp.js +159 -0
- package/scripts/intent-router.js +392 -0
- package/scripts/internal-agent-bootstrap.js +490 -0
- package/scripts/jsonl-watcher.js +155 -0
- package/scripts/lesson-db.js +613 -0
- package/scripts/lesson-inference.js +310 -0
- package/scripts/lesson-retrieval.js +95 -0
- package/scripts/lesson-rotation.js +137 -0
- package/scripts/lesson-search.js +644 -0
- package/scripts/lesson-synthesis.js +196 -0
- package/scripts/license.js +50 -0
- package/scripts/local-model-profile.js +384 -0
- package/scripts/markdown-escape.js +12 -0
- package/scripts/marketing-experiment.js +671 -0
- package/scripts/mcp-config.js +149 -0
- package/scripts/mcp-policy.js +99 -0
- package/scripts/memalign-recall.js +111 -0
- package/scripts/memory-firewall.js +222 -0
- package/scripts/memory-migration.js +296 -0
- package/scripts/meta-policy.js +190 -0
- package/scripts/metered-billing.js +16 -0
- package/scripts/model-tier-router.js +301 -0
- package/scripts/money-watcher.js +71 -0
- package/scripts/multi-hop-recall.js +240 -0
- package/scripts/natural-language-harness.js +330 -0
- package/scripts/obsidian-export.js +713 -0
- package/scripts/operational-dashboard.js +103 -0
- package/scripts/operational-summary.js +93 -0
- package/scripts/optimize-context.js +17 -0
- package/scripts/org-dashboard.js +201 -0
- package/scripts/partner-orchestration.js +146 -0
- package/scripts/per-step-scoring.js +165 -0
- package/scripts/perplexity-marketing.js +466 -0
- package/scripts/pii-scanner.js +153 -0
- package/scripts/plan-gate.js +154 -0
- package/scripts/post-everywhere.js +308 -0
- package/scripts/post-to-x-retry.sh +22 -0
- package/scripts/post-to-x.js +369 -0
- package/scripts/pr-manager.js +236 -0
- package/scripts/predictive-insights.js +356 -0
- package/scripts/principle-extractor.js +162 -0
- package/scripts/pro-features.js +40 -0
- package/scripts/pro-local-dashboard.js +174 -0
- package/scripts/problem-detail.js +53 -0
- package/scripts/product-feedback.js +134 -0
- package/scripts/profile-router.js +245 -0
- package/scripts/prompt-dlp.js +221 -0
- package/scripts/prompt-guard.js +83 -0
- package/scripts/prove-adapters.js +863 -0
- package/scripts/prove-attribution.js +365 -0
- package/scripts/prove-automation.js +653 -0
- package/scripts/prove-autoresearch.js +304 -0
- package/scripts/prove-claim-verification.js +277 -0
- package/scripts/prove-cloudflare-sandbox.js +163 -0
- package/scripts/prove-data-pipeline.js +410 -0
- package/scripts/prove-data-quality.js +227 -0
- package/scripts/prove-evolution.js +352 -0
- package/scripts/prove-harnesses.js +287 -0
- package/scripts/prove-intelligence.js +259 -0
- package/scripts/prove-lancedb.js +371 -0
- package/scripts/prove-local-intelligence.js +342 -0
- package/scripts/prove-loop-closure.js +263 -0
- package/scripts/prove-predictive-insights.js +357 -0
- package/scripts/prove-runtime.js +350 -0
- package/scripts/prove-seo-gsd.js +234 -0
- package/scripts/prove-settings.js +279 -0
- package/scripts/prove-subway-upgrades.js +277 -0
- package/scripts/prove-tessl.js +229 -0
- package/scripts/prove-training-export.js +327 -0
- package/scripts/prove-workflow-contract.js +116 -0
- package/scripts/prove-xmemory.js +332 -0
- package/scripts/publish-decision.js +133 -0
- package/scripts/pulse.js +80 -0
- package/scripts/rate-limiter.js +125 -0
- package/scripts/reddit-dm-outreach.js +182 -0
- package/scripts/reddit-monitor-cron.sh +26 -0
- package/scripts/reflector-agent.js +221 -0
- package/scripts/reminder-engine.js +132 -0
- package/scripts/revenue-status.js +472 -0
- package/scripts/risk-scorer.js +459 -0
- package/scripts/rlaif-self-audit.js +129 -0
- package/scripts/rlhf_session_start.sh +32 -0
- package/scripts/rubric-engine.js +230 -0
- package/scripts/schedule-manager.js +251 -0
- package/scripts/secret-scanner.js +414 -0
- package/scripts/self-heal.js +147 -0
- package/scripts/self-healing-check.js +188 -0
- package/scripts/semantic-layer.js +98 -0
- package/scripts/seo-gsd.js +1153 -0
- package/scripts/settings-hierarchy.js +214 -0
- package/scripts/shieldcortex-memory-firewall-runner.mjs +53 -0
- package/scripts/skill-exporter.js +262 -0
- package/scripts/skill-generator.js +446 -0
- package/scripts/skill-materializer.js +134 -0
- package/scripts/skill-packs.js +136 -0
- package/scripts/skill-proposer.js +99 -0
- package/scripts/skill-quality-tracker.js +282 -0
- package/scripts/slo-alert-engine.js +14 -0
- package/scripts/slow-loop.js +72 -0
- package/scripts/social-analytics/db/schema.sql +32 -0
- package/scripts/social-analytics/db/social-analytics.db +0 -0
- package/scripts/social-analytics/digest.js +256 -0
- package/scripts/social-analytics/generate-instagram-card.js +97 -0
- package/scripts/social-analytics/instagram-thumbgate-post.js +107 -0
- package/scripts/social-analytics/load-env.js +46 -0
- package/scripts/social-analytics/mcp-server.js +289 -0
- package/scripts/social-analytics/normalizer.js +580 -0
- package/scripts/social-analytics/notify.js +162 -0
- package/scripts/social-analytics/poll-all.js +92 -0
- package/scripts/social-analytics/pollers/github.js +195 -0
- package/scripts/social-analytics/pollers/instagram.js +253 -0
- package/scripts/social-analytics/pollers/linkedin.js +330 -0
- package/scripts/social-analytics/pollers/plausible.js +247 -0
- package/scripts/social-analytics/pollers/reddit.js +306 -0
- package/scripts/social-analytics/pollers/threads.js +233 -0
- package/scripts/social-analytics/pollers/tiktok.js +203 -0
- package/scripts/social-analytics/pollers/x.js +227 -0
- package/scripts/social-analytics/pollers/youtube.js +304 -0
- package/scripts/social-analytics/pollers/zernio.js +183 -0
- package/scripts/social-analytics/publish-instagram-thumbgate.js +98 -0
- package/scripts/social-analytics/publish-thumbgate-launch.js +316 -0
- package/scripts/social-analytics/publishers/devto.js +122 -0
- package/scripts/social-analytics/publishers/instagram.js +317 -0
- package/scripts/social-analytics/publishers/linkedin.js +294 -0
- package/scripts/social-analytics/publishers/reddit.js +390 -0
- package/scripts/social-analytics/publishers/threads.js +275 -0
- package/scripts/social-analytics/publishers/tiktok.js +217 -0
- package/scripts/social-analytics/publishers/x.js +259 -0
- package/scripts/social-analytics/publishers/youtube.js +223 -0
- package/scripts/social-analytics/publishers/zernio.js +378 -0
- package/scripts/social-analytics/run-digest.js +34 -0
- package/scripts/social-analytics/store.js +257 -0
- package/scripts/social-analytics/utm.js +143 -0
- package/scripts/social-pipeline.js +2628 -0
- package/scripts/social-quality-gate.js +18 -0
- package/scripts/social-reply-monitor.js +445 -0
- package/scripts/status-dashboard.js +155 -0
- package/scripts/statusline-lesson.js +16 -0
- package/scripts/statusline-tower.js +8 -0
- package/scripts/statusline.sh +116 -0
- package/scripts/stripe-live-status.js +115 -0
- package/scripts/subagent-profiles.js +79 -0
- package/scripts/sync-gh-secrets-from-env.sh +70 -0
- package/scripts/sync-github-about.js +52 -0
- package/scripts/sync-version.js +447 -0
- package/scripts/synthetic-dpo.js +234 -0
- package/scripts/telemetry-analytics.js +821 -0
- package/scripts/tessl-export.js +371 -0
- package/scripts/test-coverage.js +120 -0
- package/scripts/thompson-sampling.js +417 -0
- package/scripts/thumbgate-search.js +189 -0
- package/scripts/tool-kpi-tracker.js +12 -0
- package/scripts/tool-registry.js +811 -0
- package/scripts/train_from_feedback.py +933 -0
- package/scripts/user-profile.js +78 -0
- package/scripts/validate-feedback.js +581 -0
- package/scripts/validate-workflow-contract.js +287 -0
- package/scripts/vector-store.js +197 -0
- package/scripts/verification-loop.js +291 -0
- package/scripts/verify-obsidian-setup.sh +269 -0
- package/scripts/verify-run.js +269 -0
- package/scripts/webhook-delivery.js +62 -0
- package/scripts/weekly-auto-post.js +124 -0
- package/scripts/workflow-runs.js +154 -0
- package/scripts/workflow-sprint-intake.js +475 -0
- package/scripts/workspace-evolver.js +374 -0
- package/scripts/x-autonomous-marketing.js +139 -0
- package/scripts/xmemory-lite.js +405 -0
- package/skills/agent-memory/SKILL.md +97 -0
- package/skills/rlhf-feedback/SKILL.md +49 -0
- package/skills/solve-architecture-autonomy/SKILL.md +17 -0
- package/skills/solve-architecture-autonomy/tool.js +33 -0
- package/skills/thumbgate/SKILL.md +114 -0
- package/src/api/server.js +4206 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* Experiment Tracker (AUTORESEARCH-01)
|
|
4
|
+
*
|
|
5
|
+
* Tracks autonomous iteration experiments inspired by Karpathy's autoresearch.
|
|
6
|
+
* Each experiment = a config mutation + test run + measurable score.
|
|
7
|
+
* Keeps/discards based on whether score improves over baseline.
|
|
8
|
+
*
|
|
9
|
+
* Persists experiments to .thumbgate/experiments.jsonl and writes a progress
|
|
10
|
+
* summary to .thumbgate/experiment-progress.json.
|
|
11
|
+
*
|
|
12
|
+
* Zero external dependencies — uses only node:* and existing project modules.
|
|
13
|
+
*
|
|
14
|
+
* Exports: createExperiment, recordResult, getProgress, getBestExperiment,
|
|
15
|
+
* loadExperiments, updateProgress, getExperimentPaths
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
const fs = require('fs');
|
|
19
|
+
const path = require('path');
|
|
20
|
+
const { getFeedbackPaths, readJSONL } = require('./feedback-loop');
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Paths
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
/**
 * Resolve the files used by the experiment tracker.
 *
 * Both files sit directly inside the FEEDBACK_DIR reported by
 * getFeedbackPaths().
 *
 * @returns {{logPath: string, progressPath: string}} logPath is the JSONL
 *   experiment log; progressPath is the JSON progress summary.
 */
function getExperimentPaths() {
  const feedbackDir = getFeedbackPaths().FEEDBACK_DIR;
  const logPath = path.join(feedbackDir, 'experiments.jsonl');
  const progressPath = path.join(feedbackDir, 'experiment-progress.json');
  return { logPath, progressPath };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Make sure a directory exists, creating missing parent directories as needed.
 *
 * `fs.mkdirSync` with `recursive: true` does not fail when the directory is
 * already present, so no separate existence check is performed. If the path
 * exists but is not a directory, the underlying error is surfaced here
 * instead of being deferred to the next write underneath it.
 *
 * @param {string} dirPath - Directory to create.
 * @returns {void}
 * @throws {Error} When the directory cannot be created.
 */
function ensureDir(dirPath) {
  fs.mkdirSync(dirPath, { recursive: true });
}
|
|
39
|
+
|
|
40
|
+
/**
 * Append one record to a JSONL file as a single serialized line.
 *
 * The file's parent directory is created first when it is missing.
 *
 * @param {string} filePath - Target JSONL file.
 * @param {*} record - Value serialized with JSON.stringify.
 * @returns {void}
 */
function appendJSONL(filePath, record) {
  const parentDir = path.dirname(filePath);
  ensureDir(parentDir);

  const line = JSON.stringify(record) + '\n';
  fs.appendFileSync(filePath, line);
}
|
|
44
|
+
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
// Experiment Lifecycle
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
/**
 * Register a new experiment in the log with status 'pending'.
 *
 * Nothing is executed here; the record only captures what is about to be
 * tried so that a result can be attached to it later.
 *
 * @param {object} params
 * @param {string} params.name - Human-readable experiment name (required)
 * @param {string} params.hypothesis - What change is being tested (required)
 * @param {string} [params.mutationType] - One of config|prompt|code|threshold;
 *   falls back to 'config' when omitted
 * @param {object} [params.mutation] - The mutation applied (stored as-is)
 * @param {string} [params.branch] - Git branch name for this experiment
 * @returns {object} the pending experiment record that was appended to the log
 * @throws {Error} when name/hypothesis are missing or mutationType is unknown
 */
function createExperiment(params) {
  const hasRequiredFields = Boolean(params && params.name && params.hypothesis);
  if (!hasRequiredFields) {
    throw new Error('Experiment requires name and hypothesis');
  }

  const allowedTypes = ['config', 'prompt', 'code', 'threshold'];
  const mutationType = params.mutationType || 'config';
  const typeIsKnown = allowedTypes.includes(mutationType);
  if (!typeIsKnown) {
    throw new Error(`Invalid mutationType "${mutationType}". Must be one of: ${allowedTypes.join(', ')}`);
  }

  // Id = creation timestamp plus a short random base-36 suffix.
  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 8);

  const record = {
    id: `exp_${stamp}_${suffix}`,
    name: params.name,
    hypothesis: params.hypothesis,
    mutationType,
    mutation: params.mutation || null,
    branch: params.branch || null,
    status: 'pending',
    createdAt: new Date().toISOString(),
    // The remaining fields stay null until a result is recorded.
    completedAt: null,
    baseline: null,
    result: null,
    score: null,
    kept: null,
    reason: null,
  };

  appendJSONL(getExperimentPaths().logPath, record);
  return record;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Record the result of a completed experiment and decide whether to keep it.
 *
 * The experiment is kept only when the score is strictly greater than the
 * baseline AND the tests passed. Tests count as passed unless
 * `params.testsPassed` is explicitly `false`. The completed record is appended
 * to the log (the earlier pending row is left in place) and the progress
 * summary is recomputed.
 *
 * @param {object} params
 * @param {string} params.experimentId - ID from createExperiment
 * @param {number} params.score - Measured score (higher = better); must be finite
 * @param {number} params.baseline - Baseline score to compare against; must be finite
 * @param {boolean} [params.testsPassed] - Whether the test suite passed
 * @param {object} [params.metrics] - Additional metrics (coverage, duration, etc.)
 * @returns {object} Completed experiment record with kept/discarded decision
 * @throws {Error} when experimentId is missing, score/baseline are not finite
 *   numbers, or no experiment with that id exists in the log
 */
function recordResult(params) {
  if (!params || !params.experimentId) {
    throw new Error('recordResult requires experimentId');
  }
  // Number.isFinite rejects non-numbers and also NaN / +-Infinity. Those pass
  // a bare typeof check but JSON.stringify writes them as null, which would
  // leave a "completed" record without a usable score or delta.
  if (!Number.isFinite(params.score) || !Number.isFinite(params.baseline)) {
    throw new Error('recordResult requires numeric score and baseline');
  }

  const { logPath } = getExperimentPaths();
  const experiments = loadExperiments();
  const experiment = experiments.find((e) => e.id === params.experimentId);

  if (!experiment) {
    throw new Error(`Experiment ${params.experimentId} not found`);
  }

  const delta = params.score - params.baseline;
  const improved = params.score > params.baseline;
  // Only an explicit `false` counts as a failed test run.
  const testsPassed = params.testsPassed !== false;
  const kept = improved && testsPassed;

  let reason;
  if (!testsPassed) {
    reason = 'Tests failed — discarded';
  } else if (improved) {
    reason = `Score improved by ${delta.toFixed(4)}`;
  } else {
    reason = `Score did not improve (${params.score} <= ${params.baseline})`;
  }

  const result = {
    ...experiment,
    status: 'completed',
    completedAt: new Date().toISOString(),
    baseline: params.baseline,
    score: params.score,
    delta,
    testsPassed,
    metrics: params.metrics || null,
    kept,
    reason,
  };

  appendJSONL(logPath, result);
  updateProgress();
  return result;
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// Progress Tracking
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
/**
 * Read every record currently stored in the experiment JSONL log.
 *
 * Records are returned as read by readJSONL; no de-duplication is applied.
 *
 * @returns {object[]}
 */
function loadExperiments() {
  const logFile = getExperimentPaths().logPath;
  const records = readJSONL(logFile);
  return records;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Recompute and persist the experiment progress summary.
 *
 * The log is append-only: createExperiment writes a 'pending' row and
 * recordResult later appends a 'completed' row with the same id. Rows are
 * therefore collapsed to the most recent row per id before counting, so a
 * finished experiment is counted once and is no longer reported as pending.
 *
 * @returns {object} progress summary (also written to the progress file)
 */
function updateProgress() {
  // Later rows overwrite earlier ones with the same id; a Map keeps the
  // position of the first occurrence. Rows without an id are kept as-is.
  const latestById = new Map();
  loadExperiments().forEach((record, index) => {
    const key = record.id != null ? `id:${record.id}` : `row:${index}`;
    latestById.set(key, record);
  });
  const experiments = Array.from(latestById.values());

  const completed = experiments.filter((e) => e.status === 'completed');
  const kept = completed.filter((e) => e.kept === true);
  const discarded = completed.filter((e) => e.kept === false);
  const pending = experiments.filter((e) => e.status === 'pending');

  // Best = kept experiment with the largest delta; ties keep the earliest.
  const bestExperiment = kept.length > 0
    ? kept.reduce((best, e) => ((e.delta || 0) > (best.delta || 0) ? e : best), kept[0])
    : null;

  const progress = {
    totalExperiments: experiments.length,
    completed: completed.length,
    kept: kept.length,
    discarded: discarded.length,
    pending: pending.length,
    // Percentage of completed experiments that were kept, as a string.
    keepRate: completed.length > 0
      ? (kept.length / completed.length * 100).toFixed(1)
      : '0.0',
    bestExperiment: bestExperiment
      ? { id: bestExperiment.id, name: bestExperiment.name, delta: bestExperiment.delta }
      : null,
    lastUpdated: new Date().toISOString(),
  };

  const { progressPath } = getExperimentPaths();
  ensureDir(path.dirname(progressPath));
  fs.writeFileSync(progressPath, JSON.stringify(progress, null, 2) + '\n');
  return progress;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Return the experiment progress summary.
 *
 * The persisted progress file is used when it exists and parses as JSON;
 * otherwise the summary is recomputed (and rewritten) via updateProgress().
 *
 * @returns {object} progress summary
 */
function getProgress() {
  const { progressPath } = getExperimentPaths();

  if (!fs.existsSync(progressPath)) {
    return updateProgress();
  }

  try {
    const cached = fs.readFileSync(progressPath, 'utf-8');
    return JSON.parse(cached);
  } catch {
    // Unreadable or corrupt cache: rebuild it from the log.
    return updateProgress();
  }
}
|
|
211
|
+
|
|
212
|
+
/**
 * Find the kept experiment with the largest delta.
 *
 * A missing delta is treated as 0, and on ties the earliest record in the
 * log wins.
 *
 * @returns {object|null} the best kept record, or null when nothing was kept
 */
function getBestExperiment() {
  let best = null;
  for (const record of loadExperiments()) {
    if (record.kept !== true) continue;
    if (best === null || (record.delta || 0) > (best.delta || 0)) {
      best = record;
    }
  }
  return best;
}
|
|
222
|
+
|
|
223
|
+
// ---------------------------------------------------------------------------
|
|
224
|
+
// CLI
|
|
225
|
+
// ---------------------------------------------------------------------------
|
|
226
|
+
|
|
227
|
+
// Command-line entry point: runs only when this file is executed directly.
// Supported flags: --progress, --best, --list (checked in that order).
if (require.main === module) {
  // Collect `--key` / `--key=value` tokens; anything else is ignored.
  const flags = {};
  for (const token of process.argv.slice(2)) {
    if (!token.startsWith('--')) continue;
    const [name, ...valueParts] = token.slice(2).split('=');
    flags[name] = valueParts.length > 0 ? valueParts.join('=') : true;
  }

  if (flags.progress) {
    console.log(JSON.stringify(getProgress(), null, 2));
  } else if (flags.best) {
    const best = getBestExperiment();
    if (best) {
      console.log(JSON.stringify(best, null, 2));
    } else {
      console.log('No kept experiments yet.');
    }
  } else if (flags.list) {
    const finished = loadExperiments().filter((e) => e.status === 'completed');
    const keptCount = finished.filter((e) => e.kept).length;
    console.log(`${finished.length} completed experiments (${keptCount} kept)`);
    // Show only the ten most recent completed records.
    for (const e of finished.slice(-10)) {
      const icon = e.kept ? '✓' : '✗';
      console.log(` ${icon} ${e.name} — delta: ${(e.delta || 0).toFixed(4)}`);
    }
  } else {
    const usage = [
      'Usage:',
      'node scripts/experiment-tracker.js --progress',
      'node scripts/experiment-tracker.js --best',
      'node scripts/experiment-tracker.js --list',
    ].join('\n');
    console.log(usage);
  }
}
|
|
254
|
+
|
|
255
|
+
// ---------------------------------------------------------------------------
|
|
256
|
+
// Exports
|
|
257
|
+
// ---------------------------------------------------------------------------
|
|
258
|
+
|
|
259
|
+
module.exports = {
|
|
260
|
+
createExperiment,
|
|
261
|
+
recordResult,
|
|
262
|
+
getProgress,
|
|
263
|
+
getBestExperiment,
|
|
264
|
+
loadExperiments,
|
|
265
|
+
updateProgress,
|
|
266
|
+
getExperimentPaths,
|
|
267
|
+
};
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
|
|
7
|
+
const { getFeedbackPaths } = require('./feedback-loop');
|
|
8
|
+
|
|
9
|
+
const PROJECT_ROOT = path.join(__dirname, '..');
|
|
10
|
+
const DEFAULT_PROOF_DIR = process.env.THUMBGATE_PROOF_DIR
|
|
11
|
+
|| path.join(PROJECT_ROOT, 'proof');
|
|
12
|
+
|
|
13
|
+
/**
 * Turn `--key` / `--key=value` tokens into an options object.
 *
 * Tokens that do not start with `--` are ignored. A bare `--key` maps to
 * `true`; for `--key=value` everything after the first `=` is the value,
 * including any further `=` characters.
 *
 * @param {string[]} argv - Raw argument tokens.
 * @returns {Object<string, string|boolean>} parsed options
 */
function parseArgs(argv) {
  return argv.reduce((parsed, token) => {
    if (token.startsWith('--')) {
      const [name, ...valueParts] = token.slice(2).split('=');
      parsed[name] = valueParts.length ? valueParts.join('=') : true;
    }
    return parsed;
  }, {});
}
|
|
22
|
+
|
|
23
|
+
/**
 * Make sure a directory exists, creating missing parent directories as needed.
 *
 * `fs.mkdirSync` with `recursive: true` does not fail when the directory is
 * already present, so no separate existence check is performed. If the path
 * exists but is not a directory, the underlying error is surfaced here
 * instead of being deferred to the next write underneath it.
 *
 * @param {string} dirPath - Directory to create.
 * @returns {void}
 * @throws {Error} When the directory cannot be created.
 */
function ensureDir(dirPath) {
  fs.mkdirSync(dirPath, { recursive: true });
}
|
|
28
|
+
|
|
29
|
+
/**
 * Read a JSONL file into an array of parsed rows.
 *
 * Returns an empty array when the file is missing or contains only
 * whitespace. Lines that fail to parse, and lines that parse to a falsy
 * value, are dropped.
 *
 * @param {string} filePath - JSONL file to read.
 * @returns {Array} parsed rows in file order
 */
function readJSONL(filePath) {
  if (!fs.existsSync(filePath)) return [];

  const text = fs.readFileSync(filePath, 'utf8').trim();
  if (text === '') return [];

  const rows = [];
  for (const line of text.split('\n')) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Best-effort reader: malformed lines are skipped.
      continue;
    }
    if (parsed) rows.push(parsed);
  }
  return rows;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Read and parse a JSON file, returning null on any failure.
 *
 * Both read errors (for example a missing file) and parse errors yield null.
 *
 * @param {string} filePath - JSON file to read.
 * @returns {*} the parsed value, or null when it could not be read or parsed
 */
function readJSON(filePath) {
  let parsed = null;
  try {
    const text = fs.readFileSync(filePath, 'utf8');
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }
  return parsed;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Write rows to a file as JSONL, one JSON document per line.
 *
 * The file is overwritten. Non-empty output ends with a newline; when the
 * serialized content is empty the file is written empty.
 *
 * @param {string} filePath - Destination file.
 * @param {Array} rows - Values serialized with JSON.stringify.
 * @returns {void}
 */
function writeJSONL(filePath, rows) {
  const serialized = rows.map((row) => JSON.stringify(row)).join('\n');
  let payload = '';
  if (serialized) {
    payload = serialized + '\n';
  }
  fs.writeFileSync(filePath, payload);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Return the FEEDBACK_DIR reported by getFeedbackPaths().
 *
 * @returns {string} feedback directory path
 */
function getDefaultFeedbackDir() {
  const { FEEDBACK_DIR } = getFeedbackPaths();
  return FEEDBACK_DIR;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Join path segments with POSIX (`/`) separators regardless of host platform.
 *
 * @param {...string} segments - Path pieces to join.
 * @returns {string} The joined, normalized POSIX path.
 */
function toBundleRelativePath(...segments) {
  const { posix } = path;
  return posix.join(...segments);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Convert every backslash in `relativePath` to a forward slash.
 * Falsy input is treated as the empty string.
 *
 * @param {*} relativePath - Path to normalize; coerced with `String`.
 * @returns {string} The path using `/` separators only.
 */
function normalizeBundleRelativePath(relativePath) {
  const text = String(relativePath || '');
  return text.split('\\').join('/');
}
|
|
69
|
+
|
|
70
|
+
/**
 * Recursively collect paths of regular files ending in `.json` under
 * `dirPath`.
 *
 * @param {string} dirPath - Directory to scan; a missing path contributes nothing.
 * @param {string[]} [acc=[]] - Accumulator that found paths are appended to.
 * @returns {string[]} The same `acc` array, for chaining.
 */
function walkJsonFiles(dirPath, acc = []) {
  if (fs.existsSync(dirPath)) {
    fs.readdirSync(dirPath, { withFileTypes: true }).forEach((dirent) => {
      const childPath = path.join(dirPath, dirent.name);
      if (dirent.isDirectory()) {
        walkJsonFiles(childPath, acc);
        return;
      }
      if (dirent.isFile() && dirent.name.endsWith('.json')) {
        acc.push(childPath);
      }
    });
  }
  return acc;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Prefix each row with bundle metadata fields.
 *
 * The row's own properties are spread after the metadata, so a row key that
 * matches a metadata key keeps the row's value.
 *
 * @param {Array<Object>} rows - Source rows.
 * @param {string} dataset - Value for `bundleDataset`.
 * @param {string} sourceFile - Value for `bundleSourceFile`.
 * @param {string} exportedAt - Value for `bundleExportedAt`.
 * @returns {Array<Object>} New row objects; `bundleRowNumber` is 1-based.
 */
function annotateRows(rows, dataset, sourceFile, exportedAt) {
  const annotate = (row, position) => {
    const metadata = {
      bundleDataset: dataset,
      bundleRowNumber: position + 1,
      bundleExportedAt: exportedAt,
      bundleSourceFile: sourceFile,
    };
    return { ...metadata, ...row };
  };
  return rows.map(annotate);
}
|
|
93
|
+
|
|
94
|
+
/**
 * Build one export row per readable JSON report found under `proofDir`.
 *
 * Files whose content cannot be read/parsed (or parses to a falsy value) are
 * skipped. Skipping happens BEFORE numbering, so `bundleRowNumber` runs
 * contiguously from 1 to the number of returned rows, matching how the other
 * tables number their rows.
 *
 * @param {string} proofDir - Root directory scanned recursively for `.json` files.
 * @param {string} exportedAt - Timestamp stored in each row's `bundleExportedAt`.
 * @returns {Array<Object>} Rows with bundle metadata, report identity fields
 *   (`reportId`, `reportCategory`, `reportPath`) and the parsed `report`.
 */
function collectProofReports(proofDir, exportedAt) {
  return walkJsonFiles(proofDir)
    .map((filePath) => ({ filePath, report: readJSON(filePath) }))
    // Drop unusable reports first so the row numbers below have no gaps.
    .filter(({ report }) => report)
    .map(({ filePath, report }, index) => ({
      bundleDataset: 'proof_reports',
      bundleRowNumber: index + 1,
      bundleExportedAt: exportedAt,
      // File name without the .json extension.
      reportId: path.basename(filePath, '.json'),
      // Name of the directory that directly contains the report.
      reportCategory: path.basename(path.dirname(filePath)),
      reportPath: normalizeBundleRelativePath(path.relative(proofDir, filePath)),
      report,
    }));
}
|
|
107
|
+
|
|
108
|
+
/**
 * Render the Databricks bootstrap SQL for a bundle manifest.
 *
 * The output keeps the `__CATALOG__`, `__SCHEMA__` and `__BUNDLE_ROOT__`
 * placeholders; one `CREATE OR REPLACE TABLE ... read_files(...)` statement
 * is emitted per entry in `manifest.tables`.
 *
 * @param {{tables: Iterable<{tableName: string, relativePath: string}>}} manifest
 * @returns {string} SQL text with lines joined by `\n`.
 */
function buildSqlTemplate(manifest) {
  const header = [
    '-- Databricks bootstrap for the exported analytics bundle.',
    '-- Replace __CATALOG__, __SCHEMA__, and __BUNDLE_ROOT__ before running.',
    '',
    'CREATE SCHEMA IF NOT EXISTS __CATALOG__.__SCHEMA__;',
    '',
  ];

  const statements = [];
  for (const { tableName, relativePath } of manifest.tables) {
    const source = normalizeBundleRelativePath(relativePath);
    statements.push(
      `CREATE OR REPLACE TABLE __CATALOG__.__SCHEMA__.${tableName} AS`,
      'SELECT *, _metadata.file_path AS source_file',
      `FROM read_files('__BUNDLE_ROOT__/${source}', format => 'json');`,
      '',
    );
  }

  return header.concat(statements).join('\n');
}
|
|
126
|
+
|
|
127
|
+
/**
 * Current time as an ISO-8601 string with every `:` and `.` replaced by `-`.
 *
 * @returns {string} e.g. `2024-01-02T03-04-05-678Z`.
 */
function timestampSlug() {
  const iso = new Date().toISOString();
  return iso.split(/[:.]/).join('-');
}
|
|
130
|
+
|
|
131
|
+
/**
 * Export feedback logs and proof reports as a Databricks analytics bundle.
 *
 * Writes `tables/<table>.jsonl` for each feedback dataset and for the proof
 * reports, plus `manifest.json` and `load_databricks.sql`, under the bundle
 * directory.
 *
 * @param {string} [feedbackDir] - Directory holding the feedback `.jsonl` logs;
 *   falls back to `getDefaultFeedbackDir()` when omitted or falsy.
 * @param {string} [outputPath] - Bundle directory; defaults to
 *   `<feedbackDir>/analytics/databricks-<timestamp>`.
 * @param {{proofDir?: string}} [options] - `proofDir` overrides `DEFAULT_PROOF_DIR`.
 * @returns {{bundlePath: string, manifestPath: string, sqlTemplatePath: string,
 *   tableCount: number, totalRows: number, tables: Array<Object>}} Export summary.
 */
function exportDatabricksBundle(feedbackDir = getDefaultFeedbackDir(), outputPath, options = {}) {
  const resolvedFeedbackDir = path.resolve(feedbackDir || getDefaultFeedbackDir());
  const resolvedProofDir = path.resolve(options.proofDir || DEFAULT_PROOF_DIR);
  const exportedAt = new Date().toISOString();

  // Only build the timestamped default location when no output path was given.
  let bundleTarget = outputPath;
  if (!bundleTarget) {
    bundleTarget = path.join(resolvedFeedbackDir, 'analytics', `databricks-${timestampSlug()}`);
  }
  const bundlePath = path.resolve(bundleTarget);
  const tablesDir = path.join(bundlePath, 'tables');
  ensureDir(tablesDir);

  // [table name, source file inside the feedback dir, description]
  const feedbackSources = [
    ['feedback_events', 'feedback-log.jsonl', 'Raw ThumbGate feedback events from feedback-log.jsonl'],
    ['memory_records', 'memory-log.jsonl', 'Promoted learning and mistake memories from memory-log.jsonl'],
    ['feedback_sequences', 'feedback-sequences.jsonl', 'Sequence-model training rows derived from accepted feedback'],
    ['feedback_attributions', 'attributed-feedback.jsonl', 'Tool-call attribution rows for negative feedback events'],
  ];

  const tables = [];
  for (const [tableName, sourceName, description] of feedbackSources) {
    const sourcePath = path.join(resolvedFeedbackDir, sourceName);
    const rows = annotateRows(
      readJSONL(sourcePath),
      tableName,
      path.basename(sourcePath),
      exportedAt,
    );
    const fileName = `${tableName}.jsonl`;
    writeJSONL(path.join(tablesDir, fileName), rows);
    tables.push({
      tableName,
      relativePath: toBundleRelativePath('tables', fileName),
      rowCount: rows.length,
      description,
    });
  }

  // Proof reports come from a directory walk rather than a single log file.
  const proofRows = collectProofReports(resolvedProofDir, exportedAt);
  const proofFileName = 'proof_reports.jsonl';
  writeJSONL(path.join(tablesDir, proofFileName), proofRows);
  tables.push({
    tableName: 'proof_reports',
    relativePath: toBundleRelativePath('tables', proofFileName),
    rowCount: proofRows.length,
    description: 'Machine-readable proof artifacts discovered under proof/**/*.json',
  });

  const manifest = {
    format: 'databricks-analytics-bundle',
    version: 1,
    exportedAt,
    bundlePath,
    feedbackDir: resolvedFeedbackDir,
    proofDir: resolvedProofDir,
    placeholders: {
      catalog: '__CATALOG__',
      schema: '__SCHEMA__',
      bundleRoot: '__BUNDLE_ROOT__',
    },
    tables,
  };

  const manifestPath = path.join(bundlePath, 'manifest.json');
  const sqlTemplatePath = path.join(bundlePath, 'load_databricks.sql');
  fs.writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`);
  fs.writeFileSync(sqlTemplatePath, `${buildSqlTemplate(manifest)}\n`);

  let totalRows = 0;
  for (const table of tables) {
    totalRows += table.rowCount;
  }

  return {
    bundlePath,
    manifestPath,
    sqlTemplatePath,
    tableCount: tables.length,
    totalRows,
    tables,
  };
}
|
|
223
|
+
|
|
224
|
+
module.exports = {
|
|
225
|
+
DEFAULT_PROOF_DIR,
|
|
226
|
+
buildSqlTemplate,
|
|
227
|
+
collectProofReports,
|
|
228
|
+
exportDatabricksBundle,
|
|
229
|
+
getDefaultFeedbackDir,
|
|
230
|
+
readJSONL,
|
|
231
|
+
toBundleRelativePath,
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
if (require.main === module) {
|
|
235
|
+
const args = parseArgs(process.argv.slice(2));
|
|
236
|
+
const result = exportDatabricksBundle(
|
|
237
|
+
args['feedback-dir'],
|
|
238
|
+
args.output,
|
|
239
|
+
{ proofDir: args['proof-dir'] }
|
|
240
|
+
);
|
|
241
|
+
console.log(JSON.stringify(result, null, 2));
|
|
242
|
+
}
|