npm - thumbgate - Versions diffs - 1.0.0 → 1.1.0 - Mend

thumbgate 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/adapters/README.md +1 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/codex/config.toml +2 -2
package/adapters/mcp/server-stdio.js +10 -1
package/adapters/opencode/opencode.json +1 -1
package/config/mcp-allowlists.json +1 -0
package/package.json +4 -2
package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
package/plugins/claude-codex-bridge/.mcp.json +1 -1
package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
package/plugins/codex-profile/.mcp.json +1 -1
package/plugins/codex-profile/INSTALL.md +1 -1
package/plugins/codex-profile/README.md +1 -1
package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
package/plugins/opencode-profile/INSTALL.md +1 -1
package/public/index.html +7 -3
package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
package/scripts/export-hf-dataset.js +293 -0
package/scripts/tool-registry.js +11 -0

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "thumbgate",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "plugins": [
     {
       "name": "thumbgate",

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "thumbgate",
   "description": "Pre-action gates that block AI coding agents from repeating known mistakes. Captures feedback, auto-promotes failures into prevention rules, and enforces them via PreToolUse hooks.",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "author": {
     "name": "Igor Ganapolsky"
   },

package/.well-known/mcp/server-card.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "thumbgate",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "ThumbGate — 👍👎 feedback that teaches your AI agent. Thumbs down a mistake, it never happens again.",
   "homepage": "https://github.com/IgorGanapolsky/thumbgate",
   "transport": "stdio",

package/adapters/README.md CHANGED Viewed

@@ -3,7 +3,7 @@
 - `chatgpt/openapi.yaml`: import into GPT Actions.
 - `gemini/function-declarations.json`: Gemini function-calling definitions.
 - `mcp/server-stdio.js`: underlying local MCP stdio server implementation.
-- `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.0.0 thumbgate serve`.
+- `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.1.0 thumbgate serve`.
 - `codex/config.toml`: example Codex MCP profile section using the same version-pinned portable launcher.
 - `amp/skills/thumbgate-feedback/SKILL.md`: Amp skill template.
 - `opencode/opencode.json`: portable OpenCode MCP profile using the same version-pinned portable launcher.

package/adapters/claude/.mcp.json CHANGED Viewed

@@ -2,13 +2,13 @@
   "mcpServers": {
     "thumbgate": {
       "command": "npx",
-      "args": ["--yes", "--package", "thumbgate@1.0.0", "thumbgate", "serve"]
+      "args": ["--yes", "--package", "thumbgate@1.1.0", "thumbgate", "serve"]
     }
   },
   "hooks": {
     "preToolUse": {
       "command": "npx",
-      "args": ["--yes", "--package", "thumbgate@1.0.0", "thumbgate", "gate-check"]
+      "args": ["--yes", "--package", "thumbgate@1.1.0", "thumbgate", "gate-check"]
     }
   }
 }

package/adapters/codex/config.toml CHANGED Viewed

@@ -1,9 +1,9 @@
 # Codex MCP profile (copy into ~/.codex/config.toml or merge section)
 [mcp_servers.thumbgate]
 command = "npx"
-args = ["--yes", "--package", "thumbgate@1.0.0", "thumbgate", "serve"]
+args = ["--yes", "--package", "thumbgate@1.1.0", "thumbgate", "serve"]
 # Hard PreToolUse hook for Codex
 [hooks.pre_tool_use]
 command = "npx"
-args = ["--yes", "--package", "thumbgate@1.0.0", "thumbgate", "gate-check"]
+args = ["--yes", "--package", "thumbgate@1.1.0", "thumbgate", "gate-check"]

package/adapters/mcp/server-stdio.js CHANGED Viewed

@@ -97,6 +97,7 @@ const {
   assembleUnifiedContext,
   formatUnifiedContext,
 } = require('../../scripts/context-manager');
+const { exportHfDataset } = require('../../scripts/export-hf-dataset');
 const PRO_CHECKOUT_URL = 'https://thumbgate-production.up.railway.app/checkout/pro';
@@ -118,7 +119,7 @@ const {
   finalizeSession: finalizeFeedbackSession,
 } = require('../../scripts/feedback-session');
-const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.0.0' };
+const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.1.0' };
 const COMMERCE_CATEGORIES = [
   'product_recommendation',
   'brand_compliance',
@@ -493,6 +494,14 @@ async function callToolInner(name, args) {
     case 'export_dpo_pairs':
       enforceLimit('export_dpo');
       return buildExportDpoResponse(args);
+    case 'export_hf_dataset': {
+      enforceLimit('export_dpo');
+      const outputDir = args.outputDir ? resolveSafePath(args.outputDir) : undefined;
+      return toTextResult(exportHfDataset({
+        outputDir,
+        includeProvenance: args.includeProvenance !== false,
+      }));
+    }
     case 'export_databricks_bundle': {
       enforceLimit('export_databricks');
       const outputPath = args.outputPath ? resolveSafePath(args.outputPath) : undefined;

package/adapters/opencode/opencode.json CHANGED Viewed

@@ -7,7 +7,7 @@
         "npx",
         "--yes",
         "--package",
-        "thumbgate@1.0.0",
+        "thumbgate@1.1.0",
         "thumbgate",
         "serve"
       ],

package/config/mcp-allowlists.json CHANGED Viewed

@@ -22,6 +22,7 @@
       "bootstrap_internal_agent",
       "prevention_rules",
       "export_dpo_pairs",
+      "export_hf_dataset",
       "export_databricks_bundle",
       "construct_context_pack",
       "evaluate_context_pack",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "thumbgate",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "ThumbGate — Make your AI coding agent self-improving. Every mistake becomes a prevention rule that physically blocks the agent from repeating it. Feedback-driven enforcement via PreToolUse hooks, Thompson Sampling for adaptive gates, SQLite+FTS5 lesson DB, and LanceDB vector search. Your agent gets smarter with every session.",
   "homepage": "https://thumbgate-production.up.railway.app",
   "repository": {
@@ -66,7 +66,7 @@
     "social:post-everywhere:dry": "node scripts/post-everywhere.js --dry-run",
     "social:reply-monitor": "node scripts/social-reply-monitor.js",
     "social:reply-monitor:dry": "node scripts/social-reply-monitor.js --dry-run",
-    "test": "npm run test:schema && npm run test:loop && npm run test:dpo && npm run test:kto && npm run test:api && npm run test:proof && npm run test:e2e && npm run test:rlaif && npm run test:attribution && npm run test:quality && npm run test:intelligence && npm run test:training-export && npm run test:deployment && npm run test:operational-integrity && npm run test:workflow && npm run test:billing && npm run test:cli && npm run test:watcher && npm run test:autoresearch && npm run test:ops && npm run test:tessl && npm run test:gates && npm run test:evoskill && npm run test:gates-hardening && npm run test:workers && npm run test:social-analytics && npm run test:memalign && npm run test:xmemory-lite && npm run test:filesystem-search && npm run test:zernio && npm run test:obsidian-export && npm run test:lesson-db && npm run test:lesson-rotation && npm run test:memory-dedup && npm run test:feedback-quality && npm run test:sync-version && npm run test:check-congruence && npm run test:tool-registry && npm run test:feedback-to-rules && npm run test:memory-firewall && npm run test:belief-update && npm run test:hosted-config && npm run test:cloudflare-sandbox && npm run test:mcp-config && npm run test:plan-gate && npm run test:pulse && npm run test:semantic-layer && npm run test:data-pipeline && npm run test:optimize-context && npm run test:principle-extractor && npm run test:analytics-window && npm run test:funnel-analytics && npm run test:experiment-tracker && npm run test:build-metadata && npm run test:context-engine && npm run test:hf-papers && npm run test:marketing-experiment && npm run test:seo-gsd && npm run test:verify-run && npm run test:export-dpo-pairs && npm run test:license && npm run test:bot-detector && npm run test:postinstall && npm run test:funnel-invariants && npm run test:cli-telemetry && npm run test:pro-parity && npm run test:model-tier-router && npm run test:computer-use-firewall && npm run test:skill-exporter && npm run test:statusline && npm 
run test:evolution && npm run test:org-dashboard && npm run test:multi-hop-recall && npm run test:synthetic-dpo && npm run test:thumbgate-skill && npm run test:learn-hub && npm run test:feedback-fallback && npm run test:metaclaw && npm run test:server-lock && npm run test:control-tower && npm run test:pii-scanner && npm run test:data-governance && npm run test:lesson-inference && npm run test:lesson-retrieval && npm run test:reflector-agent && npm run test:feedback-session && npm run test:feedback-history-distiller && npm run test:hallucination-detector && npm run test:history-distiller && npm run test:predictive-insights && npm run test:prove-predictive-insights && npm run test:statusbar-cli && npm run test:generate-instagram-card && npm run test:instagram-thumbgate-post && npm run test:publish-instagram-thumbgate && npm run test:lesson-synthesis && npm run test:background-governance && npm run test:memory-migration && npm run test:prompt-dlp && npm run test:ephemeral-store && npm run test:agent-security && npm run test:skill-progressive && npm run test:per-step-scoring && npm run test:weekly-auto-post && npm run test:social-quality-gate && npm run test:a2ui-engine && npm run test:gate-satisfy && npm run test:money-watcher && npm run test:utm && npm run test:product-feedback && npm run test:feedback-root-consolidator && npm run test:engagement-audit && npm run test:install-growth-automation && npm run test:publish-thumbgate-launch && npm run test:reconcile-thumbgate-campaign && npm run test:reddit-publisher && npm run test:schedule-thumbgate-campaign && npm run test:social-reply-monitor && npm run test:sync-launch-assets",
+    "test": "npm run test:schema && npm run test:loop && npm run test:dpo && npm run test:kto && npm run test:api && npm run test:proof && npm run test:e2e && npm run test:rlaif && npm run test:attribution && npm run test:quality && npm run test:intelligence && npm run test:training-export && npm run test:deployment && npm run test:operational-integrity && npm run test:workflow && npm run test:billing && npm run test:cli && npm run test:watcher && npm run test:autoresearch && npm run test:ops && npm run test:tessl && npm run test:gates && npm run test:evoskill && npm run test:gates-hardening && npm run test:workers && npm run test:social-analytics && npm run test:memalign && npm run test:xmemory-lite && npm run test:filesystem-search && npm run test:zernio && npm run test:obsidian-export && npm run test:lesson-db && npm run test:lesson-rotation && npm run test:memory-dedup && npm run test:feedback-quality && npm run test:sync-version && npm run test:check-congruence && npm run test:tool-registry && npm run test:feedback-to-rules && npm run test:memory-firewall && npm run test:belief-update && npm run test:hosted-config && npm run test:cloudflare-sandbox && npm run test:mcp-config && npm run test:plan-gate && npm run test:pulse && npm run test:semantic-layer && npm run test:data-pipeline && npm run test:optimize-context && npm run test:principle-extractor && npm run test:analytics-window && npm run test:funnel-analytics && npm run test:experiment-tracker && npm run test:build-metadata && npm run test:context-engine && npm run test:hf-papers && npm run test:marketing-experiment && npm run test:seo-gsd && npm run test:verify-run && npm run test:export-dpo-pairs && npm run test:export-hf-dataset && npm run test:license && npm run test:bot-detector && npm run test:postinstall && npm run test:funnel-invariants && npm run test:cli-telemetry && npm run test:pro-parity && npm run test:model-tier-router && npm run test:computer-use-firewall && npm run test:skill-exporter && 
npm run test:statusline && npm run test:evolution && npm run test:org-dashboard && npm run test:multi-hop-recall && npm run test:synthetic-dpo && npm run test:thumbgate-skill && npm run test:learn-hub && npm run test:feedback-fallback && npm run test:metaclaw && npm run test:server-lock && npm run test:control-tower && npm run test:pii-scanner && npm run test:data-governance && npm run test:lesson-inference && npm run test:lesson-retrieval && npm run test:reflector-agent && npm run test:feedback-session && npm run test:feedback-history-distiller && npm run test:hallucination-detector && npm run test:history-distiller && npm run test:predictive-insights && npm run test:prove-predictive-insights && npm run test:statusbar-cli && npm run test:generate-instagram-card && npm run test:instagram-thumbgate-post && npm run test:publish-instagram-thumbgate && npm run test:lesson-synthesis && npm run test:background-governance && npm run test:memory-migration && npm run test:prompt-dlp && npm run test:ephemeral-store && npm run test:agent-security && npm run test:skill-progressive && npm run test:per-step-scoring && npm run test:weekly-auto-post && npm run test:social-quality-gate && npm run test:a2ui-engine && npm run test:gate-satisfy && npm run test:money-watcher && npm run test:utm && npm run test:product-feedback && npm run test:feedback-root-consolidator && npm run test:engagement-audit && npm run test:install-growth-automation && npm run test:publish-thumbgate-launch && npm run test:reconcile-thumbgate-campaign && npm run test:reddit-publisher && npm run test:schedule-thumbgate-campaign && npm run test:social-reply-monitor && npm run test:sync-launch-assets",
     "test:feedback-fallback": "node --test tests/feedback-fallback.test.js",
     "test:metaclaw": "node --test tests/metaclaw-features.test.js",
     "test:server-lock": "node --test tests/server-stdio-lock.test.js",
@@ -105,6 +105,8 @@
     "test:seo-gsd": "node --test tests/seo-gsd.test.js",
     "test:verify-run": "node --test tests/verify-run.test.js",
     "test:export-dpo-pairs": "node --test tests/export-dpo-pairs.test.js",
+    "test:export-hf-dataset": "node --test tests/export-hf-dataset.test.js",
+    "export:hf": "node scripts/export-hf-dataset.js",
     "seo:gsd": "node scripts/seo-gsd.js plan",
     "seo:gsd:write": "node scripts/seo-gsd.js plan --write",
     "test:congruence": "node scripts/check-congruence.js",

package/plugins/claude-codex-bridge/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codex-bridge",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "Run Codex review, adversarial review, and second-pass handoffs from Claude Code while keeping ThumbGate reliability memory in the loop.",
   "author": {
     "name": "Igor Ganapolsky",

package/plugins/claude-codex-bridge/.mcp.json CHANGED Viewed

@@ -5,7 +5,7 @@
       "args": [
         "--yes",
         "--package",
-        "thumbgate@1.0.0",
+        "thumbgate@1.1.0",
         "thumbgate",
         "serve"
       ]

package/plugins/codex-profile/.codex-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codex-profile",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "ThumbGate for Codex: pre-action gates, skill packs, hallucination detection, PII scanning, progressive disclosure (82% token savings), and MCP-backed reliability memory.",
   "author": {
     "name": "Igor Ganapolsky",

package/plugins/codex-profile/.mcp.json CHANGED Viewed

@@ -5,7 +5,7 @@
       "args": [
         "--yes",
         "--package",
-        "thumbgate@1.0.0",
+        "thumbgate@1.1.0",
         "thumbgate",
         "serve"
       ]

package/plugins/codex-profile/INSTALL.md CHANGED Viewed

@@ -31,7 +31,7 @@ The following block is appended to `~/.codex/config.toml`:
 ```toml
 [mcp_servers.thumbgate]
 command = "npx"
-args = ["--yes", "--package", "thumbgate@1.0.0", "thumbgate", "serve"]
+args = ["--yes", "--package", "thumbgate@1.1.0", "thumbgate", "serve"]
 ```
 The repo-local Codex app plugin ships the same runtime path through `plugins/codex-profile/.mcp.json`, so the manual config and plugin metadata stay aligned.

package/plugins/codex-profile/README.md CHANGED Viewed

@@ -29,7 +29,7 @@ That profile launches:
 ```toml
 [mcp_servers.thumbgate]
 command = "npx"
-args = ["--yes", "--package", "thumbgate@1.0.0", "thumbgate", "serve"]
+args = ["--yes", "--package", "thumbgate@1.1.0", "thumbgate", "serve"]
 ```
 ## Why this exists

package/plugins/cursor-marketplace/.cursor-plugin/plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "thumbgate",
   "displayName": "ThumbGate",
   "description": "👍👎 Thumbs down a mistake — your AI agent won't repeat it. Thumbs up good work — it remembers the pattern.",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "author": {
     "name": "Igor Ganapolsky"
   },

package/plugins/opencode-profile/INSTALL.md CHANGED Viewed

@@ -25,7 +25,7 @@ The portable profile adds this MCP server entry:
   "mcp": {
     "thumbgate": {
       "type": "local",
-      "command": ["npx", "--yes", "--package", "thumbgate@1.0.0", "thumbgate", "serve"],
+      "command": ["npx", "--yes", "--package", "thumbgate@1.1.0", "thumbgate", "serve"],
       "enabled": true
     }
   }

package/public/index.html CHANGED Viewed

@@ -66,7 +66,9 @@ __GA_BOOTSTRAP__
     "Background Agent Governance — per-agent pass rates, CI auto-feedback",
     "Memory Migration — imports Claude Code MEMORY.md into unlimited SQLite DB",
     "Prompt-Level DLP — scans tool call inputs before execution",
-    "Per-Step Scoring — every gate decision becomes a DPO/KTO training signal"
+    "Per-Step Scoring — every gate decision becomes a DPO/KTO training signal",
+    "HuggingFace Export — share PII-redacted agent traces as open training datasets",
+    "Unified Context — one-call context assembly with session, lessons, guards, and code-graph"
   ],
   "offers": [
     {
@@ -578,7 +580,7 @@ __GA_BOOTSTRAP__
 <!-- HOW IT WORKS -->
 <section class="how-it-works" id="how-it-works">
   <div class="container">
-    <div class="section-label">New in v1.0.0</div>
+    <div class="section-label">New in v1.1.0</div>
     <h2 class="section-title">Three steps to stop repeated AI failures</h2>
     <div class="steps">
       <div class="step">
@@ -670,6 +672,7 @@ __GA_BOOTSTRAP__
           <li>All MCP integrations (Claude Code, Cursor, Codex, etc.)</li>
           <li>PreToolUse hook blocking</li>
           <li>Local SQLite lesson DB</li>
+          <li>Unified context assembly — one call gets session, lessons, guards, and code-graph</li>
           <li><a href="/guide" style="color:var(--cyan);text-decoration:underline;">Setup guide for all agents →</a></li>
         </ul>
         <a href="https://www.npmjs.com/package/thumbgate" target="_blank" rel="noopener" class="btn-free">Install Free</a>
@@ -698,6 +701,7 @@ __GA_BOOTSTRAP__
           <li><a href="/dashboard" style="color:var(--cyan);text-decoration:underline;">Visual gate debugger →</a> see every blocked action and the gate that fired so you can trust the system in minutes</li>
           <li>Auto-connect — activate once with your license key, then your running agents appear automatically on your local dashboard</li>
           <li><a href="/dashboard" style="color:var(--cyan);text-decoration:underline;">DPO training data export →</a> turn real thumbs-downs into ready-to-use preference pairs for fine-tuning (LoRA / JSONL)</li>
+          <li><strong>HuggingFace dataset export</strong> — share PII-redacted agent traces as open training datasets (<code>npm run export:hf</code>)</li>
           <li><strong>Model Hardening Advisor</strong> — get recommendations on when and how to fine-tune your model to natively avoid recurring failures</li>
           <li>Personal local dashboard — every Pro user gets a localhost dashboard without extra cloud setup</li>
           <li>Founder-license support — we help you wire the riskiest flows first: migrations, force-pushes, deploys, and CI</li>
@@ -835,7 +839,7 @@ __GA_BOOTSTRAP__
       <a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
       <a href="/blog">Blog</a>
     </div>
-    <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.0.0</span>
+    <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.1.0</span>
   </div>
 </footer>

package/scripts/__pycache__/train_from_feedback.cpython-312.pyc CHANGED Viewed

Binary file

package/scripts/export-hf-dataset.js ADDED Viewed

@@ -0,0 +1,293 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * HuggingFace Dataset Exporter
+ *
+ * Exports ThumbGate agent traces as a HuggingFace-compatible dataset in two formats:
+ *
+ * 1. Agent Traces (traces split) — raw feedback entries with tool calls, signals,
+ *    context, and outcomes. Matches the "share your agent traces" initiative.
+ *
+ * 2. DPO Preferences (preferences split) — chosen/rejected preference pairs
+ *    derived from error→learning memory promotion. Ready for DPO/RLHF training.
+ *
+ * Output: Parquet-compatible JSONL files + dataset_info.json (HF Dataset Card metadata).
+ *
+ * HuggingFace Datasets format:
+ *   dataset_dir/
+ *     dataset_info.json        — metadata, features schema, splits
+ *     traces.jsonl             — agent trace rows
+ *     preferences.jsonl        — DPO preference pair rows
+ */
+const fs = require('fs');
+const path = require('path');
+const { resolveFeedbackDir } = require('./feedback-paths');
+const { exportDpoFromMemories } = require('./export-dpo-pairs');
+const { getProvenance } = require('./contextfs');
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+/**
+ * Read a JSON Lines file into an array of parsed records.
+ *
+ * A missing or empty file yields an empty array. Lines that fail to parse,
+ * and lines whose parsed value is falsy, are skipped.
+ *
+ * @param {string} filePath - Path of the .jsonl file to read.
+ * @returns {Array} Parsed truthy values, in file order.
+ */
+function readJSONL(filePath) {
+  if (!fs.existsSync(filePath)) return [];
+  const text = fs.readFileSync(filePath, 'utf-8').trim();
+  if (text === '') return [];
+  const records = [];
+  for (const line of text.split('\n')) {
+    let parsed = null;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      // Malformed line: skip it rather than abort the whole read.
+      continue;
+    }
+    if (parsed) records.push(parsed);
+  }
+  return records;
+}
+/**
+ * Create `dirPath`, including any missing parent directories, unless
+ * something already exists at that path.
+ *
+ * @param {string} dirPath - Directory to create.
+ */
+function ensureDir(dirPath) {
+  if (fs.existsSync(dirPath)) return;
+  fs.mkdirSync(dirPath, { recursive: true });
+}
+/**
+ * Write `rows` to `filePath` as JSON Lines: one JSON document per line with
+ * a trailing newline. When the serialized content is empty the file is
+ * written empty.
+ *
+ * @param {string} filePath - Destination file.
+ * @param {Array} rows - Values to serialize, one per line.
+ */
+function writeJSONL(filePath, rows) {
+  const lines = rows.map((row) => JSON.stringify(row));
+  const serialized = lines.join('\n');
+  const payload = serialized === '' ? '' : serialized + '\n';
+  fs.writeFileSync(filePath, payload);
+}
+// ---------------------------------------------------------------------------
+// PII / path redaction
+// ---------------------------------------------------------------------------
+/**
+ * Replace the user-name segment of home-directory paths with `redacted`.
+ *
+ * Handles `/Users/<name>`, `/home/<name>` and `<drive>:\Users\<name>`. The
+ * Windows pattern accepts any drive letter, any letter case, and runs of
+ * backslashes, so paths inside JSON-serialized text (where each `\` is
+ * written as `\\`) are redacted as well.
+ *
+ * @param {*} text - Value to scrub.
+ * @returns {*} The scrubbed string; `''` for any falsy input; any other
+ *   non-string value is returned unchanged.
+ */
+function redactPaths(text) {
+  if (!text || typeof text !== 'string') return text || '';
+  return text
+    .replace(/\/Users\/[^\s/]+/g, '/Users/redacted')
+    .replace(/\/home\/[^\s/]+/g, '/home/redacted')
+    // $1 keeps the matched drive letter, casing and separator count intact.
+    .replace(/([a-z]:\\+Users\\+)[^\s\\]+/gi, '$1redacted');
+}
+/**
+ * Redact home-directory paths in a nested JSON-like value.
+ *
+ * Strings are passed through redactPaths; arrays and plain objects are
+ * copied with their contents redacted recursively. Anything else (numbers,
+ * booleans, null, class instances) is returned unchanged.
+ *
+ * @param {*} value - Value to redact.
+ * @param {WeakSet<object>} ancestors - Containers currently being walked;
+ *   guards against infinite recursion on cyclic input.
+ * @returns {*} Redacted copy, or the original value when nothing applies.
+ */
+function redactNestedValue(value, ancestors) {
+  if (typeof value === 'string') return redactPaths(value);
+  if (!value || typeof value !== 'object') return value;
+  // A container that is its own ancestor is a cycle: leave it as-is.
+  if (ancestors.has(value)) return value;
+  const isArray = Array.isArray(value);
+  if (!isArray) {
+    // Only walk plain objects so Dates, Maps, etc. keep their identity.
+    const proto = Object.getPrototypeOf(value);
+    if (proto !== Object.prototype && proto !== null) return value;
+  }
+  ancestors.add(value);
+  let copy;
+  if (isArray) {
+    copy = value.map((item) => redactNestedValue(item, ancestors));
+  } else {
+    copy = {};
+    for (const [key, inner] of Object.entries(value)) {
+      copy[key] = redactNestedValue(inner, ancestors);
+    }
+  }
+  ancestors.delete(value);
+  return copy;
+}
+/**
+ * Return a copy of `obj` with home-directory paths redacted in every string
+ * it contains, including strings inside nested objects and arrays.
+ *
+ * @param {*} obj - Entry to redact; non-objects are returned unchanged.
+ * @returns {*} A new object with the same own enumerable keys as `obj`.
+ */
+function redactEntry(obj) {
+  if (!obj || typeof obj !== 'object') return obj;
+  const ancestors = new WeakSet([obj]);
+  const out = {};
+  for (const [key, value] of Object.entries(obj)) {
+    out[key] = redactNestedValue(value, ancestors);
+  }
+  return out;
+}
+// ---------------------------------------------------------------------------
+// Trace row builder — converts feedback-log entries to HF trace rows
+// ---------------------------------------------------------------------------
+/**
+ * Convert one feedback-log entry into a trace row.
+ *
+ * Free-text fields and string tags are passed through redactPaths here, so
+ * the row is redacted even when the caller did not pre-redact the entry.
+ *
+ * @param {Object} entry - Parsed feedback-log record.
+ * @param {number} index - Position of the entry; used for the fallback
+ *   `trace_<index>` id when the entry has no `id`.
+ * @returns {Object} Trace row with the fixed set of trace columns.
+ */
+function buildTraceRow(entry, index) {
+  const tags = Array.isArray(entry.tags)
+    ? entry.tags.map((tag) => (typeof tag === 'string' ? redactPaths(tag) : tag))
+    : [];
+  return {
+    trace_id: entry.id || `trace_${index}`,
+    timestamp: entry.timestamp || null,
+    signal: entry.signal || entry.feedback || 'unknown',
+    tool_name: entry.toolName || entry.actionType || 'unknown',
+    context: redactPaths(entry.context || ''),
+    what_worked: redactPaths(entry.whatWorked || ''),
+    what_went_wrong: redactPaths(entry.whatWentWrong || ''),
+    what_to_change: redactPaths(entry.whatToChange || ''),
+    tags,
+    failure_type: entry.failureType || null,
+    source: 'thumbgate',
+  };
+}
+// ---------------------------------------------------------------------------
+// Preference row builder — converts DPO pairs to HF preference rows
+// ---------------------------------------------------------------------------
+/**
+ * Convert one DPO pair into a preference row.
+ *
+ * Missing numeric metadata is emitted as `null` rather than `undefined`:
+ * JSON.stringify omits undefined-valued properties, which would leave some
+ * rows without the `match_score` / `rubric_delta` columns.
+ *
+ * @param {Object} pair - DPO pair with `prompt`, `chosen`, `rejected` and
+ *   optional `metadata`.
+ * @param {number} index - Position of the pair; used for `pref_<index>`.
+ * @returns {Object} Preference row with the fixed set of preference columns.
+ */
+function buildPreferenceRow(pair, index) {
+  const metadata = pair.metadata || null;
+  const rubric = metadata && metadata.rubric ? metadata.rubric : null;
+  const orNull = (value) => (value === undefined ? null : value);
+  return {
+    pair_id: `pref_${index}`,
+    prompt: redactPaths(pair.prompt || ''),
+    chosen: redactPaths(pair.chosen || ''),
+    rejected: redactPaths(pair.rejected || ''),
+    match_score: metadata ? orNull(metadata.matchScore) : null,
+    matched_keys: metadata ? metadata.matchedKeys || [] : [],
+    rubric_delta: rubric ? orNull(rubric.weightedDelta) : null,
+    source: 'thumbgate',
+  };
+}
+// ---------------------------------------------------------------------------
+// Dataset info (HuggingFace Dataset Card metadata)
+// ---------------------------------------------------------------------------
+/**
+ * Build the metadata object written to dataset_info.json: description,
+ * per-split column schema, split sizes, and export bookkeeping.
+ *
+ * @param {Object} params
+ * @param {number} params.traceCount - Number of rows in the traces split.
+ * @param {number} params.preferenceCount - Number of rows in the preferences split.
+ * @param {string} params.exportedAt - Export timestamp stored as `exported_at`.
+ * @returns {Object} Dataset metadata.
+ */
+function buildDatasetInfo({ traceCount, preferenceCount, exportedAt }) {
+  // Column-type factories; each call returns a fresh descriptor object.
+  const stringColumn = () => ({ dtype: 'string' });
+  const floatColumn = () => ({ dtype: 'float32' });
+  const stringListColumn = () => ({ dtype: 'list', inner: { dtype: 'string' } });
+
+  const traceFeatures = {
+    trace_id: stringColumn(),
+    timestamp: stringColumn(),
+    signal: stringColumn(),
+    tool_name: stringColumn(),
+    context: stringColumn(),
+    what_worked: stringColumn(),
+    what_went_wrong: stringColumn(),
+    what_to_change: stringColumn(),
+    tags: stringListColumn(),
+    failure_type: stringColumn(),
+    source: stringColumn(),
+  };
+  const preferenceFeatures = {
+    pair_id: stringColumn(),
+    prompt: stringColumn(),
+    chosen: stringColumn(),
+    rejected: stringColumn(),
+    match_score: floatColumn(),
+    matched_keys: stringListColumn(),
+    rubric_delta: floatColumn(),
+    source: stringColumn(),
+  };
+
+  const datasetInfo = {
+    description: 'Agent traces and DPO preference pairs from ThumbGate — pre-action gates for AI coding agents. Contains real-world tool call feedback, failure patterns, and learned corrections.',
+    citation: '',
+    homepage: 'https://github.com/IgorGanapolsky/ThumbGate',
+    license: 'MIT',
+    features: { traces: traceFeatures, preferences: preferenceFeatures },
+    splits: {
+      traces: { num_examples: traceCount },
+      preferences: { num_examples: preferenceCount },
+    },
+  };
+
+  // NOTE(review): `version` is a fixed '1.0.0' while the package manifests in
+  // this release are 1.1.0 — confirm whether it is meant to be an exporter
+  // format version or should track the package version.
+  return {
+    dataset_info: datasetInfo,
+    exported_at: exportedAt,
+    exporter: 'thumbgate/export-hf-dataset',
+    version: '1.0.0',
+  };
+}
+// ---------------------------------------------------------------------------
+// Main export function
+// ---------------------------------------------------------------------------
+/**
+ * Build trace rows from recent provenance events (best-effort).
+ *
+ * @param {number} offset - Number of trace rows already collected; used to
+ *   number the fallback `prov_<n>` ids.
+ * @returns {Array<Object>} Provenance trace rows. Rows built before a
+ *   failure are kept; the remaining events are dropped.
+ */
+function collectProvenanceTraceRows(offset) {
+  const rows = [];
+  try {
+    for (const evt of getProvenance(200)) {
+      // Redact the raw event fields first: JSON.stringify escapes
+      // backslashes, so Windows paths are easier to recognise before
+      // serialization. The pass over the serialized text is kept as a
+      // second line of defence. Context is capped at 500 characters.
+      const serialized = JSON.stringify(redactEntry(evt)).slice(0, 500);
+      rows.push({
+        trace_id: evt.id || `prov_${offset + rows.length}`,
+        timestamp: evt.timestamp || null,
+        signal: 'provenance',
+        tool_name: evt.type || 'context_assembly',
+        context: redactPaths(serialized),
+        what_worked: '',
+        what_went_wrong: '',
+        what_to_change: '',
+        tags: ['provenance'],
+        failure_type: null,
+        source: 'thumbgate',
+      });
+    }
+  } catch {
+    // Provenance read failure should not break export
+  }
+  return rows;
+}
+/**
+ * Build preference rows from memory-log records (best-effort).
+ *
+ * @param {Array<Object>} memories - Parsed memory-log records.
+ * @returns {Array<Object>} Preference rows, or `[]` when there are no
+ *   memories or the DPO export throws.
+ */
+function collectPreferenceRows(memories) {
+  if (memories.length === 0) return [];
+  try {
+    const dpoResult = exportDpoFromMemories(memories);
+    return dpoResult.pairs.map((pair, i) => buildPreferenceRow(pair, i));
+  } catch {
+    // DPO export failure should not break the traces export
+    return [];
+  }
+}
+/**
+ * Export ThumbGate data as a HuggingFace-compatible dataset.
+ *
+ * Writes traces.jsonl, preferences.jsonl and dataset_info.json into the
+ * output directory, creating it if needed.
+ *
+ * @param {Object} options
+ * @param {string} [options.outputDir] - Directory to write dataset files
+ *   (default: `<feedbackDir>/hf-dataset`)
+ * @param {string} [options.feedbackDir] - Override feedback data directory
+ * @param {boolean} [options.includeProvenance] - Include provenance events in
+ *   traces; anything other than `false` enables it
+ * @returns {Object} Export summary: `outputDir`, `traceCount`,
+ *   `preferenceCount`, `files`, `exportedAt`
+ */
+function exportHfDataset(options = {}) {
+  const feedbackDir = options.feedbackDir || resolveFeedbackDir();
+  const outputDir = options.outputDir || path.join(feedbackDir, 'hf-dataset');
+  const includeProvenance = options.includeProvenance !== false;
+  ensureDir(outputDir);
+  // --- Traces split ---
+  const feedbackEntries = readJSONL(path.join(feedbackDir, 'feedback-log.jsonl'));
+  const traceRows = feedbackEntries.map((entry, i) => buildTraceRow(redactEntry(entry), i));
+  // Optionally append provenance events as traces
+  if (includeProvenance) {
+    traceRows.push(...collectProvenanceTraceRows(traceRows.length));
+  }
+  writeJSONL(path.join(outputDir, 'traces.jsonl'), traceRows);
+  // --- Preferences split ---
+  const memories = readJSONL(path.join(feedbackDir, 'memory-log.jsonl'));
+  const preferenceRows = collectPreferenceRows(memories);
+  writeJSONL(path.join(outputDir, 'preferences.jsonl'), preferenceRows);
+  // --- Dataset info ---
+  const exportedAt = new Date().toISOString();
+  const info = buildDatasetInfo({
+    traceCount: traceRows.length,
+    preferenceCount: preferenceRows.length,
+    exportedAt,
+  });
+  fs.writeFileSync(
+    path.join(outputDir, 'dataset_info.json'),
+    JSON.stringify(info, null, 2) + '\n',
+  );
+  return {
+    outputDir,
+    traceCount: traceRows.length,
+    preferenceCount: preferenceRows.length,
+    files: ['traces.jsonl', 'preferences.jsonl', 'dataset_info.json'],
+    exportedAt,
+  };
+}
+// ---------------------------------------------------------------------------
+// CLI
+// ---------------------------------------------------------------------------
+/**
+ * CLI entry point: parse flags, run the export, and print a summary.
+ *
+ * Flags:
+ *   --output=<dir> | --output <dir>          Output directory (default: exporter default)
+ *   --provenance=false | --provenance false  Skip provenance events
+ *
+ * A bare `--output` with no value is reported as a usage error and sets a
+ * non-zero exit code instead of passing a non-string path to the exporter.
+ */
+function main() {
+  // Flags that may take their value from the following argument.
+  const valueFlags = ['output', 'provenance'];
+  const argv = process.argv.slice(2);
+  const args = {};
+  for (let i = 0; i < argv.length; i += 1) {
+    const arg = argv[i];
+    if (!arg.startsWith('--')) continue;
+    const [key, ...rest] = arg.slice(2).split('=');
+    if (rest.length) {
+      args[key] = rest.join('=');
+      continue;
+    }
+    const next = argv[i + 1];
+    if (valueFlags.includes(key) && next !== undefined && !next.startsWith('--')) {
+      args[key] = next;
+      i += 1;
+    } else {
+      args[key] = true;
+    }
+  }
+  if (args.output === true) {
+    console.error('--output requires a directory: use --output=<dir> or --output <dir>');
+    process.exitCode = 1;
+    return;
+  }
+  const result = exportHfDataset({
+    outputDir: args.output || undefined,
+    includeProvenance: args.provenance !== 'false',
+  });
+  console.log(`Exported HuggingFace dataset to ${result.outputDir}`);
+  console.log(`  Traces: ${result.traceCount}`);
+  console.log(`  Preferences: ${result.preferenceCount}`);
+  console.log(`  Files: ${result.files.join(', ')}`);
+}
+// Run the CLI only when this file is executed directly, not when it is
+// loaded through require().
+if (require.main === module) {
+  main();
+}
+// Module API: the export entry point plus the row builders, redaction
+// helpers and JSONL reader defined above.
+module.exports = {
+  exportHfDataset,
+  buildTraceRow,
+  buildPreferenceRow,
+  buildDatasetInfo,
+  redactPaths,
+  redactEntry,
+  readJSONL,
+};

package/scripts/tool-registry.js CHANGED Viewed

@@ -399,6 +399,17 @@ const TOOLS = [
       },
     },
   }),
+  destructiveTool({
+    name: 'export_hf_dataset',
+    description: 'Export ThumbGate agent traces and DPO preference pairs as a HuggingFace-compatible dataset. Produces traces.jsonl, preferences.jsonl, and dataset_info.json with PII-redacted paths. Ready for huggingface-cli upload.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        outputDir: { type: 'string', description: 'Output directory (default: feedback-dir/hf-dataset)' },
+        includeProvenance: { type: 'boolean', description: 'Include provenance events in traces (default: true)' },
+      },
+    },
+  }),
   destructiveTool({
     name: 'export_databricks_bundle',
     description: 'Export ThumbGate logs and proof artifacts as a Databricks-ready analytics bundle',