npm - thumbgate - Versions diffs - 1.14.1 → 1.15.0 - Mend

thumbgate 1.14.1 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/README.md +2 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/mcp/server-stdio.js +8 -1
package/adapters/opencode/opencode.json +1 -1
package/bin/cli.js +54 -0
package/config/enforcement.json +59 -7
package/config/gates/default.json +33 -0
package/config/mcp-allowlists.json +4 -0
package/config/merge-quality-checks.json +2 -1
package/package.json +17 -5
package/public/codex-plugin.html +7 -1
package/public/dashboard.html +23 -2
package/public/index.html +20 -2
package/public/learn.html +39 -0
package/public/lessons.html +25 -1
package/public/numbers.html +271 -0
package/public/pro.html +7 -1
package/scripts/cli-feedback.js +2 -1
package/scripts/cli-schema.js +43 -4
package/scripts/commercial-offer.js +1 -1
package/scripts/contextfs.js +214 -32
package/scripts/feedback-loop.js +49 -5
package/scripts/harness-selector.js +132 -0
package/scripts/lesson-canonical.js +181 -0
package/scripts/lesson-db.js +71 -10
package/scripts/lesson-synthesis.js +23 -2
package/scripts/native-messaging-audit.js +514 -0
package/scripts/pr-manager.js +47 -7
package/scripts/profile-router.js +16 -1
package/scripts/rule-validator.js +285 -0
package/scripts/seo-gsd.js +182 -2
package/scripts/tool-registry.js +12 -0
package/skills/thumbgate/SKILL.md +1 -1
package/src/api/server.js +53 -0
package/.claude-plugin/README.md +0 -170
package/adapters/README.md +0 -12
package/skills/agent-memory/SKILL.md +0 -97
package/skills/solve-architecture-autonomy/SKILL.md +0 -17
package/skills/solve-architecture-autonomy/tool.js +0 -33
package/skills/thumbgate-feedback/SKILL.md +0 -49

package/scripts/contextfs.js CHANGED Viewed

@@ -605,6 +605,137 @@ function selectFlatContextItems(candidates, maxItems, maxChars) {
   };
 }
+/* ── Summarize-then-expand selection ───────────────────────────────
+ *
+ * Two-pass retrieval that front-loads recall, then spends remaining char
+ * budget on depth for the highest-scoring candidates.
+ *
+ *   Pass 1 — breadth. Walk the ranked candidate list and add each as a
+ *   compact "summary tier" item: title + one-line hint drawn from the
+ *   structured fields (whatToChange / whatWentWrong / reasoning), falling
+ *   back to the first non-empty content line.
+ *   A summary is small and bounded (SUMMARY_HINT_MAX chars), so many fit in
+ *   a fraction of the budget. The walk stops once maxItems are selected; a
+ *   candidate whose summary would exceed the summary-reservation cap
+ *   (SUMMARY_RESERVE_FRACTION of maxChars) is skipped and counted, and the
+ *   walk moves on to the next candidate — the cap protects enough headroom
+ *   for Pass 2 to actually do something.
+ *
+ *   Pass 2 — depth. Walk the selected list top-down and try to upgrade each
+ *   summary to the full structured context. The upgrade cost is the delta
+ *   between full doc chars and the summary we already accounted for; if it
+ *   fits under the *overall* maxChars, swap the summary for the full item
+ *   and tag it tier='expanded'. An entry whose delta does not fit stays a
+ *   summary, and the walk continues so later, smaller entries can still
+ *   be expanded with whatever budget remains.
+ *
+ * Rationale: the flat selector overcommits chars on the first few full-size
+ * hits and silently drops the tail. Summarize-then-expand means a consumer
+ * always knows which docs matched (full roster of titles), and the model
+ * sees full context for the top answers.
+ *
+ * The option is wired into constructContextPack via `strategy` or the
+ * explicit `summarizeThenExpand` flag. Default behavior is unchanged so
+ * existing callers / tests don't shift.
+ */
+// Maximum number of characters kept from the hint text of a summary-tier item.
+const SUMMARY_HINT_MAX = 160;
+// Fraction of maxChars that the summary pass may consume; the summary budget
+// is computed from it in selectSummarizeThenExpand.
+const SUMMARY_RESERVE_FRACTION = 0.35;
+/**
+ * Build the compact "summary tier" structured context for a document.
+ *
+ * The hint is the first truthy value among the full structured context's
+ * whatToChange, whatWentWrong and reasoning fields; when all three are falsy
+ * it falls back to the first non-blank (trimmed) line of doc.content, then
+ * to the empty string. The hint is cut to SUMMARY_HINT_MAX characters.
+ *
+ * @param {object} doc - Document handed to buildStructuredContext; its
+ *   `content` is read for the fallback hint.
+ * @returns {{rawContent: string, reasoning: null, whatWentWrong: null,
+ *   whatToChange: null, rubricFailure: null}} Summary-shaped context whose
+ *   only populated field is rawContent.
+ */
+function buildSummaryContext(doc) {
+  const structured = buildStructuredContext(doc);
+  let hint = structured.whatToChange
+    || structured.whatWentWrong
+    || structured.reasoning;
+  if (!hint) {
+    // Nothing usable in the structured fields: use the first line of the raw
+    // content that still has text after trimming.
+    const firstLine = (doc.content || '')
+      .split('\n')
+      .map((line) => line.trim())
+      .find(Boolean);
+    hint = firstLine || '';
+  }
+  // Truncate aggressively: a summary exists so that many fit in one pack.
+  const rawContent = hint.slice(0, SUMMARY_HINT_MAX);
+  return {
+    rawContent,
+    reasoning: null,
+    whatWentWrong: null,
+    whatToChange: null,
+    rubricFailure: null,
+  };
+}
+/**
+ * Character cost of a document when it is represented as a summary: the
+ * title (or '' when falsy), a newline, and the summary hint.
+ *
+ * @param {object} doc - Document with an optional `title`.
+ * @returns {number} Length of the "title\nhint" string.
+ */
+function measureSummaryChars(doc) {
+  const { rawContent } = buildSummaryContext(doc);
+  const title = doc.title || '';
+  const rendered = `${title}\n${rawContent}`;
+  return rendered.length;
+}
+/**
+ * Select context items with the two-pass summarize-then-expand strategy.
+ *
+ * Pass 1 walks `candidates` in the order given and adds each one as a
+ * tier='summary' item until `maxItems` are selected; a candidate whose
+ * summary would push usage past the summary budget is skipped and counted in
+ * skippedByMaxChars. Pass 2 walks the selected items in the same order and
+ * replaces a summary with the full structured context (tier='expanded')
+ * whenever the size delta still fits under `maxChars`.
+ *
+ * @param {Array<{doc: object, score: *}>} candidates - Candidates to pack;
+ *   the caller is expected to have sorted them best-first.
+ * @param {number} maxItems - Maximum number of items to select.
+ * @param {number} maxChars - Overall character budget for the pack.
+ * @returns {{items: object[], usedChars: number, skippedByMaxChars: number,
+ *   retrieval: object}} Selected items plus accounting and retrieval metadata.
+ */
+function selectSummarizeThenExpand(candidates, maxItems, maxChars) {
+  // Summary budget: a share of the overall budget, but never smaller than an
+  // empty summary plus one character.
+  const reservedShare = Math.floor(maxChars * SUMMARY_RESERVE_FRACTION);
+  const emptySummaryFloor = measureSummaryChars({ title: '', content: '' }) + 1;
+  const summaryBudget = Math.max(reservedShare, emptySummaryFloor);
+
+  // Pass 1 — breadth: greedily pack summaries under the summary budget.
+  const picked = [];
+  let usedChars = 0;
+  let skippedByMaxChars = 0;
+  for (const candidate of candidates) {
+    if (picked.length >= maxItems) break;
+    const summaryLen = measureSummaryChars(candidate.doc);
+    if (usedChars + summaryLen > summaryBudget) {
+      skippedByMaxChars += 1;
+    } else {
+      picked.push({
+        id: candidate.doc.id,
+        namespace: candidate.doc.namespace,
+        title: candidate.doc.title,
+        structuredContext: buildSummaryContext(candidate.doc),
+        tags: candidate.doc.tags || [],
+        score: candidate.score,
+        tier: 'summary',
+        // Builder-only state, removed again before returning.
+        _doc: candidate.doc,
+        _summaryLen: summaryLen,
+      });
+      usedChars += summaryLen;
+    }
+  }
+
+  // Pass 2 — depth: upgrade summaries to full items, in selection order,
+  // while the overall budget can absorb the extra characters.
+  let expandedCount = 0;
+  picked.forEach((entry) => {
+    const delta = measureDocumentChars(entry._doc) - entry._summaryLen;
+    // Skip entries that are no bigger in full form, and entries that do not
+    // fit; later (possibly smaller) entries are still considered.
+    if (delta <= 0 || usedChars + delta > maxChars) return;
+    entry.structuredContext = buildStructuredContext(entry._doc);
+    entry.tier = 'expanded';
+    usedChars += delta;
+    expandedCount += 1;
+  });
+
+  // Drop the private bookkeeping fields from the returned items.
+  const items = picked.map((entry) => {
+    const { _doc, _summaryLen, ...publicFields } = entry;
+    return publicFields;
+  });
+
+  const retrieval = {
+    strategy: 'summarize-then-expand',
+    themeCount: 0,
+    semanticCount: 0,
+    selectedThemes: [],
+    selectedSemanticGroups: [],
+    representativeCount: items.length,
+    expandedEpisodes: expandedCount,
+    summaryCount: items.length - expandedCount,
+    summaryBudget,
+    queryCoverage: null,
+    initialCoverage: null,
+    coverageTarget: null,
+  };
+  return { items, usedChars, skippedByMaxChars, retrieval };
+}
 /* ── Memex-style Indexed Memory ────────────────────────────────── */
 const MEMEX_INDEX_FILE = 'memex-index.jsonl';
@@ -750,17 +881,38 @@ function constructMemexPack({ query = '', maxItems = 8, maxChars = 6000, namespa
   return pack;
 }
-function constructContextPack({ query = '', maxItems = 8, maxChars = 6000, namespaces = [] } = {}) {
+function constructContextPack({
+  query = '',
+  maxItems = 8,
+  maxChars = 6000,
+  namespaces = [],
+  strategy = null,
+  summarizeThenExpand = false,
+} = {}) {
   const normalizedNamespaces = normalizeNamespaces(namespaces);
   const tokens = tokenizeQuery(query);
   const sourceHash = getSourceHash(normalizedNamespaces);
-  const cacheHit = findSemanticCacheHit({
-    query,
-    namespaces: normalizedNamespaces,
-    maxItems,
-    maxChars,
-  });
+  // Resolve the effective strategy. Explicit `strategy` wins; otherwise
+  // `summarizeThenExpand: true` flips the flag. Default remains auto
+  // (flat | hierarchical) so callers that don't opt in keep their cached
+  // packs addressable.
+  const effectiveStrategy = strategy
+    || (summarizeThenExpand ? 'summarize-then-expand' : null);
+  // Skip the semantic cache for summarize-then-expand packs. The cache key
+  // is (namespaces, maxItems, maxChars) — it doesn't include the strategy,
+  // so a cached flat pack would be served to an STE caller (and vice versa)
+  // with the wrong shape. Cheaper to recompute than to extend the cache key
+  // and invalidate every entry on disk.
+  const cacheHit = effectiveStrategy === 'summarize-then-expand'
+    ? null
+    : findSemanticCacheHit({
+      query,
+      namespaces: normalizedNamespaces,
+      maxItems,
+      maxChars,
+    });
   if (cacheHit) {
     const packId = `pack_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
@@ -796,25 +948,51 @@ function constructContextPack({ query = '', maxItems = 8, maxChars = 6000, names
     .sort((a, b) => b.score - a.score);
   const hierarchicalRetrievalEnabled = shouldUseHierarchicalRetrieval(normalizedNamespaces);
-  const selection = hierarchicalRetrievalEnabled
-    ? retrieveHierarchicalDocuments({
+  let selection;
+  if (effectiveStrategy === 'summarize-then-expand') {
+    // Explicit opt-in: bypass the hierarchical path entirely. The
+    // summarize-then-expand selector assumes a flat ranked list where each
+    // item is a single episode, and mixing it with theme-based hierarchical
+    // retrieval would double-compress the top-of-list.
+    selection = selectSummarizeThenExpand(candidates, maxItems, maxChars);
+  } else if (hierarchicalRetrievalEnabled) {
+    selection = retrieveHierarchicalDocuments({
       documents: candidates.map((candidate) => candidate.doc),
       query,
       maxItems,
       maxChars,
       scorer: scoreDocument,
       measureDocument: measureDocumentChars,
-    })
-    : selectFlatContextItems(candidates, maxItems, maxChars);
+    });
+  } else {
+    selection = selectFlatContextItems(candidates, maxItems, maxChars);
+  }
-  const selected = selection.items.map((doc) => ({
-    id: doc.id,
-    namespace: doc.namespace,
-    title: doc.title,
-    structuredContext: buildStructuredContext(doc),
-    tags: doc.tags || [],
-    score: scoreDocument(doc, tokens),
-  }));
+  // The flat + hierarchical paths emit raw docs; summarize-then-expand emits
+  // fully-shaped items that already carry structuredContext and a `tier`
+  // marker. Detect the shape so we don't double-canonicalize STE items
+  // (which would re-expand every summary into full content).
+  const selected = selection.items.map((item) => {
+    if (item && item.structuredContext) {
+      return {
+        id: item.id,
+        namespace: item.namespace,
+        title: item.title,
+        structuredContext: item.structuredContext,
+        tags: item.tags || [],
+        score: typeof item.score === 'number' ? item.score : scoreDocument(item, tokens),
+        ...(item.tier ? { tier: item.tier } : {}),
+      };
+    }
+    return {
+      id: item.id,
+      namespace: item.namespace,
+      title: item.title,
+      structuredContext: buildStructuredContext(item),
+      tags: item.tags || [],
+      score: scoreDocument(item, tokens),
+    };
+  });
   const usedChars = selection.usedChars;
   const skippedByMaxChars = selection.skippedByMaxChars;
@@ -848,19 +1026,23 @@ function constructContextPack({ query = '', maxItems = 8, maxChars = 6000, names
   };
   appendJsonl(contextFsPath(NAMESPACES.provenance, 'packs.jsonl'), pack);
-  appendSemanticCacheEntry({
-    id: `cache_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
-    timestamp: nowIso(),
-    key: buildSemanticCacheKey({
-      namespaces: normalizedNamespaces,
-      maxItems,
-      maxChars,
-    }),
-    query,
-    tokens,
-    sourceHash,
-    pack,
-  });
+  // Symmetric with the cache read: don't persist STE packs into the shared
+  // semantic cache because the cache key is strategy-agnostic.
+  if (effectiveStrategy !== 'summarize-then-expand') {
+    appendSemanticCacheEntry({
+      id: `cache_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
+      timestamp: nowIso(),
+      key: buildSemanticCacheKey({
+        namespaces: normalizedNamespaces,
+        maxItems,
+        maxChars,
+      }),
+      query,
+      tokens,
+      sourceHash,
+      pack,
+    });
+  }
   recordProvenance({
     type: 'context_pack_constructed',
     packId,

package/scripts/feedback-loop.js CHANGED Viewed

@@ -1167,6 +1167,15 @@ function captureFeedback(params) {
     timestamp: now,
   };
+  // Stamp a cross-session canonical hash on every memory record so future
+  // captures can short-circuit dedup without re-canonicalizing legacy entries.
+  // See scripts/lesson-canonical.js for the normalization contract.
+  try {
+    const { canonicalHash } = require('./lesson-canonical');
+    const hash = canonicalHash(memoryRecord);
+    if (hash) memoryRecord.canonicalHash = hash;
+  } catch (_canonErr) { /* canonical hashing is non-blocking */ }
   // Bayesian Belief Update (Project Bayes)
   try {
     const { updateBelief, shouldPrune } = require('./belief-update');
@@ -1210,14 +1219,49 @@ function captureFeedback(params) {
       const merged = mergeIntoExisting(MEMORY_LOG_PATH, similar.match, memoryRecord, feedbackEvent);
       synthesisResult = { action: 'merged', existingId: similar.match.id, similarity: similar.similarity, occurrences: merged.occurrences };
-      // Auto-promote if threshold reached
+      // Auto-promote if threshold reached, but only after the rule
+      // validator (scripts/rule-validator.js) confirms the proposed trigger
+      // matches the seed lesson and has acceptable precision on recent
+      // overlapping-tag events. This plugs the Autogenesis "validate
+      // before integrate" phase that was missing from the original
+      // promotion path — previously every threshold-crossing lesson
+      // shipped a rule regardless of whether it would over-block positives.
       if (shouldAutoPromote(merged)) {
         const rule = synthesizePreventionRule(merged);
-        synthesisResult.autoPromoted = true;
+        let validation = null;
+        try {
+          const { validateProposedRule } = require('./rule-validator');
+          // Sample the last 50 memory events across both signals. Using
+          // memory-log rather than feedback-log because memory records
+          // carry the richer title/content fields the validator scores
+          // against, and findSimilarLesson already reads this file.
+          const recentEvents = readJSONL(MEMORY_LOG_PATH).slice(-50);
+          validation = validateProposedRule(rule, {
+            seedLesson: merged,
+            recentEvents,
+          });
+          rule.validation = validation;
+        } catch (_valErr) {
+          // Validator failure must not block the existing pipeline; fall
+          // back to the legacy "promote unconditionally" behavior.
+          validation = { shouldPromote: true, reason: 'validator_error', error: _valErr.message };
+          rule.validation = validation;
+        }
         synthesisResult.preventionRule = rule;
-        // Store the synthesized rule
-        const rulesPath = path.join(path.dirname(MEMORY_LOG_PATH), 'synthesized-rules.jsonl');
-        appendJSONLLocal(rulesPath, rule);
+        synthesisResult.validation = validation;
+        if (validation.shouldPromote) {
+          synthesisResult.autoPromoted = true;
+          // Store the synthesized rule
+          const rulesPath = path.join(path.dirname(MEMORY_LOG_PATH), 'synthesized-rules.jsonl');
+          appendJSONLLocal(rulesPath, rule);
+        } else {
+          // Park rejected rules in a side log so operators can audit them.
+          synthesisResult.autoPromoted = false;
+          synthesisResult.rejectionReason = validation.reason;
+          const rejectedPath = path.join(path.dirname(MEMORY_LOG_PATH), 'rejected-rules.jsonl');
+          appendJSONLLocal(rejectedPath, rule);
+        }
       }
     } else {
       // No similar lesson — check exact duplicate, then store

package/scripts/harness-selector.js CHANGED Viewed

@@ -16,8 +16,10 @@
  */
 const path = require('path');
+const fs = require('fs');
 const HARNESS_DIR = path.join(__dirname, '..', 'config', 'gates');
+const ROOT_DIR = path.join(__dirname, '..');
 const HARNESSES = Object.freeze({
   deploy: path.join(HARNESS_DIR, 'deploy.json'),
@@ -113,6 +115,132 @@ function getHarnessPath(name) {
   return HARNESSES[name] ?? null;
 }
+/**
+ * Rough token estimate for a piece of text.
+ *
+ * The size is measured in UTF-8 bytes (not characters) and divided by
+ * `charsPerToken`, rounding up. A divisor that is not a usable number falls
+ * back to 4, and the divisor is never allowed below 1.
+ *
+ * @param {*} text - Text to measure; falsy values count as ''.
+ * @param {number} [charsPerToken=4] - Divisor applied to the byte length.
+ * @returns {number} Estimated token count.
+ */
+function estimateTokenCount(text, charsPerToken = 4) {
+  const byteLength = Buffer.byteLength(String(text || ''), 'utf8');
+  const requestedDivisor = Number(charsPerToken) || 4;
+  const safeDivisor = Math.max(1, requestedDivisor);
+  return Math.ceil(byteLength / safeDivisor);
+}
+/**
+ * Read a file as UTF-8 text, returning '' when the read fails for any reason.
+ *
+ * @param {string} filePath - Path of the file to read.
+ * @returns {string} File contents, or '' on any read error.
+ */
+function readIfExists(filePath) {
+  let text = '';
+  try {
+    text = fs.readFileSync(filePath, 'utf8');
+  } catch {
+    // Best-effort read: every failure is reported as "no content".
+  }
+  return text;
+}
+/**
+ * Read and parse a JSON file, returning null when reading or parsing fails.
+ *
+ * @param {string} filePath - Path of the JSON file.
+ * @returns {*} Parsed JSON value, or null on any read or parse error.
+ */
+function readJsonIfExists(filePath) {
+  let parsed = null;
+  try {
+    const raw = fs.readFileSync(filePath, 'utf8');
+    parsed = JSON.parse(raw);
+  } catch {
+    // Best-effort: unreadable files and invalid JSON both yield null.
+  }
+  return parsed;
+}
+/**
+ * Gather the default inputs for the harness optimization audit from disk.
+ *
+ * Reads AGENTS.md, CLAUDE.md and GEMINI.md under `rootDir`, the MCP tool
+ * index at .well-known/mcp/tools.json, and the list of harness names from
+ * listHarnesses(). Missing or unreadable files do not throw.
+ *
+ * @param {string} [rootDir=ROOT_DIR] - Directory that holds the global docs
+ *   and the .well-known folder.
+ * @returns {{globalDocs: Array<{name: string, chars: number,
+ *   estimatedTokens: number, exists: boolean}>, mcpToolCount: number,
+ *   progressiveToolIndexPresent: boolean, specializedHarnesses: *}}
+ */
+function collectDefaultHarnessAuditInputs(rootDir = ROOT_DIR) {
+  const globalDocNames = ['AGENTS.md', 'CLAUDE.md', 'GEMINI.md'];
+  const globalDocs = globalDocNames.map((name) => {
+    const content = readIfExists(path.join(rootDir, name));
+    return {
+      name,
+      // Despite the field name, this is a UTF-8 byte count.
+      chars: Buffer.byteLength(content, 'utf8'),
+      estimatedTokens: estimateTokenCount(content),
+      // readIfExists returns '' for unreadable files, so an empty file and a
+      // missing file both report exists: false.
+      exists: content.length > 0,
+    };
+  });
+  const toolIndex = readJsonIfExists(path.join(rootDir, '.well-known', 'mcp', 'tools.json'));
+  const tools = Array.isArray(toolIndex && toolIndex.tools) ? toolIndex.tools : [];
+  return {
+    globalDocs,
+    mcpToolCount: tools.length,
+    // tools.json is external data: optional chaining keeps a null entry in
+    // the array from throwing and aborting the whole audit.
+    progressiveToolIndexPresent: tools.some((tool) => typeof tool?.schemaUrl === 'string'),
+    specializedHarnesses: listHarnesses(),
+  };
+}
+/**
+ * Score harness audit inputs on a 0–100 scale and explain the result.
+ *
+ * Starting from 100, the score loses:
+ *   - up to 35 points when global docs exceed the token budget,
+ *   - 25 points when more than 12 MCP tools exist without a progressive
+ *     tool index,
+ *   - 18 points when fewer than three specialized harnesses are listed,
+ *   - 4 points per doc flagged `exists: false`, capped at 12.
+ * Status is 'compounding' at 85 or above, 'watch' at 65 or above, otherwise
+ * 'bloated'.
+ *
+ * @param {object} [inputs={}] - Audit inputs: globalDocs, mcpToolCount,
+ *   progressiveToolIndexPresent, specializedHarnesses. null is treated as {}.
+ * @param {object} [options={}] - Options; `docTokenBudget` overrides the
+ *   default budget of 9000. Values that are not a positive number fall back
+ *   to 9000. null is treated as {}.
+ * @returns {object} Audit report with name, status, score, roiPriority,
+ *   totals, signals, observations and recommendations.
+ */
+function scoreHarnessAudit(inputs = {}, options = {}) {
+  // Default parameters only cover undefined; tolerate an explicit null too.
+  const safeInputs = inputs ?? {};
+  const safeOptions = options ?? {};
+  // Ignore null / non-object entries so one bad doc record cannot throw.
+  const globalDocs = Array.isArray(safeInputs.globalDocs)
+    ? safeInputs.globalDocs.filter((doc) => doc !== null && typeof doc === 'object')
+    : [];
+  const totalDocTokens = globalDocs.reduce((sum, doc) => sum + Number(doc.estimatedTokens || 0), 0);
+  const totalDocChars = globalDocs.reduce((sum, doc) => sum + Number(doc.chars || 0), 0);
+  // A negative budget would turn the overage penalty into a bonus, and a
+  // non-numeric one would print "NaN"; both fall back to the default.
+  const requestedBudget = Number(safeOptions.docTokenBudget);
+  const docTokenBudget = requestedBudget > 0 ? requestedBudget : 9000;
+  const docsOverBudget = totalDocTokens > docTokenBudget;
+  const mcpToolCount = Number(safeInputs.mcpToolCount || 0);
+  const progressiveToolIndexPresent = Boolean(safeInputs.progressiveToolIndexPresent);
+  const specializedHarnesses = Array.isArray(safeInputs.specializedHarnesses) ? safeInputs.specializedHarnesses : [];
+  const hasSpecializedHarnesses = specializedHarnesses.length >= 3;
+  const missingDocs = globalDocs.filter((doc) => doc.exists === false).map((doc) => doc.name);
+  const observations = [];
+  const recommendations = [];
+  let score = 100;
+  if (docsOverBudget) {
+    const overageRatio = totalDocTokens / docTokenBudget;
+    score -= Math.min(35, Math.ceil((overageRatio - 1) * 22));
+    observations.push(`Global agent docs use about ${totalDocTokens} tokens against a ${docTokenBudget} token harness budget.`);
+    recommendations.push('Move verbose runbooks into skills, guides, or tool help, then leave AGENTS.md/CLAUDE.md as short discovery pointers.');
+  } else {
+    observations.push(`Global agent docs stay within the ${docTokenBudget} token harness budget.`);
+  }
+  if (!progressiveToolIndexPresent && mcpToolCount > 12) {
+    score -= 25;
+    observations.push(`${mcpToolCount} MCP tools appear preload-only, which can push agents toward instruction bloat.`);
+    recommendations.push('Expose a lightweight MCP tool index with per-tool schema URLs so agents fetch schemas only when needed.');
+  } else if (progressiveToolIndexPresent) {
+    observations.push('Progressive MCP tool discovery is available through schema URLs.');
+  }
+  if (!hasSpecializedHarnesses) {
+    score -= 18;
+    observations.push('Fewer than three specialized gate harnesses are available for risky workflows.');
+    recommendations.push('Add workflow-specific harnesses for deploy, code-edit, and database-write actions so default gates stay lean.');
+  } else {
+    observations.push(`Specialized harnesses are available: ${specializedHarnesses.join(', ')}.`);
+  }
+  if (missingDocs.length > 0) {
+    score -= Math.min(12, missingDocs.length * 4);
+    recommendations.push(`Restore missing global discovery docs or remove stale references: ${missingDocs.join(', ')}.`);
+  }
+  // Always close with the workflow recommendation; the wording depends on
+  // whether any corrective recommendation was made above.
+  if (recommendations.length === 0) {
+    recommendations.push('Keep using Research -> Plan -> Implement prompts and delegate only subtasks whose summaries are enough for the main context.');
+  } else {
+    recommendations.push('Use Research -> Plan -> Implement prompts so implementation starts after the harness has isolated only the needed context.');
+  }
+  const normalizedScore = Math.max(0, Math.min(100, score));
+  const status = normalizedScore >= 85 ? 'compounding' : normalizedScore >= 65 ? 'watch' : 'bloated';
+  return {
+    name: 'thumbgate-harness-optimization-audit',
+    status,
+    score: normalizedScore,
+    roiPriority: normalizedScore < 85 ? 'conversion' : 'retention',
+    totals: {
+      globalDocChars: totalDocChars,
+      globalDocEstimatedTokens: totalDocTokens,
+      mcpToolCount,
+      specializedHarnessCount: specializedHarnesses.length,
+    },
+    signals: {
+      docsOverBudget,
+      progressiveToolIndexPresent,
+      hasSpecializedHarnesses,
+      missingDocs,
+    },
+    observations,
+    recommendations,
+  };
+}
+/**
+ * Build the harness optimization audit report.
+ *
+ * Uses `options.inputs` when provided; otherwise collects the default inputs
+ * from `options.rootDir` (or ROOT_DIR when that is falsy). The full options
+ * object is forwarded to scoreHarnessAudit.
+ *
+ * @param {object} [options={}] - Optional `rootDir`, `inputs`, and scoring
+ *   options understood by scoreHarnessAudit.
+ * @returns {object} The report returned by scoreHarnessAudit.
+ */
+function buildHarnessOptimizationAudit(options = {}) {
+  const auditRoot = options.rootDir || ROOT_DIR;
+  const auditInputs = options.inputs
+    ? options.inputs
+    : collectDefaultHarnessAuditInputs(auditRoot);
+  return scoreHarnessAudit(auditInputs, options);
+}
 // ---------------------------------------------------------------------------
 // Internal helpers
 // ---------------------------------------------------------------------------
@@ -140,6 +268,10 @@ module.exports = {
   selectHarnessName,
   listHarnesses,
   getHarnessPath,
+  estimateTokenCount,
+  collectDefaultHarnessAuditInputs,
+  scoreHarnessAudit,
+  buildHarnessOptimizationAudit,
   extractCommandText,
   HARNESSES,
   DEPLOY_PATTERNS,