thumbgate 1.14.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +60 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +217 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +211 -8
  12. package/config/enforcement.json +59 -7
  13. package/config/evals/agent-safety-eval.json +338 -22
  14. package/config/gates/default.json +33 -0
  15. package/config/gates/routine.json +43 -0
  16. package/config/github-about.json +3 -3
  17. package/config/mcp-allowlists.json +4 -0
  18. package/config/merge-quality-checks.json +2 -1
  19. package/config/model-candidates.json +131 -0
  20. package/openapi/openapi.yaml +118 -2
  21. package/package.json +70 -51
  22. package/public/blog.html +7 -7
  23. package/public/codex-plugin.html +13 -7
  24. package/public/compare.html +29 -23
  25. package/public/dashboard.html +105 -12
  26. package/public/guide.html +28 -28
  27. package/public/index.html +233 -97
  28. package/public/learn.html +87 -20
  29. package/public/lessons.html +26 -2
  30. package/public/numbers.html +271 -0
  31. package/public/pro.html +89 -19
  32. package/scripts/agent-audit-trace.js +55 -0
  33. package/scripts/agent-memory-lifecycle.js +96 -0
  34. package/scripts/agent-readiness-plan.js +118 -0
  35. package/scripts/agentic-data-pipeline.js +21 -1
  36. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  37. package/scripts/ai-org-governance.js +98 -0
  38. package/scripts/ai-search-distribution.js +43 -0
  39. package/scripts/artifact-agent-plan.js +81 -0
  40. package/scripts/billing.js +27 -8
  41. package/scripts/cli-feedback.js +2 -1
  42. package/scripts/cli-schema.js +60 -5
  43. package/scripts/code-mode-mcp-plan.js +71 -0
  44. package/scripts/commercial-offer.js +1 -1
  45. package/scripts/context-engine.js +1 -2
  46. package/scripts/context-manager.js +4 -1
  47. package/scripts/contextfs.js +214 -32
  48. package/scripts/dashboard-render-spec.js +1 -1
  49. package/scripts/dashboard.js +275 -9
  50. package/scripts/decision-journal.js +13 -3
  51. package/scripts/document-workflow-governance.js +62 -0
  52. package/scripts/enterprise-agent-rollout.js +34 -0
  53. package/scripts/experience-replay-governance.js +69 -0
  54. package/scripts/export-hf-dataset.js +1 -1
  55. package/scripts/feedback-loop.js +141 -9
  56. package/scripts/feedback-to-rules.js +17 -23
  57. package/scripts/gates-engine.js +4 -6
  58. package/scripts/growth-campaigns.js +49 -0
  59. package/scripts/harness-selector.js +145 -1
  60. package/scripts/hybrid-supervisor-agent.js +64 -0
  61. package/scripts/inference-cache-policy.js +72 -0
  62. package/scripts/inference-economics.js +53 -0
  63. package/scripts/internal-agent-bootstrap.js +12 -2
  64. package/scripts/knowledge-layer-plan.js +108 -0
  65. package/scripts/lesson-canonical.js +181 -0
  66. package/scripts/lesson-db.js +71 -10
  67. package/scripts/lesson-inference.js +183 -44
  68. package/scripts/lesson-search.js +4 -1
  69. package/scripts/lesson-synthesis.js +23 -2
  70. package/scripts/llm-client.js +157 -26
  71. package/scripts/mailer/resend-mailer.js +112 -1
  72. package/scripts/mcp-transport-strategy.js +66 -0
  73. package/scripts/memory-store-governance.js +60 -0
  74. package/scripts/meta-agent-loop.js +7 -13
  75. package/scripts/model-access-eligibility.js +38 -0
  76. package/scripts/model-migration-readiness.js +55 -0
  77. package/scripts/native-messaging-audit.js +514 -0
  78. package/scripts/operational-integrity.js +96 -3
  79. package/scripts/otel-declarative-config.js +56 -0
  80. package/scripts/perplexity-client.js +1 -1
  81. package/scripts/post-training-governance.js +34 -0
  82. package/scripts/pr-manager.js +47 -7
  83. package/scripts/private-core-boundary.js +72 -0
  84. package/scripts/production-agent-readiness.js +40 -0
  85. package/scripts/profile-router.js +16 -1
  86. package/scripts/prompt-eval.js +564 -32
  87. package/scripts/prompt-programs.js +93 -0
  88. package/scripts/provider-action-normalizer.js +585 -0
  89. package/scripts/rule-validator.js +285 -0
  90. package/scripts/scaling-law-claims.js +60 -0
  91. package/scripts/security-scanner.js +1 -1
  92. package/scripts/self-distill-agent.js +7 -32
  93. package/scripts/seo-gsd.js +400 -43
  94. package/scripts/skill-rag-router.js +53 -0
  95. package/scripts/spec-gate.js +1 -1
  96. package/scripts/student-consistent-training.js +73 -0
  97. package/scripts/synthetic-data-provenance.js +98 -0
  98. package/scripts/task-context-result.js +81 -0
  99. package/scripts/telemetry-analytics.js +149 -0
  100. package/scripts/thompson-sampling.js +2 -2
  101. package/scripts/token-savings.js +7 -6
  102. package/scripts/token-tco.js +46 -0
  103. package/scripts/tool-registry.js +75 -3
  104. package/scripts/verification-loop.js +10 -1
  105. package/scripts/verifier-scoring.js +71 -0
  106. package/scripts/workflow-sentinel.js +284 -28
  107. package/scripts/workspace-agent-routines.js +118 -0
  108. package/skills/thumbgate/SKILL.md +1 -1
  109. package/src/api/server.js +434 -120
  110. package/.claude-plugin/README.md +0 -170
  111. package/adapters/README.md +0 -12
  112. package/scripts/analytics-report.js +0 -328
  113. package/scripts/autonomous-workflow.js +0 -377
  114. package/scripts/billing-setup.js +0 -109
  115. package/scripts/creator-campaigns.js +0 -239
  116. package/scripts/cross-encoder-reranker.js +0 -235
  117. package/scripts/daemon-manager.js +0 -108
  118. package/scripts/decision-trace.js +0 -354
  119. package/scripts/delegation-runtime.js +0 -896
  120. package/scripts/dispatch-brief.js +0 -159
  121. package/scripts/distribution-surfaces.js +0 -110
  122. package/scripts/feedback-history-distiller.js +0 -382
  123. package/scripts/funnel-analytics.js +0 -35
  124. package/scripts/history-distiller.js +0 -200
  125. package/scripts/hosted-job-launcher.js +0 -256
  126. package/scripts/intent-router.js +0 -392
  127. package/scripts/lesson-reranker.js +0 -263
  128. package/scripts/lesson-retrieval.js +0 -148
  129. package/scripts/managed-lesson-agent.js +0 -183
  130. package/scripts/operational-dashboard.js +0 -103
  131. package/scripts/operational-summary.js +0 -129
  132. package/scripts/operator-artifacts.js +0 -608
  133. package/scripts/optimize-context.js +0 -17
  134. package/scripts/org-dashboard.js +0 -206
  135. package/scripts/partner-orchestration.js +0 -146
  136. package/scripts/predictive-insights.js +0 -356
  137. package/scripts/pulse.js +0 -80
  138. package/scripts/reflector-agent.js +0 -221
  139. package/scripts/sales-pipeline.js +0 -681
  140. package/scripts/session-episode-store.js +0 -329
  141. package/scripts/session-health-sensor.js +0 -242
  142. package/scripts/session-report.js +0 -120
  143. package/scripts/swarm-coordinator.js +0 -81
  144. package/scripts/tool-kpi-tracker.js +0 -12
  145. package/scripts/webhook-delivery.js +0 -62
  146. package/scripts/workflow-sprint-intake.js +0 -475
  147. package/skills/agent-memory/SKILL.md +0 -97
  148. package/skills/solve-architecture-autonomy/SKILL.md +0 -17
  149. package/skills/solve-architecture-autonomy/tool.js +0 -33
  150. package/skills/thumbgate-feedback/SKILL.md +0 -49
@@ -1,392 +0,0 @@
1
- #!/usr/bin/env node
2
- const fs = require('fs');
3
- const path = require('path');
4
- const { getActiveMcpProfile, getAllowedTools } = require('./mcp-policy');
5
- const { loadGatesConfig } = require('./gates-engine');
6
- const { loadModel, samplePosteriors } = require('./thompson-sampling');
7
- const { analyzeCodeGraphImpact } = require('./codegraph-context');
8
- const {
9
- buildPartnerStrategy,
10
- getPartnerActionBias,
11
- } = require('./partner-orchestration');
12
- const {
13
- evaluateDelegation,
14
- normalizeDelegationMode,
15
- } = require('./delegation-runtime');
16
- const { resolveFeedbackDir } = require('./feedback-paths');
17
-
18
- const PROJECT_ROOT = path.join(__dirname, '..');
19
- const DEFAULT_BUNDLE_DIR = path.join(PROJECT_ROOT, 'config', 'policy-bundles');
20
- const RISK_LEVELS = ['low', 'medium', 'high', 'critical'];
21
-
22
- function getDefaultBundleId() {
23
- return process.env.THUMBGATE_POLICY_BUNDLE || 'default-v1';
24
- }
25
-
26
- function getBundlePath(bundleId = getDefaultBundleId()) {
27
- if (process.env.THUMBGATE_POLICY_BUNDLE_PATH) {
28
- return process.env.THUMBGATE_POLICY_BUNDLE_PATH;
29
- }
30
- // Prevent path traversal: resolve and verify result stays within bundle dir (CodeQL S2083)
31
- const candidate = path.resolve(DEFAULT_BUNDLE_DIR, `${bundleId}.json`);
32
- if (!candidate.startsWith(path.resolve(DEFAULT_BUNDLE_DIR))) {
33
- throw new Error(`Invalid bundle ID: path traversal detected in "${bundleId}"`);
34
- }
35
- return candidate;
36
- }
37
-
38
- function validateBundle(bundle) {
39
- if (!bundle || typeof bundle !== 'object') {
40
- throw new Error('Invalid policy bundle: expected object');
41
- }
42
- if (!bundle.bundleId || typeof bundle.bundleId !== 'string') {
43
- throw new Error('Invalid policy bundle: missing bundleId');
44
- }
45
- if (!Array.isArray(bundle.intents) || bundle.intents.length === 0) {
46
- throw new Error('Invalid policy bundle: intents must be a non-empty array');
47
- }
48
-
49
- bundle.intents.forEach((intent) => {
50
- if (!intent.id || typeof intent.id !== 'string') {
51
- throw new Error('Invalid policy bundle: intent id is required');
52
- }
53
- if (!RISK_LEVELS.includes(intent.risk)) {
54
- throw new Error(`Invalid policy bundle: unsupported risk '${intent.risk}' for intent '${intent.id}'`);
55
- }
56
- if (!Array.isArray(intent.actions) || intent.actions.length === 0) {
57
- throw new Error(`Invalid policy bundle: intent '${intent.id}' must define actions`);
58
- }
59
- });
60
-
61
- return true;
62
- }
63
-
64
- function loadPolicyBundle(bundleId = getDefaultBundleId()) {
65
- const raw = fs.readFileSync(getBundlePath(bundleId), 'utf-8');
66
- const parsed = JSON.parse(raw);
67
- validateBundle(parsed);
68
- return parsed;
69
- }
70
-
71
- function getRequiredApprovalRisks(bundle, mcpProfile) {
72
- const approval = bundle.approval || {};
73
- if (approval.profileOverrides && Array.isArray(approval.profileOverrides[mcpProfile])) {
74
- return approval.profileOverrides[mcpProfile];
75
- }
76
- return Array.isArray(approval.requiredRisks) ? approval.requiredRisks : ['high', 'critical'];
77
- }
78
-
79
- function assertKnownMcpProfile(profile) {
80
- getAllowedTools(profile);
81
- return profile;
82
- }
83
-
84
- function listIntents(options = {}) {
85
- const bundle = loadPolicyBundle(options.bundleId);
86
- const profile = assertKnownMcpProfile(options.mcpProfile || getActiveMcpProfile());
87
- const requiredRisks = getRequiredApprovalRisks(bundle, profile);
88
- const partnerStrategy = buildPartnerStrategy({
89
- partnerProfile: options.partnerProfile,
90
- tokenBudget: DEFAULT_TOKEN_BUDGET,
91
- });
92
-
93
- return {
94
- bundleId: bundle.bundleId,
95
- mcpProfile: profile,
96
- partnerProfile: partnerStrategy.profile,
97
- partnerStrategy: {
98
- verificationMode: partnerStrategy.verificationMode,
99
- recommendedChecks: partnerStrategy.recommendedChecks,
100
- },
101
- intents: bundle.intents.map((intent) => ({
102
- id: intent.id,
103
- description: intent.description,
104
- risk: intent.risk,
105
- actionCount: intent.actions.length,
106
- requiresApproval: requiredRisks.includes(intent.risk),
107
- })),
108
- };
109
- }
110
-
111
- /* ── Token Budget Defaults ──────────────────────────────────────── */
112
- const DEFAULT_TOKEN_BUDGET = {
113
- total: 12000,
114
- perAction: 4000,
115
- contextPack: 6000,
116
- };
117
-
118
- function resolveTokenBudget(overrides = {}) {
119
- const budget = { ...DEFAULT_TOKEN_BUDGET };
120
- if (typeof overrides.total === 'number' && overrides.total > 0) budget.total = overrides.total;
121
- if (typeof overrides.perAction === 'number' && overrides.perAction > 0) budget.perAction = overrides.perAction;
122
- if (typeof overrides.contextPack === 'number' && overrides.contextPack > 0) budget.contextPack = overrides.contextPack;
123
- return budget;
124
- }
125
-
126
- /* ── Planning Decomposition ────────────────────────────────────── */
127
-
128
- function decomposeActions(actions) {
129
- if (!Array.isArray(actions) || actions.length === 0) return [];
130
-
131
- const phases = [];
132
- let currentPhase = { kind: actions[0].kind, actions: [] };
133
-
134
- actions.forEach((action) => {
135
- if (action.kind === currentPhase.kind) {
136
- currentPhase.actions.push(action);
137
- } else {
138
- phases.push(currentPhase);
139
- currentPhase = { kind: action.kind, actions: [action] };
140
- }
141
- });
142
- phases.push(currentPhase);
143
-
144
- return phases.map((phase, i) => ({
145
- phaseIndex: i,
146
- kind: phase.kind,
147
- parallel: phase.actions.length > 1,
148
- actions: phase.actions,
149
- }));
150
- }
151
-
152
- function mergeUnique(values = []) {
153
- return Array.from(new Set(values.filter(Boolean)));
154
- }
155
-
156
- function planIntent(options = {}) {
157
- const bundle = loadPolicyBundle(options.bundleId);
158
- const profile = assertKnownMcpProfile(options.mcpProfile || getActiveMcpProfile());
159
- const intentId = String(options.intentId || '').trim();
160
- const context = String(options.context || '').trim();
161
- const approved = options.approved === true;
162
- const tokenBudget = resolveTokenBudget(options.tokenBudget);
163
- const delegationMode = normalizeDelegationMode(options.delegationMode);
164
-
165
- if (!intentId) {
166
- throw new Error('intentId is required');
167
- }
168
-
169
- const intent = bundle.intents.find((item) => item.id === intentId);
170
- if (!intent) {
171
- throw new Error(`Unknown intent: ${intentId}`);
172
- }
173
-
174
- const requiredRisks = getRequiredApprovalRisks(bundle, profile);
175
- const requiresApproval = requiredRisks.includes(intent.risk);
176
- const checkpointRequired = requiresApproval && !approved;
177
- const partnerStrategy = buildPartnerStrategy({
178
- partnerProfile: options.partnerProfile,
179
- tokenBudget,
180
- });
181
- const rankedActions = rankActions(intent.actions, {
182
- modelPath: options.modelPath,
183
- partnerStrategy,
184
- });
185
- const plannedActions = partnerStrategy.profile === 'balanced'
186
- ? intent.actions
187
- : rankedActions.ranked;
188
- const phases = decomposeActions(plannedActions);
189
- const codegraphImpact = analyzeCodeGraphImpact({
190
- intentId,
191
- context,
192
- repoPath: options.repoPath,
193
- });
194
- const partnerChecks = mergeUnique([
195
- ...partnerStrategy.recommendedChecks,
196
- ...codegraphImpact.verificationHints,
197
- ]);
198
- const enrichedPartnerStrategy = {
199
- ...partnerStrategy,
200
- recommendedChecks: partnerChecks,
201
- };
202
- const basePlan = {
203
- bundleId: bundle.bundleId,
204
- mcpProfile: profile,
205
- partnerProfile: enrichedPartnerStrategy.profile,
206
- generatedAt: new Date().toISOString(),
207
- status: checkpointRequired ? 'checkpoint_required' : 'ready',
208
- intent: {
209
- id: intent.id,
210
- description: intent.description,
211
- risk: intent.risk,
212
- },
213
- context,
214
- requiresApproval,
215
- approved,
216
- checkpoint: checkpointRequired
217
- ? {
218
- type: 'human_approval',
219
- reason: `Intent '${intent.id}' has risk '${intent.risk}' under profile '${profile}'.`,
220
- requiredForRiskLevels: requiredRisks,
221
- }
222
- : null,
223
- actions: plannedActions,
224
- phases,
225
- tokenBudget: enrichedPartnerStrategy.tokenBudget || tokenBudget,
226
- partnerStrategy: enrichedPartnerStrategy,
227
- actionScores: rankedActions.scores,
228
- codegraphImpact,
229
- killSwitches: loadGatesConfig().gates
230
- .filter((g) => {
231
- const isHighRisk = ['high', 'critical'].includes(intent.risk);
232
- if (isHighRisk && (g.severity === 'high' || g.severity === 'critical')) return true;
233
-
234
- const actionNames = plannedActions.map((a) => a.name);
235
- return g.trigger && actionNames.some((name) => g.trigger.toLowerCase().includes(name.toLowerCase()));
236
- })
237
- .map((g) => ({
238
- id: g.id,
239
- layer: g.layer || 'Execution',
240
- action: g.action,
241
- severity: g.severity,
242
- })),
243
- };
244
- const delegation = evaluateDelegation({
245
- delegationMode,
246
- plan: basePlan,
247
- mcpProfile: profile,
248
- context,
249
- repoPath: options.repoPath,
250
- });
251
-
252
- return {
253
- ...basePlan,
254
- executionMode: delegation.executionMode,
255
- delegationEligible: delegation.delegationEligible,
256
- delegationScore: delegation.delegationScore,
257
- delegationReason: delegation.delegationReason,
258
- delegateProfile: delegation.delegateProfile,
259
- handoffContract: delegation.handoffContract,
260
- };
261
- }
262
-
263
- const ACTION_CATEGORY_MAP = {
264
- capture_feedback: 'code_edit',
265
- feedback_summary: 'debugging',
266
- search_lessons: 'search',
267
- retrieve_lessons: 'search',
268
- search_thumbgate: 'search',
269
- prevention_rules: 'security',
270
- construct_context_pack: 'architecture',
271
- export_dpo_pairs: 'testing',
272
- export_databricks_bundle: 'testing',
273
- context_provenance: 'search',
274
- evaluate_context_pack: 'pr_review',
275
- };
276
-
277
- function getDefaultModelPath() {
278
- const feedbackDir = resolveFeedbackDir();
279
- return path.join(feedbackDir, 'feedback_model.json');
280
- }
281
-
282
- function getPartnerActionPriority(action, partnerStrategy) {
283
- if (!action || !partnerStrategy || partnerStrategy.verificationMode !== 'evidence_first') {
284
- return 1;
285
- }
286
-
287
- if (action.name === 'construct_context_pack' || action.name === 'context_provenance') {
288
- return 0;
289
- }
290
-
291
- return 1;
292
- }
293
-
294
- function scoreActions(actions, modelPath, options = {}) {
295
- const partnerStrategy = options.partnerStrategy || buildPartnerStrategy({
296
- partnerProfile: options.partnerProfile,
297
- });
298
- const model = loadModel(modelPath || getDefaultModelPath());
299
- const posteriors = samplePosteriors(model);
300
- const partnerScore = posteriors[partnerStrategy.partnerCategory] !== undefined
301
- ? posteriors[partnerStrategy.partnerCategory]
302
- : 0.5;
303
-
304
- return actions.map((action, index) => {
305
- const category = ACTION_CATEGORY_MAP[action.name] || 'uncategorized';
306
- const categoryScore = posteriors[category] !== undefined ? posteriors[category] : 0.5;
307
- const partnerBias = getPartnerActionBias(action, partnerStrategy);
308
- const score = Math.max(0, Math.min(1, (categoryScore * 0.7) + (partnerScore * 0.3) + partnerBias));
309
- return {
310
- action,
311
- category,
312
- actionScore: categoryScore,
313
- partnerProfile: partnerStrategy.profile,
314
- partnerCategory: partnerStrategy.partnerCategory,
315
- partnerScore,
316
- partnerBias,
317
- partnerPriority: getPartnerActionPriority(action, partnerStrategy),
318
- score,
319
- index,
320
- };
321
- }).sort((a, b) => {
322
- if (a.partnerPriority !== b.partnerPriority) {
323
- return a.partnerPriority - b.partnerPriority;
324
- }
325
- return b.score - a.score || a.index - b.index;
326
- });
327
- }
328
-
329
- function rankActions(actions, options = {}) {
330
- const modelPath = options.modelPath || getDefaultModelPath();
331
- const partnerStrategy = options.partnerStrategy || buildPartnerStrategy({
332
- partnerProfile: options.partnerProfile,
333
- });
334
- const scored = scoreActions(actions, modelPath, { partnerStrategy });
335
- return {
336
- ranked: scored.map((s) => s.action),
337
- scores: scored.map((s) => ({
338
- name: s.action.name,
339
- category: s.category,
340
- partnerProfile: s.partnerProfile,
341
- partnerCategory: s.partnerCategory,
342
- actionScore: s.actionScore,
343
- partnerScore: s.partnerScore,
344
- partnerBias: s.partnerBias,
345
- partnerPriority: s.partnerPriority,
346
- score: s.score,
347
- })),
348
- };
349
- }
350
-
351
- module.exports = {
352
- DEFAULT_BUNDLE_DIR,
353
- DEFAULT_TOKEN_BUDGET,
354
- RISK_LEVELS,
355
- getDefaultBundleId,
356
- getBundlePath,
357
- validateBundle,
358
- loadPolicyBundle,
359
- getRequiredApprovalRisks,
360
- assertKnownMcpProfile,
361
- listIntents,
362
- planIntent,
363
- resolveTokenBudget,
364
- decomposeActions,
365
- ACTION_CATEGORY_MAP,
366
- scoreActions,
367
- rankActions,
368
- };
369
-
370
- if (require.main === module) {
371
- const args = process.argv.slice(2);
372
- const intentArg = args.find((arg) => arg.startsWith('--intent='));
373
- const profileArg = args.find((arg) => arg.startsWith('--profile='));
374
- const bundleArg = args.find((arg) => arg.startsWith('--bundle='));
375
- const approved = args.includes('--approved');
376
-
377
- if (!intentArg) {
378
- console.log(JSON.stringify(listIntents({
379
- mcpProfile: profileArg ? profileArg.replace('--profile=', '') : undefined,
380
- bundleId: bundleArg ? bundleArg.replace('--bundle=', '') : undefined,
381
- }), null, 2));
382
- process.exit(0);
383
- }
384
-
385
- const plan = planIntent({
386
- intentId: intentArg.replace('--intent=', ''),
387
- mcpProfile: profileArg ? profileArg.replace('--profile=', '') : undefined,
388
- bundleId: bundleArg ? bundleArg.replace('--bundle=', '') : undefined,
389
- approved,
390
- });
391
- console.log(JSON.stringify(plan, null, 2));
392
- }
@@ -1,263 +0,0 @@
1
- 'use strict';
2
-
3
- /**
4
- * Cross-encoder reranker for lesson retrieval.
5
- *
6
- * Unlike the bi-encoders already in use (Jaccard + bigram Jaccard), a
7
- * cross-encoder processes the (query, lesson) pair jointly — so it can
8
- * catch relevance signals that independent scoring misses:
9
- *
10
- * - Field-weighted BM25: a query term in `whatWentWrong` is worth more
11
- * than the same term in `tags`
12
- * - Synonym/alias expansion: "force-push" ↔ "push --force", "deploy" ↔
13
- * "deployment", etc.
14
- * - Signal coherence: failure-sounding queries boost negative-signal lessons
15
- * - Tool name joint scoring: query toolName × lesson toolsUsed
16
- * - Score blending: reranked score is blended with the original retrieval
17
- * score so we never fully discard the bi-encoder's signal
18
- *
19
- * Usage:
20
- * const { rerankLessons } = require('./lesson-reranker');
21
- * const reranked = rerankLessons(query, candidates, { topK: 5, toolName });
22
- */
23
-
24
- // BM25 hyper-parameters
25
- const BM25_K1 = 1.5; // term saturation
26
- const BM25_B = 0.75; // length normalisation
27
-
28
- // Weight given to each lesson field when scoring a (query, lesson) pair.
29
- // Higher weight = query terms appearing in that field contribute more to score.
30
- const FIELD_WEIGHTS = {
31
- whatWentWrong: 3.0,
32
- whatToChange: 2.5,
33
- howToAvoid: 2.0,
34
- whatWorked: 2.0,
35
- summary: 1.5,
36
- content: 1.5,
37
- context: 1.2,
38
- title: 1.0,
39
- rootCause: 1.0,
40
- reasoning: 0.8,
41
- tags: 0.5,
42
- category: 0.4,
43
- };
44
-
45
- // Synonym clusters: any term in a group matches all others.
46
- const SYNONYM_GROUPS = [
47
- ['force-push', 'force push', 'push --force', 'git push --force', 'force_push'],
48
- ['main', 'main branch', 'master', 'trunk', 'protected branch'],
49
- ['env', '.env', 'environment variable', 'env var', 'dotenv', 'secret'],
50
- ['deploy', 'deployment', 'ship', 'release', 'publish', 'rollout'],
51
- ['db', 'database', 'sqlite', 'postgres', 'postgresql', 'migration', 'migrate'],
52
- ['test', 'tests', 'test suite', 'spec', 'failing test', 'test failure'],
53
- ['ci', 'ci/cd', 'pipeline', 'github actions', 'workflow', 'build'],
54
- ['lint', 'linter', 'eslint', 'prettier', 'format'],
55
- ['auth', 'authentication', 'authorization', 'token', 'api key', 'credential'],
56
- ['delete', 'remove', 'rm', 'drop', 'destroy', 'wipe'],
57
- ['merge', 'pull request', 'pr', 'rebase', 'squash'],
58
- ];
59
-
60
- // Regex patterns that indicate the query is about a failure/mistake.
61
- const FAILURE_PATTERN = /fail|error|wrong|broken|mistake|bad|incorrect|problem|issue|bug|crash|broke|exception/i;
62
-
63
- /**
64
- * Tokenise text into lowercase word-like tokens of length >= 2.
65
- * Hyphens and underscores are treated as delimiters so "force-push"
66
- * becomes ["force", "push"].
67
- * Exported so tests can verify expansion behaviour.
68
- */
69
- function tokenize(text) {
70
- if (!text) return [];
71
- return text
72
- .toLowerCase()
73
- .replace(/[^\w\s]/g, ' ') // replace all non-word, non-space chars (incl. hyphens, dots) with space
74
- .split(/[\s_]+/) // split on whitespace and underscores
75
- .filter((t) => t.length >= 2);
76
- }
77
-
78
- /**
79
- * Expand a set of query tokens with synonyms from SYNONYM_GROUPS.
80
- * Returns a deduplicated array of all terms (originals + expansions).
81
- */
82
- function expandTerms(terms) {
83
- const expanded = new Set(terms);
84
- for (const term of terms) {
85
- for (const group of SYNONYM_GROUPS) {
86
- if (group.some((syn) => syn.split(/\s+/).some((w) => w === term || term.includes(w)))) {
87
- group.forEach((syn) => tokenize(syn).forEach((t) => expanded.add(t)));
88
- }
89
- }
90
- }
91
- return [...expanded];
92
- }
93
-
94
- /**
95
- * Extract the text value of a named field from a lesson candidate.
96
- * Handles both the flat structure from lesson-retrieval.js and the nested
97
- * { lesson: { whatWentWrong, ... } } structure from lesson-search.js.
98
- */
99
- function getField(candidate, field) {
100
- const nested = candidate.lesson;
101
- const val = (nested && nested[field]) || candidate[field] || '';
102
- if (Array.isArray(val)) return val.join(' ');
103
- return String(val);
104
- }
105
-
106
- /**
107
- * Compute field-weighted BM25 scores for a list of candidates (BM25F variant).
108
- *
109
- * BM25F processes the (query, lesson) pair jointly: query terms are weighted
110
- * differently depending on which lesson field they appear in (via FIELD_WEIGHTS).
111
- * IDF is computed at document level (how many docs contain the term across any
112
- * field) so it stays positive regardless of field weights.
113
- *
114
- * Returns an array of { candidate, bm25Score } objects in the same order
115
- * as the input.
116
- */
117
- function fieldWeightedBM25(queryTerms, candidates) {
118
- const N = candidates.length;
119
- if (N === 0) return [];
120
-
121
- const fieldEntries = Object.entries(FIELD_WEIGHTS);
122
- const fieldKeys = Object.keys(FIELD_WEIGHTS);
123
-
124
- // Precompute per-document, per-field token arrays (avoid re-tokenising)
125
- const docFieldTokens = candidates.map((candidate) => {
126
- const fieldMap = {};
127
- for (const field of fieldKeys) {
128
- fieldMap[field] = tokenize(getField(candidate, field));
129
- }
130
- return fieldMap;
131
- });
132
-
133
- // Per-field average token lengths across all documents
134
- const avgFieldLen = {};
135
- for (const field of fieldKeys) {
136
- const total = docFieldTokens.reduce((sum, d) => sum + d[field].length, 0);
137
- avgFieldLen[field] = total / N || 1; // fallback to 1 to avoid /0
138
- }
139
-
140
- // Document-level df: count of documents that contain each term (any field).
141
- // Keeping df as a plain count (not field-weighted) ensures IDF is always positive.
142
- const df = new Map();
143
- for (let i = 0; i < N; i++) {
144
- const seenInDoc = new Set();
145
- for (const field of fieldKeys) {
146
- for (const tok of docFieldTokens[i][field]) {
147
- if (!seenInDoc.has(tok)) {
148
- df.set(tok, (df.get(tok) || 0) + 1);
149
- seenInDoc.add(tok);
150
- }
151
- }
152
- }
153
- }
154
-
155
- return candidates.map((candidate, i) => {
156
- let score = 0;
157
-
158
- for (const qTerm of queryTerms) {
159
- const termDf = df.get(qTerm) || 0;
160
- if (termDf === 0) continue;
161
-
162
- // IDF is always positive because df ≤ N
163
- const idf = Math.log((N - termDf + 0.5) / (termDf + 0.5) + 1);
164
- if (idf <= 0) continue;
165
-
166
- // BM25F: compute weighted sum of per-field normalised TF, then scale by IDF
167
- let weightedTF = 0;
168
- for (const [field, fieldWeight] of fieldEntries) {
169
- const tokens = docFieldTokens[i][field];
170
- const fieldLen = tokens.length;
171
- if (fieldLen === 0) continue;
172
-
173
- let tf = 0;
174
- for (const t of tokens) {
175
- if (t === qTerm) tf++;
176
- }
177
- if (tf === 0) continue;
178
-
179
- const avgLen = avgFieldLen[field];
180
- const normTF = tf / (tf + BM25_K1 * (1 - BM25_B + BM25_B * fieldLen / avgLen));
181
- weightedTF += fieldWeight * normTF;
182
- }
183
-
184
- score += idf * weightedTF;
185
- }
186
-
187
- return { candidate, bm25Score: score };
188
- });
189
- }
190
-
191
- /**
192
- * Rerank a list of lesson candidates using a cross-encoder approach.
193
- *
194
- * @param {string} query - The original retrieval query / action context
195
- * @param {Array} candidates - Lesson objects from the bi-encoder stage
196
- * @param {object} options
197
- * @param {number} [options.topK=5] - How many results to return
198
- * @param {string} [options.toolName] - Tool name from the triggering hook call
199
- * @param {number} [options.blendWeight=0.7] - Weight given to BM25 score vs original
200
- * retrieval score (0 = all original, 1 = all BM25)
201
- * @returns {Array} Reranked candidates with `rerankedScore` field added
202
- */
203
- function rerankLessons(query, candidates, options = {}) {
204
- const {
205
- topK = 5,
206
- toolName = '',
207
- blendWeight = 0.7,
208
- } = options;
209
-
210
- if (!candidates || candidates.length === 0) return [];
211
- if (candidates.length === 1) return candidates.slice(0, topK);
212
-
213
- // Build expanded query term set
214
- const rawTerms = tokenize((query || '') + (toolName ? ' ' + toolName : ''));
215
- const queryTerms = expandTerms(rawTerms);
216
-
217
- const isFailureQuery = FAILURE_PATTERN.test(query || '');
218
-
219
- // Compute BM25 scores for all candidates
220
- const bm25Results = fieldWeightedBM25(queryTerms, candidates);
221
-
222
- // Normalise BM25 scores to [0, 1]
223
- const maxBm25 = Math.max(...bm25Results.map((r) => r.bm25Score), 1e-9);
224
-
225
- const reranked = bm25Results.map(({ candidate, bm25Score }) => {
226
- const normBm25 = bm25Score / maxBm25;
227
-
228
- // Original bi-encoder score (field name differs between retrieval paths)
229
- const origScore = candidate.relevanceScore ?? candidate.score ?? 0;
230
-
231
- // Blend BM25 with original score
232
- let finalScore = blendWeight * normBm25 + (1 - blendWeight) * origScore;
233
-
234
- // Signal coherence bonus: failure queries → negative lessons rank higher
235
- const candidateSignal =
236
- candidate.signal ||
237
- (candidate.tags && candidate.tags.includes('negative') ? 'negative' : null);
238
- if (isFailureQuery && candidateSignal === 'negative') {
239
- finalScore *= 1.2;
240
- }
241
-
242
- // Tool name joint bonus: exact tool match between query context and lesson
243
- if (toolName) {
244
- const lessonTools = [
245
- ...(candidate.metadata?.toolsUsed || []),
246
- getField(candidate, 'toolUsed'),
247
- getField(candidate, 'toolName'),
248
- ].map((t) => (t || '').toLowerCase());
249
-
250
- if (lessonTools.some((t) => t && t.includes(toolName.toLowerCase()))) {
251
- finalScore *= 1.3;
252
- }
253
- }
254
-
255
- return { ...candidate, rerankedScore: Number(finalScore.toFixed(6)) };
256
- });
257
-
258
- return reranked
259
- .sort((a, b) => b.rerankedScore - a.rerankedScore)
260
- .slice(0, topK);
261
- }
262
-
263
- module.exports = { rerankLessons, fieldWeightedBM25, tokenize, expandTerms };