thumbgate 1.15.0 → 1.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +59 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +210 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +157 -8
  12. package/config/evals/agent-safety-eval.json +338 -22
  13. package/config/gates/routine.json +43 -0
  14. package/config/github-about.json +3 -3
  15. package/config/model-candidates.json +131 -0
  16. package/openapi/openapi.yaml +118 -2
  17. package/package.json +57 -49
  18. package/public/blog.html +7 -7
  19. package/public/codex-plugin.html +6 -6
  20. package/public/compare.html +29 -23
  21. package/public/dashboard.html +82 -10
  22. package/public/guide.html +28 -28
  23. package/public/index.html +216 -98
  24. package/public/learn.html +50 -22
  25. package/public/lessons.html +1 -1
  26. package/public/numbers.html +17 -17
  27. package/public/pro.html +82 -18
  28. package/scripts/agent-audit-trace.js +55 -0
  29. package/scripts/agent-memory-lifecycle.js +96 -0
  30. package/scripts/agent-readiness-plan.js +118 -0
  31. package/scripts/agentic-data-pipeline.js +21 -1
  32. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  33. package/scripts/ai-org-governance.js +98 -0
  34. package/scripts/ai-search-distribution.js +43 -0
  35. package/scripts/artifact-agent-plan.js +81 -0
  36. package/scripts/billing.js +27 -8
  37. package/scripts/cli-schema.js +18 -2
  38. package/scripts/code-mode-mcp-plan.js +71 -0
  39. package/scripts/context-engine.js +1 -2
  40. package/scripts/context-manager.js +4 -1
  41. package/scripts/dashboard-render-spec.js +1 -1
  42. package/scripts/dashboard.js +275 -9
  43. package/scripts/decision-journal.js +13 -3
  44. package/scripts/document-workflow-governance.js +62 -0
  45. package/scripts/enterprise-agent-rollout.js +34 -0
  46. package/scripts/experience-replay-governance.js +69 -0
  47. package/scripts/export-hf-dataset.js +1 -1
  48. package/scripts/feedback-loop.js +92 -4
  49. package/scripts/feedback-to-rules.js +17 -23
  50. package/scripts/gates-engine.js +4 -6
  51. package/scripts/growth-campaigns.js +49 -0
  52. package/scripts/harness-selector.js +16 -4
  53. package/scripts/hybrid-supervisor-agent.js +64 -0
  54. package/scripts/inference-cache-policy.js +72 -0
  55. package/scripts/inference-economics.js +53 -0
  56. package/scripts/internal-agent-bootstrap.js +12 -2
  57. package/scripts/knowledge-layer-plan.js +108 -0
  58. package/scripts/lesson-inference.js +183 -44
  59. package/scripts/lesson-search.js +4 -1
  60. package/scripts/llm-client.js +157 -26
  61. package/scripts/mailer/resend-mailer.js +112 -1
  62. package/scripts/mcp-transport-strategy.js +66 -0
  63. package/scripts/memory-store-governance.js +60 -0
  64. package/scripts/meta-agent-loop.js +7 -13
  65. package/scripts/model-access-eligibility.js +38 -0
  66. package/scripts/model-migration-readiness.js +55 -0
  67. package/scripts/operational-integrity.js +96 -3
  68. package/scripts/otel-declarative-config.js +56 -0
  69. package/scripts/perplexity-client.js +1 -1
  70. package/scripts/post-training-governance.js +34 -0
  71. package/scripts/private-core-boundary.js +72 -0
  72. package/scripts/production-agent-readiness.js +40 -0
  73. package/scripts/prompt-eval.js +564 -32
  74. package/scripts/prompt-programs.js +93 -0
  75. package/scripts/provider-action-normalizer.js +585 -0
  76. package/scripts/scaling-law-claims.js +60 -0
  77. package/scripts/security-scanner.js +1 -1
  78. package/scripts/self-distill-agent.js +7 -32
  79. package/scripts/seo-gsd.js +232 -55
  80. package/scripts/skill-rag-router.js +53 -0
  81. package/scripts/spec-gate.js +1 -1
  82. package/scripts/student-consistent-training.js +73 -0
  83. package/scripts/synthetic-data-provenance.js +98 -0
  84. package/scripts/task-context-result.js +81 -0
  85. package/scripts/telemetry-analytics.js +149 -0
  86. package/scripts/thompson-sampling.js +2 -2
  87. package/scripts/token-savings.js +7 -6
  88. package/scripts/token-tco.js +46 -0
  89. package/scripts/tool-registry.js +63 -3
  90. package/scripts/verification-loop.js +10 -1
  91. package/scripts/verifier-scoring.js +71 -0
  92. package/scripts/workflow-sentinel.js +284 -28
  93. package/scripts/workspace-agent-routines.js +118 -0
  94. package/src/api/server.js +381 -120
  95. package/scripts/analytics-report.js +0 -328
  96. package/scripts/autonomous-workflow.js +0 -377
  97. package/scripts/billing-setup.js +0 -109
  98. package/scripts/creator-campaigns.js +0 -239
  99. package/scripts/cross-encoder-reranker.js +0 -235
  100. package/scripts/daemon-manager.js +0 -108
  101. package/scripts/decision-trace.js +0 -354
  102. package/scripts/delegation-runtime.js +0 -896
  103. package/scripts/dispatch-brief.js +0 -159
  104. package/scripts/distribution-surfaces.js +0 -110
  105. package/scripts/feedback-history-distiller.js +0 -382
  106. package/scripts/funnel-analytics.js +0 -35
  107. package/scripts/history-distiller.js +0 -200
  108. package/scripts/hosted-job-launcher.js +0 -256
  109. package/scripts/intent-router.js +0 -392
  110. package/scripts/lesson-reranker.js +0 -263
  111. package/scripts/lesson-retrieval.js +0 -148
  112. package/scripts/managed-lesson-agent.js +0 -183
  113. package/scripts/operational-dashboard.js +0 -103
  114. package/scripts/operational-summary.js +0 -129
  115. package/scripts/operator-artifacts.js +0 -608
  116. package/scripts/optimize-context.js +0 -17
  117. package/scripts/org-dashboard.js +0 -206
  118. package/scripts/partner-orchestration.js +0 -146
  119. package/scripts/predictive-insights.js +0 -356
  120. package/scripts/pulse.js +0 -80
  121. package/scripts/reflector-agent.js +0 -221
  122. package/scripts/sales-pipeline.js +0 -681
  123. package/scripts/session-episode-store.js +0 -329
  124. package/scripts/session-health-sensor.js +0 -242
  125. package/scripts/session-report.js +0 -120
  126. package/scripts/swarm-coordinator.js +0 -81
  127. package/scripts/tool-kpi-tracker.js +0 -12
  128. package/scripts/webhook-delivery.js +0 -62
  129. package/scripts/workflow-sprint-intake.js +0 -475
@@ -1,392 +0,0 @@
1
- #!/usr/bin/env node
2
- const fs = require('fs');
3
- const path = require('path');
4
- const { getActiveMcpProfile, getAllowedTools } = require('./mcp-policy');
5
- const { loadGatesConfig } = require('./gates-engine');
6
- const { loadModel, samplePosteriors } = require('./thompson-sampling');
7
- const { analyzeCodeGraphImpact } = require('./codegraph-context');
8
- const {
9
- buildPartnerStrategy,
10
- getPartnerActionBias,
11
- } = require('./partner-orchestration');
12
- const {
13
- evaluateDelegation,
14
- normalizeDelegationMode,
15
- } = require('./delegation-runtime');
16
- const { resolveFeedbackDir } = require('./feedback-paths');
17
-
18
- const PROJECT_ROOT = path.join(__dirname, '..');
19
- const DEFAULT_BUNDLE_DIR = path.join(PROJECT_ROOT, 'config', 'policy-bundles');
20
- const RISK_LEVELS = ['low', 'medium', 'high', 'critical'];
21
-
22
/**
 * Resolve the policy bundle identifier to use when a caller supplies none.
 *
 * @returns {string} The THUMBGATE_POLICY_BUNDLE environment variable when it
 *   holds a non-empty value, otherwise the literal 'default-v1'.
 */
function getDefaultBundleId() {
  const { THUMBGATE_POLICY_BUNDLE: configuredId } = process.env;
  if (configuredId) {
    return configuredId;
  }
  return 'default-v1';
}
25
-
26
/**
 * Compute the filesystem path of a policy bundle.
 *
 * When THUMBGATE_POLICY_BUNDLE_PATH is set it is returned verbatim (explicit
 * operator override). Otherwise the bundle is looked up as `<bundleId>.json`
 * underneath DEFAULT_BUNDLE_DIR.
 *
 * @param {string} [bundleId] - Bundle identifier; defaults to getDefaultBundleId().
 * @returns {string} Absolute path of the bundle file.
 * @throws {Error} When the resolved path falls outside DEFAULT_BUNDLE_DIR.
 */
function getBundlePath(bundleId = getDefaultBundleId()) {
  if (process.env.THUMBGATE_POLICY_BUNDLE_PATH) {
    return process.env.THUMBGATE_POLICY_BUNDLE_PATH;
  }

  // Prevent path traversal (CodeQL S2083). A plain string-prefix test is not
  // enough: "<dir>-evil/x.json" starts with "<dir>" yet lives in a sibling
  // directory. Comparing the *relative* path catches that case as well.
  const bundleRoot = path.resolve(DEFAULT_BUNDLE_DIR);
  const candidate = path.resolve(bundleRoot, `${bundleId}.json`);
  const relative = path.relative(bundleRoot, candidate);
  const escapesRoot = relative === '..'
    || relative.startsWith(`..${path.sep}`)
    || path.isAbsolute(relative); // e.g. a different drive on Windows

  if (escapesRoot) {
    throw new Error(`Invalid bundle ID: path traversal detected in "${bundleId}"`);
  }
  return candidate;
}
37
-
38
/**
 * Validate the structure of a parsed policy bundle.
 *
 * Checks that the bundle is an object with a string `bundleId` and a
 * non-empty `intents` array, and that every intent is an object with a string
 * `id`, a `risk` listed in RISK_LEVELS and a non-empty `actions` array.
 *
 * @param {object} bundle - Parsed bundle to check.
 * @returns {true} Always `true` when the bundle is valid.
 * @throws {Error} With an "Invalid policy bundle: ..." message on the first
 *   violation found.
 */
function validateBundle(bundle) {
  if (!bundle || typeof bundle !== 'object') {
    throw new Error('Invalid policy bundle: expected object');
  }
  if (!bundle.bundleId || typeof bundle.bundleId !== 'string') {
    throw new Error('Invalid policy bundle: missing bundleId');
  }
  if (!Array.isArray(bundle.intents) || bundle.intents.length === 0) {
    throw new Error('Invalid policy bundle: intents must be a non-empty array');
  }

  for (const intent of bundle.intents) {
    // A null / primitive entry would otherwise surface as an opaque TypeError
    // ("Cannot read properties of null") instead of a validation failure.
    if (!intent || typeof intent !== 'object') {
      throw new Error('Invalid policy bundle: each intent must be an object');
    }
    if (!intent.id || typeof intent.id !== 'string') {
      throw new Error('Invalid policy bundle: intent id is required');
    }
    if (!RISK_LEVELS.includes(intent.risk)) {
      throw new Error(`Invalid policy bundle: unsupported risk '${intent.risk}' for intent '${intent.id}'`);
    }
    if (!Array.isArray(intent.actions) || intent.actions.length === 0) {
      throw new Error(`Invalid policy bundle: intent '${intent.id}' must define actions`);
    }
  }

  return true;
}
63
-
64
/**
 * Read, parse and validate a policy bundle from disk.
 *
 * @param {string} [bundleId] - Bundle identifier; defaults to getDefaultBundleId().
 * @returns {object} The parsed bundle, after validateBundle() accepted it.
 * @throws {Error} Whatever getBundlePath, fs.readFileSync, JSON.parse or
 *   validateBundle throw.
 */
function loadPolicyBundle(bundleId = getDefaultBundleId()) {
  const bundlePath = getBundlePath(bundleId);
  const bundle = JSON.parse(fs.readFileSync(bundlePath, 'utf-8'));
  validateBundle(bundle);
  return bundle;
}
70
-
71
/**
 * Determine which risk levels need human approval for a given MCP profile.
 *
 * Precedence: an array under `bundle.approval.profileOverrides[mcpProfile]`,
 * then an array under `bundle.approval.requiredRisks`, then the built-in
 * fallback `['high', 'critical']`.
 *
 * @param {object} bundle - Policy bundle (its `approval` section is optional).
 * @param {string} mcpProfile - Profile name used to look up an override.
 * @returns {string[]} Risk levels that require approval.
 */
function getRequiredApprovalRisks(bundle, mcpProfile) {
  const { profileOverrides, requiredRisks } = bundle.approval || {};

  const profileSpecific = profileOverrides ? profileOverrides[mcpProfile] : undefined;
  if (Array.isArray(profileSpecific)) {
    return profileSpecific;
  }
  if (Array.isArray(requiredRisks)) {
    return requiredRisks;
  }
  return ['high', 'critical'];
}
78
-
79
/**
 * Pass an MCP profile name through getAllowedTools() and hand it back.
 *
 * The lookup result is discarded; the call exists only for its side effect.
 * NOTE(review): presumably getAllowedTools throws for an unknown profile,
 * which is what makes this an assertion - confirm in ./mcp-policy.
 *
 * @param {string} profile - MCP profile name to check.
 * @returns {string} The same `profile` value.
 */
function assertKnownMcpProfile(profile) {
  void getAllowedTools(profile);
  return profile;
}
83
-
84
/**
 * Summarise every intent in a policy bundle for a given MCP profile.
 *
 * @param {object} [options]
 * @param {string} [options.bundleId] - Bundle to load (loadPolicyBundle default otherwise).
 * @param {string} [options.mcpProfile] - Profile name; falls back to getActiveMcpProfile().
 * @param {string} [options.partnerProfile] - Forwarded to buildPartnerStrategy().
 * @returns {object} `{ bundleId, mcpProfile, partnerProfile, partnerStrategy, intents }`
 *   where each intent entry carries id, description, risk, actionCount and
 *   whether its risk level requires approval under the profile.
 */
function listIntents(options = {}) {
  const bundle = loadPolicyBundle(options.bundleId);
  const profile = assertKnownMcpProfile(options.mcpProfile || getActiveMcpProfile());
  const approvalRisks = getRequiredApprovalRisks(bundle, profile);
  const strategy = buildPartnerStrategy({
    partnerProfile: options.partnerProfile,
    tokenBudget: DEFAULT_TOKEN_BUDGET,
  });

  const summarise = ({ id, description, risk, actions }) => ({
    id,
    description,
    risk,
    actionCount: actions.length,
    requiresApproval: approvalRisks.includes(risk),
  });

  const { verificationMode, recommendedChecks } = strategy;
  return {
    bundleId: bundle.bundleId,
    mcpProfile: profile,
    partnerProfile: strategy.profile,
    partnerStrategy: { verificationMode, recommendedChecks },
    intents: bundle.intents.map((intent) => summarise(intent)),
  };
}
110
-
111
- /* ── Token Budget Defaults ──────────────────────────────────────── */
112
/**
 * Token limits applied when a caller does not override them.
 */
const DEFAULT_TOKEN_BUDGET = {
  total: 12000,
  perAction: 4000,
  contextPack: 6000,
};

/**
 * Merge caller-supplied token limits over DEFAULT_TOKEN_BUDGET.
 *
 * Only the keys present in DEFAULT_TOKEN_BUDGET are considered, and an
 * override is accepted only when it is a number greater than zero; anything
 * else keeps the default for that key.
 *
 * @param {object|null} [overrides] - Partial budget. `null`, `undefined` and
 *   non-object values are treated as "no overrides" (a default parameter alone
 *   would let `null` through and crash on the first property read).
 * @returns {{total: number, perAction: number, contextPack: number}} A fresh
 *   budget object; DEFAULT_TOKEN_BUDGET itself is never returned or mutated.
 */
function resolveTokenBudget(overrides = {}) {
  const budget = { ...DEFAULT_TOKEN_BUDGET };
  if (overrides === null || typeof overrides !== 'object') {
    return budget;
  }

  for (const key of Object.keys(DEFAULT_TOKEN_BUDGET)) {
    const requested = overrides[key];
    if (typeof requested === 'number' && requested > 0) {
      budget[key] = requested;
    }
  }
  return budget;
}
125
-
126
- /* ── Planning Decomposition ────────────────────────────────────── */
127
-
128
/**
 * Group an ordered action list into phases of consecutive same-`kind` actions.
 *
 * A new phase starts whenever an action's `kind` differs from the previous
 * action's. A phase is flagged `parallel` when it holds more than one action.
 *
 * @param {Array<{kind: *}>} actions - Ordered actions.
 * @returns {Array<{phaseIndex: number, kind: *, parallel: boolean, actions: Array}>}
 *   Phases in original order; an empty array for a missing, non-array or
 *   empty input.
 */
function decomposeActions(actions) {
  if (!Array.isArray(actions) || actions.length === 0) {
    return [];
  }

  const runs = [];
  for (const action of actions) {
    const latest = runs[runs.length - 1];
    if (latest !== undefined && latest.kind === action.kind) {
      latest.members.push(action);
    } else {
      runs.push({ kind: action.kind, members: [action] });
    }
  }

  return runs.map(({ kind, members }, phaseIndex) => ({
    phaseIndex,
    kind,
    parallel: members.length > 1,
    actions: members,
  }));
}
151
-
152
/**
 * Drop falsy entries and duplicates from a list, keeping first-seen order.
 *
 * @param {Array} [values] - Items to de-duplicate.
 * @returns {Array} Truthy values, each appearing once.
 */
function mergeUnique(values = []) {
  const kept = new Set();
  for (const value of values) {
    if (value) {
      kept.add(value);
    }
  }
  return [...kept];
}
155
-
156
/**
 * Build an execution plan for a single intent of a policy bundle.
 *
 * Steps, in order: load the bundle and profile, normalise the inputs, locate
 * the intent, decide whether a human-approval checkpoint is needed, rank the
 * intent's actions, split them into phases, gather code-graph impact, collect
 * the gates that act as kill switches, and finally let evaluateDelegation()
 * decide how the plan should be executed.
 *
 * @param {object} [options]
 * @param {string} [options.bundleId] - Bundle to load.
 * @param {string} [options.mcpProfile] - Profile name; falls back to getActiveMcpProfile().
 * @param {string} options.intentId - Id of the intent to plan (required).
 * @param {string} [options.context] - Free-text context; trimmed before use.
 * @param {boolean} [options.approved] - Only the literal `true` counts as approved.
 * @param {object} [options.tokenBudget] - Passed to resolveTokenBudget().
 * @param {*} [options.delegationMode] - Passed to normalizeDelegationMode().
 * @param {string} [options.partnerProfile] - Passed to buildPartnerStrategy().
 * @param {string} [options.modelPath] - Passed to rankActions().
 * @param {string} [options.repoPath] - Passed to analyzeCodeGraphImpact() and evaluateDelegation().
 * @returns {object} The plan: bundle/profile info, status ('checkpoint_required'
 *   or 'ready'), intent summary, checkpoint, actions, phases, token budget,
 *   partner strategy, action scores, code-graph impact, kill switches and the
 *   delegation fields reported by evaluateDelegation().
 * @throws {Error} 'intentId is required' or 'Unknown intent: <id>'.
 */
function planIntent(options = {}) {
  const bundle = loadPolicyBundle(options.bundleId);
  const profile = assertKnownMcpProfile(options.mcpProfile || getActiveMcpProfile());
  const intentId = String(options.intentId || '').trim();
  const context = String(options.context || '').trim();
  const approved = options.approved === true;
  const tokenBudget = resolveTokenBudget(options.tokenBudget);
  const delegationMode = normalizeDelegationMode(options.delegationMode);

  if (intentId === '') {
    throw new Error('intentId is required');
  }

  const intent = bundle.intents.find((entry) => entry.id === intentId);
  if (!intent) {
    throw new Error(`Unknown intent: ${intentId}`);
  }

  const requiredRisks = getRequiredApprovalRisks(bundle, profile);
  const requiresApproval = requiredRisks.includes(intent.risk);
  const awaitingApproval = requiresApproval && !approved;

  const partnerStrategy = buildPartnerStrategy({
    partnerProfile: options.partnerProfile,
    tokenBudget,
  });
  const ranking = rankActions(intent.actions, {
    modelPath: options.modelPath,
    partnerStrategy,
  });
  // The 'balanced' profile keeps the bundle's declared order; any other
  // profile uses the ranked order.
  const plannedActions = partnerStrategy.profile === 'balanced'
    ? intent.actions
    : ranking.ranked;
  const phases = decomposeActions(plannedActions);
  const codegraphImpact = analyzeCodeGraphImpact({
    intentId,
    context,
    repoPath: options.repoPath,
  });

  const enrichedPartnerStrategy = {
    ...partnerStrategy,
    recommendedChecks: mergeUnique([
      ...partnerStrategy.recommendedChecks,
      ...codegraphImpact.verificationHints,
    ]),
  };

  let checkpoint = null;
  if (awaitingApproval) {
    checkpoint = {
      type: 'human_approval',
      reason: `Intent '${intent.id}' has risk '${intent.risk}' under profile '${profile}'.`,
      requiredForRiskLevels: requiredRisks,
    };
  }

  const generatedAt = new Date().toISOString();

  // A gate becomes a kill switch when the intent is high/critical risk and
  // the gate is high/critical severity, or when the gate's trigger text
  // mentions one of the planned action names (case-insensitive).
  const severeLevels = ['high', 'critical'];
  const intentIsSevere = severeLevels.includes(intent.risk);
  const isKillSwitch = (gate) => {
    if (intentIsSevere && severeLevels.includes(gate.severity)) {
      return true;
    }
    const plannedNames = plannedActions.map((planned) => planned.name);
    return gate.trigger
      && plannedNames.some((name) => gate.trigger.toLowerCase().includes(name.toLowerCase()));
  };
  const toKillSwitch = ({ id, layer, action, severity }) => ({
    id,
    layer: layer || 'Execution',
    action,
    severity,
  });
  const killSwitches = loadGatesConfig().gates
    .filter((gate) => isKillSwitch(gate))
    .map((gate) => toKillSwitch(gate));

  const basePlan = {
    bundleId: bundle.bundleId,
    mcpProfile: profile,
    partnerProfile: enrichedPartnerStrategy.profile,
    generatedAt,
    status: awaitingApproval ? 'checkpoint_required' : 'ready',
    intent: {
      id: intent.id,
      description: intent.description,
      risk: intent.risk,
    },
    context,
    requiresApproval,
    approved,
    checkpoint,
    actions: plannedActions,
    phases,
    tokenBudget: enrichedPartnerStrategy.tokenBudget || tokenBudget,
    partnerStrategy: enrichedPartnerStrategy,
    actionScores: ranking.scores,
    codegraphImpact,
    killSwitches,
  };

  const {
    executionMode,
    delegationEligible,
    delegationScore,
    delegationReason,
    delegateProfile,
    handoffContract,
  } = evaluateDelegation({
    delegationMode,
    plan: basePlan,
    mcpProfile: profile,
    context,
    repoPath: options.repoPath,
  });

  return {
    ...basePlan,
    executionMode,
    delegationEligible,
    delegationScore,
    delegationReason,
    delegateProfile,
    handoffContract,
  };
}
262
-
263
- const ACTION_CATEGORY_MAP = {
264
- capture_feedback: 'code_edit',
265
- feedback_summary: 'debugging',
266
- search_lessons: 'search',
267
- retrieve_lessons: 'search',
268
- search_thumbgate: 'search',
269
- prevention_rules: 'security',
270
- construct_context_pack: 'architecture',
271
- export_dpo_pairs: 'testing',
272
- export_databricks_bundle: 'testing',
273
- context_provenance: 'search',
274
- evaluate_context_pack: 'pr_review',
275
- };
276
-
277
/**
 * Location of the feedback model file used when no explicit path is given.
 *
 * @returns {string} `feedback_model.json` joined onto the directory reported
 *   by resolveFeedbackDir().
 */
function getDefaultModelPath() {
  return path.join(resolveFeedbackDir(), 'feedback_model.json');
}
281
-
282
/**
 * Sort priority of an action under a partner strategy (lower sorts first).
 *
 * Only the 'evidence_first' verification mode promotes anything: there the
 * `construct_context_pack` and `context_provenance` actions get priority 0.
 * Every other combination - including a missing action or strategy - is 1.
 *
 * @param {{name: string}} action - Action being ranked.
 * @param {{verificationMode: string}} partnerStrategy - Active partner strategy.
 * @returns {0|1} Priority bucket.
 */
function getPartnerActionPriority(action, partnerStrategy) {
  const evidenceFirst = Boolean(action)
    && Boolean(partnerStrategy)
    && partnerStrategy.verificationMode === 'evidence_first';
  if (!evidenceFirst) {
    return 1;
  }

  const promoted = ['construct_context_pack', 'context_provenance'];
  return promoted.includes(action.name) ? 0 : 1;
}
293
-
294
/**
 * Score and order actions using sampled posteriors plus partner preferences.
 *
 * Each action's category comes from ACTION_CATEGORY_MAP ('uncategorized' when
 * unmapped). Its score is `0.7 * categoryPosterior + 0.3 * partnerPosterior +
 * partnerBias`, clamped to [0, 1]; a posterior that was not sampled counts as
 * 0.5. Results are ordered by partner priority (ascending), then score
 * (descending), then original position.
 *
 * @param {Array<{name: string}>} actions - Actions to score.
 * @param {string} [modelPath] - Model file; getDefaultModelPath() when falsy.
 * @param {object} [options]
 * @param {object} [options.partnerStrategy] - Pre-built strategy to reuse.
 * @param {string} [options.partnerProfile] - Used to build a strategy when none is given.
 * @returns {Array<object>} One record per action with action, category,
 *   actionScore, partner fields, partnerPriority, score and original index.
 */
function scoreActions(actions, modelPath, options = {}) {
  const partnerStrategy = options.partnerStrategy || buildPartnerStrategy({
    partnerProfile: options.partnerProfile,
  });
  const posteriors = samplePosteriors(loadModel(modelPath || getDefaultModelPath()));

  // 0.5 is the neutral stand-in for any category without a sampled posterior.
  const posteriorOrNeutral = (key) => {
    const sampled = posteriors[key];
    return sampled !== undefined ? sampled : 0.5;
  };
  const partnerScore = posteriorOrNeutral(partnerStrategy.partnerCategory);

  const scored = actions.map((action, index) => {
    const category = ACTION_CATEGORY_MAP[action.name] || 'uncategorized';
    const actionScore = posteriorOrNeutral(category);
    const partnerBias = getPartnerActionBias(action, partnerStrategy);
    const blended = (actionScore * 0.7) + (partnerScore * 0.3) + partnerBias;
    const score = Math.max(0, Math.min(1, blended));
    const partnerPriority = getPartnerActionPriority(action, partnerStrategy);
    return {
      action,
      category,
      actionScore,
      partnerProfile: partnerStrategy.profile,
      partnerCategory: partnerStrategy.partnerCategory,
      partnerScore,
      partnerBias,
      partnerPriority,
      score,
      index,
    };
  });

  scored.sort((left, right) => {
    if (left.partnerPriority !== right.partnerPriority) {
      return left.partnerPriority - right.partnerPriority;
    }
    const scoreGap = right.score - left.score;
    return scoreGap || left.index - right.index;
  });
  return scored;
}
328
-
329
/**
 * Rank actions and expose both the ordered actions and their score details.
 *
 * @param {Array<{name: string}>} actions - Actions to rank.
 * @param {object} [options]
 * @param {string} [options.modelPath] - Model file; getDefaultModelPath() when falsy.
 * @param {object} [options.partnerStrategy] - Pre-built strategy to reuse.
 * @param {string} [options.partnerProfile] - Used to build a strategy when none is given.
 * @returns {{ranked: Array, scores: Array<object>}} `ranked` holds the actions
 *   in scoreActions() order; `scores` holds the matching per-action score
 *   breakdown (without the action object or original index).
 */
function rankActions(actions, options = {}) {
  const modelPath = options.modelPath || getDefaultModelPath();
  const partnerStrategy = options.partnerStrategy || buildPartnerStrategy({
    partnerProfile: options.partnerProfile,
  });

  const ranked = [];
  const scores = [];
  for (const entry of scoreActions(actions, modelPath, { partnerStrategy })) {
    ranked.push(entry.action);
    scores.push({
      name: entry.action.name,
      category: entry.category,
      partnerProfile: entry.partnerProfile,
      partnerCategory: entry.partnerCategory,
      actionScore: entry.actionScore,
      partnerScore: entry.partnerScore,
      partnerBias: entry.partnerBias,
      partnerPriority: entry.partnerPriority,
      score: entry.score,
    });
  }
  return { ranked, scores };
}
350
-
351
- module.exports = {
352
- DEFAULT_BUNDLE_DIR,
353
- DEFAULT_TOKEN_BUDGET,
354
- RISK_LEVELS,
355
- getDefaultBundleId,
356
- getBundlePath,
357
- validateBundle,
358
- loadPolicyBundle,
359
- getRequiredApprovalRisks,
360
- assertKnownMcpProfile,
361
- listIntents,
362
- planIntent,
363
- resolveTokenBudget,
364
- decomposeActions,
365
- ACTION_CATEGORY_MAP,
366
- scoreActions,
367
- rankActions,
368
- };
369
-
370
// Command-line entry point (runs only when this file is executed directly).
//   --intent=<id>     plan that intent; without it the intents are listed
//   --profile=<name>  MCP profile to use
//   --bundle=<id>     policy bundle to load
//   --approved        mark the plan as already approved
if (require.main === module) {
  const cliArgs = process.argv.slice(2);

  // Value of the first `<prefix>value` argument, or undefined when absent.
  const readFlag = (prefix) => {
    const match = cliArgs.find((arg) => arg.startsWith(prefix));
    return match ? match.replace(prefix, '') : undefined;
  };
  const printJson = (payload) => {
    console.log(JSON.stringify(payload, null, 2));
  };

  const intentId = readFlag('--intent=');
  const mcpProfile = readFlag('--profile=');
  const bundleId = readFlag('--bundle=');
  const approved = cliArgs.includes('--approved');

  if (intentId === undefined) {
    printJson(listIntents({ mcpProfile, bundleId }));
    process.exit(0);
  }

  printJson(planIntent({
    intentId,
    mcpProfile,
    bundleId,
    approved,
  }));
}
@@ -1,263 +0,0 @@
1
- 'use strict';
2
-
3
- /**
4
- * Cross-encoder reranker for lesson retrieval.
5
- *
6
- * Unlike the bi-encoders already in use (Jaccard + bigram Jaccard), a
7
- * cross-encoder processes the (query, lesson) pair jointly — so it can
8
- * catch relevance signals that independent scoring misses:
9
- *
10
- * - Field-weighted BM25: a query term in `whatWentWrong` is worth more
11
- * than the same term in `tags`
12
- * - Synonym/alias expansion: "force-push" ↔ "push --force", "deploy" ↔
13
- * "deployment", etc.
14
- * - Signal coherence: failure-sounding queries boost negative-signal lessons
15
- * - Tool name joint scoring: query toolName × lesson toolsUsed
16
- * - Score blending: reranked score is blended with the original retrieval
17
- * score so we never fully discard the bi-encoder's signal
18
- *
19
- * Usage:
20
- * const { rerankLessons } = require('./lesson-reranker');
21
- * const reranked = rerankLessons(query, candidates, { topK: 5, toolName });
22
- */
23
-
24
- // BM25 hyper-parameters
25
- const BM25_K1 = 1.5; // term saturation
26
- const BM25_B = 0.75; // length normalisation
27
-
28
- // Weight given to each lesson field when scoring a (query, lesson) pair.
29
- // Higher weight = query terms appearing in that field contribute more to score.
30
- const FIELD_WEIGHTS = {
31
- whatWentWrong: 3.0,
32
- whatToChange: 2.5,
33
- howToAvoid: 2.0,
34
- whatWorked: 2.0,
35
- summary: 1.5,
36
- content: 1.5,
37
- context: 1.2,
38
- title: 1.0,
39
- rootCause: 1.0,
40
- reasoning: 0.8,
41
- tags: 0.5,
42
- category: 0.4,
43
- };
44
-
45
- // Synonym clusters: any term in a group matches all others.
46
- const SYNONYM_GROUPS = [
47
- ['force-push', 'force push', 'push --force', 'git push --force', 'force_push'],
48
- ['main', 'main branch', 'master', 'trunk', 'protected branch'],
49
- ['env', '.env', 'environment variable', 'env var', 'dotenv', 'secret'],
50
- ['deploy', 'deployment', 'ship', 'release', 'publish', 'rollout'],
51
- ['db', 'database', 'sqlite', 'postgres', 'postgresql', 'migration', 'migrate'],
52
- ['test', 'tests', 'test suite', 'spec', 'failing test', 'test failure'],
53
- ['ci', 'ci/cd', 'pipeline', 'github actions', 'workflow', 'build'],
54
- ['lint', 'linter', 'eslint', 'prettier', 'format'],
55
- ['auth', 'authentication', 'authorization', 'token', 'api key', 'credential'],
56
- ['delete', 'remove', 'rm', 'drop', 'destroy', 'wipe'],
57
- ['merge', 'pull request', 'pr', 'rebase', 'squash'],
58
- ];
59
-
60
- // Regex patterns that indicate the query is about a failure/mistake.
61
- const FAILURE_PATTERN = /fail|error|wrong|broken|mistake|bad|incorrect|problem|issue|bug|crash|broke|exception/i;
62
-
63
/**
 * Split text into lowercase alphanumeric tokens of at least two characters.
 *
 * Everything that is not an ASCII letter or digit after lowercasing -
 * whitespace, punctuation, hyphens, underscores, dots - acts as a delimiter,
 * so "force-push" yields ["force", "push"].
 * Exported so tests can verify expansion behaviour.
 *
 * @param {string} text - Text to tokenise; any falsy value yields [].
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  if (!text) return [];
  const words = text.toLowerCase().match(/[a-z0-9]{2,}/g);
  return words === null ? [] : words;
}
77
-
78
/**
 * Widen a list of query tokens with the tokens of every matching synonym group.
 *
 * A group from SYNONYM_GROUPS matches a term when the term contains, as a
 * substring, any whitespace-separated word of any alias in that group. All
 * aliases of a matching group are tokenised and added.
 * NOTE(review): because the match is a substring test, short aliases such as
 * "rm" or "pr" also match inside longer terms (e.g. "format") - confirm this
 * breadth is intended.
 *
 * @param {string[]} terms - Query tokens.
 * @returns {string[]} De-duplicated terms: the originals first, followed by
 *   the expansions in discovery order.
 */
function expandTerms(terms) {
  const collected = new Set(terms);

  // `includes` also covers the exact-equality case.
  const groupMatches = (group, term) => group.some(
    (alias) => alias.split(/\s+/).some((word) => term.includes(word)),
  );

  for (const term of terms) {
    const matchingGroups = SYNONYM_GROUPS.filter((group) => groupMatches(group, term));
    for (const group of matchingGroups) {
      for (const alias of group) {
        for (const token of tokenize(alias)) {
          collected.add(token);
        }
      }
    }
  }
  return Array.from(collected);
}
93
-
94
/**
 * Read one field of a lesson candidate as text.
 *
 * Looks first at `candidate.lesson[field]` (nested shape), then at
 * `candidate[field]` (flat shape); a falsy value at one level falls through to
 * the next, and finally to the empty string.
 *
 * @param {object} candidate - Lesson candidate, flat or with a nested `lesson`.
 * @param {string} field - Field name to read.
 * @returns {string} The value as a string; arrays are joined with single spaces.
 */
function getField(candidate, field) {
  const { lesson } = candidate;

  let value = lesson ? lesson[field] : undefined;
  if (!value) {
    value = candidate[field];
  }
  if (!value) {
    value = '';
  }
  return Array.isArray(value) ? value.join(' ') : String(value);
}
105
-
106
/**
 * Score candidates against query terms with a field-weighted BM25 (BM25F-style).
 *
 * For each candidate and query term, a length-normalised term frequency is
 * computed per field listed in FIELD_WEIGHTS, multiplied by that field's
 * weight, summed over the fields and scaled by the term's IDF. The document
 * frequency behind the IDF counts candidates containing the term in any
 * field, independent of the field weights.
 *
 * @param {Iterable<string>} queryTerms - Tokens to look for (matched exactly).
 * @param {Array<object>} candidates - Lesson candidates.
 * @returns {Array<{candidate: object, bm25Score: number}>} One entry per
 *   candidate, in input order; an empty array for no candidates.
 */
function fieldWeightedBM25(queryTerms, candidates) {
  const docCount = candidates.length;
  if (docCount === 0) return [];

  const weightedFields = Object.entries(FIELD_WEIGHTS);
  const fieldNames = weightedFields.map(([name]) => name);

  // Tokenise every field of every candidate once up front.
  const tokensPerDoc = candidates.map((candidate) => Object.fromEntries(
    fieldNames.map((name) => [name, tokenize(getField(candidate, name))]),
  ));

  // Mean token count per field; `|| 1` keeps the divisor non-zero when a
  // field is empty in every candidate.
  const meanLength = {};
  for (const name of fieldNames) {
    let tokenTotal = 0;
    for (const perField of tokensPerDoc) {
      tokenTotal += perField[name].length;
    }
    meanLength[name] = tokenTotal / docCount || 1;
  }

  // Number of candidates containing each token in at least one field.
  const docFrequency = new Map();
  for (const perField of tokensPerDoc) {
    const distinctTokens = new Set(fieldNames.flatMap((name) => perField[name]));
    for (const token of distinctTokens) {
      docFrequency.set(token, (docFrequency.get(token) || 0) + 1);
    }
  }

  return candidates.map((candidate, docIndex) => {
    const perField = tokensPerDoc[docIndex];
    let bm25Score = 0;

    for (const term of queryTerms) {
      const containingDocs = docFrequency.get(term) || 0;
      if (containingDocs === 0) continue;

      const idf = Math.log((docCount - containingDocs + 0.5) / (containingDocs + 0.5) + 1);
      if (idf <= 0) continue;

      let weightedTf = 0;
      for (const [name, weight] of weightedFields) {
        const tokens = perField[name];
        const occurrences = tokens.filter((token) => token === term).length;
        if (occurrences === 0) continue;

        const saturation = BM25_K1 * (1 - BM25_B + BM25_B * tokens.length / meanLength[name]);
        weightedTf += weight * (occurrences / (occurrences + saturation));
      }

      bm25Score += idf * weightedTf;
    }

    return { candidate, bm25Score };
  });
}
190
-
191
/**
 * Rerank lesson candidates by blending a field-weighted BM25 score with the
 * score they arrived with.
 *
 * For every candidate the BM25 score (normalised by the best BM25 score in the
 * batch) is blended with `relevanceScore` / `score`, then multiplied by 1.2
 * when the query looks failure-related and the lesson is a negative signal,
 * and by 1.3 when one of the lesson's tools contains `toolName`
 * (case-insensitive substring).
 *
 * @param {string} query - The original retrieval query / action context.
 * @param {Array<object>} candidates - Lesson objects from the first retrieval stage.
 * @param {object} [options]
 * @param {number} [options.topK=5] - How many results to return.
 * @param {string} [options.toolName] - Tool name from the triggering hook call.
 * @param {number} [options.blendWeight=0.7] - Weight of the BM25 score versus
 *   the original score (0 = all original, 1 = all BM25).
 * @returns {Array<object>} Up to `topK` shallow copies of the candidates, best
 *   first, each with a numeric `rerankedScore` (6 decimals). This holds for a
 *   single candidate too, so callers can always read `rerankedScore`.
 */
function rerankLessons(query, candidates, options = {}) {
  const {
    topK = 5,
    toolName = '',
    blendWeight = 0.7,
  } = options;

  if (!candidates || candidates.length === 0) return [];

  // Build the expanded query term set (the tool name counts as query text).
  const rawTerms = tokenize(`${query || ''}${toolName ? ` ${toolName}` : ''}`);
  const queryTerms = expandTerms(rawTerms);
  const isFailureQuery = FAILURE_PATTERN.test(query || '');
  const wantedTool = toolName ? String(toolName).toLowerCase() : '';

  const bm25Results = fieldWeightedBM25(queryTerms, candidates);

  // Normalise against the best score; the 1e-9 floor avoids dividing by zero
  // when nothing matched.
  const maxBm25 = bm25Results.reduce((best, result) => Math.max(best, result.bm25Score), 1e-9);

  const reranked = bm25Results.map(({ candidate, bm25Score }) => {
    const normBm25 = bm25Score / maxBm25;

    // The incoming score field name differs between retrieval paths.
    const origScore = candidate.relevanceScore ?? candidate.score ?? 0;
    let finalScore = blendWeight * normBm25 + (1 - blendWeight) * origScore;

    // Signal coherence bonus: failure queries rank negative lessons higher.
    const candidateSignal =
      candidate.signal ||
      (candidate.tags && candidate.tags.includes('negative') ? 'negative' : null);
    if (isFailureQuery && candidateSignal === 'negative') {
      finalScore *= 1.2;
    }

    // Tool bonus: the lesson mentions the tool that triggered this lookup.
    if (wantedTool) {
      const listed = candidate.metadata?.toolsUsed;
      const lessonTools = [
        ...(Array.isArray(listed) ? listed : [listed]),
        getField(candidate, 'toolUsed'),
        getField(candidate, 'toolName'),
      ]
        // Ignore non-string entries instead of crashing on .toLowerCase().
        .filter((tool) => typeof tool === 'string' && tool !== '')
        .map((tool) => tool.toLowerCase());

      if (lessonTools.some((tool) => tool.includes(wantedTool))) {
        finalScore *= 1.3;
      }
    }

    return { ...candidate, rerankedScore: Number(finalScore.toFixed(6)) };
  });

  return reranked
    .sort((a, b) => b.rerankedScore - a.rerankedScore)
    .slice(0, topK);
}
262
-
263
- module.exports = { rerankLessons, fieldWeightedBM25, tokenize, expandTerms };