thumbgate 1.14.1 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +60 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +217 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +211 -8
- package/config/enforcement.json +59 -7
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/default.json +33 -0
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/mcp-allowlists.json +4 -0
- package/config/merge-quality-checks.json +2 -1
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +70 -51
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +13 -7
- package/public/compare.html +29 -23
- package/public/dashboard.html +105 -12
- package/public/guide.html +28 -28
- package/public/index.html +233 -97
- package/public/learn.html +87 -20
- package/public/lessons.html +26 -2
- package/public/numbers.html +271 -0
- package/public/pro.html +89 -19
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-feedback.js +2 -1
- package/scripts/cli-schema.js +60 -5
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/commercial-offer.js +1 -1
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/contextfs.js +214 -32
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +141 -9
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +145 -1
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-canonical.js +181 -0
- package/scripts/lesson-db.js +71 -10
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/lesson-synthesis.js +23 -2
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/native-messaging-audit.js +514 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/pr-manager.js +47 -7
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/profile-router.js +16 -1
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/rule-validator.js +285 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +400 -43
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +75 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +434 -120
- package/.claude-plugin/README.md +0 -170
- package/adapters/README.md +0 -12
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
- package/skills/agent-memory/SKILL.md +0 -97
- package/skills/solve-architecture-autonomy/SKILL.md +0 -17
- package/skills/solve-architecture-autonomy/tool.js +0 -33
- package/skills/thumbgate-feedback/SKILL.md +0 -49
package/scripts/intent-router.js
DELETED
|
@@ -1,392 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
const fs = require('fs');
|
|
3
|
-
const path = require('path');
|
|
4
|
-
const { getActiveMcpProfile, getAllowedTools } = require('./mcp-policy');
|
|
5
|
-
const { loadGatesConfig } = require('./gates-engine');
|
|
6
|
-
const { loadModel, samplePosteriors } = require('./thompson-sampling');
|
|
7
|
-
const { analyzeCodeGraphImpact } = require('./codegraph-context');
|
|
8
|
-
const {
|
|
9
|
-
buildPartnerStrategy,
|
|
10
|
-
getPartnerActionBias,
|
|
11
|
-
} = require('./partner-orchestration');
|
|
12
|
-
const {
|
|
13
|
-
evaluateDelegation,
|
|
14
|
-
normalizeDelegationMode,
|
|
15
|
-
} = require('./delegation-runtime');
|
|
16
|
-
const { resolveFeedbackDir } = require('./feedback-paths');
|
|
17
|
-
|
|
18
|
-
const PROJECT_ROOT = path.join(__dirname, '..');
|
|
19
|
-
const DEFAULT_BUNDLE_DIR = path.join(PROJECT_ROOT, 'config', 'policy-bundles');
|
|
20
|
-
const RISK_LEVELS = ['low', 'medium', 'high', 'critical'];
|
|
21
|
-
|
|
22
|
-
/**
 * Resolve the id of the policy bundle to use when the caller names none.
 *
 * @returns {string} The THUMBGATE_POLICY_BUNDLE environment value when it is
 *   set to a non-empty string, otherwise 'default-v1'.
 */
function getDefaultBundleId() {
  const configuredId = process.env.THUMBGATE_POLICY_BUNDLE;
  if (configuredId) {
    return configuredId;
  }
  return 'default-v1';
}
|
|
25
|
-
|
|
26
|
-
/**
 * Resolve the on-disk location of a policy bundle.
 *
 * THUMBGATE_POLICY_BUNDLE_PATH, when set, is returned verbatim and bypasses
 * the containment check below (it is operator-supplied configuration).
 *
 * @param {string} [bundleId] - Bundle id; defaults to getDefaultBundleId().
 * @returns {string} Absolute path of `<bundleId>.json` inside DEFAULT_BUNDLE_DIR.
 * @throws {Error} When the resolved path falls outside DEFAULT_BUNDLE_DIR.
 */
function getBundlePath(bundleId = getDefaultBundleId()) {
  if (process.env.THUMBGATE_POLICY_BUNDLE_PATH) {
    return process.env.THUMBGATE_POLICY_BUNDLE_PATH;
  }

  // Prevent path traversal (CodeQL S2083). A bare string-prefix comparison is
  // not enough: "<dir>-evil/x.json" also starts with "<dir>". Comparing via
  // path.relative only accepts paths that are genuinely nested in the bundle dir.
  const bundleRoot = path.resolve(DEFAULT_BUNDLE_DIR);
  const candidate = path.resolve(bundleRoot, `${bundleId}.json`);
  const relative = path.relative(bundleRoot, candidate);
  const escapesRoot =
    relative === '' ||
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  if (escapesRoot) {
    throw new Error(`Invalid bundle ID: path traversal detected in "${bundleId}"`);
  }
  return candidate;
}
|
|
37
|
-
|
|
38
|
-
/**
 * Check the structural invariants of a parsed policy bundle.
 *
 * @param {object} bundle - Parsed bundle; needs a string `bundleId` and a
 *   non-empty `intents` array whose entries each carry a string `id`, a `risk`
 *   listed in RISK_LEVELS, and a non-empty `actions` array.
 * @returns {true} Always `true` when the bundle is valid.
 * @throws {Error} With an "Invalid policy bundle: …" message on the first violation.
 */
function validateBundle(bundle) {
  if (!bundle || typeof bundle !== 'object') {
    throw new Error('Invalid policy bundle: expected object');
  }
  if (!bundle.bundleId || typeof bundle.bundleId !== 'string') {
    throw new Error('Invalid policy bundle: missing bundleId');
  }
  if (!Array.isArray(bundle.intents) || bundle.intents.length === 0) {
    throw new Error('Invalid policy bundle: intents must be a non-empty array');
  }

  for (const intent of bundle.intents) {
    // `!intent` guards null/undefined entries so they produce the validation
    // error instead of a raw TypeError from the property access.
    if (!intent || !intent.id || typeof intent.id !== 'string') {
      throw new Error('Invalid policy bundle: intent id is required');
    }
    if (!RISK_LEVELS.includes(intent.risk)) {
      throw new Error(`Invalid policy bundle: unsupported risk '${intent.risk}' for intent '${intent.id}'`);
    }
    if (!Array.isArray(intent.actions) || intent.actions.length === 0) {
      throw new Error(`Invalid policy bundle: intent '${intent.id}' must define actions`);
    }
  }

  return true;
}
|
|
63
|
-
|
|
64
|
-
/**
 * Read, parse and validate a policy bundle from disk.
 *
 * @param {string} [bundleId] - Bundle id; defaults to getDefaultBundleId().
 * @returns {object} The parsed bundle, after validateBundle() accepted it.
 * @throws Propagates errors from path resolution, the file read,
 *   JSON parsing and validation.
 */
function loadPolicyBundle(bundleId = getDefaultBundleId()) {
  const bundlePath = getBundlePath(bundleId);
  const bundle = JSON.parse(fs.readFileSync(bundlePath, 'utf-8'));
  validateBundle(bundle);
  return bundle;
}
|
|
70
|
-
|
|
71
|
-
/**
 * Determine which risk levels need human approval for a given MCP profile.
 *
 * Precedence: `bundle.approval.profileOverrides[mcpProfile]` (when it is an
 * array), then `bundle.approval.requiredRisks` (when it is an array), then
 * the built-in fallback ['high', 'critical'].
 *
 * @param {object} bundle - Policy bundle; `approval` is optional.
 * @param {string} mcpProfile - Profile name used to look up an override.
 * @returns {string[]} Risk levels that require approval.
 */
function getRequiredApprovalRisks(bundle, mcpProfile) {
  const { profileOverrides, requiredRisks } = bundle.approval || {};

  const profileSpecific = profileOverrides ? profileOverrides[mcpProfile] : undefined;
  if (Array.isArray(profileSpecific)) {
    return profileSpecific;
  }
  if (Array.isArray(requiredRisks)) {
    return requiredRisks;
  }
  return ['high', 'critical'];
}
|
|
78
|
-
|
|
79
|
-
/**
 * Pass an MCP profile name through getAllowedTools() and hand it back.
 *
 * @param {string} profile - MCP profile name.
 * @returns {string} The same `profile` value.
 */
function assertKnownMcpProfile(profile) {
  // The lookup result is deliberately discarded: the call is made only so
  // that any error it raises surfaces here.
  // NOTE(review): presumably getAllowedTools throws for unknown profiles —
  // confirm against ./mcp-policy.
  void getAllowedTools(profile);
  return profile;
}
|
|
83
|
-
|
|
84
|
-
/**
 * Summarise every intent of a policy bundle for the active MCP profile.
 *
 * @param {object} [options]
 * @param {string} [options.bundleId] - Bundle to load (loadPolicyBundle default otherwise).
 * @param {string} [options.mcpProfile] - Profile name; falls back to getActiveMcpProfile().
 * @param {string} [options.partnerProfile] - Forwarded to buildPartnerStrategy().
 * @returns {object} `{ bundleId, mcpProfile, partnerProfile, partnerStrategy, intents }`
 *   where each intent entry holds id, description, risk, actionCount and requiresApproval.
 */
function listIntents(options = {}) {
  const { bundleId, mcpProfile, partnerProfile } = options;

  const bundle = loadPolicyBundle(bundleId);
  const profile = assertKnownMcpProfile(mcpProfile || getActiveMcpProfile());
  const approvalRisks = getRequiredApprovalRisks(bundle, profile);
  const strategy = buildPartnerStrategy({
    partnerProfile,
    tokenBudget: DEFAULT_TOKEN_BUDGET,
  });

  const summariseIntent = ({ id, description, risk, actions }) => ({
    id,
    description,
    risk,
    actionCount: actions.length,
    requiresApproval: approvalRisks.includes(risk),
  });

  return {
    bundleId: bundle.bundleId,
    mcpProfile: profile,
    partnerProfile: strategy.profile,
    partnerStrategy: {
      verificationMode: strategy.verificationMode,
      recommendedChecks: strategy.recommendedChecks,
    },
    intents: bundle.intents.map(summariseIntent),
  };
}
|
|
110
|
-
|
|
111
|
-
/* ── Token Budget Defaults ──────────────────────────────────────── */
const DEFAULT_TOKEN_BUDGET = {
  total: 12000,
  perAction: 4000,
  contextPack: 6000,
};

/**
 * Merge caller-supplied token limits over DEFAULT_TOKEN_BUDGET.
 *
 * An override is applied only when it is a number greater than zero; any
 * other value (missing, string, zero, negative, NaN) keeps the default.
 *
 * @param {object|null} [overrides] - Optional `{ total, perAction, contextPack }`.
 *   `null` and non-object values are treated as "no overrides" so that
 *   JSON-sourced options (where an absent budget is often `null`) do not throw.
 * @returns {{total: number, perAction: number, contextPack: number}} A fresh
 *   budget object; DEFAULT_TOKEN_BUDGET itself is never mutated.
 */
function resolveTokenBudget(overrides = {}) {
  const budget = { ...DEFAULT_TOKEN_BUDGET };
  if (!overrides || typeof overrides !== 'object') {
    return budget;
  }

  for (const key of ['total', 'perAction', 'contextPack']) {
    const value = overrides[key];
    if (typeof value === 'number' && value > 0) {
      budget[key] = value;
    }
  }
  return budget;
}
|
|
125
|
-
|
|
126
|
-
/* ── Planning Decomposition ────────────────────────────────────── */
|
|
127
|
-
|
|
128
|
-
/**
 * Group an ordered action list into phases of consecutive same-`kind` actions.
 *
 * @param {Array<{kind: *}>} actions - Ordered actions; anything that is not a
 *   non-empty array yields an empty result.
 * @returns {Array<{phaseIndex: number, kind: *, parallel: boolean, actions: Array}>}
 *   One entry per run of equal `kind`, in input order. `parallel` is true
 *   when the phase holds more than one action.
 */
function decomposeActions(actions) {
  if (!Array.isArray(actions) || actions.length === 0) {
    return [];
  }

  const runs = [];
  for (const action of actions) {
    const lastRun = runs[runs.length - 1];
    if (lastRun && lastRun.kind === action.kind) {
      lastRun.actions.push(action);
    } else {
      // A change of kind (or the very first action) opens a new phase.
      runs.push({ kind: action.kind, actions: [action] });
    }
  }

  return runs.map((run, phaseIndex) => ({
    phaseIndex,
    kind: run.kind,
    parallel: run.actions.length > 1,
    actions: run.actions,
  }));
}
|
|
151
|
-
|
|
152
|
-
/**
 * Drop falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array} [values=[]] - Values to merge.
 * @returns {Array} Unique truthy values in their original order.
 */
function mergeUnique(values = []) {
  const truthyValues = values.filter((value) => Boolean(value));
  return [...new Set(truthyValues)];
}
|
|
155
|
-
|
|
156
|
-
/**
 * Build an execution plan for one intent of a policy bundle.
 *
 * The plan combines: the approval checkpoint decision, the action order
 * (the bundle's own order for the 'balanced' partner profile, the ranked order
 * otherwise), the phase decomposition, code-graph impact hints, the matching
 * kill-switch gates, and the delegation verdict from evaluateDelegation().
 *
 * @param {object} [options]
 * @param {string} options.intentId - Id of the intent to plan (required).
 * @param {string} [options.bundleId] - Bundle to load.
 * @param {string} [options.mcpProfile] - Profile name; falls back to getActiveMcpProfile().
 * @param {string} [options.context] - Free-text context, trimmed and echoed in the plan.
 * @param {boolean} [options.approved] - Only a strict `true` counts as approval.
 * @param {object} [options.tokenBudget] - Overrides for resolveTokenBudget().
 * @param {string} [options.delegationMode] - Passed through normalizeDelegationMode().
 * @param {string} [options.partnerProfile] - Forwarded to buildPartnerStrategy().
 * @param {string} [options.modelPath] - Forwarded to rankActions().
 * @param {string} [options.repoPath] - Forwarded to the code-graph and delegation helpers.
 * @returns {object} The base plan plus executionMode, delegationEligible,
 *   delegationScore, delegationReason, delegateProfile and handoffContract.
 * @throws {Error} 'intentId is required' or `Unknown intent: <id>`; errors from
 *   the helpers it calls propagate unchanged.
 */
function planIntent(options = {}) {
  const bundle = loadPolicyBundle(options.bundleId);
  const profile = assertKnownMcpProfile(options.mcpProfile || getActiveMcpProfile());
  const intentId = String(options.intentId || '').trim();
  const context = String(options.context || '').trim();
  const approved = options.approved === true;
  const tokenBudget = resolveTokenBudget(options.tokenBudget);
  const delegationMode = normalizeDelegationMode(options.delegationMode);

  if (!intentId) {
    throw new Error('intentId is required');
  }

  const intent = bundle.intents.find((item) => item.id === intentId);
  if (!intent) {
    throw new Error(`Unknown intent: ${intentId}`);
  }

  const requiredRisks = getRequiredApprovalRisks(bundle, profile);
  const requiresApproval = requiredRisks.includes(intent.risk);
  const checkpointRequired = requiresApproval && !approved;
  const partnerStrategy = buildPartnerStrategy({
    partnerProfile: options.partnerProfile,
    tokenBudget,
  });
  const rankedActions = rankActions(intent.actions, {
    modelPath: options.modelPath,
    partnerStrategy,
  });
  const plannedActions = partnerStrategy.profile === 'balanced'
    ? intent.actions
    : rankedActions.ranked;
  const phases = decomposeActions(plannedActions);
  const codegraphImpact = analyzeCodeGraphImpact({
    intentId,
    context,
    repoPath: options.repoPath,
  });
  const partnerChecks = mergeUnique([
    ...partnerStrategy.recommendedChecks,
    ...codegraphImpact.verificationHints,
  ]);
  const enrichedPartnerStrategy = {
    ...partnerStrategy,
    recommendedChecks: partnerChecks,
  };

  // Kill-switch selection. Both values below are the same for every gate, so
  // they are computed once instead of inside the filter callback. Actions
  // without a string name cannot match any trigger and are skipped rather
  // than crashing on `.toLowerCase()`.
  const isHighRisk = ['high', 'critical'].includes(intent.risk);
  const loweredActionNames = plannedActions
    .map((action) => action.name)
    .filter((name) => typeof name === 'string')
    .map((name) => name.toLowerCase());
  const killSwitches = loadGatesConfig().gates
    .filter((gate) => {
      // High/critical intents always carry every high/critical gate.
      if (isHighRisk && (gate.severity === 'high' || gate.severity === 'critical')) {
        return true;
      }
      // Otherwise a gate applies when its trigger text mentions a planned action.
      if (typeof gate.trigger !== 'string' || gate.trigger === '') {
        return false;
      }
      const loweredTrigger = gate.trigger.toLowerCase();
      return loweredActionNames.some((name) => loweredTrigger.includes(name));
    })
    .map((gate) => ({
      id: gate.id,
      layer: gate.layer || 'Execution',
      action: gate.action,
      severity: gate.severity,
    }));

  const basePlan = {
    bundleId: bundle.bundleId,
    mcpProfile: profile,
    partnerProfile: enrichedPartnerStrategy.profile,
    generatedAt: new Date().toISOString(),
    status: checkpointRequired ? 'checkpoint_required' : 'ready',
    intent: {
      id: intent.id,
      description: intent.description,
      risk: intent.risk,
    },
    context,
    requiresApproval,
    approved,
    checkpoint: checkpointRequired
      ? {
        type: 'human_approval',
        reason: `Intent '${intent.id}' has risk '${intent.risk}' under profile '${profile}'.`,
        requiredForRiskLevels: requiredRisks,
      }
      : null,
    actions: plannedActions,
    phases,
    tokenBudget: enrichedPartnerStrategy.tokenBudget || tokenBudget,
    partnerStrategy: enrichedPartnerStrategy,
    actionScores: rankedActions.scores,
    codegraphImpact,
    killSwitches,
  };
  const delegation = evaluateDelegation({
    delegationMode,
    plan: basePlan,
    mcpProfile: profile,
    context,
    repoPath: options.repoPath,
  });

  return {
    ...basePlan,
    executionMode: delegation.executionMode,
    delegationEligible: delegation.delegationEligible,
    delegationScore: delegation.delegationScore,
    delegationReason: delegation.delegationReason,
    delegateProfile: delegation.delegateProfile,
    handoffContract: delegation.handoffContract,
  };
}
|
|
262
|
-
|
|
263
|
-
const ACTION_CATEGORY_MAP = {
|
|
264
|
-
capture_feedback: 'code_edit',
|
|
265
|
-
feedback_summary: 'debugging',
|
|
266
|
-
search_lessons: 'search',
|
|
267
|
-
retrieve_lessons: 'search',
|
|
268
|
-
search_thumbgate: 'search',
|
|
269
|
-
prevention_rules: 'security',
|
|
270
|
-
construct_context_pack: 'architecture',
|
|
271
|
-
export_dpo_pairs: 'testing',
|
|
272
|
-
export_databricks_bundle: 'testing',
|
|
273
|
-
context_provenance: 'search',
|
|
274
|
-
evaluate_context_pack: 'pr_review',
|
|
275
|
-
};
|
|
276
|
-
|
|
277
|
-
/**
 * Location of the feedback model file used when no model path is supplied.
 *
 * @returns {string} `feedback_model.json` inside the directory returned by
 *   resolveFeedbackDir().
 */
function getDefaultModelPath() {
  return path.join(resolveFeedbackDir(), 'feedback_model.json');
}
|
|
281
|
-
|
|
282
|
-
/**
 * Sort priority of an action under a partner strategy (lower sorts first).
 *
 * Only the 'evidence_first' verification mode reorders anything: there the
 * `construct_context_pack` and `context_provenance` actions get priority 0.
 * Every other combination yields 1.
 *
 * @param {{name: string}} action - Action being ranked.
 * @param {{verificationMode: string}} partnerStrategy - Active partner strategy.
 * @returns {0|1} Priority bucket.
 */
function getPartnerActionPriority(action, partnerStrategy) {
  const evidenceFirst = Boolean(action)
    && Boolean(partnerStrategy)
    && partnerStrategy.verificationMode === 'evidence_first';
  if (!evidenceFirst) {
    return 1;
  }

  const evidenceActions = ['construct_context_pack', 'context_provenance'];
  return evidenceActions.includes(action.name) ? 0 : 1;
}
|
|
293
|
-
|
|
294
|
-
/**
 * Score and order actions using the feedback model's sampled posteriors.
 *
 * Each action's score is `0.7 * categoryScore + 0.3 * partnerScore + partnerBias`,
 * clamped to [0, 1]. A category (or partner category) that has no sampled
 * posterior falls back to 0.5. Results are ordered by partner priority
 * (ascending), then score (descending), then original position.
 *
 * @param {Array<{name: string}>} actions - Actions to score.
 * @param {string} [modelPath] - Model file; defaults to getDefaultModelPath().
 * @param {object} [options]
 * @param {object} [options.partnerStrategy] - Pre-built strategy; built from
 *   `options.partnerProfile` when omitted.
 * @param {string} [options.partnerProfile] - Used only when no strategy is given.
 * @returns {Array<object>} Entries with action, category, actionScore,
 *   partnerProfile, partnerCategory, partnerScore, partnerBias,
 *   partnerPriority, score and index.
 */
function scoreActions(actions, modelPath, options = {}) {
  const partnerStrategy = options.partnerStrategy || buildPartnerStrategy({
    partnerProfile: options.partnerProfile,
  });
  const model = loadModel(modelPath || getDefaultModelPath());
  const posteriors = samplePosteriors(model);
  const posteriorFor = (category) => (
    posteriors[category] !== undefined ? posteriors[category] : 0.5
  );
  const partnerScore = posteriorFor(partnerStrategy.partnerCategory);

  const scored = actions.map((action, index) => {
    // Own-property lookup only: a plain `MAP[name] || default` would resolve
    // inherited keys such as "toString" or "constructor" to functions.
    const category = Object.prototype.hasOwnProperty.call(ACTION_CATEGORY_MAP, action.name)
      ? ACTION_CATEGORY_MAP[action.name]
      : 'uncategorized';
    const categoryScore = posteriorFor(category);
    const partnerBias = getPartnerActionBias(action, partnerStrategy);
    const blended = (categoryScore * 0.7) + (partnerScore * 0.3) + partnerBias;
    return {
      action,
      category,
      actionScore: categoryScore,
      partnerProfile: partnerStrategy.profile,
      partnerCategory: partnerStrategy.partnerCategory,
      partnerScore,
      partnerBias,
      partnerPriority: getPartnerActionPriority(action, partnerStrategy),
      score: Math.max(0, Math.min(1, blended)),
      index,
    };
  });

  return scored.sort((a, b) => {
    if (a.partnerPriority !== b.partnerPriority) {
      return a.partnerPriority - b.partnerPriority;
    }
    return b.score - a.score || a.index - b.index;
  });
}
|
|
328
|
-
|
|
329
|
-
/**
 * Rank actions and return both the ordered actions and their score breakdown.
 *
 * @param {Array<{name: string}>} actions - Actions to rank.
 * @param {object} [options]
 * @param {string} [options.modelPath] - Model file; defaults to getDefaultModelPath().
 * @param {object} [options.partnerStrategy] - Pre-built strategy; built from
 *   `options.partnerProfile` when omitted.
 * @param {string} [options.partnerProfile] - Used only when no strategy is given.
 * @returns {{ranked: Array, scores: Array<object>}} `ranked` holds the actions
 *   in scoreActions() order; `scores` holds the matching per-action breakdown
 *   (name, category, partner fields and score) in the same order.
 */
function rankActions(actions, options = {}) {
  const resolvedModelPath = options.modelPath || getDefaultModelPath();
  const strategy = options.partnerStrategy || buildPartnerStrategy({
    partnerProfile: options.partnerProfile,
  });
  const scoredEntries = scoreActions(actions, resolvedModelPath, { partnerStrategy: strategy });

  const ranked = [];
  const scores = [];
  for (const entry of scoredEntries) {
    ranked.push(entry.action);
    scores.push({
      name: entry.action.name,
      category: entry.category,
      partnerProfile: entry.partnerProfile,
      partnerCategory: entry.partnerCategory,
      actionScore: entry.actionScore,
      partnerScore: entry.partnerScore,
      partnerBias: entry.partnerBias,
      partnerPriority: entry.partnerPriority,
      score: entry.score,
    });
  }
  return { ranked, scores };
}
|
|
350
|
-
|
|
351
|
-
module.exports = {
|
|
352
|
-
DEFAULT_BUNDLE_DIR,
|
|
353
|
-
DEFAULT_TOKEN_BUDGET,
|
|
354
|
-
RISK_LEVELS,
|
|
355
|
-
getDefaultBundleId,
|
|
356
|
-
getBundlePath,
|
|
357
|
-
validateBundle,
|
|
358
|
-
loadPolicyBundle,
|
|
359
|
-
getRequiredApprovalRisks,
|
|
360
|
-
assertKnownMcpProfile,
|
|
361
|
-
listIntents,
|
|
362
|
-
planIntent,
|
|
363
|
-
resolveTokenBudget,
|
|
364
|
-
decomposeActions,
|
|
365
|
-
ACTION_CATEGORY_MAP,
|
|
366
|
-
scoreActions,
|
|
367
|
-
rankActions,
|
|
368
|
-
};
|
|
369
|
-
|
|
370
|
-
// CLI entry point.
//   --intent=<id>     plan that intent; without it, the bundle's intents are listed
//   --profile=<name>  MCP profile override
//   --bundle=<id>     policy bundle id override
//   --approved        mark the plan as already approved
if (require.main === module) {
  const args = process.argv.slice(2);

  // Text after `--<name>=` of the first matching argument, or undefined when
  // the flag is absent (an empty value such as `--intent=` yields '').
  const readFlag = (name) => {
    const prefix = `--${name}=`;
    const match = args.find((arg) => arg.startsWith(prefix));
    return match === undefined ? undefined : match.slice(prefix.length);
  };

  const intentId = readFlag('intent');
  const selection = {
    mcpProfile: readFlag('profile'),
    bundleId: readFlag('bundle'),
  };

  // No process.exit() here: forcing the exit right after console.log can drop
  // output that is still buffered when stdout is a pipe. Letting the script
  // run off the end exits with code 0 once the write has flushed.
  const output = intentId === undefined
    ? listIntents(selection)
    : planIntent({
      intentId,
      ...selection,
      approved: args.includes('--approved'),
    });
  console.log(JSON.stringify(output, null, 2));
}
|
|
@@ -1,263 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Cross-encoder reranker for lesson retrieval.
|
|
5
|
-
*
|
|
6
|
-
* Unlike the bi-encoders already in use (Jaccard + bigram Jaccard), a
|
|
7
|
-
* cross-encoder processes the (query, lesson) pair jointly — so it can
|
|
8
|
-
* catch relevance signals that independent scoring misses:
|
|
9
|
-
*
|
|
10
|
-
* - Field-weighted BM25: a query term in `whatWentWrong` is worth more
|
|
11
|
-
* than the same term in `tags`
|
|
12
|
-
* - Synonym/alias expansion: "force-push" ↔ "push --force", "deploy" ↔
|
|
13
|
-
* "deployment", etc.
|
|
14
|
-
* - Signal coherence: failure-sounding queries boost negative-signal lessons
|
|
15
|
-
* - Tool name joint scoring: query toolName × lesson toolsUsed
|
|
16
|
-
* - Score blending: reranked score is blended with the original retrieval
|
|
17
|
-
* score so we never fully discard the bi-encoder's signal
|
|
18
|
-
*
|
|
19
|
-
* Usage:
|
|
20
|
-
* const { rerankLessons } = require('./lesson-reranker');
|
|
21
|
-
* const reranked = rerankLessons(query, candidates, { topK: 5, toolName });
|
|
22
|
-
*/
|
|
23
|
-
|
|
24
|
-
// BM25 hyper-parameters
|
|
25
|
-
const BM25_K1 = 1.5; // term saturation
|
|
26
|
-
const BM25_B = 0.75; // length normalisation
|
|
27
|
-
|
|
28
|
-
// Weight given to each lesson field when scoring a (query, lesson) pair.
|
|
29
|
-
// Higher weight = query terms appearing in that field contribute more to score.
|
|
30
|
-
const FIELD_WEIGHTS = {
|
|
31
|
-
whatWentWrong: 3.0,
|
|
32
|
-
whatToChange: 2.5,
|
|
33
|
-
howToAvoid: 2.0,
|
|
34
|
-
whatWorked: 2.0,
|
|
35
|
-
summary: 1.5,
|
|
36
|
-
content: 1.5,
|
|
37
|
-
context: 1.2,
|
|
38
|
-
title: 1.0,
|
|
39
|
-
rootCause: 1.0,
|
|
40
|
-
reasoning: 0.8,
|
|
41
|
-
tags: 0.5,
|
|
42
|
-
category: 0.4,
|
|
43
|
-
};
|
|
44
|
-
|
|
45
|
-
// Synonym clusters: any term in a group matches all others.
|
|
46
|
-
const SYNONYM_GROUPS = [
|
|
47
|
-
['force-push', 'force push', 'push --force', 'git push --force', 'force_push'],
|
|
48
|
-
['main', 'main branch', 'master', 'trunk', 'protected branch'],
|
|
49
|
-
['env', '.env', 'environment variable', 'env var', 'dotenv', 'secret'],
|
|
50
|
-
['deploy', 'deployment', 'ship', 'release', 'publish', 'rollout'],
|
|
51
|
-
['db', 'database', 'sqlite', 'postgres', 'postgresql', 'migration', 'migrate'],
|
|
52
|
-
['test', 'tests', 'test suite', 'spec', 'failing test', 'test failure'],
|
|
53
|
-
['ci', 'ci/cd', 'pipeline', 'github actions', 'workflow', 'build'],
|
|
54
|
-
['lint', 'linter', 'eslint', 'prettier', 'format'],
|
|
55
|
-
['auth', 'authentication', 'authorization', 'token', 'api key', 'credential'],
|
|
56
|
-
['delete', 'remove', 'rm', 'drop', 'destroy', 'wipe'],
|
|
57
|
-
['merge', 'pull request', 'pr', 'rebase', 'squash'],
|
|
58
|
-
];
|
|
59
|
-
|
|
60
|
-
// Regex patterns that indicate the query is about a failure/mistake.
|
|
61
|
-
const FAILURE_PATTERN = /fail|error|wrong|broken|mistake|bad|incorrect|problem|issue|bug|crash|broke|exception/i;
|
|
62
|
-
|
|
63
|
-
/**
 * Tokenise text into lowercase word-like tokens of length >= 2.
 *
 * Every character that is neither a word character nor whitespace (hyphens,
 * dots, slashes, …) is turned into a space, and underscores are treated as
 * delimiters too, so "force-push" and "force_push" both become
 * ["force", "push"].
 *
 * Exported so tests can verify expansion behaviour.
 *
 * @param {*} text - Text to tokenise. Falsy input yields no tokens; other
 *   non-string values are converted with String() instead of throwing.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  if (!text) return [];
  const normalised = String(text)
    .toLowerCase()
    .replace(/[^\w\s]/g, ' ');
  return normalised
    .split(/[\s_]+/)
    .filter((token) => token.length >= 2);
}
|
|
77
|
-
|
|
78
|
-
/**
 * Expand a set of query tokens with synonyms from SYNONYM_GROUPS.
 *
 * A term activates a group when it equals one of the group's
 * whitespace-separated words, or when it contains such a word that is at
 * least four characters long ("deployment" contains "deploy"). Shorter words
 * ("rm", "pr", "ci", "db", "env") must match exactly: as substrings they
 * produce false expansions — "format" contains "rm", "prettier" contains "pr".
 *
 * @param {string[]} terms - Tokenised query terms; non-string entries are
 *   kept in the output but never trigger an expansion.
 * @returns {string[]} Deduplicated terms: the originals first, followed by
 *   the tokens of every activated group in group order.
 */
function expandTerms(terms) {
  const MIN_SUBSTRING_WORD_LENGTH = 4;
  const activates = (term, word) => word === term
    || (word.length >= MIN_SUBSTRING_WORD_LENGTH && term.includes(word));

  const expanded = new Set(terms);
  for (const term of terms) {
    if (typeof term !== 'string') continue;
    for (const group of SYNONYM_GROUPS) {
      const groupMatches = group.some(
        (synonym) => synonym.split(/\s+/).some((word) => activates(term, word)),
      );
      if (!groupMatches) continue;
      for (const synonym of group) {
        for (const token of tokenize(synonym)) {
          expanded.add(token);
        }
      }
    }
  }
  return [...expanded];
}
|
|
93
|
-
|
|
94
|
-
/**
 * Read one field of a lesson candidate as text.
 *
 * Looks at the nested `candidate.lesson[field]` first and falls back to the
 * flat `candidate[field]`; a falsy value at both levels yields ''.
 *
 * @param {object} candidate - Lesson candidate, flat or `{ lesson: {...} }`.
 * @param {string} field - Field name to read.
 * @returns {string} Array values joined with single spaces, anything else
 *   converted with String().
 */
function getField(candidate, field) {
  const nestedValue = candidate.lesson ? candidate.lesson[field] : undefined;
  const raw = nestedValue || candidate[field] || '';
  return Array.isArray(raw) ? raw.join(' ') : String(raw);
}
|
|
105
|
-
|
|
106
|
-
/**
 * Compute field-weighted BM25 scores for a list of candidates (BM25F variant).
 *
 * For every query term, the per-field normalised term frequency is weighted by
 * FIELD_WEIGHTS and summed, then scaled by a document-level IDF. Document
 * frequency counts a document once per term regardless of how many fields
 * contain it, which keeps `ln((N - df + 0.5) / (df + 0.5) + 1)` strictly
 * positive for every df in 1..N.
 *
 * @param {string[]} queryTerms - Tokenised (and possibly expanded) query terms.
 *   A term that appears more than once contributes once per occurrence.
 * @param {Array<object>} candidates - Lesson candidates readable via getField().
 * @returns {Array<{candidate: object, bm25Score: number}>} One entry per
 *   candidate, in input order; candidates matching no term score 0.
 */
function fieldWeightedBM25(queryTerms, candidates) {
  const N = candidates.length;
  if (N === 0) return [];

  const fieldEntries = Object.entries(FIELD_WEIGHTS);

  // Tokenise every field once and keep only what scoring needs: the field
  // length and a term -> count table, so lookups per query term are O(1).
  const docStats = candidates.map((candidate) => {
    const perField = {};
    for (const [field] of fieldEntries) {
      const tokens = tokenize(getField(candidate, field));
      const counts = new Map();
      for (const token of tokens) {
        counts.set(token, (counts.get(token) || 0) + 1);
      }
      perField[field] = { length: tokens.length, counts };
    }
    return perField;
  });

  // Per-field average token length across all documents.
  const avgFieldLen = {};
  for (const [field] of fieldEntries) {
    const total = docStats.reduce((sum, doc) => sum + doc[field].length, 0);
    avgFieldLen[field] = total / N || 1; // fallback to 1 to avoid /0
  }

  // Document-level df: number of documents containing the term in any field.
  const df = new Map();
  for (const doc of docStats) {
    const termsInDoc = new Set();
    for (const [field] of fieldEntries) {
      for (const term of doc[field].counts.keys()) {
        termsInDoc.add(term);
      }
    }
    for (const term of termsInDoc) {
      df.set(term, (df.get(term) || 0) + 1);
    }
  }

  return candidates.map((candidate, i) => {
    let score = 0;

    for (const qTerm of queryTerms) {
      const termDf = df.get(qTerm) || 0;
      if (termDf === 0) continue;

      const idf = Math.log((N - termDf + 0.5) / (termDf + 0.5) + 1);

      // BM25F: weighted sum of per-field normalised TF, then scaled by IDF.
      let weightedTF = 0;
      for (const [field, fieldWeight] of fieldEntries) {
        const { length: fieldLen, counts } = docStats[i][field];
        const tf = counts.get(qTerm) || 0;
        if (tf === 0) continue;

        const lengthNorm = 1 - BM25_B + BM25_B * fieldLen / avgFieldLen[field];
        const normTF = tf / (tf + BM25_K1 * lengthNorm);
        weightedTF += fieldWeight * normTF;
      }

      score += idf * weightedTF;
    }

    return { candidate, bm25Score: score };
  });
}
|
|
190
|
-
|
|
191
|
-
/**
 * Rerank lesson candidates by blending a field-weighted BM25 score with the
 * score they arrived with.
 *
 * Steps: tokenise and synonym-expand the query (plus tool name), score every
 * candidate with fieldWeightedBM25(), normalise by the best BM25 score, blend
 * with the original `relevanceScore`/`score`, then apply two multipliers:
 * x1.2 when the query matches FAILURE_PATTERN and the lesson is negative,
 * x1.3 when one of the lesson's tool names contains `toolName`.
 *
 * @param {string} query - The original retrieval query / action context
 * @param {Array} candidates - Lesson objects from the retrieval stage
 * @param {object} options
 * @param {number} [options.topK=5] - How many results to return
 * @param {string} [options.toolName] - Tool name from the triggering hook call
 * @param {number} [options.blendWeight=0.7] - Weight given to BM25 score vs original
 *   retrieval score (0 = all original, 1 = all BM25)
 * @returns {Array} Shallow copies of the best `topK` candidates, each with a
 *   `rerankedScore` field (rounded to 6 decimals), best first. A single
 *   candidate is scored like any other so the field is always present.
 */
function rerankLessons(query, candidates, options = {}) {
  const {
    topK = 5,
    toolName = '',
    blendWeight = 0.7,
  } = options;

  if (!candidates || candidates.length === 0) return [];

  // Build the expanded query term set.
  const queryText = `${query || ''}${toolName ? ` ${toolName}` : ''}`;
  const queryTerms = expandTerms(tokenize(queryText));

  const isFailureQuery = FAILURE_PATTERN.test(query || '');
  const toolNeedle = toolName ? String(toolName).toLowerCase() : '';

  const bm25Results = fieldWeightedBM25(queryTerms, candidates);

  // Normalise BM25 scores to [0, 1]; the 1e-9 floor avoids dividing by zero
  // when nothing matched.
  const maxBm25 = bm25Results.reduce((max, result) => Math.max(max, result.bm25Score), 1e-9);

  const reranked = bm25Results.map(({ candidate, bm25Score }) => {
    const normBm25 = bm25Score / maxBm25;

    // Original retrieval score (field name differs between retrieval paths).
    const origScore = candidate.relevanceScore ?? candidate.score ?? 0;

    let finalScore = blendWeight * normBm25 + (1 - blendWeight) * origScore;

    // Signal coherence bonus: failure queries → negative lessons rank higher.
    const candidateSignal =
      candidate.signal ||
      (candidate.tags && candidate.tags.includes('negative') ? 'negative' : null);
    if (isFailureQuery && candidateSignal === 'negative') {
      finalScore *= 1.2;
    }

    // Tool name bonus: the lesson mentions the tool that triggered the query.
    if (toolNeedle) {
      const lessonTools = [
        ...(candidate.metadata?.toolsUsed || []),
        getField(candidate, 'toolUsed'),
        getField(candidate, 'toolName'),
      ].map((tool) => String(tool || '').toLowerCase());

      if (lessonTools.some((tool) => tool && tool.includes(toolNeedle))) {
        finalScore *= 1.3;
      }
    }

    return { ...candidate, rerankedScore: Number(finalScore.toFixed(6)) };
  });

  return reranked
    .sort((a, b) => b.rerankedScore - a.rerankedScore)
    .slice(0, topK);
}
|
|
262
|
-
|
|
263
|
-
module.exports = { rerankLessons, fieldWeightedBM25, tokenize, expandTerms };
|