codetrap 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -98
- package/docs/installation.md +61 -63
- package/package.json +4 -3
- package/plugins/codetrap-agent/.codex-plugin/plugin.json +2 -3
- package/plugins/codetrap-agent/hooks/post-flight-capture.example.md +19 -17
- package/plugins/codetrap-agent/hooks.json +2 -2
- package/{skills → plugins/codetrap-agent/skills}/codetrap-add/SKILL.md +10 -4
- package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md +14 -3
- package/plugins/codetrap-agent/skills/codetrap-capture-external/SKILL.md +52 -9
- package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md +74 -6
- package/{skills → plugins/codetrap-agent/skills}/codetrap-search/SKILL.md +6 -5
- package/plugins/codetrap-agent/templates/AGENTS.codetrap-maintainer.md +15 -0
- package/plugins/codetrap-agent/templates/AGENTS.codetrap.md +16 -5
- package/scripts/release-preflight.ts +15 -0
- package/scripts/search-policy-sweep.ts +131 -0
- package/src/commands/workflow.ts +172 -68
- package/src/db/embedding-queries.ts +230 -48
- package/src/db/queries.ts +0 -25
- package/src/db/repository.ts +32 -21
- package/src/db/schema.ts +80 -0
- package/src/index.ts +34 -4
- package/src/lib/codex-setup.ts +247 -0
- package/src/lib/command-requests.ts +112 -1
- package/src/lib/config.ts +57 -7
- package/src/lib/constants.ts +1 -1
- package/src/lib/doctor.ts +42 -12
- package/src/lib/embedder.ts +118 -3
- package/src/lib/embedding-health.ts +3 -1
- package/src/lib/embedding-job.ts +3 -0
- package/src/lib/embedding-management.ts +65 -0
- package/src/lib/embedding-runtime.ts +177 -0
- package/src/lib/output-json.ts +0 -2
- package/src/lib/scope-context.ts +12 -6
- package/src/lib/scope-migration.ts +2 -1
- package/src/lib/scope.ts +0 -2
- package/src/lib/search-eval.ts +38 -18
- package/src/lib/search-policy-sweep.ts +563 -0
- package/src/lib/search-policy.ts +0 -4
- package/src/lib/search-service.ts +14 -15
- package/src/lib/session-candidate-document.ts +175 -0
- package/src/lib/session-candidate-scope.ts +6 -0
- package/src/lib/session-capture.ts +298 -32
- package/src/lib/session-codec.ts +1 -8
- package/src/lib/session-operations.ts +83 -60
- package/src/lib/session-review.ts +327 -0
- package/src/lib/session-store.ts +87 -73
- package/src/lib/store.ts +74 -10
- package/src/lib/string-list.ts +3 -0
- package/src/lib/text-lines.ts +7 -0
- package/src/lib/trap-search-document.ts +2 -1
- package/src/lib/value-types.ts +3 -0
- package/src/web/client-review.ts +171 -0
- package/src/web/client-script.ts +426 -51
- package/src/web/client-shell.ts +414 -0
- package/src/web/client-text.ts +112 -0
- package/src/web/project-registry.ts +3 -5
- package/src/web/server.ts +117 -103
- package/src/web/static.ts +364 -19
- package/skills/codetrap-capture-external/SKILL.md +0 -62
- package/skills/codetrap-check/SKILL.md +0 -69
- package/src/lib/embedding-index.ts +0 -53
|
@@ -0,0 +1,563 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import type { TrapSearchResult } from "../domain/trap";
|
|
3
|
+
import { SCOPES, SEARCH_MODES, type Scope, type SearchMode } from "./constants";
|
|
4
|
+
import { isRecord } from "./value-types";
|
|
5
|
+
import {
|
|
6
|
+
defaultEmbeddingRuntime,
|
|
7
|
+
embeddingRuntimeFrom,
|
|
8
|
+
type EmbeddingRuntimeInput,
|
|
9
|
+
} from "./embedding-runtime";
|
|
10
|
+
import {
|
|
11
|
+
DEFAULT_SEARCH_EVAL_FIXTURE,
|
|
12
|
+
EvalEmbedder,
|
|
13
|
+
evaluateSearchFixtureCases,
|
|
14
|
+
readEvalFixture,
|
|
15
|
+
type EvalCaseReport,
|
|
16
|
+
type SearchEvalMetrics,
|
|
17
|
+
} from "./search-eval";
|
|
18
|
+
import { ScopedRepositoryContext } from "./scope-context";
|
|
19
|
+
import {
|
|
20
|
+
DEFAULT_RANKING_CONFIG,
|
|
21
|
+
type RankingConfig,
|
|
22
|
+
} from "./search-policy";
|
|
23
|
+
|
|
24
|
+
/** A ranking configuration paired with the label used to identify it in sweep reports. */
export type RankingCandidate = {
  /** Label printed in summaries and used when comparing against the baseline. */
  name: string;
  /** Ranking configuration evaluated for this candidate. */
  config: RankingConfig;
};
|
|
28
|
+
|
|
29
|
+
/**
 * An expected search hit for a live eval case.
 *
 * Matching tries `id` first and falls back to an exact `title` comparison;
 * `scope` restricts which results are considered.
 */
export type GoldTarget = {
  /** Scope the expected trap lives in; the case scope is used when omitted. */
  scope?: Scope;
  /** Expected trap id. */
  id?: number;
  /** Expected trap title (exact match). */
  title?: string;
};
|
|
34
|
+
|
|
35
|
+
/** One query to run against a live project during a policy sweep. */
export type LiveEvalCase = {
  /** Search text. */
  query: string;
  /** Search mode; "hybrid" is used when omitted. */
  mode?: SearchMode;
  /** Scope to search; the sweep's default scope is used when omitted. */
  scope?: Scope;
  /** Maximum number of results to request; 5 when omitted. */
  limit?: number;
  /** Expected hits. A case without gold targets is run but not scored. */
  gold?: Array<GoldTarget>;
  /** Minimum recall@3 required to pass; defaults to 1 with gold targets, 0 without. */
  minRecallAt3?: number;
  /** Minimum recall@5 required to pass; defaults to 1 with gold targets, 0 without. */
  minRecallAt5?: number;
};
|
|
44
|
+
|
|
45
|
+
/**
 * Per-case evaluation result in a shape shared by fixture and live sweeps,
 * so that candidates can be compared case by case.
 */
export type ComparableSearchCase = {
  /** Identity of the case (scope, mode and query) used to pair it with the baseline. */
  key: string;
  query: string;
  mode: SearchMode;
  /** Set for live cases; fixture cases leave it undefined. */
  scope?: Scope;
  /** True when the case has gold targets and therefore contributes to metrics. */
  scored: boolean;
  recallAt3: number;
  recallAt5: number;
  /** 1 / (rank of the first gold hit), or 0 when no gold target was found. */
  reciprocalRank: number;
  /** Whether the case met its pass criteria. */
  passed: boolean;
  /** Returned results, in rank order, reduced to the fields reports need. */
  topResults: Array<{
    id: number;
    scope?: Scope;
    title: string;
    sources: string[];
    diagnostics: string[];
  }>;
  /** Gold-target bookkeeping warnings collected while matching. */
  warnings: string[];
  /** Error message when evaluating the case failed. */
  error?: string;
};
|
|
59
|
+
|
|
60
|
+
/** A case whose reciprocal rank moved relative to the baseline candidate. */
export type CaseDelta = {
  query: string;
  mode: SearchMode;
  scope?: Scope;
  /** Reciprocal rank under the baseline. */
  before: number;
  /** Reciprocal rank under the candidate being reported. */
  after: number;
  /** Leading results ("#id title") under the baseline. */
  beforeTop: Array<string>;
  /** Leading results ("#id title") under the candidate. */
  afterTop: Array<string>;
};
|
|
69
|
+
|
|
70
|
+
/** Everything measured for a single ranking candidate. */
export type SweepCandidateReport = {
  name: string;
  config: RankingConfig;
  /** Number of evaluated cases, scored or not. */
  total_cases: number;
  /** Number of cases that have gold targets. */
  scored_cases: number;
  /** Aggregate metrics over the cases. */
  metrics: SearchEvalMetrics;
  cases: Array<ComparableSearchCase>;
  /** Cases whose `passed` flag is false. */
  failures: Array<ComparableSearchCase>;
  /** Scored cases whose reciprocal rank dropped versus the baseline. */
  regressions: Array<CaseDelta>;
  /** Scored cases whose reciprocal rank rose versus the baseline. */
  improvements: Array<CaseDelta>;
};
|
|
81
|
+
|
|
82
|
+
/** Result of a whole sweep across all ranking candidates. */
export type PolicySweepReport = {
  /** Whether cases came from an eval fixture or from a live project. */
  mode: "fixture" | "live";
  /** Human-readable origin of the cases (fixture path or "live project"). */
  source: string;
  /** Project directory; only set for live sweeps. */
  cwd?: string;
  candidate_count: number;
  /** The first candidate; every other candidate is compared against it. */
  baseline: SweepCandidateReport;
  /** Reports in the order the candidates were supplied. */
  candidates: Array<SweepCandidateReport>;
  /** The candidate that sorts first under the sweep's candidate ordering. */
  best: SweepCandidateReport;
  /** One-sentence guidance derived from baseline and best. */
  recommendation: string;
};
|
|
92
|
+
|
|
93
|
+
/** Options accepted by `runFixturePolicySweep`. */
type FixtureSweepOptions = {
  /** Fixture file to evaluate; the default search-eval fixture when omitted. */
  fixturePath?: string;
  /** Candidates to sweep; the built-in candidate list when omitted. */
  candidates?: Array<RankingCandidate>;
};
|
|
97
|
+
|
|
98
|
+
/** Options accepted by `runLivePolicySweep`. */
type LiveSweepOptions = {
  /** Project directory handed to the scoped repository context. */
  cwd: string;
  /** Query cases to run; must not be empty. */
  cases: Array<LiveEvalCase>;
  /** Candidates to sweep; the built-in candidate list when omitted. */
  candidates?: Array<RankingCandidate>;
  /** Embedding runtime input; the default embedding runtime when omitted. */
  embeddings?: EmbeddingRuntimeInput;
  /** Scope for cases that do not name one; "project" when omitted. */
  defaultScope?: Scope;
  /** Forwarded unchanged to the scoped repository context. */
  home?: string;
};
|
|
106
|
+
|
|
107
|
+
/** Name / override pairs behind the built-in sweep candidates, in sweep order. */
const POLICY_SWEEP_PATCHES: ReadonlyArray<readonly [string, Partial<RankingConfig>]> = [
  ["default", {}],
  ["title-tag-heavy", { titleTokenBoost: 0.24, tagTokenBoost: 0.28, maxBoost: 0.55 }],
  ["identifier-heavy", { identifierBoost: 0.3, maxBoost: 0.55 }],
  ["scope-heavy", { pathMatchBoost: 0.2, moduleMatchBoost: 0.14, ownerMatchBoost: 0.08, maxBoost: 0.55 }],
  ["severity-light", { severityBoost: { warning: 0, error: 0.02, critical: 0.03 } }],
  ["semantic-loose", { semanticMinScore: 0.2 }],
  ["semantic-strict", { semanticMinScore: 0.4 }],
];

/**
 * Built-in ranking variants swept when the caller supplies none.
 *
 * The first entry ("default") applies no overrides and therefore acts as the
 * baseline; every other entry overrides a few fields of the default config.
 */
export const DEFAULT_POLICY_SWEEP_CANDIDATES: RankingCandidate[] = POLICY_SWEEP_PATCHES.map(
  ([name, patch]) => candidate(name, patch)
);
|
|
138
|
+
|
|
139
|
+
/**
 * Evaluates every ranking candidate against an eval fixture.
 *
 * Candidates are evaluated one after another, each with a fresh `EvalEmbedder`.
 * The first candidate's cases become the baseline for regression/improvement
 * deltas.
 *
 * @param options Fixture path and candidate list; both have defaults.
 * @returns The assembled sweep report in "fixture" mode.
 * @throws When the candidate list is empty (raised while building the report).
 */
export async function runFixturePolicySweep(options: FixtureSweepOptions = {}): Promise<PolicySweepReport> {
  const source = options.fixturePath ?? DEFAULT_SEARCH_EVAL_FIXTURE;
  const fixture = readEvalFixture(source);
  const rankingCandidates = options.candidates ?? DEFAULT_POLICY_SWEEP_CANDIDATES;

  const collected: SweepCandidateReport[] = [];
  let baseline: ComparableSearchCase[] | undefined;
  for (const entry of rankingCandidates) {
    const { cases: evaluated } = await evaluateSearchFixtureCases(fixture, new EvalEmbedder(), entry.config);
    const comparable = evaluated.map((report) => fromEvalCaseReport(report));
    // The first candidate is compared against itself and so yields no deltas.
    baseline = baseline ?? comparable;
    collected.push(candidateReport(entry, comparable, baseline));
  }

  return buildSweepReport("fixture", source, collected);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Evaluates every ranking candidate against a live project.
 *
 * The first candidate's cases become the baseline for regression/improvement
 * deltas. Candidates run sequentially.
 *
 * @param options Project directory, query cases and optional overrides.
 * @returns The assembled sweep report in "live" mode.
 * @throws When no query cases are supplied, or when the candidate list is empty.
 */
export async function runLivePolicySweep(options: LiveSweepOptions): Promise<PolicySweepReport> {
  if (options.cases.length === 0) {
    throw new Error("Live sweep requires at least one query case.");
  }

  const rankingCandidates = options.candidates ?? DEFAULT_POLICY_SWEEP_CANDIDATES;
  // Resolve shared settings once so every candidate sees the same runtime and scope.
  const shared = {
    ...options,
    embeddings: options.embeddings ?? defaultEmbeddingRuntime(),
    defaultScope: options.defaultScope ?? "project",
  };

  const collected: SweepCandidateReport[] = [];
  let baseline: ComparableSearchCase[] | undefined;
  for (const entry of rankingCandidates) {
    const evaluated = await evaluateLiveCases({ ...shared, ranking: entry.config });
    baseline = baseline ?? evaluated;
    collected.push(candidateReport(entry, evaluated, baseline));
  }

  return buildSweepReport("live", "live project", collected, options.cwd);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Loads live eval cases from a JSON file.
 *
 * The file may contain either a bare array of cases or an object with a
 * `queries` array. Every entry is validated and normalized.
 *
 * @param path Path of the JSON file, read as UTF-8.
 * @returns The normalized cases, in file order.
 * @throws When the file cannot be read or parsed, has neither accepted shape,
 *         or contains an invalid case.
 */
export function readLiveEvalCases(path: string): LiveEvalCase[] {
  const parsed = JSON.parse(readFileSync(path, "utf-8")) as unknown;

  let entries: unknown[] | null = null;
  if (Array.isArray(parsed)) {
    entries = parsed;
  } else if (isRecord(parsed) && Array.isArray(parsed.queries)) {
    entries = parsed.queries;
  }

  if (entries === null) {
    throw new Error("Live queries file must be an array or an object with a queries array.");
  }
  return entries.map((entry) => normalizeLiveEvalCase(entry));
}
|
|
188
|
+
|
|
189
|
+
/**
 * Renders a sweep report as plain text, one line per entry.
 *
 * The output starts with a header (mode, source, optional cwd), followed by
 * the baseline/best summaries and the recommendation, then one summary line
 * per candidate. Under each candidate, up to three regressions, improvements
 * and failing queries are listed when present.
 *
 * @param report Report to render.
 * @returns The lines joined with "\n" (no trailing newline).
 */
export function formatPolicySweepReport(report: PolicySweepReport): string {
  const header = [`Search policy sweep (${report.mode})`, `Source: ${report.source}`];
  const location = report.cwd ? [`cwd: ${report.cwd}`] : [];
  const overview = [
    `Candidates: ${report.candidate_count}`,
    `Baseline: ${summaryLine(report.baseline)}`,
    `Best: ${summaryLine(report.best)}`,
    `Recommendation: ${report.recommendation}`,
    "Results:",
  ];

  const details = report.candidates.flatMap((entry) => {
    const block = [` - ${summaryLine(entry)}`];
    if (entry.regressions.length > 0) {
      block.push(` regressions: ${formatDeltas(entry.regressions)}`);
    }
    if (entry.improvements.length > 0) {
      block.push(` improvements: ${formatDeltas(entry.improvements)}`);
    }
    if (entry.failures.length > 0) {
      const queries = entry.failures.slice(0, 3).map((failed) => failed.query);
      block.push(` failures: ${queries.join("; ")}`);
    }
    return block;
  });

  return [...header, ...location, ...overview, ...details].join("\n");
}
|
|
218
|
+
|
|
219
|
+
/**
 * Builds a named candidate by layering `patch` over the default ranking config.
 *
 * `severityBoost` is merged key by key, so a patch may override individual
 * severities without dropping the others.
 *
 * @param name Label for the candidate.
 * @param patch Overrides applied on top of `DEFAULT_RANKING_CONFIG`.
 */
function candidate(name: string, patch: Partial<RankingConfig>): RankingCandidate {
  const severityBoost = {
    ...DEFAULT_RANKING_CONFIG.severityBoost,
    ...patch.severityBoost,
  };
  const config: RankingConfig = { ...DEFAULT_RANKING_CONFIG, ...patch, severityBoost };
  return { name, config };
}
|
|
232
|
+
|
|
233
|
+
/**
 * Runs every live case against the project with one ranking configuration.
 *
 * Cases are searched sequentially. A search that throws does not abort the
 * sweep: the case is recorded as failed, with zero scores and the error
 * message attached.
 *
 * @param options Live sweep options plus the ranking config under test.
 * @returns One comparable result per input case, in input order.
 */
async function evaluateLiveCases(options: LiveSweepOptions & { ranking: RankingConfig }): Promise<ComparableSearchCase[]> {
  const fallbackScope = options.defaultScope ?? "project";
  const context = new ScopedRepositoryContext(
    options.cwd,
    embeddingRuntimeFrom(options.embeddings),
    options.home,
    options.ranking
  );

  const reports: ComparableSearchCase[] = [];
  for (const liveCase of options.cases) {
    const scope = liveCase.scope ?? fallbackScope;
    const mode = liveCase.mode ?? "hybrid";

    let report: ComparableSearchCase;
    try {
      const hits = await context.repositoryFor(scope).search(liveCase.query, {
        mode,
        scope,
        limit: liveCase.limit ?? 5,
      });
      report = liveCaseReport(liveCase, mode, scope, hits);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      report = {
        key: caseKey(liveCase.query, mode, scope),
        query: liveCase.query,
        mode,
        scope,
        // Still counted as scored when gold targets exist, so the failure drags metrics down.
        scored: (liveCase.gold ?? []).length > 0,
        recallAt3: 0,
        recallAt5: 0,
        reciprocalRank: 0,
        passed: false,
        topResults: [],
        warnings: [],
        error: message,
      };
    }
    reports.push(report);
  }
  return reports;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Scores the results of one live search against the case's gold targets.
 *
 * Without gold targets the case is unscored: both recalls are reported as 1,
 * the reciprocal rank as 0, and the default pass thresholds are 0.
 *
 * @param input The case that was run.
 * @param mode Search mode that was used.
 * @param scope Scope that was searched; also the default scope for gold targets.
 * @param results Search results in rank order.
 */
function liveCaseReport(
  input: LiveEvalCase,
  mode: SearchMode,
  scope: Scope,
  results: TrapSearchResult[]
): ComparableSearchCase {
  const targets = input.gold ?? [];
  const hasGold = targets.length > 0;
  const collectedWarnings = new Set<string>();

  let recallAt3 = 1;
  let recallAt5 = 1;
  let firstRank = -1;
  if (hasGold) {
    recallAt3 = recall(targets, results.slice(0, 3), collectedWarnings, scope);
    recallAt5 = recall(targets, results.slice(0, 5), collectedWarnings, scope);
    firstRank = firstMatchRank(targets, results, collectedWarnings, scope);
  }

  const defaultThreshold = hasGold ? 1 : 0;
  const requiredAt3 = input.minRecallAt3 ?? defaultThreshold;
  const requiredAt5 = input.minRecallAt5 ?? defaultThreshold;

  const topResults = results.map((result) => {
    const trapScope = result.trap.scope;
    return {
      id: result.trap.id,
      // Fall back to the searched scope when the trap carries no recognised scope.
      scope: trapScope === "project" || trapScope === "global" ? trapScope : scope,
      title: result.trap.title,
      sources: result.sources ?? [],
      diagnostics: (result.diagnostics ?? []).map((diagnostic) => diagnostic.code),
    };
  });

  return {
    key: caseKey(input.query, mode, scope),
    query: input.query,
    mode,
    scope,
    scored: hasGold,
    recallAt3,
    recallAt5,
    reciprocalRank: firstRank >= 0 ? 1 / (firstRank + 1) : 0,
    passed: recallAt3 >= requiredAt3 && recallAt5 >= requiredAt5,
    topResults,
    warnings: [...collectedWarnings],
  };
}
|
|
308
|
+
|
|
309
|
+
/**
 * Fraction of gold targets that appear in `results`.
 *
 * Callers are expected to pass a non-empty `gold` list; an empty list yields NaN.
 */
function recall(gold: GoldTarget[], results: TrapSearchResult[], warnings: Set<string>, defaultScope: Scope): number {
  let found = 0;
  for (const target of gold) {
    if (targetMatches(target, results, warnings, defaultScope)) found += 1;
  }
  return found / gold.length;
}
|
|
312
|
+
|
|
313
|
+
/**
 * Zero-based rank of the first result that satisfies any gold target.
 *
 * @returns The index of that result, or -1 when none matches.
 */
function firstMatchRank(
  gold: GoldTarget[],
  results: TrapSearchResult[],
  warnings: Set<string>,
  defaultScope: Scope
): number {
  for (const [rank, result] of results.entries()) {
    // Each result is tested on its own so the rank of the match is known.
    const single = [result];
    if (gold.some((target) => targetMatches(target, single, warnings, defaultScope))) {
      return rank;
    }
  }
  return -1;
}
|
|
323
|
+
|
|
324
|
+
/**
 * Decides whether a gold target is present in `results`.
 *
 * Only results whose trap scope equals the target's scope (or `defaultScope`
 * when the target names none) are considered. Within those, an id match wins;
 * otherwise an exact title match is accepted. Bookkeeping problems are
 * recorded in `warnings` rather than failing the match:
 * - `gold_title_mismatch:<id>`: matched by id, but the title differs.
 * - `gold_title_ambiguous:<title>`: several results share the title.
 * - `gold_id_drift:<id>-><ids>`: matched by title, but under different ids.
 *
 * @returns True when the target was found by id or by title.
 */
function targetMatches(
  target: GoldTarget,
  results: TrapSearchResult[],
  warnings: Set<string>,
  defaultScope: Scope
): boolean {
  const wantedScope = target.scope ?? defaultScope;
  const inScope = results.filter((result) => result.trap.scope === wantedScope);
  const { id: wantedId, title: wantedTitle } = target;

  const byId = wantedId === undefined ? undefined : inScope.find((result) => result.trap.id === wantedId);
  if (byId) {
    if (wantedTitle && byId.trap.title !== wantedTitle) {
      warnings.add(`gold_title_mismatch:${wantedId}`);
    }
    return true;
  }

  if (!wantedTitle) return false;

  const byTitle = inScope.filter((result) => result.trap.title === wantedTitle);
  if (byTitle.length === 0) return false;

  if (byTitle.length > 1) warnings.add(`gold_title_ambiguous:${wantedTitle}`);
  if (wantedId !== undefined && !byTitle.some((result) => result.trap.id === wantedId)) {
    const actualIds = byTitle.map((result) => result.trap.id).join(",");
    warnings.add(`gold_id_drift:${wantedId}->${actualIds}`);
  }
  return true;
}
|
|
353
|
+
|
|
354
|
+
/**
 * Converts a fixture eval case report into the comparable shape.
 *
 * Fixture cases carry no scope, so the key is built without one and `scope`
 * stays unset. No warnings are produced for fixture cases.
 */
function fromEvalCaseReport(item: EvalCaseReport): ComparableSearchCase {
  const { query, mode, recallAt3, recallAt5, reciprocalRank, passed, error } = item;
  const topResults = item.topResults.map(({ id, title, sources, diagnostics }) => ({
    id,
    title,
    sources,
    diagnostics,
  }));

  return {
    key: caseKey(query, mode),
    query,
    mode,
    scored: item.goldTrapIds.length > 0,
    recallAt3,
    recallAt5,
    reciprocalRank,
    passed,
    topResults,
    warnings: [],
    error,
  };
}
|
|
374
|
+
|
|
375
|
+
/**
 * Assembles the report for one candidate from its evaluated cases.
 *
 * @param candidate The candidate that was evaluated.
 * @param cases Its per-case results.
 * @param baselineCases The baseline's per-case results, used for deltas.
 */
function candidateReport(
  candidate: RankingCandidate,
  cases: ComparableSearchCase[],
  baselineCases: ComparableSearchCase[]
): SweepCandidateReport {
  const scoredCount = cases.filter((entry) => entry.scored).length;
  const metrics = aggregateMetrics(cases);
  const failures = cases.filter((entry) => !entry.passed);
  const regressions = deltas(cases, baselineCases, "regression");
  const improvements = deltas(cases, baselineCases, "improvement");

  return {
    name: candidate.name,
    config: candidate.config,
    total_cases: cases.length,
    scored_cases: scoredCount,
    metrics,
    cases,
    failures,
    regressions,
    improvements,
  };
}
|
|
392
|
+
|
|
393
|
+
/**
 * Averages per-case scores into sweep-level metrics.
 *
 * Recall and MRR are averaged over scored cases only (all zero when there are
 * none). The fallback and error counts consider every case.
 */
function aggregateMetrics(cases: ComparableSearchCase[]): SearchEvalMetrics {
  const fallbackCodes = ["semantic_unavailable", "semantic_no_candidates", "semantic_failed"];

  let scoredCount = 0;
  let recallAt3Sum = 0;
  let recallAt5Sum = 0;
  let reciprocalRankSum = 0;
  let fallbackCount = 0;
  let errorCount = 0;

  for (const entry of cases) {
    if (entry.scored) {
      scoredCount += 1;
      recallAt3Sum += entry.recallAt3;
      recallAt5Sum += entry.recallAt5;
      reciprocalRankSum += entry.reciprocalRank;
    }
    const fellBack = entry.topResults.some((result) =>
      result.diagnostics.some((code) => fallbackCodes.includes(code))
    );
    if (fellBack) fallbackCount += 1;
    if (entry.error) errorCount += 1;
  }

  // Avoid dividing by zero when nothing is scored; the sums are zero in that case.
  const divisor = scoredCount || 1;
  return {
    recall_at_3: round(recallAt3Sum / divisor),
    recall_at_5: round(recallAt5Sum / divisor),
    mrr: round(reciprocalRankSum / divisor),
    hybrid_fallback_count: fallbackCount,
    semantic_error_count: errorCount,
  };
}
|
|
410
|
+
|
|
411
|
+
/**
 * Lists scored cases whose reciprocal rank moved versus the baseline.
 *
 * Cases are paired with the baseline by `key`; cases without a baseline
 * counterpart and unscored cases are ignored. A change must exceed 0.0001 in
 * the requested direction to be reported.
 *
 * @param cases The candidate's cases.
 * @param baselineCases The baseline's cases.
 * @param direction Whether to report drops ("regression") or gains ("improvement").
 */
function deltas(
  cases: ComparableSearchCase[],
  baselineCases: ComparableSearchCase[],
  direction: "regression" | "improvement"
): CaseDelta[] {
  const reference = new Map(baselineCases.map((entry) => [entry.key, entry]));
  const qualifies =
    direction === "regression"
      ? (change: number) => change < -0.0001
      : (change: number) => change > 0.0001;

  return cases.flatMap((current): CaseDelta[] => {
    const previous = reference.get(current.key);
    if (!previous || !current.scored) return [];
    if (!qualifies(current.reciprocalRank - previous.reciprocalRank)) return [];
    return [
      {
        query: current.query,
        mode: current.mode,
        scope: current.scope,
        before: previous.reciprocalRank,
        after: current.reciprocalRank,
        beforeTop: topTitles(previous),
        afterTop: topTitles(current),
      },
    ];
  });
}
|
|
436
|
+
|
|
437
|
+
/**
 * Wraps the per-candidate reports into the final sweep report.
 *
 * The first report is the baseline. The best report is the one that sorts
 * first under `compareCandidates`; the input array itself is left unsorted.
 *
 * @throws When `candidates` is empty.
 */
function buildSweepReport(
  mode: PolicySweepReport["mode"],
  source: string,
  candidates: SweepCandidateReport[],
  cwd?: string
): PolicySweepReport {
  if (candidates.length === 0) {
    throw new Error("At least one ranking candidate is required.");
  }

  const baseline = candidates[0]!;
  // Sort a copy so `candidates` keeps the caller's order.
  const ranked = candidates.slice().sort(compareCandidates);
  const best = ranked[0]!;

  return {
    mode,
    source,
    cwd,
    candidate_count: candidates.length,
    baseline,
    candidates,
    best,
    recommendation: recommendation(baseline, best, candidates),
  };
}
|
|
457
|
+
|
|
458
|
+
/**
 * Sort comparator that puts the stronger candidate first.
 *
 * Criteria, in order: higher recall@3, higher MRR, fewer failures, fewer
 * regressions. Recall@5 is not part of the ordering.
 */
function compareCandidates(a: SweepCandidateReport, b: SweepCandidateReport): number {
  const byRecallAt3 = b.metrics.recall_at_3 - a.metrics.recall_at_3;
  if (byRecallAt3) return byRecallAt3;

  const byMrr = b.metrics.mrr - a.metrics.mrr;
  if (byMrr) return byMrr;

  const byFailures = a.failures.length - b.failures.length;
  if (byFailures) return byFailures;

  return a.regressions.length - b.regressions.length;
}
|
|
466
|
+
|
|
467
|
+
/**
 * Produces the one-sentence guidance shown with a sweep report.
 *
 * The checks run in order: no scored cases, all candidates tied with the
 * baseline on recall@3 / recall@5 / MRR, baseline is still best (compared by
 * name), best candidate has regressions, and finally a plain winner.
 */
function recommendation(baseline: SweepCandidateReport, best: SweepCandidateReport, candidates: SweepCandidateReport[]): string {
  if (baseline.scored_cases === 0) {
    return "No scored live cases yet; add gold targets before using this as an optimization signal.";
  }

  const reference = baseline.metrics;
  const everyoneTies = candidates.every(({ metrics }) =>
    metrics.recall_at_3 === reference.recall_at_3 &&
    metrics.recall_at_5 === reference.recall_at_5 &&
    metrics.mrr === reference.mrr
  );
  if (everyoneTies) {
    return "All candidates tie; add harder miss/noisy_hit eval cases before changing ranking config.";
  }

  if (best.name === baseline.name) {
    return "The default config is still best on this fixture.";
  }

  return best.regressions.length > 0
    ? `${best.name} improves aggregate metrics but has regressions; inspect before adopting.`
    : `${best.name} is the strongest candidate on these cases; inspect changed rankings before editing defaults.`;
}
|
|
479
|
+
|
|
480
|
+
/**
 * Validates one raw entry from a live queries file and returns it as a case.
 *
 * Fields are checked in a fixed order (query, mode, scope, limit, gold,
 * minRecallAt3, minRecallAt5); the first invalid field raises.
 *
 * @throws When `value` is not an object or any field is invalid.
 */
function normalizeLiveEvalCase(value: unknown): LiveEvalCase {
  if (!isRecord(value)) {
    throw new Error("Each live query case must be an object.");
  }

  const normalized: LiveEvalCase = {
    query: stringField(value, "query"),
    mode: optionalEnum(value, "mode", SEARCH_MODES),
    scope: optionalEnum(value, "scope", SCOPES),
    limit: optionalPositiveInt(value, "limit"),
    gold: normalizeGoldTargets(value.gold),
    minRecallAt3: optionalScore(value, "minRecallAt3"),
    minRecallAt5: optionalScore(value, "minRecallAt5"),
  };
  return normalized;
}
|
|
495
|
+
|
|
496
|
+
/**
 * Validates the raw `gold` field of a live query case.
 *
 * Each entry must be an object with at least an `id` or a non-blank `title`;
 * `scope`, when present, must be one of the known scopes. Titles are trimmed.
 *
 * `id` must be a number or a non-blank numeric string. Blindly passing the raw
 * value through `Number()` would coerce `true` to 1 and `[3]` to 3, silently
 * pointing the gold target at an unrelated trap.
 *
 * @param value Raw `gold` value from the parsed JSON.
 * @returns The normalized targets, or undefined when `gold` is absent.
 * @throws When `gold` is not an array or an entry is invalid.
 */
function normalizeGoldTargets(value: unknown): GoldTarget[] | undefined {
  if (value === undefined) return undefined;
  if (!Array.isArray(value)) throw new Error("gold must be an array.");
  return value.map((item) => {
    if (!isRecord(item)) throw new Error("gold entries must be objects.");

    const rawId = item.id;
    let id: number | undefined;
    if (rawId !== undefined) {
      // Only genuine numbers and numeric strings are converted; anything else becomes NaN and is rejected below.
      id =
        typeof rawId === "number"
          ? rawId
          : typeof rawId === "string" && rawId.trim() !== ""
            ? Number(rawId)
            : Number.NaN;
      if (!Number.isInteger(id) || id <= 0) throw new Error("gold.id must be a positive integer.");
    }

    const scope = optionalEnum(item, "scope", SCOPES);
    const title = typeof item.title === "string" && item.title.trim() ? item.title.trim() : undefined;
    if (id === undefined && !title) throw new Error("gold entries require id or title.");
    return { id, title, scope };
  });
}
|
|
509
|
+
|
|
510
|
+
/**
 * Builds the identity used to pair a case with its baseline counterpart.
 *
 * The parts are joined with NUL characters; cases without a scope use the
 * placeholder "fixture".
 */
function caseKey(query: string, mode: SearchMode, scope?: Scope): string {
  const parts = [scope ?? "fixture", mode, query];
  return parts.join("\0");
}
|
|
513
|
+
|
|
514
|
+
/** Formats the first three results of a case as "#id title" strings. */
function topTitles(item: ComparableSearchCase): string[] {
  const leading = item.topResults.slice(0, 3);
  return leading.map(({ id, title }) => `#${id} ${title}`);
}
|
|
517
|
+
|
|
518
|
+
/** One-line summary of a candidate: name, recall@3, recall@5, MRR, failure and regression counts. */
function summaryLine(item: SweepCandidateReport): string {
  const { recall_at_3: recallAt3, recall_at_5: recallAt5, mrr } = item.metrics;
  const fields = [
    item.name,
    `R@3=${recallAt3}`,
    `R@5=${recallAt5}`,
    `MRR=${mrr}`,
    `failures=${item.failures.length}`,
    `regressions=${item.regressions.length}`,
  ];
  return fields.join(" ");
}
|
|
521
|
+
|
|
522
|
+
/** Renders up to three deltas as "query before->after", separated by "; ". */
function formatDeltas(items: CaseDelta[]): string {
  const shown: string[] = [];
  for (const delta of items.slice(0, 3)) {
    shown.push(`${delta.query} ${round(delta.before)}->${round(delta.after)}`);
  }
  return shown.join("; ");
}
|
|
528
|
+
|
|
529
|
+
/**
 * Reads an optional string field that must be one of `choices`.
 *
 * @param value Record to read from.
 * @param key Field name; also used in the error message.
 * @param choices Allowed values.
 * @returns The field value, or undefined when the field is absent.
 * @throws When the field is present but not one of `choices`.
 */
function optionalEnum<T extends readonly string[]>(value: Record<string, unknown>, key: string, choices: T): T[number] | undefined {
  const raw = value[key];
  if (raw === undefined) return undefined;

  const allowed: readonly string[] = choices;
  if (typeof raw === "string" && allowed.includes(raw)) {
    return raw as T[number];
  }
  throw new Error(`${key} must be one of: ${choices.join(", ")}`);
}
|
|
537
|
+
|
|
538
|
+
/**
 * Reads a required, non-blank string field and returns it trimmed.
 *
 * @throws When the field is missing, not a string, or only whitespace.
 */
function stringField(value: Record<string, unknown>, key: string): string {
  const raw = value[key];
  const trimmed = typeof raw === "string" ? raw.trim() : "";
  if (trimmed === "") {
    throw new Error(`${key} is required.`);
  }
  return trimmed;
}
|
|
543
|
+
|
|
544
|
+
/**
 * Reads an optional positive integer from `value[key]`.
 *
 * Accepts a number or a non-blank numeric string. Other JSON values are
 * rejected explicitly: passing them through `Number()` would coerce `true`
 * to 1 and a single-element array such as `[5]` to 5.
 *
 * @param value Record to read from.
 * @param key Field name; also used in the error message.
 * @returns The integer, or undefined when the field is absent.
 * @throws When the field is present but not a positive integer.
 */
function optionalPositiveInt(value: Record<string, unknown>, key: string): number | undefined {
  const field = value[key];
  if (field === undefined) return undefined;
  const number =
    typeof field === "number"
      ? field
      : typeof field === "string" && field.trim() !== ""
        ? Number(field)
        : Number.NaN;
  if (!Number.isInteger(number) || number <= 0) throw new Error(`${key} must be a positive integer.`);
  return number;
}
|
|
551
|
+
|
|
552
|
+
/**
 * Reads an optional score in the inclusive range [0, 1] from `value[key]`.
 *
 * Accepts a number or a non-blank numeric string. Other JSON values are
 * rejected explicitly: `Number()` maps `null`, `""`, whitespace, `false` and
 * `[]` to 0 (and `true` to 1), which would silently turn a malformed
 * threshold into "no recall required".
 *
 * @param value Record to read from.
 * @param key Field name; also used in the error message.
 * @returns The score, or undefined when the field is absent.
 * @throws When the field is present but not a finite number between 0 and 1.
 */
function optionalScore(value: Record<string, unknown>, key: string): number | undefined {
  const field = value[key];
  if (field === undefined) return undefined;
  const number =
    typeof field === "number"
      ? field
      : typeof field === "string" && field.trim() !== ""
        ? Number(field)
        : Number.NaN;
  if (!Number.isFinite(number) || number < 0 || number > 1) throw new Error(`${key} must be between 0 and 1.`);
  return number;
}
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
/** Rounds `value` to four decimal places. */
function round(value: number): number {
  const scale = 10000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
|
package/src/lib/search-policy.ts
CHANGED
|
@@ -120,10 +120,6 @@ export class TrapSearchPolicy {
|
|
|
120
120
|
return trapMatchesApplicability(trap, filter);
|
|
121
121
|
}
|
|
122
122
|
|
|
123
|
-
filterTraps(traps: Trap[], filter: ApplicabilityFilter): Trap[] {
|
|
124
|
-
return traps.filter((trap) => this.matchesTrap(trap, filter));
|
|
125
|
-
}
|
|
126
|
-
|
|
127
123
|
prepareRetrievedResults(
|
|
128
124
|
results: TrapSearchResult[],
|
|
129
125
|
source: SearchRetrievalSource,
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
import type { Database } from "bun:sqlite";
|
|
2
|
+
import * as embeddingQueries from "../db/embedding-queries";
|
|
2
3
|
import * as queries from "../db/queries";
|
|
3
4
|
import type { TrapSearchResult } from "../domain/trap";
|
|
4
5
|
import type { SearchMode, TrapStatus } from "./constants";
|
|
6
|
+
import { cosineSimilarity } from "./embedder";
|
|
5
7
|
import {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
} from "./embedder";
|
|
8
|
+
embeddingRuntimeFrom,
|
|
9
|
+
type EmbeddingRuntime,
|
|
10
|
+
type EmbeddingRuntimeInput,
|
|
11
|
+
} from "./embedding-runtime";
|
|
11
12
|
import {
|
|
12
13
|
DEFAULT_RANKING_CONFIG,
|
|
13
14
|
TrapSearchPolicy,
|
|
14
15
|
type RankingConfig,
|
|
15
16
|
type SearchRetrievalPlan,
|
|
16
17
|
} from "./search-policy";
|
|
17
|
-
import { DatabaseEmbeddingIndex } from "./embedding-index";
|
|
18
18
|
|
|
19
19
|
export interface SearchOptions {
|
|
20
20
|
category?: string;
|
|
@@ -33,15 +33,15 @@ const DEFAULT_LIMIT = 20;
|
|
|
33
33
|
|
|
34
34
|
export class SearchService {
|
|
35
35
|
private readonly policy: TrapSearchPolicy;
|
|
36
|
-
private readonly
|
|
36
|
+
private readonly embeddings: EmbeddingRuntime;
|
|
37
37
|
|
|
38
38
|
constructor(
|
|
39
39
|
private readonly db: Database,
|
|
40
|
-
|
|
40
|
+
embeddings?: EmbeddingRuntimeInput,
|
|
41
41
|
ranking: RankingConfig = DEFAULT_RANKING_CONFIG
|
|
42
42
|
) {
|
|
43
|
+
this.embeddings = embeddingRuntimeFrom(embeddings);
|
|
43
44
|
this.policy = new TrapSearchPolicy(ranking);
|
|
44
|
-
this.embeddingIndex = new DatabaseEmbeddingIndex(db);
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
async search(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
|
|
@@ -106,15 +106,14 @@ export class SearchService {
|
|
|
106
106
|
query: string,
|
|
107
107
|
plan: SearchRetrievalPlan
|
|
108
108
|
): Promise<TrapSearchResult[]> {
|
|
109
|
-
|
|
110
|
-
throw new EmbeddingProviderUnavailableError();
|
|
111
|
-
}
|
|
109
|
+
const provider = this.embeddings.requireProvider();
|
|
112
110
|
|
|
113
|
-
const [queryEmbedding] = await
|
|
111
|
+
const [queryEmbedding] = await provider.embed([query], "retrieval.query");
|
|
114
112
|
if (!queryEmbedding) return [];
|
|
115
113
|
|
|
116
|
-
const config =
|
|
117
|
-
|
|
114
|
+
const config = this.embeddings.config();
|
|
115
|
+
if (!config) throw this.embeddings.unavailableError();
|
|
116
|
+
const candidates = embeddingQueries.getAllFreshEmbeddings(this.db, config, plan.semanticStorageFilter);
|
|
118
117
|
|
|
119
118
|
const results = candidates
|
|
120
119
|
.map(({ trap, embedding }) => {
|