codetrap 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/plugins/marketplace.json +20 -0
- package/README.md +112 -33
- package/docs/installation.md +18 -10
- package/package.json +4 -1
- package/plugins/codetrap-agent/.codex-plugin/plugin.json +34 -0
- package/plugins/codetrap-agent/hooks/post-flight-capture.example.md +25 -0
- package/plugins/codetrap-agent/hooks/pre-edit.example.sh +10 -0
- package/plugins/codetrap-agent/hooks.json +11 -0
- package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md +19 -0
- package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md +14 -0
- package/plugins/codetrap-agent/templates/AGENTS.codetrap.md +25 -0
- package/scripts/release-preflight.ts +55 -0
- package/skills/codetrap-add/SKILL.md +4 -1
- package/skills/codetrap-check/SKILL.md +24 -4
- package/skills/codetrap-search/SKILL.md +32 -12
- package/src/commands/command-result.ts +29 -0
- package/src/commands/router.ts +6 -400
- package/src/commands/workflow.ts +419 -0
- package/src/db/embedding-queries.ts +33 -0
- package/src/db/queries.ts +165 -48
- package/src/db/repository.ts +72 -15
- package/src/db/schema.ts +35 -0
- package/src/domain/trap.ts +38 -10
- package/src/index.ts +13 -1
- package/src/lib/command-requests.ts +133 -0
- package/src/lib/config.ts +102 -0
- package/src/lib/constants.ts +1 -1
- package/src/lib/doctor.ts +86 -0
- package/src/lib/embedding-health.ts +49 -0
- package/src/lib/embedding-index.ts +53 -0
- package/src/lib/format.ts +6 -2
- package/src/lib/output-json.ts +141 -0
- package/src/lib/scope-context.ts +118 -0
- package/src/lib/scope-maintenance.ts +71 -0
- package/src/lib/scope-migration.ts +315 -0
- package/src/lib/scope-path.ts +99 -0
- package/src/lib/scope.ts +16 -11
- package/src/lib/search-normalizer.ts +6 -0
- package/src/lib/search-policy.ts +365 -0
- package/src/lib/search-result-card.ts +2 -7
- package/src/lib/search-service.ts +67 -120
- package/src/lib/store.ts +129 -108
- package/src/lib/trap-archive.ts +9 -42
- package/src/lib/trap-codec.ts +113 -0
- package/src/lib/trap-json-fields.ts +12 -0
- package/src/lib/trap-lifecycle.ts +37 -0
- package/src/lib/trap-mutation-result.ts +36 -0
- package/src/lib/trap-operations.ts +30 -9
- package/src/lib/trap-scope-match.ts +112 -0
- package/src/lib/trap-search-document.ts +8 -1
- package/src/lib/trap-transfer.ts +88 -0
- package/src/mcp/server.ts +77 -72
- package/src/mcp/tools.ts +32 -5
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
import type { RankingSignal, Trap, TrapSearchResult } from "../domain/trap";
|
|
2
|
+
import type { TrapStatus } from "./constants";
|
|
3
|
+
import { EmbeddingProviderUnavailableError } from "./embedder";
|
|
4
|
+
import { parseTrapPathGlobs, parseTrapTags } from "./trap-json-fields";
|
|
5
|
+
import {
|
|
6
|
+
hasSpecificPathMatch,
|
|
7
|
+
trapMatchesApplicability,
|
|
8
|
+
type ApplicabilityFilter,
|
|
9
|
+
} from "./trap-scope-match";
|
|
10
|
+
|
|
11
|
+
/**
 * Caller-facing options consumed by `TrapSearchPolicy`.
 *
 * Extends `ApplicabilityFilter`; `plan()` reads `path`, `module` and `owner`
 * from it in addition to the fields declared here.
 */
export interface SearchPolicyOptions extends ApplicabilityFilter {
  /** Copied by `plan()` into both the FTS and the semantic storage filter. */
  category?: string;
  /** Copied by `plan()` into both the FTS and the semantic storage filter. */
  scope?: string;
  /** Copied by `plan()` into both the FTS and the semantic storage filter. */
  status?: TrapStatus | "all";
  /** Result limit; `plan()` falls back to its `defaultLimit` argument when omitted. */
  limit?: number;
  /** Only an explicit `false` disables reranking; `undefined` leaves it enabled. */
  rerank?: boolean;
  /** Opts in to the `ranking_signals` field on returned results. */
  includeRankingSignals?: boolean;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Storage-level filter used for full-text retrieval.
 *
 * `TrapSearchPolicy.plan()` fills it from the search options and sets `limit`
 * to the candidate limit.
 */
export type SearchStorageFilter = {
  category?: string;
  scope?: string;
  status?: TrapStatus | "all";
  module?: string;
  owner?: string;
  /** Maximum number of rows to fetch. */
  limit?: number;
};
|
|
28
|
+
|
|
29
|
+
/**
 * Storage-level filter used for semantic retrieval.
 *
 * Carries only category, scope and status; `module` and `owner` are not part
 * of this filter.
 */
export type SemanticStorageFilter = {
  category?: string;
  scope?: string;
  status?: TrapStatus | "all";
};
|
|
34
|
+
|
|
35
|
+
/** Identifies which retrieval path produced a batch of candidates. */
export type SearchRetrievalSource =
  | "fts"
  | "semantic";
|
|
36
|
+
|
|
37
|
+
/** Everything `TrapSearchPolicy.plan()` decides up front for one search call. */
export interface SearchRetrievalPlan {
  /** Number of results handed back to the caller. */
  resultLimit: number;
  /** Number of candidates kept per retrieval source before final ranking. */
  candidateLimit: number;
  /** Filter for full-text retrieval; `limit` is always set here. */
  ftsStorageFilter: SearchStorageFilter & { limit: number };
  /** Filter for semantic retrieval. */
  semanticStorageFilter: SemanticStorageFilter;
  /** Path/module/owner filter applied to retrieved candidates. */
  applicabilityFilter: ApplicabilityFilter;
}
|
|
44
|
+
|
|
45
|
+
/** Tunable weights and thresholds for fusion and reranking. */
export interface RankingConfig {
  /** Constant `k` in the fusion term `1 / (k + index + 1)`. */
  rrfK: number;
  /** Semantic candidates scoring below this value are dropped. */
  semanticMinScore: number;
  /** Combined context/mistake/fix length above which fused scores are scaled down. */
  lengthNormAnchor: number;
  /** Upper bound on the summed signal weights applied to one result. */
  maxBoost: number;
  /** Weight added when a query token equals a title token. */
  titleTokenBoost: number;
  /** Weight added when a query token equals a tag. */
  tagTokenBoost: number;
  /** Weight added when an identifier-like query token appears in any trap field. */
  identifierBoost: number;
  /** Weight per severity name; severities without an entry contribute nothing. */
  severityBoost: Record<string, number>;
  /** Weight added when the trap has a specific path match for the requested path. */
  pathMatchBoost: number;
  /** Weight added when the trap's module equals the requested module. */
  moduleMatchBoost: number;
  /** Weight added when the trap's owner equals the requested owner. */
  ownerMatchBoost: number;
}
|
|
58
|
+
|
|
59
|
+
/** Ranking configuration used when `TrapSearchPolicy` is built without one. */
export const DEFAULT_RANKING_CONFIG: RankingConfig = {
  // Fusion and retrieval thresholds.
  rrfK: 60,
  semanticMinScore: 0.3,
  lengthNormAnchor: 500,

  // Cap on the total boost, followed by the lexical signal weights.
  maxBoost: 0.45,
  titleTokenBoost: 0.16,
  tagTokenBoost: 0.2,
  identifierBoost: 0.18,

  // A zero weight never emits a severity signal.
  severityBoost: { warning: 0, error: 0.04, critical: 0.07 },

  // Applicability signal weights.
  pathMatchBoost: 0.12,
  moduleMatchBoost: 0.08,
  ownerMatchBoost: 0.04,
};
|
|
76
|
+
|
|
77
|
+
/**
 * Search policy for traps: builds retrieval plans, filters candidates by
 * applicability, fuses full-text and semantic candidates, and reranks results.
 */
export class TrapSearchPolicy {
  /**
   * @param ranking Weights and thresholds; defaults to `DEFAULT_RANKING_CONFIG`.
   */
  constructor(private readonly ranking: RankingConfig = DEFAULT_RANKING_CONFIG) {}

  /**
   * Builds the retrieval plan for one search call.
   *
   * @param opts Caller options.
   * @param defaultLimit Result limit used when `opts.limit` is null or undefined.
   * @returns Limits plus the storage and applicability filters derived from `opts`.
   */
  plan(opts: SearchPolicyOptions, defaultLimit: number): SearchRetrievalPlan {
    const resultLimit = opts.limit ?? defaultLimit;
    const candidateLimit = this.candidateLimit(opts, resultLimit);
    return {
      resultLimit,
      candidateLimit,
      ftsStorageFilter: {
        category: opts.category,
        scope: opts.scope,
        status: opts.status,
        module: opts.module,
        owner: opts.owner,
        limit: candidateLimit,
      },
      semanticStorageFilter: {
        category: opts.category,
        scope: opts.scope,
        status: opts.status,
      },
      applicabilityFilter: {
        path: opts.path,
        module: opts.module,
        owner: opts.owner,
      },
    };
  }

  /**
   * Number of candidates to retrieve per source.
   *
   * @returns `max(resultLimit * 5, 50)` when overfetching applies, otherwise `resultLimit`.
   */
  candidateLimit(opts: SearchPolicyOptions, resultLimit: number): number {
    return shouldOverfetch(opts) ? Math.max(resultLimit * 5, 50) : resultLimit;
  }

  /** Minimum score a semantic candidate needs to be kept. */
  semanticMinScore(): number {
    return this.ranking.semanticMinScore;
  }

  /** Keeps the results whose trap matches the applicability filter. */
  filterResults(results: TrapSearchResult[], filter: ApplicabilityFilter): TrapSearchResult[] {
    return results.filter((result) => trapMatchesApplicability(result.trap, filter));
  }

  /** Whether a single trap matches the applicability filter. */
  matchesTrap(trap: Trap, filter: ApplicabilityFilter): boolean {
    return trapMatchesApplicability(trap, filter);
  }

  /** Keeps the traps that match the applicability filter. */
  filterTraps(traps: Trap[], filter: ApplicabilityFilter): Trap[] {
    return traps.filter((trap) => this.matchesTrap(trap, filter));
  }

  /**
   * Post-processes raw candidates from one retrieval source.
   *
   * Both sources are filtered by applicability and truncated to
   * `plan.candidateLimit`. Semantic candidates are additionally dropped when
   * their score (missing scores count as 0) is below `semanticMinScore()` and
   * are sorted by descending score; FTS candidates keep their incoming order.
   */
  prepareRetrievedResults(
    results: TrapSearchResult[],
    source: SearchRetrievalSource,
    plan: SearchRetrievalPlan
  ): TrapSearchResult[] {
    const applicable = this.filterResults(results, plan.applicabilityFilter);
    if (source === "semantic") {
      return applicable
        .filter((result) => (result.score ?? 0) >= this.semanticMinScore())
        .sort((a, b) => (b.score ?? 0) - (a.score ?? 0))
        .slice(0, plan.candidateLimit);
    }
    return applicable.slice(0, plan.candidateLimit);
  }

  /** Ranks single-source results and truncates them to `plan.resultLimit`. */
  finalizeResults(
    results: TrapSearchResult[],
    query: string,
    opts: SearchPolicyOptions,
    plan: SearchRetrievalPlan
  ): TrapSearchResult[] {
    return this.rankResults(results, query, opts, plan.resultLimit);
  }

  /** Applies reranking (or signal stripping when `opts.rerank === false`) and keeps the first `limit` results. */
  rankResults(
    results: TrapSearchResult[],
    query: string,
    opts: SearchPolicyOptions,
    limit: number
  ): TrapSearchResult[] {
    return applyReranking(results, query, opts, this.ranking).slice(0, limit);
  }

  /**
   * Fuses FTS and semantic candidates by rank position, applies length
   * normalization to the fused score, then ranks and truncates to `limit`.
   *
   * Each result's `score` and `rank` are both set to the normalized fused score.
   */
  fuse(
    ftsResults: TrapSearchResult[],
    semanticResults: TrapSearchResult[],
    query: string,
    opts: SearchPolicyOptions,
    limit: number
  ): TrapSearchResult[] {
    const byId = new Map<number, TrapSearchResult & { score: number; sources: ("fts" | "semantic")[] }>();

    addRankedResults(byId, ftsResults, "fts", this.ranking);
    addRankedResults(byId, semanticResults, "semantic", this.ranking);

    const fused = [...byId.values()]
      .map((result) => {
        // Normalize once and reuse the value for both fields.
        const normalized = applyLengthNormalization(result.score, result.trap, this.ranking);
        return { ...result, score: normalized, rank: normalized };
      })
      .sort((a, b) => b.score - a.score);

    return this.rankResults(fused, query, opts, limit);
  }

  /** Same as `fuse`, taking the limit from `plan.resultLimit`. */
  fuseAndFinalize(
    ftsResults: TrapSearchResult[],
    semanticResults: TrapSearchResult[],
    query: string,
    opts: SearchPolicyOptions,
    plan: SearchRetrievalPlan
  ): TrapSearchResult[] {
    return this.fuse(ftsResults, semanticResults, query, opts, plan.resultLimit);
  }

  /**
   * Returns copies of the results with `diagnostic` appended to each one's
   * `diagnostics` list. The input results are not modified.
   */
  withDiagnostics(
    results: TrapSearchResult[],
    diagnostic: { code: string; message: string }
  ): TrapSearchResult[] {
    return results.map((result) => ({
      ...result,
      diagnostics: [...(result.diagnostics ?? []), diagnostic],
    }));
  }

  /**
   * Maps a semantic-search failure to a diagnostic.
   *
   * @returns `semantic_unavailable` for `EmbeddingProviderUnavailableError`,
   *   otherwise `semantic_failed` with the error's message, or a fixed
   *   fallback message when the thrown value is not an `Error`.
   */
  semanticDiagnostic(error: unknown): { code: string; message: string } {
    if (error instanceof EmbeddingProviderUnavailableError) {
      return {
        code: "semantic_unavailable",
        message: error.message,
      };
    }
    return {
      code: "semantic_failed",
      message: error instanceof Error ? error.message : "Semantic search failed; hybrid search returned FTS results.",
    };
  }
}
|
|
216
|
+
|
|
217
|
+
/**
 * Adds one ranked list to the fusion accumulator.
 *
 * The result at zero-based position `i` contributes `1 / (rrfK + i + 1)`.
 * A trap already in `byId` has the contribution added to its score and the
 * source recorded once; a new trap is inserted as a copy of its result.
 */
function addRankedResults(
  byId: Map<number, TrapSearchResult & { score: number; sources: ("fts" | "semantic")[] }>,
  results: TrapSearchResult[],
  source: "fts" | "semantic",
  ranking: RankingConfig
): void {
  results.forEach((candidate, position) => {
    const contribution = 1 / (ranking.rrfK + position + 1);
    const accumulated = byId.get(candidate.trap.id);

    if (!accumulated) {
      byId.set(candidate.trap.id, { ...candidate, score: contribution, sources: [source] });
      return;
    }

    accumulated.score += contribution;
    if (!accumulated.sources.includes(source)) {
      accumulated.sources.push(source);
    }
  });
}
|
|
238
|
+
|
|
239
|
+
/**
 * Scales a score down for traps with long text.
 *
 * The length is that of context, mistake and fix joined by newlines. Scores
 * are unchanged up to `lengthNormAnchor`; beyond it they are multiplied by
 * `sqrt(lengthNormAnchor / length)`.
 */
function applyLengthNormalization(score: number, trap: Trap, ranking: RankingConfig): number {
  const anchor = ranking.lengthNormAnchor;
  const textLength = `${trap.context}\n${trap.mistake}\n${trap.fix}`.length;
  return textLength <= anchor ? score : score * Math.sqrt(anchor / textLength);
}
|
|
244
|
+
|
|
245
|
+
/**
 * Boosts each result by its ranking signals and sorts by the boosted score.
 *
 * With `opts.rerank === false` no boosting happens and the call is handed to
 * `stripRankingSignals`. Otherwise the base score (`score`, else `rank`, else
 * 0) is multiplied by `1 + boost`, where `boost` is the summed signal weight
 * capped at `ranking.maxBoost`. Signals are attached only when
 * `opts.includeRankingSignals` is set and at least one signal fired.
 */
function applyReranking(
  results: TrapSearchResult[],
  query: string,
  opts: SearchPolicyOptions,
  ranking: RankingConfig
): TrapSearchResult[] {
  if (opts.rerank === false) {
    return stripRankingSignals(results, opts);
  }

  const queryInfo = analyzeQuery(query);

  const rerankOne = (result: TrapSearchResult) => {
    const signals = rankingSignals(result.trap, queryInfo, opts, ranking);

    let totalWeight = 0;
    for (const signal of signals) {
      totalWeight += signal.weight;
    }
    const boost = Math.min(ranking.maxBoost, totalWeight);

    const baseScore = result.score ?? result.rank ?? 0;
    const score = baseScore * (1 + boost);
    const exposedSignals = opts.includeRankingSignals && signals.length > 0 ? { ranking_signals: signals } : {};

    return { ...result, score, rank: score, ...exposedSignals };
  };

  return results
    .map((result) => rerankOne(result))
    .sort((left, right) => (right.score ?? 0) - (left.score ?? 0));
}
|
|
271
|
+
|
|
272
|
+
/**
 * Produces result copies for the no-rerank path.
 *
 * With `opts.includeRankingSignals` set, each copy gets an empty
 * `ranking_signals` array; otherwise the field is removed from each copy.
 */
function stripRankingSignals(results: TrapSearchResult[], opts: SearchPolicyOptions): TrapSearchResult[] {
  return results.map((result) => {
    if (opts.includeRankingSignals) {
      return { ...result, ranking_signals: [] };
    }
    const { ranking_signals: _rankingSignals, ...withoutSignals } = result;
    return withoutSignals;
  });
}
|
|
276
|
+
|
|
277
|
+
/**
 * Collects the ranking signals that apply to one trap for a query.
 *
 * At most one signal is emitted per lexical kind (title token, tag,
 * identifier), using the first matching query token in iteration order.
 * Severity, path, module and owner signals follow, in that order.
 */
function rankingSignals(
  trap: Trap,
  query: QueryInfo,
  filter: ApplicabilityFilter,
  ranking: RankingConfig
): RankingSignal[] {
  // First candidate accepted by `isHit`, or undefined when none is.
  const firstMatching = (candidates: Set<string>, isHit: (token: string) => boolean): string | undefined => {
    for (const candidate of candidates) {
      if (isHit(candidate)) return candidate;
    }
    return undefined;
  };

  const titleTokens = tokenize(trap.title);
  const lowerCasedTags = new Set(parseTrapTags(trap.tags).map((tag) => tag.toLowerCase()));
  const searchableText = [
    trap.title,
    trap.context,
    trap.mistake,
    trap.fix,
    trap.before_code ?? "",
    trap.after_code ?? "",
    parseTrapPathGlobs(trap.path_globs).join(" "),
    trap.module ?? "",
    trap.owner ?? "",
  ].join(" ");
  const fieldTokens = tokenize(searchableText);

  const collected: RankingSignal[] = [];

  const titleHit = firstMatching(query.tokens, (token) => titleTokens.has(token));
  if (titleHit !== undefined) {
    collected.push({ code: "title_token_exact", weight: ranking.titleTokenBoost, detail: titleHit });
  }

  const tagHit = firstMatching(query.tokens, (token) => lowerCasedTags.has(token));
  if (tagHit !== undefined) {
    collected.push({ code: "tag_exact", weight: ranking.tagTokenBoost, detail: tagHit });
  }

  const identifierHit = firstMatching(query.identifierTokens, (token) => fieldTokens.has(token));
  if (identifierHit !== undefined) {
    collected.push({ code: "code_identifier_exact", weight: ranking.identifierBoost, detail: identifierHit });
  }

  // Severities with no configured weight, or a weight of zero or less, emit nothing.
  const severityWeight = ranking.severityBoost[trap.severity] ?? 0;
  if (severityWeight > 0) {
    collected.push({ code: "severity", weight: severityWeight, detail: trap.severity });
  }

  if (hasSpecificPathMatch(trap, filter.path)) {
    collected.push({ code: "path_scope_match", weight: ranking.pathMatchBoost, detail: filter.path });
  }

  if (filter.module && trap.module === filter.module) {
    collected.push({ code: "module_scope_match", weight: ranking.moduleMatchBoost, detail: filter.module });
  }

  if (filter.owner && trap.owner === filter.owner) {
    collected.push({ code: "owner_scope_match", weight: ranking.ownerMatchBoost, detail: filter.owner });
  }

  return collected;
}
|
|
336
|
+
|
|
337
|
+
/** Lowercased query tokens plus the subset that looks like code identifiers. */
type QueryInfo = {
  tokens: Set<string>;
  identifierTokens: Set<string>;
};

/**
 * Splits a query into tokens and flags the identifier-like ones.
 *
 * Identifier detection runs on the tokens in their original case, so the
 * camelCase check in `isIdentifierLike` can match. Both returned sets hold
 * lowercased tokens, matching what `tokenize` yields for trap fields.
 */
function analyzeQuery(query: string): QueryInfo {
  const rawTokens = extractRawTokens(query);
  return {
    tokens: new Set(rawTokens.map((token) => token.toLowerCase())),
    identifierTokens: new Set(rawTokens.filter(isIdentifierLike).map((token) => token.toLowerCase())),
  };
}

/** Returns the runs of letters, digits and `_ . $ / @ : -` in `value`, case preserved. */
function extractRawTokens(value: string): string[] {
  return value.match(/[A-Za-z0-9_.$/@:-]+/g) ?? [];
}

/** Returns the distinct lowercased tokens of `value`. */
function tokenize(value: string): Set<string> {
  return new Set(extractRawTokens(value).map((token) => token.toLowerCase()));
}

/**
 * Heuristic for "looks like a code identifier".
 *
 * True when the token contains one of `_ . $ / @ : -`, contains a digit, has
 * a lowercase letter directly followed by an uppercase one, or is at least
 * 8 characters long. The camelCase test needs the token in its original case.
 */
function isIdentifierLike(token: string): boolean {
  return (
    /[_.$/@:-]/.test(token) ||
    /\d/.test(token) ||
    /[a-z][A-Z]/.test(token) ||
    token.length >= 8
  );
}
|
|
362
|
+
|
|
363
|
+
/**
 * Whether retrieval should fetch more candidates than the result limit.
 *
 * True when a path, module or owner filter is set, or when reranking has not
 * been explicitly disabled with `rerank: false`.
 */
function shouldOverfetch(opts: SearchPolicyOptions): boolean {
  const hasApplicabilityFilter = Boolean(opts.path) || Boolean(opts.module) || Boolean(opts.owner);
  return hasApplicabilityFilter || opts.rerank !== false;
}
|
|
@@ -15,13 +15,8 @@ export function toTrapActionCard(result: TrapSearchResult, scope: Scope): TrapAc
|
|
|
15
15
|
severity: trap.severity,
|
|
16
16
|
score: result.score ?? null,
|
|
17
17
|
sources: result.sources ?? [],
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
details_args: {
|
|
21
|
-
id: trap.id,
|
|
22
|
-
scope,
|
|
23
|
-
},
|
|
24
|
-
},
|
|
18
|
+
...(result.diagnostics ? { diagnostics: result.diagnostics } : {}),
|
|
19
|
+
...(result.ranking_signals ? { ranking_signals: result.ranking_signals } : {}),
|
|
25
20
|
};
|
|
26
21
|
}
|
|
27
22
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import type { Database } from "bun:sqlite";
|
|
2
|
-
import * as embeddingQueries from "../db/embedding-queries";
|
|
3
2
|
import * as queries from "../db/queries";
|
|
4
|
-
import type {
|
|
3
|
+
import type { TrapSearchResult } from "../domain/trap";
|
|
5
4
|
import type { SearchMode, TrapStatus } from "./constants";
|
|
6
5
|
import {
|
|
7
6
|
cosineSimilarity,
|
|
@@ -9,6 +8,13 @@ import {
|
|
|
9
8
|
embeddingConfig,
|
|
10
9
|
type EmbeddingProvider,
|
|
11
10
|
} from "./embedder";
|
|
11
|
+
import {
|
|
12
|
+
DEFAULT_RANKING_CONFIG,
|
|
13
|
+
TrapSearchPolicy,
|
|
14
|
+
type RankingConfig,
|
|
15
|
+
type SearchRetrievalPlan,
|
|
16
|
+
} from "./search-policy";
|
|
17
|
+
import { DatabaseEmbeddingIndex } from "./embedding-index";
|
|
12
18
|
|
|
13
19
|
export interface SearchOptions {
|
|
14
20
|
category?: string;
|
|
@@ -16,28 +22,27 @@ export interface SearchOptions {
|
|
|
16
22
|
limit?: number;
|
|
17
23
|
mode?: SearchMode;
|
|
18
24
|
status?: TrapStatus | "all";
|
|
25
|
+
path?: string;
|
|
26
|
+
module?: string;
|
|
27
|
+
owner?: string;
|
|
28
|
+
rerank?: boolean;
|
|
29
|
+
includeRankingSignals?: boolean;
|
|
19
30
|
}
|
|
20
31
|
|
|
21
|
-
export interface RankingConfig {
|
|
22
|
-
rrfK: number;
|
|
23
|
-
semanticMinScore: number;
|
|
24
|
-
lengthNormAnchor: number;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
export const DEFAULT_RANKING_CONFIG: RankingConfig = {
|
|
28
|
-
rrfK: 60,
|
|
29
|
-
semanticMinScore: 0.3,
|
|
30
|
-
lengthNormAnchor: 500,
|
|
31
|
-
};
|
|
32
|
-
|
|
33
32
|
const DEFAULT_LIMIT = 20;
|
|
34
33
|
|
|
35
34
|
export class SearchService {
|
|
35
|
+
private readonly policy: TrapSearchPolicy;
|
|
36
|
+
private readonly embeddingIndex: DatabaseEmbeddingIndex;
|
|
37
|
+
|
|
36
38
|
constructor(
|
|
37
39
|
private readonly db: Database,
|
|
38
40
|
private readonly embedder?: EmbeddingProvider,
|
|
39
|
-
|
|
40
|
-
) {
|
|
41
|
+
ranking: RankingConfig = DEFAULT_RANKING_CONFIG
|
|
42
|
+
) {
|
|
43
|
+
this.policy = new TrapSearchPolicy(ranking);
|
|
44
|
+
this.embeddingIndex = new DatabaseEmbeddingIndex(db);
|
|
45
|
+
}
|
|
41
46
|
|
|
42
47
|
async search(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
|
|
43
48
|
if (!query.trim()) return [];
|
|
@@ -56,14 +61,51 @@ export class SearchService {
|
|
|
56
61
|
}
|
|
57
62
|
|
|
58
63
|
ftsSearch(query: string, opts: SearchOptions = {}): TrapSearchResult[] {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
sources: ["fts"],
|
|
62
|
-
score: ftsScore(result.rank),
|
|
63
|
-
}));
|
|
64
|
+
const plan = this.policy.plan(opts, DEFAULT_LIMIT);
|
|
65
|
+
return this.policy.finalizeResults(this.retrieveFtsCandidates(query, plan), query, opts, plan);
|
|
64
66
|
}
|
|
65
67
|
|
|
66
68
|
async semanticSearch(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
|
|
69
|
+
const plan = this.policy.plan(opts, DEFAULT_LIMIT);
|
|
70
|
+
return this.policy.finalizeResults(await this.retrieveSemanticCandidates(query, plan), query, opts, plan);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
async hybridSearch(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
|
|
74
|
+
const plan = this.policy.plan(opts, DEFAULT_LIMIT);
|
|
75
|
+
const ftsCandidates = this.retrieveFtsCandidates(query, plan);
|
|
76
|
+
|
|
77
|
+
try {
|
|
78
|
+
const semanticCandidates = await this.retrieveSemanticCandidates(query, plan);
|
|
79
|
+
if (semanticCandidates.length === 0) {
|
|
80
|
+
return this.policy.withDiagnostics(this.policy.finalizeResults(ftsCandidates, query, opts, plan), {
|
|
81
|
+
code: "semantic_no_candidates",
|
|
82
|
+
message: "Hybrid search used FTS results because no fresh semantic candidates passed the score threshold.",
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
return this.policy.fuseAndFinalize(ftsCandidates, semanticCandidates, query, opts, plan);
|
|
86
|
+
} catch (error) {
|
|
87
|
+
return this.policy.withDiagnostics(
|
|
88
|
+
this.policy.finalizeResults(ftsCandidates, query, opts, plan),
|
|
89
|
+
this.policy.semanticDiagnostic(error)
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
private retrieveFtsCandidates(query: string, plan: SearchRetrievalPlan): TrapSearchResult[] {
|
|
95
|
+
const candidates = queries
|
|
96
|
+
.searchTraps(this.db, query, plan.ftsStorageFilter)
|
|
97
|
+
.map((result) => ({
|
|
98
|
+
...result,
|
|
99
|
+
sources: ["fts"] as ("fts")[],
|
|
100
|
+
score: ftsScore(result.rank),
|
|
101
|
+
}));
|
|
102
|
+
return this.policy.prepareRetrievedResults(candidates, "fts", plan);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
private async retrieveSemanticCandidates(
|
|
106
|
+
query: string,
|
|
107
|
+
plan: SearchRetrievalPlan
|
|
108
|
+
): Promise<TrapSearchResult[]> {
|
|
67
109
|
if (!this.embedder) {
|
|
68
110
|
throw new EmbeddingProviderUnavailableError();
|
|
69
111
|
}
|
|
@@ -72,13 +114,9 @@ export class SearchService {
|
|
|
72
114
|
if (!queryEmbedding) return [];
|
|
73
115
|
|
|
74
116
|
const config = embeddingConfig(this.embedder);
|
|
75
|
-
const candidates =
|
|
76
|
-
category: opts.category,
|
|
77
|
-
scope: opts.scope,
|
|
78
|
-
status: opts.status,
|
|
79
|
-
});
|
|
117
|
+
const candidates = this.embeddingIndex.freshEmbeddings(config, plan.semanticStorageFilter);
|
|
80
118
|
|
|
81
|
-
|
|
119
|
+
const results = candidates
|
|
82
120
|
.map(({ trap, embedding }) => {
|
|
83
121
|
const score = cosineSimilarity(queryEmbedding, embedding);
|
|
84
122
|
return {
|
|
@@ -88,102 +126,11 @@ export class SearchService {
|
|
|
88
126
|
score,
|
|
89
127
|
};
|
|
90
128
|
})
|
|
91
|
-
.
|
|
92
|
-
|
|
93
|
-
.slice(0, opts.limit ?? DEFAULT_LIMIT);
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
async hybridSearch(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
|
|
97
|
-
const limit = opts.limit ?? DEFAULT_LIMIT;
|
|
98
|
-
const ftsResults = this.ftsSearch(query, { ...opts, limit });
|
|
99
|
-
|
|
100
|
-
try {
|
|
101
|
-
const semanticResults = await this.semanticSearch(query, { ...opts, limit });
|
|
102
|
-
if (semanticResults.length === 0) {
|
|
103
|
-
return withDiagnostics(ftsResults, {
|
|
104
|
-
code: "semantic_no_candidates",
|
|
105
|
-
message: "Hybrid search used FTS results because no fresh semantic candidates passed the score threshold.",
|
|
106
|
-
});
|
|
107
|
-
}
|
|
108
|
-
return rrfFuse(ftsResults, semanticResults, limit, this.ranking);
|
|
109
|
-
} catch (error) {
|
|
110
|
-
return withDiagnostics(ftsResults, semanticDiagnostic(error));
|
|
111
|
-
}
|
|
129
|
+
.map((result) => result as TrapSearchResult);
|
|
130
|
+
return this.policy.prepareRetrievedResults(results, "semantic", plan);
|
|
112
131
|
}
|
|
113
132
|
}
|
|
114
133
|
|
|
115
|
-
export function rrfFuse(
|
|
116
|
-
ftsResults: TrapSearchResult[],
|
|
117
|
-
semanticResults: TrapSearchResult[],
|
|
118
|
-
limit = DEFAULT_LIMIT,
|
|
119
|
-
ranking: RankingConfig = DEFAULT_RANKING_CONFIG
|
|
120
|
-
): TrapSearchResult[] {
|
|
121
|
-
const byId = new Map<number, TrapSearchResult & { score: number; sources: ("fts" | "semantic")[] }>();
|
|
122
|
-
|
|
123
|
-
addRankedResults(byId, ftsResults, "fts", ranking);
|
|
124
|
-
addRankedResults(byId, semanticResults, "semantic", ranking);
|
|
125
|
-
|
|
126
|
-
return [...byId.values()]
|
|
127
|
-
.map((result) => ({
|
|
128
|
-
...result,
|
|
129
|
-
score: applyLengthNormalization(result.score, result.trap, ranking),
|
|
130
|
-
rank: applyLengthNormalization(result.score, result.trap, ranking),
|
|
131
|
-
}))
|
|
132
|
-
.sort((a, b) => (b.score ?? 0) - (a.score ?? 0))
|
|
133
|
-
.slice(0, limit);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
function addRankedResults(
|
|
137
|
-
byId: Map<number, TrapSearchResult & { score: number; sources: ("fts" | "semantic")[] }>,
|
|
138
|
-
results: TrapSearchResult[],
|
|
139
|
-
source: "fts" | "semantic",
|
|
140
|
-
ranking: RankingConfig
|
|
141
|
-
): void {
|
|
142
|
-
results.forEach((result, index) => {
|
|
143
|
-
const score = 1 / (ranking.rrfK + index + 1);
|
|
144
|
-
const existing = byId.get(result.trap.id);
|
|
145
|
-
if (existing) {
|
|
146
|
-
existing.score += score;
|
|
147
|
-
if (!existing.sources.includes(source)) existing.sources.push(source);
|
|
148
|
-
return;
|
|
149
|
-
}
|
|
150
|
-
byId.set(result.trap.id, {
|
|
151
|
-
...result,
|
|
152
|
-
score,
|
|
153
|
-
sources: [source],
|
|
154
|
-
});
|
|
155
|
-
});
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
function applyLengthNormalization(score: number, trap: Trap, ranking: RankingConfig): number {
|
|
159
|
-
const length = `${trap.context}\n${trap.mistake}\n${trap.fix}`.length;
|
|
160
|
-
if (length <= ranking.lengthNormAnchor) return score;
|
|
161
|
-
return score * Math.sqrt(ranking.lengthNormAnchor / length);
|
|
162
|
-
}
|
|
163
|
-
|
|
164
134
|
/** Turns a storage rank into a score by negating it; non-finite ranks become 0. */
function ftsScore(rank: number): number {
  if (!Number.isFinite(rank)) {
    return 0;
  }
  return -rank;
}
|
|
167
|
-
|
|
168
|
-
function withDiagnostics(
|
|
169
|
-
results: TrapSearchResult[],
|
|
170
|
-
diagnostic: { code: string; message: string }
|
|
171
|
-
): TrapSearchResult[] {
|
|
172
|
-
return results.map((result) => ({
|
|
173
|
-
...result,
|
|
174
|
-
diagnostics: [...(result.diagnostics ?? []), diagnostic],
|
|
175
|
-
}));
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
function semanticDiagnostic(error: unknown): { code: string; message: string } {
|
|
179
|
-
if (error instanceof EmbeddingProviderUnavailableError) {
|
|
180
|
-
return {
|
|
181
|
-
code: "semantic_unavailable",
|
|
182
|
-
message: error.message,
|
|
183
|
-
};
|
|
184
|
-
}
|
|
185
|
-
return {
|
|
186
|
-
code: "semantic_failed",
|
|
187
|
-
message: error instanceof Error ? error.message : "Semantic search failed; hybrid search returned FTS results.",
|
|
188
|
-
};
|
|
189
|
-
}
|