codetrap 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/LICENSE +22 -0
- package/README.md +305 -0
- package/docs/installation.md +306 -0
- package/package.json +62 -0
- package/scripts/build-release.ts +64 -0
- package/scripts/check-release-version.ts +19 -0
- package/skills/codetrap-add/SKILL.md +65 -0
- package/skills/codetrap-check/SKILL.md +47 -0
- package/skills/codetrap-search/SKILL.md +43 -0
- package/src/commands/router.ts +407 -0
- package/src/db/connection.ts +36 -0
- package/src/db/embedding-queries.ts +154 -0
- package/src/db/queries.ts +296 -0
- package/src/db/repository.ts +141 -0
- package/src/db/schema.ts +205 -0
- package/src/domain/trap.ts +304 -0
- package/src/index.ts +58 -0
- package/src/lib/constants.ts +56 -0
- package/src/lib/embedder.ts +133 -0
- package/src/lib/embedding-job.ts +68 -0
- package/src/lib/format.ts +97 -0
- package/src/lib/fts-query.ts +17 -0
- package/src/lib/scope.ts +30 -0
- package/src/lib/search-normalizer.ts +92 -0
- package/src/lib/search-result-card.ts +38 -0
- package/src/lib/search-service.ts +189 -0
- package/src/lib/store.ts +272 -0
- package/src/lib/trap-archive.ts +91 -0
- package/src/lib/trap-json-fields.ts +42 -0
- package/src/lib/trap-operations.ts +127 -0
- package/src/lib/trap-search-document.ts +73 -0
- package/src/mcp/resources.ts +26 -0
- package/src/mcp/server.ts +167 -0
- package/src/mcp/tools.ts +106 -0
- package/src/mcp-server.ts +6 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import type { Trap } from "../domain/trap";
|
|
2
|
+
import { PASSAGE_VERSION } from "./trap-search-document";
|
|
3
|
+
|
|
4
|
+
/** Task label passed to an embedding provider: one value for search queries, one for stored passages. */
export type EmbeddingTask = "retrieval.query" | "retrieval.passage";
|
|
5
|
+
|
|
6
|
+
/**
 * Contract for a backend that turns text into embedding vectors.
 */
export interface EmbeddingProvider {
  /** Short provider identifier (e.g. "jina"). */
  provider: string;
  /** Model name reported by this provider. */
  model: string;
  /** Vector dimensionality declared by this provider. */
  dimensions: number;
  /**
   * Embeds a batch of texts for the given task.
   *
   * @param texts - Inputs to embed.
   * @param task - Whether the texts are queries or passages.
   * @returns One vector per input text.
   */
  embed(texts: string[], task: EmbeddingTask): Promise<Float32Array[]>;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Identifies how an embedding was produced: provider, model, vector size and
 * the version of the passage format that was embedded.
 */
export interface EmbeddingConfig {
  /** Provider identifier. */
  provider: string;
  /** Model name. */
  model: string;
  /** Vector dimensionality. */
  dimensions: number;
  /** Version number of the passage text format. */
  passageVersion: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * An embedding record for one trap, together with the metadata describing
 * how it was produced.
 */
export interface StoredEmbedding {
  /** Id of the trap this vector belongs to. */
  trap_id: number;
  /** Provider identifier. */
  provider: string;
  /** Model name. */
  model: string;
  /** Vector dimensionality. */
  dimensions: number;
  /** Version number of the passage text format. */
  passage_version: number;
  /** Hash of the passage text that was embedded. */
  passage_hash: string;
  /** The embedding vector. */
  embedding: Float32Array;
  /** Optional last-update timestamp. */
  updated_at?: string;
}
|
|
30
|
+
|
|
31
|
+
/** A trap paired with its embedding vector. */
export interface FreshEmbedding {
  /** The trap record. */
  trap: Trap;
  /** The trap's embedding vector. */
  embedding: Float32Array;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Error raised when an operation needs an embedding provider but none is
 * configured.
 */
export class EmbeddingProviderUnavailableError extends Error {
  /**
   * @param message - Optional override for the default explanation.
   */
  constructor(
    message = "Embedding provider is unavailable. Set JINA_API_KEY or use --mode fts."
  ) {
    super(message);
    // Set explicitly so the error reports its own class name rather than "Error".
    this.name = "EmbeddingProviderUnavailableError";
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Embedding provider that calls an HTTP `/embeddings` endpoint (Jina by
 * default) with a bearer API key.
 */
export class JinaEmbedder implements EmbeddingProvider {
  readonly provider = "jina";
  readonly model = "jina-embeddings-v5-text-small";
  readonly dimensions = 1024;

  /**
   * @param apiKey - Bearer token sent in the `Authorization` header.
   * @param baseURL - API base URL; `/embeddings` is appended to it.
   */
  constructor(
    private readonly apiKey: string,
    private readonly baseURL = "https://api.jina.ai/v1"
  ) {}

  /**
   * Embeds `texts` with a single POST request.
   *
   * @param texts - Inputs to embed; an empty list short-circuits without a request.
   * @param task - Task label forwarded in the request body.
   * @returns One vector per input, ordered by the `index` reported in the response.
   * @throws Error when the HTTP status is not OK, when the number of returned
   *   rows differs from the number of inputs, or when a row has no embedding array.
   */
  async embed(texts: string[], task: EmbeddingTask): Promise<Float32Array[]> {
    if (texts.length === 0) return [];

    const endpoint = `${this.baseURL}/embeddings`;
    const requestBody = JSON.stringify({
      model: this.model,
      input: texts,
      task,
    });
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
      },
      body: requestBody,
    });

    if (!response.ok) {
      const body = await response.text();
      throw new Error(`Jina embeddings request failed (${response.status}): ${body}`);
    }

    const payload = (await response.json()) as {
      data?: { embedding?: number[]; index?: number }[];
    };
    const rows = payload.data ?? [];
    if (rows.length !== texts.length) {
      throw new Error(`Jina embeddings returned ${rows.length} vectors for ${texts.length} inputs.`);
    }

    // Sort a copy by reported index (missing index counts as 0) so the
    // output lines up with the input order.
    const ordered = [...rows].sort((left, right) => (left.index ?? 0) - (right.index ?? 0));

    const vectors: Float32Array[] = [];
    for (const row of ordered) {
      if (!Array.isArray(row.embedding)) {
        throw new Error("Jina embeddings response is missing an embedding vector.");
      }
      vectors.push(Float32Array.from(row.embedding));
    }
    return vectors;
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Builds the default embedding provider from the environment.
 *
 * @returns A `JinaEmbedder` when `JINA_API_KEY` is set to a non-empty value,
 *   otherwise `undefined`.
 */
export function createDefaultEmbeddingProvider(): EmbeddingProvider | undefined {
  const key = process.env.JINA_API_KEY;
  if (!key) {
    return undefined;
  }
  return new JinaEmbedder(key);
}
|
|
98
|
+
|
|
99
|
+
/**
 * Derives the embedding configuration for a provider.
 *
 * @param provider - Provider whose identity fields are copied.
 * @returns The provider's `provider`, `model` and `dimensions`, plus the
 *   current `PASSAGE_VERSION`.
 */
export function embeddingConfig(provider: EmbeddingProvider): EmbeddingConfig {
  const { provider: providerName, model, dimensions } = provider;
  const config: EmbeddingConfig = {
    provider: providerName,
    model,
    dimensions,
    passageVersion: PASSAGE_VERSION,
  };
  return config;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Exposes the raw bytes of an embedding as a `Buffer`.
 *
 * @param embedding - Vector to encode.
 * @returns A `Buffer` over exactly the bytes covered by `embedding`
 *   (its offset and length within the underlying buffer are respected).
 */
export function encodeEmbedding(embedding: Float32Array): Buffer {
  const { buffer, byteOffset, byteLength } = embedding;
  return Buffer.from(buffer, byteOffset, byteLength);
}
|
|
111
|
+
|
|
112
|
+
/**
 * Rebuilds a `Float32Array` from raw embedding bytes.
 *
 * The bytes are always copied into a fresh buffer first, so the result does
 * not alias the input and starts at offset 0 regardless of where the input
 * view began.
 *
 * @param blob - Raw bytes, as a byte view or an `ArrayBuffer`.
 * @returns A new vector backed by its own buffer.
 */
export function decodeEmbedding(blob: Uint8Array | ArrayBuffer): Float32Array {
  // Constructing a Uint8Array from another typed array copies its contents;
  // ArrayBuffer#slice copies the buffer.
  const detached = blob instanceof Uint8Array ? new Uint8Array(blob).buffer : blob.slice(0);
  return new Float32Array(detached);
}
|
|
118
|
+
|
|
119
|
+
/**
 * Cosine similarity of two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when the lengths differ, the
 *   vectors are empty, or either vector has zero magnitude.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  const size = a.length;
  if (size === 0 || size !== b.length) return 0;

  const dot = a.reduce((sum, value, index) => sum + value * b[index], 0);
  const squaredA = a.reduce((sum, value) => sum + value * value, 0);
  const squaredB = b.reduce((sum, value) => sum + value * value, 0);

  // A zero-magnitude vector has no direction; report no similarity.
  if (squaredA === 0 || squaredB === 0) return 0;
  return dot / (Math.sqrt(squaredA) * Math.sqrt(squaredB));
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { Trap } from "../domain/trap";
|
|
2
|
+
import {
|
|
3
|
+
embeddingConfig,
|
|
4
|
+
type EmbeddingConfig,
|
|
5
|
+
type EmbeddingProvider,
|
|
6
|
+
type StoredEmbedding,
|
|
7
|
+
} from "./embedder";
|
|
8
|
+
import { buildTrapPassage, hashTrapPassage } from "./trap-search-document";
|
|
9
|
+
|
|
10
|
+
/** Options accepted by an embedding job run. */
export interface EmbeddingJobOptions {
  /** Optional scope filter. */
  scope?: string;
  /** Optional category filter. */
  category?: string;
  /** Optional limit, forwarded to the adapter. */
  limit?: number;
  /** Optional force flag, forwarded to the adapter. */
  force?: boolean;
  /** Number of traps embedded per provider call. */
  batchSize?: number;
}
|
|
17
|
+
|
|
18
|
+
/** Summary counters returned by an embedding job run. */
export interface EmbeddingJobResult {
  /** Number of traps for which an embedding was produced. */
  generated: number;
  /** Number of embeddable traps that were not processed. */
  skipped: number;
  /** Number of provider calls (batches) made. */
  batches: number;
}
|
|
23
|
+
|
|
24
|
+
/** Storage operations an embedding job needs from its backing store. */
export interface EmbeddingJobAdapter {
  /** Counts embeddable traps matching the optional scope/category filters. */
  countEmbeddable(opts: { scope?: string; category?: string }): number;
  /** Lists the traps that need an embedding under the given configuration. */
  trapsNeedingEmbeddings(config: EmbeddingConfig, opts: EmbeddingJobOptions): Trap[];
  /** Persists one embedding record. */
  saveEmbedding(record: StoredEmbedding): void;
}
|
|
29
|
+
|
|
30
|
+
/** Number of traps sent to the embedding provider per call when no batch size is given. */
const DEFAULT_BATCH_SIZE = 16;
|
|
31
|
+
|
|
32
|
+
/**
 * Generates and stores embeddings for every trap the adapter reports as
 * needing one, calling the provider in batches.
 *
 * @param adapter - Store used to count traps, list pending traps and save results.
 * @param provider - Embedding provider; also supplies the embedding configuration.
 * @param opts - Filters and batch size. A missing, NaN, zero or negative
 *   `batchSize` falls back to the default; fractional sizes are rounded down.
 * @returns Counts of generated embeddings, skipped traps and provider calls.
 * @throws Error when the provider returns a different number of vectors than
 *   traps in the batch.
 */
export async function runEmbeddingJob(
  adapter: EmbeddingJobAdapter,
  provider: EmbeddingProvider,
  opts: EmbeddingJobOptions = {}
): Promise<EmbeddingJobResult> {
  const config = embeddingConfig(provider);
  const total = adapter.countEmbeddable({ scope: opts.scope, category: opts.category });
  const traps = adapter.trapsNeedingEmbeddings(config, opts);
  if (traps.length === 0) return { generated: 0, skipped: total, batches: 0 };

  const batchSize = resolveBatchSize(opts.batchSize);
  let batches = 0;

  for (let start = 0; start < traps.length; start += batchSize) {
    const batch = traps.slice(start, start + batchSize);
    const passages = batch.map(buildTrapPassage);
    const embeddings = await provider.embed(passages, "retrieval.passage");
    if (embeddings.length !== batch.length) {
      throw new Error(`Embedding provider returned ${embeddings.length} vectors for ${batch.length} traps.`);
    }

    for (let i = 0; i < batch.length; i++) {
      adapter.saveEmbedding({
        trap_id: batch[i].id,
        provider: config.provider,
        model: config.model,
        dimensions: config.dimensions,
        passage_version: config.passageVersion,
        // Hash the exact text that was embedded.
        passage_hash: hashTrapPassage(passages[i]),
        embedding: embeddings[i],
      });
    }
    batches++;
  }

  return { generated: traps.length, skipped: Math.max(0, total - traps.length), batches };
}

/**
 * Returns a usable batch size. Values that would stall the batching loop
 * (zero, negative) or skip work (NaN) are replaced by the default.
 */
function resolveBatchSize(requested: number | undefined): number {
  if (requested === undefined || Number.isNaN(requested) || requested < 1) {
    return DEFAULT_BATCH_SIZE;
  }
  return Math.floor(requested);
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { CATEGORY_LABELS, SEVERITY_ICONS, type Category, type Severity } from "./constants";
|
|
2
|
+
import type { Trap, TrapActionCard, TrapDetails, TrapEvidence } from "../domain/trap";
|
|
3
|
+
import { parseEvidenceRelatedFiles, parseTrapTags } from "./trap-json-fields";
|
|
4
|
+
export type { Trap } from "../domain/trap";
|
|
5
|
+
|
|
6
|
+
/**
 * Renders a one-line summary of a trap.
 *
 * @param t - Trap to summarise.
 * @param scopeLabel - Label shown in the leading bracket.
 * @returns `[scope] [severity] [category] #id title`; severity and category
 *   fall back to their raw values when no icon/label is registered.
 */
export function formatTrapShort(t: Trap, scopeLabel: string): string {
  const severityText = SEVERITY_ICONS[t.severity as Severity] ?? t.severity;
  const categoryText = CATEGORY_LABELS[t.category as Category] ?? t.category;
  const brackets = [scopeLabel, severityText, categoryText].map((part) => `[${part}]`);
  return `${brackets.join(" ")} #${t.id} ${t.title}`;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Renders an action card as a short multi-line text block.
 *
 * @param card - Card to render.
 * @returns Six lines: header, relevance, what to avoid, what to do instead,
 *   score with sources, and the follow-up `get_trap` call.
 */
export function formatTrapActionCard(card: TrapActionCard): string {
  const severityText = SEVERITY_ICONS[card.severity as Severity] ?? card.severity;
  // Sources are joined with "+"; an empty list is shown as "unknown".
  const sourceLabel = card.sources.length > 0 ? card.sources.join("+") : "unknown";
  const followUp = card.next_action.details_args;

  const lines = [
    `[${card.scope}] [${severityText}] #${card.trap_id} ${card.title}`,
    `Why relevant: ${card.why_relevant}`,
    `Avoid: ${card.avoid}`,
    `Do instead: ${card.do_instead}`,
    `Score: ${formatScore(card.score)} (${sourceLabel})`,
    `Next: get_trap id=${followUp.id} scope=${followUp.scope}`,
  ];
  return lines.join("\n");
}
|
|
23
|
+
|
|
24
|
+
/**
 * Renders the full detail view of a trap.
 *
 * @param t - Trap to render.
 * @param scopeLabel - Human-readable scope label shown next to the raw scope.
 * @returns A banner with the id and title, a metadata list, the
 *   context/mistake/fix sections, and optional before/after code samples
 *   indented by two spaces.
 */
export function formatTrapDetail(t: Trap, scopeLabel: string): string {
  const severityText = SEVERITY_ICONS[t.severity as Severity] ?? t.severity;
  const categoryText = CATEGORY_LABELS[t.category as Category] ?? t.category;
  const tagList = parseTrapTags(t.tags);

  const body = [
    "══════════════════════════════════════════",
    `#${t.id} ${t.title}`,
    "══════════════════════════════════════════",
    `Scope: ${scopeLabel} (${t.scope})`,
    `Severity: ${severityText}`,
    `Category: ${categoryText}`,
    `Tags: ${tagList.join(", ") || "-"}`,
    `Hit count: ${t.hit_count}`,
    `Created: ${t.created_at}`,
    `Updated: ${t.updated_at}`,
    "──────────────────────────────────────────",
    "Context:",
    `${t.context}`,
    "",
    "Mistake (what AI tends to do wrong):",
    `${t.mistake}`,
    "",
    "Fix (what should be done instead):",
    `${t.fix}`,
  ].join("\n");

  // Code samples are optional; each is appended as its own section.
  const sections = [body];
  if (t.before_code) {
    sections.push(`Before (wrong):\n${indent(t.before_code, 2)}`);
  }
  if (t.after_code) {
    sections.push(`After (correct):\n${indent(t.after_code, 2)}`);
  }
  return sections.join("\n\n");
}
|
|
57
|
+
|
|
58
|
+
/**
 * Renders a trap's detail view followed by its lifecycle fields and, when
 * present, its evidence entries.
 *
 * @param details - Trap, scope and evidence to render.
 * @returns The detail text plus a "Lifecycle:" section; an "Evidence:"
 *   section is appended only when there is at least one evidence entry.
 */
export function formatTrapDetails(details: TrapDetails): string {
  const { trap, evidence } = details;
  const detailText = formatTrapDetail(trap, details.scope);

  // The leading empty entry yields the newline that separates this section
  // from the detail text.
  const lifecycleText = [
    "",
    "Lifecycle:",
    `Status: ${trap.status}`,
    `State key: ${trap.state_key ?? "-"}`,
    `Supersedes: ${trap.supersedes_id ?? "-"}`,
    `Valid from: ${trap.valid_from}`,
    `Valid until: ${trap.valid_until ?? "-"}`,
  ].join("\n");

  const report = detailText + lifecycleText;
  if (evidence.length === 0) return report;

  const evidenceText = evidence.map((entry) => formatEvidence(entry)).join("\n\n");
  return `${report}\n\nEvidence:\n${evidenceText}`;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Renders one evidence entry as a bullet with its observation time, source
 * reference, related files and note. Missing values are shown as "-".
 */
function formatEvidence(evidence: TrapEvidence): string {
  const files = parseEvidenceRelatedFiles(evidence.related_files);
  const lines = [
    `- #${evidence.id} ${evidence.source_type}`,
    `Observed: ${evidence.observed_at}`,
    `Source: ${evidence.source_ref ?? "-"}`,
    `Files: ${files.join(", ") || "-"}`,
    `Note: ${evidence.note ?? "-"}`,
  ];
  return lines.join("\n");
}
|
|
86
|
+
|
|
87
|
+
/**
 * Formats a score for display.
 *
 * @returns "-" for `null`, otherwise the score with four decimal places.
 */
function formatScore(score: number | null): string {
  if (score === null) {
    return "-";
  }
  return score.toFixed(4);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Prefixes every "\n"-separated line of `text` with `spaces` spaces.
 * An empty input is treated as one empty line, so it still receives the prefix.
 */
function indent(text: string, spaces: number): string {
  const prefix = " ".repeat(spaces);
  const indented: string[] = [];
  for (const line of text.split("\n")) {
    indented.push(`${prefix}${line}`);
  }
  return indented.join("\n");
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Builds a full-text-search query string from free text.
 *
 * @param query - Raw user query.
 * @returns Each whitespace-separated term quoted and joined with " OR ";
 *   an empty string when the query has no terms.
 */
export function prepareFTSQuery(query: string): string {
  const quoted: string[] = [];
  for (const term of tokenizeLiteralQuery(query)) {
    quoted.push(quoteFTSTerm(term));
  }
  return quoted.length > 0 ? quoted.join(" OR ") : "";
}
|
|
6
|
+
|
|
7
|
+
/**
 * Splits a query on runs of whitespace and drops empty pieces.
 */
function tokenizeLiteralQuery(query: string): string[] {
  const terms: string[] = [];
  for (const piece of query.trim().split(/\s+/)) {
    const term = piece.trim();
    // Splitting an empty string yields one empty piece; skip it.
    if (term) terms.push(term);
  }
  return terms;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Wraps a term in double quotes, doubling any double quote inside it so the
 * term is treated as a literal string.
 */
function quoteFTSTerm(term: string): string {
  const escaped = term.split('"').join('""');
  return '"' + escaped + '"';
}
|
package/src/lib/scope.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { existsSync, mkdirSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join, resolve } from "node:path";
import { CODETRAP_DIR, TRAPS_DB_FILE } from "./constants";
|
|
5
|
+
|
|
6
|
+
/**
 * Returns the codetrap directory inside the user's home directory, creating
 * it (and any missing parents) when it does not exist yet.
 */
export function getGlobalDir(): string {
  const globalDir = join(homedir(), CODETRAP_DIR);
  const alreadyThere = existsSync(globalDir);
  if (!alreadyThere) {
    mkdirSync(globalDir, { recursive: true });
  }
  return globalDir;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Returns the path of the traps database file inside the global codetrap
 * directory. Obtaining that directory creates it when missing.
 */
export function getGlobalDB(): string {
  const globalDir = getGlobalDir();
  return join(globalDir, TRAPS_DB_FILE);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Finds the nearest directory, starting at `cwd` and walking upwards, that
 * contains a codetrap directory.
 *
 * @param cwd - Directory to start from. Relative paths are resolved against
 *   the process working directory first, so the upward walk always ends at
 *   the filesystem root.
 * @returns The first matching directory as an absolute path, or `null` when
 *   the root is reached without a match.
 */
export function findProjectRoot(cwd: string): string | null {
  let dir = resolve(cwd);
  while (true) {
    if (existsSync(join(dir, CODETRAP_DIR))) return dir;
    // dirname() of the filesystem root is the root itself; that is the stop condition.
    const parent = dirname(dir);
    if (parent === dir) return null;
    dir = parent;
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Returns the path of the traps database file for a project, creating the
 * project's codetrap directory when it does not exist yet.
 *
 * @param root - Project root directory.
 */
export function getProjectDB(root: string): string {
  const projectDir = join(root, CODETRAP_DIR);
  const alreadyThere = existsSync(projectDir);
  if (!alreadyThere) {
    mkdirSync(projectDir, { recursive: true });
  }
  return join(projectDir, TRAPS_DB_FILE);
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/** Matches runs of characters in U+3400–U+9FFF, U+F900–U+FAFF, U+3040–U+30FF and U+AC00–U+D7AF (CJK ideograph, kana and Hangul syllable ranges). */
const CJK_RUN = /[\u3400-\u9fff\uf900-\ufaff\u3040-\u30ff\uac00-\ud7af]+/gu;
|
|
2
|
+
/** Matches runs of ASCII letters, digits and the characters `_ . $ / @ : -`. */
const ASCII_TOKEN = /[A-Za-z0-9_.$/@:-]+/g;
|
|
3
|
+
|
|
4
|
+
/**
 * Maps a Chinese trigger phrase to English terms that are added to the
 * search text whenever the phrase occurs in the source text.
 */
const SYNONYMS: Record<string, string[]> = {
  // request
  "请求": ["http", "https", "fetch", "request", "axios"],
  // authentication
  "认证": ["auth", "authentication", "login", "session"],
  // database
  "数据库": ["db", "sql", "sqlite", "migration"],
  // configuration
  "配置": ["config", "env", "environment"],
  // cache
  "缓存": ["cache", "redis"],
  // routing
  "路由": ["route", "router"],
};
|
|
12
|
+
|
|
13
|
+
/**
 * Text-bearing fields that contribute to a trap's search text.
 * Every field is optional; `tags` may be a single string or a list.
 */
export type SearchTextFields = {
  /** Title text. */
  title?: string;
  /** Context text. */
  context?: string;
  /** Mistake description. */
  mistake?: string;
  /** Fix description. */
  fix?: string;
  /** Tags, as one string or a list of strings. */
  tags?: string | string[];
  /** Optional "before" code sample. */
  before_code?: string | null;
  /** Optional "after" code sample. */
  after_code?: string | null;
};
|
|
22
|
+
|
|
23
|
+
/**
 * Names of the fields that are concatenated into the search text, in the
 * order in which they are read.
 */
export const SEARCH_TEXT_FIELD_NAMES = [
  "title",
  "context",
  "mistake",
  "fix",
  "tags",
  "before_code",
  "after_code",
] as const;
|
|
32
|
+
|
|
33
|
+
/**
 * Produces overlapping two-character grams for every CJK run in `input`.
 *
 * @param input - Text to scan.
 * @returns For each run matched by `CJK_RUN`: the single character when the
 *   run has length 1, otherwise every adjacent character pair, in order.
 */
export function bigramCJK(input: string): string[] {
  const grams: string[] = [];
  for (const [run] of input.matchAll(CJK_RUN)) {
    // Spread by code point so astral characters are not split.
    const chars = [...run];
    if (chars.length === 1) {
      grams.push(chars[0]);
    } else {
      chars.slice(1).forEach((next, offset) => {
        grams.push(chars[offset] + next);
      });
    }
  }
  return grams;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Builds the normalized search text for a set of fields.
 *
 * Tokens are collected in this order, without duplicates: lower-cased ASCII
 * tokens, CJK bigrams (each with any synonyms registered for it), then every
 * synonym trigger phrase found in the text together with its expansions.
 *
 * @param fields - Fields to draw text from.
 * @returns The collected tokens joined by single spaces.
 */
export function buildSearchText(fields: SearchTextFields): string {
  const text = fieldsToText(fields);
  const collected = new Set<string>();

  const asciiWords = text.match(ASCII_TOKEN);
  if (asciiWords !== null) {
    asciiWords.forEach((word) => collected.add(word.toLowerCase()));
  }

  bigramCJK(text).forEach((gram) => addTokenWithSynonyms(collected, gram));

  // Trigger phrases can be longer than a bigram, so they are also looked up
  // directly in the source text.
  Object.entries(SYNONYMS).forEach(([phrase, related]) => {
    if (!text.includes(phrase)) return;
    collected.add(phrase);
    related.forEach((term) => collected.add(term));
  });

  return Array.from(collected).join(" ");
}
|
|
69
|
+
|
|
70
|
+
/**
 * Normalizes a search query with the same tokenisation used for stored
 * search text, by treating the query as a title-only field set.
 */
export function normalizeQuery(query: string): string {
  const titleOnly: SearchTextFields = { title: query };
  return buildSearchText(titleOnly);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Adds `token` to the set, followed by any synonyms registered under exactly
 * that token.
 */
function addTokenWithSynonyms(tokens: Set<string>, token: string): void {
  tokens.add(token);
  const related = SYNONYMS[token] ?? [];
  related.forEach((term) => {
    tokens.add(term);
  });
}
|
|
80
|
+
|
|
81
|
+
/**
 * Concatenates the searchable fields into one space-separated string.
 * Array values are joined with spaces; non-string values (missing or null)
 * are skipped.
 */
function fieldsToText(fields: SearchTextFields): string {
  return SEARCH_TEXT_FIELD_NAMES
    .map((name) => fields[name])
    .flatMap((value) => {
      if (Array.isArray(value)) return [value.join(" ")];
      return typeof value === "string" ? [value] : [];
    })
    .join(" ");
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { TrapActionCard, TrapSearchResult } from "../domain/trap";
|
|
2
|
+
import type { Scope } from "./constants";
|
|
3
|
+
|
|
4
|
+
/** Maximum length, in UTF-16 code units and including the trailing "...", of a compacted text field on an action card. */
const MAX_CARD_FIELD_LENGTH = 220;
|
|
5
|
+
|
|
6
|
+
/**
 * Converts a search result into a compact action card.
 *
 * @param result - Search result to convert.
 * @param scope - Scope the result came from; repeated in the follow-up call arguments.
 * @returns A card whose text fields are compacted, whose score defaults to
 *   `null` and sources to an empty list, and whose next action is a
 *   `get_trap` call for the same trap.
 */
export function toTrapActionCard(result: TrapSearchResult, scope: Scope): TrapActionCard {
  const { id, title, context, mistake, fix, severity } = result.trap;
  const card: TrapActionCard = {
    trap_id: id,
    scope,
    title,
    why_relevant: compact(context),
    avoid: compact(mistake),
    do_instead: compact(fix),
    severity,
    score: result.score ?? null,
    sources: result.sources ?? [],
    next_action: {
      details_tool: "get_trap",
      details_args: { id, scope },
    },
  };
  return card;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Flattens grouped search results into a single list of action cards,
 * keeping group order and, within each group, result order.
 *
 * @param groups - Result lists, each tagged with the scope it came from.
 */
export function toTrapActionCards(groups: { results: TrapSearchResult[]; scope: string }[]): TrapActionCard[] {
  const cards: TrapActionCard[] = [];
  for (const { results, scope } of groups) {
    for (const result of results) {
      cards.push(toTrapActionCard(result, scope as Scope));
    }
  }
  return cards;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Collapses whitespace and truncates a text to fit on a card.
 *
 * @param value - Text to compact.
 * @param maxLength - Maximum length of the result in UTF-16 code units,
 *   including the trailing "..." when truncation happens.
 * @returns The text with whitespace runs collapsed to single spaces and
 *   trimmed; when longer than `maxLength`, a truncated prefix followed by "...".
 */
function compact(value: string, maxLength: number = MAX_CARD_FIELD_LENGTH): string {
  const normalized = value.replace(/\s+/g, " ").trim();
  if (normalized.length <= maxLength) return normalized;

  // Reserve three code units for the ellipsis.
  let end = Math.max(0, maxLength - 3);
  if (end > 0) {
    const last = normalized.charCodeAt(end - 1);
    // Do not cut between the halves of a surrogate pair: a lone high
    // surrogate would render as a replacement character.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }
  return `${normalized.slice(0, end).trimEnd()}...`;
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import type { Database } from "bun:sqlite";
|
|
2
|
+
import * as embeddingQueries from "../db/embedding-queries";
|
|
3
|
+
import * as queries from "../db/queries";
|
|
4
|
+
import type { Trap, TrapSearchResult } from "../domain/trap";
|
|
5
|
+
import type { SearchMode, TrapStatus } from "./constants";
|
|
6
|
+
import {
|
|
7
|
+
cosineSimilarity,
|
|
8
|
+
EmbeddingProviderUnavailableError,
|
|
9
|
+
embeddingConfig,
|
|
10
|
+
type EmbeddingProvider,
|
|
11
|
+
} from "./embedder";
|
|
12
|
+
|
|
13
|
+
/** Filters and settings accepted by the search service. */
export interface SearchOptions {
  /** Optional category filter. */
  category?: string;
  /** Optional scope filter. */
  scope?: string;
  /** Maximum number of results to return. */
  limit?: number;
  /** Search strategy; full-text search is used when omitted. */
  mode?: SearchMode;
  /** Optional status filter; "all" is accepted in addition to a trap status. */
  status?: TrapStatus | "all";
}
|
|
20
|
+
|
|
21
|
+
/** Tunable constants used when scoring and fusing search results. */
export interface RankingConfig {
  /** Constant `k` in the reciprocal-rank term `1 / (k + position)`. */
  rrfK: number;
  /** Minimum cosine similarity a semantic candidate must reach to be kept. */
  semanticMinScore: number;
  /** Text length above which fused scores are scaled down. */
  lengthNormAnchor: number;
}
|
|
26
|
+
|
|
27
|
+
/** Ranking constants used when a caller does not supply its own. */
export const DEFAULT_RANKING_CONFIG: RankingConfig = {
  // Reciprocal-rank-fusion constant.
  rrfK: 60,
  // Semantic candidates scoring below this similarity are dropped.
  semanticMinScore: 0.3,
  // Texts longer than this many characters have their fused score reduced.
  lengthNormAnchor: 500,
};
|
|
32
|
+
|
|
33
|
+
/** Maximum number of results returned when the caller gives no limit. */
const DEFAULT_LIMIT = 20;
|
|
34
|
+
|
|
35
|
+
/**
 * Searches traps by full-text match, by embedding similarity, or by a fusion
 * of both.
 */
export class SearchService {
  /**
   * @param db - Database handle passed to the query helpers.
   * @param embedder - Optional embedding provider; required for semantic search.
   * @param ranking - Scoring constants.
   */
  constructor(
    private readonly db: Database,
    private readonly embedder?: EmbeddingProvider,
    private readonly ranking: RankingConfig = DEFAULT_RANKING_CONFIG
  ) {}

  /**
   * Runs a search in the requested mode (full-text when none is given).
   *
   * @returns Matching results; an empty list for a blank query.
   * @throws Error for an unrecognised mode.
   */
  async search(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
    const isBlank = query.trim().length === 0;
    if (isBlank) return [];

    const mode = opts.mode ?? "fts";
    switch (mode) {
      case "fts": {
        return this.ftsSearch(query, opts);
      }
      case "semantic": {
        return this.semanticSearch(query, opts);
      }
      case "hybrid": {
        return this.hybridSearch(query, opts);
      }
      default: {
        // `satisfies never` makes the compiler flag any mode added to the
        // union but not handled above.
        throw new Error(`Invalid search mode: ${mode satisfies never}`);
      }
    }
  }

  /**
   * Full-text search. Each hit is tagged with the "fts" source and given a
   * score derived from its rank.
   */
  ftsSearch(query: string, opts: SearchOptions = {}): TrapSearchResult[] {
    const hits = queries.searchTraps(this.db, query, opts);
    return hits.map((hit): TrapSearchResult => ({
      ...hit,
      sources: ["fts"],
      score: ftsScore(hit.rank),
    }));
  }

  /**
   * Embedding-based search: embeds the query, scores every candidate
   * embedding by cosine similarity, drops candidates below the configured
   * minimum, and returns the best ones first.
   *
   * @throws EmbeddingProviderUnavailableError when no embedder is configured.
   */
  async semanticSearch(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
    const embedder = this.embedder;
    if (!embedder) {
      throw new EmbeddingProviderUnavailableError();
    }

    const vectors = await embedder.embed([query], "retrieval.query");
    const queryVector = vectors[0];
    if (!queryVector) return [];

    const candidates = embeddingQueries.getAllFreshEmbeddings(this.db, embeddingConfig(embedder), {
      category: opts.category,
      scope: opts.scope,
      status: opts.status,
    });

    const scored: TrapSearchResult[] = [];
    for (const { trap, embedding } of candidates) {
      const similarity = cosineSimilarity(queryVector, embedding);
      if (similarity >= this.ranking.semanticMinScore) {
        scored.push({ trap, rank: similarity, sources: ["semantic"], score: similarity });
      }
    }

    scored.sort((left, right) => (right.score ?? 0) - (left.score ?? 0));
    return scored.slice(0, opts.limit ?? DEFAULT_LIMIT);
  }

  /**
   * Combines full-text and semantic results. When semantic search yields
   * nothing or fails, the full-text results are returned with a diagnostic
   * explaining why.
   */
  async hybridSearch(query: string, opts: SearchOptions = {}): Promise<TrapSearchResult[]> {
    const limit = opts.limit ?? DEFAULT_LIMIT;
    const limited = { ...opts, limit };
    const ftsResults = this.ftsSearch(query, limited);

    let semanticResults: TrapSearchResult[];
    try {
      semanticResults = await this.semanticSearch(query, { ...opts, limit });
    } catch (error) {
      // Semantic failures degrade to full-text results instead of failing the search.
      return withDiagnostics(ftsResults, semanticDiagnostic(error));
    }

    if (semanticResults.length > 0) {
      return rrfFuse(ftsResults, semanticResults, limit, this.ranking);
    }
    return withDiagnostics(ftsResults, {
      code: "semantic_no_candidates",
      message: "Hybrid search used FTS results because no fresh semantic candidates passed the score threshold.",
    });
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * Fuses full-text and semantic result lists by summing a reciprocal-rank
 * contribution for each list a trap appears in, then scaling the total down
 * for long texts.
 *
 * @param ftsResults - Full-text results, best first.
 * @param semanticResults - Semantic results, best first.
 * @param limit - Maximum number of fused results.
 * @param ranking - Scoring constants.
 * @returns Fused results, highest score first; `rank` mirrors `score`.
 */
export function rrfFuse(
  ftsResults: TrapSearchResult[],
  semanticResults: TrapSearchResult[],
  limit = DEFAULT_LIMIT,
  ranking: RankingConfig = DEFAULT_RANKING_CONFIG
): TrapSearchResult[] {
  const merged = new Map<number, TrapSearchResult & { score: number; sources: ("fts" | "semantic")[] }>();
  addRankedResults(merged, ftsResults, "fts", ranking);
  addRankedResults(merged, semanticResults, "semantic", ranking);

  const fused: (TrapSearchResult & { score: number })[] = [];
  for (const entry of merged.values()) {
    const adjusted = applyLengthNormalization(entry.score, entry.trap, ranking);
    fused.push({ ...entry, score: adjusted, rank: adjusted });
  }

  fused.sort((left, right) => (right.score ?? 0) - (left.score ?? 0));
  return fused.slice(0, limit);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Adds one ranked list to the fusion map.
 *
 * Each result contributes `1 / (rrfK + position)` with 1-based positions.
 * A trap already in the map has the contribution added to its score and the
 * source appended if new; otherwise a copy of the result is inserted.
 */
function addRankedResults(
  byId: Map<number, TrapSearchResult & { score: number; sources: ("fts" | "semantic")[] }>,
  results: TrapSearchResult[],
  source: "fts" | "semantic",
  ranking: RankingConfig
): void {
  for (const [position, result] of results.entries()) {
    const contribution = 1 / (ranking.rrfK + position + 1);
    const trapId = result.trap.id;
    const entry = byId.get(trapId);

    if (entry === undefined) {
      byId.set(trapId, { ...result, score: contribution, sources: [source] });
      continue;
    }

    entry.score += contribution;
    if (!entry.sources.includes(source)) {
      entry.sources.push(source);
    }
  }
}
|
|
157
|
+
|
|
158
|
+
/**
 * Scales a score down for traps with long text.
 *
 * The length is that of context, mistake and fix joined by newlines. At or
 * below the anchor the score is unchanged; above it the score is multiplied
 * by `sqrt(anchor / length)`.
 */
function applyLengthNormalization(score: number, trap: Trap, ranking: RankingConfig): number {
  const anchor = ranking.lengthNormAnchor;
  const textLength = [trap.context, trap.mistake, trap.fix].join("\n").length;
  const withinAnchor = textLength <= anchor;
  return withinAnchor ? score : score * Math.sqrt(anchor / textLength);
}
|
|
163
|
+
|
|
164
|
+
/**
 * Converts a full-text rank into a score by negating it.
 * Non-finite ranks (NaN, ±Infinity) score 0.
 */
function ftsScore(rank: number): number {
  if (!Number.isFinite(rank)) {
    return 0;
  }
  return -rank;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Returns copies of the results with `diagnostic` appended to each result's
 * diagnostics list. The input results are not modified.
 */
function withDiagnostics(
  results: TrapSearchResult[],
  diagnostic: { code: string; message: string }
): TrapSearchResult[] {
  const annotated: TrapSearchResult[] = [];
  for (const result of results) {
    const previous = result.diagnostics ?? [];
    annotated.push({ ...result, diagnostics: [...previous, diagnostic] });
  }
  return annotated;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Maps an error thrown by semantic search to a diagnostic.
 *
 * @returns "semantic_unavailable" for a missing embedding provider,
 *   otherwise "semantic_failed"; the message is the error's own message when
 *   it is an `Error`, or a generic fallback.
 */
function semanticDiagnostic(error: unknown): { code: string; message: string } {
  const providerMissing = error instanceof EmbeddingProviderUnavailableError;
  if (providerMissing) {
    return { code: "semantic_unavailable", message: error.message };
  }

  let message = "Semantic search failed; hybrid search returned FTS results.";
  if (error instanceof Error) {
    message = error.message;
  }
  return { code: "semantic_failed", message };
}
|