opencode-semantic-search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +165 -0
- package/README.md +138 -0
- package/SETUP.md +541 -0
- package/bin/opencode-semantic-search.mjs +70 -0
- package/bun.lock +61 -0
- package/index.ts +138 -0
- package/install.sh +260 -0
- package/package.json +67 -0
- package/src/chunker/fallback.ts +77 -0
- package/src/chunker/index.ts +16 -0
- package/src/chunker/treesitter.ts +119 -0
- package/src/config.ts +157 -0
- package/src/diagnostics/bundle.ts +63 -0
- package/src/diagnostics/routing.ts +37 -0
- package/src/embedder/interface.ts +62 -0
- package/src/embedder/ollama.ts +60 -0
- package/src/embedder/openai.ts +71 -0
- package/src/indexer/delta.ts +165 -0
- package/src/indexer/gc.ts +10 -0
- package/src/indexer/incremental.ts +105 -0
- package/src/indexer/pipeline.test.ts +126 -0
- package/src/indexer/pipeline.ts +394 -0
- package/src/indexer/pool.ts +25 -0
- package/src/indexer/resume.ts +14 -0
- package/src/logger.ts +121 -0
- package/src/runtime.ts +111 -0
- package/src/search/context.ts +17 -0
- package/src/search/hybrid.ts +65 -0
- package/src/store/schema.sql +31 -0
- package/src/store/sqlite.ts +269 -0
- package/src/tools/diagnostic_bundle.ts +34 -0
- package/src/tools/index_status.ts +73 -0
- package/src/tools/reindex.ts +71 -0
- package/src/tools/semantic_search.ts +91 -0
- package/src/tools/smart_grep.ts +198 -0
- package/src/tui_toast.ts +191 -0
- package/src/types.d.ts +1 -0
package/src/logger.ts
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { appendFileSync } from "fs";
|
|
2
|
+
|
|
3
|
+
/** Severity levels accepted by the logger, ordered debug < info < warn < error. */
export type LogLevel = "debug" | "info" | "warn" | "error";

/**
 * Minimal structural view of the OpenCode SDK client: only the optional
 * `app.log` call is needed here, so the full client type is not imported.
 */
interface AppLogClient {
  app?: {
    log?: (entry: {
      body: {
        service: string;
        level: LogLevel;
        message: string;
        extra?: Record<string, unknown>;
      };
    }) => Promise<unknown>;
  };
}

interface LoggerOptions {
  // Master switch: when false every log call becomes a no-op.
  enabled: boolean;
  // Minimum severity that is emitted.
  level: LogLevel;
  // When non-empty, debug entries carrying a filePath/path extra are emitted
  // only if that path starts with one of these prefixes.
  verbosePaths?: string[];
  // Optional OpenCode host client; when it exposes app.log, entries are
  // routed there instead of the console.
  client?: AppLogClient;
  /** Absolute path to a local log file. When set, all log entries are appended as newline-delimited JSON (in addition to any other sink). Useful for local testing without the OpenCode host. */
  logFile?: string;
}

/** One log call: human-readable message plus optional structured fields. */
interface LogPayload {
  message: string;
  extra?: Record<string, unknown>;
}

// Numeric weights used to compare an entry's level against the configured
// minimum (higher = more severe).
const levelWeight: Record<LogLevel, number> = {
  debug: 10,
  info: 20,
  warn: 30,
  error: 40
};
|
|
38
|
+
|
|
39
|
+
function normalizeError(error: unknown): { name?: string; message: string; stack?: string } {
|
|
40
|
+
if (error instanceof Error) {
|
|
41
|
+
return {
|
|
42
|
+
name: error.name,
|
|
43
|
+
message: error.message,
|
|
44
|
+
stack: error.stack
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
if (typeof error === "string") return { message: error };
|
|
48
|
+
return { message: "Unknown error", stack: JSON.stringify(error) };
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function pickPath(extra?: Record<string, unknown>): string | undefined {
|
|
52
|
+
const candidate = extra?.filePath ?? extra?.path;
|
|
53
|
+
return typeof candidate === "string" ? candidate : undefined;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
 * Scoped async logging facade. `scope` names the subsystem (e.g. "store.sqlite")
 * and is prefixed onto the message and recorded in the structured extras.
 */
export interface Logger {
  debug(scope: string, payload: LogPayload): Promise<void>;
  info(scope: string, payload: LogPayload): Promise<void>;
  warn(scope: string, payload: LogPayload): Promise<void>;
  // `error` additionally accepts the thrown value, which is normalized into extras.
  error(scope: string, payload: LogPayload & { error?: unknown }): Promise<void>;
}
|
|
62
|
+
|
|
63
|
+
export function createLogger(options: LoggerOptions): Logger {
|
|
64
|
+
const verbosePaths = options.verbosePaths ?? [];
|
|
65
|
+
const appRef = options.client?.app;
|
|
66
|
+
const emitAppLog = appRef?.log?.bind(appRef);
|
|
67
|
+
const logFile = options.logFile ?? null;
|
|
68
|
+
|
|
69
|
+
function shouldLog(level: LogLevel, extra?: Record<string, unknown>): boolean {
|
|
70
|
+
if (!options.enabled) return false;
|
|
71
|
+
if (levelWeight[level] < levelWeight[options.level]) return false;
|
|
72
|
+
if (level !== "debug" || verbosePaths.length === 0) return true;
|
|
73
|
+
const path = pickPath(extra);
|
|
74
|
+
if (!path) return true;
|
|
75
|
+
return verbosePaths.some((prefix) => path.startsWith(prefix));
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
async function emit(level: LogLevel, scope: string, payload: LogPayload & { error?: unknown }): Promise<void> {
|
|
79
|
+
const errorDetails = payload.error ? normalizeError(payload.error) : undefined;
|
|
80
|
+
const extra = {
|
|
81
|
+
scope,
|
|
82
|
+
...(payload.extra ?? {}),
|
|
83
|
+
...(errorDetails ? { error: errorDetails } : {})
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
if (!shouldLog(level, extra)) return;
|
|
87
|
+
|
|
88
|
+
const message = `[${scope}] ${payload.message}`;
|
|
89
|
+
const entry = { ts: new Date().toISOString(), service: "opencode-semantic-search", level, message, extra };
|
|
90
|
+
|
|
91
|
+
if (logFile) {
|
|
92
|
+
try {
|
|
93
|
+
appendFileSync(logFile, JSON.stringify(entry) + "\n");
|
|
94
|
+
} catch {
|
|
95
|
+
// Ignore file-write errors so a bad path never crashes the plugin.
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
if (emitAppLog) {
|
|
100
|
+
await emitAppLog({ body: { service: entry.service, level, message, extra } });
|
|
101
|
+
return;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
const sink = level === "error" ? console.error : level === "warn" ? console.warn : console.log;
|
|
105
|
+
sink(JSON.stringify(entry));
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
return {
|
|
109
|
+
debug: (scope, payload) => emit("debug", scope, payload),
|
|
110
|
+
info: (scope, payload) => emit("info", scope, payload),
|
|
111
|
+
warn: (scope, payload) => emit("warn", scope, payload),
|
|
112
|
+
error: (scope, payload) => emit("error", scope, payload)
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
export const noopLogger: Logger = {
|
|
117
|
+
async debug() {},
|
|
118
|
+
async info() {},
|
|
119
|
+
async warn() {},
|
|
120
|
+
async error() {}
|
|
121
|
+
};
|
package/src/runtime.ts
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import type { OpencodeClient } from "@opencode-ai/sdk";
|
|
4
|
+
import type { PluginConfig } from "./config";
|
|
5
|
+
import { createEmbedder, type Embedder } from "./embedder/interface";
|
|
6
|
+
import type { Logger } from "./logger";
|
|
7
|
+
import { SemanticStore } from "./store/sqlite";
|
|
8
|
+
import { notifyIndexingProgressToast, resetIndexingToastThrottle } from "./tui_toast";
|
|
9
|
+
|
|
10
|
+
/** Lifecycle phases of an indexing run. */
export type IndexingPhase = "idle" | "scanning" | "indexing" | "gc";

/** Shared mutable snapshot of the current indexing run, surfaced in the TUI. */
export interface IndexingProgressState {
  phase: IndexingPhase;
  // Units processed / total for the current phase.
  current: number;
  total: number;
  // Epoch ms when the current run began; preserved across phase transitions.
  startedAt: number;
  // Short human label, e.g. the file currently being processed.
  label?: string;
  // What triggered the run; cleared when the run reports idle.
  source?: "background" | "reindex";
  /** Paths that failed during the last indexing run (when reported on idle). */
  failedFiles?: string[];
}

/** Progress event emitted by the delta indexer (see src/indexer/delta.ts). */
export interface DeltaProgressPayload {
  phase: IndexingPhase;
  current: number;
  total: number;
  label?: string;
  failedFiles?: string[];
}

/**
 * Everything a tool or indexer needs for one worktree: configuration, the
 * SQLite store, the embedder, the logger, and shared indexing/toast state.
 */
export interface RuntimeContext {
  worktree: string;
  // Absolute path of the per-project SQLite database.
  dbPath: string;
  config: PluginConfig;
  store: SemanticStore;
  embedder: Embedder;
  logger: Logger;
  // Simple mutex flag preventing overlapping indexing runs.
  indexingLock: boolean;
  indexingProgress: IndexingProgressState;
  /** Set by the OpenCode plugin host — enables `src/tui_toast.ts` showTuiToast. */
  opencodeClient?: OpencodeClient;
  /** OpenCode project `directory` — used as TUI API `query.directory` when showing toasts. */
  projectDirectory?: string;
  /** Throttle state for `notifyIndexingProgressToast` in `src/tui_toast.ts`. */
  indexingToastLastMs?: number;
  indexingToastLastPhase?: IndexingPhase;
  indexingToastLastLabel?: string;
}
|
|
49
|
+
|
|
50
|
+
export function createIdleIndexingProgress(): IndexingProgressState {
|
|
51
|
+
return { phase: "idle", current: 0, total: 0, startedAt: Date.now() };
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
export function applyDeltaProgress(
|
|
55
|
+
runtime: RuntimeContext,
|
|
56
|
+
payload: DeltaProgressPayload,
|
|
57
|
+
source: "background" | "reindex"
|
|
58
|
+
): void {
|
|
59
|
+
const prev = runtime.indexingProgress;
|
|
60
|
+
const startedAt =
|
|
61
|
+
payload.phase === "scanning" && prev.phase === "idle" ? Date.now() : prev.startedAt;
|
|
62
|
+
runtime.indexingProgress = {
|
|
63
|
+
phase: payload.phase,
|
|
64
|
+
current: payload.current,
|
|
65
|
+
total: payload.total,
|
|
66
|
+
startedAt,
|
|
67
|
+
label: payload.label,
|
|
68
|
+
source: payload.phase === "idle" ? undefined : source,
|
|
69
|
+
failedFiles: payload.failedFiles,
|
|
70
|
+
};
|
|
71
|
+
if (payload.phase === "idle") {
|
|
72
|
+
resetIndexingToastThrottle(runtime);
|
|
73
|
+
} else {
|
|
74
|
+
notifyIndexingProgressToast(runtime);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
export function setIncrementalIndexingProgress(runtime: RuntimeContext, filePath: string): void {
|
|
79
|
+
runtime.indexingProgress = {
|
|
80
|
+
phase: "indexing",
|
|
81
|
+
current: 1,
|
|
82
|
+
total: 1,
|
|
83
|
+
startedAt: Date.now(),
|
|
84
|
+
label: path.basename(filePath),
|
|
85
|
+
source: "background",
|
|
86
|
+
};
|
|
87
|
+
notifyIndexingProgressToast(runtime);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** Reset progress to idle and clear toast throttling (e.g. after an aborted run). */
export function clearIndexingProgress(runtime: RuntimeContext): void {
  runtime.indexingProgress = createIdleIndexingProgress();
  resetIndexingToastThrottle(runtime);
}
|
|
94
|
+
|
|
95
|
+
export async function createRuntime(worktree: string, config: PluginConfig, logger: Logger): Promise<RuntimeContext> {
|
|
96
|
+
const digest = crypto.createHash("sha256").update(worktree).digest("hex").slice(0, 16);
|
|
97
|
+
const dbPath = path.join(config.storage.global_cache_dir, digest, "embeddings.db");
|
|
98
|
+
const schemaFile = path.join(path.dirname(new URL(import.meta.url).pathname), "store", "schema.sql");
|
|
99
|
+
const store = new SemanticStore(dbPath, schemaFile, logger);
|
|
100
|
+
const embedder = await createEmbedder(config, logger);
|
|
101
|
+
return {
|
|
102
|
+
worktree,
|
|
103
|
+
dbPath,
|
|
104
|
+
config,
|
|
105
|
+
store,
|
|
106
|
+
embedder,
|
|
107
|
+
logger,
|
|
108
|
+
indexingLock: false,
|
|
109
|
+
indexingProgress: createIdleIndexingProgress(),
|
|
110
|
+
};
|
|
111
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** A line-bounded excerpt of a file, with the 1-based range actually shown. */
export interface ContextSlice {
  startLine: number;
  endLine: number;
  // The excerpt text covering [startLine, endLine], newline-joined.
  preview: string;
}
|
|
6
|
+
|
|
7
|
+
export function buildContextPreview(fullText: string, startLine: number, endLine: number, contextLines: number): ContextSlice {
|
|
8
|
+
const lines = fullText.split("\n");
|
|
9
|
+
const from = Math.max(1, startLine - contextLines);
|
|
10
|
+
const to = Math.min(lines.length, endLine + contextLines);
|
|
11
|
+
const preview = lines.slice(from - 1, to).join("\n");
|
|
12
|
+
return {
|
|
13
|
+
startLine: from,
|
|
14
|
+
endLine: to,
|
|
15
|
+
preview,
|
|
16
|
+
};
|
|
17
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import type { PluginConfig } from "../config";
|
|
2
|
+
import { withQueryExpansion, type Embedder } from "../embedder/interface";
|
|
3
|
+
import type { SearchRow, SemanticStore } from "../store/sqlite";
|
|
4
|
+
|
|
5
|
+
/** One hybrid-search hit after weighting, threshold filtering and dedupe. */
export interface RankedResult {
  file: string;
  // Line range of the matched chunk.
  startLine: number;
  endLine: number;
  // Final combined score (weighted vector + bm25 + rrf term, with identifier boost).
  score: number;
  vectorScore: number;
  bm25Score: number;
  // The chunk text itself, used as the result preview.
  preview: string;
}
|
|
14
|
+
|
|
15
|
+
function identifierBoost(query: string, text: string, base: number, factor: number): number {
|
|
16
|
+
const tokens = query
|
|
17
|
+
.split(/\W+/)
|
|
18
|
+
.map((t) => t.trim())
|
|
19
|
+
.filter(Boolean);
|
|
20
|
+
const hasIdentifier = tokens.some((token) => text.includes(token));
|
|
21
|
+
return hasIdentifier ? base * factor : base;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
function rankRow(row: SearchRow, query: string, config: PluginConfig): number {
|
|
25
|
+
const score =
|
|
26
|
+
config.search.hybrid.weight_vector * row.vectorScore +
|
|
27
|
+
config.search.hybrid.weight_bm25 * row.bm25Score +
|
|
28
|
+
config.search.hybrid.weight_rrf * (1 / (1 + row.vectorScore + row.bm25Score));
|
|
29
|
+
return identifierBoost(query, row.text, score, config.search.hybrid.identifier_boost);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export async function hybridSearch(
|
|
33
|
+
store: SemanticStore,
|
|
34
|
+
embedder: Embedder,
|
|
35
|
+
query: string,
|
|
36
|
+
topK: number,
|
|
37
|
+
config: PluginConfig,
|
|
38
|
+
): Promise<RankedResult[]> {
|
|
39
|
+
const [queryEmbedding] = await embedder.embed([withQueryExpansion(query)]);
|
|
40
|
+
const multiplier = config.search.hybrid.candidate_multiplier ?? 1.5;
|
|
41
|
+
const rows = store.searchHybrid(queryEmbedding ?? [], query, Math.ceil(topK * multiplier));
|
|
42
|
+
const sorted = rows
|
|
43
|
+
.map((row) => {
|
|
44
|
+
const score = rankRow(row, query, config);
|
|
45
|
+
return {
|
|
46
|
+
file: row.filePath,
|
|
47
|
+
startLine: row.startLine,
|
|
48
|
+
endLine: row.endLine,
|
|
49
|
+
score,
|
|
50
|
+
vectorScore: row.vectorScore,
|
|
51
|
+
bm25Score: row.bm25Score,
|
|
52
|
+
preview: row.text,
|
|
53
|
+
};
|
|
54
|
+
})
|
|
55
|
+
.filter((row) => row.score >= config.search.similarity_threshold)
|
|
56
|
+
.sort((a, b) => b.score - a.score);
|
|
57
|
+
const seen = new Set<string>();
|
|
58
|
+
const deduped = sorted.filter((r) => {
|
|
59
|
+
const key = r.preview.trim().slice(0, 120);
|
|
60
|
+
if (seen.has(key)) return false;
|
|
61
|
+
seen.add(key);
|
|
62
|
+
return true;
|
|
63
|
+
});
|
|
64
|
+
return deduped.slice(0, topK);
|
|
65
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
-- One row per indexed source file; content_hash detects changes between scans.
CREATE TABLE IF NOT EXISTS files (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  path TEXT NOT NULL UNIQUE,
  content_hash TEXT NOT NULL,
  indexed_at INTEGER NOT NULL
);

-- Line-ranged chunks of a file with their JSON-encoded embedding.
-- chunk_hash is globally unique so identical chunks are stored only once.
CREATE TABLE IF NOT EXISTS chunks (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  file_id INTEGER NOT NULL,
  start_line INTEGER NOT NULL,
  end_line INTEGER NOT NULL,
  text TEXT NOT NULL,
  chunk_hash TEXT NOT NULL UNIQUE,
  embedding TEXT NOT NULL,
  FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
);

-- Small key/value store for sync bookkeeping (e.g. last_sync, FTS schema version).
CREATE TABLE IF NOT EXISTS sync_state (
  key TEXT PRIMARY KEY,
  value TEXT NOT NULL
);

-- Full-text index over chunk text; chunk_id links back to chunks.id.
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
  text,
  chunk_id UNINDEXED,
  tokenize='porter unicode61'
);

CREATE INDEX IF NOT EXISTS idx_chunks_file_id ON chunks(file_id);
CREATE INDEX IF NOT EXISTS idx_files_indexed_at ON files(indexed_at);
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { load as loadSqliteVec } from "sqlite-vec";
|
|
3
|
+
import crypto from "node:crypto";
|
|
4
|
+
import fs from "node:fs";
|
|
5
|
+
import path from "node:path";
|
|
6
|
+
import type { Logger } from "../logger";
|
|
7
|
+
|
|
8
|
+
/** One candidate row from the combined vector + BM25 store query. */
export interface SearchRow {
  chunkId: number;
  filePath: string;
  startLine: number;
  endLine: number;
  text: string;
  // 1 / (1 + vec distance): higher means closer.
  vectorScore: number;
  // BM25 relevance normalized against the best match in the batch (0 when not an FTS hit).
  bm25Score: number;
}
|
|
17
|
+
|
|
18
|
+
export function projectHash(worktree: string): string {
|
|
19
|
+
return crypto.createHash("sha256").update(worktree).digest("hex").slice(0, 16);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function loadSchema(db: Database, schemaFile: string): void {
|
|
23
|
+
const sql = fs.readFileSync(schemaFile, "utf8");
|
|
24
|
+
db.exec(sql);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Split camelCase and separators so FTS5 can match substrings like getUser / profile. */
|
|
28
|
+
function expandCodeIdentifiers(text: string): string {
|
|
29
|
+
const split = text
|
|
30
|
+
.replace(/([a-z])([A-Z])/g, "$1 $2")
|
|
31
|
+
.replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
|
|
32
|
+
.replace(/[_\-./]/g, " ");
|
|
33
|
+
return `${text} ${split}`.toLowerCase();
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
const ftsSchemaVersionKey = "fts_schema_version";
|
|
37
|
+
|
|
38
|
+
/**
 * SQLite-backed store for file/chunk metadata, embeddings (sqlite-vec vec0
 * table) and full-text search (FTS5). One database per project worktree.
 */
export class SemanticStore {
  public readonly db: Database;
  private readonly logger?: Logger;

  constructor(dbPath: string, schemaFile: string, logger?: Logger) {
    this.logger = logger;
    try {
      fs.mkdirSync(path.dirname(dbPath), { recursive: true });
      this.db = new Database(dbPath, { create: true });
      // WAL + busy_timeout let concurrent readers/writers coexist.
      this.db.exec("PRAGMA journal_mode=WAL;");
      this.db.exec("PRAGMA busy_timeout=5000;");
      this.db.exec("PRAGMA foreign_keys=ON;");
      loadSqliteVec(this.db as never);
      loadSchema(this.db, schemaFile);
      this.migrateFtsSchemaIfNeeded();
      // vec0 virtual table holds the vector index; dimension fixed at 768.
      this.db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS chunk_vec USING vec0(chunk_id INTEGER, embedding FLOAT[768]);");
      void this.logger?.info("store.sqlite", {
        message: "Database initialized",
        extra: { dbPath }
      });
    } catch (error) {
      void this.logger?.error("store.sqlite", {
        message: "Database open/init failed",
        extra: { dbPath, schemaFile },
        error
      });
      throw error;
    }
  }

  /** Close the underlying SQLite handle. */
  close(): void {
    this.db.close();
  }

  /** Rebuild FTS5 when tokenizer changes (porter unicode61 + expanded identifier text). */
  private migrateFtsSchemaIfNeeded(): void {
    // Version marker lives in sync_state; "2" means already migrated.
    if (this.getSyncState(ftsSchemaVersionKey) === "2") return;
    const tx = this.db.transaction(() => {
      this.db.exec("DROP TABLE IF EXISTS chunks_fts");
      this.db.exec(`
        CREATE VIRTUAL TABLE chunks_fts USING fts5(
          text,
          chunk_id UNINDEXED,
          tokenize='porter unicode61'
        );
      `);
      // Re-index every existing chunk with the identifier-expanded text.
      const rows = this.db.query("SELECT id, text FROM chunks").all() as Array<{ id: number; text: string }>;
      const insert = this.db.query("INSERT INTO chunks_fts(rowid, text, chunk_id) VALUES(?, ?, ?)");
      for (const row of rows) {
        insert.run(row.id, expandCodeIdentifiers(row.text), row.id);
      }
      this.setSyncState(ftsSchemaVersionKey, "2");
    });
    tx();
    void this.logger?.info("store.sqlite", {
      message: "chunks_fts migrated to schema v2 (porter unicode61 + identifier expansion)",
      extra: { chunkRows: this.db.query("SELECT COUNT(*) AS c FROM chunks").get() as { c: number } }
    });
  }

  /** Read a value from the sync_state k/v table; null when absent. */
  getSyncState(key: string): string | null {
    const row = this.db.query("SELECT value FROM sync_state WHERE key = ?").get(key) as { value: string } | null;
    return row?.value ?? null;
  }

  /** Upsert a value into the sync_state k/v table. */
  setSyncState(key: string, value: string): void {
    this.db.query("INSERT INTO sync_state(key, value) VALUES(?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key, value);
  }

  /** Insert or refresh a file row (content hash + timestamp) and return its id. */
  upsertFile(filePath: string, contentHash: string): number {
    const now = Date.now();
    this.db.query(
      "INSERT INTO files(path, content_hash, indexed_at) VALUES(?, ?, ?) ON CONFLICT(path) DO UPDATE SET content_hash=excluded.content_hash, indexed_at=excluded.indexed_at",
    ).run(filePath, contentHash, now);
    const row = this.db.query("SELECT id FROM files WHERE path = ?").get(filePath) as { id: number };
    return row.id;
  }

  /**
   * Delete file rows whose path is no longer in `existing`.
   * Chunk rows follow via ON DELETE CASCADE.
   * NOTE(review): the cascaded chunks' chunk_vec/chunks_fts rows are NOT
   * removed here — verify orphan cleanup happens elsewhere (e.g. indexer/gc).
   */
  removeMissingFiles(existing: Set<string>): void {
    const rows = this.db.query("SELECT path FROM files").all() as Array<{ path: string }>;
    for (const row of rows) {
      if (existing.has(row.path)) continue;
      this.db.query("DELETE FROM files WHERE path = ?").run(row.path);
    }
  }

  /** Delete a file's chunks plus their FTS and vector side-table rows. */
  clearChunksForFile(fileId: number): void {
    const chunkRows = this.db.query("SELECT id FROM chunks WHERE file_id = ?").all(fileId) as Array<{ id: number }>;
    for (const row of chunkRows) {
      this.db.query("DELETE FROM chunks_fts WHERE chunk_id = ?").run(row.id);
      this.db.query("DELETE FROM chunk_vec WHERE chunk_id = ?").run(row.id);
    }
    this.db.query("DELETE FROM chunks WHERE file_id = ?").run(fileId);
  }

  /**
   * Replace all chunks for a file atomically: clear existing rows then insert new ones.
   * Use for indexing so concurrent workers do not interleave deletes/inserts.
   */
  writeFileChunks(
    fileId: number,
    writes: Array<{ startLine: number; endLine: number; text: string; chunkHash: string; embedding: number[] }>
  ): void {
    const tx = this.db.transaction(() => {
      this.clearChunksForFile(fileId);
      for (const w of writes) {
        this.insertChunk(fileId, w.startLine, w.endLine, w.text, w.chunkHash, w.embedding);
      }
    });
    tx();
  }

  /**
   * Upsert the file record AND replace all its chunks in one atomic transaction.
   * Prevents the atomicity gap where `upsertFile` commits a new content_hash before
   * `writeFileChunks` succeeds — which would permanently hide the file from future
   * delta scans if the chunk write later fails.
   */
  writeFileAndChunks(
    filePath: string,
    contentHash: string,
    writes: Array<{ startLine: number; endLine: number; text: string; chunkHash: string; embedding: number[] }>
  ): number {
    let fileId = 0;
    const tx = this.db.transaction(() => {
      fileId = this.upsertFile(filePath, contentHash);
      this.clearChunksForFile(fileId);
      for (const w of writes) {
        this.insertChunk(fileId, w.startLine, w.endLine, w.text, w.chunkHash, w.embedding);
      }
    });
    tx();
    return fileId;
  }

  /**
   * Insert one chunk plus its FTS and vector rows; returns the chunk id.
   * INSERT OR IGNORE + lookup-by-hash makes re-inserting an identical chunk a no-op.
   */
  insertChunk(fileId: number, startLine: number, endLine: number, text: string, chunkHash: string, embedding: number[]): number {
    this.db
      .query("INSERT OR IGNORE INTO chunks(file_id, start_line, end_line, text, chunk_hash, embedding) VALUES(?, ?, ?, ?, ?, ?)")
      .run(fileId, startLine, endLine, text, chunkHash, JSON.stringify(embedding));
    const row = this.db.query("SELECT id FROM chunks WHERE chunk_hash = ?").get(chunkHash) as { id: number };
    // sqlite-vec accepts a JSON array literal as the embedding value.
    const vecLiteral = JSON.stringify(embedding);
    this.db.query("INSERT OR REPLACE INTO chunk_vec(chunk_id, embedding) VALUES(?, ?)").run(row.id, vecLiteral);
    this.db.query("INSERT OR REPLACE INTO chunks_fts(rowid, text, chunk_id) VALUES(?, ?, ?)").run(
      row.id,
      expandCodeIdentifiers(text),
      row.id,
    );
    return row.id;
  }

  /** Look up a previously stored embedding by chunk hash (embedding-cache reuse). */
  getChunkByHash(chunkHash: string): { embedding: number[] } | null {
    const row = this.db.query("SELECT embedding FROM chunks WHERE chunk_hash = ?").get(chunkHash) as { embedding: string } | null;
    if (!row) return null;
    return { embedding: JSON.parse(row.embedding) as number[] };
  }

  /** Aggregate counts plus the last_sync marker from sync_state. */
  stats(): { files: number; chunks: number; lastSync: string | null } {
    const files = (this.db.query("SELECT COUNT(*) AS c FROM files").get() as { c: number }).c;
    const chunks = (this.db.query("SELECT COUNT(*) AS c FROM chunks").get() as { c: number }).c;
    const lastSync = this.getSyncState("last_sync");
    return { files, chunks, lastSync };
  }

  /**
   * Run the vector k-nearest query and the BM25 FTS query, then merge the two
   * candidate sets by chunk id. BM25 scores from SQLite are negative
   * (lower/more negative = better), so they are negated and normalized
   * against the best match in the batch.
   */
  searchHybrid(queryEmbedding: number[], queryText: string, topK: number): SearchRow[] {
    const vecRows = this.db
      .query(
        `
        SELECT c.id as chunkId, f.path as filePath, c.start_line as startLine, c.end_line as endLine, c.text as text,
               (1.0 / (1.0 + distance)) as vectorScore
        FROM chunk_vec v
        JOIN chunks c ON c.id = v.chunk_id
        JOIN files f ON f.id = c.file_id
        WHERE v.embedding MATCH ?
          AND v.k = ?
        ORDER BY distance
        `,
      )
      .all(JSON.stringify(queryEmbedding), topK) as Array<Omit<SearchRow, "bm25Score">>;

    // Keep only word-ish tokens (length >= 2) and OR them so FTS syntax
    // characters in the raw query cannot break the MATCH expression.
    const sanitizedBm25Query = queryText
      .match(/[a-zA-Z0-9_]{2,}/g)
      ?.join(" OR ");

    const bm25Rows = sanitizedBm25Query
      ? (this.db
          .query(
            `
            SELECT c.id as chunkId, f.path as filePath, c.start_line as startLine, c.end_line as endLine, c.text as text,
                   bm25(chunks_fts) as score
            FROM chunks_fts
            JOIN chunks c ON c.id = chunks_fts.chunk_id
            JOIN files f ON f.id = c.file_id
            WHERE chunks_fts MATCH ?
            ORDER BY score
            LIMIT ?
            `,
          )
          .all(sanitizedBm25Query, topK) as Array<{
          chunkId: number;
          filePath: string;
          startLine: number;
          endLine: number;
          text: string;
          score: number;
        }>)
      : [];

    // Best (most negative) bm25 score, negated; guarded so an empty batch
    // (-Infinity) or a non-positive max falls back to 1.
    const maxBm25 = Math.max(...bm25Rows.map((r) => -r.score));
    const safeMax = maxBm25 <= 0 || !isFinite(maxBm25) ? 1 : maxBm25;

    const merged = new Map<number, SearchRow>();
    for (const row of vecRows) merged.set(row.chunkId, { ...row, bm25Score: 0 });
    for (const row of bm25Rows) {
      const existing = merged.get(row.chunkId);
      const normalizedBm25 = (-row.score) / safeMax;
      if (!existing) {
        merged.set(row.chunkId, {
          chunkId: row.chunkId,
          filePath: row.filePath,
          startLine: row.startLine,
          endLine: row.endLine,
          text: row.text,
          vectorScore: 0,
          bm25Score: normalizedBm25,
        });
      } else {
        existing.bm25Score = normalizedBm25;
      }
    }
    return [...merged.values()];
  }
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { ToolContext } from "@opencode-ai/plugin";
|
|
2
|
+
import { tool } from "@opencode-ai/plugin";
|
|
3
|
+
import { buildDiagnosticBundle } from "../diagnostics/bundle";
|
|
4
|
+
import type { RuntimeContext } from "../runtime";
|
|
5
|
+
|
|
6
|
+
export async function executeDiagnosticBundle(
|
|
7
|
+
runtime: RuntimeContext,
|
|
8
|
+
ctx?: Pick<ToolContext, "metadata">
|
|
9
|
+
): Promise<string> {
|
|
10
|
+
const startedAt = Date.now();
|
|
11
|
+
ctx?.metadata?.({ title: "diagnostic_bundle", metadata: { phase: "collecting" } });
|
|
12
|
+
const bundle = await buildDiagnosticBundle(runtime);
|
|
13
|
+
const elapsedMs = Date.now() - startedAt;
|
|
14
|
+
await runtime.logger.debug("tool.diagnostic_bundle", {
|
|
15
|
+
message: "Diagnostic bundle requested",
|
|
16
|
+
extra: { filesIndexed: bundle.index.files_indexed, chunks: bundle.index.chunks_indexed },
|
|
17
|
+
});
|
|
18
|
+
ctx?.metadata?.({
|
|
19
|
+
title: "diagnostic_bundle",
|
|
20
|
+
metadata: { phase: "done", elapsed_ms: elapsedMs, chunks: bundle.index.chunks_indexed },
|
|
21
|
+
});
|
|
22
|
+
return JSON.stringify(bundle, null, 2);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/** Register the `diagnostic_bundle` tool with the OpenCode plugin host. */
export function createDiagnosticBundleTool(runtime: RuntimeContext) {
  return tool({
    description:
      "Export a JSON diagnostic bundle for support and debugging: embedding provider health, index stats, DB path, and recent smart-grep routing outcomes. Same data as `bun run diagnostic:bundle` from the plugin repo.",
    // No arguments: the bundle always covers the whole runtime.
    args: {},
    async execute(_args, toolCtx) {
      return executeDiagnosticBundle(runtime, toolCtx);
    },
  });
}
|