@gmickel/gno 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +256 -0
- package/assets/skill/SKILL.md +112 -0
- package/assets/skill/cli-reference.md +327 -0
- package/assets/skill/examples.md +234 -0
- package/assets/skill/mcp-reference.md +159 -0
- package/package.json +90 -0
- package/src/app/constants.ts +313 -0
- package/src/cli/colors.ts +65 -0
- package/src/cli/commands/ask.ts +545 -0
- package/src/cli/commands/cleanup.ts +105 -0
- package/src/cli/commands/collection/add.ts +120 -0
- package/src/cli/commands/collection/index.ts +10 -0
- package/src/cli/commands/collection/list.ts +108 -0
- package/src/cli/commands/collection/remove.ts +64 -0
- package/src/cli/commands/collection/rename.ts +95 -0
- package/src/cli/commands/context/add.ts +67 -0
- package/src/cli/commands/context/check.ts +153 -0
- package/src/cli/commands/context/index.ts +10 -0
- package/src/cli/commands/context/list.ts +109 -0
- package/src/cli/commands/context/rm.ts +52 -0
- package/src/cli/commands/doctor.ts +393 -0
- package/src/cli/commands/embed.ts +462 -0
- package/src/cli/commands/get.ts +356 -0
- package/src/cli/commands/index-cmd.ts +119 -0
- package/src/cli/commands/index.ts +102 -0
- package/src/cli/commands/init.ts +328 -0
- package/src/cli/commands/ls.ts +217 -0
- package/src/cli/commands/mcp/config.ts +300 -0
- package/src/cli/commands/mcp/index.ts +24 -0
- package/src/cli/commands/mcp/install.ts +203 -0
- package/src/cli/commands/mcp/paths.ts +470 -0
- package/src/cli/commands/mcp/status.ts +222 -0
- package/src/cli/commands/mcp/uninstall.ts +158 -0
- package/src/cli/commands/mcp.ts +20 -0
- package/src/cli/commands/models/clear.ts +103 -0
- package/src/cli/commands/models/index.ts +32 -0
- package/src/cli/commands/models/list.ts +214 -0
- package/src/cli/commands/models/path.ts +51 -0
- package/src/cli/commands/models/pull.ts +199 -0
- package/src/cli/commands/models/use.ts +85 -0
- package/src/cli/commands/multi-get.ts +400 -0
- package/src/cli/commands/query.ts +220 -0
- package/src/cli/commands/ref-parser.ts +108 -0
- package/src/cli/commands/reset.ts +191 -0
- package/src/cli/commands/search.ts +136 -0
- package/src/cli/commands/shared.ts +156 -0
- package/src/cli/commands/skill/index.ts +19 -0
- package/src/cli/commands/skill/install.ts +197 -0
- package/src/cli/commands/skill/paths-cmd.ts +81 -0
- package/src/cli/commands/skill/paths.ts +191 -0
- package/src/cli/commands/skill/show.ts +73 -0
- package/src/cli/commands/skill/uninstall.ts +141 -0
- package/src/cli/commands/status.ts +205 -0
- package/src/cli/commands/update.ts +68 -0
- package/src/cli/commands/vsearch.ts +188 -0
- package/src/cli/context.ts +64 -0
- package/src/cli/errors.ts +64 -0
- package/src/cli/format/search-results.ts +211 -0
- package/src/cli/options.ts +183 -0
- package/src/cli/program.ts +1330 -0
- package/src/cli/run.ts +213 -0
- package/src/cli/ui.ts +92 -0
- package/src/config/defaults.ts +20 -0
- package/src/config/index.ts +55 -0
- package/src/config/loader.ts +161 -0
- package/src/config/paths.ts +87 -0
- package/src/config/saver.ts +153 -0
- package/src/config/types.ts +280 -0
- package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
- package/src/converters/adapters/officeparser/adapter.ts +126 -0
- package/src/converters/canonicalize.ts +89 -0
- package/src/converters/errors.ts +218 -0
- package/src/converters/index.ts +51 -0
- package/src/converters/mime.ts +163 -0
- package/src/converters/native/markdown.ts +115 -0
- package/src/converters/native/plaintext.ts +56 -0
- package/src/converters/path.ts +48 -0
- package/src/converters/pipeline.ts +159 -0
- package/src/converters/registry.ts +74 -0
- package/src/converters/types.ts +123 -0
- package/src/converters/versions.ts +24 -0
- package/src/index.ts +27 -0
- package/src/ingestion/chunker.ts +238 -0
- package/src/ingestion/index.ts +32 -0
- package/src/ingestion/language.ts +276 -0
- package/src/ingestion/sync.ts +671 -0
- package/src/ingestion/types.ts +219 -0
- package/src/ingestion/walker.ts +235 -0
- package/src/llm/cache.ts +467 -0
- package/src/llm/errors.ts +191 -0
- package/src/llm/index.ts +58 -0
- package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
- package/src/llm/nodeLlamaCpp/generation.ts +88 -0
- package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
- package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
- package/src/llm/registry.ts +86 -0
- package/src/llm/types.ts +129 -0
- package/src/mcp/resources/index.ts +151 -0
- package/src/mcp/server.ts +229 -0
- package/src/mcp/tools/get.ts +220 -0
- package/src/mcp/tools/index.ts +160 -0
- package/src/mcp/tools/multi-get.ts +263 -0
- package/src/mcp/tools/query.ts +226 -0
- package/src/mcp/tools/search.ts +119 -0
- package/src/mcp/tools/status.ts +81 -0
- package/src/mcp/tools/vsearch.ts +198 -0
- package/src/pipeline/chunk-lookup.ts +44 -0
- package/src/pipeline/expansion.ts +256 -0
- package/src/pipeline/explain.ts +115 -0
- package/src/pipeline/fusion.ts +185 -0
- package/src/pipeline/hybrid.ts +535 -0
- package/src/pipeline/index.ts +64 -0
- package/src/pipeline/query-language.ts +118 -0
- package/src/pipeline/rerank.ts +223 -0
- package/src/pipeline/search.ts +261 -0
- package/src/pipeline/types.ts +328 -0
- package/src/pipeline/vsearch.ts +348 -0
- package/src/store/index.ts +41 -0
- package/src/store/migrations/001-initial.ts +196 -0
- package/src/store/migrations/index.ts +20 -0
- package/src/store/migrations/runner.ts +187 -0
- package/src/store/sqlite/adapter.ts +1242 -0
- package/src/store/sqlite/index.ts +7 -0
- package/src/store/sqlite/setup.ts +129 -0
- package/src/store/sqlite/types.ts +28 -0
- package/src/store/types.ts +506 -0
- package/src/store/vector/index.ts +13 -0
- package/src/store/vector/sqlite-vec.ts +373 -0
- package/src/store/vector/stats.ts +152 -0
- package/src/store/vector/types.ts +115 -0
|
@@ -0,0 +1,1242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite implementation of StorePort.
|
|
3
|
+
* Uses bun:sqlite for database operations.
|
|
4
|
+
*
|
|
5
|
+
* Note: bun:sqlite is synchronous but we use async for interface consistency.
|
|
6
|
+
*
|
|
7
|
+
* @module src/store/sqlite/adapter
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// CRITICAL: Import setup FIRST to configure custom SQLite before any Database use
|
|
11
|
+
import './setup';
|
|
12
|
+
import { Database } from 'bun:sqlite';
|
|
13
|
+
import { buildUri, deriveDocid } from '../../app/constants';
|
|
14
|
+
import type { Collection, Context, FtsTokenizer } from '../../config/types';
|
|
15
|
+
import { migrations, runMigrations } from '../migrations';
|
|
16
|
+
import type {
|
|
17
|
+
ChunkInput,
|
|
18
|
+
ChunkRow,
|
|
19
|
+
CleanupStats,
|
|
20
|
+
CollectionRow,
|
|
21
|
+
ContextRow,
|
|
22
|
+
DocumentInput,
|
|
23
|
+
DocumentRow,
|
|
24
|
+
FtsResult,
|
|
25
|
+
FtsSearchOptions,
|
|
26
|
+
IndexStatus,
|
|
27
|
+
IngestErrorInput,
|
|
28
|
+
IngestErrorRow,
|
|
29
|
+
MigrationResult,
|
|
30
|
+
StorePort,
|
|
31
|
+
StoreResult,
|
|
32
|
+
} from '../types';
|
|
33
|
+
import { err, ok } from '../types';
|
|
34
|
+
import type { SqliteDbProvider } from './types';
|
|
35
|
+
|
|
36
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
37
|
+
// FTS5 Query Escaping
|
|
38
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
/** Whitespace regex for splitting FTS5 tokens */
|
|
41
|
+
const WHITESPACE_REGEX = /\s+/;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Escape a query string for safe FTS5 MATCH.
|
|
45
|
+
* Wraps each token in double quotes to treat as literal terms.
|
|
46
|
+
* Handles special chars: ? * - + ( ) " : ^ etc.
|
|
47
|
+
*/
|
|
48
|
+
function escapeFts5Query(query: string): string {
|
|
49
|
+
// Split on whitespace, filter empty, quote each token
|
|
50
|
+
return query
|
|
51
|
+
.split(WHITESPACE_REGEX)
|
|
52
|
+
.filter((t) => t.length > 0)
|
|
53
|
+
.map((token) => {
|
|
54
|
+
// Escape internal double quotes by doubling them
|
|
55
|
+
const escaped = token.replace(/"/g, '""');
|
|
56
|
+
return `"${escaped}"`;
|
|
57
|
+
})
|
|
58
|
+
.join(' ');
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
62
|
+
// SQLite Adapter Implementation
|
|
63
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
/** Regex to strip the trailing `.sqlite` extension from a db file path */
const SQLITE_EXT_REGEX = /\.sqlite$/;

/** Regex to strip the leading `index-` prefix from a db file name */
// NOTE(review): neither regex is referenced in this part of the file;
// presumably used further down when deriving a display name — verify.
const INDEX_PREFIX_REGEX = /^index-/;
|
|
70
|
+
|
|
71
|
+
export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
72
|
+
private db: Database | null = null;
|
|
73
|
+
private dbPath = '';
|
|
74
|
+
private ftsTokenizer: FtsTokenizer = 'unicode61';
|
|
75
|
+
private configPath = ''; // Set by CLI layer for status output
|
|
76
|
+
private txDepth = 0; // Transaction nesting depth
|
|
77
|
+
private txCounter = 0; // Savepoint counter for unique names
|
|
78
|
+
|
|
79
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
80
|
+
// Lifecycle
|
|
81
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
async open(
|
|
84
|
+
dbPath: string,
|
|
85
|
+
ftsTokenizer: FtsTokenizer
|
|
86
|
+
): Promise<StoreResult<MigrationResult>> {
|
|
87
|
+
try {
|
|
88
|
+
this.db = new Database(dbPath, { create: true });
|
|
89
|
+
this.dbPath = dbPath;
|
|
90
|
+
this.ftsTokenizer = ftsTokenizer;
|
|
91
|
+
|
|
92
|
+
// Enable pragmas for performance and safety
|
|
93
|
+
this.db.exec('PRAGMA foreign_keys = ON');
|
|
94
|
+
this.db.exec('PRAGMA busy_timeout = 5000');
|
|
95
|
+
|
|
96
|
+
// CI mode: trade durability for speed (no fsync, memory journal)
|
|
97
|
+
// Safe for tests since we don't need crash recovery
|
|
98
|
+
if (process.env.CI) {
|
|
99
|
+
this.db.exec('PRAGMA journal_mode = MEMORY');
|
|
100
|
+
this.db.exec('PRAGMA synchronous = OFF');
|
|
101
|
+
this.db.exec('PRAGMA temp_store = MEMORY');
|
|
102
|
+
} else {
|
|
103
|
+
this.db.exec('PRAGMA journal_mode = WAL');
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Run migrations
|
|
107
|
+
const result = runMigrations(this.db, migrations, ftsTokenizer);
|
|
108
|
+
if (!result.ok) {
|
|
109
|
+
this.db.close();
|
|
110
|
+
this.db = null;
|
|
111
|
+
return result;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return result;
|
|
115
|
+
} catch (cause) {
|
|
116
|
+
const message =
|
|
117
|
+
cause instanceof Error ? cause.message : 'Failed to open database';
|
|
118
|
+
return err('CONNECTION_FAILED', message, cause);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
async close(): Promise<void> {
|
|
123
|
+
if (this.db) {
|
|
124
|
+
this.db.close();
|
|
125
|
+
this.db = null;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
isOpen(): boolean {
|
|
130
|
+
return this.db !== null;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* Run an async function within a single SQLite transaction.
|
|
135
|
+
* Uses SAVEPOINT for nesting safety.
|
|
136
|
+
*
|
|
137
|
+
* Note: bun:sqlite's Database#transaction is synchronous, so we use
|
|
138
|
+
* explicit BEGIN/COMMIT to support async callbacks.
|
|
139
|
+
*/
|
|
140
|
+
async withTransaction<T>(fn: () => Promise<T>): Promise<StoreResult<T>> {
|
|
141
|
+
const db = this.ensureOpen();
|
|
142
|
+
|
|
143
|
+
const isOuter = this.txDepth === 0;
|
|
144
|
+
const savepoint = `sp_${++this.txCounter}`;
|
|
145
|
+
|
|
146
|
+
try {
|
|
147
|
+
if (isOuter) {
|
|
148
|
+
// IMMEDIATE reduces lock churn for bulk writes
|
|
149
|
+
db.exec('BEGIN IMMEDIATE');
|
|
150
|
+
} else {
|
|
151
|
+
db.exec(`SAVEPOINT ${savepoint}`);
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
this.txDepth += 1;
|
|
155
|
+
const value = await fn();
|
|
156
|
+
this.txDepth -= 1;
|
|
157
|
+
|
|
158
|
+
if (isOuter) {
|
|
159
|
+
db.exec('COMMIT');
|
|
160
|
+
} else {
|
|
161
|
+
db.exec(`RELEASE ${savepoint}`);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
return ok(value);
|
|
165
|
+
} catch (cause) {
|
|
166
|
+
this.txDepth = Math.max(0, this.txDepth - 1);
|
|
167
|
+
|
|
168
|
+
try {
|
|
169
|
+
if (isOuter) {
|
|
170
|
+
db.exec('ROLLBACK');
|
|
171
|
+
} else {
|
|
172
|
+
db.exec(`ROLLBACK TO ${savepoint}`);
|
|
173
|
+
db.exec(`RELEASE ${savepoint}`);
|
|
174
|
+
}
|
|
175
|
+
} catch {
|
|
176
|
+
// Ignore rollback failures; report original error
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
const message =
|
|
180
|
+
cause instanceof Error ? cause.message : 'Transaction failed';
|
|
181
|
+
return err('TRANSACTION_FAILED', message, cause);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
  /**
   * Set config path for status output (called by CLI layer).
   * Stored for reporting only; it does not affect database behavior.
   */
  setConfigPath(configPath: string): void {
    this.configPath = configPath;
  }
|
|
191
|
+
|
|
192
|
+
  /**
   * Get raw SQLite database handle for vector operations.
   * Part of SqliteDbProvider interface - use with isSqliteDbProvider() type guard.
   *
   * @throws Error when the database has not been opened.
   */
  getRawDb(): Database {
    return this.ensureOpen();
  }
|
|
199
|
+
|
|
200
|
+
private ensureOpen(): Database {
|
|
201
|
+
if (!this.db) {
|
|
202
|
+
throw new Error('Database not open');
|
|
203
|
+
}
|
|
204
|
+
return this.db;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
208
|
+
// Config Sync
|
|
209
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
async syncCollections(collections: Collection[]): Promise<StoreResult<void>> {
|
|
212
|
+
try {
|
|
213
|
+
const db = this.ensureOpen();
|
|
214
|
+
|
|
215
|
+
const transaction = db.transaction(() => {
|
|
216
|
+
// Get existing collection names
|
|
217
|
+
const existing = new Set(
|
|
218
|
+
db
|
|
219
|
+
.query<{ name: string }, []>('SELECT name FROM collections')
|
|
220
|
+
.all()
|
|
221
|
+
.map((r) => r.name)
|
|
222
|
+
);
|
|
223
|
+
|
|
224
|
+
const incoming = new Set(collections.map((c) => c.name));
|
|
225
|
+
|
|
226
|
+
// Delete removed collections
|
|
227
|
+
for (const name of existing) {
|
|
228
|
+
if (!incoming.has(name)) {
|
|
229
|
+
db.run('DELETE FROM collections WHERE name = ?', [name]);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// Upsert collections
|
|
234
|
+
const stmt = db.prepare(`
|
|
235
|
+
INSERT INTO collections (name, path, pattern, include, exclude, update_cmd, language_hint, synced_at)
|
|
236
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
|
|
237
|
+
ON CONFLICT(name) DO UPDATE SET
|
|
238
|
+
path = excluded.path,
|
|
239
|
+
pattern = excluded.pattern,
|
|
240
|
+
include = excluded.include,
|
|
241
|
+
exclude = excluded.exclude,
|
|
242
|
+
update_cmd = excluded.update_cmd,
|
|
243
|
+
language_hint = excluded.language_hint,
|
|
244
|
+
synced_at = datetime('now')
|
|
245
|
+
`);
|
|
246
|
+
|
|
247
|
+
for (const c of collections) {
|
|
248
|
+
stmt.run(
|
|
249
|
+
c.name,
|
|
250
|
+
c.path,
|
|
251
|
+
c.pattern,
|
|
252
|
+
c.include.length > 0 ? JSON.stringify(c.include) : null,
|
|
253
|
+
c.exclude.length > 0 ? JSON.stringify(c.exclude) : null,
|
|
254
|
+
c.updateCmd ?? null,
|
|
255
|
+
c.languageHint ?? null
|
|
256
|
+
);
|
|
257
|
+
}
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
transaction();
|
|
261
|
+
return ok(undefined);
|
|
262
|
+
} catch (cause) {
|
|
263
|
+
return err(
|
|
264
|
+
'QUERY_FAILED',
|
|
265
|
+
cause instanceof Error ? cause.message : 'Failed to sync collections',
|
|
266
|
+
cause
|
|
267
|
+
);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
async syncContexts(contexts: Context[]): Promise<StoreResult<void>> {
|
|
272
|
+
try {
|
|
273
|
+
const db = this.ensureOpen();
|
|
274
|
+
|
|
275
|
+
const transaction = db.transaction(() => {
|
|
276
|
+
// Delete all and re-insert (contexts are small)
|
|
277
|
+
db.run('DELETE FROM contexts');
|
|
278
|
+
|
|
279
|
+
const stmt = db.prepare(`
|
|
280
|
+
INSERT INTO contexts (scope_type, scope_key, text, synced_at)
|
|
281
|
+
VALUES (?, ?, ?, datetime('now'))
|
|
282
|
+
`);
|
|
283
|
+
|
|
284
|
+
for (const c of contexts) {
|
|
285
|
+
stmt.run(c.scopeType, c.scopeKey, c.text);
|
|
286
|
+
}
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
transaction();
|
|
290
|
+
return ok(undefined);
|
|
291
|
+
} catch (cause) {
|
|
292
|
+
return err(
|
|
293
|
+
'QUERY_FAILED',
|
|
294
|
+
cause instanceof Error ? cause.message : 'Failed to sync contexts',
|
|
295
|
+
cause
|
|
296
|
+
);
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
async getCollections(): Promise<StoreResult<CollectionRow[]>> {
|
|
301
|
+
try {
|
|
302
|
+
const db = this.ensureOpen();
|
|
303
|
+
const rows = db
|
|
304
|
+
.query<DbCollectionRow, []>('SELECT * FROM collections')
|
|
305
|
+
.all();
|
|
306
|
+
|
|
307
|
+
return ok(rows.map(mapCollectionRow));
|
|
308
|
+
} catch (cause) {
|
|
309
|
+
return err(
|
|
310
|
+
'QUERY_FAILED',
|
|
311
|
+
cause instanceof Error ? cause.message : 'Failed to get collections',
|
|
312
|
+
cause
|
|
313
|
+
);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
async getContexts(): Promise<StoreResult<ContextRow[]>> {
|
|
318
|
+
try {
|
|
319
|
+
const db = this.ensureOpen();
|
|
320
|
+
const rows = db.query<DbContextRow, []>('SELECT * FROM contexts').all();
|
|
321
|
+
|
|
322
|
+
return ok(rows.map(mapContextRow));
|
|
323
|
+
} catch (cause) {
|
|
324
|
+
return err(
|
|
325
|
+
'QUERY_FAILED',
|
|
326
|
+
cause instanceof Error ? cause.message : 'Failed to get contexts',
|
|
327
|
+
cause
|
|
328
|
+
);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
333
|
+
// Documents
|
|
334
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
335
|
+
|
|
336
|
+
  /**
   * Insert or update a document row keyed by (collection, rel_path).
   *
   * The stable docid is derived from the source hash and the uri from
   * collection + relPath; both are recomputed on every upsert. The row is
   * always re-activated (active = 1), since an upsert implies the source
   * file currently exists.
   *
   * @param doc Document metadata to persist.
   * @returns The derived docid on success, QUERY_FAILED on error.
   */
  async upsertDocument(doc: DocumentInput): Promise<StoreResult<string>> {
    try {
      const db = this.ensureOpen();
      const docid = deriveDocid(doc.sourceHash);
      const uri = buildUri(doc.collection, doc.relPath);

      db.run(
        `
      INSERT INTO documents (
        collection, rel_path, source_hash, source_mime, source_ext,
        source_size, source_mtime, docid, uri, title, mirror_hash,
        converter_id, converter_version, language_hint, active,
        last_error_code, last_error_message, last_error_at, updated_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?, ?, ?, datetime('now'))
      ON CONFLICT(collection, rel_path) DO UPDATE SET
        source_hash = excluded.source_hash,
        source_mime = excluded.source_mime,
        source_ext = excluded.source_ext,
        source_size = excluded.source_size,
        source_mtime = excluded.source_mtime,
        docid = excluded.docid,
        uri = excluded.uri,
        title = excluded.title,
        mirror_hash = excluded.mirror_hash,
        converter_id = excluded.converter_id,
        converter_version = excluded.converter_version,
        language_hint = excluded.language_hint,
        active = 1,
        last_error_code = excluded.last_error_code,
        last_error_message = excluded.last_error_message,
        last_error_at = excluded.last_error_at,
        updated_at = datetime('now')
      `,
        [
          doc.collection,
          doc.relPath,
          doc.sourceHash,
          doc.sourceMime,
          doc.sourceExt,
          doc.sourceSize,
          doc.sourceMtime,
          docid,
          uri,
          doc.title ?? null,
          doc.mirrorHash ?? null,
          doc.converterId ?? null,
          doc.converterVersion ?? null,
          doc.languageHint ?? null,
          doc.lastErrorCode ?? null,
          doc.lastErrorMessage ?? null,
          // Stamp the error time only when an error code is present.
          doc.lastErrorCode ? new Date().toISOString() : null,
        ]
      );

      return ok(docid);
    } catch (cause) {
      return err(
        'QUERY_FAILED',
        cause instanceof Error ? cause.message : 'Failed to upsert document',
        cause
      );
    }
  }
|
|
399
|
+
|
|
400
|
+
async getDocument(
|
|
401
|
+
collection: string,
|
|
402
|
+
relPath: string
|
|
403
|
+
): Promise<StoreResult<DocumentRow | null>> {
|
|
404
|
+
try {
|
|
405
|
+
const db = this.ensureOpen();
|
|
406
|
+
const row = db
|
|
407
|
+
.query<DbDocumentRow, [string, string]>(
|
|
408
|
+
'SELECT * FROM documents WHERE collection = ? AND rel_path = ?'
|
|
409
|
+
)
|
|
410
|
+
.get(collection, relPath);
|
|
411
|
+
|
|
412
|
+
return ok(row ? mapDocumentRow(row) : null);
|
|
413
|
+
} catch (cause) {
|
|
414
|
+
return err(
|
|
415
|
+
'QUERY_FAILED',
|
|
416
|
+
cause instanceof Error ? cause.message : 'Failed to get document',
|
|
417
|
+
cause
|
|
418
|
+
);
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
async getDocumentByDocid(
|
|
423
|
+
docid: string
|
|
424
|
+
): Promise<StoreResult<DocumentRow | null>> {
|
|
425
|
+
try {
|
|
426
|
+
const db = this.ensureOpen();
|
|
427
|
+
const row = db
|
|
428
|
+
.query<DbDocumentRow, [string]>(
|
|
429
|
+
'SELECT * FROM documents WHERE docid = ?'
|
|
430
|
+
)
|
|
431
|
+
.get(docid);
|
|
432
|
+
|
|
433
|
+
return ok(row ? mapDocumentRow(row) : null);
|
|
434
|
+
} catch (cause) {
|
|
435
|
+
return err(
|
|
436
|
+
'QUERY_FAILED',
|
|
437
|
+
cause instanceof Error
|
|
438
|
+
? cause.message
|
|
439
|
+
: 'Failed to get document by docid',
|
|
440
|
+
cause
|
|
441
|
+
);
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
async getDocumentByUri(
|
|
446
|
+
uri: string
|
|
447
|
+
): Promise<StoreResult<DocumentRow | null>> {
|
|
448
|
+
try {
|
|
449
|
+
const db = this.ensureOpen();
|
|
450
|
+
const row = db
|
|
451
|
+
.query<DbDocumentRow, [string]>('SELECT * FROM documents WHERE uri = ?')
|
|
452
|
+
.get(uri);
|
|
453
|
+
|
|
454
|
+
return ok(row ? mapDocumentRow(row) : null);
|
|
455
|
+
} catch (cause) {
|
|
456
|
+
return err(
|
|
457
|
+
'QUERY_FAILED',
|
|
458
|
+
cause instanceof Error
|
|
459
|
+
? cause.message
|
|
460
|
+
: 'Failed to get document by uri',
|
|
461
|
+
cause
|
|
462
|
+
);
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
async listDocuments(
|
|
467
|
+
collection?: string
|
|
468
|
+
): Promise<StoreResult<DocumentRow[]>> {
|
|
469
|
+
try {
|
|
470
|
+
const db = this.ensureOpen();
|
|
471
|
+
|
|
472
|
+
const rows = collection
|
|
473
|
+
? db
|
|
474
|
+
.query<DbDocumentRow, [string]>(
|
|
475
|
+
'SELECT * FROM documents WHERE collection = ?'
|
|
476
|
+
)
|
|
477
|
+
.all(collection)
|
|
478
|
+
: db.query<DbDocumentRow, []>('SELECT * FROM documents').all();
|
|
479
|
+
|
|
480
|
+
return ok(rows.map(mapDocumentRow));
|
|
481
|
+
} catch (cause) {
|
|
482
|
+
return err(
|
|
483
|
+
'QUERY_FAILED',
|
|
484
|
+
cause instanceof Error ? cause.message : 'Failed to list documents',
|
|
485
|
+
cause
|
|
486
|
+
);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
async markInactive(
|
|
491
|
+
collection: string,
|
|
492
|
+
relPaths: string[]
|
|
493
|
+
): Promise<StoreResult<number>> {
|
|
494
|
+
try {
|
|
495
|
+
const db = this.ensureOpen();
|
|
496
|
+
|
|
497
|
+
if (relPaths.length === 0) {
|
|
498
|
+
return ok(0);
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
const placeholders = relPaths.map(() => '?').join(',');
|
|
502
|
+
const result = db.run(
|
|
503
|
+
`UPDATE documents SET active = 0, updated_at = datetime('now')
|
|
504
|
+
WHERE collection = ? AND rel_path IN (${placeholders})`,
|
|
505
|
+
[collection, ...relPaths]
|
|
506
|
+
);
|
|
507
|
+
|
|
508
|
+
return ok(result.changes);
|
|
509
|
+
} catch (cause) {
|
|
510
|
+
return err(
|
|
511
|
+
'QUERY_FAILED',
|
|
512
|
+
cause instanceof Error
|
|
513
|
+
? cause.message
|
|
514
|
+
: 'Failed to mark documents inactive',
|
|
515
|
+
cause
|
|
516
|
+
);
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
521
|
+
// Content
|
|
522
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
523
|
+
|
|
524
|
+
async upsertContent(
|
|
525
|
+
mirrorHash: string,
|
|
526
|
+
markdown: string
|
|
527
|
+
): Promise<StoreResult<void>> {
|
|
528
|
+
try {
|
|
529
|
+
const db = this.ensureOpen();
|
|
530
|
+
|
|
531
|
+
db.run(
|
|
532
|
+
`INSERT INTO content (mirror_hash, markdown)
|
|
533
|
+
VALUES (?, ?)
|
|
534
|
+
ON CONFLICT(mirror_hash) DO NOTHING`,
|
|
535
|
+
[mirrorHash, markdown]
|
|
536
|
+
);
|
|
537
|
+
|
|
538
|
+
return ok(undefined);
|
|
539
|
+
} catch (cause) {
|
|
540
|
+
return err(
|
|
541
|
+
'QUERY_FAILED',
|
|
542
|
+
cause instanceof Error ? cause.message : 'Failed to upsert content',
|
|
543
|
+
cause
|
|
544
|
+
);
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
async getContent(mirrorHash: string): Promise<StoreResult<string | null>> {
|
|
549
|
+
try {
|
|
550
|
+
const db = this.ensureOpen();
|
|
551
|
+
|
|
552
|
+
const row = db
|
|
553
|
+
.query<{ markdown: string }, [string]>(
|
|
554
|
+
'SELECT markdown FROM content WHERE mirror_hash = ?'
|
|
555
|
+
)
|
|
556
|
+
.get(mirrorHash);
|
|
557
|
+
|
|
558
|
+
return ok(row?.markdown ?? null);
|
|
559
|
+
} catch (cause) {
|
|
560
|
+
return err(
|
|
561
|
+
'QUERY_FAILED',
|
|
562
|
+
cause instanceof Error ? cause.message : 'Failed to get content',
|
|
563
|
+
cause
|
|
564
|
+
);
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
569
|
+
// Chunks
|
|
570
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
571
|
+
|
|
572
|
+
async upsertChunks(
|
|
573
|
+
mirrorHash: string,
|
|
574
|
+
chunks: ChunkInput[]
|
|
575
|
+
): Promise<StoreResult<void>> {
|
|
576
|
+
try {
|
|
577
|
+
const db = this.ensureOpen();
|
|
578
|
+
|
|
579
|
+
const transaction = db.transaction(() => {
|
|
580
|
+
// Delete existing chunks for this hash
|
|
581
|
+
db.run('DELETE FROM content_chunks WHERE mirror_hash = ?', [
|
|
582
|
+
mirrorHash,
|
|
583
|
+
]);
|
|
584
|
+
|
|
585
|
+
// Insert new chunks
|
|
586
|
+
const stmt = db.prepare(`
|
|
587
|
+
INSERT INTO content_chunks (mirror_hash, seq, pos, text, start_line, end_line, language, token_count)
|
|
588
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
589
|
+
`);
|
|
590
|
+
|
|
591
|
+
for (const chunk of chunks) {
|
|
592
|
+
stmt.run(
|
|
593
|
+
mirrorHash,
|
|
594
|
+
chunk.seq,
|
|
595
|
+
chunk.pos,
|
|
596
|
+
chunk.text,
|
|
597
|
+
chunk.startLine,
|
|
598
|
+
chunk.endLine,
|
|
599
|
+
chunk.language ?? null,
|
|
600
|
+
chunk.tokenCount ?? null
|
|
601
|
+
);
|
|
602
|
+
}
|
|
603
|
+
});
|
|
604
|
+
|
|
605
|
+
transaction();
|
|
606
|
+
return ok(undefined);
|
|
607
|
+
} catch (cause) {
|
|
608
|
+
return err(
|
|
609
|
+
'QUERY_FAILED',
|
|
610
|
+
cause instanceof Error ? cause.message : 'Failed to upsert chunks',
|
|
611
|
+
cause
|
|
612
|
+
);
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
async getChunks(mirrorHash: string): Promise<StoreResult<ChunkRow[]>> {
|
|
617
|
+
try {
|
|
618
|
+
const db = this.ensureOpen();
|
|
619
|
+
|
|
620
|
+
const rows = db
|
|
621
|
+
.query<DbChunkRow, [string]>(
|
|
622
|
+
'SELECT * FROM content_chunks WHERE mirror_hash = ? ORDER BY seq'
|
|
623
|
+
)
|
|
624
|
+
.all(mirrorHash);
|
|
625
|
+
|
|
626
|
+
return ok(rows.map(mapChunkRow));
|
|
627
|
+
} catch (cause) {
|
|
628
|
+
return err(
|
|
629
|
+
'QUERY_FAILED',
|
|
630
|
+
cause instanceof Error ? cause.message : 'Failed to get chunks',
|
|
631
|
+
cause
|
|
632
|
+
);
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
async getChunksBatch(
|
|
637
|
+
mirrorHashes: string[]
|
|
638
|
+
): Promise<StoreResult<Map<string, ChunkRow[]>>> {
|
|
639
|
+
try {
|
|
640
|
+
// Early return for empty input
|
|
641
|
+
if (mirrorHashes.length === 0) {
|
|
642
|
+
return ok(new Map());
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
// Dedupe and filter empty strings
|
|
646
|
+
const uniqueHashes = [
|
|
647
|
+
...new Set(mirrorHashes.filter((h) => h.trim().length > 0)),
|
|
648
|
+
];
|
|
649
|
+
if (uniqueHashes.length === 0) {
|
|
650
|
+
return ok(new Map());
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
const db = this.ensureOpen();
|
|
654
|
+
const result = new Map<string, ChunkRow[]>();
|
|
655
|
+
|
|
656
|
+
// SQLite SQLITE_LIMIT_VARIABLE_NUMBER defaults to 999
|
|
657
|
+
// Reserve 99 for potential future filter params (collection, language, etc.)
|
|
658
|
+
const SQLITE_MAX_PARAMS = 900;
|
|
659
|
+
|
|
660
|
+
// Batch queries to respect SQLite parameter limit
|
|
661
|
+
for (let i = 0; i < uniqueHashes.length; i += SQLITE_MAX_PARAMS) {
|
|
662
|
+
const batch = uniqueHashes.slice(i, i + SQLITE_MAX_PARAMS);
|
|
663
|
+
const placeholders = batch.map(() => '?').join(',');
|
|
664
|
+
const sql = `SELECT * FROM content_chunks
|
|
665
|
+
WHERE mirror_hash IN (${placeholders})
|
|
666
|
+
ORDER BY mirror_hash, seq`;
|
|
667
|
+
const rows = db.query<DbChunkRow, string[]>(sql).all(...batch);
|
|
668
|
+
|
|
669
|
+
// Group by mirrorHash, preserving seq order from ORDER BY
|
|
670
|
+
for (const row of rows) {
|
|
671
|
+
const mapped = mapChunkRow(row);
|
|
672
|
+
const existing = result.get(mapped.mirrorHash) ?? [];
|
|
673
|
+
existing.push(mapped);
|
|
674
|
+
result.set(mapped.mirrorHash, existing);
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
return ok(result);
|
|
679
|
+
} catch (cause) {
|
|
680
|
+
return err(
|
|
681
|
+
'QUERY_FAILED',
|
|
682
|
+
cause instanceof Error ? cause.message : 'Failed to get chunks batch',
|
|
683
|
+
cause
|
|
684
|
+
);
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
689
|
+
// FTS Search
|
|
690
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
691
|
+
|
|
692
|
+
  /**
   * Full-text search over chunk text via the `content_fts` FTS5 table.
   *
   * The raw query is escaped with `escapeFts5Query` before being passed to
   * MATCH. Results are chunks joined to their active parent documents and
   * ordered by raw bm25 score (more negative = better match).
   *
   * @param query   User query string (escaped before MATCH).
   * @param options Optional `limit` (default 20), `collection`/`language`
   *                filters, and `snippet` to include a highlighted excerpt.
   * @returns Ok with mapped FtsResult rows; Err with 'INVALID_INPUT' for
   *          FTS5 syntax errors, 'QUERY_FAILED' otherwise.
   */
  async searchFts(
    query: string,
    options: FtsSearchOptions = {}
  ): Promise<StoreResult<FtsResult[]>> {
    try {
      const db = this.ensureOpen();
      // Default page size when the caller does not cap results.
      const limit = options.limit ?? 20;

      // Join FTS results with chunks and documents
      // Use bm25() function explicitly - fts.rank doesn't work with JOINs
      // Note: Multiple docs can share mirror_hash (content-addressed storage)
      // Deduplication by uri+seq is done in search.ts to avoid FTS function context issues
      const sql = `
        SELECT
          c.mirror_hash,
          c.seq,
          bm25(content_fts) as score,
          ${options.snippet ? "snippet(content_fts, 0, '<mark>', '</mark>', '...', 32) as snippet," : ''}
          d.docid,
          d.uri,
          d.title,
          d.collection,
          d.rel_path,
          d.source_mime,
          d.source_ext,
          d.source_mtime,
          d.source_size,
          d.source_hash
        FROM content_fts fts
        JOIN content_chunks c ON c.rowid = fts.rowid
        JOIN documents d ON d.mirror_hash = c.mirror_hash AND d.active = 1
        WHERE content_fts MATCH ?
        ${options.collection ? 'AND d.collection = ?' : ''}
        ${options.language ? 'AND c.language = ?' : ''}
        ORDER BY bm25(content_fts)
        LIMIT ?
      `;

      // Parameter order must mirror the conditional placeholders above:
      // query, [collection], [language], limit.
      const params: (string | number)[] = [escapeFts5Query(query)];
      if (options.collection) {
        params.push(options.collection);
      }
      if (options.language) {
        params.push(options.language);
      }
      params.push(limit);

      // Row shape as returned by SQLite (snake_case columns; `snippet`
      // only present when options.snippet was set).
      interface FtsRow {
        mirror_hash: string;
        seq: number;
        score: number;
        snippet?: string;
        docid: string;
        uri: string;
        title: string | null;
        collection: string;
        rel_path: string;
        source_mime: string | null;
        source_ext: string | null;
        source_mtime: string | null;
        source_size: number | null;
        source_hash: string | null;
      }

      const rows = db.query<FtsRow, (string | number)[]>(sql).all(...params);

      // Map snake_case DB columns to the camelCase FtsResult shape,
      // collapsing SQL NULLs to undefined.
      return ok(
        rows.map((r) => ({
          mirrorHash: r.mirror_hash,
          seq: r.seq,
          score: r.score, // Raw bm25() - smaller (more negative) is better
          snippet: r.snippet,
          docid: r.docid,
          uri: r.uri,
          title: r.title ?? undefined,
          collection: r.collection,
          relPath: r.rel_path,
          sourceMime: r.source_mime ?? undefined,
          sourceExt: r.source_ext ?? undefined,
          sourceMtime: r.source_mtime ?? undefined,
          sourceSize: r.source_size ?? undefined,
          sourceHash: r.source_hash ?? undefined,
        }))
      );
    } catch (cause) {
      const message = cause instanceof Error ? cause.message : '';
      // Detect FTS5 syntax errors and return INVALID_INPUT for consistent handling
      // NOTE(review): the trailing 'fts5:' check subsumes 'fts5: syntax error';
      // the redundancy is harmless but could be simplified.
      const isSyntaxError =
        message.includes('malformed MATCH') ||
        message.includes('fts5: syntax error') ||
        message.includes('fts5:');
      return err(
        isSyntaxError ? 'INVALID_INPUT' : 'QUERY_FAILED',
        message || 'Failed to search FTS',
        cause
      );
    }
  }
|
|
790
|
+
|
|
791
|
+
async rebuildFtsForHash(mirrorHash: string): Promise<StoreResult<void>> {
|
|
792
|
+
try {
|
|
793
|
+
const db = this.ensureOpen();
|
|
794
|
+
|
|
795
|
+
const transaction = db.transaction(() => {
|
|
796
|
+
// Get chunks for this hash
|
|
797
|
+
const chunks = db
|
|
798
|
+
.query<{ rowid: number; text: string }, [string]>(
|
|
799
|
+
'SELECT rowid, text FROM content_chunks WHERE mirror_hash = ?'
|
|
800
|
+
)
|
|
801
|
+
.all(mirrorHash);
|
|
802
|
+
|
|
803
|
+
// Delete old FTS entries for these rowids
|
|
804
|
+
for (const chunk of chunks) {
|
|
805
|
+
db.run('DELETE FROM content_fts WHERE rowid = ?', [chunk.rowid]);
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
// Insert new FTS entries
|
|
809
|
+
const stmt = db.prepare(
|
|
810
|
+
'INSERT INTO content_fts (rowid, text) VALUES (?, ?)'
|
|
811
|
+
);
|
|
812
|
+
for (const chunk of chunks) {
|
|
813
|
+
stmt.run(chunk.rowid, chunk.text);
|
|
814
|
+
}
|
|
815
|
+
});
|
|
816
|
+
|
|
817
|
+
transaction();
|
|
818
|
+
return ok(undefined);
|
|
819
|
+
} catch (cause) {
|
|
820
|
+
return err(
|
|
821
|
+
'QUERY_FAILED',
|
|
822
|
+
cause instanceof Error ? cause.message : 'Failed to rebuild FTS',
|
|
823
|
+
cause
|
|
824
|
+
);
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
829
|
+
// Status
|
|
830
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
831
|
+
|
|
832
|
+
  /**
   * Build a full IndexStatus snapshot of the store: schema version, per-
   * collection document/chunk/embedding counts, global totals, the embedding
   * backlog, recent ingest errors (last 24h), and a coarse health flag.
   *
   * @returns Ok with the status object; Err('QUERY_FAILED', ...) on failure.
   */
  async getStatus(): Promise<StoreResult<IndexStatus>> {
    try {
      const db = this.ensureOpen();

      // Get version
      const versionRow = db
        .query<{ value: string }, []>(
          "SELECT value FROM schema_meta WHERE key = 'version'"
        )
        .get();
      // '0' signals an uninitialized / pre-versioned schema.
      const version = versionRow?.value ?? '0';

      // Derive indexName from dbPath (basename without extension)
      // NOTE(review): splitting on '/' assumes POSIX-style paths — confirm
      // behavior with Windows separators if those are ever supported.
      const indexName =
        this.dbPath
          .split('/')
          .pop()
          ?.replace(SQLITE_EXT_REGEX, '')
          ?.replace(INDEX_PREFIX_REGEX, '') || 'default';

      // Get collection stats with chunk counts
      // Aggregate row shape for the per-collection query below (snake_case).
      interface CollectionStat {
        name: string;
        path: string;
        total: number;
        active: number;
        errored: number;
        chunked: number;
        chunk_count: number;
        embedded_count: number;
      }

      // LEFT JOIN keeps collections with zero documents; the correlated
      // subqueries count chunks/vectors only for active documents.
      const collectionStats = db
        .query<CollectionStat, []>(
          `
        SELECT
          c.name,
          c.path,
          COUNT(DISTINCT d.id) as total,
          SUM(CASE WHEN d.active = 1 THEN 1 ELSE 0 END) as active,
          SUM(CASE WHEN d.last_error_code IS NOT NULL THEN 1 ELSE 0 END) as errored,
          SUM(CASE WHEN d.mirror_hash IS NOT NULL THEN 1 ELSE 0 END) as chunked,
          (SELECT COUNT(*) FROM content_chunks cc
           JOIN documents d2 ON d2.mirror_hash = cc.mirror_hash
           WHERE d2.collection = c.name AND d2.active = 1) as chunk_count,
          (SELECT COUNT(*) FROM content_vectors cv
           JOIN documents d3 ON d3.mirror_hash = cv.mirror_hash
           WHERE d3.collection = c.name AND d3.active = 1) as embedded_count
        FROM collections c
        LEFT JOIN documents d ON d.collection = c.name
        GROUP BY c.name, c.path
      `
        )
        .all();

      // Get totals
      const totalsRow = db
        .query<{ total: number; active: number }, []>(
          `
        SELECT
          COUNT(*) as total,
          SUM(CASE WHEN active = 1 THEN 1 ELSE 0 END) as active
        FROM documents
      `
        )
        .get();

      // Total chunk rows across all content (active and inactive alike).
      const chunkCount =
        db
          .query<{ count: number }, []>(
            'SELECT COUNT(*) as count FROM content_chunks'
          )
          .get()?.count ?? 0;

      // Embedding backlog: chunks from active docs without vectors
      // Uses EXISTS to avoid duplicates when multiple docs share mirror_hash
      const backlogRow = db
        .query<{ count: number }, []>(
          `
        SELECT COUNT(*) as count FROM content_chunks c
        WHERE EXISTS (
          SELECT 1 FROM documents d
          WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
        )
        AND NOT EXISTS (
          SELECT 1 FROM content_vectors v
          WHERE v.mirror_hash = c.mirror_hash AND v.seq = c.seq
        )
      `
        )
        .get();

      // Recent errors (last 24h)
      const recentErrorsRow = db
        .query<{ count: number }, []>(
          `
        SELECT COUNT(*) as count FROM ingest_errors
        WHERE occurred_at > datetime('now', '-1 day')
      `
        )
        .get();

      // Last updated (max updated_at from documents)
      const lastUpdatedRow = db
        .query<{ last_updated: string | null }, []>(
          'SELECT MAX(updated_at) as last_updated FROM documents'
        )
        .get();

      // Health check: no recent errors and DB is accessible
      const recentErrors = recentErrorsRow?.count ?? 0;
      const healthy = recentErrors === 0;

      // Assemble the snapshot, mapping snake_case stats to camelCase fields.
      return ok({
        version,
        indexName,
        configPath: this.configPath,
        dbPath: this.dbPath,
        ftsTokenizer: this.ftsTokenizer,
        collections: collectionStats.map((s) => ({
          name: s.name,
          path: s.path,
          totalDocuments: s.total,
          activeDocuments: s.active,
          errorDocuments: s.errored,
          chunkedDocuments: s.chunked,
          totalChunks: s.chunk_count,
          embeddedChunks: s.embedded_count,
        })),
        totalDocuments: totalsRow?.total ?? 0,
        activeDocuments: totalsRow?.active ?? 0,
        totalChunks: chunkCount,
        embeddingBacklog: backlogRow?.count ?? 0,
        recentErrors,
        lastUpdatedAt: lastUpdatedRow?.last_updated ?? null,
        healthy,
      });
    } catch (cause) {
      return err(
        'QUERY_FAILED',
        cause instanceof Error ? cause.message : 'Failed to get status',
        cause
      );
    }
  }
|
|
977
|
+
|
|
978
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
979
|
+
// Errors
|
|
980
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
981
|
+
|
|
982
|
+
async recordError(error: IngestErrorInput): Promise<StoreResult<void>> {
|
|
983
|
+
try {
|
|
984
|
+
const db = this.ensureOpen();
|
|
985
|
+
|
|
986
|
+
db.run(
|
|
987
|
+
`INSERT INTO ingest_errors (collection, rel_path, code, message, details_json)
|
|
988
|
+
VALUES (?, ?, ?, ?, ?)`,
|
|
989
|
+
[
|
|
990
|
+
error.collection,
|
|
991
|
+
error.relPath,
|
|
992
|
+
error.code,
|
|
993
|
+
error.message,
|
|
994
|
+
error.details ? JSON.stringify(error.details) : null,
|
|
995
|
+
]
|
|
996
|
+
);
|
|
997
|
+
|
|
998
|
+
return ok(undefined);
|
|
999
|
+
} catch (cause) {
|
|
1000
|
+
return err(
|
|
1001
|
+
'QUERY_FAILED',
|
|
1002
|
+
cause instanceof Error ? cause.message : 'Failed to record error',
|
|
1003
|
+
cause
|
|
1004
|
+
);
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
|
|
1008
|
+
async getRecentErrors(limit = 50): Promise<StoreResult<IngestErrorRow[]>> {
|
|
1009
|
+
try {
|
|
1010
|
+
const db = this.ensureOpen();
|
|
1011
|
+
|
|
1012
|
+
const rows = db
|
|
1013
|
+
.query<DbIngestErrorRow, [number]>(
|
|
1014
|
+
'SELECT * FROM ingest_errors ORDER BY occurred_at DESC LIMIT ?'
|
|
1015
|
+
)
|
|
1016
|
+
.all(limit);
|
|
1017
|
+
|
|
1018
|
+
return ok(rows.map(mapIngestErrorRow));
|
|
1019
|
+
} catch (cause) {
|
|
1020
|
+
return err(
|
|
1021
|
+
'QUERY_FAILED',
|
|
1022
|
+
cause instanceof Error ? cause.message : 'Failed to get recent errors',
|
|
1023
|
+
cause
|
|
1024
|
+
);
|
|
1025
|
+
}
|
|
1026
|
+
}
|
|
1027
|
+
|
|
1028
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1029
|
+
// Cleanup
|
|
1030
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1031
|
+
|
|
1032
|
+
  /**
   * Garbage-collect rows left behind by document deactivation/removal.
   *
   * Runs one transaction that deletes, in dependency order:
   *  1. content not referenced by any active document,
   *  2. chunks whose content row was just removed,
   *  3. vectors whose chunk was just removed,
   *  4. expired llm_cache entries,
   *  5. FTS entries whose backing chunk rowid no longer exists.
   *
   * @returns Ok with per-category deletion counts; Err('QUERY_FAILED', ...)
   *          on any SQLite failure (the transaction rolls back).
   */
  async cleanupOrphans(): Promise<StoreResult<CleanupStats>> {
    try {
      const db = this.ensureOpen();

      // Counters captured inside the transaction closure, reported after.
      let orphanedContent = 0;
      let orphanedChunks = 0;
      let orphanedVectors = 0;
      let expiredCache = 0;

      const transaction = db.transaction(() => {
        // Delete content not referenced by any active document
        const contentResult = db.run(`
          DELETE FROM content WHERE mirror_hash NOT IN (
            SELECT DISTINCT mirror_hash FROM documents WHERE mirror_hash IS NOT NULL AND active = 1
          )
        `);
        orphanedContent = contentResult.changes;

        // Delete chunks for deleted content
        const chunksResult = db.run(`
          DELETE FROM content_chunks WHERE mirror_hash NOT IN (
            SELECT mirror_hash FROM content
          )
        `);
        orphanedChunks = chunksResult.changes;

        // Delete vectors for deleted chunks
        // (row-value NOT IN matches on the composite (mirror_hash, seq) key)
        const vectorsResult = db.run(`
          DELETE FROM content_vectors WHERE (mirror_hash, seq) NOT IN (
            SELECT mirror_hash, seq FROM content_chunks
          )
        `);
        orphanedVectors = vectorsResult.changes;

        // Delete expired cache entries
        const cacheResult = db.run(`
          DELETE FROM llm_cache WHERE expires_at IS NOT NULL AND expires_at < datetime('now')
        `);
        expiredCache = cacheResult.changes;

        // Rebuild FTS index (remove orphaned entries)
        // Count intentionally not reported — FTS rows mirror chunks 1:1.
        db.run(`
          DELETE FROM content_fts WHERE rowid NOT IN (
            SELECT rowid FROM content_chunks
          )
        `);
      });

      transaction();

      return ok({
        orphanedContent,
        orphanedChunks,
        orphanedVectors,
        expiredCache,
      });
    } catch (cause) {
      return err(
        'QUERY_FAILED',
        cause instanceof Error ? cause.message : 'Failed to cleanup orphans',
        cause
      );
    }
  }
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1099
|
+
// DB Row Types (snake_case from SQLite)
|
|
1100
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1101
|
+
|
|
1102
|
+
/**
 * Raw `collections` table row exactly as SQLite returns it (snake_case).
 * `include`/`exclude` hold JSON-encoded arrays as text; they are parsed
 * into real arrays by `mapCollectionRow`.
 */
interface DbCollectionRow {
  name: string;
  path: string;
  pattern: string;
  // JSON-encoded array string, or NULL.
  include: string | null;
  exclude: string | null;
  update_cmd: string | null;
  language_hint: string | null;
  synced_at: string;
}
|
|
1112
|
+
|
|
1113
|
+
/**
 * Raw `contexts` scope row (snake_case) — a text snippet attached to a
 * global, per-collection, or path-prefix scope. Mapped by `mapContextRow`.
 */
interface DbContextRow {
  scope_type: 'global' | 'collection' | 'prefix';
  // Meaning depends on scope_type (e.g. collection name or path prefix).
  scope_key: string;
  text: string;
  synced_at: string;
}
|
|
1119
|
+
|
|
1120
|
+
/**
 * Raw `documents` table row exactly as SQLite returns it (snake_case).
 * Converted to the camelCase DocumentRow shape by `mapDocumentRow`.
 */
interface DbDocumentRow {
  id: number;
  collection: string;
  rel_path: string;
  source_hash: string;
  source_mime: string;
  source_ext: string;
  source_size: number;
  source_mtime: string;
  docid: string;
  uri: string;
  title: string | null;
  // NULL until the document's content has been mirrored/chunked.
  mirror_hash: string | null;
  converter_id: string | null;
  converter_version: string | null;
  language_hint: string | null;
  // SQLite integer boolean: 1 = active, 0 = inactive (see mapDocumentRow).
  active: number;
  last_error_code: string | null;
  last_error_message: string | null;
  last_error_at: string | null;
  created_at: string;
  updated_at: string;
}
|
|
1143
|
+
|
|
1144
|
+
/**
 * Raw `content_chunks` table row (snake_case) — one chunk of mirrored
 * content, keyed by (mirror_hash, seq). Mapped by `mapChunkRow`.
 */
interface DbChunkRow {
  mirror_hash: string;
  // Chunk sequence number within the document's content.
  seq: number;
  pos: number;
  text: string;
  start_line: number;
  end_line: number;
  language: string | null;
  token_count: number | null;
  created_at: string;
}
|
|
1155
|
+
|
|
1156
|
+
/**
 * Raw `ingest_errors` table row (snake_case). `details_json` carries an
 * optional JSON payload as text. Mapped by `mapIngestErrorRow`.
 */
interface DbIngestErrorRow {
  id: number;
  collection: string;
  rel_path: string;
  occurred_at: string;
  code: string;
  message: string;
  details_json: string | null;
}
|
|
1165
|
+
|
|
1166
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1167
|
+
// Row Mappers (snake_case -> camelCase)
|
|
1168
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1169
|
+
|
|
1170
|
+
function mapCollectionRow(row: DbCollectionRow): CollectionRow {
|
|
1171
|
+
return {
|
|
1172
|
+
name: row.name,
|
|
1173
|
+
path: row.path,
|
|
1174
|
+
pattern: row.pattern,
|
|
1175
|
+
include: row.include ? JSON.parse(row.include) : null,
|
|
1176
|
+
exclude: row.exclude ? JSON.parse(row.exclude) : null,
|
|
1177
|
+
updateCmd: row.update_cmd,
|
|
1178
|
+
languageHint: row.language_hint,
|
|
1179
|
+
syncedAt: row.synced_at,
|
|
1180
|
+
};
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1183
|
+
function mapContextRow(row: DbContextRow): ContextRow {
|
|
1184
|
+
return {
|
|
1185
|
+
scopeType: row.scope_type,
|
|
1186
|
+
scopeKey: row.scope_key,
|
|
1187
|
+
text: row.text,
|
|
1188
|
+
syncedAt: row.synced_at,
|
|
1189
|
+
};
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
function mapDocumentRow(row: DbDocumentRow): DocumentRow {
|
|
1193
|
+
return {
|
|
1194
|
+
id: row.id,
|
|
1195
|
+
collection: row.collection,
|
|
1196
|
+
relPath: row.rel_path,
|
|
1197
|
+
sourceHash: row.source_hash,
|
|
1198
|
+
sourceMime: row.source_mime,
|
|
1199
|
+
sourceExt: row.source_ext,
|
|
1200
|
+
sourceSize: row.source_size,
|
|
1201
|
+
sourceMtime: row.source_mtime,
|
|
1202
|
+
docid: row.docid,
|
|
1203
|
+
uri: row.uri,
|
|
1204
|
+
title: row.title,
|
|
1205
|
+
mirrorHash: row.mirror_hash,
|
|
1206
|
+
converterId: row.converter_id,
|
|
1207
|
+
converterVersion: row.converter_version,
|
|
1208
|
+
languageHint: row.language_hint,
|
|
1209
|
+
active: row.active === 1,
|
|
1210
|
+
lastErrorCode: row.last_error_code,
|
|
1211
|
+
lastErrorMessage: row.last_error_message,
|
|
1212
|
+
lastErrorAt: row.last_error_at,
|
|
1213
|
+
createdAt: row.created_at,
|
|
1214
|
+
updatedAt: row.updated_at,
|
|
1215
|
+
};
|
|
1216
|
+
}
|
|
1217
|
+
|
|
1218
|
+
function mapChunkRow(row: DbChunkRow): ChunkRow {
|
|
1219
|
+
return {
|
|
1220
|
+
mirrorHash: row.mirror_hash,
|
|
1221
|
+
seq: row.seq,
|
|
1222
|
+
pos: row.pos,
|
|
1223
|
+
text: row.text,
|
|
1224
|
+
startLine: row.start_line,
|
|
1225
|
+
endLine: row.end_line,
|
|
1226
|
+
language: row.language,
|
|
1227
|
+
tokenCount: row.token_count,
|
|
1228
|
+
createdAt: row.created_at,
|
|
1229
|
+
};
|
|
1230
|
+
}
|
|
1231
|
+
|
|
1232
|
+
function mapIngestErrorRow(row: DbIngestErrorRow): IngestErrorRow {
|
|
1233
|
+
return {
|
|
1234
|
+
id: row.id,
|
|
1235
|
+
collection: row.collection,
|
|
1236
|
+
relPath: row.rel_path,
|
|
1237
|
+
occurredAt: row.occurred_at,
|
|
1238
|
+
code: row.code,
|
|
1239
|
+
message: row.message,
|
|
1240
|
+
detailsJson: row.details_json,
|
|
1241
|
+
};
|
|
1242
|
+
}
|