@biaoo/tiangong-wiki 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/README.zh-CN.md +14 -0
- package/assets/sqlite-extensions/darwin-arm64/libsimple.dylib +0 -0
- package/assets/sqlite-extensions/darwin-x64/libsimple.dylib +0 -0
- package/assets/sqlite-extensions/linux-x64/libsimple.so +0 -0
- package/assets/sqlite-extensions/win32-x64/simple.dll +0 -0
- package/assets/wiki.config.default.json +3 -0
- package/dist/commands/rebuild-fts.js +41 -0
- package/dist/core/config.js +12 -0
- package/dist/core/db.js +92 -62
- package/dist/core/fts.js +68 -0
- package/dist/core/indexer.js +2 -2
- package/dist/core/runtime.js +1 -1
- package/dist/core/sqlite-extensions.js +78 -0
- package/dist/core/sync.js +4 -4
- package/dist/core/vault-processing.js +396 -176
- package/dist/core/vault.js +10 -0
- package/dist/daemon/server.js +51 -1
- package/dist/index.js +2 -0
- package/dist/operations/dashboard.js +5 -0
- package/dist/operations/query.js +4 -4
- package/dist/operations/write.js +74 -1
- package/package.json +1 -1
- package/references/cli-interface.md +26 -0
- package/references/troubleshooting.md +3 -1
package/README.md
CHANGED
|
@@ -89,10 +89,24 @@ That means commands still work best from inside a workspace, but they can also r
|
|
|
89
89
|
```bash
|
|
90
90
|
tiangong-wiki find --type concept --status active # structured query
|
|
91
91
|
tiangong-wiki fts "Bayesian" # full-text search
|
|
92
|
+
tiangong-wiki rebuild-fts --check # inspect FTS drift / metadata
|
|
93
|
+
tiangong-wiki rebuild-fts # rebuild FTS index explicitly
|
|
92
94
|
tiangong-wiki search "convergence conditions" # semantic search
|
|
93
95
|
tiangong-wiki graph bayes-theorem --depth 2 # graph traversal
|
|
94
96
|
```
|
|
95
97
|
|
|
98
|
+
`wiki.config.json` now supports:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"fts": {
|
|
103
|
+
"tokenizer": "simple"
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
`simple` is now the default. Set `tokenizer` to `default` only if you want the legacy `Intl.Segmenter`-based FTS behavior instead of the bundled `wangfenjin/simple` SQLite extension.
|
|
109
|
+
|
|
96
110
|
```bash
|
|
97
111
|
tiangong-wiki daemon start # start the daemon in the background
|
|
98
112
|
tiangong-wiki dashboard # open dashboard in browser
|
package/README.zh-CN.md
CHANGED
|
@@ -89,10 +89,24 @@ tiangong-wiki sync # 索引 Markdown 文件
|
|
|
89
89
|
```bash
|
|
90
90
|
tiangong-wiki find --type concept --status active # 结构化查询
|
|
91
91
|
tiangong-wiki fts "贝叶斯" # 全文搜索
|
|
92
|
+
tiangong-wiki rebuild-fts --check # 检查 FTS 漂移 / 元数据
|
|
93
|
+
tiangong-wiki rebuild-fts # 显式重建 FTS 索引
|
|
92
94
|
tiangong-wiki search "优化算法的收敛条件" # 语义搜索
|
|
93
95
|
tiangong-wiki graph bayes-theorem --depth 2 # 图遍历
|
|
94
96
|
```
|
|
95
97
|
|
|
98
|
+
`wiki.config.json` 现在支持:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"fts": {
|
|
103
|
+
"tokenizer": "simple"
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
现在默认就是 `simple`。只有在你想退回到基于 `Intl.Segmenter` 的旧 FTS 行为时,才需要把 `tokenizer` 显式设为 `default`。
|
|
109
|
+
|
|
96
110
|
```bash
|
|
97
111
|
tiangong-wiki daemon start # 后台启动 daemon
|
|
98
112
|
tiangong-wiki dashboard # 在浏览器中打开仪表盘
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { executeServerBackedOperation, requestDaemonJson } from "../daemon/client.js";
|
|
2
|
+
import { buildCliWriteActor } from "../daemon/write-actor.js";
|
|
3
|
+
import { rebuildFtsCommand } from "../operations/write.js";
|
|
4
|
+
import { writeJson } from "../utils/output.js";
|
|
5
|
+
/**
 * Register the `rebuild-fts` subcommand on the CLI program.
 *
 * With `--check` the operation is classified as a read and, when routed to a
 * daemon, issued as `GET /fts/rebuild`. Without it the operation is a write
 * and is sent as `POST /fts/rebuild` together with the CLI write actor.
 * The result of either path is printed as JSON.
 *
 * @param {object} program - CLI program exposing chainable
 *   `command` / `description` / `option` / `action` methods.
 */
export function registerRebuildFtsCommand(program) {
    const runRebuildFts = async (options) => {
        // Only a literal `true` counts as check mode.
        const checkOnly = options.check === true;
        const mode = options.mode ?? undefined;

        const runLocally = () => rebuildFtsCommand(process.env, {
            mode,
            check: checkOnly,
        });

        const runViaDaemon = (endpoint) => {
            if (checkOnly) {
                return requestDaemonJson({
                    endpoint,
                    method: "GET",
                    path: "/fts/rebuild",
                    query: {
                        check: true,
                        mode,
                    },
                });
            }
            return requestDaemonJson({
                endpoint,
                method: "POST",
                path: "/fts/rebuild",
                body: {
                    actor: buildCliWriteActor(process.env),
                    mode,
                },
                // The rebuild request is given an explicit, long timeout.
                timeoutMs: 310_000,
            });
        };

        const result = await executeServerBackedOperation({
            kind: checkOnly ? "read" : "write",
            local: runLocally,
            remote: runViaDaemon,
        });
        writeJson(result);
    };

    program
        .command("rebuild-fts")
        .description("Validate or rebuild the SQLite FTS index")
        .option("--mode <mode>", "Override tokenizer mode for this command (default|simple)")
        .option("--check", "Only inspect FTS drift and metadata without rebuilding")
        .action(runRebuildFts);
}
|
package/dist/core/config.js
CHANGED
|
@@ -16,6 +16,15 @@ function ensureStringArray(value, label) {
|
|
|
16
16
|
}
|
|
17
17
|
return value;
|
|
18
18
|
}
|
|
19
|
+
/**
 * Validate the configured FTS tokenizer mode.
 *
 * @param {unknown} value - Raw `fts.tokenizer` value from the config file.
 * @param {string} label - Config path used in the error message.
 * @returns {"default" | "simple"} The validated mode; `"simple"` when the
 *   value is `undefined`.
 * @throws {AppError} When the value is anything other than `undefined`,
 *   `"default"` or `"simple"`.
 */
function ensureFtsTokenizerMode(value, label) {
    switch (value) {
        case undefined:
            // An unset tokenizer falls back to "simple".
            return "simple";
        case "default":
        case "simple":
            return value;
        default:
            throw new AppError(`${label} must be "default" or "simple"`, "config");
    }
}
|
|
19
28
|
function ensureVaultFileTypes(value, label) {
|
|
20
29
|
const normalized = ensureStringArray(value, label).map((item, index) => {
|
|
21
30
|
const fileType = item.trim().replace(/^\./, "").toLowerCase();
|
|
@@ -81,6 +90,9 @@ export function loadConfig(configPath) {
|
|
|
81
90
|
assertCondition(Number.isInteger(raw.schemaVersion), "schemaVersion must be an integer", "config");
|
|
82
91
|
const baseConfig = {
|
|
83
92
|
schemaVersion: Number(raw.schemaVersion),
|
|
93
|
+
fts: {
|
|
94
|
+
tokenizer: ensureFtsTokenizerMode(ensureObject(raw.fts ?? {}, "fts").tokenizer, "fts.tokenizer"),
|
|
95
|
+
},
|
|
84
96
|
customColumns: ensureColumnMap(raw.customColumns ?? {}, "customColumns"),
|
|
85
97
|
defaultSummaryFields: ensureStringArray(raw.defaultSummaryFields ?? [], "defaultSummaryFields"),
|
|
86
98
|
vaultFileTypes: ensureVaultFileTypes(raw.vaultFileTypes ?? DEFAULT_VAULT_FILE_TYPES, "vaultFileTypes"),
|
package/dist/core/db.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import Database from "better-sqlite3";
|
|
2
|
-
import
|
|
2
|
+
import { buildFtsRow, createFtsTable, ftsTableMatchesMode, FTS_INDEX_VERSION, isLegacyExternalContentFts, } from "./fts.js";
|
|
3
|
+
import { loadSqliteExtensions } from "./sqlite-extensions.js";
|
|
3
4
|
import { AppError } from "../utils/errors.js";
|
|
4
|
-
import {
|
|
5
|
+
import { toOffsetIso } from "../utils/time.js";
|
|
5
6
|
export const SCHEMA_VERSION = "1";
|
|
6
|
-
const FTS_INDEX_VERSION = "2";
|
|
7
7
|
function tableExists(db, tableName) {
|
|
8
8
|
const row = db
|
|
9
9
|
.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?")
|
|
@@ -28,45 +28,6 @@ function ensureTableColumns(db, tableName, definitions) {
|
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
|
-
function createFtsTable(db) {
|
|
32
|
-
db.exec(`
|
|
33
|
-
CREATE VIRTUAL TABLE pages_fts USING fts5(
|
|
34
|
-
title,
|
|
35
|
-
tags,
|
|
36
|
-
summary_text
|
|
37
|
-
);
|
|
38
|
-
`);
|
|
39
|
-
}
|
|
40
|
-
function normalizeTagsForFts(rawTags) {
|
|
41
|
-
if (!rawTags) {
|
|
42
|
-
return "";
|
|
43
|
-
}
|
|
44
|
-
try {
|
|
45
|
-
const parsed = JSON.parse(rawTags);
|
|
46
|
-
if (Array.isArray(parsed)) {
|
|
47
|
-
return parsed
|
|
48
|
-
.map((value) => String(value).trim())
|
|
49
|
-
.filter(Boolean)
|
|
50
|
-
.join(" ");
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
catch {
|
|
54
|
-
// Fall back to the stored value if legacy data is not valid JSON.
|
|
55
|
-
}
|
|
56
|
-
return rawTags;
|
|
57
|
-
}
|
|
58
|
-
function buildFtsRow(row) {
|
|
59
|
-
return {
|
|
60
|
-
rowid: row.rowid,
|
|
61
|
-
title: segmentForFts(row.title),
|
|
62
|
-
tags: segmentForFts(normalizeTagsForFts(row.tags)),
|
|
63
|
-
summary_text: segmentForFts(row.summaryText ?? ""),
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
|
-
function isLegacyExternalContentFts(db) {
|
|
67
|
-
const sql = getTableSql(db, "pages_fts");
|
|
68
|
-
return typeof sql === "string" && /content\s*=\s*'pages'/i.test(sql);
|
|
69
|
-
}
|
|
70
31
|
function ensureBaseTables(db, embeddingDimensions) {
|
|
71
32
|
db.exec(`
|
|
72
33
|
CREATE TABLE IF NOT EXISTS pages (
|
|
@@ -177,6 +138,7 @@ function ensureBaseTables(db, embeddingDimensions) {
|
|
|
177
138
|
decision: "TEXT",
|
|
178
139
|
result_manifest_path: "TEXT",
|
|
179
140
|
last_error_at: "TEXT",
|
|
141
|
+
last_error_code: "TEXT",
|
|
180
142
|
retry_after: "TEXT",
|
|
181
143
|
created_page_ids: "TEXT",
|
|
182
144
|
updated_page_ids: "TEXT",
|
|
@@ -239,7 +201,7 @@ export function setMetaValues(db, values) {
|
|
|
239
201
|
});
|
|
240
202
|
transaction(values);
|
|
241
203
|
}
|
|
242
|
-
export function rebuildFts(db) {
|
|
204
|
+
export function rebuildFts(db, config, extensionVersion) {
|
|
243
205
|
if (!tableExists(db, "pages_fts")) {
|
|
244
206
|
return;
|
|
245
207
|
}
|
|
@@ -249,27 +211,83 @@ export function rebuildFts(db) {
|
|
|
249
211
|
const transaction = db.transaction(() => {
|
|
250
212
|
clearStatement.run();
|
|
251
213
|
for (const row of rows) {
|
|
252
|
-
insertStatement.run(buildFtsRow(row));
|
|
214
|
+
insertStatement.run(buildFtsRow(row, config.fts.tokenizer));
|
|
253
215
|
}
|
|
254
216
|
});
|
|
255
217
|
transaction();
|
|
218
|
+
setMetaValues(db, {
|
|
219
|
+
fts_index_version: FTS_INDEX_VERSION,
|
|
220
|
+
fts_tokenizer_mode: config.fts.tokenizer,
|
|
221
|
+
fts_extension_version: extensionVersion,
|
|
222
|
+
fts_last_rebuild_at: toOffsetIso(),
|
|
223
|
+
});
|
|
256
224
|
}
|
|
257
|
-
function
|
|
225
|
+
/**
 * Inspect the `pages_fts` index and its metadata without modifying anything.
 *
 * Compares the table definition and the stored `fts_*` meta values against
 * the configured tokenizer mode, the current FTS index version and the given
 * extension version, collecting a human-readable message for each mismatch.
 *
 * @param {object} db - Open database handle.
 * @param {object} config - Loaded config; `config.fts.tokenizer` is read.
 * @param {string | null} extensionVersion - Extension version expected in the
 *   `fts_extension_version` meta value.
 * @returns {object} Inspection report: `mode`, `hasTable`, `rowCount`, the
 *   expected and stored metadata values, `needsRecreate` (table must be
 *   dropped/created), `needsRebuild` (rows must be re-inserted) and `problems`.
 */
export function inspectFtsIndex(db, config, extensionVersion) {
    const hasTable = tableExists(db, "pages_fts");
    const tableSql = hasTable ? getTableSql(db, "pages_fts") : null;
    const storedIndexVersion = getMeta(db, "fts_index_version");
    const storedTokenizerMode = getMeta(db, "fts_tokenizer_mode");
    const storedExtensionVersion = getMeta(db, "fts_extension_version");
    const lastRebuildAt = getMeta(db, "fts_last_rebuild_at");

    let rowCount = 0;
    if (hasTable) {
        const countRow = db.prepare("SELECT COUNT(*) AS count FROM pages_fts").get();
        rowCount = countRow?.count ?? 0;
    }

    const mode = config.fts.tokenizer;
    const problems = [];

    // Schema-level findings are mutually exclusive and reported in priority
    // order: missing table, then legacy schema, then tokenizer mismatch.
    const legacySchema = hasTable && isLegacyExternalContentFts(tableSql);
    const schemaMismatch = hasTable && !legacySchema && !ftsTableMatchesMode(tableSql, mode);
    const needsRecreate = !hasTable || legacySchema || schemaMismatch;

    if (!hasTable) {
        problems.push("pages_fts table is missing.");
    }
    else if (legacySchema) {
        problems.push("pages_fts uses the legacy external-content schema.");
    }
    else if (schemaMismatch) {
        problems.push(`pages_fts tokenizer schema does not match configured mode ${mode}.`);
    }

    // A recreated table always needs its rows rebuilt; metadata drift alone
    // also forces a rebuild.
    let needsRebuild = needsRecreate;

    if (storedIndexVersion !== FTS_INDEX_VERSION) {
        const message = storedIndexVersion === null
            ? "fts_index_version metadata is missing."
            : `fts_index_version mismatch: expected ${FTS_INDEX_VERSION}, found ${storedIndexVersion}.`;
        problems.push(message);
        needsRebuild = true;
    }

    if (storedTokenizerMode !== mode) {
        const message = storedTokenizerMode === null
            ? "fts_tokenizer_mode metadata is missing."
            : `fts_tokenizer_mode mismatch: expected ${mode}, found ${storedTokenizerMode}.`;
        problems.push(message);
        needsRebuild = true;
    }

    const bothExtensionVersionsNull = storedExtensionVersion === null && extensionVersion === null;
    if (storedExtensionVersion !== extensionVersion && !bothExtensionVersionsNull) {
        const message = storedExtensionVersion === null
            ? `fts_extension_version metadata is missing for configured mode ${mode}.`
            : `fts_extension_version mismatch: expected ${extensionVersion ?? "null"}, found ${storedExtensionVersion}.`;
        problems.push(message);
        needsRebuild = true;
    }

    return {
        mode,
        hasTable,
        rowCount,
        expectedIndexVersion: FTS_INDEX_VERSION,
        storedIndexVersion,
        storedTokenizerMode,
        storedExtensionVersion,
        lastRebuildAt,
        needsRecreate,
        needsRebuild,
        problems,
    };
}
|
|
281
|
+
/**
 * Bring `pages_fts` in line with the configured tokenizer mode.
 *
 * Uses `inspectFtsIndex` to decide what to do: drops and recreates the table
 * when its schema is unusable, then repopulates it when a rebuild is needed.
 *
 * @param {object} db - Open database handle.
 * @param {object} config - Loaded config; `config.fts.tokenizer` selects the
 *   table schema.
 * @param {string | null} extensionVersion - Extension version forwarded to
 *   the inspection and the rebuild.
 */
function ensureFtsTable(db, config, extensionVersion) {
    const { hasTable, needsRecreate, needsRebuild } = inspectFtsIndex(db, config, extensionVersion);
    if (needsRecreate) {
        // A missing table only needs creating; an existing one is dropped first.
        if (hasTable) {
            db.exec("DROP TABLE pages_fts");
        }
        createFtsTable(db, config.fts.tokenizer);
    }
    if (needsRebuild) {
        rebuildFts(db, config, extensionVersion);
    }
}
|
|
275
293
|
export function resetVectorTable(db, embeddingDimensions) {
|
|
@@ -282,7 +300,7 @@ export function resetVectorTable(db, embeddingDimensions) {
|
|
|
282
300
|
);
|
|
283
301
|
`);
|
|
284
302
|
}
|
|
285
|
-
export function clearAllIndexedData(db) {
|
|
303
|
+
export function clearAllIndexedData(db, config, extensionVersion) {
|
|
286
304
|
db.exec(`
|
|
287
305
|
DELETE FROM edges;
|
|
288
306
|
DELETE FROM pages;
|
|
@@ -294,17 +312,21 @@ export function clearAllIndexedData(db) {
|
|
|
294
312
|
db.exec("DELETE FROM vec_pages");
|
|
295
313
|
}
|
|
296
314
|
if (tableExists(db, "pages_fts")) {
|
|
297
|
-
rebuildFts(db);
|
|
315
|
+
rebuildFts(db, config, extensionVersion);
|
|
298
316
|
}
|
|
299
317
|
db.prepare("DELETE FROM sync_meta WHERE key IN ('last_sync_at', 'last_sync_id', 'last_full_rebuild_at', 'embedding_profile')").run();
|
|
300
318
|
}
|
|
301
|
-
export function openDb(dbPath, config, embeddingDimensions) {
|
|
319
|
+
export function openDb(dbPath, config, embeddingDimensions, packageRoot, options = {}) {
|
|
302
320
|
const db = new Database(dbPath);
|
|
303
321
|
db.pragma("journal_mode = WAL");
|
|
304
322
|
db.pragma("foreign_keys = ON");
|
|
305
|
-
|
|
323
|
+
const extensionResult = loadSqliteExtensions(db, config, packageRoot);
|
|
324
|
+
const ftsMetadataExtensionVersion = config.fts.tokenizer === "simple" ? extensionResult.loadedSimpleVersion : null;
|
|
306
325
|
ensureBaseTables(db, embeddingDimensions);
|
|
307
|
-
|
|
326
|
+
const initialFtsInspection = inspectFtsIndex(db, config, ftsMetadataExtensionVersion);
|
|
327
|
+
if (options.ensureFts !== false) {
|
|
328
|
+
ensureFtsTable(db, config, ftsMetadataExtensionVersion);
|
|
329
|
+
}
|
|
308
330
|
const vectorDimensions = getVectorTableDimensions(db);
|
|
309
331
|
const vectorDimensionsChanged = vectorDimensions !== null && vectorDimensions !== embeddingDimensions;
|
|
310
332
|
const storedSchemaVersion = getMeta(db, "schema_version");
|
|
@@ -319,5 +341,13 @@ export function openDb(dbPath, config, embeddingDimensions) {
|
|
|
319
341
|
schema_version: SCHEMA_VERSION,
|
|
320
342
|
...(storedConfigVersion === null ? { config_version: config.configVersion } : {}),
|
|
321
343
|
});
|
|
322
|
-
return {
|
|
344
|
+
return {
|
|
345
|
+
db,
|
|
346
|
+
configChanged,
|
|
347
|
+
vectorDimensions,
|
|
348
|
+
vectorDimensionsChanged,
|
|
349
|
+
ftsExtensionVersion: ftsMetadataExtensionVersion,
|
|
350
|
+
simpleExtensionPath: extensionResult.simpleExtensionPath,
|
|
351
|
+
initialFtsInspection,
|
|
352
|
+
};
|
|
323
353
|
}
|
package/dist/core/fts.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { normalizeFtsQuery, segmentForFts } from "../utils/segmenter.js";
|
|
2
|
+
// Version stamp for the FTS index. db.js writes it to the "fts_index_version"
// meta value on rebuild and flags the index for a rebuild when the stored
// value differs from this one.
export const FTS_INDEX_VERSION = "3";
|
|
3
|
+
/**
 * Create the `pages_fts` FTS5 virtual table with `title`, `tags` and
 * `summary_text` columns.
 *
 * @param {object} db - Database handle exposing `exec(sql)`.
 * @param {string} mode - Tokenizer mode; `"simple"` adds
 *   `tokenize = 'simple'`, any other value emits no tokenize option.
 */
export function createFtsTable(db, mode) {
    const tokenizeClause = mode === "simple" ? ", tokenize = 'simple'" : "";
    db.exec(`
    CREATE VIRTUAL TABLE pages_fts USING fts5(
      title,
      tags,
      summary_text
      ${tokenizeClause}
    );
  `);
}
|
|
13
|
+
/**
 * Flatten a page's stored tags into a single space-separated string for FTS.
 *
 * Accepts the JSON-encoded array string stored on the page row, or an
 * already-decoded array. Entries are stringified, trimmed, and joined with a
 * single space; empty and `null`/`undefined` entries are skipped so they are
 * not indexed as the literal words "null" / "undefined".
 *
 * @param {string | Array<unknown> | null | undefined} rawTags - Stored tags.
 * @returns {string | unknown} The flattened tag text; `""` for falsy input.
 *   Input that is neither an array nor a JSON array is returned unchanged
 *   (legacy plain-text tags).
 */
export function normalizeTagsForFts(rawTags) {
    if (!rawTags) {
        return "";
    }
    let tagList = null;
    if (Array.isArray(rawTags)) {
        tagList = rawTags;
    }
    else if (typeof rawTags === "string") {
        try {
            const parsed = JSON.parse(rawTags);
            if (Array.isArray(parsed)) {
                tagList = parsed;
            }
        }
        catch {
            // Fall back to the stored value if legacy data is not valid JSON.
        }
    }
    if (tagList === null) {
        return rawTags;
    }
    return tagList
        .filter((value) => value != null)
        .map((value) => String(value).trim())
        .filter(Boolean)
        .join(" ");
}
|
|
31
|
+
/**
 * Build the parameter object inserted into `pages_fts` for one page row.
 *
 * In `"simple"` mode the text is passed through untouched; in any other mode
 * each field is pre-segmented with `segmentForFts`.
 *
 * @param {object} row - Page row; `rowid`, `title`, `tags` and `summaryText`
 *   are read.
 * @param {string} mode - Tokenizer mode.
 * @returns {{rowid: unknown, title: unknown, tags: unknown, summary_text: unknown}}
 */
export function buildFtsRow(row, mode) {
    const tagText = normalizeTagsForFts(row.tags);
    const summaryText = row.summaryText ?? "";
    const prepare = mode === "simple"
        ? (text) => text
        : (text) => segmentForFts(text);
    return {
        rowid: row.rowid,
        title: prepare(row.title),
        tags: prepare(tagText),
        summary_text: prepare(summaryText),
    };
}
|
|
48
|
+
/**
 * Build the WHERE clause and bound parameters for a full-text query.
 *
 * @param {string} query - User-supplied search text.
 * @param {string} mode - Tokenizer mode. `"simple"` wraps the trimmed query in
 *   the `simple_query()` SQL function; any other mode binds the result of
 *   `normalizeFtsQuery(query)` directly.
 * @returns {{whereClause: string, params: string[]}}
 */
export function buildFtsQueryPlan(query, mode) {
    const useSimple = mode === "simple";
    const whereClause = useSimple
        ? "pages_fts MATCH simple_query(?)"
        : "pages_fts MATCH ?";
    const boundQuery = useSimple ? query.trim() : normalizeFtsQuery(query);
    return {
        whereClause,
        params: [boundQuery],
    };
}
|
|
60
|
+
/**
 * Tell whether a `pages_fts` definition declares `content = 'pages'`.
 *
 * @param {unknown} sql - CREATE statement text, or a non-string when absent.
 * @returns {boolean} `true` only for a string containing the option.
 */
export function isLegacyExternalContentFts(sql) {
    if (typeof sql !== "string") {
        return false;
    }
    return /content\s*=\s*'pages'/i.test(sql);
}
|
|
63
|
+
/**
 * Tell whether a `pages_fts` definition declares `tokenize = 'simple'`.
 *
 * @param {unknown} sql - CREATE statement text, or a non-string when absent.
 * @returns {boolean} `true` only for a string containing the option.
 */
export function isSimpleTokenizerSql(sql) {
    if (typeof sql !== "string") {
        return false;
    }
    return /tokenize\s*=\s*'simple'/i.test(sql);
}
|
|
66
|
+
/**
 * Check that a `pages_fts` definition agrees with the configured mode.
 *
 * @param {unknown} sql - CREATE statement text of the existing table.
 * @param {string} mode - Configured tokenizer mode.
 * @returns {boolean} For `"simple"`, whether the table declares the simple
 *   tokenizer; for any other mode, whether it does not.
 */
export function ftsTableMatchesMode(sql, mode) {
    const tableUsesSimple = isSimpleTokenizerSql(sql);
    if (mode === "simple") {
        return tableUsesSimple;
    }
    return !tableUsesSimple;
}
|
package/dist/core/indexer.js
CHANGED
|
@@ -145,7 +145,7 @@ function buildUpdateStatement(config) {
|
|
|
145
145
|
];
|
|
146
146
|
return `UPDATE pages SET ${columns.map((column) => `${column} = @${column}`).join(", ")} WHERE id = @id`;
|
|
147
147
|
}
|
|
148
|
-
export function applyChanges(db, changes, wikiPath, config) {
|
|
148
|
+
export function applyChanges(db, changes, wikiPath, config, ftsExtensionVersion) {
|
|
149
149
|
const parseResults = [...changes.added, ...changes.modified].map((entry) => ({
|
|
150
150
|
entry,
|
|
151
151
|
result: parsePage(entry.filePath, wikiPath, config),
|
|
@@ -227,7 +227,7 @@ export function applyChanges(db, changes, wikiPath, config) {
|
|
|
227
227
|
deleted.push(page.id);
|
|
228
228
|
}
|
|
229
229
|
if (hasContentChanges) {
|
|
230
|
-
rebuildFts(db);
|
|
230
|
+
rebuildFts(db, config, ftsExtensionVersion);
|
|
231
231
|
}
|
|
232
232
|
});
|
|
233
233
|
transaction();
|
package/dist/core/runtime.js
CHANGED
|
@@ -15,6 +15,6 @@ export function loadRuntimeConfig(env = process.env) {
|
|
|
15
15
|
/**
 * Load the runtime config and open the database for it.
 *
 * @param {object} [env=process.env] - Environment used to resolve paths,
 *   config and embedding settings.
 * @returns {object} `db`, `paths`, `config`, `embeddingClient`,
 *   `vectorDimensions` and `vectorDimensionsChanged`.
 */
export function openRuntimeDb(env = process.env) {
    const { paths, config } = loadRuntimeConfig(env);
    const embeddingClient = EmbeddingClient.fromEnv(env);
    // Prefer the client's configured dimensions; otherwise read them from env.
    const embeddingDimensions = embeddingClient?.settings.dimensions ?? getEmbeddingDimensionFromEnv(env);
    const opened = openDb(paths.dbPath, config, embeddingDimensions, paths.packageRoot);
    return {
        db: opened.db,
        paths,
        config,
        embeddingClient,
        vectorDimensions: opened.vectorDimensions,
        vectorDimensionsChanged: opened.vectorDimensionsChanged,
    };
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import * as sqliteVec from "sqlite-vec";
|
|
3
|
+
import { AppError } from "../utils/errors.js";
|
|
4
|
+
import { pathExistsSync } from "../utils/fs.js";
|
|
5
|
+
import { isSimpleTokenizerSql } from "./fts.js";
|
|
6
|
+
import { getPackageRoot } from "./paths.js";
|
|
7
|
+
// Version label of the bundled `simple` extension binaries. Returned as
// `loadedSimpleVersion` after a successful load and attached to the error
// details when the binary is missing or fails to load.
export const BUNDLED_SIMPLE_EXTENSION_VERSION = "v0.7.1";
|
|
8
|
+
// Package-relative path of the bundled `simple` extension binary, keyed by
// `process.platform` and then `process.arch`. Combinations that are absent
// here have no bundled binary.
const SIMPLE_ASSET_MAP = {
    darwin: { arm64: "assets/sqlite-extensions/darwin-arm64/libsimple.dylib", x64: "assets/sqlite-extensions/darwin-x64/libsimple.dylib" },
    linux: { x64: "assets/sqlite-extensions/linux-x64/libsimple.so" },
    win32: { x64: "assets/sqlite-extensions/win32-x64/simple.dll" },
};
|
|
20
|
+
/**
 * Read the stored CREATE statement of `pages_fts` from `sqlite_master`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get()`.
 * @returns {string | null} The statement text, or `null` when no matching
 *   row exists or the row carries no SQL.
 */
function getExistingFtsSql(db) {
    const statement = db.prepare("SELECT sql FROM sqlite_master WHERE type IN ('table', 'view') AND name = 'pages_fts'");
    const match = statement.get();
    return match?.sql ?? null;
}
|
|
26
|
+
/**
 * Resolve the absolute path of the bundled `simple` extension for the
 * current platform and architecture.
 *
 * @param {string} packageRoot - Directory the asset paths are relative to.
 * @returns {string} Path of the existing extension binary.
 * @throws {AppError} `"config"` when no binary is bundled for this
 *   platform/arch pair; `"runtime"` when the expected file does not exist.
 */
function resolveBundledSimpleExtensionPath(packageRoot) {
    const { platform, arch } = process;
    // An unknown platform and an unknown arch are reported the same way.
    const relativePath = SIMPLE_ASSET_MAP[platform]?.[arch];
    if (!relativePath) {
        throw new AppError(`Bundled simple extension is not available for platform ${platform}-${arch}.`, "config", {
            platform,
            arch,
        });
    }
    const extensionPath = path.join(packageRoot, relativePath);
    if (pathExistsSync(extensionPath)) {
        return extensionPath;
    }
    throw new AppError(`Bundled simple extension not found: ${extensionPath}`, "runtime", {
        platform,
        arch,
        extensionPath,
        version: BUNDLED_SIMPLE_EXTENSION_VERSION,
    });
}
|
|
52
|
+
/**
 * Load the SQLite extensions the database needs.
 *
 * `sqlite-vec` is always loaded. The bundled `simple` extension is loaded
 * when the configured tokenizer is `"simple"`, or when the existing
 * `pages_fts` table was created with the simple tokenizer.
 *
 * @param {object} db - Open database handle.
 * @param {object} config - Loaded config; `config.fts.tokenizer` is read.
 * @param {string} [packageRoot] - Root used to locate bundled assets;
 *   `getPackageRoot()` is used when it is null or undefined.
 * @returns {{simpleLoaded: boolean, loadedSimpleVersion: string | null, simpleExtensionPath: string | null}}
 * @throws {AppError} When the bundled extension cannot be resolved or loaded.
 */
export function loadSqliteExtensions(db, config, packageRoot) {
    sqliteVec.load(db);

    // The existing table definition is only consulted when the config does
    // not already ask for the simple tokenizer.
    const needsSimple = config.fts.tokenizer === "simple" || isSimpleTokenizerSql(getExistingFtsSql(db));
    if (needsSimple) {
        const simpleExtensionPath = resolveBundledSimpleExtensionPath(packageRoot ?? getPackageRoot());
        try {
            db.loadExtension(simpleExtensionPath);
        }
        catch (error) {
            const cause = error instanceof Error ? error.message : String(error);
            throw new AppError(`Failed to load bundled simple extension: ${simpleExtensionPath}`, "runtime", {
                extensionPath: simpleExtensionPath,
                version: BUNDLED_SIMPLE_EXTENSION_VERSION,
                cause,
            });
        }
        return {
            simpleLoaded: true,
            loadedSimpleVersion: BUNDLED_SIMPLE_EXTENSION_VERSION,
            simpleExtensionPath,
        };
    }

    return {
        simpleLoaded: false,
        loadedSimpleVersion: null,
        simpleExtensionPath: null,
    };
}
|
package/dist/core/sync.js
CHANGED
|
@@ -78,7 +78,7 @@ export async function syncWorkspace(options = {}) {
|
|
|
78
78
|
}
|
|
79
79
|
const config = loadConfig(runtimePaths.configPath);
|
|
80
80
|
const embeddingClient = EmbeddingClient.fromEnv(env);
|
|
81
|
-
const { db, configChanged, vectorDimensionsChanged } = openDb(runtimePaths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimension(env));
|
|
81
|
+
const { db, configChanged, vectorDimensionsChanged, ftsExtensionVersion } = openDb(runtimePaths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimension(env), runtimePaths.packageRoot);
|
|
82
82
|
try {
|
|
83
83
|
let mode = options.targetPaths && options.targetPaths.length > 0 && !options.force ? "path" : "full";
|
|
84
84
|
let upgradedToFullSync = false;
|
|
@@ -93,7 +93,7 @@ export async function syncWorkspace(options = {}) {
|
|
|
93
93
|
throw new AppError("Embedding profile changed, cannot skip embedding.", "config");
|
|
94
94
|
}
|
|
95
95
|
if (options.force) {
|
|
96
|
-
clearAllIndexedData(db);
|
|
96
|
+
clearAllIndexedData(db, config, ftsExtensionVersion);
|
|
97
97
|
mode = "full";
|
|
98
98
|
}
|
|
99
99
|
const changes = mode === "path"
|
|
@@ -106,7 +106,7 @@ export async function syncWorkspace(options = {}) {
|
|
|
106
106
|
changes.unchanged.length === 0) {
|
|
107
107
|
throw new AppError(`No page matched the requested --path value(s).`, "not_found");
|
|
108
108
|
}
|
|
109
|
-
const applyResult = applyChanges(db, changes, runtimePaths.wikiPath, config);
|
|
109
|
+
const applyResult = applyChanges(db, changes, runtimePaths.wikiPath, config, ftsExtensionVersion);
|
|
110
110
|
if (applyResult.parseErrors.length > 0) {
|
|
111
111
|
throw new AppError("Failed to parse one or more wiki pages during sync.", "runtime", {
|
|
112
112
|
parseErrors: applyResult.parseErrors,
|
|
@@ -195,7 +195,7 @@ export async function embedPendingPages(env = process.env) {
|
|
|
195
195
|
if (!embeddingClient) {
|
|
196
196
|
return;
|
|
197
197
|
}
|
|
198
|
-
const { db } = openDb(runtimePaths.dbPath, config, embeddingClient.settings.dimensions);
|
|
198
|
+
const { db } = openDb(runtimePaths.dbPath, config, embeddingClient.settings.dimensions, runtimePaths.packageRoot);
|
|
199
199
|
try {
|
|
200
200
|
const targets = getEmbeddingTargets(db, false, [], []);
|
|
201
201
|
const result = await embedPages(db, embeddingClient, targets);
|