@biaoo/tiangong-wiki 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/README.zh-CN.md +14 -0
- package/assets/sqlite-extensions/darwin-arm64/libsimple.dylib +0 -0
- package/assets/sqlite-extensions/darwin-x64/libsimple.dylib +0 -0
- package/assets/sqlite-extensions/linux-x64/libsimple.so +0 -0
- package/assets/sqlite-extensions/win32-x64/simple.dll +0 -0
- package/assets/wiki.config.default.json +3 -0
- package/dist/commands/rebuild-fts.js +41 -0
- package/dist/core/config.js +12 -0
- package/dist/core/db.js +91 -62
- package/dist/core/fts.js +68 -0
- package/dist/core/indexer.js +11 -11
- package/dist/core/runtime.js +1 -1
- package/dist/core/sqlite-extensions.js +78 -0
- package/dist/core/sync.js +4 -4
- package/dist/core/vault-processing.js +3 -3
- package/dist/daemon/server.js +50 -1
- package/dist/index.js +2 -0
- package/dist/operations/query.js +4 -4
- package/dist/operations/write.js +74 -1
- package/package.json +1 -1
- package/references/cli-interface.md +26 -0
package/README.md
CHANGED
|
@@ -89,10 +89,24 @@ That means commands still work best from inside a workspace, but they can also r
|
|
|
89
89
|
```bash
|
|
90
90
|
tiangong-wiki find --type concept --status active # structured query
|
|
91
91
|
tiangong-wiki fts "Bayesian" # full-text search
|
|
92
|
+
tiangong-wiki rebuild-fts --check # inspect FTS drift / metadata
|
|
93
|
+
tiangong-wiki rebuild-fts # rebuild FTS index explicitly
|
|
92
94
|
tiangong-wiki search "convergence conditions" # semantic search
|
|
93
95
|
tiangong-wiki graph bayes-theorem --depth 2 # graph traversal
|
|
94
96
|
```
|
|
95
97
|
|
|
98
|
+
`wiki.config.json` now supports:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"fts": {
|
|
103
|
+
"tokenizer": "simple"
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
`simple` is now the default. Set `tokenizer` to `default` only if you want the legacy `Intl.Segmenter`-based FTS behavior instead of the bundled `wangfenjin/simple` SQLite extension.
|
|
109
|
+
|
|
96
110
|
```bash
|
|
97
111
|
tiangong-wiki daemon start # start the daemon in the background
|
|
98
112
|
tiangong-wiki dashboard # open dashboard in browser
|
package/README.zh-CN.md
CHANGED
|
@@ -89,10 +89,24 @@ tiangong-wiki sync # 索引 Markdown 文件
|
|
|
89
89
|
```bash
|
|
90
90
|
tiangong-wiki find --type concept --status active # 结构化查询
|
|
91
91
|
tiangong-wiki fts "贝叶斯" # 全文搜索
|
|
92
|
+
tiangong-wiki rebuild-fts --check # 检查 FTS 漂移 / 元数据
|
|
93
|
+
tiangong-wiki rebuild-fts # 显式重建 FTS 索引
|
|
92
94
|
tiangong-wiki search "优化算法的收敛条件" # 语义搜索
|
|
93
95
|
tiangong-wiki graph bayes-theorem --depth 2 # 图遍历
|
|
94
96
|
```
|
|
95
97
|
|
|
98
|
+
`wiki.config.json` 现在支持:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"fts": {
|
|
103
|
+
"tokenizer": "simple"
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
现在默认就是 `simple`。只有在你想退回到基于 `Intl.Segmenter` 的旧 FTS 行为时,才需要把 `tokenizer` 显式设为 `default`。
|
|
109
|
+
|
|
96
110
|
```bash
|
|
97
111
|
tiangong-wiki daemon start # 后台启动 daemon
|
|
98
112
|
tiangong-wiki dashboard # 在浏览器中打开仪表盘
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { executeServerBackedOperation, requestDaemonJson } from "../daemon/client.js";
|
|
2
|
+
import { buildCliWriteActor } from "../daemon/write-actor.js";
|
|
3
|
+
import { rebuildFtsCommand } from "../operations/write.js";
|
|
4
|
+
import { writeJson } from "../utils/output.js";
|
|
5
|
+
/**
 * Registers the `rebuild-fts` sub-command on the given CLI program.
 *
 * With `--check` the command is a "read" operation: locally it calls
 * `rebuildFtsCommand` with `check: true`, remotely it issues
 * `GET /fts/rebuild`. Without `--check` it is a "write" operation: locally it
 * calls `rebuildFtsCommand` with `check: false`, remotely it issues
 * `POST /fts/rebuild` carrying the CLI write actor. The operation result is
 * printed with `writeJson`.
 *
 * @param {object} program - CLI program exposing chainable
 *   `command` / `description` / `option` / `action` methods.
 * @returns {void}
 */
export function registerRebuildFtsCommand(program) {
    const handleRebuildFts = async (options) => {
        const checkOnly = options.check === true;
        const mode = options.mode ?? undefined;
        const inspectViaDaemon = (endpoint) => requestDaemonJson({
            endpoint,
            method: "GET",
            path: "/fts/rebuild",
            query: { check: true, mode },
        });
        const rebuildViaDaemon = (endpoint) => requestDaemonJson({
            endpoint,
            method: "POST",
            path: "/fts/rebuild",
            body: { actor: buildCliWriteActor(process.env), mode },
            // Explicit request timeout (310 s) for the rebuild call.
            timeoutMs: 310_000,
        });
        const result = await executeServerBackedOperation({
            kind: checkOnly ? "read" : "write",
            local: () => rebuildFtsCommand(process.env, { mode, check: checkOnly }),
            remote: checkOnly ? inspectViaDaemon : rebuildViaDaemon,
        });
        writeJson(result);
    };
    program
        .command("rebuild-fts")
        .description("Validate or rebuild the SQLite FTS index")
        .option("--mode <mode>", "Override tokenizer mode for this command (default|simple)")
        .option("--check", "Only inspect FTS drift and metadata without rebuilding")
        .action(handleRebuildFts);
}
|
package/dist/core/config.js
CHANGED
|
@@ -16,6 +16,15 @@ function ensureStringArray(value, label) {
|
|
|
16
16
|
}
|
|
17
17
|
return value;
|
|
18
18
|
}
|
|
19
|
+
/**
 * Validates the configured FTS tokenizer mode.
 *
 * @param {unknown} value - Raw `tokenizer` value read from the config.
 * @param {string} label - Config key name used in the error message.
 * @returns {"default" | "simple"} `"simple"` when the value is `undefined`,
 *   otherwise the value itself when it is one of the two accepted modes.
 * @throws {AppError} With type `"config"` for any other value (including `null`).
 */
function ensureFtsTokenizerMode(value, label) {
    switch (value) {
        case undefined:
            return "simple";
        case "default":
        case "simple":
            return value;
        default:
            throw new AppError(`${label} must be "default" or "simple"`, "config");
    }
}
|
|
19
28
|
function ensureVaultFileTypes(value, label) {
|
|
20
29
|
const normalized = ensureStringArray(value, label).map((item, index) => {
|
|
21
30
|
const fileType = item.trim().replace(/^\./, "").toLowerCase();
|
|
@@ -81,6 +90,9 @@ export function loadConfig(configPath) {
|
|
|
81
90
|
assertCondition(Number.isInteger(raw.schemaVersion), "schemaVersion must be an integer", "config");
|
|
82
91
|
const baseConfig = {
|
|
83
92
|
schemaVersion: Number(raw.schemaVersion),
|
|
93
|
+
fts: {
|
|
94
|
+
tokenizer: ensureFtsTokenizerMode(ensureObject(raw.fts ?? {}, "fts").tokenizer, "fts.tokenizer"),
|
|
95
|
+
},
|
|
84
96
|
customColumns: ensureColumnMap(raw.customColumns ?? {}, "customColumns"),
|
|
85
97
|
defaultSummaryFields: ensureStringArray(raw.defaultSummaryFields ?? [], "defaultSummaryFields"),
|
|
86
98
|
vaultFileTypes: ensureVaultFileTypes(raw.vaultFileTypes ?? DEFAULT_VAULT_FILE_TYPES, "vaultFileTypes"),
|
package/dist/core/db.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import Database from "better-sqlite3";
|
|
2
|
-
import
|
|
2
|
+
import { buildFtsRow, createFtsTable, ftsTableMatchesMode, FTS_INDEX_VERSION, isLegacyExternalContentFts, } from "./fts.js";
|
|
3
|
+
import { loadSqliteExtensions } from "./sqlite-extensions.js";
|
|
3
4
|
import { AppError } from "../utils/errors.js";
|
|
4
|
-
import {
|
|
5
|
+
import { toOffsetIso } from "../utils/time.js";
|
|
5
6
|
export const SCHEMA_VERSION = "1";
|
|
6
|
-
const FTS_INDEX_VERSION = "2";
|
|
7
7
|
function tableExists(db, tableName) {
|
|
8
8
|
const row = db
|
|
9
9
|
.prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?")
|
|
@@ -28,45 +28,6 @@ function ensureTableColumns(db, tableName, definitions) {
|
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
|
-
function createFtsTable(db) {
|
|
32
|
-
db.exec(`
|
|
33
|
-
CREATE VIRTUAL TABLE pages_fts USING fts5(
|
|
34
|
-
title,
|
|
35
|
-
tags,
|
|
36
|
-
summary_text
|
|
37
|
-
);
|
|
38
|
-
`);
|
|
39
|
-
}
|
|
40
|
-
function normalizeTagsForFts(rawTags) {
|
|
41
|
-
if (!rawTags) {
|
|
42
|
-
return "";
|
|
43
|
-
}
|
|
44
|
-
try {
|
|
45
|
-
const parsed = JSON.parse(rawTags);
|
|
46
|
-
if (Array.isArray(parsed)) {
|
|
47
|
-
return parsed
|
|
48
|
-
.map((value) => String(value).trim())
|
|
49
|
-
.filter(Boolean)
|
|
50
|
-
.join(" ");
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
catch {
|
|
54
|
-
// Fall back to the stored value if legacy data is not valid JSON.
|
|
55
|
-
}
|
|
56
|
-
return rawTags;
|
|
57
|
-
}
|
|
58
|
-
function buildFtsRow(row) {
|
|
59
|
-
return {
|
|
60
|
-
rowid: row.rowid,
|
|
61
|
-
title: segmentForFts(row.title),
|
|
62
|
-
tags: segmentForFts(normalizeTagsForFts(row.tags)),
|
|
63
|
-
summary_text: segmentForFts(row.summaryText ?? ""),
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
|
-
function isLegacyExternalContentFts(db) {
|
|
67
|
-
const sql = getTableSql(db, "pages_fts");
|
|
68
|
-
return typeof sql === "string" && /content\s*=\s*'pages'/i.test(sql);
|
|
69
|
-
}
|
|
70
31
|
function ensureBaseTables(db, embeddingDimensions) {
|
|
71
32
|
db.exec(`
|
|
72
33
|
CREATE TABLE IF NOT EXISTS pages (
|
|
@@ -240,7 +201,7 @@ export function setMetaValues(db, values) {
|
|
|
240
201
|
});
|
|
241
202
|
transaction(values);
|
|
242
203
|
}
|
|
243
|
-
export function rebuildFts(db) {
|
|
204
|
+
export function rebuildFts(db, config, extensionVersion) {
|
|
244
205
|
if (!tableExists(db, "pages_fts")) {
|
|
245
206
|
return;
|
|
246
207
|
}
|
|
@@ -250,27 +211,83 @@ export function rebuildFts(db) {
|
|
|
250
211
|
const transaction = db.transaction(() => {
|
|
251
212
|
clearStatement.run();
|
|
252
213
|
for (const row of rows) {
|
|
253
|
-
insertStatement.run(buildFtsRow(row));
|
|
214
|
+
insertStatement.run(buildFtsRow(row, config.fts.tokenizer));
|
|
254
215
|
}
|
|
255
216
|
});
|
|
256
217
|
transaction();
|
|
218
|
+
setMetaValues(db, {
|
|
219
|
+
fts_index_version: FTS_INDEX_VERSION,
|
|
220
|
+
fts_tokenizer_mode: config.fts.tokenizer,
|
|
221
|
+
fts_extension_version: extensionVersion,
|
|
222
|
+
fts_last_rebuild_at: toOffsetIso(),
|
|
223
|
+
});
|
|
257
224
|
}
|
|
258
|
-
function
|
|
225
|
+
/**
 * Inspects the `pages_fts` table and its stored metadata without modifying them.
 *
 * The table must be recreated when it is missing, uses the legacy
 * external-content schema, or its tokenizer schema does not match the
 * configured mode. A rebuild is needed when a recreate is needed or when the
 * stored index version, tokenizer mode, or extension version differs from the
 * expected value.
 *
 * @param {object} db - Database handle (`prepare(...).get()` is used for the row count).
 * @param {object} config - Loaded config; `config.fts.tokenizer` is the expected mode.
 * @param {string | null} extensionVersion - Expected `fts_extension_version` value.
 * @returns {object} Inspection report: `mode`, `hasTable`, `rowCount`,
 *   `expectedIndexVersion`, the stored metadata values, `lastRebuildAt`,
 *   `needsRecreate`, `needsRebuild`, and a `problems` list of messages.
 */
export function inspectFtsIndex(db, config, extensionVersion) {
    const configuredMode = config.fts.tokenizer;
    const hasTable = tableExists(db, "pages_fts");
    const tableSql = hasTable ? getTableSql(db, "pages_fts") : null;
    const storedIndexVersion = getMeta(db, "fts_index_version");
    const storedTokenizerMode = getMeta(db, "fts_tokenizer_mode");
    const storedExtensionVersion = getMeta(db, "fts_extension_version");
    const lastRebuildAt = getMeta(db, "fts_last_rebuild_at");
    let rowCount = 0;
    if (hasTable) {
        const countRow = db.prepare("SELECT COUNT(*) AS count FROM pages_fts").get();
        rowCount = countRow?.count ?? 0;
    }

    // Schema-level state: any of these conditions forces a drop/create.
    const usesLegacySchema = isLegacyExternalContentFts(tableSql);
    const schemaMatchesMode = ftsTableMatchesMode(tableSql, configuredMode);
    const needsRecreate = !hasTable || usesLegacySchema || !schemaMatchesMode;

    const problems = [];
    // Only the first applicable schema problem is reported.
    if (!hasTable) {
        problems.push("pages_fts table is missing.");
    }
    else if (usesLegacySchema) {
        problems.push("pages_fts uses the legacy external-content schema.");
    }
    else if (!schemaMatchesMode) {
        problems.push(`pages_fts tokenizer schema does not match configured mode ${configuredMode}.`);
    }

    // Metadata-level state: any mismatch forces a rebuild.
    const metadataProblems = [];
    if (storedIndexVersion !== FTS_INDEX_VERSION) {
        metadataProblems.push(storedIndexVersion === null
            ? "fts_index_version metadata is missing."
            : `fts_index_version mismatch: expected ${FTS_INDEX_VERSION}, found ${storedIndexVersion}.`);
    }
    if (storedTokenizerMode !== configuredMode) {
        metadataProblems.push(storedTokenizerMode === null
            ? "fts_tokenizer_mode metadata is missing."
            : `fts_tokenizer_mode mismatch: expected ${configuredMode}, found ${storedTokenizerMode}.`);
    }
    if (storedExtensionVersion !== extensionVersion) {
        metadataProblems.push(storedExtensionVersion === null
            ? `fts_extension_version metadata is missing for configured mode ${configuredMode}.`
            : `fts_extension_version mismatch: expected ${extensionVersion ?? "null"}, found ${storedExtensionVersion}.`);
    }
    problems.push(...metadataProblems);

    return {
        mode: configuredMode,
        hasTable,
        rowCount,
        expectedIndexVersion: FTS_INDEX_VERSION,
        storedIndexVersion,
        storedTokenizerMode,
        storedExtensionVersion,
        lastRebuildAt,
        needsRecreate,
        needsRebuild: needsRecreate || metadataProblems.length > 0,
        problems,
    };
}
|
|
281
|
+
/**
 * Brings `pages_fts` in line with the configured tokenizer mode.
 *
 * Uses `inspectFtsIndex` to decide what to do: an existing table that needs
 * recreating is dropped first, a fresh table is created when a recreate is
 * needed, and the index contents are rebuilt when the inspection asks for it.
 *
 * @param {object} db - Database handle.
 * @param {object} config - Loaded config; `config.fts.tokenizer` selects the table schema.
 * @param {string | null} extensionVersion - Value forwarded to the inspection and the rebuild.
 * @returns {void}
 */
function ensureFtsTable(db, config, extensionVersion) {
    const { hasTable, needsRecreate, needsRebuild } = inspectFtsIndex(db, config, extensionVersion);
    if (needsRecreate) {
        if (hasTable) {
            db.exec("DROP TABLE pages_fts");
        }
        createFtsTable(db, config.fts.tokenizer);
    }
    if (needsRebuild) {
        rebuildFts(db, config, extensionVersion);
    }
}
|
|
276
293
|
export function resetVectorTable(db, embeddingDimensions) {
|
|
@@ -283,7 +300,7 @@ export function resetVectorTable(db, embeddingDimensions) {
|
|
|
283
300
|
);
|
|
284
301
|
`);
|
|
285
302
|
}
|
|
286
|
-
export function clearAllIndexedData(db) {
|
|
303
|
+
export function clearAllIndexedData(db, config, extensionVersion) {
|
|
287
304
|
db.exec(`
|
|
288
305
|
DELETE FROM edges;
|
|
289
306
|
DELETE FROM pages;
|
|
@@ -295,17 +312,21 @@ export function clearAllIndexedData(db) {
|
|
|
295
312
|
db.exec("DELETE FROM vec_pages");
|
|
296
313
|
}
|
|
297
314
|
if (tableExists(db, "pages_fts")) {
|
|
298
|
-
rebuildFts(db);
|
|
315
|
+
rebuildFts(db, config, extensionVersion);
|
|
299
316
|
}
|
|
300
317
|
db.prepare("DELETE FROM sync_meta WHERE key IN ('last_sync_at', 'last_sync_id', 'last_full_rebuild_at', 'embedding_profile')").run();
|
|
301
318
|
}
|
|
302
|
-
export function openDb(dbPath, config, embeddingDimensions) {
|
|
319
|
+
export function openDb(dbPath, config, embeddingDimensions, packageRoot, options = {}) {
|
|
303
320
|
const db = new Database(dbPath);
|
|
304
321
|
db.pragma("journal_mode = WAL");
|
|
305
322
|
db.pragma("foreign_keys = ON");
|
|
306
|
-
|
|
323
|
+
const extensionResult = loadSqliteExtensions(db, config, packageRoot);
|
|
324
|
+
const ftsMetadataExtensionVersion = config.fts.tokenizer === "simple" ? extensionResult.loadedSimpleVersion : null;
|
|
307
325
|
ensureBaseTables(db, embeddingDimensions);
|
|
308
|
-
|
|
326
|
+
const initialFtsInspection = inspectFtsIndex(db, config, ftsMetadataExtensionVersion);
|
|
327
|
+
if (options.ensureFts !== false) {
|
|
328
|
+
ensureFtsTable(db, config, ftsMetadataExtensionVersion);
|
|
329
|
+
}
|
|
309
330
|
const vectorDimensions = getVectorTableDimensions(db);
|
|
310
331
|
const vectorDimensionsChanged = vectorDimensions !== null && vectorDimensions !== embeddingDimensions;
|
|
311
332
|
const storedSchemaVersion = getMeta(db, "schema_version");
|
|
@@ -320,5 +341,13 @@ export function openDb(dbPath, config, embeddingDimensions) {
|
|
|
320
341
|
schema_version: SCHEMA_VERSION,
|
|
321
342
|
...(storedConfigVersion === null ? { config_version: config.configVersion } : {}),
|
|
322
343
|
});
|
|
323
|
-
return {
|
|
344
|
+
return {
|
|
345
|
+
db,
|
|
346
|
+
configChanged,
|
|
347
|
+
vectorDimensions,
|
|
348
|
+
vectorDimensionsChanged,
|
|
349
|
+
ftsExtensionVersion: ftsMetadataExtensionVersion,
|
|
350
|
+
simpleExtensionPath: extensionResult.simpleExtensionPath,
|
|
351
|
+
initialFtsInspection,
|
|
352
|
+
};
|
|
324
353
|
}
|
package/dist/core/fts.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { normalizeFtsQuery, segmentForFts } from "../utils/segmenter.js";
|
|
2
|
+
export const FTS_INDEX_VERSION = "3";
|
|
3
|
+
/**
 * Creates the `pages_fts` FTS5 virtual table with `title`, `tags` and
 * `summary_text` columns.
 *
 * @param {object} db - Database handle exposing `exec(sql)`.
 * @param {string} mode - Tokenizer mode; `"simple"` adds `tokenize = 'simple'`
 *   to the table definition, any other value adds no tokenizer option.
 * @returns {void}
 */
export function createFtsTable(db, mode) {
    const tokenizerOption = mode === "simple" ? ", tokenize = 'simple'" : "";
    db.exec(`
    CREATE VIRTUAL TABLE pages_fts USING fts5(
      title,
      tags,
      summary_text
      ${tokenizerOption}
    );
  `);
}
|
|
13
|
+
/**
 * Converts a stored `tags` value into a single space-separated string for FTS.
 *
 * - Falsy input yields `""`.
 * - A JSON array string (or an already-parsed array) is flattened: entries are
 *   stringified, trimmed, and blank, `null` or `undefined` entries are dropped.
 * - A string that is not a JSON array (legacy data, or JSON of another shape)
 *   is returned unchanged.
 * - Any other non-string value is stringified so the result is always a string.
 *
 * @param {unknown} rawTags - Tags as stored on the page row.
 * @returns {string} Space-separated tag text.
 */
export function normalizeTagsForFts(rawTags) {
    if (!rawTags) {
        return "";
    }
    if (Array.isArray(rawTags)) {
        return joinTagValues(rawTags);
    }
    if (typeof rawTags !== "string") {
        return String(rawTags);
    }
    try {
        const parsed = JSON.parse(rawTags);
        if (Array.isArray(parsed)) {
            return joinTagValues(parsed);
        }
    }
    catch {
        // Fall back to the stored value if legacy data is not valid JSON.
    }
    return rawTags;
}
/**
 * Joins tag entries with single spaces, skipping nullish and blank entries so
 * that a `null` element is not indexed as the literal token "null".
 */
function joinTagValues(values) {
    return values
        .filter((value) => value !== null && value !== undefined)
        .map((value) => String(value).trim())
        .filter(Boolean)
        .join(" ");
}
|
|
31
|
+
/**
 * Builds the parameter object inserted into `pages_fts` for one page row.
 *
 * In `"simple"` mode the text is passed through as-is (tags are still
 * normalized); in any other mode every text field is first run through
 * `segmentForFts`.
 *
 * @param {object} row - Page row with `rowid`, `title`, `tags` and optional `summaryText`.
 * @param {string} mode - Tokenizer mode.
 * @returns {{rowid: *, title: *, tags: *, summary_text: *}} Values for the FTS insert.
 */
export function buildFtsRow(row, mode) {
    const tags = normalizeTagsForFts(row.tags);
    const summaryText = row.summaryText ?? "";
    const prepareText = mode === "simple"
        ? (text) => text
        : (text) => segmentForFts(text);
    return {
        rowid: row.rowid,
        title: prepareText(row.title),
        tags: prepareText(tags),
        summary_text: prepareText(summaryText),
    };
}
|
|
48
|
+
/**
 * Builds the WHERE clause and bound parameters for a full-text search.
 *
 * @param {string} query - User query text.
 * @param {string} mode - Tokenizer mode. `"simple"` wraps the trimmed query in
 *   the SQL function `simple_query(?)`; any other mode binds the result of
 *   `normalizeFtsQuery(query)` directly.
 * @returns {{whereClause: string, params: string[]}} Clause and its parameters.
 */
export function buildFtsQueryPlan(query, mode) {
    if (mode !== "simple") {
        const normalizedQuery = normalizeFtsQuery(query);
        return { whereClause: "pages_fts MATCH ?", params: [normalizedQuery] };
    }
    const trimmedQuery = query.trim();
    return { whereClause: "pages_fts MATCH simple_query(?)", params: [trimmedQuery] };
}
|
|
60
|
+
/**
 * Tells whether a table definition declares `content = 'pages'`
 * (case-insensitive, optional whitespace around `=`).
 *
 * @param {unknown} sql - Table SQL, or a non-string when no definition is available.
 * @returns {boolean} `false` for non-strings.
 */
export function isLegacyExternalContentFts(sql) {
    if (typeof sql !== "string") {
        return false;
    }
    return /content\s*=\s*'pages'/i.test(sql);
}
|
|
63
|
+
/**
 * Tells whether a table definition declares `tokenize = 'simple'`
 * (case-insensitive, optional whitespace around `=`).
 *
 * @param {unknown} sql - Table SQL, or a non-string when no definition is available.
 * @returns {boolean} `false` for non-strings.
 */
export function isSimpleTokenizerSql(sql) {
    if (typeof sql !== "string") {
        return false;
    }
    return /tokenize\s*=\s*'simple'/i.test(sql);
}
|
|
66
|
+
/**
 * Checks that a table definition's tokenizer agrees with the requested mode:
 * `"simple"` requires the simple tokenizer clause, every other mode requires
 * its absence.
 *
 * @param {unknown} sql - Table SQL (may be null).
 * @param {string} mode - Tokenizer mode.
 * @returns {boolean} Whether schema and mode agree.
 */
export function ftsTableMatchesMode(sql, mode) {
    const tableUsesSimple = isSimpleTokenizerSql(sql);
    const modeWantsSimple = mode === "simple";
    return tableUsesSimple === modeWantsSimple;
}
|
package/dist/core/indexer.js
CHANGED
|
@@ -145,7 +145,7 @@ function buildUpdateStatement(config) {
|
|
|
145
145
|
];
|
|
146
146
|
return `UPDATE pages SET ${columns.map((column) => `${column} = @${column}`).join(", ")} WHERE id = @id`;
|
|
147
147
|
}
|
|
148
|
-
export function applyChanges(db, changes, wikiPath, config) {
|
|
148
|
+
export function applyChanges(db, changes, wikiPath, config, ftsExtensionVersion) {
|
|
149
149
|
const parseResults = [...changes.added, ...changes.modified].map((entry) => ({
|
|
150
150
|
entry,
|
|
151
151
|
result: parsePage(entry.filePath, wikiPath, config),
|
|
@@ -184,6 +184,15 @@ export function applyChanges(db, changes, wikiPath, config) {
|
|
|
184
184
|
const summaryChangedIds = [];
|
|
185
185
|
const hasContentChanges = parsedEntries.length > 0 || changes.deleted.length > 0;
|
|
186
186
|
const transaction = db.transaction(() => {
|
|
187
|
+
for (const page of changes.deleted) {
|
|
188
|
+
const existing = selectPageRowid.get(page.id);
|
|
189
|
+
deleteEdgesBySourcePage.run(page.id);
|
|
190
|
+
if (existing) {
|
|
191
|
+
deleteVecRow.run(BigInt(existing.rowid));
|
|
192
|
+
}
|
|
193
|
+
deletePage.run(page.id);
|
|
194
|
+
deleted.push(page.id);
|
|
195
|
+
}
|
|
187
196
|
for (const { entry, parsed } of parsedEntries) {
|
|
188
197
|
const existing = selectExistingPage.get(entry.id);
|
|
189
198
|
const isInsert = !existing;
|
|
@@ -217,17 +226,8 @@ export function applyChanges(db, changes, wikiPath, config) {
|
|
|
217
226
|
});
|
|
218
227
|
}
|
|
219
228
|
}
|
|
220
|
-
for (const page of changes.deleted) {
|
|
221
|
-
const existing = selectPageRowid.get(page.id);
|
|
222
|
-
deleteEdgesBySourcePage.run(page.id);
|
|
223
|
-
if (existing) {
|
|
224
|
-
deleteVecRow.run(BigInt(existing.rowid));
|
|
225
|
-
}
|
|
226
|
-
deletePage.run(page.id);
|
|
227
|
-
deleted.push(page.id);
|
|
228
|
-
}
|
|
229
229
|
if (hasContentChanges) {
|
|
230
|
-
rebuildFts(db);
|
|
230
|
+
rebuildFts(db, config, ftsExtensionVersion);
|
|
231
231
|
}
|
|
232
232
|
});
|
|
233
233
|
transaction();
|
package/dist/core/runtime.js
CHANGED
|
@@ -15,6 +15,6 @@ export function loadRuntimeConfig(env = process.env) {
|
|
|
15
15
|
/**
 * Loads the runtime config for `env` and opens the database with it.
 *
 * The embedding dimension comes from the embedding client's settings when
 * available, otherwise from `getEmbeddingDimensionFromEnv(env)`. The package
 * root from the resolved paths is forwarded to `openDb`.
 *
 * @param {object} [env=process.env] - Environment variables.
 * @returns {{db: *, paths: *, config: *, embeddingClient: *, vectorDimensions: *, vectorDimensionsChanged: *}}
 *   The opened database plus the runtime objects used to open it.
 */
export function openRuntimeDb(env = process.env) {
    const { paths, config } = loadRuntimeConfig(env);
    const embeddingClient = EmbeddingClient.fromEnv(env);
    const embeddingDimensions = embeddingClient?.settings.dimensions ?? getEmbeddingDimensionFromEnv(env);
    const opened = openDb(paths.dbPath, config, embeddingDimensions, paths.packageRoot);
    return {
        db: opened.db,
        paths,
        config,
        embeddingClient,
        vectorDimensions: opened.vectorDimensions,
        vectorDimensionsChanged: opened.vectorDimensionsChanged,
    };
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import * as sqliteVec from "sqlite-vec";
|
|
3
|
+
import { AppError } from "../utils/errors.js";
|
|
4
|
+
import { pathExistsSync } from "../utils/fs.js";
|
|
5
|
+
import { isSimpleTokenizerSql } from "./fts.js";
|
|
6
|
+
import { getPackageRoot } from "./paths.js";
|
|
7
|
+
export const BUNDLED_SIMPLE_EXTENSION_VERSION = "v0.7.1";
|
|
8
|
+
/**
 * Package-relative paths of the bundled `simple` SQLite extension binaries,
 * keyed first by `process.platform` and then by `process.arch`.
 * A platform/arch pair that is absent here has no bundled binary.
 */
const SIMPLE_ASSET_MAP = {
    // macOS: Apple Silicon and Intel builds.
    darwin: {
        arm64: "assets/sqlite-extensions/darwin-arm64/libsimple.dylib",
        x64: "assets/sqlite-extensions/darwin-x64/libsimple.dylib",
    },
    // Linux: x64 only.
    linux: {
        x64: "assets/sqlite-extensions/linux-x64/libsimple.so",
    },
    // Windows: x64 only.
    win32: {
        x64: "assets/sqlite-extensions/win32-x64/simple.dll",
    },
};
|
|
20
|
+
/**
 * Reads the stored definition of `pages_fts` from `sqlite_master`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get()`.
 * @returns {string | null} The `sql` column of the matching row, or `null`
 *   when there is no row or the column is null/undefined.
 */
function getExistingFtsSql(db) {
    const lookup = db.prepare("SELECT sql FROM sqlite_master WHERE type IN ('table', 'view') AND name = 'pages_fts'");
    const match = lookup.get();
    return match?.sql ?? null;
}
|
|
26
|
+
/**
 * Resolves the absolute path of the bundled `simple` extension for the
 * current `process.platform` / `process.arch`.
 *
 * @param {string} packageRoot - Directory the asset paths are relative to.
 * @returns {string} Path of the extension binary.
 * @throws {AppError} Type `"config"` when no binary is listed for this
 *   platform/arch; type `"runtime"` when the listed file does not exist.
 */
function resolveBundledSimpleExtensionPath(packageRoot) {
    const { platform, arch } = process;
    const relativePath = SIMPLE_ASSET_MAP[platform]?.[arch];
    if (!relativePath) {
        // Covers both an unknown platform and a known platform without this arch.
        throw new AppError(`Bundled simple extension is not available for platform ${platform}-${arch}.`, "config", {
            platform,
            arch,
        });
    }
    const extensionPath = path.join(packageRoot, relativePath);
    if (pathExistsSync(extensionPath)) {
        return extensionPath;
    }
    throw new AppError(`Bundled simple extension not found: ${extensionPath}`, "runtime", {
        platform,
        arch,
        extensionPath,
        version: BUNDLED_SIMPLE_EXTENSION_VERSION,
    });
}
|
|
52
|
+
/**
 * Loads the SQLite extensions needed by this database connection.
 *
 * `sqlite-vec` is always loaded. The bundled `simple` tokenizer extension is
 * loaded when the config selects the `"simple"` tokenizer, or when the
 * existing `pages_fts` table was created with it.
 *
 * @param {object} db - Database handle exposing `loadExtension(path)`.
 * @param {object} config - Loaded config; `config.fts.tokenizer` is consulted.
 * @param {string} [packageRoot] - Root used to locate bundled assets; falls
 *   back to `getPackageRoot()` when null/undefined.
 * @returns {{simpleLoaded: boolean, loadedSimpleVersion: string | null, simpleExtensionPath: string | null}}
 * @throws {AppError} Type `"runtime"` when loading the bundled extension fails
 *   (the underlying message is carried in the error details as `cause`).
 */
export function loadSqliteExtensions(db, config, packageRoot) {
    sqliteVec.load(db);
    // The existing table is only inspected when the config does not already ask for "simple".
    const simpleRequired = config.fts.tokenizer === "simple" || isSimpleTokenizerSql(getExistingFtsSql(db));
    if (simpleRequired) {
        const simpleExtensionPath = resolveBundledSimpleExtensionPath(packageRoot ?? getPackageRoot());
        try {
            db.loadExtension(simpleExtensionPath);
        }
        catch (error) {
            const cause = error instanceof Error ? error.message : String(error);
            throw new AppError(`Failed to load bundled simple extension: ${simpleExtensionPath}`, "runtime", {
                extensionPath: simpleExtensionPath,
                version: BUNDLED_SIMPLE_EXTENSION_VERSION,
                cause,
            });
        }
        return {
            simpleLoaded: true,
            loadedSimpleVersion: BUNDLED_SIMPLE_EXTENSION_VERSION,
            simpleExtensionPath,
        };
    }
    return {
        simpleLoaded: false,
        loadedSimpleVersion: null,
        simpleExtensionPath: null,
    };
}
|
package/dist/core/sync.js
CHANGED
|
@@ -78,7 +78,7 @@ export async function syncWorkspace(options = {}) {
|
|
|
78
78
|
}
|
|
79
79
|
const config = loadConfig(runtimePaths.configPath);
|
|
80
80
|
const embeddingClient = EmbeddingClient.fromEnv(env);
|
|
81
|
-
const { db, configChanged, vectorDimensionsChanged } = openDb(runtimePaths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimension(env));
|
|
81
|
+
const { db, configChanged, vectorDimensionsChanged, ftsExtensionVersion } = openDb(runtimePaths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimension(env), runtimePaths.packageRoot);
|
|
82
82
|
try {
|
|
83
83
|
let mode = options.targetPaths && options.targetPaths.length > 0 && !options.force ? "path" : "full";
|
|
84
84
|
let upgradedToFullSync = false;
|
|
@@ -93,7 +93,7 @@ export async function syncWorkspace(options = {}) {
|
|
|
93
93
|
throw new AppError("Embedding profile changed, cannot skip embedding.", "config");
|
|
94
94
|
}
|
|
95
95
|
if (options.force) {
|
|
96
|
-
clearAllIndexedData(db);
|
|
96
|
+
clearAllIndexedData(db, config, ftsExtensionVersion);
|
|
97
97
|
mode = "full";
|
|
98
98
|
}
|
|
99
99
|
const changes = mode === "path"
|
|
@@ -106,7 +106,7 @@ export async function syncWorkspace(options = {}) {
|
|
|
106
106
|
changes.unchanged.length === 0) {
|
|
107
107
|
throw new AppError(`No page matched the requested --path value(s).`, "not_found");
|
|
108
108
|
}
|
|
109
|
-
const applyResult = applyChanges(db, changes, runtimePaths.wikiPath, config);
|
|
109
|
+
const applyResult = applyChanges(db, changes, runtimePaths.wikiPath, config, ftsExtensionVersion);
|
|
110
110
|
if (applyResult.parseErrors.length > 0) {
|
|
111
111
|
throw new AppError("Failed to parse one or more wiki pages during sync.", "runtime", {
|
|
112
112
|
parseErrors: applyResult.parseErrors,
|
|
@@ -195,7 +195,7 @@ export async function embedPendingPages(env = process.env) {
|
|
|
195
195
|
if (!embeddingClient) {
|
|
196
196
|
return;
|
|
197
197
|
}
|
|
198
|
-
const { db } = openDb(runtimePaths.dbPath, config, embeddingClient.settings.dimensions);
|
|
198
|
+
const { db } = openDb(runtimePaths.dbPath, config, embeddingClient.settings.dimensions, runtimePaths.packageRoot);
|
|
199
199
|
try {
|
|
200
200
|
const targets = getEmbeddingTargets(db, false, [], []);
|
|
201
201
|
const result = await embedPages(db, embeddingClient, targets);
|
|
@@ -815,7 +815,7 @@ async function processClaimedQueueItem(input) {
|
|
|
815
815
|
export function getVaultQueueSnapshot(env = process.env, status) {
|
|
816
816
|
const paths = resolveRuntimePaths(env);
|
|
817
817
|
const config = loadConfig(paths.configPath);
|
|
818
|
-
const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384);
|
|
818
|
+
const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384, paths.packageRoot);
|
|
819
819
|
try {
|
|
820
820
|
const items = fetchQueueItemsByStatus(db, status);
|
|
821
821
|
const counts = db.prepare(`
|
|
@@ -843,7 +843,7 @@ export function getVaultQueueSnapshot(env = process.env, status) {
|
|
|
843
843
|
export function getVaultQueueItem(env = process.env, fileId) {
|
|
844
844
|
const paths = resolveRuntimePaths(env);
|
|
845
845
|
const config = loadConfig(paths.configPath);
|
|
846
|
-
const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384);
|
|
846
|
+
const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384, paths.packageRoot);
|
|
847
847
|
try {
|
|
848
848
|
return fetchQueueItemByFileId(db, fileId);
|
|
849
849
|
}
|
|
@@ -865,7 +865,7 @@ export async function processVaultQueueBatch(env = process.env, options = {}) {
|
|
|
865
865
|
}
|
|
866
866
|
const paths = resolveRuntimePaths(env);
|
|
867
867
|
const config = loadConfig(paths.configPath);
|
|
868
|
-
const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384);
|
|
868
|
+
const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384, paths.packageRoot);
|
|
869
869
|
try {
|
|
870
870
|
const result = {
|
|
871
871
|
enabled: true,
|
package/dist/daemon/server.js
CHANGED
|
@@ -15,7 +15,7 @@ import { getDashboardGraphOverview, getDashboardLintSummary, getDashboardPageDet
|
|
|
15
15
|
import { diffVaultFiles, findPages, ftsSearchPages, getPageInfo, getVaultQueue, getWikiStat, listPages, listVaultFiles, renderLintResult, runLint, searchPages, traverseGraph, } from "../operations/query.js";
|
|
16
16
|
import { createTemplate, listTemplates, listTypes, recommendTypes, showTemplate, showType, } from "../operations/type-template.js";
|
|
17
17
|
import { runTemplateLint } from "../operations/template-lint.js";
|
|
18
|
-
import { createPage, runSync, runSyncCommand, updatePage } from "../operations/write.js";
|
|
18
|
+
import { createPage, rebuildFtsCommand, runSync, runSyncCommand, updatePage } from "../operations/write.js";
|
|
19
19
|
import { AppError, asAppError } from "../utils/errors.js";
|
|
20
20
|
import { pathExistsSync } from "../utils/fs.js";
|
|
21
21
|
import { addSeconds, toOffsetIso } from "../utils/time.js";
|
|
@@ -614,6 +614,28 @@ export async function runDaemonServer(options) {
|
|
|
614
614
|
});
|
|
615
615
|
}
|
|
616
616
|
};
|
|
617
|
+
/**
 * Runs an FTS rebuild on behalf of `actor` and journals the outcome.
 *
 * On success the rebuild result is passed to `finalizeJournaledWrite`; on any
 * failure (from the rebuild or from finalizing) the error is converted with
 * `asAppError` and handed to `recordSyncFailureAndThrow`. Both journal entries
 * use operation `"rebuild-fts"`, resource `"pages_fts"` and null revisions.
 *
 * @param {*} actor - Write actor the journal entry is attributed to.
 * @param {object} input - Options forwarded to `rebuildFtsCommand`.
 * @returns {Promise<*>} Whatever `finalizeJournaledWrite` resolves to.
 */
const runRebuildFtsTransaction = async (actor, input) => {
    const journalEntry = {
        operation: "rebuild-fts",
        resourceId: "pages_fts",
        revisionBefore: null,
        revisionAfter: null,
    };
    try {
        const result = rebuildFtsCommand(env, input);
        return await finalizeJournaledWrite(actor, { ...journalEntry, result });
    }
    catch (error) {
        return recordSyncFailureAndThrow(actor, { ...journalEntry, error: asAppError(error) });
    }
};
|
|
617
639
|
const runCycleTransaction = async (actor, task) => {
|
|
618
640
|
try {
|
|
619
641
|
const result = await runCycleTask(task);
|
|
@@ -743,6 +765,22 @@ export async function runDaemonServer(options) {
|
|
|
743
765
|
writeJsonResponse(response, 200, result);
|
|
744
766
|
return;
|
|
745
767
|
}
|
|
768
|
+
if (method === "POST" && pathname === "/fts/rebuild") {
|
|
769
|
+
const body = await readJsonBody(request);
|
|
770
|
+
const actor = resolveWriteActor(request, body, buildCliWriteActor(env));
|
|
771
|
+
const result = await enqueueWriteTask("rebuild-fts", () => runRebuildFtsTransaction(actor, {
|
|
772
|
+
mode: body.mode === "simple" ? "simple" : body.mode === "default" ? "default" : undefined,
|
|
773
|
+
}), {
|
|
774
|
+
summarizeResult: (payload) => ({
|
|
775
|
+
rebuilt: payload.rebuilt,
|
|
776
|
+
mode: payload.mode,
|
|
777
|
+
rowCount: payload.rowCount,
|
|
778
|
+
needsRebuild: payload.needsRebuild,
|
|
779
|
+
}),
|
|
780
|
+
});
|
|
781
|
+
writeJsonResponse(response, 200, result);
|
|
782
|
+
return;
|
|
783
|
+
}
|
|
746
784
|
if (method === "GET" && pathname === "/write-queue/summary") {
|
|
747
785
|
writeJsonResponse(response, 200, writeQueue.getSummary());
|
|
748
786
|
return;
|
|
@@ -885,6 +923,17 @@ export async function runDaemonServer(options) {
|
|
|
885
923
|
}));
|
|
886
924
|
return;
|
|
887
925
|
}
|
|
926
|
+
if (method === "GET" && pathname === "/fts/rebuild") {
|
|
927
|
+
writeJsonResponse(response, 200, rebuildFtsCommand(env, {
|
|
928
|
+
mode: url.searchParams.get("mode") === "simple"
|
|
929
|
+
? "simple"
|
|
930
|
+
: url.searchParams.get("mode") === "default"
|
|
931
|
+
? "default"
|
|
932
|
+
: undefined,
|
|
933
|
+
check: url.searchParams.get("check") === "true",
|
|
934
|
+
}));
|
|
935
|
+
return;
|
|
936
|
+
}
|
|
888
937
|
if (method === "GET" && pathname === "/search") {
|
|
889
938
|
writeJsonResponse(response, 200, await searchPages(env, {
|
|
890
939
|
query: url.searchParams.get("query") ?? "",
|
package/dist/index.js
CHANGED
|
@@ -16,6 +16,7 @@ import { registerInitCommand } from "./commands/init.js";
|
|
|
16
16
|
import { registerLintCommand } from "./commands/lint.js";
|
|
17
17
|
import { registerListCommand } from "./commands/list.js";
|
|
18
18
|
import { registerPageInfoCommand } from "./commands/page-info.js";
|
|
19
|
+
import { registerRebuildFtsCommand } from "./commands/rebuild-fts.js";
|
|
19
20
|
import { registerSearchCommand } from "./commands/search.js";
|
|
20
21
|
import { registerSetupCommand } from "./commands/setup.js";
|
|
21
22
|
import { registerSkillCommand } from "./commands/skill.js";
|
|
@@ -80,6 +81,7 @@ function buildProgram() {
|
|
|
80
81
|
registerFindCommand(program, runtimeConfig);
|
|
81
82
|
registerSearchCommand(program);
|
|
82
83
|
registerFtsCommand(program);
|
|
84
|
+
registerRebuildFtsCommand(program);
|
|
83
85
|
registerGraphCommand(program);
|
|
84
86
|
registerPageInfoCommand(program);
|
|
85
87
|
registerListCommand(program);
|
package/dist/operations/query.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import { getMeta } from "../core/db.js";
|
|
3
|
+
import { buildFtsQueryPlan } from "../core/fts.js";
|
|
3
4
|
import { parsePage } from "../core/frontmatter.js";
|
|
4
5
|
import { normalizePageId, resolvePagePath } from "../core/paths.js";
|
|
5
6
|
import { compactPageSummary } from "../core/presenters.js";
|
|
@@ -10,7 +11,6 @@ import { getVaultQueueSnapshot } from "../core/vault-processing.js";
|
|
|
10
11
|
import { camelToSnake } from "../utils/case.js";
|
|
11
12
|
import { AppError } from "../utils/errors.js";
|
|
12
13
|
import { listFilesRecursiveSync, pathExistsSync } from "../utils/fs.js";
|
|
13
|
-
import { normalizeFtsQuery } from "../utils/segmenter.js";
|
|
14
14
|
function parsePositiveLimit(value, label, fallback) {
|
|
15
15
|
const normalized = value ?? fallback;
|
|
16
16
|
const limit = Number.parseInt(String(normalized), 10);
|
|
@@ -306,18 +306,18 @@ export function ftsSearchPages(env = process.env, options) {
|
|
|
306
306
|
const { db, config } = openRuntimeDb(env);
|
|
307
307
|
try {
|
|
308
308
|
const limit = parsePositiveLimit(options.limit, "--limit", 20);
|
|
309
|
-
const
|
|
309
|
+
const queryPlan = buildFtsQueryPlan(options.query, config.fts.tokenizer);
|
|
310
310
|
const rows = db
|
|
311
311
|
.prepare(`
|
|
312
312
|
SELECT ${listPageColumns(config).map((column) => `pages.${column}`).join(", ")}, bm25(pages_fts) AS rank
|
|
313
313
|
FROM pages_fts
|
|
314
314
|
JOIN pages ON pages.rowid = pages_fts.rowid
|
|
315
|
-
WHERE
|
|
315
|
+
WHERE ${queryPlan.whereClause}
|
|
316
316
|
${options.type ? "AND pages.page_type = ?" : ""}
|
|
317
317
|
ORDER BY rank
|
|
318
318
|
LIMIT ?
|
|
319
319
|
`)
|
|
320
|
-
.all(...(options.type ? [
|
|
320
|
+
.all(...(options.type ? [...queryPlan.params, options.type, limit] : [...queryPlan.params, limit]));
|
|
321
321
|
return rows.map((row) => ({
|
|
322
322
|
...compactPageSummary(mapPageRow(row, config), config),
|
|
323
323
|
summaryText: row.summary_text,
|
package/dist/operations/write.js
CHANGED
|
@@ -1,18 +1,33 @@
|
|
|
1
1
|
import { getTemplate } from "../core/config.js";
|
|
2
|
+
import { inspectFtsIndex, openDb, rebuildFts as rebuildFtsIndex } from "../core/db.js";
|
|
3
|
+
import { createFtsTable, FTS_INDEX_VERSION } from "../core/fts.js";
|
|
2
4
|
import { resolveAgentSettings } from "../core/paths.js";
|
|
3
5
|
import { normalizePageId } from "../core/paths.js";
|
|
4
6
|
import { createPageFromTemplate } from "../core/page-files.js";
|
|
5
7
|
import { updatePageById } from "../core/page-files.js";
|
|
6
8
|
import { readCanonicalPageSourceById } from "../core/page-source.js";
|
|
7
|
-
import { loadRuntimeConfig } from "../core/runtime.js";
|
|
9
|
+
import { getEmbeddingDimensionFromEnv, loadRuntimeConfig } from "../core/runtime.js";
|
|
8
10
|
import { openRuntimeDb } from "../core/runtime.js";
|
|
9
11
|
import { syncWorkspace } from "../core/sync.js";
|
|
10
12
|
import { getVaultQueueItem, processVaultQueueBatch } from "../core/vault-processing.js";
|
|
11
13
|
import { selectPageById } from "../core/query.js";
|
|
12
14
|
import { AppError, asAppError } from "../utils/errors.js";
|
|
15
|
+
import { pathExistsSync } from "../utils/fs.js";
|
|
13
16
|
function isPlainObject(value) {
|
|
14
17
|
return Object.prototype.toString.call(value) === "[object Object]";
|
|
15
18
|
}
|
|
19
|
+
/**
 * Return a config whose `fts.tokenizer` is `mode`.
 *
 * The input config is returned as-is (same reference) when `mode` is falsy or
 * already matches the configured tokenizer. Otherwise a shallow copy is
 * returned with a copied `fts` section; the input is never mutated.
 *
 * @param {object} config - runtime config; `config.fts` is only read when `mode` is truthy
 * @param {string} [mode] - tokenizer mode to apply
 * @returns {object} the original config or a shallow copy with the override applied
 */
function applyFtsModeOverride(config, mode) {
    const overrideNeeded = Boolean(mode) && config.fts.tokenizer !== mode;
    if (!overrideNeeded) {
        return config;
    }
    const fts = { ...config.fts, tokenizer: mode };
    return { ...config, fts };
}
|
|
16
31
|
function assertValidSyncCommandOptions(options) {
|
|
17
32
|
if (options.vaultFileId && !options.process) {
|
|
18
33
|
throw new AppError("--vault-file requires --process.", "config");
|
|
@@ -132,6 +147,64 @@ export async function runSyncCommand(env = process.env, options = {}) {
|
|
|
132
147
|
}),
|
|
133
148
|
};
|
|
134
149
|
}
|
|
150
|
+
/**
 * Inspect the `pages_fts` index and, unless `options.check` is truthy, rebuild it.
 *
 * When the database file does not exist, nothing is opened or rebuilt and a
 * report with `needsRecreate`/`needsRebuild` set and a single problem message
 * is returned. Otherwise the database is opened with `ensureFts: false`; in
 * rebuild mode the table is dropped (if present) and recreated when the
 * initial inspection asks for it, the index is rebuilt, and it is inspected
 * again for the report. The database handle is always closed.
 *
 * @param {object} [env=process.env] - environment passed to the runtime config loader
 * @param {object} [options]
 * @param {string} [options.mode] - tokenizer mode overriding the configured one for this call
 * @param {boolean} [options.check] - report only, do not modify the index
 * @returns {object} inspection report (mode, row count, stored/expected versions, drift flags, problems)
 */
export function rebuildFtsCommand(env = process.env, options = {}) {
    const { paths, config } = loadRuntimeConfig(env);
    const effectiveConfig = applyFtsModeOverride(config, options.mode);

    const dbExists = pathExistsSync(paths.dbPath);
    if (!dbExists) {
        // No database yet: report that everything is missing instead of creating one.
        return {
            checked: true,
            rebuilt: false,
            mode: effectiveConfig.fts.tokenizer,
            rowCount: 0,
            expectedIndexVersion: FTS_INDEX_VERSION,
            storedIndexVersion: null,
            storedTokenizerMode: null,
            storedExtensionVersion: null,
            extensionVersion: null,
            simpleExtensionPath: null,
            lastRebuildAt: null,
            needsRecreate: true,
            needsRebuild: true,
            problems: [`Wiki database does not exist yet: ${paths.dbPath}`],
        };
    }

    const embeddingDimension = getEmbeddingDimensionFromEnv(env);
    const { db, ftsExtensionVersion, simpleExtensionPath, initialFtsInspection } = openDb(
        paths.dbPath,
        effectiveConfig,
        embeddingDimension,
        paths.packageRoot,
        { ensureFts: false },
    );
    try {
        // Check mode reports the state found when the database was opened.
        let inspection = initialFtsInspection;
        if (!options.check) {
            if (initialFtsInspection.needsRecreate) {
                if (initialFtsInspection.hasTable) {
                    db.exec("DROP TABLE pages_fts");
                }
                createFtsTable(db, effectiveConfig.fts.tokenizer);
            }
            rebuildFtsIndex(db, effectiveConfig, ftsExtensionVersion);
            // Re-inspect so the report reflects the freshly rebuilt index.
            inspection = inspectFtsIndex(db, effectiveConfig, ftsExtensionVersion);
        }
        return {
            checked: true,
            rebuilt: !options.check,
            mode: effectiveConfig.fts.tokenizer,
            rowCount: inspection.rowCount,
            expectedIndexVersion: inspection.expectedIndexVersion,
            storedIndexVersion: inspection.storedIndexVersion,
            storedTokenizerMode: inspection.storedTokenizerMode,
            storedExtensionVersion: inspection.storedExtensionVersion,
            extensionVersion: ftsExtensionVersion,
            simpleExtensionPath,
            lastRebuildAt: inspection.lastRebuildAt,
            needsRecreate: inspection.needsRecreate,
            needsRebuild: inspection.needsRebuild,
            problems: inspection.problems,
        };
    }
    finally {
        db.close();
    }
}
|
|
135
208
|
export async function createPage(env = process.env, options) {
|
|
136
209
|
const { paths, config } = loadRuntimeConfig(env);
|
|
137
210
|
getTemplate(config, options.type);
|
package/package.json
CHANGED
|
@@ -35,6 +35,7 @@ Global workspace resolution priority:
|
|
|
35
35
|
| `find` | Query pages by structured metadata filters |
|
|
36
36
|
| `search` | Semantic search over page summary embeddings |
|
|
37
37
|
| `fts` | Full-text search over title, tags, and summary text |
|
|
38
|
+
| `rebuild-fts` | Inspect or rebuild the SQLite FTS index |
|
|
38
39
|
| `graph` | Traverse the knowledge graph from a root node |
|
|
39
40
|
| `page-info` | Show full metadata and edges for a single page |
|
|
40
41
|
| `list` | List wiki pages |
|
|
@@ -158,6 +159,31 @@ tiangong-wiki fts <query> [--type <pageType>] [--limit <n>]
|
|
|
158
159
|
|
|
159
160
|
Full-text search against the `pages_fts` table (title, tags, summary_text). Default limit: 20.
|
|
160
161
|
|
|
162
|
+
`wiki.config.json` can set:
|
|
163
|
+
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"fts": {
|
|
167
|
+
"tokenizer": "simple"
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
`simple` is the default. Set `tokenizer` to `default` only if you need the legacy `Intl.Segmenter`-based FTS behavior.
|
|
173
|
+
|
|
174
|
+
### rebuild-fts
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
tiangong-wiki rebuild-fts [--mode <default|simple>] [--check]
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Inspects or rebuilds the `pages_fts` table and its metadata. Useful after changing the FTS tokenizer mode in `wiki.config.json`, or for repairing drift or forcing a clean rebuild.
|
|
181
|
+
|
|
182
|
+
Options:
|
|
183
|
+
|
|
184
|
+
- `--mode <default|simple>` — temporarily override the configured tokenizer mode for this command
|
|
185
|
+
- `--check` — report drift and metadata without rebuilding
|
|
186
|
+
|
|
161
187
|
### graph
|
|
162
188
|
|
|
163
189
|
```
|