@biaoo/tiangong-wiki 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -89,10 +89,24 @@ That means commands still work best from inside a workspace, but they can also r
89
89
  ```bash
90
90
  tiangong-wiki find --type concept --status active # structured query
91
91
  tiangong-wiki fts "Bayesian" # full-text search
92
+ tiangong-wiki rebuild-fts --check # inspect FTS drift / metadata
93
+ tiangong-wiki rebuild-fts # rebuild FTS index explicitly
92
94
  tiangong-wiki search "convergence conditions" # semantic search
93
95
  tiangong-wiki graph bayes-theorem --depth 2 # graph traversal
94
96
  ```
95
97
 
98
+ `wiki.config.json` now supports:
99
+
100
+ ```json
101
+ {
102
+ "fts": {
103
+ "tokenizer": "simple"
104
+ }
105
+ }
106
+ ```
107
+
108
+ `simple` is now the default. Set `tokenizer` to `default` only if you want the legacy `Intl.Segmenter`-based FTS behavior instead of the bundled `wangfenjin/simple` SQLite extension.
109
+
96
110
  ```bash
97
111
  tiangong-wiki daemon start # start the daemon in the background
98
112
  tiangong-wiki dashboard # open dashboard in browser
package/README.zh-CN.md CHANGED
@@ -89,10 +89,24 @@ tiangong-wiki sync # 索引 Markdown 文件
89
89
  ```bash
90
90
  tiangong-wiki find --type concept --status active # 结构化查询
91
91
  tiangong-wiki fts "贝叶斯" # 全文搜索
92
+ tiangong-wiki rebuild-fts --check # 检查 FTS 漂移 / 元数据
93
+ tiangong-wiki rebuild-fts # 显式重建 FTS 索引
92
94
  tiangong-wiki search "优化算法的收敛条件" # 语义搜索
93
95
  tiangong-wiki graph bayes-theorem --depth 2 # 图遍历
94
96
  ```
95
97
 
98
+ `wiki.config.json` 现在支持:
99
+
100
+ ```json
101
+ {
102
+ "fts": {
103
+ "tokenizer": "simple"
104
+ }
105
+ }
106
+ ```
107
+
108
+ 现在默认就是 `simple`。只有在你想退回到基于 `Intl.Segmenter` 的旧 FTS 行为时,才需要把 `tokenizer` 显式设为 `default`。
109
+
96
110
  ```bash
97
111
  tiangong-wiki daemon start # 后台启动 daemon
98
112
  tiangong-wiki dashboard # 在浏览器中打开仪表盘
@@ -1,5 +1,8 @@
1
1
  {
2
2
  "schemaVersion": 1,
3
+ "fts": {
4
+ "tokenizer": "simple"
5
+ },
3
6
  "customColumns": {},
4
7
  "defaultSummaryFields": [
5
8
  "title",
@@ -0,0 +1,41 @@
1
+ import { executeServerBackedOperation, requestDaemonJson } from "../daemon/client.js";
2
+ import { buildCliWriteActor } from "../daemon/write-actor.js";
3
+ import { rebuildFtsCommand } from "../operations/write.js";
4
+ import { writeJson } from "../utils/output.js";
5
+ export function registerRebuildFtsCommand(program) {
6
+ program
7
+ .command("rebuild-fts")
8
+ .description("Validate or rebuild the SQLite FTS index")
9
+ .option("--mode <mode>", "Override tokenizer mode for this command (default|simple)")
10
+ .option("--check", "Only inspect FTS drift and metadata without rebuilding")
11
+ .action(async (options) => {
12
+ const result = await executeServerBackedOperation({
13
+ kind: options.check === true ? "read" : "write",
14
+ local: () => rebuildFtsCommand(process.env, {
15
+ mode: options.mode ?? undefined,
16
+ check: options.check === true,
17
+ }),
18
+ remote: (endpoint) => options.check === true
19
+ ? requestDaemonJson({
20
+ endpoint,
21
+ method: "GET",
22
+ path: "/fts/rebuild",
23
+ query: {
24
+ check: true,
25
+ mode: options.mode ?? undefined,
26
+ },
27
+ })
28
+ : requestDaemonJson({
29
+ endpoint,
30
+ method: "POST",
31
+ path: "/fts/rebuild",
32
+ body: {
33
+ actor: buildCliWriteActor(process.env),
34
+ mode: options.mode ?? undefined,
35
+ },
36
+ timeoutMs: 310_000,
37
+ }),
38
+ });
39
+ writeJson(result);
40
+ });
41
+ }
@@ -16,6 +16,15 @@ function ensureStringArray(value, label) {
16
16
  }
17
17
  return value;
18
18
  }
19
+ function ensureFtsTokenizerMode(value, label) {
20
+ if (value === undefined) {
21
+ return "simple";
22
+ }
23
+ if (value === "default" || value === "simple") {
24
+ return value;
25
+ }
26
+ throw new AppError(`${label} must be "default" or "simple"`, "config");
27
+ }
19
28
  function ensureVaultFileTypes(value, label) {
20
29
  const normalized = ensureStringArray(value, label).map((item, index) => {
21
30
  const fileType = item.trim().replace(/^\./, "").toLowerCase();
@@ -81,6 +90,9 @@ export function loadConfig(configPath) {
81
90
  assertCondition(Number.isInteger(raw.schemaVersion), "schemaVersion must be an integer", "config");
82
91
  const baseConfig = {
83
92
  schemaVersion: Number(raw.schemaVersion),
93
+ fts: {
94
+ tokenizer: ensureFtsTokenizerMode(ensureObject(raw.fts ?? {}, "fts").tokenizer, "fts.tokenizer"),
95
+ },
84
96
  customColumns: ensureColumnMap(raw.customColumns ?? {}, "customColumns"),
85
97
  defaultSummaryFields: ensureStringArray(raw.defaultSummaryFields ?? [], "defaultSummaryFields"),
86
98
  vaultFileTypes: ensureVaultFileTypes(raw.vaultFileTypes ?? DEFAULT_VAULT_FILE_TYPES, "vaultFileTypes"),
package/dist/core/db.js CHANGED
@@ -1,9 +1,9 @@
1
1
  import Database from "better-sqlite3";
2
- import * as sqliteVec from "sqlite-vec";
2
+ import { buildFtsRow, createFtsTable, ftsTableMatchesMode, FTS_INDEX_VERSION, isLegacyExternalContentFts, } from "./fts.js";
3
+ import { loadSqliteExtensions } from "./sqlite-extensions.js";
3
4
  import { AppError } from "../utils/errors.js";
4
- import { segmentForFts } from "../utils/segmenter.js";
5
+ import { toOffsetIso } from "../utils/time.js";
5
6
  export const SCHEMA_VERSION = "1";
6
- const FTS_INDEX_VERSION = "2";
7
7
  function tableExists(db, tableName) {
8
8
  const row = db
9
9
  .prepare("SELECT name FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?")
@@ -28,45 +28,6 @@ function ensureTableColumns(db, tableName, definitions) {
28
28
  }
29
29
  }
30
30
  }
31
- function createFtsTable(db) {
32
- db.exec(`
33
- CREATE VIRTUAL TABLE pages_fts USING fts5(
34
- title,
35
- tags,
36
- summary_text
37
- );
38
- `);
39
- }
40
- function normalizeTagsForFts(rawTags) {
41
- if (!rawTags) {
42
- return "";
43
- }
44
- try {
45
- const parsed = JSON.parse(rawTags);
46
- if (Array.isArray(parsed)) {
47
- return parsed
48
- .map((value) => String(value).trim())
49
- .filter(Boolean)
50
- .join(" ");
51
- }
52
- }
53
- catch {
54
- // Fall back to the stored value if legacy data is not valid JSON.
55
- }
56
- return rawTags;
57
- }
58
- function buildFtsRow(row) {
59
- return {
60
- rowid: row.rowid,
61
- title: segmentForFts(row.title),
62
- tags: segmentForFts(normalizeTagsForFts(row.tags)),
63
- summary_text: segmentForFts(row.summaryText ?? ""),
64
- };
65
- }
66
- function isLegacyExternalContentFts(db) {
67
- const sql = getTableSql(db, "pages_fts");
68
- return typeof sql === "string" && /content\s*=\s*'pages'/i.test(sql);
69
- }
70
31
  function ensureBaseTables(db, embeddingDimensions) {
71
32
  db.exec(`
72
33
  CREATE TABLE IF NOT EXISTS pages (
@@ -240,7 +201,7 @@ export function setMetaValues(db, values) {
240
201
  });
241
202
  transaction(values);
242
203
  }
243
- export function rebuildFts(db) {
204
+ export function rebuildFts(db, config, extensionVersion) {
244
205
  if (!tableExists(db, "pages_fts")) {
245
206
  return;
246
207
  }
@@ -250,27 +211,83 @@ export function rebuildFts(db) {
250
211
  const transaction = db.transaction(() => {
251
212
  clearStatement.run();
252
213
  for (const row of rows) {
253
- insertStatement.run(buildFtsRow(row));
214
+ insertStatement.run(buildFtsRow(row, config.fts.tokenizer));
254
215
  }
255
216
  });
256
217
  transaction();
218
+ setMetaValues(db, {
219
+ fts_index_version: FTS_INDEX_VERSION,
220
+ fts_tokenizer_mode: config.fts.tokenizer,
221
+ fts_extension_version: extensionVersion,
222
+ fts_last_rebuild_at: toOffsetIso(),
223
+ });
257
224
  }
258
- function ensureFtsTable(db) {
225
+ export function inspectFtsIndex(db, config, extensionVersion) {
259
226
  const hasTable = tableExists(db, "pages_fts");
260
- const storedFtsIndexVersion = getMeta(db, "fts_index_version");
261
- const needsRecreate = !hasTable || isLegacyExternalContentFts(db);
262
- const needsRebuild = needsRecreate || storedFtsIndexVersion !== FTS_INDEX_VERSION;
263
- if (needsRecreate && hasTable) {
264
- db.exec("DROP TABLE pages_fts");
227
+ const tableSql = hasTable ? getTableSql(db, "pages_fts") : null;
228
+ const storedIndexVersion = getMeta(db, "fts_index_version");
229
+ const storedTokenizerMode = getMeta(db, "fts_tokenizer_mode");
230
+ const storedExtensionVersion = getMeta(db, "fts_extension_version");
231
+ const lastRebuildAt = getMeta(db, "fts_last_rebuild_at");
232
+ const rowCount = hasTable
233
+ ? (db.prepare("SELECT COUNT(*) AS count FROM pages_fts").get()?.count ?? 0)
234
+ : 0;
235
+ const problems = [];
236
+ const needsRecreate = !hasTable || isLegacyExternalContentFts(tableSql) || !ftsTableMatchesMode(tableSql, config.fts.tokenizer);
237
+ if (!hasTable) {
238
+ problems.push("pages_fts table is missing.");
239
+ }
240
+ else if (isLegacyExternalContentFts(tableSql)) {
241
+ problems.push("pages_fts uses the legacy external-content schema.");
242
+ }
243
+ else if (!ftsTableMatchesMode(tableSql, config.fts.tokenizer)) {
244
+ problems.push(`pages_fts tokenizer schema does not match configured mode ${config.fts.tokenizer}.`);
245
+ }
246
+ let needsRebuild = needsRecreate;
247
+ if (storedIndexVersion !== FTS_INDEX_VERSION) {
248
+ problems.push(storedIndexVersion === null
249
+ ? "fts_index_version metadata is missing."
250
+ : `fts_index_version mismatch: expected ${FTS_INDEX_VERSION}, found ${storedIndexVersion}.`);
251
+ needsRebuild = true;
252
+ }
253
+ if (storedTokenizerMode !== config.fts.tokenizer) {
254
+ problems.push(storedTokenizerMode === null
255
+ ? "fts_tokenizer_mode metadata is missing."
256
+ : `fts_tokenizer_mode mismatch: expected ${config.fts.tokenizer}, found ${storedTokenizerMode}.`);
257
+ needsRebuild = true;
258
+ }
259
+ if (storedExtensionVersion !== extensionVersion) {
260
+ if (!(storedExtensionVersion === null && extensionVersion === null)) {
261
+ problems.push(storedExtensionVersion === null
262
+ ? `fts_extension_version metadata is missing for configured mode ${config.fts.tokenizer}.`
263
+ : `fts_extension_version mismatch: expected ${extensionVersion ?? "null"}, found ${storedExtensionVersion}.`);
264
+ needsRebuild = true;
265
+ }
265
266
  }
266
- if (needsRecreate) {
267
- createFtsTable(db);
267
+ return {
268
+ mode: config.fts.tokenizer,
269
+ hasTable,
270
+ rowCount,
271
+ expectedIndexVersion: FTS_INDEX_VERSION,
272
+ storedIndexVersion,
273
+ storedTokenizerMode,
274
+ storedExtensionVersion,
275
+ lastRebuildAt,
276
+ needsRecreate,
277
+ needsRebuild,
278
+ problems,
279
+ };
280
+ }
281
+ function ensureFtsTable(db, config, extensionVersion) {
282
+ const inspection = inspectFtsIndex(db, config, extensionVersion);
283
+ if (inspection.needsRecreate && inspection.hasTable) {
284
+ db.exec("DROP TABLE pages_fts");
268
285
  }
269
- if (needsRebuild) {
270
- rebuildFts(db);
286
+ if (inspection.needsRecreate) {
287
+ createFtsTable(db, config.fts.tokenizer);
271
288
  }
272
- if (needsRebuild || storedFtsIndexVersion !== FTS_INDEX_VERSION) {
273
- setMeta(db, "fts_index_version", FTS_INDEX_VERSION);
289
+ if (inspection.needsRebuild) {
290
+ rebuildFts(db, config, extensionVersion);
274
291
  }
275
292
  }
276
293
  export function resetVectorTable(db, embeddingDimensions) {
@@ -283,7 +300,7 @@ export function resetVectorTable(db, embeddingDimensions) {
283
300
  );
284
301
  `);
285
302
  }
286
- export function clearAllIndexedData(db) {
303
+ export function clearAllIndexedData(db, config, extensionVersion) {
287
304
  db.exec(`
288
305
  DELETE FROM edges;
289
306
  DELETE FROM pages;
@@ -295,17 +312,21 @@ export function clearAllIndexedData(db) {
295
312
  db.exec("DELETE FROM vec_pages");
296
313
  }
297
314
  if (tableExists(db, "pages_fts")) {
298
- rebuildFts(db);
315
+ rebuildFts(db, config, extensionVersion);
299
316
  }
300
317
  db.prepare("DELETE FROM sync_meta WHERE key IN ('last_sync_at', 'last_sync_id', 'last_full_rebuild_at', 'embedding_profile')").run();
301
318
  }
302
- export function openDb(dbPath, config, embeddingDimensions) {
319
+ export function openDb(dbPath, config, embeddingDimensions, packageRoot, options = {}) {
303
320
  const db = new Database(dbPath);
304
321
  db.pragma("journal_mode = WAL");
305
322
  db.pragma("foreign_keys = ON");
306
- sqliteVec.load(db);
323
+ const extensionResult = loadSqliteExtensions(db, config, packageRoot);
324
+ const ftsMetadataExtensionVersion = config.fts.tokenizer === "simple" ? extensionResult.loadedSimpleVersion : null;
307
325
  ensureBaseTables(db, embeddingDimensions);
308
- ensureFtsTable(db);
326
+ const initialFtsInspection = inspectFtsIndex(db, config, ftsMetadataExtensionVersion);
327
+ if (options.ensureFts !== false) {
328
+ ensureFtsTable(db, config, ftsMetadataExtensionVersion);
329
+ }
309
330
  const vectorDimensions = getVectorTableDimensions(db);
310
331
  const vectorDimensionsChanged = vectorDimensions !== null && vectorDimensions !== embeddingDimensions;
311
332
  const storedSchemaVersion = getMeta(db, "schema_version");
@@ -320,5 +341,13 @@ export function openDb(dbPath, config, embeddingDimensions) {
320
341
  schema_version: SCHEMA_VERSION,
321
342
  ...(storedConfigVersion === null ? { config_version: config.configVersion } : {}),
322
343
  });
323
- return { db, configChanged, vectorDimensions, vectorDimensionsChanged };
344
+ return {
345
+ db,
346
+ configChanged,
347
+ vectorDimensions,
348
+ vectorDimensionsChanged,
349
+ ftsExtensionVersion: ftsMetadataExtensionVersion,
350
+ simpleExtensionPath: extensionResult.simpleExtensionPath,
351
+ initialFtsInspection,
352
+ };
324
353
  }
@@ -0,0 +1,68 @@
1
+ import { normalizeFtsQuery, segmentForFts } from "../utils/segmenter.js";
2
+ export const FTS_INDEX_VERSION = "3";
3
+ export function createFtsTable(db, mode) {
4
+ db.exec(`
5
+ CREATE VIRTUAL TABLE pages_fts USING fts5(
6
+ title,
7
+ tags,
8
+ summary_text
9
+ ${mode === "simple" ? ", tokenize = 'simple'" : ""}
10
+ );
11
+ `);
12
+ }
13
+ export function normalizeTagsForFts(rawTags) {
14
+ if (!rawTags) {
15
+ return "";
16
+ }
17
+ try {
18
+ const parsed = JSON.parse(rawTags);
19
+ if (Array.isArray(parsed)) {
20
+ return parsed
21
+ .map((value) => String(value).trim())
22
+ .filter(Boolean)
23
+ .join(" ");
24
+ }
25
+ }
26
+ catch {
27
+ // Fall back to the stored value if legacy data is not valid JSON.
28
+ }
29
+ return rawTags;
30
+ }
31
+ export function buildFtsRow(row, mode) {
32
+ const tags = normalizeTagsForFts(row.tags);
33
+ if (mode === "simple") {
34
+ return {
35
+ rowid: row.rowid,
36
+ title: row.title,
37
+ tags,
38
+ summary_text: row.summaryText ?? "",
39
+ };
40
+ }
41
+ return {
42
+ rowid: row.rowid,
43
+ title: segmentForFts(row.title),
44
+ tags: segmentForFts(tags),
45
+ summary_text: segmentForFts(row.summaryText ?? ""),
46
+ };
47
+ }
48
+ export function buildFtsQueryPlan(query, mode) {
49
+ if (mode === "simple") {
50
+ return {
51
+ whereClause: "pages_fts MATCH simple_query(?)",
52
+ params: [query.trim()],
53
+ };
54
+ }
55
+ return {
56
+ whereClause: "pages_fts MATCH ?",
57
+ params: [normalizeFtsQuery(query)],
58
+ };
59
+ }
60
+ export function isLegacyExternalContentFts(sql) {
61
+ return typeof sql === "string" && /content\s*=\s*'pages'/i.test(sql);
62
+ }
63
+ export function isSimpleTokenizerSql(sql) {
64
+ return typeof sql === "string" && /tokenize\s*=\s*'simple'/i.test(sql);
65
+ }
66
+ export function ftsTableMatchesMode(sql, mode) {
67
+ return mode === "simple" ? isSimpleTokenizerSql(sql) : !isSimpleTokenizerSql(sql);
68
+ }
@@ -145,7 +145,7 @@ function buildUpdateStatement(config) {
145
145
  ];
146
146
  return `UPDATE pages SET ${columns.map((column) => `${column} = @${column}`).join(", ")} WHERE id = @id`;
147
147
  }
148
- export function applyChanges(db, changes, wikiPath, config) {
148
+ export function applyChanges(db, changes, wikiPath, config, ftsExtensionVersion) {
149
149
  const parseResults = [...changes.added, ...changes.modified].map((entry) => ({
150
150
  entry,
151
151
  result: parsePage(entry.filePath, wikiPath, config),
@@ -184,6 +184,15 @@ export function applyChanges(db, changes, wikiPath, config) {
184
184
  const summaryChangedIds = [];
185
185
  const hasContentChanges = parsedEntries.length > 0 || changes.deleted.length > 0;
186
186
  const transaction = db.transaction(() => {
187
+ for (const page of changes.deleted) {
188
+ const existing = selectPageRowid.get(page.id);
189
+ deleteEdgesBySourcePage.run(page.id);
190
+ if (existing) {
191
+ deleteVecRow.run(BigInt(existing.rowid));
192
+ }
193
+ deletePage.run(page.id);
194
+ deleted.push(page.id);
195
+ }
187
196
  for (const { entry, parsed } of parsedEntries) {
188
197
  const existing = selectExistingPage.get(entry.id);
189
198
  const isInsert = !existing;
@@ -217,17 +226,8 @@ export function applyChanges(db, changes, wikiPath, config) {
217
226
  });
218
227
  }
219
228
  }
220
- for (const page of changes.deleted) {
221
- const existing = selectPageRowid.get(page.id);
222
- deleteEdgesBySourcePage.run(page.id);
223
- if (existing) {
224
- deleteVecRow.run(BigInt(existing.rowid));
225
- }
226
- deletePage.run(page.id);
227
- deleted.push(page.id);
228
- }
229
229
  if (hasContentChanges) {
230
- rebuildFts(db);
230
+ rebuildFts(db, config, ftsExtensionVersion);
231
231
  }
232
232
  });
233
233
  transaction();
@@ -15,6 +15,6 @@ export function loadRuntimeConfig(env = process.env) {
15
15
  export function openRuntimeDb(env = process.env) {
16
16
  const { paths, config } = loadRuntimeConfig(env);
17
17
  const embeddingClient = EmbeddingClient.fromEnv(env);
18
- const { db, vectorDimensions, vectorDimensionsChanged } = openDb(paths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimensionFromEnv(env));
18
+ const { db, vectorDimensions, vectorDimensionsChanged } = openDb(paths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimensionFromEnv(env), paths.packageRoot);
19
19
  return { db, paths, config, embeddingClient, vectorDimensions, vectorDimensionsChanged };
20
20
  }
@@ -0,0 +1,78 @@
1
+ import path from "node:path";
2
+ import * as sqliteVec from "sqlite-vec";
3
+ import { AppError } from "../utils/errors.js";
4
+ import { pathExistsSync } from "../utils/fs.js";
5
+ import { isSimpleTokenizerSql } from "./fts.js";
6
+ import { getPackageRoot } from "./paths.js";
7
+ export const BUNDLED_SIMPLE_EXTENSION_VERSION = "v0.7.1";
8
+ const SIMPLE_ASSET_MAP = {
9
+ darwin: {
10
+ arm64: "assets/sqlite-extensions/darwin-arm64/libsimple.dylib",
11
+ x64: "assets/sqlite-extensions/darwin-x64/libsimple.dylib",
12
+ },
13
+ linux: {
14
+ x64: "assets/sqlite-extensions/linux-x64/libsimple.so",
15
+ },
16
+ win32: {
17
+ x64: "assets/sqlite-extensions/win32-x64/simple.dll",
18
+ },
19
+ };
20
+ function getExistingFtsSql(db) {
21
+ const row = db
22
+ .prepare("SELECT sql FROM sqlite_master WHERE type IN ('table', 'view') AND name = 'pages_fts'")
23
+ .get();
24
+ return row?.sql ?? null;
25
+ }
26
+ function resolveBundledSimpleExtensionPath(packageRoot) {
27
+ const byArch = SIMPLE_ASSET_MAP[process.platform];
28
+ if (!byArch) {
29
+ throw new AppError(`Bundled simple extension is not available for platform ${process.platform}-${process.arch}.`, "config", {
30
+ platform: process.platform,
31
+ arch: process.arch,
32
+ });
33
+ }
34
+ const relativePath = byArch[process.arch];
35
+ if (!relativePath) {
36
+ throw new AppError(`Bundled simple extension is not available for platform ${process.platform}-${process.arch}.`, "config", {
37
+ platform: process.platform,
38
+ arch: process.arch,
39
+ });
40
+ }
41
+ const extensionPath = path.join(packageRoot, relativePath);
42
+ if (!pathExistsSync(extensionPath)) {
43
+ throw new AppError(`Bundled simple extension not found: ${extensionPath}`, "runtime", {
44
+ platform: process.platform,
45
+ arch: process.arch,
46
+ extensionPath,
47
+ version: BUNDLED_SIMPLE_EXTENSION_VERSION,
48
+ });
49
+ }
50
+ return extensionPath;
51
+ }
52
+ export function loadSqliteExtensions(db, config, packageRoot) {
53
+ sqliteVec.load(db);
54
+ const shouldLoadSimple = config.fts.tokenizer === "simple" || isSimpleTokenizerSql(getExistingFtsSql(db));
55
+ if (!shouldLoadSimple) {
56
+ return {
57
+ simpleLoaded: false,
58
+ loadedSimpleVersion: null,
59
+ simpleExtensionPath: null,
60
+ };
61
+ }
62
+ const simpleExtensionPath = resolveBundledSimpleExtensionPath(packageRoot ?? getPackageRoot());
63
+ try {
64
+ db.loadExtension(simpleExtensionPath);
65
+ }
66
+ catch (error) {
67
+ throw new AppError(`Failed to load bundled simple extension: ${simpleExtensionPath}`, "runtime", {
68
+ extensionPath: simpleExtensionPath,
69
+ version: BUNDLED_SIMPLE_EXTENSION_VERSION,
70
+ cause: error instanceof Error ? error.message : String(error),
71
+ });
72
+ }
73
+ return {
74
+ simpleLoaded: true,
75
+ loadedSimpleVersion: BUNDLED_SIMPLE_EXTENSION_VERSION,
76
+ simpleExtensionPath,
77
+ };
78
+ }
package/dist/core/sync.js CHANGED
@@ -78,7 +78,7 @@ export async function syncWorkspace(options = {}) {
78
78
  }
79
79
  const config = loadConfig(runtimePaths.configPath);
80
80
  const embeddingClient = EmbeddingClient.fromEnv(env);
81
- const { db, configChanged, vectorDimensionsChanged } = openDb(runtimePaths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimension(env));
81
+ const { db, configChanged, vectorDimensionsChanged, ftsExtensionVersion } = openDb(runtimePaths.dbPath, config, embeddingClient?.settings.dimensions ?? getEmbeddingDimension(env), runtimePaths.packageRoot);
82
82
  try {
83
83
  let mode = options.targetPaths && options.targetPaths.length > 0 && !options.force ? "path" : "full";
84
84
  let upgradedToFullSync = false;
@@ -93,7 +93,7 @@ export async function syncWorkspace(options = {}) {
93
93
  throw new AppError("Embedding profile changed, cannot skip embedding.", "config");
94
94
  }
95
95
  if (options.force) {
96
- clearAllIndexedData(db);
96
+ clearAllIndexedData(db, config, ftsExtensionVersion);
97
97
  mode = "full";
98
98
  }
99
99
  const changes = mode === "path"
@@ -106,7 +106,7 @@ export async function syncWorkspace(options = {}) {
106
106
  changes.unchanged.length === 0) {
107
107
  throw new AppError(`No page matched the requested --path value(s).`, "not_found");
108
108
  }
109
- const applyResult = applyChanges(db, changes, runtimePaths.wikiPath, config);
109
+ const applyResult = applyChanges(db, changes, runtimePaths.wikiPath, config, ftsExtensionVersion);
110
110
  if (applyResult.parseErrors.length > 0) {
111
111
  throw new AppError("Failed to parse one or more wiki pages during sync.", "runtime", {
112
112
  parseErrors: applyResult.parseErrors,
@@ -195,7 +195,7 @@ export async function embedPendingPages(env = process.env) {
195
195
  if (!embeddingClient) {
196
196
  return;
197
197
  }
198
- const { db } = openDb(runtimePaths.dbPath, config, embeddingClient.settings.dimensions);
198
+ const { db } = openDb(runtimePaths.dbPath, config, embeddingClient.settings.dimensions, runtimePaths.packageRoot);
199
199
  try {
200
200
  const targets = getEmbeddingTargets(db, false, [], []);
201
201
  const result = await embedPages(db, embeddingClient, targets);
@@ -815,7 +815,7 @@ async function processClaimedQueueItem(input) {
815
815
  export function getVaultQueueSnapshot(env = process.env, status) {
816
816
  const paths = resolveRuntimePaths(env);
817
817
  const config = loadConfig(paths.configPath);
818
- const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384);
818
+ const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384, paths.packageRoot);
819
819
  try {
820
820
  const items = fetchQueueItemsByStatus(db, status);
821
821
  const counts = db.prepare(`
@@ -843,7 +843,7 @@ export function getVaultQueueSnapshot(env = process.env, status) {
843
843
  export function getVaultQueueItem(env = process.env, fileId) {
844
844
  const paths = resolveRuntimePaths(env);
845
845
  const config = loadConfig(paths.configPath);
846
- const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384);
846
+ const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384, paths.packageRoot);
847
847
  try {
848
848
  return fetchQueueItemByFileId(db, fileId);
849
849
  }
@@ -865,7 +865,7 @@ export async function processVaultQueueBatch(env = process.env, options = {}) {
865
865
  }
866
866
  const paths = resolveRuntimePaths(env);
867
867
  const config = loadConfig(paths.configPath);
868
- const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384);
868
+ const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384, paths.packageRoot);
869
869
  try {
870
870
  const result = {
871
871
  enabled: true,
@@ -15,7 +15,7 @@ import { getDashboardGraphOverview, getDashboardLintSummary, getDashboardPageDet
15
15
  import { diffVaultFiles, findPages, ftsSearchPages, getPageInfo, getVaultQueue, getWikiStat, listPages, listVaultFiles, renderLintResult, runLint, searchPages, traverseGraph, } from "../operations/query.js";
16
16
  import { createTemplate, listTemplates, listTypes, recommendTypes, showTemplate, showType, } from "../operations/type-template.js";
17
17
  import { runTemplateLint } from "../operations/template-lint.js";
18
- import { createPage, runSync, runSyncCommand, updatePage } from "../operations/write.js";
18
+ import { createPage, rebuildFtsCommand, runSync, runSyncCommand, updatePage } from "../operations/write.js";
19
19
  import { AppError, asAppError } from "../utils/errors.js";
20
20
  import { pathExistsSync } from "../utils/fs.js";
21
21
  import { addSeconds, toOffsetIso } from "../utils/time.js";
@@ -614,6 +614,28 @@ export async function runDaemonServer(options) {
614
614
  });
615
615
  }
616
616
  };
617
+ const runRebuildFtsTransaction = async (actor, input) => {
618
+ try {
619
+ const result = rebuildFtsCommand(env, input);
620
+ return await finalizeJournaledWrite(actor, {
621
+ operation: "rebuild-fts",
622
+ resourceId: "pages_fts",
623
+ revisionBefore: null,
624
+ revisionAfter: null,
625
+ result,
626
+ });
627
+ }
628
+ catch (error) {
629
+ const appError = asAppError(error);
630
+ return recordSyncFailureAndThrow(actor, {
631
+ operation: "rebuild-fts",
632
+ resourceId: "pages_fts",
633
+ revisionBefore: null,
634
+ revisionAfter: null,
635
+ error: appError,
636
+ });
637
+ }
638
+ };
617
639
  const runCycleTransaction = async (actor, task) => {
618
640
  try {
619
641
  const result = await runCycleTask(task);
@@ -743,6 +765,22 @@ export async function runDaemonServer(options) {
743
765
  writeJsonResponse(response, 200, result);
744
766
  return;
745
767
  }
768
+ if (method === "POST" && pathname === "/fts/rebuild") {
769
+ const body = await readJsonBody(request);
770
+ const actor = resolveWriteActor(request, body, buildCliWriteActor(env));
771
+ const result = await enqueueWriteTask("rebuild-fts", () => runRebuildFtsTransaction(actor, {
772
+ mode: body.mode === "simple" ? "simple" : body.mode === "default" ? "default" : undefined,
773
+ }), {
774
+ summarizeResult: (payload) => ({
775
+ rebuilt: payload.rebuilt,
776
+ mode: payload.mode,
777
+ rowCount: payload.rowCount,
778
+ needsRebuild: payload.needsRebuild,
779
+ }),
780
+ });
781
+ writeJsonResponse(response, 200, result);
782
+ return;
783
+ }
746
784
  if (method === "GET" && pathname === "/write-queue/summary") {
747
785
  writeJsonResponse(response, 200, writeQueue.getSummary());
748
786
  return;
@@ -885,6 +923,17 @@ export async function runDaemonServer(options) {
885
923
  }));
886
924
  return;
887
925
  }
926
+ if (method === "GET" && pathname === "/fts/rebuild") {
927
+ writeJsonResponse(response, 200, rebuildFtsCommand(env, {
928
+ mode: url.searchParams.get("mode") === "simple"
929
+ ? "simple"
930
+ : url.searchParams.get("mode") === "default"
931
+ ? "default"
932
+ : undefined,
933
+ check: url.searchParams.get("check") === "true",
934
+ }));
935
+ return;
936
+ }
888
937
  if (method === "GET" && pathname === "/search") {
889
938
  writeJsonResponse(response, 200, await searchPages(env, {
890
939
  query: url.searchParams.get("query") ?? "",
package/dist/index.js CHANGED
@@ -16,6 +16,7 @@ import { registerInitCommand } from "./commands/init.js";
16
16
  import { registerLintCommand } from "./commands/lint.js";
17
17
  import { registerListCommand } from "./commands/list.js";
18
18
  import { registerPageInfoCommand } from "./commands/page-info.js";
19
+ import { registerRebuildFtsCommand } from "./commands/rebuild-fts.js";
19
20
  import { registerSearchCommand } from "./commands/search.js";
20
21
  import { registerSetupCommand } from "./commands/setup.js";
21
22
  import { registerSkillCommand } from "./commands/skill.js";
@@ -80,6 +81,7 @@ function buildProgram() {
80
81
  registerFindCommand(program, runtimeConfig);
81
82
  registerSearchCommand(program);
82
83
  registerFtsCommand(program);
84
+ registerRebuildFtsCommand(program);
83
85
  registerGraphCommand(program);
84
86
  registerPageInfoCommand(program);
85
87
  registerListCommand(program);
@@ -1,5 +1,6 @@
1
1
  import path from "node:path";
2
2
  import { getMeta } from "../core/db.js";
3
+ import { buildFtsQueryPlan } from "../core/fts.js";
3
4
  import { parsePage } from "../core/frontmatter.js";
4
5
  import { normalizePageId, resolvePagePath } from "../core/paths.js";
5
6
  import { compactPageSummary } from "../core/presenters.js";
@@ -10,7 +11,6 @@ import { getVaultQueueSnapshot } from "../core/vault-processing.js";
10
11
  import { camelToSnake } from "../utils/case.js";
11
12
  import { AppError } from "../utils/errors.js";
12
13
  import { listFilesRecursiveSync, pathExistsSync } from "../utils/fs.js";
13
- import { normalizeFtsQuery } from "../utils/segmenter.js";
14
14
  function parsePositiveLimit(value, label, fallback) {
15
15
  const normalized = value ?? fallback;
16
16
  const limit = Number.parseInt(String(normalized), 10);
@@ -306,18 +306,18 @@ export function ftsSearchPages(env = process.env, options) {
306
306
  const { db, config } = openRuntimeDb(env);
307
307
  try {
308
308
  const limit = parsePositiveLimit(options.limit, "--limit", 20);
309
- const normalizedQuery = normalizeFtsQuery(options.query);
309
+ const queryPlan = buildFtsQueryPlan(options.query, config.fts.tokenizer);
310
310
  const rows = db
311
311
  .prepare(`
312
312
  SELECT ${listPageColumns(config).map((column) => `pages.${column}`).join(", ")}, bm25(pages_fts) AS rank
313
313
  FROM pages_fts
314
314
  JOIN pages ON pages.rowid = pages_fts.rowid
315
- WHERE pages_fts MATCH ?
315
+ WHERE ${queryPlan.whereClause}
316
316
  ${options.type ? "AND pages.page_type = ?" : ""}
317
317
  ORDER BY rank
318
318
  LIMIT ?
319
319
  `)
320
- .all(...(options.type ? [normalizedQuery, options.type, limit] : [normalizedQuery, limit]));
320
+ .all(...(options.type ? [...queryPlan.params, options.type, limit] : [...queryPlan.params, limit]));
321
321
  return rows.map((row) => ({
322
322
  ...compactPageSummary(mapPageRow(row, config), config),
323
323
  summaryText: row.summary_text,
@@ -1,18 +1,33 @@
1
1
  import { getTemplate } from "../core/config.js";
2
+ import { inspectFtsIndex, openDb, rebuildFts as rebuildFtsIndex } from "../core/db.js";
3
+ import { createFtsTable, FTS_INDEX_VERSION } from "../core/fts.js";
2
4
  import { resolveAgentSettings } from "../core/paths.js";
3
5
  import { normalizePageId } from "../core/paths.js";
4
6
  import { createPageFromTemplate } from "../core/page-files.js";
5
7
  import { updatePageById } from "../core/page-files.js";
6
8
  import { readCanonicalPageSourceById } from "../core/page-source.js";
7
- import { loadRuntimeConfig } from "../core/runtime.js";
9
+ import { getEmbeddingDimensionFromEnv, loadRuntimeConfig } from "../core/runtime.js";
8
10
  import { openRuntimeDb } from "../core/runtime.js";
9
11
  import { syncWorkspace } from "../core/sync.js";
10
12
  import { getVaultQueueItem, processVaultQueueBatch } from "../core/vault-processing.js";
11
13
  import { selectPageById } from "../core/query.js";
12
14
  import { AppError, asAppError } from "../utils/errors.js";
15
+ import { pathExistsSync } from "../utils/fs.js";
13
16
  function isPlainObject(value) {
14
17
  return Object.prototype.toString.call(value) === "[object Object]";
15
18
  }
19
/**
 * Return `config` with the FTS tokenizer overridden to `mode`.
 * When no override is requested, or it already matches the configured
 * tokenizer, the original config object is returned untouched.
 */
function applyFtsModeOverride(config, mode) {
  const unchanged = !mode || mode === config.fts.tokenizer;
  if (unchanged) {
    return config;
  }
  const fts = { ...config.fts, tokenizer: mode };
  return { ...config, fts };
}
16
31
  function assertValidSyncCommandOptions(options) {
17
32
  if (options.vaultFileId && !options.process) {
18
33
  throw new AppError("--vault-file requires --process.", "config");
@@ -132,6 +147,64 @@ export async function runSyncCommand(env = process.env, options = {}) {
132
147
  }),
133
148
  };
134
149
  }
150
/**
 * Inspect or rebuild the SQLite FTS index (`pages_fts`) and its metadata.
 *
 * @param {NodeJS.ProcessEnv} [env] - Environment used to resolve the runtime config.
 * @param {{ mode?: string, check?: boolean }} [options]
 *   `mode` temporarily overrides the configured tokenizer for this run;
 *   `check` reports drift/metadata without rebuilding.
 * @returns {object} Report: row count, expected vs stored index version,
 *   tokenizer mode, extension info, last rebuild timestamp, drift flags,
 *   and any detected problems.
 */
export function rebuildFtsCommand(env = process.env, options = {}) {
  const { paths, config } = loadRuntimeConfig(env);
  const effectiveConfig = applyFtsModeOverride(config, options.mode);
  // With no database there is nothing to inspect or rebuild; report the
  // problem instead of letting openDb create an empty database as a side effect.
  if (!pathExistsSync(paths.dbPath)) {
    return {
      checked: true,
      rebuilt: false,
      mode: effectiveConfig.fts.tokenizer,
      rowCount: 0,
      expectedIndexVersion: FTS_INDEX_VERSION,
      storedIndexVersion: null,
      storedTokenizerMode: null,
      storedExtensionVersion: null,
      extensionVersion: null,
      simpleExtensionPath: null,
      lastRebuildAt: null,
      needsRecreate: true,
      needsRebuild: true,
      problems: [`Wiki database does not exist yet: ${paths.dbPath}`],
    };
  }
  // ensureFts: false — inspect the existing table before touching it.
  const { db, ftsExtensionVersion, simpleExtensionPath, initialFtsInspection } = openDb(
    paths.dbPath,
    effectiveConfig,
    getEmbeddingDimensionFromEnv(env),
    paths.packageRoot,
    { ensureFts: false },
  );
  try {
    const shouldRebuild = !options.check;
    if (shouldRebuild) {
      if (initialFtsInspection.needsRecreate) {
        // Schema or tokenizer drifted: recreate the virtual table from scratch.
        if (initialFtsInspection.hasTable) {
          db.exec("DROP TABLE pages_fts");
        }
        createFtsTable(db, effectiveConfig.fts.tokenizer);
      }
      rebuildFtsIndex(db, effectiveConfig, ftsExtensionVersion);
    }
    // After a rebuild, re-inspect so the report reflects the new state;
    // in --check mode the initial inspection is already authoritative.
    const inspection = shouldRebuild
      ? inspectFtsIndex(db, effectiveConfig, ftsExtensionVersion)
      : initialFtsInspection;
    return {
      checked: true,
      rebuilt: shouldRebuild,
      mode: effectiveConfig.fts.tokenizer,
      rowCount: inspection.rowCount,
      expectedIndexVersion: inspection.expectedIndexVersion,
      storedIndexVersion: inspection.storedIndexVersion,
      storedTokenizerMode: inspection.storedTokenizerMode,
      storedExtensionVersion: inspection.storedExtensionVersion,
      extensionVersion: ftsExtensionVersion,
      simpleExtensionPath,
      lastRebuildAt: inspection.lastRebuildAt,
      needsRecreate: inspection.needsRecreate,
      needsRebuild: inspection.needsRebuild,
      problems: inspection.problems,
    };
  } finally {
    db.close();
  }
}
135
208
  export async function createPage(env = process.env, options) {
136
209
  const { paths, config } = loadRuntimeConfig(env);
137
210
  getTemplate(config, options.type);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@biaoo/tiangong-wiki",
3
- "version": "0.3.3",
3
+ "version": "0.3.5",
4
4
  "description": "Local-first wiki index and query engine for Markdown knowledge pages (Tiangong Wiki).",
5
5
  "type": "module",
6
6
  "publishConfig": {
@@ -35,6 +35,7 @@ Global workspace resolution priority:
35
35
  | `find` | Query pages by structured metadata filters |
36
36
  | `search` | Semantic search over page summary embeddings |
37
37
  | `fts` | Full-text search over title, tags, and summary text |
38
+ | `rebuild-fts` | Inspect or rebuild the SQLite FTS index |
38
39
  | `graph` | Traverse the knowledge graph from a root node |
39
40
  | `page-info` | Show full metadata and edges for a single page |
40
41
  | `list` | List wiki pages |
@@ -158,6 +159,31 @@ tiangong-wiki fts <query> [--type <pageType>] [--limit <n>]
158
159
 
159
160
  Full-text search against the `pages_fts` table (title, tags, summary_text). Default limit: 20.
160
161
 
162
+ `wiki.config.json` can set:
163
+
164
+ ```json
165
+ {
166
+ "fts": {
167
+ "tokenizer": "simple"
168
+ }
169
+ }
170
+ ```
171
+
172
+ `simple` is the default. Set `tokenizer` to `default` only if you need the legacy `Intl.Segmenter`-based FTS behavior.
173
+
174
+ ### rebuild-fts
175
+
176
+ ```
177
+ tiangong-wiki rebuild-fts [--mode <default|simple>] [--check]
178
+ ```
179
+
180
+ Inspects or rebuilds the `pages_fts` table and its metadata. Useful after changing the FTS tokenizer mode in `wiki.config.json`, for repairing index drift, or for forcing a clean rebuild.
181
+
182
+ Options:
183
+
184
+ - `--mode <default|simple>` — temporarily override the configured tokenizer mode for this command
185
+ - `--check` — report drift and metadata without rebuilding
186
+
161
187
  ### graph
162
188
 
163
189
  ```