@biaoo/tiangong-wiki 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/LICENSE +21 -0
  2. package/README.md +167 -0
  3. package/README.zh-CN.md +167 -0
  4. package/SKILL.md +116 -0
  5. package/agents/openai.yaml +4 -0
  6. package/assets/config.example.env +18 -0
  7. package/assets/templates/achievement.md +32 -0
  8. package/assets/templates/bridge.md +33 -0
  9. package/assets/templates/concept.md +47 -0
  10. package/assets/templates/faq.md +31 -0
  11. package/assets/templates/lesson.md +31 -0
  12. package/assets/templates/method.md +31 -0
  13. package/assets/templates/misconception.md +35 -0
  14. package/assets/templates/person.md +31 -0
  15. package/assets/templates/research-note.md +34 -0
  16. package/assets/templates/resume.md +34 -0
  17. package/assets/templates/source-summary.md +35 -0
  18. package/assets/vllm/qwen3_5_openai_developer.jinja +182 -0
  19. package/assets/wiki.config.default.json +193 -0
  20. package/dist/commands/check-config.js +77 -0
  21. package/dist/commands/create.js +32 -0
  22. package/dist/commands/daemon.js +186 -0
  23. package/dist/commands/dashboard.js +112 -0
  24. package/dist/commands/doctor.js +22 -0
  25. package/dist/commands/export-graph.js +28 -0
  26. package/dist/commands/export-index.js +31 -0
  27. package/dist/commands/find.js +36 -0
  28. package/dist/commands/fts.js +32 -0
  29. package/dist/commands/graph.js +35 -0
  30. package/dist/commands/init.js +48 -0
  31. package/dist/commands/lint.js +35 -0
  32. package/dist/commands/list.js +28 -0
  33. package/dist/commands/page-info.js +24 -0
  34. package/dist/commands/search.js +32 -0
  35. package/dist/commands/setup.js +15 -0
  36. package/dist/commands/stat.js +20 -0
  37. package/dist/commands/sync.js +38 -0
  38. package/dist/commands/template.js +71 -0
  39. package/dist/commands/type.js +88 -0
  40. package/dist/commands/vault.js +64 -0
  41. package/dist/core/agent.js +201 -0
  42. package/dist/core/cli-env.js +129 -0
  43. package/dist/core/codex-workflow.js +233 -0
  44. package/dist/core/config.js +126 -0
  45. package/dist/core/db.js +292 -0
  46. package/dist/core/embedding.js +104 -0
  47. package/dist/core/frontmatter.js +287 -0
  48. package/dist/core/indexer.js +241 -0
  49. package/dist/core/onboarding.js +967 -0
  50. package/dist/core/page-files.js +91 -0
  51. package/dist/core/paths.js +161 -0
  52. package/dist/core/presenters.js +23 -0
  53. package/dist/core/query.js +58 -0
  54. package/dist/core/runtime.js +20 -0
  55. package/dist/core/sync.js +235 -0
  56. package/dist/core/synology.js +412 -0
  57. package/dist/core/template-evolution.js +38 -0
  58. package/dist/core/vault-processing.js +742 -0
  59. package/dist/core/vault.js +594 -0
  60. package/dist/core/workflow-context.js +188 -0
  61. package/dist/core/workflow-result.js +162 -0
  62. package/dist/core/workspace-bootstrap.js +30 -0
  63. package/dist/core/workspace-skills.js +220 -0
  64. package/dist/daemon/client.js +147 -0
  65. package/dist/daemon/server.js +807 -0
  66. package/dist/daemon/state.js +53 -0
  67. package/dist/dashboard/assets/index-1FgAUZ28.css +1 -0
  68. package/dist/dashboard/assets/index-6A0PWT4X.js +154 -0
  69. package/dist/dashboard/assets/jetbrains-mono-cyrillic-400-normal-BEIGL1Tu.woff2 +0 -0
  70. package/dist/dashboard/assets/jetbrains-mono-cyrillic-400-normal-ugxPyKxw.woff +0 -0
  71. package/dist/dashboard/assets/jetbrains-mono-cyrillic-500-normal-DJqRU3vO.woff +0 -0
  72. package/dist/dashboard/assets/jetbrains-mono-cyrillic-500-normal-DmUKJPL_.woff2 +0 -0
  73. package/dist/dashboard/assets/jetbrains-mono-cyrillic-700-normal-BWTpRfYl.woff2 +0 -0
  74. package/dist/dashboard/assets/jetbrains-mono-cyrillic-700-normal-CEoEElIJ.woff +0 -0
  75. package/dist/dashboard/assets/jetbrains-mono-greek-400-normal-B9oWc5Lo.woff +0 -0
  76. package/dist/dashboard/assets/jetbrains-mono-greek-400-normal-C190GLew.woff2 +0 -0
  77. package/dist/dashboard/assets/jetbrains-mono-greek-500-normal-D7SFKleX.woff +0 -0
  78. package/dist/dashboard/assets/jetbrains-mono-greek-500-normal-JpySY46c.woff2 +0 -0
  79. package/dist/dashboard/assets/jetbrains-mono-greek-700-normal-C6CZE3T8.woff2 +0 -0
  80. package/dist/dashboard/assets/jetbrains-mono-greek-700-normal-DEigVDxa.woff +0 -0
  81. package/dist/dashboard/assets/jetbrains-mono-latin-400-normal-6-qcROiO.woff +0 -0
  82. package/dist/dashboard/assets/jetbrains-mono-latin-400-normal-V6pRDFza.woff2 +0 -0
  83. package/dist/dashboard/assets/jetbrains-mono-latin-500-normal-BWZEU5yA.woff2 +0 -0
  84. package/dist/dashboard/assets/jetbrains-mono-latin-500-normal-CJOVTJB7.woff +0 -0
  85. package/dist/dashboard/assets/jetbrains-mono-latin-700-normal-BYuf6tUa.woff2 +0 -0
  86. package/dist/dashboard/assets/jetbrains-mono-latin-700-normal-D3wTyLJW.woff +0 -0
  87. package/dist/dashboard/assets/jetbrains-mono-latin-ext-400-normal-Bc8Ftmh3.woff2 +0 -0
  88. package/dist/dashboard/assets/jetbrains-mono-latin-ext-400-normal-fXTG6kC5.woff +0 -0
  89. package/dist/dashboard/assets/jetbrains-mono-latin-ext-500-normal-Cut-4mMH.woff2 +0 -0
  90. package/dist/dashboard/assets/jetbrains-mono-latin-ext-500-normal-ckzbgY84.woff +0 -0
  91. package/dist/dashboard/assets/jetbrains-mono-latin-ext-700-normal-CZipNAKV.woff2 +0 -0
  92. package/dist/dashboard/assets/jetbrains-mono-latin-ext-700-normal-CxPITLHs.woff +0 -0
  93. package/dist/dashboard/assets/jetbrains-mono-vietnamese-400-normal-CqNFfHCs.woff +0 -0
  94. package/dist/dashboard/assets/jetbrains-mono-vietnamese-500-normal-DNRqzVM1.woff +0 -0
  95. package/dist/dashboard/assets/jetbrains-mono-vietnamese-700-normal-BDLVIk2r.woff +0 -0
  96. package/dist/dashboard/assets/space-grotesk-latin-400-normal-BnQMeOim.woff +0 -0
  97. package/dist/dashboard/assets/space-grotesk-latin-400-normal-CJ-V5oYT.woff2 +0 -0
  98. package/dist/dashboard/assets/space-grotesk-latin-500-normal-CNSSEhBt.woff +0 -0
  99. package/dist/dashboard/assets/space-grotesk-latin-500-normal-lFbtlQH6.woff2 +0 -0
  100. package/dist/dashboard/assets/space-grotesk-latin-700-normal-CwsQ-cCU.woff +0 -0
  101. package/dist/dashboard/assets/space-grotesk-latin-700-normal-RjhwGPKo.woff2 +0 -0
  102. package/dist/dashboard/assets/space-grotesk-latin-ext-400-normal-CfP_5XZW.woff2 +0 -0
  103. package/dist/dashboard/assets/space-grotesk-latin-ext-400-normal-DRPE3kg4.woff +0 -0
  104. package/dist/dashboard/assets/space-grotesk-latin-ext-500-normal-3dgZTiw9.woff +0 -0
  105. package/dist/dashboard/assets/space-grotesk-latin-ext-500-normal-DUe3BAxM.woff2 +0 -0
  106. package/dist/dashboard/assets/space-grotesk-latin-ext-700-normal-BQnZhY3m.woff2 +0 -0
  107. package/dist/dashboard/assets/space-grotesk-latin-ext-700-normal-HVCqSBdx.woff +0 -0
  108. package/dist/dashboard/assets/space-grotesk-vietnamese-400-normal-B7xT_GF5.woff2 +0 -0
  109. package/dist/dashboard/assets/space-grotesk-vietnamese-400-normal-BIWiOVfw.woff +0 -0
  110. package/dist/dashboard/assets/space-grotesk-vietnamese-500-normal-BTqKIpxg.woff +0 -0
  111. package/dist/dashboard/assets/space-grotesk-vietnamese-500-normal-BmEvtly_.woff2 +0 -0
  112. package/dist/dashboard/assets/space-grotesk-vietnamese-700-normal-DMty7AZE.woff2 +0 -0
  113. package/dist/dashboard/assets/space-grotesk-vietnamese-700-normal-Duxec5Rn.woff +0 -0
  114. package/dist/dashboard/index.html +18 -0
  115. package/dist/index.js +86 -0
  116. package/dist/operations/dashboard.js +1231 -0
  117. package/dist/operations/export.js +110 -0
  118. package/dist/operations/query.js +649 -0
  119. package/dist/operations/type-template.js +210 -0
  120. package/dist/operations/write.js +143 -0
  121. package/dist/types/config.js +1 -0
  122. package/dist/types/page.js +1 -0
  123. package/dist/utils/case.js +22 -0
  124. package/dist/utils/errors.js +26 -0
  125. package/dist/utils/fs.js +77 -0
  126. package/dist/utils/output.js +33 -0
  127. package/dist/utils/process.js +60 -0
  128. package/dist/utils/segmenter.js +24 -0
  129. package/dist/utils/slug.js +10 -0
  130. package/dist/utils/time.js +24 -0
  131. package/package.json +64 -0
  132. package/references/cli-interface.md +312 -0
  133. package/references/env.md +122 -0
  134. package/references/template-design-guide.md +271 -0
  135. package/references/vault-to-wiki-instruction.md +110 -0
  136. package/references/wiki-maintenance-instruction.md +190 -0
@@ -0,0 +1,594 @@
1
+ import { execFileSync } from "node:child_process";
2
+ import { readFileSync } from "node:fs";
3
+ import path from "node:path";
4
+ import AdmZip from "adm-zip";
5
+ import { normalizeSynologyRemotePath, withSynologyClient } from "./synology.js";
6
+ import { AppError } from "../utils/errors.js";
7
+ import { ensureDirSync, fileStatSync, listFilesRecursiveSync, pathExistsSync, readTextFileSync, sha256FileSync, sha256Text, writeTextFileSync, } from "../utils/fs.js";
8
+ import { toOffsetIso } from "../utils/time.js";
9
/**
 * Derives a vault file id from a file's location relative to the vault root.
 *
 * @param {string} root - Vault root directory.
 * @param {string} filePath - Path of the file to identify.
 * @returns {string} The relative path with "/" between segments, regardless
 *   of the platform's native separator.
 */
function normalizeVaultId(root, filePath) {
  const relativePath = path.relative(root, filePath);
  const segments = relativePath.split(path.sep);
  return segments.join("/");
}
12
/**
 * Produces the change-detection hash for a vault file.
 *
 * @param {string} mode - "mtime" hashes a metadata fingerprint; any other
 *   value hashes the file's contents.
 * @param {string} fileId - Vault-relative id of the file.
 * @param {string} filePath - Path of the file on disk.
 * @param {number} fileSize - File size, included in the "mtime" fingerprint.
 * @param {number} fileMtime - Modification time, included in the "mtime" fingerprint.
 * @returns {string} Hash returned by `sha256Text` or `sha256FileSync`.
 */
function computeVaultHash(mode, fileId, filePath, fileSize, fileMtime) {
  if (mode !== "mtime") {
    return sha256FileSync(filePath);
  }
  // Metadata-only fingerprint: avoids reading the file body.
  const fingerprint = `${fileId}:${filePath}:${fileSize}:${fileMtime}`;
  return sha256Text(fingerprint);
}
18
/**
 * Extracts a file's extension in canonical form: lower-case, no leading dot.
 *
 * @param {string} filePath - Path or file name.
 * @returns {string|null} The normalized extension, or null when the path has none.
 */
function normalizeVaultFileExtension(filePath) {
  const dottedExt = path.extname(filePath);
  const bareExt = dottedExt.replace(/^\./, "").toLowerCase();
  return bareExt.length > 0 ? bareExt : null;
}
22
/**
 * Builds the lookup set of permitted vault file extensions.
 *
 * Each configured entry is trimmed, stripped of one leading dot and
 * lower-cased; entries that end up empty are dropped.
 *
 * @param {string[]} vaultFileTypes - Configured extensions (e.g. "pdf", ".MD").
 * @returns {Set<string>} Normalized extensions.
 */
function createAllowedVaultFileTypeSet(vaultFileTypes) {
  const allowed = new Set();
  for (const rawType of vaultFileTypes) {
    const normalized = rawType.trim().replace(/^\./, "").toLowerCase();
    if (normalized) {
      allowed.add(normalized);
    }
  }
  return allowed;
}
25
/**
 * Tells whether a file's extension is among the permitted vault file types.
 *
 * @param {string} filePath - Path of the candidate file.
 * @param {Set<string>} allowedFileTypes - Normalized extensions to accept.
 * @returns {boolean} False for files without an extension or with a
 *   non-listed extension.
 */
function isAllowedVaultFile(filePath, allowedFileTypes) {
  const extension = normalizeVaultFileExtension(filePath);
  if (extension === null) {
    return false;
  }
  return allowedFileTypes.has(extension);
}
29
/**
 * Scans a local vault directory and describes every file of an allowed type.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {string} hashMode - Hash strategy forwarded to `computeVaultHash`.
 * @param {string[]} vaultFileTypes - Configured extensions to include.
 * @returns {object[]} One record per matching file; all records of a scan
 *   share the same `indexedAt` timestamp.
 */
function localVaultFiles(vaultPath, hashMode, vaultFileTypes) {
  const indexedAt = toOffsetIso();
  const allowedFileTypes = createAllowedVaultFileTypeSet(vaultFileTypes);
  const records = [];
  for (const filePath of listFilesRecursiveSync(vaultPath)) {
    if (!isAllowedVaultFile(filePath, allowedFileTypes)) {
      continue;
    }
    const { size, mtimeMs } = fileStatSync(filePath);
    const id = normalizeVaultId(vaultPath, filePath);
    const fileExt = normalizeVaultFileExtension(filePath);
    records.push({
      id,
      fileName: path.basename(filePath),
      fileExt,
      // The source type is simply the extension for vault files.
      sourceType: fileExt,
      fileSize: size,
      filePath,
      contentHash: computeVaultHash(hashMode, id, filePath, size, mtimeMs),
      fileMtime: mtimeMs,
      indexedAt,
    });
  }
  return records;
}
49
/**
 * Returns the path of the sidecar metadata file kept next to a cached download.
 *
 * @param {string} localPath - Path of the cached file.
 * @returns {string} `localPath` with ".wiki-cache.json" appended.
 */
function getSynologyCacheMetaPath(localPath) {
  const sidecarSuffix = ".wiki-cache.json";
  return `${localPath}${sidecarSuffix}`;
}
52
/**
 * Maps a vault file record to its location inside the local cache directory.
 *
 * @param {string} vaultPath - Local directory that holds cached downloads.
 * @param {{ id: string }} file - Vault file record; `id` is a "/"-separated
 *   relative path.
 * @returns {string} Path under `vaultPath`, joined with the platform separator.
 */
export function getSynologyCacheLocalPath(vaultPath, file) {
  const idSegments = file.id.split("/");
  return path.join(vaultPath, ...idSegments);
}
55
/**
 * Loads and validates the sidecar metadata recorded for a cached download.
 *
 * @param {string} localPath - Path of the cached file.
 * @returns {{ remotePath: string, fileSize: number, fileMtime: (number|null) }|null}
 *   The metadata, or null when the sidecar is missing, unreadable, not valid
 *   JSON, or does not have the expected field types.
 */
function readSynologyCacheMetadata(localPath) {
  const metadataPath = getSynologyCacheMetaPath(localPath);
  if (!pathExistsSync(metadataPath)) {
    return null;
  }
  try {
    // Destructuring stays inside the try so a JSON `null` payload is
    // treated like any other malformed sidecar.
    const { remotePath, fileSize, fileMtime } = JSON.parse(readTextFileSync(metadataPath));
    const hasExpectedShape = typeof remotePath === "string" &&
      typeof fileSize === "number" &&
      (fileMtime === null || typeof fileMtime === "number");
    return hasExpectedShape ? { remotePath, fileSize, fileMtime } : null;
  }
  catch {
    return null;
  }
}
77
/**
 * Writes the sidecar metadata describing which remote file a cached copy
 * corresponds to.
 *
 * @param {string} localPath - Path of the cached file.
 * @param {{ filePath: string, fileSize: number, fileMtime: (number|null) }} file -
 *   Vault file record the cached copy was downloaded for.
 * @returns {void}
 */
function writeSynologyCacheMetadata(localPath, file) {
  const payload = {
    remotePath: file.filePath,
    fileSize: file.fileSize,
    fileMtime: file.fileMtime,
  };
  const serialized = JSON.stringify(payload, null, 2);
  writeTextFileSync(getSynologyCacheMetaPath(localPath), `${serialized}\n`);
}
84
/**
 * Checks whether a cached copy still matches the remote file it came from.
 *
 * @param {string} localPath - Path of the cached file.
 * @param {{ filePath: string, fileSize: number, fileMtime: (number|null) }} file -
 *   Current remote file record.
 * @returns {boolean} True only when the cached file exists and its sidecar
 *   records the same remote path, size and modification time.
 */
function isSynologyCacheFresh(localPath, file) {
  if (!pathExistsSync(localPath)) {
    return false;
  }
  const cached = readSynologyCacheMetadata(localPath);
  if (!cached) {
    return false;
  }
  const samePath = cached.remotePath === file.filePath;
  const sameSize = cached.fileSize === file.fileSize;
  const sameMtime = cached.fileMtime === file.fileMtime;
  return samePath && sameSize && sameMtime;
}
96
/**
 * Reports the state of the local cache entry for a vault file.
 *
 * @param {string} vaultPath - Local directory that holds cached downloads.
 * @param {object} file - Vault file record.
 * @param {object} [env=process.env] - Environment; `VAULT_SOURCE` selects the source.
 * @returns {{ kind: string, localPath: string, metadataPath: string }}
 *   `kind` is "not-applicable" when the source is not Synology (then
 *   `localPath` is the file's own path), otherwise "fresh", "stale"
 *   (cached file exists but does not match) or "missing".
 */
export function getSynologyCacheStatus(vaultPath, file, env = process.env) {
  const source = (env.VAULT_SOURCE ?? "local").trim().toLowerCase();
  const localPath = getSynologyCacheLocalPath(vaultPath, file);
  const metadataPath = getSynologyCacheMetaPath(localPath);
  if (source !== "synology") {
    return { kind: "not-applicable", localPath: file.filePath, metadataPath };
  }
  let kind;
  if (isSynologyCacheFresh(localPath, file)) {
    kind = "fresh";
  }
  else if (pathExistsSync(localPath)) {
    kind = "stale";
  }
  else {
    kind = "missing";
  }
  return { kind, localPath, metadataPath };
}
127
/**
 * Picks the path of a Synology listing item.
 *
 * The first non-nullish value among `path`, `real_path` and
 * `additional.real_path` is used.
 *
 * @param {object} item - Listing item returned by the Synology client.
 * @returns {string|null} The path, or null when the chosen value is not a
 *   non-empty string.
 */
function getSynologyItemPath(item) {
  const candidate = item.path ?? item.real_path ?? item.additional?.real_path;
  if (typeof candidate !== "string" || candidate.length === 0) {
    return null;
  }
  return candidate;
}
131
/**
 * Tells whether a Synology listing item is a directory.
 *
 * @param {object} item - Listing item returned by the Synology client.
 * @returns {boolean} True when `isdir` is exactly true, or when `type` or
 *   `additional.type` equals "dir".
 */
function isSynologyDirectory(item) {
  if (item.isdir === true) {
    return true;
  }
  if (item.type === "dir") {
    return true;
  }
  return item.additional?.type === "dir";
}
134
/**
 * Recursively lists a Synology folder and appends a record for every file of
 * an allowed type to `results`.
 *
 * Sub-folders are scanned one after another, depth-first. Items without a
 * usable path, and files whose path does not resolve to a location under
 * `remoteRoot`, are skipped.
 *
 * @param {object} client - Synology client exposing `listFolderAll(folder)`.
 * @param {string} remoteRoot - Remote root used to derive relative file ids.
 * @param {string} currentFolder - Remote folder to list in this call.
 * @param {object[]} results - Output array; mutated in place.
 * @param {Set<string>} allowedFileTypes - Normalized extensions to accept.
 * @returns {Promise<void>}
 */
async function scanSynologyFolder(client, remoteRoot, currentFolder, results, allowedFileTypes) {
  const indexedAt = toOffsetIso();
  const entries = await client.listFolderAll(currentFolder);
  for (const entry of entries) {
    const entryPath = getSynologyItemPath(entry);
    if (!entryPath) {
      continue;
    }
    if (isSynologyDirectory(entry)) {
      await scanSynologyFolder(client, remoteRoot, entryPath, results, allowedFileTypes);
      continue;
    }
    if (!isAllowedVaultFile(entryPath, allowedFileTypes)) {
      continue;
    }
    const relativeId = path.posix.relative(remoteRoot, entryPath).replace(/^\/+/, "");
    const escapesRoot = relativeId.startsWith("../");
    if (!relativeId || escapesRoot) {
      continue;
    }
    const fileExt = normalizeVaultFileExtension(entryPath);
    // Size and mtime may be reported under `additional` or at the top level.
    const fileSize = Number(entry.additional?.size ?? entry.size ?? 0);
    const fileMtime = Number(entry.additional?.time?.mtime ?? entry.time?.mtime ?? 0);
    const metadataFingerprint = `${relativeId}:${entryPath}:${fileSize}:${fileMtime}`;
    results.push({
      id: relativeId,
      fileName: entry.name ?? path.basename(entryPath),
      fileExt,
      sourceType: fileExt,
      fileSize,
      filePath: entryPath,
      // Metadata-based hash; callers may replace it with a content hash.
      contentHash: sha256Text(metadataFingerprint),
      fileMtime,
      indexedAt,
    });
  }
}
169
/**
 * Lists the vault files stored under a Synology remote folder.
 *
 * @param {string} remoteRoot - Configured remote folder.
 * @param {string} vaultPath - Local directory used for cached downloads.
 * @param {object} env - Environment passed to the Synology client.
 * @param {string} hashMode - With "content" every file is fetched into the
 *   local cache (if not already fresh) and hashed by content; otherwise the
 *   metadata-based hashes from the scan are kept.
 * @param {string[]} vaultFileTypes - Configured extensions to include.
 * @returns {Promise<object[]>} File records sorted by id.
 */
async function synologyVaultFiles(remoteRoot, vaultPath, env, hashMode, vaultFileTypes) {
  const scanned = [];
  const root = normalizeSynologyRemotePath(remoteRoot);
  const allowedFileTypes = createAllowedVaultFileTypeSet(vaultFileTypes);
  const compareById = (left, right) => left.id.localeCompare(right.id);
  return withSynologyClient(env, async (client) => {
    await scanSynologyFolder(client, root, root, scanned, allowedFileTypes);
    scanned.sort(compareById);
    if (hashMode !== "content") {
      return scanned;
    }
    const rehashed = [];
    // Downloads run one at a time, reusing the already-open client.
    for (const file of scanned) {
      const cachedPath = await ensureLocalVaultFile(file, vaultPath, env, client);
      rehashed.push({ ...file, contentHash: sha256FileSync(cachedPath) });
    }
    return rehashed;
  });
}
190
/**
 * Loads every row of `vault_files`, keyed by file id.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()`.
 * @returns {Map<string, object>} Rows with camelCase column aliases.
 */
function getExistingVaultFiles(db) {
  const selectAll = db.prepare(`
    SELECT
      id,
      file_name AS fileName,
      file_ext AS fileExt,
      source_type AS sourceType,
      file_size AS fileSize,
      file_path AS filePath,
      content_hash AS contentHash,
      file_mtime AS fileMtime,
      indexed_at AS indexedAt
    FROM vault_files
  `);
  const byId = new Map();
  for (const row of selectAll.all()) {
    byId.set(row.id, row);
  }
  return byId;
}
206
/**
 * Maps a file extension to its processing-queue priority.
 *
 * @param {string|null|undefined} fileExt - Extension without a dot; matched
 *   case-insensitively.
 * @returns {number} 100 for pdf down to 65 for csv, 10 for common image
 *   formats, and 20 for anything else (including a missing extension).
 */
export function getVaultQueuePriority(fileExt) {
  const normalized = (fileExt ?? "").toLowerCase();
  switch (normalized) {
    case "pdf":
      return 100;
    case "docx":
      return 95;
    case "pptx":
      return 90;
    case "xlsx":
      return 85;
    case "md":
      return 80;
    case "txt":
      return 70;
    case "csv":
      return 65;
    case "png":
    case "jpg":
    case "jpeg":
    case "webp":
      return 10;
    default:
      return 20;
  }
}
234
/**
 * Collects the current vault files from the configured source.
 *
 * @param {string} vaultPath - Local vault directory (also the cache location
 *   for a Synology source).
 * @param {string[]} vaultFileTypes - Configured extensions to include.
 * @param {object} [env=process.env] - Environment. `VAULT_SOURCE` selects
 *   "synology" or the local file system; `VAULT_HASH_MODE` is "mtime" or,
 *   for any other value, "content".
 * @returns {Promise<object[]>} Vault file records.
 * @throws {AppError} When the source is Synology and
 *   `VAULT_SYNOLOGY_REMOTE_PATH` is not set.
 */
export async function collectVaultFiles(vaultPath, vaultFileTypes, env = process.env) {
  const source = (env.VAULT_SOURCE ?? "local").trim().toLowerCase();
  const requestedHashMode = (env.VAULT_HASH_MODE ?? "content").trim().toLowerCase();
  const hashMode = requestedHashMode === "mtime" ? "mtime" : "content";
  if (source !== "synology") {
    return localVaultFiles(vaultPath, hashMode, vaultFileTypes);
  }
  const remotePath = env.VAULT_SYNOLOGY_REMOTE_PATH;
  if (!remotePath) {
    throw new AppError("VAULT_SYNOLOGY_REMOTE_PATH is required when VAULT_SOURCE=synology", "config");
  }
  return synologyVaultFiles(remotePath, vaultPath, env, hashMode, vaultFileTypes);
}
248
/**
 * Guarantees that a vault file is readable from the local file system and
 * returns the path to read it from.
 *
 * For a non-Synology source this is the file's own path. For Synology the
 * file is downloaded into the local cache unless a fresh cached copy exists,
 * and the cache sidecar is rewritten after each download.
 *
 * @param {object} file - Vault file record (`id`, `filePath`, `fileSize`, `fileMtime`).
 * @param {string} vaultPath - Local directory that holds cached downloads.
 * @param {object} [env=process.env] - Environment with the `VAULT_*` settings.
 * @param {object} [client] - Already-open Synology client to reuse; when
 *   omitted a client is opened just for this download.
 * @returns {Promise<string>} Local path of the file.
 * @throws {AppError} When the source is Synology and
 *   `VAULT_SYNOLOGY_REMOTE_PATH` is not set.
 */
export async function ensureLocalVaultFile(file, vaultPath, env = process.env, client) {
  const source = (env.VAULT_SOURCE ?? "local").trim().toLowerCase();
  if (source !== "synology") {
    return file.filePath;
  }
  const remoteRoot = env.VAULT_SYNOLOGY_REMOTE_PATH;
  if (!remoteRoot) {
    throw new AppError("VAULT_SYNOLOGY_REMOTE_PATH is required when VAULT_SOURCE=synology", "config");
  }
  const localPath = getSynologyCacheLocalPath(vaultPath, file);
  if (isSynologyCacheFresh(localPath, file)) {
    return localPath;
  }
  ensureDirSync(path.dirname(localPath));
  const remotePath = path.posix.join(normalizeSynologyRemotePath(remoteRoot), file.id);
  const downloadWith = (activeClient) => activeClient.downloadFile(remotePath, localPath);
  if (client) {
    await downloadWith(client);
  }
  else {
    await withSynologyClient(env, async (synologyClient) => {
      await downloadWith(synologyClient);
    });
  }
  // Record what was downloaded so the next freshness check can succeed.
  writeSynologyCacheMetadata(localPath, file);
  return localPath;
}
274
// Extensions (with leading dot) whose files are read directly as UTF-8 text.
const TEXT_EXTS = new Set([
  ".txt",
  ".md",
  ".markdown",
  ".json",
  ".csv",
  ".tsv",
  ".yaml",
  ".yml",
]);
// Extensions (with leading dot) of ZIP-based Office documents whose XML parts are scanned for text.
const ZIP_EXTS = new Set([
  ".docx",
  ".pptx",
  ".xlsx",
]);
276
/**
 * Reads a file as UTF-8 text on a best-effort basis.
 *
 * @param {string} filePath - File to read.
 * @returns {string} The trimmed content with NUL characters replaced by
 *   spaces, or "" when the file cannot be read.
 */
function readTextDirect(filePath) {
  let raw;
  try {
    raw = readFileSync(filePath, "utf8");
  }
  catch {
    return "";
  }
  return raw.replace(/\0/g, " ").trim();
}
285
/**
 * Measures how much of a string consists of printable characters.
 *
 * A UTF-16 code unit counts as printable when it is a tab, line feed or
 * carriage return, or has a code of 32 or above other than 127 (DEL).
 *
 * @param {string} text - Text to inspect.
 * @returns {number} Fraction in [0, 1]; 0 for empty or missing text.
 */
function printableRatio(text) {
  if (!text) {
    return 0;
  }
  const isPrintable = (code) => code === 9 || code === 10 || code === 13 || (code >= 32 && code !== 127);
  let printable = 0;
  for (let index = 0; index < text.length; index += 1) {
    if (isPrintable(text.charCodeAt(index))) {
      printable += 1;
    }
  }
  return printable / Math.max(text.length, 1);
}
297
/**
 * Reads a file as text, accepting the result only when it looks like text.
 *
 * @param {string} filePath - File to read.
 * @returns {string} The file's text when at least 85% of it is printable,
 *   otherwise "".
 */
function tryPlainText(filePath) {
  const candidate = readTextDirect(filePath);
  if (printableRatio(candidate) < 0.85) {
    return "";
  }
  return candidate;
}
301
/**
 * Reduces an XML document to its plain text.
 *
 * Processing instructions are dropped, every tag becomes a space, the five
 * predefined XML entities and numeric character references are decoded, and
 * runs of whitespace collapse to a single space.
 *
 * References are decoded in a single pass AFTER tags are removed, so an
 * escaped "&lt;b&gt;" survives as the literal text "<b>" and "&amp;lt;"
 * decodes to "&lt;" rather than "<".
 *
 * @param {Buffer} xmlBuffer - Raw XML bytes (decoded as UTF-8).
 * @returns {string} Extracted text, or "" when the input cannot be processed.
 */
function stripXmlText(xmlBuffer) {
  const namedEntities = { amp: "&", lt: "<", gt: ">", quot: "\"", apos: "'" };
  const decodeReference = (match, ref) => {
    if (ref[0] !== "#") {
      return namedEntities[ref];
    }
    const isHex = ref[1] === "x" || ref[1] === "X";
    const codePoint = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // Leave out-of-range references untouched instead of throwing.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  };
  try {
    const xml = xmlBuffer.toString("utf8");
    const withoutPi = xml.replace(/<\?[^?]*\?>/g, "");
    const withoutTags = withoutPi.replace(/<[^>]+>/g, " ");
    const decoded = withoutTags.replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, decodeReference);
    return decoded.replace(/\s+/g, " ").trim();
  }
  catch {
    return "";
  }
}
312
/**
 * Extracts text from the XML parts of a ZIP-based Office document.
 *
 * Only parts relevant to the document type are read: `word/` for .docx,
 * `ppt/slides/` for .pptx, and shared strings plus `xl/worksheets/` for
 * .xlsx. Entries are visited in name order using numeric-aware comparison,
 * so "slide2.xml" comes before "slide10.xml" and the extracted text follows
 * slide/sheet numbering.
 *
 * @param {string} filePath - Path of the document.
 * @returns {string} Text of the matching parts joined by newlines, or ""
 *   when the archive cannot be opened or read (best effort).
 */
function extractZipXml(filePath) {
  try {
    const zip = new AdmZip(filePath);
    const ext = path.extname(filePath).toLowerCase();
    // Plain localeCompare would order "slide10.xml" before "slide2.xml".
    const collator = new Intl.Collator(undefined, { numeric: true });
    const entries = zip.getEntries().sort((a, b) => collator.compare(a.entryName, b.entryName));
    const snippets = [];
    for (const entry of entries) {
      const name = entry.entryName.toLowerCase();
      if (ext === ".docx" && !name.startsWith("word/")) {
        continue;
      }
      if (ext === ".pptx" && !name.startsWith("ppt/slides/")) {
        continue;
      }
      if (ext === ".xlsx" && !(name.startsWith("xl/sharedstrings") || name.startsWith("xl/worksheets/"))) {
        continue;
      }
      if (!name.endsWith(".xml")) {
        continue;
      }
      const text = stripXmlText(entry.getData());
      if (text) {
        snippets.push(text);
      }
    }
    return snippets.join("\n").trim();
  }
  catch {
    return "";
  }
}
338
/**
 * Extracts text from a PDF on a best-effort basis using external tools.
 *
 * First asks `/usr/bin/mdls` for the file's `kMDItemTextContent` metadata
 * (a tool available on macOS). If that fails or yields nothing, falls back
 * to the first 400 non-empty lines printed by `/usr/bin/strings -n 6`.
 *
 * Both commands run with an enlarged output buffer: with Node's default
 * `maxBuffer` a large PDF makes `execFileSync` throw, which would silently
 * turn into an empty result even though only a prefix of the output is used.
 *
 * @param {string} filePath - Path of the PDF.
 * @returns {string} Extracted text, or "" when neither tool produced any.
 */
function extractPdfText(filePath) {
  const execOptions = {
    encoding: "utf8",
    stdio: ["pipe", "pipe", "pipe"],
    maxBuffer: 64 * 1024 * 1024,
  };
  try {
    const result = execFileSync("/usr/bin/mdls", ["-raw", "-name", "kMDItemTextContent", filePath], execOptions);
    const text = (result || "").trim();
    // mdls prints "(null)" when the attribute is not set.
    if (text && text !== "(null)") {
      return text;
    }
  }
  catch { /* tool missing or failed: fall through to the next strategy */ }
  try {
    const result = execFileSync("/usr/bin/strings", ["-n", "6", filePath], execOptions);
    const lines = result.split("\n").map((l) => l.trim()).filter(Boolean).slice(0, 400);
    if (lines.length) {
      return lines.join("\n");
    }
  }
  catch { /* tool missing or failed: report no text */ }
  return "";
}
361
/**
 * Extracts text from a vault file, choosing a strategy by extension.
 *
 * Known text formats are read directly. Office documents and PDFs go
 * through their dedicated extractor first. Every other file, and any
 * Office/PDF file whose extractor returned nothing, is read as plain text
 * and accepted only if it looks printable.
 *
 * @param {string} filePath - Local path of the file.
 * @returns {string} Extracted text, possibly "".
 */
export function extractVaultText(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  if (TEXT_EXTS.has(ext)) {
    return readTextDirect(filePath);
  }
  let structuredText = "";
  if (ZIP_EXTS.has(ext)) {
    structuredText = extractZipXml(filePath);
  }
  else if (ext === ".pdf") {
    structuredText = extractPdfText(filePath);
  }
  return structuredText || tryPlainText(filePath);
}
376
/**
 * Reconciles the database with a fresh scan of the vault.
 *
 * Compares `currentFiles` with the rows of `vault_files`, then, in a single
 * transaction:
 *   1. upserts every scanned file into `vault_files`;
 *   2. deletes files that disappeared, together with their queue entry;
 *   3. appends one `vault_changelog` row per added/modified/removed file and
 *      (re)queues every added or modified file as 'pending'.
 *
 * A queue entry whose status is 'processing' is left in that state with its
 * bookkeeping columns intact; only its priority and queued_at are refreshed.
 * On conflict the statement does not touch `attempts` or `result_page_id`.
 *
 * @param {object} db - Database handle exposing `prepare` and `transaction`.
 * @param {object[]} currentFiles - Vault file records from the current scan.
 * @param {*} syncId - Identifier stored with each changelog row.
 * @returns {{ files: number, changes: object[], queue: { pendingAdded: number, pendingReset: number, removed: number } }}
 *   Number of scanned files, the detected changes, and queue statistics.
 */
export function syncVaultIndex(db, currentFiles, syncId) {
  const indexedById = getExistingVaultFiles(db);
  const scannedById = new Map(currentFiles.map((file) => [file.id, file]));
  const queueRows = db.prepare(`
    SELECT file_id AS fileId, status
    FROM vault_processing_queue
  `).all();
  const queueStatusById = new Map(queueRows.map((row) => [row.fileId, row.status]));
  const detectedAt = toOffsetIso();
  const queueStats = { pendingAdded: 0, pendingReset: 0, removed: 0 };

  // Detect changes: added/modified in scan order, then removed in index order.
  const changes = [];
  const recordChange = (fileId, action) => {
    changes.push({ fileId, action, detectedAt, syncId });
  };
  for (const { id, contentHash } of currentFiles) {
    const known = indexedById.get(id);
    if (!known) {
      recordChange(id, "added");
    }
    else if (known.contentHash !== contentHash) {
      recordChange(id, "modified");
    }
  }
  const removedIds = [...indexedById.keys()].filter((id) => !scannedById.has(id));
  for (const id of removedIds) {
    recordChange(id, "removed");
  }

  const upsertFile = db.prepare(`
    INSERT INTO vault_files(
      id, file_name, file_ext, source_type, file_size, file_path, content_hash, file_mtime, indexed_at
    ) VALUES (
      @id, @file_name, @file_ext, @source_type, @file_size, @file_path, @content_hash, @file_mtime, @indexed_at
    )
    ON CONFLICT(id) DO UPDATE SET
      file_name = excluded.file_name,
      file_ext = excluded.file_ext,
      source_type = excluded.source_type,
      file_size = excluded.file_size,
      file_path = excluded.file_path,
      content_hash = excluded.content_hash,
      file_mtime = excluded.file_mtime,
      indexed_at = excluded.indexed_at
  `);
  const insertChange = db.prepare(`
    INSERT INTO vault_changelog(file_id, action, detected_at, sync_id)
    VALUES(@file_id, @action, @detected_at, @sync_id)
  `);
  const deleteFile = db.prepare("DELETE FROM vault_files WHERE id = ?");
  const deleteQueueEntry = db.prepare("DELETE FROM vault_processing_queue WHERE file_id = ?");
  const enqueue = db.prepare(`
    INSERT INTO vault_processing_queue(
      file_id,
      status,
      priority,
      queued_at,
      claimed_at,
      started_at,
      processed_at,
      result_page_id,
      error_message,
      attempts,
      thread_id,
      workflow_version,
      decision,
      result_manifest_path,
      last_error_at,
      retry_after,
      created_page_ids,
      updated_page_ids,
      applied_type_names,
      proposed_type_names,
      skills_used
    ) VALUES (
      @file_id,
      'pending',
      @priority,
      @queued_at,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      0,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL
    )
    ON CONFLICT(file_id) DO UPDATE SET
      status = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.status
        ELSE 'pending'
      END,
      priority = excluded.priority,
      queued_at = excluded.queued_at,
      claimed_at = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.claimed_at
        ELSE NULL
      END,
      started_at = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.started_at
        ELSE NULL
      END,
      processed_at = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.processed_at
        ELSE NULL
      END,
      error_message = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.error_message
        ELSE NULL
      END,
      thread_id = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.thread_id
        ELSE NULL
      END,
      workflow_version = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.workflow_version
        ELSE NULL
      END,
      decision = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.decision
        ELSE NULL
      END,
      result_manifest_path = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.result_manifest_path
        ELSE NULL
      END,
      last_error_at = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.last_error_at
        ELSE NULL
      END,
      retry_after = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.retry_after
        ELSE NULL
      END,
      created_page_ids = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.created_page_ids
        ELSE NULL
      END,
      updated_page_ids = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.updated_page_ids
        ELSE NULL
      END,
      applied_type_names = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.applied_type_names
        ELSE NULL
      END,
      proposed_type_names = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.proposed_type_names
        ELSE NULL
      END,
      skills_used = CASE
        WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.skills_used
        ELSE NULL
      END
  `);

  const applySync = db.transaction(() => {
    for (const file of currentFiles) {
      upsertFile.run({
        id: file.id,
        file_name: file.fileName,
        file_ext: file.fileExt,
        source_type: file.sourceType,
        file_size: file.fileSize,
        file_path: file.filePath,
        content_hash: file.contentHash,
        file_mtime: file.fileMtime,
        indexed_at: file.indexedAt,
      });
    }
    for (const id of removedIds) {
      deleteFile.run(id);
      if (queueStatusById.has(id)) {
        deleteQueueEntry.run(id);
        queueStats.removed += 1;
      }
    }
    for (const change of changes) {
      insertChange.run({
        file_id: change.fileId,
        action: change.action,
        detected_at: change.detectedAt,
        sync_id: change.syncId,
      });
      const needsProcessing = change.action === "added" || change.action === "modified";
      if (!needsProcessing) {
        continue;
      }
      const file = scannedById.get(change.fileId);
      if (!file) {
        continue;
      }
      // Statistics reflect the queue state observed before this sync.
      const previousStatus = queueStatusById.get(change.fileId);
      if (!previousStatus) {
        queueStats.pendingAdded += 1;
      }
      else if (previousStatus !== "processing") {
        queueStats.pendingReset += 1;
      }
      enqueue.run({
        file_id: change.fileId,
        priority: getVaultQueuePriority(file.fileExt),
        queued_at: change.detectedAt,
      });
    }
  });
  applySync();
  return { files: currentFiles.length, changes, queue: queueStats };
}