oh-my-llmwikimode 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +494 -0
  3. package/bin/llmwiki.js +1493 -0
  4. package/docs/INSTALLATION.md +228 -0
  5. package/docs/SCOPE_LOCK.md +79 -0
  6. package/docs/STAGE1_GUIDE.md +265 -0
  7. package/docs/STAGE2_AGENT_TEAM_GUIDE.md +141 -0
  8. package/docs/STAGE3_CONVERSATIONAL_GROWTH_GUIDE.md +50 -0
  9. package/docs/TEST_WORKSHEET.md +120 -0
  10. package/docs/github-private-bootstrap.md +53 -0
  11. package/docs/release.md +79 -0
  12. package/docs/stage4-slice1-manual-test.md +259 -0
  13. package/docs/stage4-slice1-user-guide.md +269 -0
  14. package/docs/user-guide-ko.md +452 -0
  15. package/package.json +76 -0
  16. package/scripts/install-llmwiki.ps1 +229 -0
  17. package/src/config.js +74 -0
  18. package/src/curator/browser-data.js +134 -0
  19. package/src/curator/queue.js +324 -0
  20. package/src/curator/schema.js +237 -0
  21. package/src/curator/scoring.js +83 -0
  22. package/src/hooks.js +199 -0
  23. package/src/librarian/schema.js +218 -0
  24. package/src/librarian/weekly-digest.js +478 -0
  25. package/src/security.js +127 -0
  26. package/src/server.js +860 -0
  27. package/src/stage4/graph-reasoning/analyzer.js +255 -0
  28. package/src/stage4/graph-reasoning/browser-data.js +130 -0
  29. package/src/stage4/graph-reasoning/index.js +35 -0
  30. package/src/stage4/graph-reasoning/loader.js +122 -0
  31. package/src/stage4/graph-reasoning/queue.js +154 -0
  32. package/src/stage4/graph-reasoning/schema.js +190 -0
  33. package/src/team/browser-data.js +142 -0
  34. package/src/team/capabilities.js +79 -0
  35. package/src/team/dispatch.js +108 -0
  36. package/src/team/queue.js +290 -0
  37. package/src/team/schema.js +225 -0
  38. package/src/team/shared-memory.js +183 -0
  39. package/src/todo/browser-data.js +71 -0
  40. package/src/todo/queue.js +159 -0
  41. package/src/todo/schema.js +90 -0
  42. package/src/utils/embedding-model.js +111 -0
  43. package/src/wiki/alias-suggestions.js +180 -0
  44. package/src/wiki/browser-data.js +284 -0
  45. package/src/wiki/doctor.js +218 -0
  46. package/src/wiki/entry-normalizer.js +139 -0
  47. package/src/wiki/ingest.js +443 -0
  48. package/src/wiki/lesson-proposal-analyzer.js +463 -0
  49. package/src/wiki/lesson-proposal-manager.js +331 -0
  50. package/src/wiki/lesson-template.js +182 -0
  51. package/src/wiki/lint.js +294 -0
  52. package/src/wiki/notebooklm-adapter.js +264 -0
  53. package/src/wiki/query.js +304 -0
  54. package/src/wiki/raw-manager.js +400 -0
  55. package/src/wiki/search-feedback.js +211 -0
  56. package/src/wiki/semantic-index.js +333 -0
  57. package/src/wiki/semantic-search.js +170 -0
  58. package/src/wiki/source-ledger.js +370 -0
  59. package/src/wiki/store.js +1329 -0
  60. package/src/wiki/usage-events.js +144 -0
@@ -0,0 +1,211 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import crypto from "node:crypto";
4
+
5
+ /**
6
+ * B2 Search Feedback — Append-Only Failure Log
7
+ *
8
+ * Stores zero-result search queries under .system/search-feedback/failures.jsonl
9
+ * Schema: {version, id, ts, query, query_hash, result_count, max_results, top_paths, source, actor, review_required, zero_results}
10
+ */
11
+
12
/** Schema version written into every search-feedback record. */
export const SEARCH_FEEDBACK_VERSION = 1;

/** Directory (relative to the wiki root) that holds the feedback log. */
export const SEARCH_FEEDBACK_DIR = ".system/search-feedback";

/** File name of the append-only JSONL failure log inside SEARCH_FEEDBACK_DIR. */
export const SEARCH_FEEDBACK_FILE = "failures.jsonl";

/** Maximum number of characters of a (redacted) query that is persisted. */
export const MAX_QUERY_LENGTH = 200;
16
+
17
// Patterns for "key = secret" / "key: secret" pairs and bearer credentials.
// The caller keeps the key and replaces only the value.
// All patterns are global and case-insensitive.
const KEY_VALUE_PATTERNS = [
  /\btoken\s*[:=]\s*\S+/gi,
  /\bapi[_-]?key\s*[:=]\s*\S+/gi,
  /\bpassword\s*[:=]\s*\S+/gi,
  /\bsecret\s*[:=]\s*\S+/gi,
  /\bauth\s*[:=]\s*\S+/gi,
  /\bbearer\s+\S+/gi,
];
26
+
27
// Standalone token patterns — the entire match is replaced.
//
// The `sk-` pattern carries a negative lookbehind so it only fires when the
// prefix is NOT preceded by a letter or digit. Without it, ordinary hyphenated
// words that merely contain "sk-" (e.g. "task-list", "risk-report",
// "disk-usage") were partially redacted and the stored query became unreadable.
const STANDALONE_PATTERNS = [
  /ghp_[a-zA-Z0-9]{36}/g,
  /(?<![a-zA-Z0-9])sk-(?:proj-)?[a-zA-Z0-9_-]+/g,
];
32
+
33
/**
 * Resolve the on-disk locations used by the search-feedback feature.
 *
 * @param {string} wikiRoot - Root directory of the wiki.
 * @returns {{dir: string, failuresFile: string, suggestionsDir: string}}
 */
export function getSearchFeedbackPaths(wikiRoot) {
  const feedbackDir = path.join(wikiRoot, SEARCH_FEEDBACK_DIR);
  const failuresFile = path.join(feedbackDir, SEARCH_FEEDBACK_FILE);
  const suggestionsDir = path.join(wikiRoot, ".system/search-feedback/suggestions");
  return { dir: feedbackDir, failuresFile, suggestionsDir };
}
41
+
42
/** Absolute/relative path of the feedback directory below `wikiRoot`. */
function getFeedbackDir(wikiRoot) {
  const feedbackDir = path.join(wikiRoot, SEARCH_FEEDBACK_DIR);
  return feedbackDir;
}
45
+
46
/** Path of the JSONL failure log below `wikiRoot`. */
function getFeedbackFilePath(wikiRoot) {
  const feedbackDir = getFeedbackDir(wikiRoot);
  return path.join(feedbackDir, SEARCH_FEEDBACK_FILE);
}
49
+
50
/**
 * Create the feedback directory (and any missing parents) if needed.
 *
 * @param {string} wikiRoot
 * @returns {string} The feedback directory path.
 */
function ensureFeedbackDir(wikiRoot) {
  const feedbackDir = getFeedbackDir(wikiRoot);
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(feedbackDir, { recursive: true });
  return feedbackDir;
}
55
+
56
/**
 * SHA-256 hex digest of a query; falsy input yields the empty string.
 *
 * @param {string} query
 * @returns {string}
 */
function hashQuery(query) {
  if (!query) {
    return "";
  }
  const hasher = crypto.createHash("sha256");
  hasher.update(String(query));
  return hasher.digest("hex");
}
60
+
61
/**
 * First 12 hex characters of the SHA-256 digest of `String(value)`.
 *
 * @param {*} value
 * @returns {string}
 */
function shortHash(value) {
  const fullDigest = crypto
    .createHash("sha256")
    .update(String(value))
    .digest("hex");
  return fullDigest.slice(0, 12);
}
64
+
65
/**
 * Mask secrets in a search query and cap its length.
 *
 * Key/value secrets keep their key and get the value replaced
 * (`token: abc` becomes `token=***REDACTED***`); standalone tokens are replaced
 * entirely. The result is then truncated to MAX_QUERY_LENGTH characters, the
 * last three of which are "..." when truncation happened.
 *
 * @param {string} query - Raw query text; falsy values are treated as "".
 * @returns {string} Redacted, length-limited query.
 */
export function redactSearchQuery(query) {
  // Everything before the first "=", ":" or whitespace is the key to keep.
  const maskKeyValue = (match) => {
    const [key] = match.split(/[=:\s]/);
    return `${key}=***REDACTED***`;
  };

  let text = String(query || "");
  text = KEY_VALUE_PATTERNS.reduce(
    (acc, pattern) => acc.replace(pattern, maskKeyValue),
    text,
  );
  text = STANDALONE_PATTERNS.reduce(
    (acc, pattern) => acc.replace(pattern, "***REDACTED***"),
    text,
  );

  if (text.length <= MAX_QUERY_LENGTH) {
    return text;
  }
  return text.slice(0, MAX_QUERY_LENGTH - 3) + "...";
}
93
+
94
/**
 * Build a record id of the form `sf_<12 hex chars>` from the query hash and
 * the event timestamp.
 */
function generateFeedbackId(queryHash, timestamp) {
  const seed = queryHash + timestamp;
  return `sf_${shortHash(seed)}`;
}
97
+
98
/**
 * Append a zero-result search event to the failure log.
 *
 * The event is validated first; the feedback directory is only created once
 * the event is known to be recordable, so rejected calls leave no trace on
 * disk. The stored query is redacted and truncated, while `query_hash` is the
 * SHA-256 of the trimmed original query.
 *
 * @param {string} wikiRoot
 * @param {Object} event
 * @param {string} event.query - Original query text
 * @param {number} event.result_count - Number of results (MVP: must be 0)
 * @param {number} [event.max_results=3] - Max results requested (clamped to 1..20)
 * @param {string[]} [event.top_paths=[]] - Top result paths if any
 * @param {string} [event.source="wiki_search"] - Source: wiki_search, wiki_query, manual_cli
 * @param {string} [event.actor="local"] - Actor identifier
 * @param {Object} [options]
 * @param {Date|string} [options.now] - Injected timestamp for determinism
 * @returns {{success: boolean, path?: string, record?: Object, error?: string}}
 *   Never throws; any failure is reported through `success: false`.
 */
export function appendSearchFailure(wikiRoot, event, options = {}) {
  try {
    const query = String(event.query || "").trim();
    if (!query) {
      return { success: false, error: "query is required" };
    }

    const resultCount = Number(event.result_count);
    if (resultCount !== 0) {
      return { success: false, error: "MVP only records zero-result failures" };
    }

    // An invalid `options.now` makes toISOString() throw; the catch below
    // turns that into a `success: false` result.
    const now = options.now ? new Date(options.now) : new Date();
    const ts = now.toISOString();
    const queryHash = hashQuery(query);

    const record = {
      version: SEARCH_FEEDBACK_VERSION,
      id: generateFeedbackId(queryHash, ts),
      ts,
      query: redactSearchQuery(query),
      query_hash: queryHash,
      result_count: 0,
      max_results: Math.max(1, Math.min(Number(event.max_results) || 3, 20)),
      top_paths: Array.isArray(event.top_paths) ? event.top_paths : [],
      source: String(event.source || "wiki_search").trim(),
      actor: String(event.actor || "local").trim(),
      review_required: true,
      zero_results: true,
    };

    // Touch the filesystem only after validation succeeded.
    ensureFeedbackDir(wikiRoot);
    const filePath = getFeedbackFilePath(wikiRoot);
    fs.appendFileSync(filePath, `${JSON.stringify(record)}\n`, { encoding: "utf-8" });

    return { success: true, path: filePath, record };
  } catch (error) {
    return { success: false, error: error.message };
  }
}
156
+
157
/**
 * Read search failure events from the JSONL log.
 *
 * Malformed lines and lines that do not hold a JSON object are skipped.
 *
 * @param {string} wikiRoot
 * @param {Object} [options]
 * @param {number} [options.limit=100] - Max events to return. Values that are
 *   missing, non-numeric, zero or negative fall back to 100; `Infinity`
 *   returns everything.
 * @param {boolean} [options.reverse=false] - Newest first when true; default is oldest-first
 * @returns {{success: boolean, events?: Array, error?: string}}
 */
export function readSearchFailures(wikiRoot, options = {}) {
  try {
    const filePath = getFeedbackFilePath(wikiRoot);
    if (!fs.existsSync(filePath)) {
      return { success: true, events: [] };
    }

    // A negative limit used to reach `slice(0, -n)` and silently drop the
    // last n events; only positive limits are honoured now.
    const requestedLimit = Number(options.limit);
    const limit = requestedLimit > 0 ? requestedLimit : 100;
    const reverse = options.reverse === true;

    const events = [];
    for (const line of fs.readFileSync(filePath, "utf-8").split(/\r?\n/)) {
      if (!line) continue;
      try {
        const parsed = JSON.parse(line);
        // `null`, numbers, strings and arrays are valid JSON but not events.
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
          events.push(parsed);
        }
      } catch {
        // Skip malformed lines so one bad record does not hide the rest.
      }
    }

    // File order is oldest-first; flip it before applying the limit so that
    // `reverse` returns the newest `limit` events.
    if (reverse) {
      events.reverse();
    }

    return { success: true, events: events.slice(0, limit) };
  } catch (error) {
    return { success: false, error: error.message };
  }
}
201
+
202
/**
 * Number of readable failure events in the log.
 *
 * @param {string} wikiRoot
 * @returns {number} Event count, or 0 when the log cannot be read.
 */
export function countSearchFailures(wikiRoot) {
  const { success, events } = readSearchFailures(wikiRoot, {
    limit: Infinity,
    reverse: false,
  });
  if (!success) {
    return 0;
  }
  return events.length;
}
@@ -0,0 +1,333 @@
1
+ import crypto from "node:crypto";
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+ import { EmbeddingModel } from "../utils/embedding-model.js";
5
+ import { countSearchFailures } from "./search-feedback.js";
6
+
7
/** Version tag written into every index record. */
export const SEMANTIC_INDEX_VERSION = "b2.0";

/** Directory (relative to the wiki root) holding the index file. */
export const SEMANTIC_INDEX_DIR = "data/semantic-index";

/** File name of the JSONL embedding index. */
export const SEMANTIC_INDEX_FILE = "embeddings.jsonl";

/** Number of entries sent to the embedding model per batch. */
export const SEMANTIC_BATCH_SIZE = 32;

/** Threshold compared against the failure count and the index entry count by the auto-build check. */
export const SEMANTIC_AUTO_BUILD_FAILURE_THRESHOLD = 10;

/** Age (7 days, in milliseconds) after which the index file counts as stale. */
export const SEMANTIC_AUTO_BUILD_STALE_MS = 7 * 24 * 60 * 60 * 1000;

// Wiki sub-directories that are scanned for markdown entries, with the
// category label attached to every entry found below them.
const ENTRY_ROOTS = [
  { relativePath: "editorial/lessons", category: "lessons" },
  { relativePath: "inbox", category: "inbox" },
  { relativePath: "problems", category: "problems" },
];
19
+
20
/**
 * Locale-independent string comparator (code-unit order).
 * Falsy arguments compare as the empty string.
 *
 * @returns {-1|0|1}
 */
function compareStrings(left, right) {
  const lhs = String(left || "");
  const rhs = String(right || "");
  if (lhs === rhs) {
    return 0;
  }
  return lhs < rhs ? -1 : 1;
}
27
+
28
/**
 * Coerce a frontmatter value into a list of non-empty, trimmed strings.
 * Arrays are cleaned element-wise, other truthy values become a one-element
 * list, and falsy values become an empty list.
 *
 * @param {*} value
 * @returns {string[]}
 */
function normalizeList(value) {
  const clean = (item) => String(item || "").trim();

  if (Array.isArray(value)) {
    return value.map(clean).filter(Boolean);
  }
  if (!value) {
    return [];
  }
  const only = String(value).trim();
  return only ? [only] : [];
}
33
+
34
/**
 * Strip one layer of quotes from a frontmatter scalar.
 *
 * Double-quoted values are decoded as JSON strings (falling back to a plain
 * slice when they are not valid JSON). Single-quoted values have their quotes
 * removed and the `''` escape collapsed to `'`.
 *
 * @param {*} value
 * @returns {string}
 */
function unquoteScalar(value) {
  const trimmed = String(value || "").trim();
  if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
    try {
      return JSON.parse(trimmed);
    } catch {
      return trimmed.slice(1, -1);
    }
  }
  if (trimmed.length >= 2 && trimmed.startsWith("'") && trimmed.endsWith("'")) {
    return trimmed.slice(1, -1).replace(/''/g, "'");
  }
  return trimmed;
}

/**
 * Split a markdown document into frontmatter and body.
 *
 * Only a small subset of YAML is understood: `key: value` scalars and
 * `key:` followed by `- item` lines. A key with no value and no items maps to
 * an empty array. Documents without a leading `---` fence are returned with
 * an empty frontmatter and the untouched text as body.
 *
 * @param {string} content - Raw file content (LF or CRLF line endings).
 * @returns {{frontmatter: Object, body: string}}
 */
function parseMarkdownEntry(content) {
  const text = String(content || "");
  const match = text.match(/^\uFEFF?---\s*\n([\s\S]*?)\n---\s*\n?([\s\S]*)$/);
  if (!match) return { frontmatter: {}, body: text };

  const frontmatter = {};
  let currentKey = null;
  for (const rawLine of match[1].split(/\r?\n/)) {
    // In CRLF files the last frontmatter line still carries a trailing "\r"
    // (its "\n" is consumed by the closing fence). `.` does not match "\r",
    // so without this trim the final key of the block was silently dropped.
    const line = rawLine.trimEnd();

    const itemMatch = line.trim().match(/^-\s+(.*)$/);
    if (itemMatch && currentKey) {
      if (!Array.isArray(frontmatter[currentKey])) frontmatter[currentKey] = [];
      frontmatter[currentKey].push(unquoteScalar(itemMatch[1]));
      continue;
    }

    const keyMatch = line.match(/^(\w+):\s*(.*)$/);
    if (!keyMatch) continue;
    const [, key, rawValue] = keyMatch;
    currentKey = key;
    frontmatter[key] = rawValue.trim() ? unquoteScalar(rawValue) : [];
  }

  return { frontmatter, body: match[2].trim() };
}
73
+
74
/**
 * SHA-256 hex digest of an entry's content; falsy content hashes as "".
 *
 * @param {string} content
 * @returns {string}
 */
function hashContent(content) {
  const hasher = crypto.createHash("sha256");
  hasher.update(String(content || ""));
  return hasher.digest("hex");
}
77
+
78
/**
 * ISO-8601 timestamp for index records.
 * Uses `options.now` when it is truthy, otherwise the current time.
 *
 * @param {{now?: Date|string|number}} options
 * @returns {string}
 */
function getNow(options) {
  const moment = options.now ? new Date(options.now) : new Date();
  return moment.toISOString();
}
81
+
82
/**
 * Create the semantic-index directory (and missing parents) if needed.
 *
 * @param {string} wikiRoot
 * @returns {string} The index directory path.
 */
function ensureSemanticIndexDir(wikiRoot) {
  const indexDir = path.join(wikiRoot, SEMANTIC_INDEX_DIR);
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(indexDir, { recursive: true });
  return indexDir;
}
87
+
88
/**
 * Path of the JSONL embedding index below `wikiRoot`.
 *
 * @param {string} wikiRoot
 * @returns {string}
 */
export function getSemanticIndexPath(wikiRoot) {
  const segments = [wikiRoot, SEMANTIC_INDEX_DIR, SEMANTIC_INDEX_FILE];
  return path.join(...segments);
}
91
+
92
/**
 * Replace `filePath` with one JSON document per line.
 *
 * The content is written to a uniquely named temporary file next to the
 * target and then renamed over it, so readers never observe a half-written
 * index. If the write or the rename fails, the temporary file is removed
 * before the error is rethrown.
 *
 * @param {string} filePath - Destination file; parent directories are created.
 * @param {Array<Object>} records - Records to serialize; an empty list yields an empty file.
 * @throws Whatever `fs` throws for the failed write/rename.
 */
function writeJsonl(filePath, records) {
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  const content = records.length > 0
    ? `${records.map((record) => JSON.stringify(record)).join("\n")}\n`
    : "";
  const tmpFile = `${filePath}.${Date.now()}.${Math.random().toString(36).slice(2)}.tmp`;

  try {
    fs.writeFileSync(tmpFile, content, "utf-8");
    fs.renameSync(tmpFile, filePath);
  } catch (error) {
    try {
      // Best-effort cleanup; `force` ignores a temp file that was never created.
      fs.rmSync(tmpFile, { force: true });
    } catch {
      // The original failure is more useful to the caller than a cleanup error.
    }
    throw error;
  }
}
101
+
102
/**
 * Collect every markdown entry below the configured entry roots.
 *
 * Directories are walked recursively; symbolic links are never followed.
 * Each `.md` file yields a record with its category, wiki-relative path
 * (forward slashes), title, raw content, content hash and the text that is
 * fed to the embedding model (title, summary, aliases, tags and body).
 *
 * @param {string} wikiRoot
 * @returns {Array<{category: string, entry_path: string, title: string,
 *   content: string, content_hash: string, index_text: string}>}
 *   Records sorted by `entry_path`.
 */
function scanMarkdownEntries(wikiRoot) {
  const records = [];

  // Build the index record for one markdown file.
  const toRecord = (fullPath, category) => {
    const content = fs.readFileSync(fullPath, "utf-8");
    const { frontmatter, body } = parseMarkdownEntry(content);
    const entryPath = path.relative(wikiRoot, fullPath).replace(/\\/g, "/");

    // An empty `title:` parses to [] (truthy), which used to produce an empty
    // title; stringify first so the file name is used as the fallback.
    const declaredTitle = String(frontmatter.title || "").trim();
    const title = declaredTitle || path.basename(entryPath, ".md").trim();

    const indexText = [
      title,
      // normalizeList drops an empty `summary:` instead of adding a blank line.
      ...normalizeList(frontmatter.summary),
      ...normalizeList(frontmatter.aliases),
      ...normalizeList(frontmatter.tags),
      body,
    ].filter(Boolean).join("\n");

    return {
      category,
      entry_path: entryPath,
      title,
      content,
      content_hash: hashContent(content),
      index_text: indexText,
    };
  };

  const walk = (directory, category) => {
    if (!fs.existsSync(directory)) return;
    const dirents = fs.readdirSync(directory, { withFileTypes: true })
      .sort((a, b) => compareStrings(a.name, b.name));

    for (const dirent of dirents) {
      if (dirent.isSymbolicLink()) continue;
      const fullPath = path.join(directory, dirent.name);
      if (dirent.isDirectory()) {
        walk(fullPath, category);
      } else if (dirent.isFile() && dirent.name.endsWith(".md")) {
        records.push(toRecord(fullPath, category));
      }
    }
  };

  for (const root of ENTRY_ROOTS) {
    walk(path.join(wikiRoot, root.relativePath), root.category);
  }

  return records.sort((a, b) => compareStrings(a.entry_path, b.entry_path));
}
146
+
147
/**
 * Compute embeddings for the given records in batches.
 *
 * @param {Array<{entry_path: string, index_text: string}>} records
 * @param {Object} [options]
 * @param {Object} [options.model] - Embedding model to use; a new
 *   EmbeddingModel is created when omitted. The model must provide
 *   `isAvailable()`, `load()` and `embedBatch(texts)`.
 * @returns {Promise<{success: boolean, embeddingsByPath?: Map<string, *>, error?: string}>}
 *   On success, a map from `entry_path` to the embedding returned for it.
 */
async function embedRecords(records, options = {}) {
  const model = options.model || new EmbeddingModel();
  if (!model.isAvailable()) {
    return { success: false, error: "Embedding model is not available" };
  }

  const embeddingsByPath = new Map();
  // Nothing to embed (e.g. an incremental update with no changed entries):
  // skip loading the model, which would be wasted work.
  if (records.length === 0) {
    return { success: true, embeddingsByPath };
  }

  await model.load();
  for (let index = 0; index < records.length; index += SEMANTIC_BATCH_SIZE) {
    const batch = records.slice(index, index + SEMANTIC_BATCH_SIZE);
    const embeddings = await model.embedBatch(batch.map((record) => record.index_text));
    batch.forEach((record, offset) => {
      embeddingsByPath.set(record.entry_path, embeddings[offset]);
    });
  }
  return { success: true, embeddingsByPath };
}
164
+
165
/**
 * Load the embedding index from disk.
 *
 * Lines that are not valid JSON, or that lack an `entry_path` or an
 * `embedding` array, are ignored. A missing index file is not an error and
 * yields an empty index.
 *
 * @param {string} wikiRoot
 * @returns {{success: boolean, entries?: Array<Object>, count?: number, path?: string, error?: string}}
 *   Entries are sorted by `entry_path`.
 */
export function loadSemanticIndex(wikiRoot) {
  // Malformed lines become null so one bad record does not disable the index.
  const parseLine = (line) => {
    try {
      return JSON.parse(line);
    } catch {
      return null;
    }
  };
  const isUsable = (parsed) =>
    parsed && parsed.entry_path && Array.isArray(parsed.embedding);

  try {
    const filePath = getSemanticIndexPath(wikiRoot);
    if (!fs.existsSync(filePath)) {
      return { success: true, entries: [], count: 0, path: filePath };
    }

    const entries = fs
      .readFileSync(filePath, "utf-8")
      .split(/\r?\n/)
      .filter(Boolean)
      .map(parseLine)
      .filter(isUsable);

    entries.sort((a, b) => compareStrings(a.entry_path, b.entry_path));
    return { success: true, entries, count: entries.length, path: filePath };
  } catch (error) {
    return { success: false, error: error.message };
  }
}
191
+
192
/**
 * Number of usable entries in the on-disk index (0 when it cannot be loaded).
 *
 * @param {string} wikiRoot
 * @returns {number}
 */
export function getSemanticIndexSize(wikiRoot) {
  const { success, entries } = loadSemanticIndex(wikiRoot);
  if (!success) {
    return 0;
  }
  return entries.length;
}
196
+
197
/**
 * Rebuild the whole semantic index from the markdown entries on disk.
 *
 * Every scanned entry is embedded and the index file is replaced.
 *
 * @param {string} wikiRoot
 * @param {Object} [options]
 * @param {Object} [options.model] - Embedding model passed through to the embedder.
 * @param {Date|string} [options.now] - Timestamp to stamp on the records.
 * @returns {Promise<{success: boolean, count?: number, path?: string, error?: string}>}
 */
export async function buildSemanticIndex(wikiRoot, options = {}) {
  try {
    ensureSemanticIndexDir(wikiRoot);

    const scanned = scanMarkdownEntries(wikiRoot);
    const embedResult = await embedRecords(scanned, options);
    if (!embedResult.success) {
      return embedResult;
    }

    const ts = getNow(options);
    const records = [];
    for (const entry of scanned) {
      records.push({
        version: SEMANTIC_INDEX_VERSION,
        entry_path: entry.entry_path,
        title: entry.title,
        content_hash: entry.content_hash,
        embedding: embedResult.embeddingsByPath.get(entry.entry_path),
        ts,
        source: "batch_index",
      });
    }

    const filePath = getSemanticIndexPath(wikiRoot);
    writeJsonl(filePath, records);
    return { success: true, count: records.length, path: filePath };
  } catch (error) {
    return { success: false, error: error.message };
  }
}
222
+
223
/**
 * Incrementally refresh the semantic index.
 *
 * Entries whose content hash matches the stored record are carried over
 * unchanged; new or modified entries are re-embedded. Records for entries
 * that no longer exist on disk are dropped because the output is rebuilt from
 * the current scan.
 *
 * @param {string} wikiRoot
 * @param {Object} [options]
 * @param {Object} [options.model] - Embedding model passed through to the embedder.
 * @param {Date|string} [options.now] - Timestamp to stamp on re-embedded records.
 * @returns {Promise<{success: boolean, count?: number, path?: string, error?: string}>}
 */
export async function updateSemanticIndex(wikiRoot, options = {}) {
  try {
    ensureSemanticIndexDir(wikiRoot);

    const existing = loadSemanticIndex(wikiRoot);
    if (!existing.success) {
      return existing;
    }

    const storedByPath = new Map();
    for (const stored of existing.entries) {
      storedByPath.set(stored.entry_path, stored);
    }

    // The stored record for an entry, or undefined when it is new or changed.
    const reusable = (entry) => {
      const stored = storedByPath.get(entry.entry_path);
      return stored && stored.content_hash === entry.content_hash ? stored : undefined;
    };

    const scanned = scanMarkdownEntries(wikiRoot);
    const toEmbed = scanned.filter((entry) => !reusable(entry));

    const embedResult = await embedRecords(toEmbed, options);
    if (!embedResult.success) {
      return embedResult;
    }

    const ts = getNow(options);
    const records = scanned.map((entry) => reusable(entry) || {
      version: SEMANTIC_INDEX_VERSION,
      entry_path: entry.entry_path,
      title: entry.title,
      content_hash: entry.content_hash,
      embedding: embedResult.embeddingsByPath.get(entry.entry_path),
      ts,
      source: "batch_index",
    });

    const filePath = getSemanticIndexPath(wikiRoot);
    writeJsonl(filePath, records);
    return { success: true, count: records.length, path: filePath };
  } catch (error) {
    return { success: false, error: error.message };
  }
}
263
+
264
/**
 * Current time in epoch milliseconds for the staleness check.
 * Uses `options.now` when it is truthy and parses to a valid date; otherwise
 * falls back to the system clock.
 *
 * @param {{now?: Date|string|number}} options
 * @returns {number}
 */
function getAutoBuildNowMs(options) {
  const injectedMs = options.now ? new Date(options.now).getTime() : Number.NaN;
  return Number.isFinite(injectedMs) ? injectedMs : Date.now();
}
269
+
270
/**
 * Whether the index file is missing or was last modified at least
 * SEMANTIC_AUTO_BUILD_STALE_MS ago.
 *
 * @param {string} wikiRoot
 * @param {{now?: Date|string|number}} [options] - Optional injected clock.
 * @returns {boolean}
 */
function isSemanticIndexStale(wikiRoot, options = {}) {
  const indexFile = getSemanticIndexPath(wikiRoot);
  if (!fs.existsSync(indexFile)) {
    return true;
  }
  const { mtimeMs } = fs.statSync(indexFile);
  const ageMs = getAutoBuildNowMs(options) - mtimeMs;
  return ageMs >= SEMANTIC_AUTO_BUILD_STALE_MS;
}
276
+
277
/**
 * Log a warning about a failed automatic index build/update.
 *
 * @param {string} action - "build" or "update".
 * @param {*} error - Error detail; falsy values are reported as "unknown error".
 */
function warnAutoBuildFailure(action, error) {
  let detail = "unknown error";
  if (error) {
    detail = String(error);
  }
  console.warn(`[llmwiki] Semantic index auto-${action} failed: ${detail}`);
}
281
+
282
/**
 * Build or refresh the semantic index when search failures suggest it is needed.
 *
 * Nothing happens until at least SEMANTIC_AUTO_BUILD_FAILURE_THRESHOLD search
 * failures are logged. After that, a missing or small index is rebuilt from
 * scratch and a stale one is updated incrementally; a large, fresh index is
 * left alone. Failures are logged as warnings and reported in the result.
 *
 * @param {string} wikiRoot
 * @param {Object} [options] - Forwarded to the build/update functions;
 *   `options.now` also drives the staleness check.
 * @returns {Promise<{triggered: boolean, reason: string}>} Never rejects.
 */
export async function maybeAutoBuildSemanticIndex(wikiRoot, options = {}) {
  const skipped = (reason) => ({ triggered: false, reason });

  try {
    if (countSearchFailures(wikiRoot) < SEMANTIC_AUTO_BUILD_FAILURE_THRESHOLD) {
      return skipped("failure_threshold_not_met");
    }

    const loaded = loadSemanticIndex(wikiRoot);
    if (!loaded.success) {
      return skipped(`index_load_failed: ${loaded.error || "unknown error"}`);
    }

    const hasIndexFile = fs.existsSync(getSemanticIndexPath(wikiRoot));
    const tooSmall = loaded.count < SEMANTIC_AUTO_BUILD_FAILURE_THRESHOLD;
    const stale = isSemanticIndexStale(wikiRoot, options);
    if (!tooSmall && !stale) {
      return skipped("index_current");
    }

    // Only an existing, sufficiently large (hence merely stale) index is updated in place.
    const action = hasIndexFile && !tooSmall ? "update" : "build";
    const run = action === "build" ? buildSemanticIndex : updateSemanticIndex;
    const outcome = await run(wikiRoot, options);

    if (!outcome.success) {
      warnAutoBuildFailure(action, outcome.error);
      return skipped(`${action}_failed: ${outcome.error || "unknown error"}`);
    }

    return { triggered: true, reason: action === "build" ? "built" : "updated" };
  } catch (error) {
    warnAutoBuildFailure("build", error?.message || error);
    return skipped(`auto_build_failed: ${error?.message || "unknown error"}`);
  }
}
318
+
319
/** Alias of buildSemanticIndex. */
export async function buildIndex(wikiRoot, options = {}) {
  const outcome = await buildSemanticIndex(wikiRoot, options);
  return outcome;
}
322
+
323
/** Alias of updateSemanticIndex. */
export async function updateIndex(wikiRoot, options = {}) {
  const outcome = await updateSemanticIndex(wikiRoot, options);
  return outcome;
}
326
+
327
/** Alias of loadSemanticIndex. */
export function loadIndex(wikiRoot) {
  const loaded = loadSemanticIndex(wikiRoot);
  return loaded;
}
330
+
331
/** Alias of getSemanticIndexSize. */
export function getIndexSize(wikiRoot) {
  const size = getSemanticIndexSize(wikiRoot);
  return size;
}
@@ -0,0 +1,170 @@
1
+ import crypto from "node:crypto";
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+ import { EmbeddingModel } from "../utils/embedding-model.js";
5
+ import { loadSemanticIndex } from "./semantic-index.js";
6
+
7
// Number of results returned when the caller does not ask for a specific count.
const DEFAULT_TOP_K = 5;

// Minimum cosine similarity a result needs when no threshold is supplied.
const DEFAULT_THRESHOLD = 0.6;

// Version tag of the result envelope.
const SEMANTIC_RESULT_VERSION = "b2.0";

// Entry statuses (lower-case) that must never appear in semantic results.
const EXCLUDED_STATUSES = new Set([
  "private",
  "rejected",
  "superseded",
  "needs-clarification",
]);
16
+
17
/**
 * SHA-256 hex digest of the trimmed query; blank input yields "".
 *
 * @param {string} query
 * @returns {string}
 */
function hashQuery(query) {
  const normalized = String(query || "").trim();
  if (normalized === "") {
    return "";
  }
  const hasher = crypto.createHash("sha256");
  hasher.update(normalized);
  return hasher.digest("hex");
}
22
+
23
/**
 * Wrap semantic search results in the standard response envelope.
 *
 * @param {string} query - Query text; only its hash is included.
 * @param {Array<Object>} results
 * @param {{fallbackFromKeyword?: boolean}} [options]
 * @returns {{version: string, query_hash: string, method: string,
 *   results: Array<Object>, fallback_from_keyword: boolean, review_required: boolean}}
 */
function semanticResultEnvelope(query, results, options = {}) {
  const queryHash = hashQuery(query);
  const cameFromKeywordFallback = options.fallbackFromKeyword === true;

  return {
    version: SEMANTIC_RESULT_VERSION,
    query_hash: queryHash,
    method: "semantic",
    results,
    fallback_from_keyword: cameFromKeywordFallback,
    review_required: false,
  };
}
33
+
34
/**
 * Resolve the requested result count from `topK` (preferred) or
 * `maxResults`, defaulting to DEFAULT_TOP_K and clamping to 1..50.
 *
 * @param {{topK?: number, maxResults?: number}} options
 * @returns {number}
 */
function normalizeTopK(options) {
  const requested = options.topK ?? options.maxResults ?? DEFAULT_TOP_K;
  // Non-numeric and zero requests fall back to the default.
  const numeric = Number(requested) || DEFAULT_TOP_K;
  const capped = Math.min(numeric, 50);
  return Math.max(1, capped);
}
38
+
39
/**
 * Whether `childPath` is `parentPath` itself or lies below it.
 *
 * The check is purely lexical (no filesystem access). A relative path
 * escapes the parent only when its first segment is exactly `..`; names that
 * merely start with two dots (e.g. `..drafts/note.md`) are inside.
 *
 * @param {string} parentPath
 * @param {string} childPath
 * @returns {boolean}
 */
function isPathInside(parentPath, childPath) {
  const relative = path.relative(parentPath, childPath);
  if (relative === "") return true;
  // On Windows a path on another drive comes back absolute.
  if (path.isAbsolute(relative)) return false;
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
43
+
44
/**
 * Resolve an index entry path to a real file path inside the wiki root.
 *
 * The path is rejected (null) when it is empty, absolute, escapes the root
 * lexically, does not exist, or escapes the root once symlinks are resolved.
 *
 * @param {string} wikiRoot
 * @param {string} entryPath - Wiki-relative path; backslashes are accepted.
 * @returns {string|null} Real path of the entry, or null when rejected.
 */
function resolveSemanticEntryPath(wikiRoot, entryPath) {
  const relativeEntry = String(entryPath || "").replace(/\\/g, "/").trim();
  if (!relativeEntry) return null;
  if (path.isAbsolute(relativeEntry)) return null;

  const rootPath = path.resolve(wikiRoot);
  const segments = relativeEntry.split("/").filter(Boolean);
  const candidate = path.resolve(rootPath, ...segments);
  if (!isPathInside(rootPath, candidate)) return null;

  try {
    // realpathSync throws for missing files, which also rejects the entry.
    const realRoot = fs.realpathSync(rootPath);
    const realCandidate = fs.realpathSync(candidate);
    return isPathInside(realRoot, realCandidate) ? realCandidate : null;
  } catch {
    return null;
  }
}
61
+
62
/**
 * Trim a status value and strip one pair of matching single or double quotes.
 *
 * @param {*} value
 * @returns {string}
 */
function unquoteStatus(value) {
  const trimmed = String(value || "").trim();
  if (trimmed.length < 2) return trimmed;

  const first = trimmed[0];
  const last = trimmed[trimmed.length - 1];
  const isQuoted = first === last && (first === '"' || first === "'");
  return isQuoted ? trimmed.slice(1, -1).trim() : trimmed;
}

/**
 * Read the `status` field from a markdown document's frontmatter.
 *
 * @param {string} content - Raw file content (LF or CRLF line endings).
 * @returns {string} Lower-cased status, or "candidate" when the document has
 *   no frontmatter, no `status` line, or an empty status.
 */
function readMarkdownStatus(content) {
  const match = String(content || "").match(/^\uFEFF?---\s*\n([\s\S]*?)\n---\s*\n?/);
  if (!match) return "candidate";

  for (const rawLine of match[1].split(/\r?\n/)) {
    // In CRLF files the last frontmatter line keeps a trailing "\r" that
    // `(.*)$` cannot match. Without this trim a final `status: private` line
    // was missed and the entry was treated as a searchable "candidate".
    const statusMatch = rawLine.trimEnd().match(/^status:\s*(.*)$/i);
    if (statusMatch) return unquoteStatus(statusMatch[1]).toLowerCase() || "candidate";
  }
  return "candidate";
}
84
+
85
/**
 * Whether an index entry may be returned from semantic search.
 *
 * The entry must resolve to a regular file inside the wiki root whose
 * current frontmatter status is not one of the excluded statuses. Any
 * filesystem error makes the entry non-searchable.
 *
 * @param {string} wikiRoot
 * @param {{entry_path?: string}} entry
 * @returns {boolean}
 */
function isSemanticEntrySearchable(wikiRoot, entry) {
  const sourcePath = resolveSemanticEntryPath(wikiRoot, entry?.entry_path);
  if (sourcePath === null) {
    return false;
  }

  try {
    if (!fs.statSync(sourcePath).isFile()) {
      return false;
    }
    // The status is read from the file itself, not from the index record.
    const fileText = fs.readFileSync(sourcePath, "utf-8");
    const excluded = EXCLUDED_STATUSES.has(readMarkdownStatus(fileText));
    return !excluded;
  } catch {
    return false;
  }
}
99
+
100
/**
 * Locale-independent string comparator (code-unit order).
 * Falsy arguments compare as the empty string.
 *
 * @returns {-1|0|1}
 */
function compareStrings(left, right) {
  const lhs = String(left || "");
  const rhs = String(right || "");
  if (lhs === rhs) {
    return 0;
  }
  return lhs < rhs ? -1 : 1;
}
107
+
108
// Coerce one vector component to a finite number; anything else counts as 0.
function toFiniteComponent(value) {
  const numeric = Number(value);
  return Number.isFinite(numeric) ? numeric : 0;
}

// Plain arrays and typed arrays (e.g. Float32Array) are both accepted as vectors.
function isVectorLike(value) {
  return Array.isArray(value) || (ArrayBuffer.isView(value) && !(value instanceof DataView));
}

/**
 * Euclidean length of a vector, using the same component coercion as
 * cosineSimilarity so that dot product and norms always agree.
 *
 * @param {ArrayLike<number>} vector
 * @returns {number}
 */
function vectorNorm(vector) {
  let sumOfSquares = 0;
  for (let index = 0; index < vector.length; index += 1) {
    const component = toFiniteComponent(vector[index]);
    sumOfSquares += component * component;
  }
  return Math.sqrt(sumOfSquares);
}

/**
 * Cosine similarity of two equally sized vectors.
 *
 * Returns 0 when either argument is not a vector, the vectors are empty or
 * differ in length, or either has zero length. Non-numeric components are
 * treated as 0, so the result is always a finite number and never NaN (NaN
 * would slip past a `similarity < threshold` filter).
 *
 * @param {ArrayLike<number>} left
 * @param {ArrayLike<number>} right
 * @returns {number}
 */
export function cosineSimilarity(left, right) {
  if (!isVectorLike(left) || !isVectorLike(right)) return 0;
  if (left.length === 0 || left.length !== right.length) return 0;

  let dot = 0;
  for (let index = 0; index < left.length; index += 1) {
    dot += toFiniteComponent(left[index]) * toFiniteComponent(right[index]);
  }

  const leftNorm = vectorNorm(left);
  const rightNorm = vectorNorm(right);
  if (leftNorm === 0 || rightNorm === 0) return 0;

  const similarity = dot / (leftNorm * rightNorm);
  // Guards against overflow (Infinity / Infinity) on extreme inputs.
  return Number.isFinite(similarity) ? similarity : 0;
}
126
+
127
/**
 * Rank index entries by cosine similarity to a query.
 *
 * Best-effort by design: an empty query, an empty or unreadable index, an
 * unavailable model or any unexpected error all produce an envelope with no
 * results instead of an exception.
 *
 * @param {string} wikiRoot
 * @param {string} query
 * @param {Object} [options]
 * @param {number} [options.topK] - Max results (falls back to `maxResults`, then the default; clamped by normalizeTopK).
 * @param {number} [options.maxResults] - Alternative spelling of `topK`.
 * @param {number} [options.threshold] - Minimum similarity; non-numbers and NaN use the default.
 * @param {Array<Object>} [options.entries] - Pre-loaded index entries; skips reading the index file.
 * @param {Object} [options.model] - Embedding model providing `isAvailable()`, `load()` and `embed(text)`.
 * @param {boolean} [options.fallbackFromKeyword] - Recorded in the envelope.
 * @returns {Promise<Object>} Result envelope with `results: [{path, title, similarity}]`,
 *   ordered by similarity (descending), then title, then path.
 */
export async function semanticSearch(wikiRoot, query, options = {}) {
  const searchText = String(query || "").trim();

  try {
    if (!searchText) return semanticResultEnvelope(searchText, [], options);

    const topK = normalizeTopK(options);
    const threshold = typeof options.threshold === "number" && !Number.isNaN(options.threshold)
      ? options.threshold
      : DEFAULT_THRESHOLD;
    const indexResult = Array.isArray(options.entries)
      ? { success: true, entries: options.entries }
      : loadSemanticIndex(wikiRoot);
    if (!indexResult.success || indexResult.entries.length === 0) {
      return semanticResultEnvelope(searchText, [], options);
    }

    const model = options.model || new EmbeddingModel();
    if (!model.isAvailable()) return semanticResultEnvelope(searchText, [], options);
    await model.load();
    const queryEmbedding = await model.embed(searchText);

    const candidates = [];
    for (const entry of indexResult.entries) {
      const similarity = cosineSimilarity(queryEmbedding, entry.embedding);
      // `!(a >= b)` rather than `a < b` so that a NaN similarity is rejected.
      if (!(similarity >= threshold)) continue;
      candidates.push({ entry, similarity });
    }

    candidates.sort((a, b) => {
      if (b.similarity !== a.similarity) return b.similarity - a.similarity;
      const titleCompare = compareStrings(a.entry.title, b.entry.title);
      if (titleCompare !== 0) return titleCompare;
      return compareStrings(a.entry.entry_path, b.entry.entry_path);
    });

    // Rank first, then check searchability best-first and stop once enough
    // results are found. The output equals filtering before sorting, but only
    // as many files are read from disk as are needed to fill the result list.
    const maxResults = Math.trunc(topK);
    const results = [];
    for (const { entry, similarity } of candidates) {
      if (results.length >= maxResults) break;
      if (!isSemanticEntrySearchable(wikiRoot, entry)) continue;
      results.push({
        path: entry.entry_path,
        title: entry.title,
        similarity,
      });
    }

    return semanticResultEnvelope(searchText, results, options);
  } catch {
    // Deliberate best-effort: semantic search must never break the caller.
    return semanticResultEnvelope(searchText, [], options);
  }
}