@titan-design/brain 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -40
- package/dist/adapters-ZNJ4FL2E.js +14 -0
- package/dist/brain-service-2QO6JM3Z.js +11 -0
- package/dist/chunk-4SD4JRLS.js +840 -0
- package/dist/chunk-BDNH2E2O.js +112 -0
- package/dist/chunk-PO3GJPIC.js +66 -0
- package/dist/chunk-QL2GPXP6.js +1803 -0
- package/dist/chunk-ZVXSW52A.js +307 -0
- package/dist/cli.js +10942 -3829
- package/dist/command-resolution-FJHE2YBQ.js +134 -0
- package/dist/file-scanner-LBBH5I44.js +12 -0
- package/dist/search-HNUALOXQ.js +14 -0
- package/package.json +4 -1
- package/scripts/diagnostic/assemble.ts +384 -0
- package/scripts/diagnostic/quality-gate.sh +60 -0
- package/scripts/diagnostic/run.sh +352 -0
- package/scripts/diagnostic/schema.json +54 -0
|
@@ -0,0 +1,840 @@
|
|
|
1
|
+
import {
|
|
2
|
+
scanForChanges
|
|
3
|
+
} from "./chunk-PO3GJPIC.js";
|
|
4
|
+
|
|
5
|
+
// src/services/indexing.ts
|
|
6
|
+
import { createHash } from "crypto";
|
|
7
|
+
import { readFileSync as readFileSync2, writeFileSync, mkdirSync as mkdirSync2, existsSync as existsSync2 } from "fs";
|
|
8
|
+
import { basename, dirname, join as join2, relative } from "path";
|
|
9
|
+
|
|
10
|
+
// src/services/markdown-parser.ts
|
|
11
|
+
import matter from "gray-matter";
|
|
12
|
+
|
|
13
|
+
// src/types.ts
|
|
14
|
+
// Frontmatter vocabularies. Values outside these lists are replaced by a
// fallback when frontmatter is coerced (see coerceFrontmatter).

// Note types accepted for notes that do not declare a `module`.
var VALID_CORE_NOTE_TYPES = ["note", "decision", "pattern", "research", "meeting", "session-log", "guide"];
// Accepted `tier` values.
var VALID_NOTE_TIERS = ["slow", "fast"];
// Accepted `confidence` values.
var VALID_NOTE_CONFIDENCES = ["high", "medium", "low", "speculative"];
// Accepted `status` values.
var VALID_NOTE_STATUSES = ["current", "outdated", "deprecated", "draft"];
// Inbox source identifiers; exported, not referenced elsewhere in this chunk.
var VALID_INBOX_SOURCES = ["cli", "rss", "crawler", "alert", "api", "file", "notion", "linear"];
|
|
36
|
+
|
|
37
|
+
// src/utils.ts
|
|
38
|
+
/**
 * Converts text into a lowercase, hyphen-separated slug.
 *
 * After lowercasing, every run of characters outside `a-z0-9` collapses to a
 * single hyphen and hyphens at either end are removed.
 *
 * @param {string} text - Text to slugify.
 * @returns {string} The slug; empty when `text` contains no ASCII letters or digits.
 */
function slugify(text) {
  const lowered = text.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
  return hyphenated.replace(/^-+|-+$/g, "");
}
|
|
41
|
+
/**
 * Converts an interval such as "30d", "2w" or "6 m" into a number of days.
 *
 * Weeks count as 7 days and months as 30. Anything that does not match
 * `<digits><optional whitespace><d|w|m>` yields the 90-day fallback.
 *
 * @param {string} interval - Interval expression.
 * @returns {number} Interval length in days.
 */
function parseIntervalDays(interval) {
  const FALLBACK_DAYS = 90;
  const DAYS_PER_UNIT = { d: 1, w: 7, m: 30 };
  const parsed = interval.match(/^(\d+)\s*(d|w|m)$/);
  if (!parsed) {
    return FALLBACK_DAYS;
  }
  const [, amount, unit] = parsed;
  return parseInt(amount, 10) * DAYS_PER_UNIT[unit];
}
|
|
56
|
+
|
|
57
|
+
// src/services/markdown-parser.ts
|
|
58
|
+
// Largest estimated token count a section may have and still be emitted as a
// single chunk; larger sections are split further.
var MAX_CHUNK_TOKENS = 512;

// A line that begins with a code-fence marker.
var FENCE_OPEN = /^```/;

// A line consisting only of a code-fence marker (plus optional whitespace).
var FENCE_CLOSE = /^```\s*$/;

// A line that starts with a pipe and contains a later pipe, i.e. a table row.
var TABLE_LINE = /^\|.+\|/;

// Sections whose text is shorter than this many characters are not chunked.
var MIN_CHUNK_LENGTH = 20;

// Markdown image reference: group 1 is the alt text, group 2 the target.
var IMAGE_REF = /!\[([^\]]*)\]\(([^)]+)\)/g;
|
|
64
|
+
/**
 * Estimates a token count as one token per four characters, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count; 0 for an empty string.
 */
function estimateTokens(text) {
  return text.length === 0 ? 0 : Math.ceil(text.length / 4);
}
|
|
68
|
+
/**
 * Parses a markdown note into its ID, normalized frontmatter, chunks,
 * declared relations and image references.
 *
 * @param {string} filePath - Path of the note; used for ID and title fallbacks.
 * @param {string} content - Full file content, including any frontmatter.
 * @returns {object} Parsed note. `imageRefs` is `undefined` when the body has
 *   no image references.
 */
function parseMarkdown(filePath, content) {
  const parsedFile = matter(content);
  const rawFrontmatter = parsedFile.data;
  const body = parsedFile.content;

  const id = deriveId(filePath, rawFrontmatter);
  const frontmatter = coerceFrontmatter(filePath, rawFrontmatter);
  const chunks = chunkBody(body);
  const relations = extractRelations(id, rawFrontmatter);
  const images = extractImageReferences(body);

  return {
    id,
    filePath,
    frontmatter,
    rawFrontmatter,
    content: body,
    chunks,
    relations,
    imageRefs: images.length > 0 ? images : undefined
  };
}
|
|
86
|
+
/**
 * Derives a note ID, preferring in order: a non-empty frontmatter `id`, the
 * slug of a frontmatter `title`, and finally the file name without `.md`.
 *
 * A title whose slug is empty (for example one made only of punctuation)
 * falls through to the file name so the ID is never empty because of the
 * title. Both `/` and `\` are treated as path separators.
 *
 * @param {string} filePath - Path of the note file.
 * @param {object} data - Raw frontmatter.
 * @returns {string} The derived note ID.
 */
function deriveId(filePath, data) {
  if (typeof data.id === "string" && data.id.length > 0) return data.id;
  if (typeof data.title === "string" && data.title.length > 0) {
    const slug = slugify(data.title);
    if (slug.length > 0) return slug;
  }
  const filename = filePath.split(/[\\/]/).pop() ?? filePath;
  return filename.replace(/\.md$/, "");
}
|
|
92
|
+
/**
 * Normalizes raw frontmatter into the shape used by the indexer.
 *
 * Invalid or missing values are dropped (`undefined`) or defaulted:
 * `title` falls back to the file name without `.md`, `type` to "note" and
 * `tier` to "slow". When the note declares a string `module`, any string
 * `type` is accepted as-is instead of being checked against the core types.
 *
 * The file name is taken after the last `/` or `\`, so a path using either
 * separator yields just the file name as the fallback title.
 *
 * @param {string} filePath - Path of the note file.
 * @param {object} data - Raw frontmatter.
 * @returns {object} Normalized frontmatter.
 */
function coerceFrontmatter(filePath, data) {
  const filename = (filePath.split(/[\\/]/).pop() ?? filePath).replace(/\.md$/, "");
  const hasModule = typeof data.module === "string";
  const type = hasModule && typeof data.type === "string"
    ? data.type
    : coerceEnum(data.type, VALID_CORE_NOTE_TYPES, "note");
  return {
    id: typeof data.id === "string" ? data.id : undefined,
    title: typeof data.title === "string" ? data.title : filename,
    type,
    tier: coerceEnum(data.tier, VALID_NOTE_TIERS, "slow"),
    category: coerceString(data.category),
    tags: coerceTags(data.tags),
    summary: coerceString(data.summary),
    confidence: coerceEnum(data.confidence, VALID_NOTE_CONFIDENCES, undefined),
    status: coerceEnum(data.status, VALID_NOTE_STATUSES, undefined),
    sources: coerceSources(data.sources),
    created: coerceDate(data.created),
    modified: coerceDate(data.modified),
    "last-reviewed": coerceDate(data["last-reviewed"]),
    "review-interval": coerceReviewInterval(data["review-interval"]),
    expires: coerceDate(data.expires),
    date: coerceDate(data.date),
    participants: coerceStringArray(data.participants),
    project: coerceString(data.project),
    outcome: coerceString(data.outcome),
    related: coerceStringArray(data.related),
    supersedes: coerceString(data.supersedes),
    parent: coerceString(data.parent),
    module: coerceString(data.module),
    "module-instance": coerceString(data["module-instance"]),
    "content-dir": coerceString(data["content-dir"])
  };
}
|
|
124
|
+
/**
 * Returns a string for scalar values and `undefined` for everything else.
 *
 * Strings pass through, other non-object values (numbers, booleans, ...) are
 * converted with `String()`, and `null`, `undefined`, objects and arrays
 * yield `undefined`.
 *
 * @param {unknown} value - Raw frontmatter value.
 * @returns {string|undefined} The coerced string.
 */
function coerceString(value) {
  if (value == null || typeof value === "object") {
    return undefined;
  }
  return typeof value === "string" ? value : String(value);
}
|
|
129
|
+
/**
 * Returns `value` when it is a string contained in `valid`, else `fallback`.
 *
 * @param {unknown} value - Raw frontmatter value.
 * @param {string[]} valid - Accepted values.
 * @param {*} fallback - Result for anything not in `valid`.
 * @returns {*} `value` or `fallback`.
 */
function coerceEnum(value, valid, fallback) {
  const isAllowed = typeof value === "string" && valid.includes(value);
  return isAllowed ? value : fallback;
}
|
|
133
|
+
/**
 * Normalizes the `tags` frontmatter field into an array of strings.
 *
 * - An array keeps only its string entries.
 * - A string is split on commas; every piece is trimmed and empty pieces are
 *   dropped. This applies to single-tag strings too, so `" solo "` becomes
 *   `["solo"]` and `""` becomes `[]` rather than `[""]`.
 * - Anything else yields `undefined`.
 *
 * @param {unknown} value - Raw frontmatter value.
 * @returns {string[]|undefined} The tag list.
 */
function coerceTags(value) {
  if (Array.isArray(value)) {
    return value.filter((v) => typeof v === "string");
  }
  if (typeof value === "string") {
    return value
      .split(",")
      .map((tag) => tag.trim())
      .filter((tag) => tag.length > 0);
  }
  return undefined;
}
|
|
145
|
+
/**
 * Normalizes a date-like frontmatter value.
 *
 * Strings pass through unchanged, valid `Date` objects become ISO-8601
 * strings, and invalid dates or any other type yield `undefined`.
 *
 * @param {unknown} value - Raw frontmatter value.
 * @returns {string|undefined} The date string.
 */
function coerceDate(value) {
  if (typeof value === "string") {
    return value;
  }
  if (!(value instanceof Date)) {
    return undefined;
  }
  return isNaN(value.getTime()) ? undefined : value.toISOString();
}
|
|
152
|
+
/**
 * Normalizes the `sources` frontmatter field.
 *
 * Keeps only object entries with a string `url`; `accessed` and `type`
 * default to "" when they are not strings.
 *
 * @param {unknown} value - Raw frontmatter value.
 * @returns {Array<{url: string, accessed: string, type: string}>|undefined}
 *   The usable sources, or `undefined` when there are none.
 */
function coerceSources(value) {
  if (!Array.isArray(value)) {
    return undefined;
  }
  const textOrEmpty = (field) => (typeof field === "string" ? field : "");
  const sources = value
    .filter((entry) => typeof entry === "object" && entry !== null && typeof entry.url === "string")
    .map((entry) => ({
      url: entry.url,
      accessed: textOrEmpty(entry.accessed),
      type: textOrEmpty(entry.type)
    }));
  return sources.length > 0 ? sources : undefined;
}
|
|
166
|
+
/**
 * Accepts a review interval only in the exact form `<digits><d|w|m>`.
 *
 * @param {unknown} value - Raw frontmatter value.
 * @returns {string|undefined} `value` when well-formed, else `undefined`.
 */
function coerceReviewInterval(value) {
  const isWellFormed = typeof value === "string" && /^\d+[dwm]$/.test(value);
  return isWellFormed ? value : undefined;
}
|
|
170
|
+
/**
 * Keeps the string entries of an array-valued frontmatter field.
 *
 * @param {unknown} value - Raw frontmatter value.
 * @returns {string[]|undefined} The strings, or `undefined` when `value` is
 *   not an array or contains no strings.
 */
function coerceStringArray(value) {
  if (!Array.isArray(value)) {
    return undefined;
  }
  const strings = value.filter((item) => typeof item === "string");
  return strings.length === 0 ? undefined : strings;
}
|
|
177
|
+
/**
 * Renders a heading stack as markdown heading lines, outermost first.
 *
 * @param {Array<{level: number, text: string}>} stack - Open headings.
 * @returns {string|null} One `#`-prefixed line per heading joined by
 *   newlines, or `null` for an empty stack.
 */
function buildAncestry(stack) {
  if (stack.length === 0) {
    return null;
  }
  const rendered = [];
  for (const { level, text } of stack) {
    rendered.push(`${"#".repeat(level)} ${text}`);
  }
  return rendered.join("\n");
}
|
|
181
|
+
/**
 * Splits a markdown body into sections at level 1-3 ATX headings.
 *
 * The first section (heading `null`, level 0) collects everything before the
 * first heading and is always returned, even when empty. Each later section
 * records its heading text, level and the rendered chain of enclosing
 * headings.
 *
 * Lines inside fenced code blocks are always content, so a `# comment` line
 * in a shell or Python snippet does not start a new section. Line endings
 * may be LF or CRLF; the returned lines never include the `\r`.
 *
 * @param {string} body - Markdown body without frontmatter.
 * @returns {Array<{heading: string|null, headingLevel: number,
 *   headingAncestry: string|null, lines: string[]}>} Sections in document order.
 */
function splitIntoSections(body) {
  const sections = [];
  const headingStack = [];
  let current = { heading: null, headingLevel: 0, headingAncestry: null, lines: [] };
  let inFence = false;
  for (const line of body.split(/\r?\n/)) {
    if (inFence) {
      current.lines.push(line);
      // A fence closes on a line holding only the marker.
      if (/^```\s*$/.test(line)) inFence = false;
      continue;
    }
    // An opening fence may carry an info string, which cannot contain backticks.
    if (/^```[^`]*$/.test(line)) {
      inFence = true;
      current.lines.push(line);
      continue;
    }
    const match = line.match(/^(#{1,3})\s+(.+)$/);
    if (!match) {
      current.lines.push(line);
      continue;
    }
    sections.push(current);
    const level = match[1].length;
    const text = match[2];
    // Drop headings at the same or a deeper level; they are no longer ancestors.
    while (headingStack.length > 0 && headingStack[headingStack.length - 1].level >= level) {
      headingStack.pop();
    }
    headingStack.push({ level, text });
    current = {
      heading: text,
      headingLevel: level,
      headingAncestry: buildAncestry(headingStack),
      lines: []
    };
  }
  sections.push(current);
  return sections;
}
|
|
209
|
+
/**
 * Prefixes `text` with its heading ancestry, separated by a blank line.
 *
 * @param {string|null} ancestry - Rendered heading chain, if any.
 * @param {string} text - Section text.
 * @returns {string} `text` alone when `ancestry` is empty or null.
 */
function prependAncestry(ancestry, text) {
  return ancestry ? "".concat(ancestry, "\n\n", text) : text;
}
|
|
215
|
+
/**
 * Turns a markdown body into an ordered list of chunks.
 *
 * Each heading-delimited section becomes one "section" chunk when it fits in
 * MAX_CHUNK_TOKENS (ancestry included) and contains no table row; otherwise
 * it is handed to splitOversizedSection. Sections whose text is shorter than
 * MIN_CHUNK_LENGTH characters are skipped.
 *
 * @param {string} body - Markdown body without frontmatter.
 * @returns {object[]} Chunks with consecutive `position` values from 0.
 */
function chunkBody(body) {
  const chunks = [];
  let position = 0;
  for (const { heading, headingAncestry, lines } of splitIntoSections(body)) {
    const text = synthesizeImageParagraphs(lines.join("\n").trim());
    if (text.length < MIN_CHUNK_LENGTH) {
      continue;
    }
    const contentWithAncestry = prependAncestry(headingAncestry, text);
    const tokens = estimateTokens(contentWithAncestry);
    const needsSplit = tokens > MAX_CHUNK_TOKENS || /^\|.+\|/m.test(text);
    if (needsSplit) {
      const subChunks = splitOversizedSection(heading, headingAncestry, text, position);
      position += subChunks.length;
      chunks.push(...subChunks);
      continue;
    }
    chunks.push({
      heading,
      headingAncestry,
      text: contentWithAncestry,
      tokenCount: tokens,
      chunkType: "section",
      cutType: "heading_boundary",
      position: position++
    });
  }
  return chunks;
}
|
|
248
|
+
/**
 * Splits one section into paragraph-level chunks.
 *
 * Paragraphs are accumulated until adding the next one would exceed the
 * token budget (MAX_CHUNK_TOKENS minus the ancestry prefix and any carried
 * overlap) or would mix table and non-table content. Each emitted chunk is
 * prefixed with the heading ancestry. Non-table chunks pass a trailing
 * excerpt on to the next chunk as overlap. A single paragraph larger than
 * the budget is still emitted whole.
 *
 * @param {string|null} heading - Section heading text.
 * @param {string|null} headingAncestry - Rendered heading chain.
 * @param {string} text - Section text without the ancestry prefix.
 * @param {number} startPosition - Position assigned to the first chunk.
 * @returns {object[]} Chunks of type "paragraph" with consecutive positions.
 */
function splitOversizedSection(heading, headingAncestry, text, startPosition) {
  const ancestryPrefix = headingAncestry ? headingAncestry + "\n\n" : "";
  const chunkBudget = MAX_CHUNK_TOKENS - estimateTokens(ancestryPrefix);
  const chunks = [];
  let pending = ""; // paragraphs gathered for the chunk being built
  let carried = ""; // overlap taken from the previously emitted chunk

  const emit = (cutType) => {
    const rawText = carried.length > 0 ? carried + "\n\n" + pending : pending;
    const chunkText = ancestryPrefix + rawText.trim();
    chunks.push({
      heading,
      headingAncestry,
      text: chunkText,
      tokenCount: estimateTokens(chunkText),
      chunkType: "paragraph",
      cutType,
      position: startPosition + chunks.length
    });
  };

  for (const para of splitParagraphsProtectingFences(text)) {
    const merged = pending.length > 0 ? pending + "\n\n" + para : para;
    if (pending.length === 0) {
      pending = merged;
      continue;
    }
    const paraIsTable = TABLE_LINE.test(para);
    const pendingIsTable = TABLE_LINE.test(pending);
    const available = carried.length > 0
      ? chunkBudget - estimateTokens(carried + "\n\n")
      : chunkBudget;
    const mustCut = estimateTokens(merged) > available || paraIsTable !== pendingIsTable;
    if (!mustCut) {
      pending = merged;
      continue;
    }
    let cutType = "paragraph_end";
    if (para.startsWith("```")) {
      cutType = "code_fence";
    } else if (paraIsTable || pendingIsTable) {
      cutType = "table_boundary";
    }
    emit(cutType);
    // Table rows are not repeated as overlap in the following chunk.
    carried = pendingIsTable ? "" : extractOverlap(pending);
    pending = para;
  }

  if (pending.length > 0) {
    emit("paragraph_end");
  }
  return chunks;
}
|
|
299
|
+
/**
 * Returns the tail of `text` to repeat at the start of the next chunk.
 *
 * The tail is roughly 10% of the estimated tokens, converted back to
 * characters at four per token. Text no longer than that is returned whole.
 *
 * @param {string} text - Text of the chunk just emitted.
 * @returns {string} Trailing excerpt of `text`.
 */
function extractOverlap(text) {
  const overlapChars = Math.ceil(estimateTokens(text) * 0.1) * 4;
  return text.length <= overlapChars ? text : text.slice(-overlapChars);
}
|
|
305
|
+
/**
 * Splits text into paragraphs without breaking fenced code blocks or tables.
 *
 * A fenced block (opening line through closing fence line) and a run of
 * consecutive table rows each become one paragraph. Other text is split at
 * blank lines. Paragraphs closed by a blank line or by the end of the text
 * are passed through synthesizeImageContext; fences, tables and any text
 * interrupted by one are stored as-is.
 *
 * @param {string} text - Section text.
 * @returns {string[]} Non-empty, trimmed paragraphs in document order.
 */
function splitParagraphsProtectingFences(text) {
  const paragraphs = [];
  let pendingLines = [];
  let insideFence = false;
  let insideTable = false;

  // Closes the paragraph being collected, if it has any content.
  const flush = (withImageContext) => {
    const block = pendingLines.join("\n").trim();
    if (block.length > 0) {
      paragraphs.push(withImageContext ? synthesizeImageContext(block) : block);
    }
    pendingLines = [];
  };

  for (const line of text.split("\n")) {
    if (insideFence) {
      pendingLines.push(line);
      if (FENCE_CLOSE.test(line) && pendingLines.length > 1) {
        flush(false);
        insideFence = false;
      }
      continue;
    }
    if (FENCE_OPEN.test(line)) {
      flush(false);
      insideFence = true;
      insideTable = false;
      pendingLines.push(line);
      continue;
    }
    if (TABLE_LINE.test(line)) {
      if (!insideTable) {
        flush(false);
        insideTable = true;
      }
      pendingLines.push(line);
      continue;
    }
    if (insideTable) {
      // First non-table line after a table ends the table paragraph.
      flush(false);
      insideTable = false;
    }
    if (line.trim() === "") {
      flush(true);
    } else {
      pendingLines.push(line);
    }
  }

  flush(true);
  return paragraphs;
}
|
|
366
|
+
/**
 * Collects every markdown image reference in `content`.
 *
 * @param {string} content - Markdown text.
 * @returns {Array<{alt: string, path: string}>} References in document order.
 */
function extractImageReferences(content) {
  // Work on a copy so the shared global-flag pattern keeps its own lastIndex.
  const pattern = new RegExp(IMAGE_REF.source, IMAGE_REF.flags);
  const refs = [];
  for (let found = pattern.exec(content); found !== null; found = pattern.exec(content)) {
    const [, alt, path] = found;
    refs.push({ alt, path });
  }
  return refs;
}
|
|
375
|
+
/**
 * Applies synthesizeImageContext to each blank-line-separated paragraph.
 *
 * @param {string} text - Section text.
 * @returns {string} The text with each paragraph processed and re-joined.
 */
function synthesizeImageParagraphs(text) {
  const paragraphs = text.split(/\n\n/);
  const rewritten = paragraphs.map((paragraph) => synthesizeImageContext(paragraph));
  return rewritten.join("\n\n");
}
|
|
378
|
+
/**
 * Rewrites a paragraph that is mostly an image into a textual stand-in.
 *
 * When the first image reference has alt text and fewer than 50 characters
 * remain once it is removed, the paragraph becomes `[Image: <alt>] <rest>`.
 * Every other paragraph is returned unchanged.
 *
 * @param {string} paragraph - A single paragraph of markdown.
 * @returns {string} The original or rewritten paragraph.
 */
function synthesizeImageContext(paragraph) {
  const found = paragraph.match(/!\[([^\]]*)\]\(([^)]+)\)/);
  if (!found) {
    return paragraph;
  }
  const [wholeImage, alt] = found;
  const before = paragraph.slice(0, found.index);
  const after = paragraph.slice(found.index + wholeImage.length);
  const remainder = (before + after).trim();
  const isMostlyImage = remainder.length < 50 && alt.length > 0;
  return isMostlyImage ? `[Image: ${alt}] ${remainder}`.trim() : paragraph;
}
|
|
388
|
+
/**
 * Extracts the slugs of local notes linked from markdown content.
 *
 * A slug is the last path segment of a link target with any `#fragment` and
 * a trailing `.md` removed. Skipped:
 * - image references (`![alt](path)`),
 * - in-page anchors (`#section`),
 * - targets with a URL scheme (`https:`, `mailto:`, `ftp:`, ...). The check
 *   is for a real scheme, so a local note such as `http-caching.md` is kept.
 *
 * @param {string} content - Markdown text.
 * @returns {string[]} Unique slugs in order of first appearance.
 */
function extractNoteLinks(content) {
  // The lookbehind excludes image references, which share the link syntax.
  const linkPattern = /(?<!!)\[([^\]]*)\]\(([^)]+)\)/g;
  const schemePattern = /^[a-z][a-z0-9+.-]*:/i;
  const slugs = new Set();
  for (const match of content.matchAll(linkPattern)) {
    const target = match[2].trim();
    if (target.startsWith("#") || schemePattern.test(target)) continue;
    const path = target.split("#")[0];
    const slug = path.replace(/\.md$/, "").split("/").pop() ?? path;
    if (slug.length > 0) slugs.add(slug);
  }
  return [...slugs];
}
|
|
400
|
+
/**
 * Builds relation records from the `related`, `supersedes` and `parent`
 * frontmatter fields.
 *
 * String entries of `related` become "related-to" relations; a string
 * `supersedes` or `parent` becomes a relation of that type. Non-string
 * values are ignored.
 *
 * @param {string} sourceId - ID of the note declaring the relations.
 * @param {object} data - Raw frontmatter.
 * @returns {Array<{sourceId: string, targetId: string, type: string}>}
 *   Relations: related entries first, then supersedes, then parent.
 */
function extractRelations(sourceId, data) {
  const relatedTargets = Array.isArray(data.related) ? data.related : [];
  const relations = relatedTargets
    .filter((target) => typeof target === "string")
    .map((targetId) => ({ sourceId, targetId, type: "related-to" }));

  for (const field of ["supersedes", "parent"]) {
    const targetId = data[field];
    if (typeof targetId === "string") {
      relations.push({ sourceId, targetId, type: field });
    }
  }
  return relations;
}
|
|
429
|
+
|
|
430
|
+
// src/services/content-dir.ts
|
|
431
|
+
import { mkdirSync, existsSync, readdirSync, readFileSync, renameSync, rmSync } from "fs";
|
|
432
|
+
import { join, extname } from "path";
|
|
433
|
+
// File extensions (lowercase) whose content is included by readIndexableContent.
var INDEXABLE_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt", ".json", ".yaml", ".yml"]);

/**
 * Concatenates the text of every indexable file directly inside `contentDir`.
 *
 * Entries are visited in sorted name order and matched by extension,
 * case-insensitively. Subdirectories are not descended into. Reading is best
 * effort: an entry that cannot be read is skipped.
 *
 * @param {string} contentDir - Directory to read.
 * @returns {string} File contents joined by blank lines; "" when the
 *   directory does not exist or holds nothing readable.
 */
function readIndexableContent(contentDir) {
  if (!existsSync(contentDir)) {
    return "";
  }
  const indexableNames = readdirSync(contentDir)
    .sort()
    .filter((name) => INDEXABLE_EXTENSIONS.has(extname(name).toLowerCase()));

  const parts = [];
  for (const name of indexableNames) {
    try {
      parts.push(readFileSync(join(contentDir, name), "utf-8"));
    } catch {
      // Deliberately ignored: unreadable entries (e.g. a directory whose name
      // ends in ".md") must not abort indexing of the note.
    }
  }
  return parts.join("\n\n");
}
|
|
449
|
+
|
|
450
|
+
// src/services/graph.ts
|
|
451
|
+
// Hard cap on how many hops traverseGraph follows, whatever depth is requested.
var MAX_DEPTH = 3;

// Relation types that traverseGraph also follows from target back to source.
var BIDIRECTIONAL_TYPES = /* @__PURE__ */ new Set(["related-to", "related", "parent", "depends_on"]);
|
|
453
|
+
/**
 * Converts a vector distance into a cosine similarity using `1 - d^2 / 2`.
 *
 * NOTE(review): this identity holds for Euclidean distance between
 * unit-length vectors; presumably that is what `db.searchVector` reports.
 * Confirm against the store.
 *
 * @param {number} distance - Distance reported by the vector search.
 * @returns {number} Cosine similarity.
 */
function distanceToCosineSim(distance) {
  return 1 - distance * distance / 2;
}

/**
 * Proposes "related" links from a note to its nearest neighbours.
 *
 * The query vector is `embedding` when given, otherwise the stored embedding
 * of the note's first chunk. One extra hit is requested because the note
 * itself is expected among the results. Hits are skipped when they belong to
 * the note itself, repeat an already proposed note, fall below `threshold`,
 * or are already a relation target of the note. At most `maxLinks` links are
 * returned, even when the note itself is absent from the hits.
 *
 * @param {object} db - Store providing `getChunksForNote`,
 *   `getChunkEmbedding`, `searchVector` and `getRelationsFrom`.
 * @param {string} noteId - Note to link from.
 * @param {number} [threshold=0.65] - Minimum cosine similarity.
 * @param {number} [maxLinks=5] - Maximum number of links to return.
 * @param {*} [embedding] - Query vector; looked up when omitted.
 * @returns {Array<{sourceId: string, targetId: string, type: string}>}
 *   Proposed links in search-result order.
 */
function computeAutoLinks(db, noteId, threshold = 0.65, maxLinks = 5, embedding) {
  let queryEmbedding = embedding;
  if (!queryEmbedding) {
    const chunks = db.getChunksForNote(noteId);
    if (chunks.length === 0) return [];
    const stored = db.getChunkEmbedding(chunks[0].id);
    if (!stored) return [];
    queryEmbedding = stored;
  }
  const vectorResults = db.searchVector(queryEmbedding, maxLinks + 1);
  const existing = db.getRelationsFrom(noteId);
  const existingTargets = new Set(existing.map((r) => r.targetId));
  const links = [];
  const seen = /* @__PURE__ */ new Set();
  for (const result of vectorResults) {
    // The extra requested hit must not turn into an extra link.
    if (links.length >= maxLinks) break;
    if (result.noteId === noteId) continue;
    if (seen.has(result.noteId)) continue;
    const cosineSim = distanceToCosineSim(result.distance);
    if (cosineSim < threshold) continue;
    if (existingTargets.has(result.noteId)) continue;
    seen.add(result.noteId);
    links.push({
      sourceId: noteId,
      targetId: result.noteId,
      type: "related"
    });
  }
  return links;
}
|
|
485
|
+
/**
 * Breadth-first traversal of the relation graph around `rootId`.
 *
 * Outgoing relations are always followed; incoming relations only when
 * their type is in BIDIRECTIONAL_TYPES. The requested depth is capped at
 * MAX_DEPTH. Each reached note records the hop count at which it was first
 * seen. Notes missing from the store fall back to their ID as title, "note"
 * as type and "slow" as tier.
 *
 * @param {object} db - Store providing `getRelationsBatch` and `getNotesByIds`.
 * @param {string} rootId - Note to start from.
 * @param {number} depth - Requested number of hops.
 * @returns {{root: object, nodes: object[], edges: object[]}} The root
 *   descriptor, every reached node (root included) and the de-duplicated
 *   edges whose endpoints were both reached.
 */
function traverseGraph(db, rootId, depth) {
  const depthLimit = Math.min(depth, MAX_DEPTH);
  const depthById = /* @__PURE__ */ new Map([[rootId, 0]]);
  const collectedEdges = [];
  let frontier = [rootId];

  for (let d = 0; d < depthLimit; d++) {
    const relationsBatch = db.getRelationsBatch(frontier);
    const nextFrontier = [];
    const discover = (id) => {
      if (depthById.has(id)) return;
      depthById.set(id, d + 1);
      nextFrontier.push(id);
    };
    for (const nodeId of frontier) {
      const rels = relationsBatch.get(nodeId);
      if (!rels) continue;
      for (const outgoing of rels.from) {
        collectedEdges.push(outgoing);
        discover(outgoing.targetId);
      }
      for (const incoming of rels.to) {
        if (!BIDIRECTIONAL_TYPES.has(incoming.type)) continue;
        collectedEdges.push(incoming);
        discover(incoming.sourceId);
      }
    }
    frontier = nextFrontier;
  }

  const notesById = db.getNotesByIds([...depthById.keys()]);
  const describe = (id, nodeDepth) => {
    const note = notesById.get(id);
    return {
      id,
      title: note?.title ?? id,
      type: note?.type ?? "note",
      tier: note?.tier ?? "slow",
      depth: nodeDepth
    };
  };

  const root = describe(rootId, 0);
  const nodes = Array.from(depthById, ([id, nodeDepth]) => describe(id, nodeDepth));
  const edges = deduplicateEdges(
    collectedEdges.filter((edge) => depthById.has(edge.sourceId) && depthById.has(edge.targetId))
  );
  return { root, nodes, edges };
}
|
|
541
|
+
/**
 * Finds notes connected to a set of seed notes and scores them by distance.
 *
 * Every seed is traversed with traverseGraph. A reached note that is not
 * itself a seed scores `0.5 ** hops`; when it is reachable from several
 * seeds the highest score wins.
 *
 * @param {object} db - Store passed through to traverseGraph.
 * @param {string[]} noteIds - Seed note IDs.
 * @param {number} depth - Requested traversal depth.
 * @returns {Array<{noteId: string, decayedScore: number}>} Connected notes,
 *   highest score first.
 */
function expandResults(db, noteIds, depth) {
  if (noteIds.length === 0) {
    return [];
  }
  const seeds = new Set(noteIds);
  const bestScore = /* @__PURE__ */ new Map();
  for (const seed of noteIds) {
    const { nodes } = traverseGraph(db, seed, depth);
    for (const { id, depth: hops } of nodes) {
      if (seeds.has(id)) continue;
      const previous = bestScore.get(id) ?? 0;
      bestScore.set(id, Math.max(previous, Math.pow(0.5, hops)));
    }
  }
  const ranked = [...bestScore].map(([noteId, decayedScore]) => ({ noteId, decayedScore }));
  ranked.sort((a, b) => b.decayedScore - a.decayedScore);
  return ranked;
}
|
|
556
|
+
/**
 * Scores every note reachable from `rootId` by graph distance.
 *
 * A note at depth `d` scores `1 / 2 ** d`. Its `relationType` is the type of
 * the first traversed edge recorded for it: for each edge the recorded
 * neighbour is the target when the edge starts at the root, otherwise the
 * edge's source. Notes with no recorded edge get "related".
 *
 * @param {object} db - Store passed through to traverseGraph.
 * @param {string} rootId - Note to start from.
 * @param {number} [maxDepth=3] - Requested traversal depth.
 * @returns {Map<string, {noteId: string, score: number, relationType: string,
 *   depth: number}>} Scores keyed by note ID; the root is excluded.
 */
function computeGraphScores(db, rootId, maxDepth = 3) {
  const { nodes, edges } = traverseGraph(db, rootId, maxDepth);

  const firstEdgeType = /* @__PURE__ */ new Map();
  for (const { sourceId, targetId, type } of edges) {
    const neighbor = sourceId === rootId ? targetId : sourceId;
    if (!firstEdgeType.has(neighbor)) {
      firstEdgeType.set(neighbor, type);
    }
  }

  const scores = /* @__PURE__ */ new Map();
  for (const { id, depth } of nodes) {
    if (id === rootId) continue;
    const score = 1 / Math.pow(2, depth);
    const previous = scores.get(id);
    if (!previous || score > previous.score) {
      const relationType = firstEdgeType.get(id) ?? "related";
      scores.set(id, { noteId: id, score, relationType, depth });
    }
  }
  return scores;
}
|
|
578
|
+
/**
 * Removes repeated edges, keeping the first occurrence of each
 * source/target/type combination.
 *
 * @param {Array<{sourceId: string, targetId: string, type: string}>} edges
 * @returns {object[]} The unique edges in their original order.
 */
function deduplicateEdges(edges) {
  const seenKeys = /* @__PURE__ */ new Set();
  return edges.filter(({ sourceId, targetId, type }) => {
    const key = `${sourceId}|${targetId}|${type}`;
    if (seenKeys.has(key)) {
      return false;
    }
    seenKeys.add(key);
    return true;
  });
}
|
|
590
|
+
|
|
591
|
+
// src/services/indexing.ts
|
|
592
|
+
/**
 * Tells whether a file is excluded from indexing: anything under a
 * `/_templates/` directory, and any file named `_index.md`.
 *
 * @param {string} filePath - Path of the candidate file.
 * @returns {boolean} `true` when the file must be skipped.
 */
function isSkippedFile(filePath) {
  const isTemplate = filePath.includes("/_templates/");
  if (isTemplate) {
    return true;
  }
  return basename(filePath) === "_index.md";
}
|
|
595
|
+
/**
 * Sets a top-level frontmatter field in a markdown file, in place.
 *
 * An existing `field:` line is replaced; otherwise the field is appended as
 * the last frontmatter line. Nothing is written when the file does not start
 * with a `---` delimiter (optionally after a byte-order mark) or has no
 * closing delimiter. This keeps a `---` horizontal rule in a file without
 * frontmatter from being mistaken for the end of a frontmatter block.
 *
 * `field` is matched literally and `value` is inserted verbatim, so regex
 * metacharacters in `field` and `$` sequences in `value` have no special
 * meaning.
 *
 * @param {string} filePath - File to update.
 * @param {string} field - Frontmatter key.
 * @param {string} value - Text written after `field: `.
 * @returns {void}
 */
function addFrontmatterField(filePath, field, value) {
  const content = readFileSync2(filePath, "utf-8");
  const start = content.charCodeAt(0) === 0xfeff ? 1 : 0;
  if (!content.startsWith("---", start)) return;
  // Search right after the opening delimiter so empty frontmatter is found too.
  const endOfFrontmatter = content.indexOf("\n---", start + 3);
  if (endOfFrontmatter === -1) return;
  const frontmatter = content.slice(0, endOfFrontmatter);
  const rest = content.slice(endOfFrontmatter);
  const escapedField = String(field).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const fieldRegex = new RegExp(`^${escapedField}:.*$`, "m");
  const fieldLine = `${field}: ${value}`;
  let updated;
  if (fieldRegex.test(frontmatter)) {
    // A replacer function keeps "$&", "$1" etc. in the value literal.
    updated = frontmatter.replace(fieldRegex, () => fieldLine) + rest;
  } else {
    updated = `${frontmatter}\n${fieldLine}${rest}`;
  }
  writeFileSync(filePath, updated, "utf-8");
}
|
|
610
|
+
/**
 * Flattens a parsed note into the record passed to `db.upsertNote`.
 *
 * Missing optional fields become `null`, `status` defaults to "current",
 * tags are joined with commas, sources are JSON-encoded and the complete raw
 * frontmatter is kept as JSON in `metadata`.
 *
 * @param {object} parsed - Result of parseMarkdown.
 * @returns {object} Note record.
 */
function frontmatterToRecord(parsed) {
  const { id, filePath, frontmatter: fm, rawFrontmatter } = parsed;
  const metadata = JSON.stringify(rawFrontmatter);
  const orNull = (field) => field ?? null;
  return {
    id,
    filePath,
    title: fm.title,
    type: fm.type,
    tier: fm.tier,
    category: orNull(fm.category),
    tags: fm.tags ? fm.tags.join(",") : null,
    summary: orNull(fm.summary),
    confidence: orNull(fm.confidence),
    status: fm.status ?? "current",
    sources: fm.sources ? JSON.stringify(fm.sources) : null,
    createdAt: orNull(fm.created),
    modifiedAt: orNull(fm.modified),
    lastReviewed: orNull(fm["last-reviewed"]),
    reviewInterval: orNull(fm["review-interval"]),
    expires: orNull(fm.expires),
    metadata,
    module: orNull(fm.module),
    moduleInstance: orNull(fm["module-instance"]),
    contentDir: orNull(fm["content-dir"])
  };
}
|
|
636
|
+
/**
 * Builds a chunk ID from the note ID and the chunk content.
 *
 * The ID is `<noteId>::<hash>`, where the hash is the first 16 hex digits of
 * the SHA-256 of `<noteId>:<content>`.
 *
 * @param {string} noteId - ID of the owning note.
 * @param {string} content - Chunk text.
 * @returns {string} The chunk ID.
 */
function chunkId(noteId, content) {
  const hasher = createHash("sha256");
  hasher.update(`${noteId}:${content}`);
  const shortDigest = hasher.digest("hex").slice(0, 16);
  return `${noteId}::${shortDigest}`;
}
|
|
640
|
+
/**
 * Converts parser chunks into storable chunk records for a note.
 *
 * Each record gets an ID from chunkId, the owning `noteId`, and the parser
 * chunk's `text` under the name `content`; the other fields are copied.
 *
 * @param {string} noteId - ID of the owning note.
 * @param {object[]} rawChunks - Chunks produced by the markdown parser.
 * @returns {object[]} Chunk records in the same order.
 */
function rawChunksToChunks(noteId, rawChunks) {
  return rawChunks.map(
    ({ text, heading, headingAncestry, tokenCount, chunkType, cutType, position }) => ({
      id: chunkId(noteId, text),
      noteId,
      heading,
      headingAncestry,
      content: text,
      tokenCount,
      chunkType,
      cutType,
      position
    })
  );
}
|
|
653
|
+
/**
 * Renders an inbox item as a markdown note with YAML frontmatter.
 *
 * The note is a fast-tier draft of type "note", dated today (UTC). The ID is
 * the slug of the title, or the first eight characters of the item ID when
 * the slug is empty. When the item has a source URL it is recorded as a
 * single "web" source.
 *
 * Title and source URL are both written as double-quoted YAML scalars with
 * backslashes and double quotes escaped, so neither can break the
 * frontmatter. The `sources` entry is indented as a one-element sequence of
 * mappings.
 *
 * @param {{id: string, title?: string|null, sourceUrl?: string|null,
 *   content: string}} item - Inbox item.
 * @returns {string} Markdown document.
 */
function inboxItemToMarkdown(item) {
  const yamlQuote = (text) => `"${String(text).replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
  const now = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
  const title = item.title ?? "Inbox capture";
  const id = slugify(title) || item.id.slice(0, 8);
  const lines = [
    "---",
    `id: ${id}`,
    `title: ${yamlQuote(title)}`,
    "type: note",
    "tier: fast",
    "status: draft",
    `created: ${now}`,
    `modified: ${now}`
  ];
  if (item.sourceUrl) {
    lines.push(
      "sources:",
      `  - url: ${yamlQuote(item.sourceUrl)}`,
      `    accessed: "${now}"`,
      '    type: "web"'
    );
  }
  lines.push("---", "", item.content);
  return lines.join("\n");
}
|
|
676
|
+
/**
 * Indexes one markdown file: note record, full-text entry, embedded chunks,
 * relations and the file bookkeeping row.
 *
 * Relations come from three places: frontmatter (`related`, `supersedes`,
 * `parent`), markdown links to notes that already exist in the store, and
 * auto-links to the nearest neighbours of the first chunk (similarity
 * threshold 0.85, at most 5).
 *
 * Frontmatter and link relations are merged, de-duplicated and written with
 * a single `upsertRelations` call.
 * NOTE(review): `upsertRelations` is not visible here. The auto-link step
 * re-sends the existing relations, which suggests each call replaces the
 * note's relation set; if so, two separate calls would let link relations
 * overwrite frontmatter relations. If the call is additive, the merged call
 * is equivalent to the two separate ones.
 *
 * @param {object} db - Store.
 * @param {object} embedder - Provides `embed(texts)` resolving to one vector per text.
 * @param {string} filePath - Path of the file being indexed.
 * @param {string} content - File content.
 * @param {string} hash - Content hash recorded for change detection.
 * @param {number} mtime - Modification time recorded for change detection.
 * @returns {Promise<string>} The ID of the indexed note.
 */
async function indexSingleFile(db, embedder, filePath, content, hash, mtime) {
  const parsed = parseMarkdown(filePath, content);
  const noteRecord = frontmatterToRecord(parsed);
  db.upsertNote(noteRecord);

  // Full-text content is the note body plus any files in its content dir.
  let ftsContent = parsed.content;
  if (noteRecord.contentDir) {
    const dirContent = readIndexableContent(noteRecord.contentDir);
    if (dirContent) {
      ftsContent = ftsContent + "\n\n" + dirContent;
    }
  }
  db.upsertNoteFTS(
    parsed.id,
    parsed.frontmatter.title,
    parsed.frontmatter.summary ?? "",
    ftsContent
  );

  const chunks = rawChunksToChunks(parsed.id, parsed.chunks);
  let vectors = [];
  if (chunks.length > 0) {
    const embeddings = await embedder.embed(chunks.map((c) => c.content));
    vectors = embeddings.map((e) => new Float32Array(e));
    db.upsertChunks(parsed.id, chunks, vectors);
  }

  // Explicit relations: frontmatter first, then links to known notes.
  const relations = [];
  const relationKeys = /* @__PURE__ */ new Set();
  const addRelation = (relation) => {
    const key = `${relation.targetId}|${relation.type}`;
    if (relationKeys.has(key)) return;
    relationKeys.add(key);
    relations.push(relation);
  };
  for (const relation of parsed.relations) {
    addRelation(relation);
  }
  for (const targetSlug of extractNoteLinks(parsed.content)) {
    if (targetSlug === parsed.id) continue;
    if (!db.getNoteById(targetSlug)) continue;
    addRelation({ sourceId: parsed.id, targetId: targetSlug, type: "related-to" });
  }
  if (relations.length > 0) {
    db.upsertRelations(parsed.id, relations);
  }

  if (vectors.length > 0) {
    const autoLinks = computeAutoLinks(db, parsed.id, 0.85, 5, vectors[0]);
    if (autoLinks.length > 0) {
      const existingRelations = db.getRelationsFrom(parsed.id);
      db.upsertRelations(parsed.id, [...existingRelations, ...autoLinks]);
    }
  }

  db.upsertFile({
    path: filePath,
    hash,
    mtime,
    indexedAt: Date.now()
  });
  return parsed.id;
}
|
|
730
|
+
/**
 * Brings the index in line with the markdown files under `notesDir`.
 *
 * Without `opts.force`, the stored embedding model is checked against the
 * embedder and only new or modified files are indexed. With `opts.force`,
 * the chunks of every known note are deleted first and the scan runs against
 * an empty set of known files. Files matched by isSkippedFile are ignored.
 * Notes belonging to deleted files are removed.
 *
 * @param {object} db - Store.
 * @param {object} embedder - Exposes `model`, `dimensions` and `embed`.
 * @param {string} notesDir - Root directory of the notes.
 * @param {{force?: boolean}} opts - Indexing options.
 * @returns {Promise<{indexed: number, deleted: number, unchanged: *,
 *   indexedNoteIds: string[]}>} Summary of the run.
 */
async function indexFiles(db, embedder, notesDir, opts) {
  if (opts.force) {
    for (const { id } of db.getAllNotes()) {
      db.deleteChunksForNote(id);
    }
  } else {
    db.checkModelMatch(embedder.model);
  }
  db.setEmbeddingModel(embedder.model, embedder.dimensions);

  const knownFiles = opts.force ? /* @__PURE__ */ new Map() : db.getAllFiles();
  const changes = await scanForChanges(notesDir, knownFiles);

  const indexedNoteIds = [];
  for (const file of [...changes.new, ...changes.modified]) {
    if (isSkippedFile(file.path)) continue;
    const content = readFileSync2(file.path, "utf-8");
    const noteId = await indexSingleFile(db, embedder, file.path, content, file.hash, file.mtime);
    indexedNoteIds.push(noteId);
  }

  let deleted = 0;
  for (const filePath of changes.deleted) {
    if (isSkippedFile(filePath)) continue;
    const note = db.getNoteByFilePath(filePath);
    if (note) {
      db.deleteNote(note.id);
    }
    db.deleteFile(filePath);
    deleted++;
  }

  return {
    indexed: indexedNoteIds.length,
    deleted,
    unchanged: changes.unchanged,
    indexedNoteIds
  };
}
|
|
763
|
+
/**
 * Turns every pending inbox item into an indexed note file.
 *
 * Each item is marked "processing", rendered to markdown, written to
 * `<notesDir>/logs/<yyyy>/<mm>/<yyyy>-<mm>-<dd>-<noteId>.md` (local date),
 * indexed, and marked "indexed". An item that fails is marked "failed" and
 * reported on stderr; the remaining items are still processed.
 *
 * @param {object} db - Store.
 * @param {string} notesDir - Root directory of the notes.
 * @param {object} embedder - Embedder passed to indexSingleFile.
 * @returns {Promise<number>} Number of items indexed successfully.
 */
async function processInbox(db, notesDir, embedder) {
  let processed = 0;
  for (const item of db.getInboxItems("pending")) {
    db.updateInboxStatus(item.id, "processing");
    try {
      const markdown = inboxItemToMarkdown(item);
      const { id: noteId } = parseMarkdown("inbox-item.md", markdown);

      const today = /* @__PURE__ */ new Date();
      const yyyy = String(today.getFullYear());
      const mm = String(today.getMonth() + 1).padStart(2, "0");
      const dd = String(today.getDate()).padStart(2, "0");
      const outPath = join2(notesDir, "logs", yyyy, mm, `${yyyy}-${mm}-${dd}-${noteId}.md`);

      const targetDir = dirname(outPath);
      if (!existsSync2(targetDir)) {
        mkdirSync2(targetDir, { recursive: true });
      }
      writeFileSync(outPath, markdown, "utf-8");

      const hash = createHash("sha256").update(markdown).digest("hex");
      await indexSingleFile(db, embedder, outPath, markdown, hash, Date.now());
      db.updateInboxStatus(item.id, "indexed");
      processed++;
    } catch (err) {
      db.updateInboxStatus(item.id, "failed");
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`Failed to process inbox item ${item.id}: ${reason}\n`);
    }
  }
  return processed;
}
|
|
795
|
+
/**
 * Writes `<notesDir>/_index.md`, a markdown index of all notes grouped by
 * category.
 *
 * Categories are listed in sorted order, with notes that have no category
 * under "uncategorized". Each entry links to the note relative to
 * `notesDir` and appends the summary after an em dash when there is one.
 * Nothing is written when the store has no notes.
 *
 * @param {object} db - Store providing `getAllNotes`.
 * @param {string} notesDir - Root directory of the notes.
 * @returns {void}
 */
function generateNoteIndex(db, notesDir) {
  const notes = db.getAllNotes();
  if (notes.length === 0) {
    return;
  }

  const byCategory = /* @__PURE__ */ new Map();
  for (const note of notes) {
    const category = note.category ?? "uncategorized";
    if (!byCategory.has(category)) {
      byCategory.set(category, []);
    }
    byCategory.get(category).push(note);
  }

  const lines = ["# Index", ""];
  for (const category of [...byCategory.keys()].sort()) {
    lines.push(`## ${category}`, "");
    for (const note of byCategory.get(category)) {
      const relPath = relative(notesDir, note.filePath);
      const summary = note.summary ? ` \u2014 ${note.summary}` : "";
      lines.push(`- [${note.title}](${relPath})${summary}`);
    }
    lines.push("");
  }

  writeFileSync(join2(notesDir, "_index.md"), lines.join("\n"), "utf-8");
}
|
|
819
|
+
|
|
820
|
+
export {
|
|
821
|
+
VALID_CORE_NOTE_TYPES,
|
|
822
|
+
VALID_NOTE_TIERS,
|
|
823
|
+
VALID_NOTE_CONFIDENCES,
|
|
824
|
+
VALID_NOTE_STATUSES,
|
|
825
|
+
VALID_INBOX_SOURCES,
|
|
826
|
+
slugify,
|
|
827
|
+
parseIntervalDays,
|
|
828
|
+
parseMarkdown,
|
|
829
|
+
splitIntoSections,
|
|
830
|
+
computeAutoLinks,
|
|
831
|
+
traverseGraph,
|
|
832
|
+
expandResults,
|
|
833
|
+
computeGraphScores,
|
|
834
|
+
isSkippedFile,
|
|
835
|
+
addFrontmatterField,
|
|
836
|
+
indexSingleFile,
|
|
837
|
+
indexFiles,
|
|
838
|
+
processInbox,
|
|
839
|
+
generateNoteIndex
|
|
840
|
+
};
|