cofactor-memory 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +112 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +1309 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +171 -0
- package/dist/index.js +1183 -0
- package/dist/index.js.map +1 -0
- package/package.json +67 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1183 @@
|
|
|
1
|
+
// src/index.ts
|
|
2
|
+
import path8 from "path";
|
|
3
|
+
|
|
4
|
+
// src/core/indexer.ts
|
|
5
|
+
import path3 from "path";
|
|
6
|
+
|
|
7
|
+
// src/core/catalysts.ts
|
|
8
|
+
import crypto from "crypto";
|
|
9
|
+
|
|
10
|
+
// src/core/entities.ts
|
|
11
|
+
import path from "path";
|
|
12
|
+
|
|
13
|
+
// src/core/text.ts
|
|
14
|
+
// Common English function words. tokenize() discards any token found in this set.
// The words are kept in alphabetical rows purely for readability; insertion order
// matches the alphabetical order of the list.
var stopwords = /* @__PURE__ */ new Set(
  [
    "a about above after again against all also am an and any are as at",
    "be because been before being below between both but by",
    "can did do does doing down during each few for from further",
    "had has have having he her here hers herself him himself his how",
    "i if in into is it its itself just me more most my myself",
    "no nor not now of off on once only or other our ours ourselves out over own",
    "same she should so some such",
    "than that the their theirs them themselves then there these they this those through to too",
    "under until up very was we were what when where which while who whom why will with",
    "you your yours yourself yourselves"
  ].join(" ").split(" ")
);
|
|
141
|
+
// Query-expansion table: each key maps to related words that tokenize() appends
// right after the key whenever alias expansion is enabled.
var aliases = Object.fromEntries(
  [
    ["auth", "authentication identity login session"],
    ["authentication", "auth identity login session"],
    ["build", "ship implement create make"],
    ["changed", "shift pivot direction rewrite"],
    ["change", "shift pivot direction rewrite"],
    ["complexity", "risk friction maintenance tradeoff"],
    ["cost", "budget price expense tradeoff"],
    ["decision", "choice adr direction tradeoff"],
    ["direction", "change shift strategy decision"],
    ["memory", "recall context knowledge notes"],
    ["notes", "memory knowledge writing docs"],
    ["problem", "issue risk blocker challenge"],
    ["search", "retrieval recall find lookup"],
    ["session", "auth authentication identity login"],
    ["tension", "tradeoff conflict risk constraint"],
    ["user", "customer person people reader"]
  ].map(([word, related]) => [word, related.split(" ")])
);
|
|
159
|
+
/**
 * Splits a leading `---` fenced frontmatter block off a document.
 *
 * @param {string} raw - Full file contents.
 * @returns {{ frontmatter: object, body: string }} Parsed frontmatter (via
 *   parseSimpleYaml) and the remaining text. When no complete fenced block is
 *   found, `frontmatter` is `{}` and `body` is `raw` unchanged; otherwise the
 *   body has CRLF line endings normalised to LF.
 */
function extractFrontmatter(raw) {
  if (!raw.startsWith("---\n") && !raw.startsWith("---\r\n")) {
    return { frontmatter: {}, body: raw };
  }
  const normalized = raw.replace(/\r\n/g, "\n");
  const closingFence = "\n---\n";
  // Search from index 3 (the newline that ends the opening fence) so that an
  // empty block, "---\n---\n", is recognised too.
  let end = normalized.indexOf(closingFence, 3);
  let bodyStart = end + closingFence.length;
  if (end === -1 && normalized.endsWith("\n---")) {
    // The closing fence is the very last line and has no trailing newline.
    end = normalized.length - "\n---".length;
    bodyStart = normalized.length;
  }
  if (end === -1) {
    return { frontmatter: {}, body: raw };
  }
  // slice(4, 3) yields "" for the empty-block case, which is what we want.
  const block = normalized.slice(4, end);
  const body = normalized.slice(bodyStart);
  return { frontmatter: parseSimpleYaml(block), body };
}
|
|
172
|
+
/**
 * Parses a small YAML subset: `key: value`, `key: [a, b]` and `key:` followed
 * by `- item` lines. Keys are lower-cased; blank lines and `#` comments are
 * skipped; lines that fit none of these shapes are ignored.
 *
 * @param {string} block - Frontmatter text without the `---` fences.
 * @returns {object} Map of key to string or array of strings.
 */
function parseSimpleYaml(block) {
  const LIST_ITEM = /^\s*-\s+(.+)$/;
  const KEY_VALUE = /^([A-Za-z0-9_-]+):\s*(.*)$/;
  const INLINE_LIST = /^\[(.*)]$/;

  const result = {};
  // Key whose block-style list is currently being collected, if any.
  let openListKey;

  block.split("\n").forEach((rawLine) => {
    const text = rawLine.trimEnd();
    if (text.trim() === "" || text.trimStart().startsWith("#")) {
      return;
    }

    const item = LIST_ITEM.exec(text);
    if (item !== null && openListKey) {
      const existing = result[openListKey];
      const list = Array.isArray(existing) ? existing : [];
      list.push(cleanYamlValue(item[1] ?? ""));
      result[openListKey] = list;
      return;
    }

    // Any other line ends the list that was being collected.
    openListKey = undefined;
    const pair = KEY_VALUE.exec(text);
    if (pair === null) {
      return;
    }

    const name = pair[1].toLowerCase();
    const rest = pair[2] ?? "";
    if (rest.trim() === "") {
      // A bare "key:" opens a block-style list.
      result[name] = [];
      openListKey = name;
      return;
    }

    const inline = INLINE_LIST.exec(rest.trim());
    if (inline !== null) {
      result[name] = inline[1].split(",").map(cleanYamlValue).filter(Boolean);
      return;
    }

    result[name] = cleanYamlValue(rest);
  });

  return result;
}
|
|
210
|
+
/**
 * Trims a YAML scalar and drops one leading and/or one trailing quote
 * character (single or double); the two ends are handled independently.
 *
 * @param {string} value
 * @returns {string}
 */
function cleanYamlValue(value) {
  let text = value.trim();
  if (/^['"]/.test(text)) {
    text = text.slice(1);
  }
  if (/['"]$/.test(text)) {
    text = text.slice(0, -1);
  }
  return text;
}
|
|
213
|
+
/**
 * Returns the first frontmatter value, in `keys` order, that is a non-blank
 * string `Date` can parse, formatted as an ISO-8601 string.
 *
 * @param {object} frontmatter
 * @param {string[]} keys - Candidate keys in priority order.
 * @returns {string|undefined}
 */
function frontmatterDate(frontmatter, keys) {
  for (const key of keys) {
    const candidate = frontmatter[key];
    if (typeof candidate !== "string" || candidate.trim() === "") {
      continue;
    }
    const parsed = new Date(candidate);
    if (Number.isNaN(parsed.getTime())) {
      continue;
    }
    return parsed.toISOString();
  }
  return undefined;
}
|
|
225
|
+
/**
 * Reads a frontmatter entry as a list of normalised tags. Arrays are used as
 * they are; any other truthy value is split on commas. Entries that normalise
 * to an empty string are dropped.
 *
 * @param {object} frontmatter
 * @param {string} key
 * @returns {string[]}
 */
function frontmatterList(frontmatter, key) {
  const raw = frontmatter[key];
  if (!raw) {
    return [];
  }
  const parts = Array.isArray(raw) ? raw : raw.split(",");
  return parts.map(normalizeTag).filter(Boolean);
}
|
|
235
|
+
/**
 * Canonical tag form: trimmed, one leading `#` removed, lower-cased.
 *
 * @param {string} tag
 * @returns {string}
 */
function normalizeTag(tag) {
  const trimmed = tag.trim();
  const bare = trimmed.startsWith("#") ? trimmed.slice(1) : trimmed;
  return bare.toLowerCase();
}
|
|
238
|
+
/**
 * Picks a display title: the text of the first level-1 Markdown heading, or a
 * readable form of the fallback file name (extension removed, runs of `-`/`_`
 * turned into spaces).
 *
 * @param {string} body - Markdown text.
 * @param {string} fallback - File name used when no usable heading exists.
 * @returns {string}
 */
function markdownTitle(body, fallback) {
  // Only horizontal whitespace may separate "#" from the title; `\s` would let
  // a bare "#" line swallow the following line as its title.
  const heading = body.match(/^#[^\S\r\n\u2028\u2029]+(.+)$/m);
  const title = heading?.[1]?.trim();
  if (title) {
    return title;
  }
  return fallback.replace(/\.[^.]+$/, "").replace(/[-_]+/g, " ");
}
|
|
245
|
+
/**
 * Reduces Markdown to plain text by applying a fixed sequence of regex
 * replacements, then collapsing whitespace. The order of passes matters.
 *
 * @param {string} raw
 * @returns {string}
 */
function stripMarkdown(raw) {
  const passes = [
    [/```[\s\S]*?```/g, " "], // fenced code blocks are dropped entirely
    [/`([^`]+)`/g, "$1"], // inline code keeps its text
    [/!\[[^\]]*]\([^)]*\)/g, " "], // images are dropped
    [/\[([^\]]+)]\([^)]*\)/g, "$1"], // links keep their label
    [/\[\[([^\]|#]+)(?:#[^\]|]+)?(?:\|([^\]]+))?]]/g, "$2 $1"], // wiki links: alias then target
    [/^#{1,6}\s+/gm, ""], // heading markers
    [/[*_~>|-]+/g, " "], // emphasis, quote, table and dash characters
    [/\s+/g, " "] // collapse whitespace
  ];
  let text = raw;
  for (const [pattern, replacement] of passes) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
|
|
248
|
+
/**
 * Turns text into lower-case, stemmed tokens with stopwords removed and,
 * optionally, alias synonyms inserted after each token that has them.
 *
 * Differences from a naive "stem, then filter" pipeline:
 *  - Stopwords are dropped BEFORE stemming as well as after, so words such as
 *    "this" or "does" cannot slip through as the stems "thi" / "doe".
 *  - The alias table is consulted with an own-property check, so ordinary
 *    words that collide with Object.prototype members (e.g. "constructor") do
 *    not crash or pick up inherited values.
 *  - Aliases are looked up by the raw word first and the stem second, which
 *    makes keys like "changed" (stem "chang") reachable.
 *
 * @param {string} text - Raw text, Markdown allowed.
 * @param {boolean} [includeAliases=true] - Append stemmed synonyms from `aliases`.
 * @returns {string[]} Tokens in document order.
 */
function tokenize(text, includeAliases = true) {
  const normalized = stripMarkdown(text).toLowerCase().replace(/['’]/g, "").replace(/[^a-z0-9]+/g, " ");
  const hasOwn = Object.prototype.hasOwnProperty;
  const tokens = [];
  for (const word of normalized.split(/\s+/)) {
    if (stopwords.has(word)) {
      continue;
    }
    const token = stem(word);
    if (token.length <= 1 || stopwords.has(token)) {
      continue;
    }
    tokens.push(token);
    if (!includeAliases) {
      continue;
    }
    const aliasKey = hasOwn.call(aliases, word) ? word : token;
    if (hasOwn.call(aliases, aliasKey)) {
      tokens.push(...aliases[aliasKey].map((synonym) => stem(synonym)));
    }
  }
  return tokens;
}
|
|
264
|
+
/**
 * Returns the most frequent tokens, most frequent first; ties are broken
 * alphabetically with localeCompare.
 *
 * @param {Iterable<string>} tokens
 * @param {number} [limit=8]
 * @returns {string[]}
 */
function topTerms(tokens, limit = 8) {
  const tally = new Map();
  for (const token of tokens) {
    const seen = tally.get(token) ?? 0;
    tally.set(token, seen + 1);
  }
  const ranked = Array.from(tally);
  ranked.sort(([termA, countA], [termB, countB]) => countB - countA || termA.localeCompare(termB));
  return ranked.slice(0, limit).map((entry) => entry[0]);
}
|
|
271
|
+
/**
 * Finds the most frequent 2- and 3-token phrases in the text (tokenised
 * without alias expansion). Phrases of four characters or fewer are ignored.
 * Ordering: count descending, then alphabetical.
 *
 * @param {string} text
 * @param {number} [limit=8]
 * @returns {string[]}
 */
function phraseTerms(text, limit = 8) {
  const words = tokenize(text, false);
  const tally = new Map();
  for (const width of [2, 3]) {
    const lastStart = words.length - width;
    for (let start = 0; start <= lastStart; start += 1) {
      const phrase = words.slice(start, start + width).join(" ");
      if (phrase.length <= 4) {
        continue;
      }
      tally.set(phrase, (tally.get(phrase) ?? 0) + 1);
    }
  }
  const ranked = Array.from(tally);
  ranked.sort(([phraseA, countA], [phraseB, countB]) => countB - countA || phraseA.localeCompare(phraseB));
  return ranked.slice(0, limit).map((entry) => entry[0]);
}
|
|
284
|
+
/**
 * Splits text into whitespace-separated words after Markdown stripping.
 *
 * @param {string} text
 * @returns {string[]} Non-empty words in order.
 */
function splitWords(text) {
  const plain = stripMarkdown(text);
  const pieces = plain.split(/\s+/);
  return pieces.filter(Boolean);
}
|
|
287
|
+
/**
 * Cuts text into overlapping windows of words.
 *
 * @param {string} text
 * @param {number} chunkSize - Maximum words per chunk.
 * @param {number} overlap - Words shared by consecutive chunks; the window
 *   always advances by at least one word.
 * @returns {string[]} Chunk texts. Text that fits in a single chunk (including
 *   empty text) yields exactly one element.
 */
function chunkByWords(text, chunkSize, overlap) {
  const words = splitWords(text);
  if (words.length <= chunkSize) {
    return [words.join(" ")];
  }
  const stride = Math.max(1, chunkSize - overlap);
  const pieces = [];
  let offset = 0;
  while (offset < words.length) {
    const windowEnd = offset + chunkSize;
    const piece = words.slice(offset, windowEnd).join(" ");
    if (piece.trim() !== "") {
      pieces.push(piece);
    }
    // Stop once a window has reached the final word.
    if (windowEnd >= words.length) {
      break;
    }
    offset += stride;
  }
  return pieces;
}
|
|
305
|
+
/**
 * Returns Markdown-stripped text shortened to at most `maxChars` characters.
 * Truncated text ends in "...", and the ellipsis is counted against the limit.
 * (When `maxChars` is below 3 the bare ellipsis is returned.)
 *
 * @param {string} text
 * @param {number} [maxChars=900]
 * @returns {string}
 */
function snippet(text, maxChars = 900) {
  const clean = stripMarkdown(text);
  if (clean.length <= maxChars) {
    return clean;
  }
  const ellipsis = "...";
  // Reserve room for the ellipsis so the result never exceeds maxChars.
  const keep = Math.max(0, maxChars - ellipsis.length);
  return `${clean.slice(0, keep).trim()}${ellipsis}`;
}
|
|
312
|
+
/**
 * Very small suffix stripper. The first matching rule wins:
 *   "-ing" (6+ chars) removed; "-ied" (5+) becomes "y"; "-ed" (5+) removed;
 *   "-es" (5+) removed; "-s" (4+) removed.
 *
 * @param {string} token - Lower-case word.
 * @returns {string}
 */
function stem(token) {
  const size = token.length;
  const dropTail = (count, replacement = "") => token.slice(0, size - count) + replacement;
  if (size >= 6 && token.endsWith("ing")) {
    return dropTail(3);
  }
  if (size >= 5) {
    if (token.endsWith("ied")) {
      return dropTail(3, "y");
    }
    if (token.endsWith("ed") || token.endsWith("es")) {
      return dropTail(2);
    }
  }
  if (size >= 4 && token.endsWith("s")) {
    return dropTail(1);
  }
  return token;
}
|
|
330
|
+
|
|
331
|
+
// src/core/entities.ts
|
|
332
|
+
/**
 * Builds the canonical `type:name` identifier for an entity.
 *
 * @param {string} type
 * @param {string} name
 * @returns {string}
 */
function entityId(type, name) {
  const canonicalName = normalizeEntityName(type, name);
  return `${type}:${canonicalName}`;
}
|
|
335
|
+
/**
 * Canonicalises an entity name: whitespace is trimmed and collapsed, then
 * tags go through normalizeTag and folders lose leading/trailing slashes.
 *
 * @param {string} type
 * @param {string} name
 * @returns {string}
 */
function normalizeEntityName(type, name) {
  const collapsed = name.trim().replace(/\s+/g, " ");
  switch (type) {
    case "tag":
      return normalizeTag(collapsed);
    case "folder":
      return collapsed.replace(/^\/+|\/+$/g, "");
    default:
      return collapsed;
  }
}
|
|
345
|
+
/**
 * Human-readable label for an entity: `#tag`, `[[link]]`, "root folder" for
 * ".", otherwise `folder:<name>` (an empty name is shown as `folder:.`).
 *
 * @param {string} type
 * @param {string} name
 * @returns {string}
 */
function entityDisplayName(type, name) {
  switch (type) {
    case "tag":
      return `#${name}`;
    case "link":
      return `[[${name}]]`;
    default:
      break;
  }
  if (name === ".") {
    return "root folder";
  }
  if (!name) {
    return "folder:.";
  }
  return `folder:${name}`;
}
|
|
357
|
+
/**
 * Extracts and validates the type prefix of an entity id.
 *
 * @param {string} id - `type:name` identifier.
 * @returns {"tag"|"link"|"folder"}
 * @throws {Error} If the prefix is not one of the known entity types.
 */
function entityTypeFromId(id) {
  const knownTypes = ["tag", "link", "folder"];
  const [prefix] = id.split(":", 1);
  if (knownTypes.includes(prefix)) {
    return prefix;
  }
  throw new Error(`Unknown entity type in id: ${id}`);
}
|
|
364
|
+
/**
 * Returns everything after the first ":" of an entity id, or the whole id
 * when it contains no ":".
 *
 * @param {string} id
 * @returns {string}
 */
function entityNameFromId(id) {
  const cut = id.indexOf(":");
  if (cut < 0) {
    return id;
  }
  return id.slice(cut + 1);
}
|
|
368
|
+
/**
 * Collects the tags, wiki links and folders a document belongs to.
 *
 * Tags come from the `tags` frontmatter entry plus inline `#tag` occurrences;
 * links come from `[[target]]` references (heading and alias parts ignored);
 * folders are "." plus every ancestor directory of `relativePath`.
 *
 * @param {string} body - Document text without frontmatter.
 * @param {object} frontmatter
 * @param {string} relativePath - Path of the document relative to the root.
 * @returns {{ entityIds: string[], folders: string[], links: string[], tags: string[] }}
 *   Each list is de-duplicated and sorted.
 */
function extractEntities(body, frontmatter, relativePath) {
  const INLINE_TAG = /(^|[\s([{>])#([A-Za-z][A-Za-z0-9_/-]{1,64})\b/g;
  const WIKI_LINK = /\[\[([^\]|#]+)(?:#[^\]|]+)?(?:\|[^\]]+)?]]/g;

  const tagSet = new Set(frontmatterList(frontmatter, "tags"));
  for (const [, , rawTag = ""] of body.matchAll(INLINE_TAG)) {
    const tag = normalizeTag(rawTag);
    if (tag) {
      tagSet.add(tag);
    }
  }

  const linkSet = new Set();
  for (const [, rawTarget = ""] of body.matchAll(WIKI_LINK)) {
    const target = normalizeEntityName("link", rawTarget);
    if (target) {
      linkSet.add(target);
    }
  }

  // Register every ancestor directory: "a/b/c" yields "a", "a/b" and "a/b/c".
  const folderSet = new Set(["."]);
  const directory = path.dirname(relativePath).replace(/\\/g, "/");
  if (directory !== ".") {
    let prefix = "";
    for (const segment of directory.split("/").filter(Boolean)) {
      prefix = prefix ? `${prefix}/${segment}` : segment;
      folderSet.add(prefix);
    }
  }

  const ids = new Set();
  tagSet.forEach((name) => ids.add(entityId("tag", name)));
  linkSet.forEach((name) => ids.add(entityId("link", name)));
  folderSet.forEach((name) => ids.add(entityId("folder", name)));

  return {
    entityIds: [...ids].sort(),
    folders: [...folderSet].sort(),
    links: [...linkSet].sort(),
    tags: [...tagSet].sort()
  };
}
|
|
403
|
+
|
|
404
|
+
// src/core/time.ts
|
|
405
|
+
// Number of milliseconds in one day.
var dayMs = 1e3 * 60 * 60 * 24;

/**
 * Age of a timestamp in (fractional) days relative to `now`.
 *
 * @param {string} dateIso - Anything `Date` can parse.
 * @param {Date} [now] - Reference time; defaults to the current time.
 * @returns {number} Never negative; 0 for unparseable or future dates.
 */
function ageInDays(dateIso, now = new Date()) {
  const then = new Date(dateIso).getTime();
  if (Number.isNaN(then)) {
    return 0;
  }
  const elapsedMs = now.getTime() - then;
  return Math.max(0, elapsedMs / dayMs);
}
|
|
413
|
+
/**
 * Exponential decay score: 1 for a brand-new date, halving every
 * `halfLifeDays` (values below 1 are treated as 1).
 *
 * @param {string} dateIso
 * @param {number} halfLifeDays
 * @param {Date} [now]
 * @returns {number}
 */
function recencyScore(dateIso, halfLifeDays, now = new Date()) {
  const age = ageInDays(dateIso, now);
  const halfLife = Math.max(1, halfLifeDays);
  return Math.exp(-Math.log(2) * age / halfLife);
}
|
|
416
|
+
/**
 * Formats a date as "<UTC year> Q<1-4>", or "unknown" when it cannot be parsed.
 *
 * @param {string} dateIso
 * @returns {string}
 */
function quarterLabel(dateIso) {
  const parsed = new Date(dateIso);
  if (Number.isNaN(parsed.getTime())) {
    return "unknown";
  }
  const year = parsed.getUTCFullYear();
  const quarter = Math.floor(parsed.getUTCMonth() / 3) + 1;
  return `${year} Q${quarter}`;
}
|
|
423
|
+
/**
 * Classifies activity from a list of dates.
 *
 * - "rising": items from the last 90 days number at least 2 and at least
 *   1.35x the items aged 91-365 days.
 * - "active": otherwise, the newest item is at most 120 days old.
 * - "dormant": otherwise, the newest item is more than 365 days old.
 * - "stable": everything else, including an empty list.
 *
 * @param {string[]} dates
 * @param {Date} [now]
 * @returns {"rising"|"active"|"dormant"|"stable"}
 */
function inferTrend(dates, now = new Date()) {
  if (dates.length === 0) {
    return "stable";
  }
  const ages = dates.map((date) => ageInDays(date, now));
  let recent = 0;
  let previous = 0;
  for (const age of ages) {
    if (age <= 90) {
      recent += 1;
    } else if (age <= 365) {
      previous += 1;
    }
  }
  const freshest = Math.min(...ages);
  const risingThreshold = Math.max(2, previous * 1.35);
  if (recent >= risingThreshold) {
    return "rising";
  }
  if (freshest <= 120) {
    return "active";
  }
  return freshest > 365 ? "dormant" : "stable";
}
|
|
442
|
+
/**
 * Counts the dates that are at most 365 days old.
 *
 * @param {string[]} dates
 * @param {Date} [now]
 * @returns {number}
 */
function frequencyInLastYear(dates, now = new Date()) {
  let withinYear = 0;
  for (const date of dates) {
    if (ageInDays(date, now) <= 365) {
      withinYear += 1;
    }
  }
  return withinYear;
}
|
|
445
|
+
/**
 * Smallest string in `dates` by plain string comparison (chronological for
 * uniformly formatted ISO timestamps). An empty list yields the Unix epoch.
 *
 * @param {string[]} dates
 * @returns {string}
 */
function minIso(dates) {
  let earliest = dates[0] ?? new Date(0).toISOString();
  for (const value of dates) {
    if (value < earliest) {
      earliest = value;
    }
  }
  return earliest;
}
|
|
448
|
+
/**
 * Largest string in `dates` by plain string comparison (chronological for
 * uniformly formatted ISO timestamps). An empty list yields the Unix epoch.
 *
 * @param {string[]} dates
 * @returns {string}
 */
function maxIso(dates) {
  let latest = dates[0] ?? new Date(0).toISOString();
  for (const value of dates) {
    if (value > latest) {
      latest = value;
    }
  }
  return latest;
}
|
|
451
|
+
|
|
452
|
+
// src/core/vector.ts
|
|
453
|
+
/**
 * Computes smoothed inverse document frequencies:
 *   idf(t) = ln((N + 1) / (df(t) + 1)) + 1
 * where N is the number of token lists (at least 1) and df(t) is the number
 * of lists containing t.
 *
 * @param {string[][]} tokenSets - One token list per document/chunk.
 * @returns {Object<string, number>}
 */
function buildIdf(tokenSets) {
  const documentFrequency = new Map();
  for (const tokens of tokenSets) {
    // Count each token once per list, however often it repeats.
    new Set(tokens).forEach((token) => {
      documentFrequency.set(token, (documentFrequency.get(token) ?? 0) + 1);
    });
  }
  const total = Math.max(1, tokenSets.length);
  const idf = {};
  documentFrequency.forEach((frequency, token) => {
    idf[token] = Math.log((total + 1) / (frequency + 1)) + 1;
  });
  return idf;
}
|
|
467
|
+
/**
 * Tokenises text (with alias expansion) and turns it into a weighted vector.
 *
 * @param {string} text
 * @param {Object<string, number>} idf
 * @returns {Object<string, number>}
 */
function vectorizeText(text, idf) {
  const tokens = tokenize(text);
  return vectorizeTokens(tokens, idf);
}
|
|
470
|
+
/**
 * Builds a unit-length TF-IDF vector: weight = (1 + ln(count)) * idf(token).
 * Tokens missing from `idf` get the largest idf value in the table (1 when
 * the table is empty), i.e. they are treated as maximally rare.
 *
 * Only OWN properties of `idf` are consulted, so tokens that happen to match
 * Object.prototype members (e.g. "constructor") no longer produce NaN
 * weights. The fallback is computed lazily, at most once, and without
 * spreading the whole table into `Math.max`.
 *
 * @param {Iterable<string>} tokens
 * @param {Object<string, number>} idf
 * @returns {Object<string, number>} Normalised sparse vector.
 */
function vectorizeTokens(tokens, idf) {
  const counts = new Map();
  for (const token of tokens) {
    counts.set(token, (counts.get(token) ?? 0) + 1);
  }

  const hasOwn = Object.prototype.hasOwnProperty;
  let fallbackIdf;
  const unknownTokenIdf = () => {
    if (fallbackIdf === undefined) {
      fallbackIdf = 1;
      let first = true;
      for (const weight of Object.values(idf)) {
        if (first || weight > fallbackIdf) {
          fallbackIdf = weight;
          first = false;
        }
      }
    }
    return fallbackIdf;
  };

  const vector = {};
  for (const [token, count] of counts.entries()) {
    const known = hasOwn.call(idf, token) ? idf[token] : undefined;
    vector[token] = (1 + Math.log(count)) * (known ?? unknownTokenIdf());
  }
  return normalizeVector(vector);
}
|
|
482
|
+
/**
 * Scales a sparse vector to unit Euclidean length.
 *
 * @param {Object<string, number>} vector
 * @returns {Object<string, number>} A new object, or the input itself when
 *   its length is zero (or not a number).
 */
function normalizeVector(vector) {
  let sumOfSquares = 0;
  for (const value of Object.values(vector)) {
    sumOfSquares += value * value;
  }
  const length = Math.sqrt(sumOfSquares);
  if (!length) {
    return vector;
  }
  const unit = {};
  for (const term of Object.keys(vector)) {
    unit[term] = vector[term] / length;
  }
  return unit;
}
|
|
493
|
+
/**
 * Dot product of two sparse vectors, which equals cosine similarity when both
 * are unit length. Iterates over the vector with fewer terms.
 *
 * Terms are matched against OWN properties only; otherwise a term such as
 * "constructor" that is present in one vector and absent from the other would
 * read an inherited Object.prototype member and turn the score into NaN.
 *
 * @param {Object<string, number>} a
 * @param {Object<string, number>} b
 * @returns {number}
 */
function cosine(a, b) {
  const hasOwn = Object.prototype.hasOwnProperty;
  const [small, large] = Object.keys(a).length < Object.keys(b).length ? [a, b] : [b, a];
  let score = 0;
  for (const [term, value] of Object.entries(small)) {
    if (hasOwn.call(large, term)) {
      score += value * (large[term] ?? 0);
    }
  }
  return score;
}
|
|
501
|
+
|
|
502
|
+
// src/core/catalysts.ts
|
|
503
|
+
/**
 * Generates "catalyst" prompts for every entity that has at least one chunk.
 * Each candidate text from catalystTexts is vectorised together with the
 * entity's display name and terms, and ranked against that entity's chunks
 * (positive scores only, best 24 kept).
 *
 * @param {object[]} entities - Entities with `id`, `type`, `name`, `topTerms`.
 * @param {object[]} documents - Documents with `id` and `createdAt`.
 * @param {object[]} chunks - Chunks with `id`, `docId`, `entities`, `content`, `vector`.
 * @param {Object<string, number>} idf
 * @returns {object[]} Catalyst records in entity order, then candidate order.
 */
function buildCatalysts(entities, documents, chunks, idf) {
  const documentById = new Map();
  for (const document of documents) {
    documentById.set(document.id, document);
  }

  const catalysts = [];
  for (const entity of entities) {
    const entityChunks = chunks.filter((chunk) => chunk.entities.includes(entity.id));
    if (entityChunks.length === 0) {
      continue;
    }

    // Distinct documents behind the entity's chunks, in first-seen order.
    const ownerDocuments = new Map();
    for (const chunk of entityChunks) {
      const owner = documentById.get(chunk.docId);
      if (owner) {
        ownerDocuments.set(owner.id, owner);
      }
    }

    const combinedText = entityChunks.map((chunk) => chunk.content).join("\n");
    const terms = pickTerms(combinedText, entity.topTerms);
    const timeline = [...ownerDocuments.values()].map((document) => document.createdAt).sort();
    const era = eraLabel(timeline);
    const displayName = entityDisplayName(entity.type, entity.name);
    const termText = terms.join(" ");
    const context = terms.slice(0, 5).join(" / ");

    for (const text of catalystTexts(displayName, entity.type, era, terms, entityChunks)) {
      const vector = vectorizeText(`${displayName} ${text} ${termText}`, idf);
      const matches = [];
      for (const chunk of entityChunks) {
        const score = cosine(vector, chunk.vector);
        if (score > 0) {
          matches.push({ chunkId: chunk.id, score });
        }
      }
      matches.sort((a, b) => b.score - a.score);
      catalysts.push({
        context,
        entityId: entity.id,
        entityName: entity.name,
        entityType: entity.type,
        era,
        id: catalystId(entity.id, text),
        terms,
        text,
        topChunks: matches.slice(0, 24),
        vector
      });
    }
  }
  return catalysts;
}
|
|
541
|
+
/**
 * Recomputes `topChunks` for every catalyst against the given chunks: chunks
 * with a positive score, best first, at most 32. Input catalysts are not
 * mutated.
 *
 * @param {object[]} catalysts - Records with a `vector`.
 * @param {object[]} chunks - Records with `id` and `vector`.
 * @returns {object[]} Copies of the catalysts with refreshed `topChunks`.
 */
function refreshCatalystTopChunks(catalysts, chunks) {
  const rank = (vector) => {
    const matches = [];
    for (const chunk of chunks) {
      const score = cosine(vector, chunk.vector);
      if (score > 0) {
        matches.push({ chunkId: chunk.id, score });
      }
    }
    matches.sort((a, b) => b.score - a.score);
    return matches.slice(0, 32);
  };
  return catalysts.map((catalyst) => ({ ...catalyst, topChunks: rank(catalyst.vector) }));
}
|
|
547
|
+
/**
 * Chooses up to ten distinct terms for an entity: the seed terms first, then
 * the top six phrases (spaces replaced by "_"), then the top twelve words.
 *
 * @param {string} text
 * @param {string[]} seedTerms
 * @returns {string[]}
 */
function pickTerms(text, seedTerms) {
  const phrases = phraseTerms(text, 6).map((term) => term.replace(/\s+/g, "_"));
  const words = topTerms(tokenize(text), 12);
  const unique = new Set(seedTerms);
  for (const term of phrases) {
    unique.add(term);
  }
  for (const term of words) {
    unique.add(term);
  }
  return Array.from(unique).slice(0, 10);
}
|
|
552
|
+
/**
 * Produces up to four distinct question prompts about an entity, filled in
 * with its leading terms. Missing terms fall back to generic wording. When
 * more than one chunk exists, the "shift" prompt uses the top token of the
 * oldest and newest chunk (by `createdAt`).
 *
 * @param {string} displayName
 * @param {string} type - Entity type; folders are named directly.
 * @param {string} era
 * @param {string[]} terms
 * @param {object[]} chunks - Records with `createdAt` and `content`.
 * @returns {string[]}
 */
function catalystTexts(displayName, type, era, terms, chunks) {
  const [a = "this thread", b = "nearby ideas", c = "older context", d = "new evidence"] = terms;
  const scope = type === "folder" ? displayName : `the ${displayName} material`;

  let earlyTerm = a;
  let recentTerm = b;
  const chronological = [...chunks].sort((left, right) => left.createdAt.localeCompare(right.createdAt));
  if (chronological.length > 1) {
    const oldest = chronological[0];
    const newest = chronological[chronological.length - 1];
    earlyTerm = topTerms(tokenize(oldest.content), 1)[0] ?? a;
    recentTerm = topTerms(tokenize(newest.content), 1)[0] ?? b;
  }

  const prompts = [
    `What keeps connecting ${readable(a)} with ${readable(b)} across ${scope}?`,
    `Where does ${scope} show tension between ${readable(a)} and ${readable(c)}?`,
    `How did ${scope} shift from ${readable(earlyTerm)} toward ${readable(recentTerm)} during ${era}?`,
    `Which older notes about ${readable(c)} still matter when ${readable(d)} appears?`
  ];
  // A Set keeps the first occurrence of each prompt, preserving order.
  return [...new Set(prompts)];
}
|
|
565
|
+
/**
 * Describes the time span of a set of dates as a quarter ("2024 Q1") or a
 * quarter range ("2023 Q4 to 2024 Q2"); "unknown" for an empty list.
 *
 * @param {string[]} dates
 * @returns {string}
 */
function eraLabel(dates) {
  if (dates.length === 0) {
    return "unknown";
  }
  const first = quarterLabel(minIso(dates));
  const last = quarterLabel(maxIso(dates));
  if (first === last) {
    return first;
  }
  return [first, last].join(" to ");
}
|
|
573
|
+
/**
 * Turns an underscore-joined term back into space-separated words.
 *
 * @param {string} term
 * @returns {string}
 */
function readable(term) {
  return term.split("_").join(" ");
}
|
|
576
|
+
/**
 * Derives a stable catalyst id: "cat_" plus the first 12 hex characters of
 * the SHA-1 of `<entityId>\n<text>`. The hash is used as an identifier only.
 *
 * @param {string} entityId2
 * @param {string} text
 * @returns {string}
 */
function catalystId(entityId2, text) {
  const payload = `${entityId2}\n${text}`;
  const digest = crypto.createHash("sha1").update(payload).digest("hex");
  return `cat_${digest.slice(0, 12)}`;
}
|
|
581
|
+
|
|
582
|
+
// src/core/discover.ts
|
|
583
|
+
import { promises as fs } from "fs";
|
|
584
|
+
import path2 from "path";
|
|
585
|
+
/**
 * Finds and loads the documents under a root directory. Files are sorted by
 * path (localeCompare), capped at `config.maxFiles`, read concurrently, and
 * documents without any words are dropped.
 *
 * @param {string} rootPath
 * @param {object} config - Needs `maxFiles` plus whatever walk() reads.
 * @returns {Promise<object[]>}
 */
async function discoverDocuments(rootPath, config) {
  const root = path2.resolve(rootPath);
  const discovered = await walk(root, config);
  discovered.sort((a, b) => a.localeCompare(b));
  const selected = discovered.slice(0, config.maxFiles);
  const loaded = await Promise.all(selected.map((filePath) => readDocument(root, filePath)));
  return loaded.filter((document) => document.wordCount > 0);
}
|
|
594
|
+
/**
 * Reads one file and builds its document record: frontmatter, body, entities,
 * title, timestamps and word count.
 *
 * Timestamps prefer frontmatter (`created`/`created_at`/`date` and
 * `modified`/`updated`/`updated_at`) and fall back to file-system times; the
 * creation fallback is the birth time when the platform reports one (> 0),
 * else the modification time.
 *
 * @param {string} root - Absolute root directory.
 * @param {string} filePath - Absolute file path.
 * @returns {Promise<object>}
 */
async function readDocument(root, filePath) {
  const content = await fs.readFile(filePath, "utf8");
  const stat = await fs.stat(filePath);

  // Always use forward slashes so ids are stable across platforms.
  const relativePath = path2.relative(root, filePath).replace(/\\/g, "/");
  const { frontmatter, body } = extractFrontmatter(content);
  const cleanText = stripMarkdown(body);

  const fileCreated = stat.birthtimeMs > 0 ? stat.birthtime : stat.mtime;
  const declaredCreated = frontmatterDate(frontmatter, ["created", "created_at", "date"]);
  const declaredModified = frontmatterDate(frontmatter, ["modified", "updated", "updated_at"]);
  const createdAt = declaredCreated ?? fileCreated.toISOString();
  const modifiedAt = declaredModified ?? stat.mtime.toISOString();

  const { entityIds, folders, links, tags } = extractEntities(body, frontmatter, relativePath);

  return {
    content: body,
    createdAt,
    entities: entityIds,
    folders,
    frontmatter,
    id: stableFileId(relativePath),
    links,
    modifiedAt,
    path: filePath,
    relativePath,
    tags,
    title: markdownTitle(body, path2.basename(filePath)),
    wordCount: splitWords(cleanText).length
  };
}
|
|
620
|
+
/**
 * Recursively lists files under `root` whose extension (case-insensitive) is
 * in `config.extensions`, skipping directories whose name is in
 * `config.excludedDirs`. Entries that are neither directories nor regular
 * files are ignored.
 *
 * @param {string} root
 * @param {{ excludedDirs: string[], extensions: string[] }} config
 * @returns {Promise<string[]>} Paths in depth-first directory-listing order.
 */
async function walk(root, config) {
  const skippedDirs = new Set(config.excludedDirs);
  const wantedExtensions = new Set(config.extensions.map((extension) => extension.toLowerCase()));
  const found = [];

  const descend = async (directory) => {
    const entries = await fs.readdir(directory, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path2.join(directory, entry.name);
      if (entry.isDirectory()) {
        if (!skippedDirs.has(entry.name)) {
          await descend(fullPath);
        }
      } else if (entry.isFile() && wantedExtensions.has(path2.extname(entry.name).toLowerCase())) {
        found.push(fullPath);
      }
    }
  };

  await descend(root);
  return found;
}
|
|
642
|
+
/**
 * Encodes a relative path as URL-safe base64 so it can serve as a stable id.
 *
 * @param {string} relativePath
 * @returns {string}
 */
function stableFileId(relativePath) {
  const bytes = Buffer.from(relativePath);
  return bytes.toString("base64url");
}
|
|
645
|
+
|
|
646
|
+
// src/core/defaults.ts
|
|
647
|
+
// Version stamp written into every generated index.
var INDEX_VERSION = "0.1";
// Directory, inside the vault, that holds everything this tool stores.
var STORE_DIR = ".cofactor";
// File names / sub-directory used inside STORE_DIR.
var INDEX_FILE = "index.json";
var CONFIG_FILE = "config.json";
var TARGETS_DIR = "targets";

// Settings used when no override is supplied (see mergeConfig).
var defaultConfig = {
  // Words shared between consecutive chunks, and words per chunk.
  chunkOverlapWords: 48,
  chunkSizeWords: 260,
  // Directory names that are never descended into while discovering files.
  excludedDirs: [".git", ".hg", ".svn", ".cofactor", ".obsidian", ".trash", ".venv", "coverage", "dist", "node_modules"],
  // File extensions treated as documents.
  extensions: [".md", ".mdx", ".txt"],
  maxFiles: 1024,
  maxResults: 8,
  minEntityDocuments: 1,
  recencyHalfLifeDays: 180,
  selectedEntityLimit: 48
};
|
|
674
|
+
/**
 * Returns a complete configuration: the defaults overlaid with `overrides`.
 *
 * The list-valued defaults (`excludedDirs`, `extensions`) are always copied,
 * with or without overrides, so callers can mutate the result without
 * altering `defaultConfig`. Lists supplied in `overrides` are used as given.
 *
 * @param {object} [overrides] - Partial configuration.
 * @returns {object}
 */
function mergeConfig(overrides) {
  const base = {
    ...defaultConfig,
    excludedDirs: [...defaultConfig.excludedDirs],
    extensions: [...defaultConfig.extensions]
  };
  if (!overrides) {
    return base;
  }
  return {
    ...base,
    ...overrides,
    // An explicit null/undefined list in overrides falls back to the defaults.
    excludedDirs: overrides.excludedDirs ?? base.excludedDirs,
    extensions: overrides.extensions ?? base.extensions
  };
}
|
|
685
|
+
|
|
686
|
+
// src/core/indexer.ts
|
|
687
|
+
/**
 * Builds a complete in-memory index for a directory: documents, chunks with
 * TF-IDF vectors, selected entities and their catalysts, plus summary stats.
 *
 * @param {string} vaultPath - Directory to index.
 * @param {{ config?: object }} [options] - `config` overrides the defaults.
 * @param {string} [kind="vault"] - When "target", `targetPath` is set to the root.
 * @param {string} [sourceVaultPath] - Stored on the result as given.
 * @returns {Promise<object>} The index record.
 */
async function createIndex(vaultPath, options = {}, kind = "vault", sourceVaultPath) {
  const config = mergeConfig(options.config);
  const root = path3.resolve(vaultPath);
  const documents = await discoverDocuments(root, config);

  // Chunks are tokenised first so that idf can be computed over all of them.
  const chunkShells = [];
  for (const document of documents) {
    chunkShells.push(...documentChunks(document, config));
  }
  const idf = buildIdf(chunkShells.map((shell) => shell.tokens));
  const chunks = chunkShells.map((shell) => ({ ...shell, vector: vectorizeTokens(shell.tokens, idf) }));

  const entities = selectEntities(documents, chunks, idf, config.selectedEntityLimit);
  const catalysts = buildCatalysts(entities, documents, chunks, idf);

  // Group catalyst ids by the entity they belong to.
  const catalystIdsByEntity = new Map();
  for (const { entityId: owner, id } of catalysts) {
    if (!catalystIdsByEntity.has(owner)) {
      catalystIdsByEntity.set(owner, []);
    }
    catalystIdsByEntity.get(owner).push(id);
  }
  const linkedEntities = entities.map((entity) => ({
    ...entity,
    catalystIds: catalystIdsByEntity.get(entity.id) ?? []
  }));

  const stats = {
    catalystCount: catalysts.length,
    chunkCount: chunks.length,
    documentCount: documents.length,
    entityCount: entities.length
  };

  return {
    appliedTargets: [],
    catalysts,
    chunks,
    config,
    documents,
    entities: linkedEntities,
    generatedAt: new Date().toISOString(),
    idf,
    kind,
    sourceVaultPath,
    stats,
    targetPath: kind === "target" ? root : undefined,
    vaultPath: root,
    version: INDEX_VERSION
  };
}
|
|
731
|
+
/**
 * Splits a document into word-window chunks and attaches the document's
 * metadata to each. Tokens are computed from the title plus the chunk text.
 *
 * @param {object} document
 * @param {{ chunkSizeWords: number, chunkOverlapWords: number }} config
 * @returns {object[]} Chunk records without vectors; ids are `<docId>:<ordinal>`.
 */
function documentChunks(document, config) {
  const pieces = chunkByWords(document.content, config.chunkSizeWords, config.chunkOverlapWords);
  const { createdAt, entities, id: docId, modifiedAt, path: filePath, relativePath, title } = document;
  return pieces.map((content, ordinal) => ({
    content,
    createdAt,
    docId,
    entities,
    filePath,
    id: `${docId}:${ordinal}`,
    modifiedAt,
    ordinal,
    relativePath,
    title,
    tokens: tokenize(`${title} ${content}`)
  }));
}
|
|
748
|
+
/**
 * Scores every entity that appears in at least one document and keeps the
 * best `limit`.
 *
 * score = 2 * documents + chunks + 1.5 * documents from the last year
 *         + 2 * recency of the newest document (180-day half-life)
 *
 * @param {object[]} documents - Records with `entities` and `createdAt`.
 * @param {object[]} chunks - Records with `entities` and `content`.
 * @param {Object<string, number>} idf
 * @param {number} limit
 * @returns {object[]} Entity records, highest score first.
 */
function selectEntities(documents, chunks, idf, limit) {
  // Groups items by each entity id they mention, keeping item order.
  const groupByEntity = (items) => {
    const grouped = new Map();
    for (const item of items) {
      for (const id of item.entities) {
        if (!grouped.has(id)) {
          grouped.set(id, []);
        }
        grouped.get(id).push(item);
      }
    }
    return grouped;
  };

  const docsByEntity = groupByEntity(documents);
  const chunksByEntity = groupByEntity(chunks);

  const candidates = [...docsByEntity].map(([id, entityDocuments]) => {
    const entityChunks = chunksByEntity.get(id) ?? [];
    const dates = entityDocuments.map((document) => document.createdAt);
    const type = entityTypeFromId(id);
    const name = entityNameFromId(id);
    const topTermList = weightedTerms(entityChunks.map((chunk) => chunk.content).join("\n"), idf);
    const lastSeen = maxIso(dates);
    const frequency12m = frequencyInLastYear(dates);
    const score = entityDocuments.length * 2 + entityChunks.length + frequency12m * 1.5 + recencyScore(lastSeen, 180) * 2;
    return {
      activityTrend: inferTrend(dates),
      catalystIds: [],
      chunkCount: entityChunks.length,
      documentCount: entityDocuments.length,
      firstSeen: minIso(dates),
      frequency12m,
      id,
      lastSeen,
      name,
      score,
      topTerms: topTermList,
      type
    };
  });

  candidates.sort((left, right) => right.score - left.score);
  return candidates.slice(0, limit);
}
|
|
791
|
+
/**
 * Returns up to twelve tokens of the text ranked by count * idf (tokens
 * missing from `idf` weigh 1), ties broken alphabetically.
 *
 * `idf` is read with an own-property check: a token such as "constructor"
 * that is absent from the table must not pick up an inherited
 * Object.prototype member, which would make its weight NaN and corrupt the
 * ordering.
 *
 * @param {string} text
 * @param {Object<string, number>} idf
 * @returns {string[]}
 */
function weightedTerms(text, idf) {
  const tokens = tokenize(text);
  const counts = new Map();
  for (const token of tokens) {
    counts.set(token, (counts.get(token) ?? 0) + 1);
  }
  const hasOwn = Object.prototype.hasOwnProperty;
  const weightOf = (token) => (hasOwn.call(idf, token) ? idf[token] ?? 1 : 1);
  const scored = [...counts.entries()]
    .map(([token, count]) => [token, count * weightOf(token)])
    .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
    .slice(0, 12)
    .map(([token]) => token);
  return scored.length ? scored : topTerms(tokens, 8);
}
|
|
800
|
+
|
|
801
|
+
// src/core/store.ts
|
|
802
|
+
import { promises as fs2 } from "fs";
|
|
803
|
+
import path4 from "path";
|
|
804
|
+
/**
 * Resolves the store directory (`STORE_DIR`) inside a vault.
 *
 * @param {string} vaultPath - Vault root, absolute or relative to the cwd.
 * @returns {string} Absolute path of the store directory.
 */
function storePath(vaultPath) {
  const absoluteVault = path4.resolve(vaultPath);
  return path4.join(absoluteVault, STORE_DIR);
}
|
|
807
|
+
/**
 * Resolves the vault index file (`INDEX_FILE`) inside the store directory.
 *
 * @param {string} vaultPath - Vault root.
 * @returns {string} Absolute path of the index file.
 */
function indexPath(vaultPath) {
  const storeDirectory = storePath(vaultPath);
  return path4.join(storeDirectory, INDEX_FILE);
}
|
|
810
|
+
/**
 * Resolves the vault config file (`CONFIG_FILE`) inside the store directory.
 *
 * @param {string} vaultPath - Vault root.
 * @returns {string} Absolute path of the config file.
 */
function configPath(vaultPath) {
  const storeDirectory = storePath(vaultPath);
  return path4.join(storeDirectory, CONFIG_FILE);
}
|
|
813
|
+
/**
 * Resolves the directory (`TARGETS_DIR`) that holds per-target index files.
 *
 * @param {string} vaultPath - Vault root.
 * @returns {string} Absolute path of the targets directory.
 */
function targetsPath(vaultPath) {
  const storeDirectory = storePath(vaultPath);
  return path4.join(storeDirectory, TARGETS_DIR);
}
|
|
816
|
+
/**
 * Resolves the JSON index file of one applied target.
 *
 * @param {string} vaultPath - Vault root.
 * @param {string} targetId2 - Target identifier used as the file stem.
 * @returns {string} Absolute path of `<targets dir>/<targetId2>.json`.
 */
function targetIndexPath(vaultPath, targetId2) {
  const fileName = `${targetId2}.json`;
  const directory = targetsPath(vaultPath);
  return path4.join(directory, fileName);
}
|
|
819
|
+
/**
 * Loads the vault configuration and passes it through `mergeConfig`.
 *
 * A missing config file is not an error: `mergeConfig()` is called without
 * arguments instead. Any other failure (I/O or JSON syntax) is rethrown.
 *
 * @param {string} vaultPath - Vault root.
 * @returns {Promise<object>} The merged configuration.
 */
async function readConfig(vaultPath) {
  try {
    const filePath = configPath(vaultPath);
    const parsed = JSON.parse(await fs2.readFile(filePath, "utf8"));
    return mergeConfig(parsed);
  } catch (error) {
    if (!isNotFound(error)) {
      throw error;
    }
    return mergeConfig();
  }
}
|
|
830
|
+
/**
 * Persists the configuration as pretty-printed JSON (2-space indent, trailing
 * newline), creating the store directory first when needed.
 *
 * @param {string} vaultPath - Vault root.
 * @param {object} config - Configuration to serialize.
 * @returns {Promise<void>}
 */
async function writeConfig(vaultPath, config) {
  const directory = storePath(vaultPath);
  await fs2.mkdir(directory, { recursive: true });

  const destination = configPath(vaultPath);
  const serialized = `${JSON.stringify(config, null, 2)}\n`;
  await fs2.writeFile(destination, serialized);
}
|
|
835
|
+
/**
 * Reads and parses the vault index file.
 *
 * @param {string} vaultPath - Vault root.
 * @returns {Promise<object>} The parsed index.
 * @throws When the file cannot be read or does not contain valid JSON.
 */
async function readIndex(vaultPath) {
  const filePath = indexPath(vaultPath);
  const contents = await fs2.readFile(filePath, "utf8");
  return JSON.parse(contents);
}
|
|
839
|
+
/**
 * Persists the vault index as pretty-printed JSON (2-space indent, trailing
 * newline), creating the store directory first when needed.
 *
 * @param {string} vaultPath - Vault root.
 * @param {object} index - Index to serialize.
 * @returns {Promise<void>}
 */
async function writeIndex(vaultPath, index) {
  const directory = storePath(vaultPath);
  await fs2.mkdir(directory, { recursive: true });

  const destination = indexPath(vaultPath);
  const serialized = `${JSON.stringify(index, null, 2)}\n`;
  await fs2.writeFile(destination, serialized);
}
|
|
844
|
+
/**
 * Loads the index of a previously applied target.
 *
 * The target is looked up in the vault index's `appliedTargets` list via
 * `resolveAppliedTarget`; its recorded `indexPath` is then read and parsed.
 *
 * @param {string} vaultPath - Vault root.
 * @param {string} target - Requested target, as accepted by `resolveAppliedTarget`.
 * @returns {Promise<object>} The parsed target index.
 * @throws {Error} When no applied target matches `target`.
 */
async function readTargetIndex(vaultPath, target) {
  const { appliedTargets } = await readIndex(vaultPath);
  const match = resolveAppliedTarget(appliedTargets, target);
  if (match) {
    const contents = await fs2.readFile(match.indexPath, "utf8");
    return JSON.parse(contents);
  }
  throw new Error(`No applied target matches "${target}". Run cofactor apply ${target} first.`);
}
|
|
853
|
+
/**
 * Persists a target index as pretty-printed JSON under the targets directory,
 * creating that directory first when needed.
 *
 * @param {string} vaultPath - Vault root.
 * @param {string} targetId2 - Target identifier used as the file stem.
 * @param {object} index - Target index to serialize.
 * @returns {Promise<string>} Path of the file that was written.
 */
async function writeTargetIndex(vaultPath, targetId2, index) {
  const directory = targetsPath(vaultPath);
  await fs2.mkdir(directory, { recursive: true });

  const destination = targetIndexPath(vaultPath, targetId2);
  const serialized = `${JSON.stringify(index, null, 2)}\n`;
  await fs2.writeFile(destination, serialized);
  return destination;
}
|
|
860
|
+
/**
 * Finds the applied target that `requested` refers to.
 *
 * Matching is tiered so that a precise reference always beats a loose one:
 *   1. exact target id,
 *   2. exact path (as given, or resolved against the cwd),
 *   3. basename of the target path.
 * A single pass that ORs all criteria would let an earlier target matching
 * only by basename shadow a later target matching by exact path or id.
 *
 * @param {Array<{id: string, path: string}>} [targets] - Applied targets; a
 *   missing list is treated as empty.
 * @param {string} requested - Target id, path, or basename.
 * @returns {{id: string, path: string} | undefined} The matching target, if any.
 */
function resolveAppliedTarget(targets, requested) {
  const candidates = targets ?? [];
  const absolute = path4.resolve(requested);

  return (
    candidates.find((target) => target.id === requested) ??
    candidates.find((target) => target.path === requested || target.path === absolute) ??
    candidates.find((target) => path4.basename(target.path) === requested)
  );
}
|
|
866
|
+
/**
 * Tells whether a caught value is a file-not-found error.
 *
 * @param {unknown} error - Any caught value.
 * @returns {boolean} True only for objects whose `code` is "ENOENT".
 */
function isNotFound(error) {
  if (!error || typeof error !== "object") {
    return false;
  }
  return "code" in error && error.code === "ENOENT";
}
|
|
869
|
+
|
|
870
|
+
// src/core/apply.ts
|
|
871
|
+
import crypto2 from "crypto";
|
|
872
|
+
import path5 from "path";
|
|
873
|
+
/**
 * Indexes a target corpus against an existing vault and records it as an
 * applied target.
 *
 * The target index reuses the vault's `idf`, has its chunk vectors recomputed
 * with that `idf`, takes its catalysts from the vault (refreshed against the
 * target's chunks), and carries no entities. The target index is written to
 * the vault's targets directory and the vault index is rewritten with the
 * updated `appliedTargets` list, sorted by path.
 *
 * @param {string} vaultPath - Vault root; must already contain an index.
 * @param {string} targetPathInput - Corpus to apply, absolute or relative.
 * @param {{config?: object}} [options] - Optional config overrides, merged
 *   over the config stored in the vault index. Without overrides the config
 *   comes from `readConfig`.
 * @returns {Promise<{target: object, targetIndex: object, vaultIndex: object}>}
 */
async function applyCorpus(vaultPath, targetPathInput, options = {}) {
  const targetPath = path5.resolve(targetPathInput);
  const vaultIndex = await readIndex(vaultPath);

  let config;
  if (options.config) {
    config = { ...vaultIndex.config, ...options.config };
  } else {
    config = await readConfig(vaultPath);
  }

  const targetIndex = await createIndex(targetPath, { config }, "target", path5.resolve(vaultPath));

  // Re-vectorize against the vault's idf so target and vault vectors are comparable.
  const { idf } = vaultIndex;
  const chunks = targetIndex.chunks.map((chunk) => ({
    ...chunk,
    vector: vectorizeTokens(chunk.tokens, idf)
  }));
  const catalysts = refreshCatalystTopChunks(vaultIndex.catalysts, chunks);
  Object.assign(targetIndex, {
    idf,
    chunks,
    catalysts,
    entities: [],
    stats: {
      ...targetIndex.stats,
      catalystCount: catalysts.length,
      entityCount: 0
    }
  });

  const id = targetId(targetPath);
  const indexFilePath = await writeTargetIndex(vaultPath, id, targetIndex);
  const appliedTarget = {
    appliedAt: new Date().toISOString(),
    docCount: targetIndex.documents.length,
    id,
    indexPath: indexFilePath,
    path: targetPath
  };

  // Replace any earlier record of the same target, then keep the list path-sorted.
  const appliedTargets = vaultIndex.appliedTargets.filter((target) => target.id !== id);
  appliedTargets.push(appliedTarget);
  appliedTargets.sort((a, b) => a.path.localeCompare(b.path));

  const nextVaultIndex = { ...vaultIndex, appliedTargets };
  await writeIndex(vaultPath, nextVaultIndex);

  return {
    target: appliedTarget,
    targetIndex,
    vaultIndex: nextVaultIndex
  };
}
|
|
911
|
+
/**
 * Derives a stable identifier for a target from its absolute path.
 *
 * @param {string} targetPath - Target path, absolute or relative to the cwd.
 * @returns {string} First 12 hex characters of the SHA-1 of the resolved path.
 */
function targetId(targetPath) {
  const absolutePath = path5.resolve(targetPath);
  const hash = crypto2.createHash("sha1");
  hash.update(absolutePath);
  const hexDigest = hash.digest("hex");
  return hexDigest.slice(0, 12);
}
|
|
914
|
+
|
|
915
|
+
// src/core/agents.ts
|
|
916
|
+
import { promises as fs3 } from "fs";
|
|
917
|
+
import path6 from "path";
|
|
918
|
+
// HTML comment markers that delimit the Cofactor-managed block inside an
// agent instruction file; everything between them is replaced on update.
const startMarker = "<!-- cofactor:start -->";
const endMarker = "<!-- cofactor:end -->";
|
|
920
|
+
/**
 * Writes Cofactor usage instructions for a coding agent into the vault.
 *
 * For "codex": upserts the managed block in `AGENTS.md` and (over)writes
 * `.agents/skills/cofactor/SKILL.md`. For any other value: upserts the
 * managed block in `CLAUDE.md`.
 *
 * @param {string} vaultPathInput - Vault root, absolute or relative.
 * @param {string} agent - Agent name; only "codex" is special-cased.
 * @returns {Promise<{files: string[]}>} Paths of the files that were written.
 */
async function installAgentInstructions(vaultPathInput, agent) {
  const root = path6.resolve(vaultPathInput);

  if (agent !== "codex") {
    const claudeFile = path6.join(root, "CLAUDE.md");
    await upsertBlock(claudeFile, claudeBlock());
    return { files: [claudeFile] };
  }

  const agentsFile = path6.join(root, "AGENTS.md");
  await upsertBlock(agentsFile, codexBlock());

  const skillDir = path6.join(root, ".agents", "skills", "cofactor");
  const skillFile = path6.join(skillDir, "SKILL.md");
  await fs3.mkdir(skillDir, { recursive: true });
  await fs3.writeFile(skillFile, codexSkill());

  return { files: [agentsFile, skillFile] };
}
|
|
939
|
+
/**
 * Inserts or replaces the Cofactor-managed block in a text file.
 *
 * The block is wrapped between `startMarker` and `endMarker`. If the file
 * already contains a marker pair, the first such span is replaced in place;
 * otherwise the wrapped block is appended (after a blank line when the file
 * has other content). A missing file is treated as empty.
 *
 * @param {string} filePath - File to create or update.
 * @param {string} block - Block text; surrounding whitespace is trimmed.
 * @returns {Promise<void>}
 * @throws Any read error other than "file not found", and any write error.
 */
async function upsertBlock(filePath, block) {
  let existing = "";
  try {
    existing = await fs3.readFile(filePath, "utf8");
  } catch (error) {
    if (!isNotFound2(error)) {
      throw error;
    }
  }

  const wrapped = `${startMarker}\n${block.trim()}\n${endMarker}`;
  const markerPattern = new RegExp(`${escapeRegex(startMarker)}[\\s\\S]*?${escapeRegex(endMarker)}`);

  // A replacer function inserts `wrapped` verbatim. Passing the string itself
  // would expand `$&`, `$1`, `$$` and similar sequences found in the block.
  const next = markerPattern.test(existing)
    ? existing.replace(markerPattern, () => wrapped)
    : `${existing.trimEnd()}${existing.trim() ? "\n\n" : ""}${wrapped}\n`;

  await fs3.writeFile(filePath, next);
}
|
|
956
|
+
/**
 * Builds the markdown section inserted into `AGENTS.md` for Codex.
 *
 * @returns {string} A heading and one paragraph, with a leading and a trailing
 *   newline; the paragraph names the store directory via `STORE_DIR`.
 */
function codexBlock() {
  const heading = "## Cofactor Memory";
  const guidance = `Use \`cofactor petri --json\` before broad research or implementation work in this repository to load the local knowledge graph. Use \`cofactor catalyze "<question>" --json\` when the current task needs relevant notes, decisions, or cross-corpus context. The same tools are exposed over stdio with \`cofactor mcp\`. The index lives in \`${STORE_DIR}/\`; refresh it with \`cofactor init\` after substantial note changes.`;
  // Empty first and last entries produce the leading and trailing newline.
  return ["", heading, "", guidance, ""].join("\n");
}
|
|
963
|
+
/**
 * Builds the markdown section inserted into `CLAUDE.md` for Claude.
 *
 * @returns {string} A heading and one paragraph, with a leading and a trailing
 *   newline.
 */
function claudeBlock() {
  const heading = "## Cofactor Memory";
  const guidance = `Before broad reasoning tasks, run \`cofactor petri --json\` to inspect local entities and catalysts. For a specific question, run \`cofactor catalyze "<question>" --json\` and use the returned catalysts plus documents as working memory. Claude Desktop can also run \`cofactor mcp\` as a stdio MCP server. Refresh the index with \`cofactor init\` when notes change.`;
  // Empty first and last entries produce the leading and trailing newline.
  return ["", heading, "", guidance, ""].join("\n");
}
|
|
970
|
+
/**
 * Builds the contents of the Codex `SKILL.md` file: a front-matter header
 * (name and description) followed by a single usage paragraph.
 *
 * @returns {string} The skill file text, ending with a newline.
 */
function codexSkill() {
  const frontMatter = [
    "---",
    "name: cofactor",
    "description: Use local Cofactor memory before repository research, planning, or implementation tasks.",
    "---"
  ];
  const usage = `Run \`cofactor petri --json\` to inspect indexed entities and catalysts for the current repository. Run \`cofactor catalyze "<question>" --json\` to retrieve the most relevant indexed notes for a specific task. Use \`cofactor mcp\` when a client supports Model Context Protocol tools over stdio.`;
  // The trailing empty entry produces the final newline.
  return [...frontMatter, "", usage, ""].join("\n");
}
|
|
979
|
+
/**
 * Escapes regular-expression metacharacters so `text` can be embedded in a
 * `RegExp` source and match literally.
 *
 * @param {string} text - Literal text.
 * @returns {string} `text` with each metacharacter prefixed by a backslash.
 */
function escapeRegex(text) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return text.replace(metacharacters, (character) => `\\${character}`);
}
|
|
982
|
+
/**
 * Tells whether a caught value is a file-not-found error.
 *
 * @param {unknown} error - Any caught value.
 * @returns {boolean} True only for objects whose `code` is "ENOENT".
 */
function isNotFound2(error) {
  const isObject = Boolean(error) && typeof error === "object";
  if (!isObject || !("code" in error)) {
    return false;
  }
  return error.code === "ENOENT";
}
|
|
985
|
+
|
|
986
|
+
// src/core/mcp.ts
|
|
987
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
988
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
989
|
+
import { z } from "zod";
|
|
990
|
+
|
|
991
|
+
// src/core/search.ts
|
|
992
|
+
import path7 from "path";
|
|
993
|
+
/**
 * Searches a vault index, or the index of an applied target, for `query`.
 *
 * @param {string} query - Search question.
 * @param {{vaultPath?: string, target?: string, limit?: number}} [options]
 *   - `vaultPath` defaults to the cwd; `target` selects an applied target's
 *   index instead of the vault index; `limit` defaults to the loaded index's
 *   `config.maxResults`.
 * @returns {Promise<object>} The response produced by `searchIndex`.
 */
async function catalyze(query, options = {}) {
  const { target } = options;
  const vaultPath = path7.resolve(options.vaultPath ?? process.cwd());

  let index;
  if (target) {
    index = await readTargetIndex(vaultPath, target);
  } else {
    index = await readIndex(vaultPath);
  }

  const limit = options.limit ?? index.config.maxResults;
  return searchIndex(index, query, { limit, target, vaultPath });
}
|
|
1002
|
+
/**
 * Ranks the chunks of an index against a query.
 *
 * The query is vectorized with the index's `idf`. The ten best-matching
 * catalysts contribute a boost (link score × catalyst match score) to each
 * chunk they link to. Every chunk is scored by `scoreChunk`, chunks with a
 * non-positive score are dropped, and the best `options.limit` remain.
 *
 * @param {object} index - Loaded index (`idf`, `catalysts`, `chunks`, `config`, `vaultPath`).
 * @param {string} query - Search question.
 * @param {{limit?: number, target?: string, vaultPath?: string}} options
 * @returns {{catalysts: object[], query: string, results: object[], target: (string|undefined), vaultPath: string}}
 *   The five best catalyst matches plus the ranked chunk results.
 */
function searchIndex(index, query, options) {
  const queryVector = vectorizeText(query, index.idf);
  const catalystMatches = rankCatalysts(index.catalysts, queryVector).slice(0, 10);

  // chunk id -> boosts contributed by matching catalysts that link to it.
  const boostsByChunk = new Map();
  for (const { catalyst, score: matchScore } of catalystMatches) {
    for (const { chunkId, score: linkScore } of catalyst.topChunks) {
      const boost = { catalyst, score: linkScore * matchScore };
      const boosts = boostsByChunk.get(chunkId);
      if (boosts) {
        boosts.push(boost);
      } else {
        boostsByChunk.set(chunkId, [boost]);
      }
    }
  }

  const ranked = [];
  for (const chunk of index.chunks) {
    const scoredChunk = scoreChunk(chunk, queryVector, boostsByChunk.get(chunk.id) ?? [], index);
    if (scoredChunk.score > 0) {
      ranked.push(scoredChunk);
    }
  }
  ranked.sort((a, b) => b.score - a.score);

  const results = ranked.slice(0, options.limit).map(({ catalystIds, chunk, score }) => ({
    catalystIds,
    content: snippet(chunk.content),
    filePath: chunk.filePath,
    score: Number(score.toFixed(6)),
    title: chunk.title
  }));

  return {
    catalysts: catalystMatches.slice(0, 5),
    query,
    results,
    target: options.target,
    vaultPath: options.vaultPath ?? index.vaultPath
  };
}
|
|
1029
|
+
/**
 * Returns an overview of a vault's entities and catalysts.
 *
 * Without a query: the first 24 catalysts and all entities. With a query:
 * only the catalysts among the 20 best matches (in index order) and up to 12
 * entities those catalysts belong to.
 *
 * @param {string} [vaultPathInput] - Vault root; defaults to the cwd.
 * @param {string} [query] - Optional query used to narrow the overview.
 * @returns {Promise<object>} Applied targets, catalysts, entities, stats, the
 *   resolved vault path, and the query when one was given.
 */
async function petri(vaultPathInput = process.cwd(), query) {
  const vaultPath = path7.resolve(vaultPathInput);
  const index = await readIndex(vaultPath);
  const { appliedTargets, stats } = index;

  if (!query) {
    return {
      appliedTargets,
      catalysts: index.catalysts.slice(0, 24),
      entities: index.entities,
      stats,
      vaultPath
    };
  }

  const queryVector = vectorizeText(query, index.idf);
  const topMatches = rankCatalysts(index.catalysts, queryVector).slice(0, 20);

  const matchedCatalystIds = new Set();
  const matchedEntityIds = new Set();
  for (const { catalyst } of topMatches) {
    matchedCatalystIds.add(catalyst.id);
    matchedEntityIds.add(catalyst.entityId);
  }

  const catalysts = index.catalysts
    .filter((catalyst) => matchedCatalystIds.has(catalyst.id))
    .slice(0, 20);
  const entities = index.entities
    .filter((entity) => matchedEntityIds.has(entity.id))
    .slice(0, 12);

  return {
    appliedTargets,
    catalysts,
    entities,
    query,
    stats,
    vaultPath
  };
}
|
|
1054
|
+
/**
 * Scores catalysts by cosine similarity to the query vector.
 *
 * @param {Array<{vector: object}>} catalysts - Catalysts to rank.
 * @param {object} queryVector - Vectorized query.
 * @returns {Array<{catalyst: object, score: number}>} Matches with a positive
 *   score, best first.
 */
function rankCatalysts(catalysts, queryVector) {
  const matches = [];
  for (const catalyst of catalysts) {
    const score = cosine(queryVector, catalyst.vector);
    if (score > 0) {
      matches.push({ catalyst, score });
    }
  }
  matches.sort((a, b) => b.score - a.score);
  return matches;
}
|
|
1060
|
+
/**
 * Scores one chunk for a query.
 *
 * The score blends direct cosine similarity (×0.58), the strongest catalyst
 * boost (×0.92) and a recency term (×0.04). It is 0 when the direct and
 * catalyst signals do not sum to a positive value, so recency alone never
 * surfaces a chunk.
 *
 * @param {object} chunk - Chunk with `vector` and `modifiedAt`.
 * @param {object} queryVector - Vectorized query.
 * @param {Array<{catalyst: {id: string}, score: number}>} catalystMatches -
 *   Boosts from catalysts that link to this chunk.
 * @param {object} index - Index; `config.recencyHalfLifeDays` is read.
 * @returns {{catalystIds: string[], chunk: object, score: number}} The chunk,
 *   its score, and up to five distinct contributing catalyst ids.
 */
function scoreChunk(chunk, queryVector, catalystMatches, index) {
  const directScore = cosine(queryVector, chunk.vector);

  let catalystScore = 0;
  const seenCatalystIds = new Set();
  for (const match of catalystMatches) {
    catalystScore = Math.max(catalystScore, match.score);
    seenCatalystIds.add(match.catalyst.id);
  }

  const recency = recencyScore(chunk.modifiedAt, index.config.recencyHalfLifeDays);

  let score = 0;
  if (directScore + catalystScore > 0) {
    score = directScore * 0.58 + catalystScore * 0.92 + recency * 0.04;
  }

  return {
    catalystIds: [...seenCatalystIds].slice(0, 5),
    chunk,
    score
  };
}
|
|
1073
|
+
/**
 * Serializes a search response as pretty-printed JSON, reducing each catalyst
 * match to its entity name, id, score (rounded to 6 decimals) and text.
 *
 * @param {{catalysts: Array<{catalyst: object, score: number}>, query: string, results: object[], target?: string, vaultPath: string}} response
 * @returns {string} JSON with a 2-space indent.
 */
function formatSearchResults(response) {
  const { query, results, target, vaultPath } = response;

  const catalysts = response.catalysts.map(({ catalyst, score }) => ({
    entity: catalyst.entityName,
    id: catalyst.id,
    score: Number(score.toFixed(6)),
    text: catalyst.text
  }));

  return JSON.stringify({ catalysts, query, results, target, vaultPath }, null, 2);
}
|
|
1088
|
+
/**
 * Serializes a `petri` response as pretty-printed JSON.
 *
 * @param {object} response - Value to serialize.
 * @returns {string} JSON with a 2-space indent.
 */
function formatPetri(response) {
  const indent = 2;
  return JSON.stringify(response, null, indent);
}
|
|
1091
|
+
|
|
1092
|
+
// src/core/mcp.ts
|
|
1093
|
+
/**
 * Starts an MCP server named "cofactor" on stdio that exposes two tools:
 * `petri` (graph overview) and `catalyze` (search). Both return their result
 * as a single pretty-printed JSON text item.
 *
 * @param {string} [defaultVaultPath] - Vault used when a tool call omits
 *   `vaultPath`; defaults to the cwd at call time.
 * @returns {Promise<void>} Resolves once the server is connected to the transport.
 */
async function runMcpServer(defaultVaultPath = process.cwd()) {
  // Shape shared by both tool results: one text item holding the JSON payload.
  const asTextResult = (payload) => ({
    content: [
      {
        text: JSON.stringify(payload, null, 2),
        type: "text"
      }
    ]
  });

  const server = new McpServer({
    name: "cofactor",
    version: "0.1.0"
  });

  const petriSpec = {
    description: "Inspect the local Cofactor entity/catalyst graph for working memory before broad reasoning tasks.",
    inputSchema: {
      query: z.string().optional().describe("Optional query for narrowing entities and catalysts."),
      vaultPath: z.string().optional().describe("Path to the indexed vault. Defaults to the server cwd.")
    }
  };
  server.registerTool("petri", petriSpec, async ({ query, vaultPath }) => {
    const overview = await petri(vaultPath ?? defaultVaultPath, query);
    return asTextResult(overview);
  });

  const catalyzeSpec = {
    description: "Search an indexed local corpus through precomputed catalyst questions and return relevant documents.",
    inputSchema: {
      limit: z.number().int().positive().optional().describe("Maximum number of results."),
      query: z.string().min(1).describe("Search question."),
      target: z.string().optional().describe("Optional applied target id, path, or basename."),
      vaultPath: z.string().optional().describe("Path to the indexed vault. Defaults to the server cwd.")
    }
  };
  server.registerTool("catalyze", catalyzeSpec, async ({ limit, query, target, vaultPath }) => {
    const found = await catalyze(query, {
      limit,
      target,
      vaultPath: vaultPath ?? defaultVaultPath
    });
    return asTextResult(found);
  });

  await server.connect(new StdioServerTransport());
}
|
|
1149
|
+
|
|
1150
|
+
// src/index.ts
|
|
1151
|
+
/**
 * Builds a fresh index for a vault and persists both the effective config and
 * the index.
 *
 * @param {string} [vaultPathInput] - Vault root; defaults to the cwd.
 * @param {{config?: object}} [options] - Options forwarded to `createIndex`;
 *   `options.config` entries override the stored config.
 * @returns {Promise<object>} The newly created index.
 */
async function indexVault(vaultPathInput = process.cwd(), options = {}) {
  const vaultPath = path8.resolve(vaultPathInput);

  const storedConfig = await readConfig(vaultPath);
  let config = storedConfig;
  if (options.config) {
    config = { ...storedConfig, ...options.config };
  }

  const index = await createIndex(vaultPath, { ...options, config });

  // Config is written before the index, matching the order of the stored files' dependencies.
  await writeConfig(vaultPath, config);
  await writeIndex(vaultPath, index);
  return index;
}
|
|
1160
|
+
/**
 * Loads the stored index of a vault.
 *
 * @param {string} [vaultPathInput] - Vault root; defaults to the cwd.
 * @returns {Promise<object>} The parsed index.
 */
async function loadIndex(vaultPathInput = process.cwd()) {
  const vaultPath = path8.resolve(vaultPathInput);
  return readIndex(vaultPath);
}
|
|
1163
|
+
/**
 * Rebuilds a vault's index. This is an alias of `indexVault` with the same
 * arguments and result.
 *
 * @param {string} [vaultPathInput] - Vault root; defaults to the cwd.
 * @param {object} [options] - Options forwarded to `indexVault`.
 * @returns {Promise<object>} The rebuilt index.
 */
async function refreshVault(vaultPathInput = process.cwd(), options = {}) {
  const rebuilt = indexVault(vaultPathInput, options);
  return rebuilt;
}
|
|
1166
|
+
export {
|
|
1167
|
+
applyCorpus,
|
|
1168
|
+
catalyze,
|
|
1169
|
+
createIndex,
|
|
1170
|
+
formatPetri,
|
|
1171
|
+
formatSearchResults,
|
|
1172
|
+
indexVault,
|
|
1173
|
+
installAgentInstructions,
|
|
1174
|
+
loadIndex,
|
|
1175
|
+
petri,
|
|
1176
|
+
readConfig,
|
|
1177
|
+
readIndex,
|
|
1178
|
+
readTargetIndex,
|
|
1179
|
+
refreshVault,
|
|
1180
|
+
runMcpServer,
|
|
1181
|
+
searchIndex
|
|
1182
|
+
};
|
|
1183
|
+
//# sourceMappingURL=index.js.map
|