@danielblomma/cortex-mcp 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +203 -0
- package/bin/cortex.mjs +621 -0
- package/docs/MCP_MARKETPLACE.md +160 -0
- package/package.json +42 -0
- package/scaffold/.context/config.yaml +21 -0
- package/scaffold/.context/ontology.cypher +63 -0
- package/scaffold/.context/rules.yaml +25 -0
- package/scaffold/.githooks/_cortex-update-runner.sh +58 -0
- package/scaffold/.githooks/post-checkout +22 -0
- package/scaffold/.githooks/post-merge +14 -0
- package/scaffold/docs/architecture.md +22 -0
- package/scaffold/mcp/package-lock.json +2623 -0
- package/scaffold/mcp/package.json +29 -0
- package/scaffold/mcp/src/embed.ts +416 -0
- package/scaffold/mcp/src/embeddings.ts +192 -0
- package/scaffold/mcp/src/graph.ts +666 -0
- package/scaffold/mcp/src/loadGraph.ts +597 -0
- package/scaffold/mcp/src/paths.ts +33 -0
- package/scaffold/mcp/src/search.ts +412 -0
- package/scaffold/mcp/src/server.ts +98 -0
- package/scaffold/mcp/src/types.ts +109 -0
- package/scaffold/mcp/tests/server.test.mjs +60 -0
- package/scaffold/mcp/tsconfig.json +13 -0
- package/scaffold/scripts/bootstrap.sh +57 -0
- package/scaffold/scripts/capture-note.sh +55 -0
- package/scaffold/scripts/context.sh +109 -0
- package/scaffold/scripts/embed.sh +15 -0
- package/scaffold/scripts/ingest.mjs +1118 -0
- package/scaffold/scripts/ingest.sh +20 -0
- package/scaffold/scripts/install-git-hooks.sh +21 -0
- package/scaffold/scripts/load-kuzu.sh +6 -0
- package/scaffold/scripts/load-ryu.sh +18 -0
- package/scaffold/scripts/parsers/javascript.mjs +390 -0
- package/scaffold/scripts/parsers/package-lock.json +51 -0
- package/scaffold/scripts/parsers/package.json +17 -0
- package/scaffold/scripts/plan-state-engine.cjs +310 -0
- package/scaffold/scripts/plan-state.sh +71 -0
- package/scaffold/scripts/refresh.sh +9 -0
- package/scaffold/scripts/status.sh +282 -0
- package/scaffold/scripts/update-context.sh +18 -0
- package/scaffold/scripts/watch.sh +374 -0
|
@@ -0,0 +1,1118 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import crypto from "node:crypto";
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
6
|
+
import { execSync } from "node:child_process";
|
|
7
|
+
import { parseCode } from "./parsers/javascript.mjs";
|
|
8
|
+
|
|
9
|
+
// ES modules have no built-in __filename/__dirname, so both are derived from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The repository root is the parent of the directory that contains this script.
const REPO_ROOT = path.join(__dirname, "..");

// Everything this script reads or writes lives below <repo>/.context.
const CONTEXT_DIR = path.resolve(REPO_ROOT, ".context");
const CACHE_DIR = path.resolve(CONTEXT_DIR, "cache");
const DB_IMPORT_DIR = path.resolve(CONTEXT_DIR, "db", "import");
|
|
15
|
+
|
|
16
|
+
// Lower-case file extensions (including the dot) that are treated as ingestible text.
const SUPPORTED_TEXT_EXTENSIONS = new Set([
  // Documentation and prose
  ".md", ".mdx", ".txt", ".adoc", ".rst",
  // Configuration and data
  ".yaml", ".yml", ".json", ".toml", ".csv",
  // JavaScript / TypeScript
  ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
  // Other programming languages
  ".py", ".go", ".java", ".cs", ".rb", ".rs", ".php", ".swift", ".kt",
  // SQL and shell scripts
  ".sql", ".sh", ".bash", ".zsh", ".ps1",
  // C / C++
  ".c", ".h", ".cpp", ".hpp", ".cc", ".hh"
]);
|
|
54
|
+
|
|
55
|
+
// Directory names that the recursive walk never descends into.
const SKIP_DIRECTORIES = new Set([
  ".git", ".idea", ".vscode",
  "node_modules", "dist", "build", "coverage",
  ".next", ".cache", ".context"
]);
|
|
67
|
+
|
|
68
|
+
// Files larger than this many bytes (1 MiB) are skipped.
const MAX_FILE_BYTES = 1 << 20;
// Upper bound on the characters of file content stored per file record.
const MAX_CONTENT_CHARS = 60_000;
// Upper bound on the characters stored as an ADR body.
const MAX_BODY_CHARS = 12_000;
// Maximum number of keywords kept per rule.
const RULE_KEYWORD_LIMIT = 20;
|
|
72
|
+
|
|
73
|
+
// Words that carry no signal for keyword matching and are dropped from token lists.
const STOP_WORDS = new Set([
  // English function words and project-generic terms
  "the", "and", "for", "with", "from", "that", "this", "must", "when", "where",
  "into", "used", "using", "only", "true", "false", "unless", "should",
  "global", "active", "rule", "rules", "data", "file", "files", "code", "docs", "context",
  // Swedish function words
  "och", "det", "att", "som", "med", "för", "utan", "eller", "inte",
  "ska", "skall", "måste", "kan", "vid", "alla"
]);
|
|
118
|
+
|
|
119
|
+
/**
 * Interpret the command-line flags passed to the script.
 *
 * Prints the usage text and exits with status 0 when `--help` or `-h` is present.
 * Flags other than `--changed` and `--verbose` are ignored.
 *
 * @param {string[]} argv - Full argument vector; the first two entries are skipped.
 * @returns {{mode: "changed" | "full", verbose: boolean}} Parsed options.
 */
function parseArgs(argv) {
  const flags = argv.slice(2);
  const wantsHelp = flags.includes("--help") || flags.includes("-h");

  if (wantsHelp) {
    printHelp();
    process.exit(0);
  }

  const mode = flags.includes("--changed") ? "changed" : "full";
  const verbose = flags.includes("--verbose");
  return { mode, verbose };
}
|
|
131
|
+
|
|
132
|
+
/**
 * Write the usage text to stdout, one line per console.log call.
 */
function printHelp() {
  const helpLines = [
    "Usage: ./scripts/ingest.sh [--changed] [--verbose]",
    "",
    "Options:",
    " --changed Ingest only changed/untracked files when git is available.",
    " --verbose Print skipped files and additional diagnostics.",
    " -h, --help Show this help message."
  ];

  for (const helpLine of helpLines) {
    console.log(helpLine);
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * Create a directory, including any missing parents. Does nothing if it already exists.
 *
 * @param {string} directoryPath - Directory to create.
 */
function ensureDirectory(directoryPath) {
  const options = { recursive: true };
  fs.mkdirSync(directoryPath, options);
}
|
|
144
|
+
|
|
145
|
+
/**
 * Decide whether a path looks like an ingestible text file.
 *
 * A path qualifies when its lower-cased extension is in SUPPORTED_TEXT_EXTENSIONS,
 * or when its file name is "readme" or starts with "readme." (case-insensitive).
 *
 * @param {string} relPath - Path to classify.
 * @returns {boolean} True when the file should be treated as text.
 */
function isTextFile(relPath) {
  const extension = path.extname(relPath).toLowerCase();
  if (SUPPORTED_TEXT_EXTENSIONS.has(extension)) {
    return true;
  }

  const fileName = path.basename(relPath).toLowerCase();
  if (fileName === "readme") {
    return true;
  }
  return fileName.startsWith("readme.");
}
|
|
154
|
+
|
|
155
|
+
/**
 * Heuristic binary check: a NUL byte within the first 4000 bytes marks the data as binary.
 *
 * @param {Buffer} buffer - Raw file contents (as returned by fs.readFileSync without an encoding).
 * @returns {boolean} True when a zero byte is found in the scanned prefix.
 */
function isBinaryBuffer(buffer) {
  // Only the head is inspected; later bytes never influence the result.
  const head = buffer.subarray(0, 4000);
  return head.includes(0);
}
|
|
165
|
+
|
|
166
|
+
/**
 * Convert a path that uses the platform separator into one that uses forward slashes.
 *
 * @param {string} value - Path using `path.sep` as separator.
 * @returns {string} The same path with "/" between segments.
 */
function toPosixPath(value) {
  const segments = value.split(path.sep);
  return segments.join("/");
}
|
|
169
|
+
|
|
170
|
+
/**
 * Reduce a string to a comparison key: lower-cased, with every character
 * outside a-z and 0-9 removed (this includes non-ASCII letters).
 *
 * @param {string} value - Text to normalize.
 * @returns {string} The normalized key; may be empty.
 */
function normalizeToken(value) {
  const lowered = value.toLowerCase();
  return lowered.replace(/[^a-z0-9]/g, "");
}
|
|
173
|
+
|
|
174
|
+
/**
 * Split text into lower-case keyword tokens.
 *
 * Any run of characters outside a-z / 0-9 acts as a separator (so non-ASCII
 * letters also split words). Tokens shorter than three characters and tokens
 * listed in STOP_WORDS are dropped. Duplicates and input order are preserved.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Keyword tokens in order of appearance.
 */
function tokenizeKeywords(value) {
  const keywords = [];

  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/g)) {
    const token = piece.trim();
    if (token.length < 3 || STOP_WORDS.has(token)) {
      continue;
    }
    keywords.push(token);
  }

  return keywords;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Remove duplicates and sort with the default (string, code-unit) ordering.
 *
 * @param {Iterable<string>} values - Values to deduplicate.
 * @returns {string[]} New sorted array without duplicates.
 */
function uniqueSorted(values) {
  const distinct = Array.from(new Set(values));
  distinct.sort();
  return distinct;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Extract the entries of the `source_paths:` block list from the config text.
 *
 * This is a minimal line-based reader, not a YAML parser: only the block form
 * (`- entry` per line) is understood. Blank lines and full-line `#` comments
 * inside the list are skipped, a trailing ` # comment` is removed from unquoted
 * entries and after a quoted entry, and one layer of surrounding quotes is
 * stripped. The list ends at the first non-indented line that is not an entry.
 *
 * @param {string} configText - Raw contents of the config file.
 * @returns {string[]} Source paths in file order; empty when the key is absent.
 */
function parseSourcePaths(configText) {
  const sourcePaths = [];
  let inSourcePaths = false;

  for (const line of configText.split(/\r?\n/)) {
    const trimmed = line.trim();

    if (!inSourcePaths) {
      // The key may carry a trailing comment but no inline value.
      inSourcePaths = /^source_paths:\s*(?:#.*)?$/.test(trimmed);
      continue;
    }

    // Comments and blank lines must not terminate the list, even at column 0.
    if (trimmed === "" || trimmed.startsWith("#")) {
      continue;
    }

    const entryMatch = line.match(/^\s*-\s*(.+?)\s*$/);
    if (entryMatch) {
      const raw = entryMatch[1];
      if (raw.startsWith("#")) {
        // "- # comment" is an empty entry followed by a comment, not a path.
        continue;
      }

      // A properly quoted value wins; anything after the closing quote must be a comment.
      const quoted = raw.match(/^(['"])(.*?)\1\s*(?:#.*)?$/);
      const value = quoted
        ? quoted[2]
        : raw.replace(/\s+#.*$/, "").replace(/^['"]|['"]$/g, "");
      sourcePaths.push(value);
      continue;
    }

    if (!/^\s/.test(line)) {
      // A new top-level key closes the list.
      break;
    }
  }

  return sourcePaths;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Read rule entries from the rules file text.
 *
 * This is a minimal line-based reader, not a YAML parser. A rule starts at a
 * `- id: …` line; the `description:`, `priority:` and `enforce:` lines that
 * follow are attached to it. `priority` and `enforce` may carry a trailing
 * `# comment`. Rules whose id is empty are dropped.
 *
 * @param {string} rulesText - Raw contents of the rules file.
 * @returns {{id: string, description: string, priority: number, enforce: boolean}[]}
 *   Rules in file order; `description` defaults to "", `priority` to 0 and
 *   `enforce` to false.
 */
function parseRules(rulesText) {
  const stripQuotes = (text) => text.replace(/^['"]|['"]$/g, "");
  const entries = [];
  let current = null;

  for (const line of rulesText.split(/\r?\n/)) {
    const idMatch = line.match(/^\s*-\s*id:\s*(.+?)\s*$/);
    if (idMatch) {
      current = { id: stripQuotes(idMatch[1]) };
      entries.push(current);
      continue;
    }

    if (current === null) {
      // Fields that appear before the first rule have nothing to attach to.
      continue;
    }

    const descriptionMatch = line.match(/^\s*description:\s*(.+?)\s*$/);
    if (descriptionMatch) {
      current.description = stripQuotes(descriptionMatch[1]);
      continue;
    }

    // Scalars tolerate a trailing comment so the value is not silently lost.
    const priorityMatch = line.match(/^\s*priority:\s*(\d+)\s*(?:#.*)?$/);
    if (priorityMatch) {
      current.priority = Number(priorityMatch[1]);
      continue;
    }

    const enforceMatch = line.match(/^\s*enforce:\s*(true|false)\s*(?:#.*)?$/i);
    if (enforceMatch) {
      current.enforce = enforceMatch[1].toLowerCase() === "true";
    }
  }

  return entries
    .filter((entry) => entry.id)
    .map((entry) => ({
      id: entry.id,
      description: entry.description ?? "",
      priority: Number.isFinite(entry.priority) ? entry.priority : 0,
      enforce: entry.enforce === true
    }));
}
|
|
266
|
+
|
|
267
|
+
/**
 * Recursively collect every regular file below a directory.
 *
 * Sub-directories whose name is listed in SKIP_DIRECTORIES are not entered.
 * Entries that are neither regular files nor directories are ignored.
 *
 * @param {string} directoryPath - Directory to scan.
 * @param {Set<string>} files - Receives the path of every file found (mutated in place).
 */
function walkDirectory(directoryPath, files) {
  for (const entry of fs.readdirSync(directoryPath, { withFileTypes: true })) {
    const entryPath = path.join(directoryPath, entry.name);

    if (entry.isFile()) {
      files.add(entryPath);
      continue;
    }

    if (entry.isDirectory() && !SKIP_DIRECTORIES.has(entry.name)) {
      walkDirectory(entryPath, files);
    }
  }
}
|
|
285
|
+
|
|
286
|
+
/**
 * Check whether a repo-relative path lies inside one of the configured source paths.
 *
 * Source paths are normalized before comparison, so "src", "src/" and "./src"
 * are equivalent, and "." covers every path that stays inside the repository.
 *
 * @param {string} relPath - Repo-relative path using "/" separators.
 * @param {string[]} sourcePaths - Configured source paths (files or directories).
 * @returns {boolean} True when relPath equals a source path or is nested below one.
 */
function hasSourcePrefix(relPath, sourcePaths) {
  return sourcePaths.some((sourcePath) => {
    const posixSource = toPosixPath(sourcePath);
    // path.posix.normalize("") would yield "."; keep an empty entry empty instead.
    const source =
      posixSource === "" ? "" : path.posix.normalize(posixSource).replace(/\/+$/, "");

    if (source === ".") {
      // The repository root contains everything that does not escape upward.
      return relPath !== ".." && !relPath.startsWith("../");
    }

    return relPath === source || relPath.startsWith(`${source}/`);
  });
}
|
|
292
|
+
|
|
293
|
+
/**
 * Ask git which paths in the working tree have changed.
 *
 * Uses the NUL-separated porcelain format (`-z`), in which paths are never
 * quoted or escaped, so names containing spaces, non-ASCII characters or
 * " -> " are reported verbatim. In that format a rename or copy record is
 * followed by a second record holding the original path.
 *
 * A renamed path's original location is reported as deleted; the source of a
 * copy still exists and is therefore left alone. Any record whose status
 * contains "D" is reported as deleted; everything else counts as changed.
 *
 * @returns {{changed: string[], deleted: string[]}} Absolute paths. Both lists
 *   are empty when git cannot be run (for example outside a repository).
 */
function getGitChanges() {
  let output;
  try {
    output = execSync("git status --porcelain -z", {
      cwd: REPO_ROOT,
      stdio: ["ignore", "pipe", "ignore"],
      encoding: "utf8"
    });
  } catch {
    // Best effort: without git the caller sees "no changes".
    return { changed: [], deleted: [] };
  }

  const changed = new Set();
  const deleted = new Set();
  const records = output.split("\0");

  for (let index = 0; index < records.length; index += 1) {
    const record = records[index];
    // A valid record is "XY <path>": two status characters, a space, a non-empty path.
    if (record.length < 4) {
      continue;
    }

    const status = record.slice(0, 2);
    const absolutePath = path.resolve(REPO_ROOT, record.slice(3));

    if (/[RC]/.test(status)) {
      // The next record is the original path of the rename/copy; consume it.
      index += 1;
      const originPath = records[index];
      changed.add(absolutePath);
      if (status.includes("R") && originPath) {
        deleted.add(path.resolve(REPO_ROOT, originPath));
      }
      continue;
    }

    if (status.includes("D")) {
      deleted.add(absolutePath);
    } else {
      changed.add(absolutePath);
    }
  }

  return {
    changed: [...changed],
    deleted: [...deleted]
  };
}
|
|
336
|
+
|
|
337
|
+
/**
 * Build the set of files to ingest.
 *
 * In "changed" mode, when git reports at least one changed or deleted path,
 * only those paths are considered (changed directories are walked) and the
 * result is restricted to the configured source paths. In every other case
 * (any other mode, or git reporting nothing) all source paths are scanned in full.
 *
 * @param {string[]} sourcePaths - Configured source paths, relative to the repo root.
 * @param {string} mode - "changed" to request an incremental scan.
 * @returns {{candidates: Set<string>, incrementalMode: boolean, deletedRelPaths: string[]}}
 *   Absolute candidate paths, whether the scan was incremental, and the
 *   repo-relative paths git reported as deleted (incremental scans only).
 */
function collectCandidateFiles(sourcePaths, mode) {
  const candidates = new Set();
  const toRelative = (absolutePath) => toPosixPath(path.relative(REPO_ROOT, absolutePath));
  const inScope = (absolutePath) => hasSourcePrefix(toRelative(absolutePath), sourcePaths);

  const gitChanges = mode === "changed" ? getGitChanges() : null;
  const hasGitChanges =
    gitChanges !== null && (gitChanges.changed.length > 0 || gitChanges.deleted.length > 0);

  if (hasGitChanges) {
    for (const changedPath of gitChanges.changed) {
      if (!fs.existsSync(changedPath)) {
        continue;
      }

      const stats = fs.statSync(changedPath);
      let found = [];
      if (stats.isFile()) {
        found = [changedPath];
      } else if (stats.isDirectory()) {
        // git reports an untracked directory as one entry; expand it to its files.
        found = new Set();
        walkDirectory(changedPath, found);
      }

      for (const foundPath of found) {
        if (inScope(foundPath)) {
          candidates.add(foundPath);
        }
      }
    }

    const deletedRelPaths = new Set(
      gitChanges.deleted
        .map(toRelative)
        .filter((relPath) => hasSourcePrefix(relPath, sourcePaths))
    );

    return {
      candidates,
      incrementalMode: true,
      deletedRelPaths: [...deletedRelPaths]
    };
  }

  // Full scan: every configured source path that exists on disk.
  for (const sourcePath of sourcePaths) {
    const absoluteSourcePath = path.resolve(REPO_ROOT, sourcePath);
    if (!fs.existsSync(absoluteSourcePath)) {
      continue;
    }

    const stats = fs.statSync(absoluteSourcePath);
    if (stats.isFile()) {
      candidates.add(absoluteSourcePath);
    } else if (stats.isDirectory()) {
      walkDirectory(absoluteSourcePath, candidates);
    }
  }

  return {
    candidates,
    incrementalMode: false,
    deletedRelPaths: []
  };
}
|
|
408
|
+
|
|
409
|
+
/**
 * Classify a repo-relative path as "ADR", "DOC" or "CODE".
 *
 * ADR: a path segment named adr, adrs or decisions, or a file name starting
 * with "adr" followed by an optional "-", "_" or space and a digit.
 * DOC: anything under docs/ or with a prose extension.
 * CODE: everything else. Matching is case-insensitive.
 *
 * @param {string} relPath - Repo-relative path using "/" separators.
 * @returns {"ADR" | "DOC" | "CODE"} The detected kind.
 */
function detectKind(relPath) {
  const lowered = relPath.toLowerCase();

  const inDecisionDirectory = /(^|\/)(adr|adrs|decisions)(\/|$)/.test(lowered);
  const hasAdrFileName = /(^|\/)adr[-_ ]?\d+/.test(path.basename(lowered));
  if (inDecisionDirectory || hasAdrFileName) {
    return "ADR";
  }

  const proseExtensions = [".md", ".mdx", ".txt", ".adoc", ".rst"];
  const isDoc = lowered.startsWith("docs/") || proseExtensions.includes(path.extname(lowered));
  return isDoc ? "DOC" : "CODE";
}
|
|
433
|
+
|
|
434
|
+
/**
 * Map a file kind to its trust level.
 *
 * @param {string} kind - "ADR", "CODE" or anything else.
 * @returns {number} 95 for ADR, 80 for CODE, 70 otherwise.
 */
function trustLevelForKind(kind) {
  switch (kind) {
    case "ADR":
      return 95;
    case "CODE":
      return 80;
    default:
      return 70;
  }
}
|
|
439
|
+
|
|
440
|
+
/**
 * Compute the SHA-256 digest of the given data.
 *
 * @param {Buffer | string} buffer - Data to hash.
 * @returns {string} Lower-case hexadecimal digest.
 */
function checksum(buffer) {
  const hash = crypto.createHash("sha256");
  hash.update(buffer);
  return hash.digest("hex");
}
|
|
443
|
+
|
|
444
|
+
/**
 * Collapse every run of whitespace into a single space and trim both ends.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Single-line text with single spaces between words.
 */
function normalizeWhitespace(value) {
  const words = value.trim().split(/\s+/);
  return words.join(" ");
}
|
|
447
|
+
|
|
448
|
+
/**
 * Return the text of the first level-one Markdown heading ("# Title").
 *
 * @param {string} content - Document text.
 * @param {string} fallbackTitle - Value returned when no such heading exists.
 * @returns {string} The trimmed heading text, or fallbackTitle.
 */
function extractTitle(content, fallbackTitle) {
  const headingPattern = /^#\s+(.+)\s*$/;

  for (const line of content.split(/\r?\n/)) {
    const heading = headingPattern.exec(line);
    if (heading !== null) {
      return heading[1].trim();
    }
  }

  return fallbackTitle;
}
|
|
457
|
+
|
|
458
|
+
/**
 * Find the decision date recorded in a document.
 *
 * Looks for a `date:` line first, then a `decision date:` line (the words may
 * be joined by "_", "-" or whitespace); the value must be YYYY-MM-DD,
 * optionally quoted, and must be accepted by Date.parse.
 *
 * @param {string} content - Document text.
 * @param {string} fallbackDate - Timestamp string whose first ten characters are used when no date is found.
 * @returns {string} The matched date, or the first ten characters of fallbackDate.
 */
function parseDecisionDate(content, fallbackDate) {
  const datePatterns = [
    /^\s*date:\s*["']?(\d{4}-\d{2}-\d{2})["']?\s*$/im,
    /^\s*decision[_\s-]*date:\s*["']?(\d{4}-\d{2}-\d{2})["']?\s*$/im
  ];

  for (const datePattern of datePatterns) {
    const found = datePattern.exec(content);
    if (found === null) {
      continue;
    }
    if (Number.isNaN(Date.parse(found[1]))) {
      continue;
    }
    return found[1];
  }

  return fallbackDate.slice(0, 10);
}
|
|
473
|
+
|
|
474
|
+
/**
 * List the normalized tokens under which an ADR can be referenced.
 *
 * The tokens are the normalized id, file name without its extension, and
 * title; when the file name contains a number, "adr<number>" and the number
 * itself are added as well. Empty tokens are dropped.
 *
 * @param {{id: string, path: string, title: string}} adrRecord - ADR record.
 * @returns {string[]} Distinct, non-empty tokens.
 */
function adrTokens(adrRecord) {
  // Strip only the trailing extension; the same text may also occur earlier in the name.
  const fileBase = path.basename(adrRecord.path, path.extname(adrRecord.path));

  const tokens = new Set(
    [adrRecord.id, fileBase, adrRecord.title].map((value) => normalizeToken(value))
  );

  const numberMatch = fileBase.match(/(\d+)/);
  if (numberMatch) {
    tokens.add(normalizeToken(`adr-${numberMatch[1]}`));
    tokens.add(normalizeToken(numberMatch[1]));
  }

  return [...tokens].filter(Boolean);
}
|
|
490
|
+
|
|
491
|
+
/**
 * Collect the identifiers that follow "supersedes" or "ersätter" in the text.
 *
 * The keyword is matched case-insensitively and may be followed by an optional
 * ":" or "-"; the identifier is the following run of letters, digits, ".",
 * "_", "/" and "-".
 *
 * @param {string} content - Document text.
 * @returns {string[]} Distinct identifiers in order of first appearance.
 */
function findSupersedesReferences(content) {
  const pattern = /(?:supersedes|ersätter)\s*[:\-]?\s*([A-Za-z0-9._/-]+)/gi;
  const targets = Array.from(content.matchAll(pattern), (hit) => hit[1]);
  return [...new Set(targets)];
}
|
|
501
|
+
|
|
502
|
+
/**
 * Write records as JSON Lines: one JSON document per line, UTF-8 encoded.
 *
 * @param {string} filePath - Destination file; overwritten if it exists.
 * @param {unknown[]} records - Values to serialize. An empty list produces an empty file.
 */
function writeJsonl(filePath, records) {
  const serialized = records.map((record) => JSON.stringify(record));
  const joined = serialized.join("\n");
  // Terminate the last line unless there is nothing to write at all.
  const payload = joined ? `${joined}\n` : "";
  fs.writeFileSync(filePath, payload, "utf8");
}
|
|
506
|
+
|
|
507
|
+
/**
 * Make a value safe to place in a single TSV cell.
 *
 * Tabs and line breaks would be read as cell or row separators, so each tab
 * and each line break (CRLF, LF or a lone CR) is replaced by one space.
 *
 * @param {unknown} value - Cell value; null and undefined become "".
 * @returns {string} Single-line text without tab characters.
 */
function sanitizeTsvCell(value) {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value).replace(/\t/g, " ").replace(/\r\n|\r|\n/g, " ");
}
|
|
511
|
+
|
|
512
|
+
/**
 * Write a tab-separated table with a header row, UTF-8 encoded.
 *
 * Header names are written as given; every data cell is passed through
 * sanitizeTsvCell. Each line, including the last, ends with "\n".
 *
 * @param {string} filePath - Destination file; overwritten if it exists.
 * @param {string[]} headers - Column names.
 * @param {Iterable<unknown[]>} rows - Data rows, one array of cell values per row.
 */
function writeTsv(filePath, headers, rows) {
  const renderRow = (row) => row.map((value) => sanitizeTsvCell(value)).join("\t");
  const table = [headers.join("\t"), ...Array.from(rows, (row) => renderRow(row))];
  fs.writeFileSync(filePath, `${table.join("\n")}\n`, "utf8");
}
|
|
519
|
+
|
|
520
|
+
/**
 * Read a JSON Lines file, tolerating damage.
 *
 * A missing file yields an empty list. Blank lines, lines that are not valid
 * JSON, and lines that parse to null are skipped.
 *
 * @param {string} filePath - File to read.
 * @returns {unknown[]} Parsed values in file order.
 */
function readJsonlSafe(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }

  const records = [];
  for (const rawLine of fs.readFileSync(filePath, "utf8").split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === "") {
      continue;
    }

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // A corrupt line is dropped; the remaining lines are still used.
      continue;
    }

    if (parsed !== null) {
      records.push(parsed);
    }
  }

  return records;
}
|
|
539
|
+
|
|
540
|
+
/**
 * Derive the keyword list used to match a rule against files.
 *
 * Keywords come from the rule id (split on ".", "_" and "-") and from the
 * tokenized rule body. Each is lower-cased and reduced to a-z / 0-9; keywords
 * shorter than three characters or listed in STOP_WORDS are dropped. The
 * result is deduplicated, sorted, and capped at RULE_KEYWORD_LIMIT entries.
 *
 * @param {{id: string, body: string}} ruleRecord - Rule record.
 * @returns {string[]} Sorted distinct keywords.
 */
function normalizeRuleTokens(ruleRecord) {
  const candidates = ruleRecord.id
    .split(/[._-]+/g)
    .concat(tokenizeKeywords(ruleRecord.body));

  const keywords = new Set();
  for (const candidate of candidates) {
    const keyword = candidate.toLowerCase().replace(/[^a-z0-9]/g, "");
    if (keyword.length >= 3 && !STOP_WORDS.has(keyword)) {
      keywords.add(keyword);
    }
  }

  return [...keywords].sort().slice(0, RULE_KEYWORD_LIMIT);
}
|
|
550
|
+
|
|
551
|
+
/**
 * Build the set of keyword tokens that describe a file.
 *
 * Tokens are taken from the file path and from the first 12000 characters of
 * its content.
 *
 * @param {{path: string, content: string}} fileRecord - File record.
 * @returns {Set<string>} Distinct keyword tokens.
 */
function fileTokenSet(fileRecord) {
  const { path: filePath, content } = fileRecord;
  const head = content.slice(0, 12000);
  const tokens = tokenizeKeywords(`${filePath}\n${head}`);
  return new Set(tokens);
}
|
|
555
|
+
|
|
556
|
+
/**
 * Build the identifier of a code chunk: "chunk:<file>:<name>:<start>-<end>".
 *
 * A start line that is not a finite number becomes 0; an end line that is not
 * a finite number becomes the start line.
 *
 * @param {string} filePath - Path of the file that contains the chunk.
 * @param {{name: string, startLine?: number, endLine?: number}} chunk - Parsed chunk.
 * @returns {string} The chunk identifier.
 */
function chunkIdFor(filePath, chunk) {
  const { name, startLine: rawStart, endLine: rawEnd } = chunk;
  const firstLine = Number.isFinite(rawStart) ? rawStart : 0;
  const lastLine = Number.isFinite(rawEnd) ? rawEnd : firstLine;
  const lineRange = `${firstLine}-${lastLine}`;
  return `chunk:${filePath}:${name}:${lineRange}`;
}
|
|
561
|
+
|
|
562
|
+
function main() {
|
|
563
|
+
const { mode, verbose } = parseArgs(process.argv);
|
|
564
|
+
const configPath = path.join(CONTEXT_DIR, "config.yaml");
|
|
565
|
+
const rulesPath = path.join(CONTEXT_DIR, "rules.yaml");
|
|
566
|
+
|
|
567
|
+
if (!fs.existsSync(configPath)) {
|
|
568
|
+
throw new Error(`Missing config: ${configPath}`);
|
|
569
|
+
}
|
|
570
|
+
if (!fs.existsSync(rulesPath)) {
|
|
571
|
+
throw new Error(`Missing rules: ${rulesPath}`);
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
ensureDirectory(CACHE_DIR);
|
|
575
|
+
ensureDirectory(DB_IMPORT_DIR);
|
|
576
|
+
|
|
577
|
+
const configText = fs.readFileSync(configPath, "utf8");
|
|
578
|
+
const sourcePaths = parseSourcePaths(configText);
|
|
579
|
+
if (sourcePaths.length === 0) {
|
|
580
|
+
throw new Error("No source_paths found in .context/config.yaml");
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
const rules = parseRules(fs.readFileSync(rulesPath, "utf8"));
|
|
584
|
+
const { candidates, incrementalMode, deletedRelPaths } = collectCandidateFiles(sourcePaths, mode);
|
|
585
|
+
|
|
586
|
+
const fileRecordMap = new Map();
|
|
587
|
+
const adrRecordMap = new Map();
|
|
588
|
+
const skipped = {
|
|
589
|
+
unsupported: 0,
|
|
590
|
+
tooLarge: 0,
|
|
591
|
+
binary: 0
|
|
592
|
+
};
|
|
593
|
+
|
|
594
|
+
if (incrementalMode) {
|
|
595
|
+
const existingFiles = readJsonlSafe(path.join(CACHE_DIR, "entities.file.jsonl"));
|
|
596
|
+
for (const record of existingFiles) {
|
|
597
|
+
if (!record || typeof record !== "object") continue;
|
|
598
|
+
const filePath = toPosixPath(String(record.path ?? ""));
|
|
599
|
+
if (!filePath || !hasSourcePrefix(filePath, sourcePaths)) {
|
|
600
|
+
continue;
|
|
601
|
+
}
|
|
602
|
+
const absolutePath = path.resolve(REPO_ROOT, filePath);
|
|
603
|
+
if (!fs.existsSync(absolutePath)) {
|
|
604
|
+
continue;
|
|
605
|
+
}
|
|
606
|
+
fileRecordMap.set(String(record.id ?? `file:${filePath}`), {
|
|
607
|
+
...record,
|
|
608
|
+
id: String(record.id ?? `file:${filePath}`),
|
|
609
|
+
path: filePath,
|
|
610
|
+
kind: String(record.kind ?? detectKind(filePath)),
|
|
611
|
+
content: String(record.content ?? "")
|
|
612
|
+
});
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
const existingAdrs = readJsonlSafe(path.join(CACHE_DIR, "entities.adr.jsonl"));
|
|
616
|
+
for (const adr of existingAdrs) {
|
|
617
|
+
if (!adr || typeof adr !== "object") continue;
|
|
618
|
+
const adrPath = toPosixPath(String(adr.path ?? ""));
|
|
619
|
+
if (!adrPath || !hasSourcePrefix(adrPath, sourcePaths)) {
|
|
620
|
+
continue;
|
|
621
|
+
}
|
|
622
|
+
if (!fs.existsSync(path.resolve(REPO_ROOT, adrPath))) {
|
|
623
|
+
continue;
|
|
624
|
+
}
|
|
625
|
+
adrRecordMap.set(String(adr.id ?? ""), {
|
|
626
|
+
...adr,
|
|
627
|
+
id: String(adr.id ?? ""),
|
|
628
|
+
path: adrPath
|
|
629
|
+
});
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
for (const relPath of deletedRelPaths) {
|
|
634
|
+
fileRecordMap.delete(`file:${relPath}`);
|
|
635
|
+
const relPrefix = relPath.endsWith("/") ? relPath : `${relPath}/`;
|
|
636
|
+
for (const [fileId, fileRecord] of fileRecordMap.entries()) {
|
|
637
|
+
if (String(fileRecord.path ?? "").startsWith(relPrefix)) {
|
|
638
|
+
fileRecordMap.delete(fileId);
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
for (const [adrId, adrRecord] of adrRecordMap.entries()) {
|
|
643
|
+
if (adrRecord.path === relPath || String(adrRecord.path ?? "").startsWith(relPrefix)) {
|
|
644
|
+
adrRecordMap.delete(adrId);
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
for (const absolutePath of [...candidates].sort()) {
|
|
650
|
+
const relPath = toPosixPath(path.relative(REPO_ROOT, absolutePath));
|
|
651
|
+
if (!isTextFile(relPath)) {
|
|
652
|
+
skipped.unsupported += 1;
|
|
653
|
+
if (verbose) console.log(`[ingest] skip unsupported: ${relPath}`);
|
|
654
|
+
continue;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
const stats = fs.statSync(absolutePath);
|
|
658
|
+
if (stats.size > MAX_FILE_BYTES) {
|
|
659
|
+
skipped.tooLarge += 1;
|
|
660
|
+
if (verbose) console.log(`[ingest] skip large: ${relPath}`);
|
|
661
|
+
continue;
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
const buffer = fs.readFileSync(absolutePath);
|
|
665
|
+
if (isBinaryBuffer(buffer)) {
|
|
666
|
+
skipped.binary += 1;
|
|
667
|
+
if (verbose) console.log(`[ingest] skip binary: ${relPath}`);
|
|
668
|
+
continue;
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
const content = buffer.toString("utf8");
|
|
672
|
+
const kind = detectKind(relPath);
|
|
673
|
+
const id = `file:${relPath}`;
|
|
674
|
+
const updatedAt = stats.mtime.toISOString();
|
|
675
|
+
const sourceOfTruth = kind === "ADR";
|
|
676
|
+
const trustLevel = trustLevelForKind(kind);
|
|
677
|
+
|
|
678
|
+
const fileRecord = {
|
|
679
|
+
id,
|
|
680
|
+
path: relPath,
|
|
681
|
+
kind,
|
|
682
|
+
checksum: checksum(buffer),
|
|
683
|
+
updated_at: updatedAt,
|
|
684
|
+
source_of_truth: sourceOfTruth,
|
|
685
|
+
trust_level: trustLevel,
|
|
686
|
+
status: "active",
|
|
687
|
+
size_bytes: stats.size,
|
|
688
|
+
excerpt: normalizeWhitespace(content).slice(0, 500),
|
|
689
|
+
content: content.slice(0, MAX_CONTENT_CHARS)
|
|
690
|
+
};
|
|
691
|
+
fileRecordMap.set(fileRecord.id, fileRecord);
|
|
692
|
+
|
|
693
|
+
if (kind === "ADR") {
|
|
694
|
+
const title = extractTitle(content, path.basename(relPath, path.extname(relPath)));
|
|
695
|
+
const adrRecord = {
|
|
696
|
+
id: `adr:${path.basename(relPath, path.extname(relPath)).toLowerCase()}`,
|
|
697
|
+
path: relPath,
|
|
698
|
+
title,
|
|
699
|
+
body: content.slice(0, MAX_BODY_CHARS),
|
|
700
|
+
decision_date: parseDecisionDate(content, updatedAt),
|
|
701
|
+
supersedes_id: "",
|
|
702
|
+
source_of_truth: true,
|
|
703
|
+
trust_level: 95,
|
|
704
|
+
status: "active"
|
|
705
|
+
};
|
|
706
|
+
adrRecordMap.set(adrRecord.id, adrRecord);
|
|
707
|
+
} else {
|
|
708
|
+
for (const [adrId, adrRecord] of adrRecordMap.entries()) {
|
|
709
|
+
if (adrRecord.path === relPath) {
|
|
710
|
+
adrRecordMap.delete(adrId);
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
const fileRecords = [...fileRecordMap.values()].sort((a, b) => a.path.localeCompare(b.path));
|
|
717
|
+
const adrRecords = [...adrRecordMap.values()].sort((a, b) => a.path.localeCompare(b.path));
|
|
718
|
+
|
|
719
|
+
// Extract chunks from code files
|
|
720
|
+
const chunkRecords = [];
|
|
721
|
+
const definesRelations = [];
|
|
722
|
+
const callsRelations = [];
|
|
723
|
+
const importsRelations = [];
|
|
724
|
+
|
|
725
|
+
for (const fileRecord of fileRecords) {
|
|
726
|
+
if (fileRecord.kind !== "CODE") continue;
|
|
727
|
+
|
|
728
|
+
const ext = path.extname(fileRecord.path).toLowerCase();
|
|
729
|
+
const supportedForChunking = [".js", ".mjs", ".cjs", ".ts"].includes(ext);
|
|
730
|
+
if (!supportedForChunking) continue;
|
|
731
|
+
|
|
732
|
+
try {
|
|
733
|
+
const language = ext === ".ts" ? "typescript" : "javascript";
|
|
734
|
+
const parseResult = parseCode(fileRecord.content, fileRecord.path, language);
|
|
735
|
+
|
|
736
|
+
if (parseResult.errors.length > 0 && verbose) {
|
|
737
|
+
console.log(`[ingest] parse errors in ${fileRecord.path}:`, parseResult.errors[0].message);
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
const parsedChunks = [];
|
|
741
|
+
const chunkIdsByName = new Map();
|
|
742
|
+
|
|
743
|
+
for (const chunk of parseResult.chunks) {
|
|
744
|
+
const chunkId = chunkIdFor(fileRecord.path, chunk);
|
|
745
|
+
parsedChunks.push({ chunk, chunkId });
|
|
746
|
+
if (!chunkIdsByName.has(chunk.name)) {
|
|
747
|
+
chunkIdsByName.set(chunk.name, []);
|
|
748
|
+
}
|
|
749
|
+
chunkIdsByName.get(chunk.name).push(chunkId);
|
|
750
|
+
|
|
751
|
+
const chunkRecord = {
|
|
752
|
+
id: chunkId,
|
|
753
|
+
file_id: fileRecord.id,
|
|
754
|
+
name: chunk.name,
|
|
755
|
+
kind: chunk.kind,
|
|
756
|
+
signature: chunk.signature,
|
|
757
|
+
body: chunk.body.slice(0, 12000), // Limit chunk body size
|
|
758
|
+
start_line: chunk.startLine,
|
|
759
|
+
end_line: chunk.endLine,
|
|
760
|
+
language: chunk.language,
|
|
761
|
+
checksum: checksum(Buffer.from(chunk.body)),
|
|
762
|
+
updated_at: fileRecord.updated_at,
|
|
763
|
+
trust_level: fileRecord.trust_level
|
|
764
|
+
};
|
|
765
|
+
chunkRecords.push(chunkRecord);
|
|
766
|
+
|
|
767
|
+
// DEFINES relation: File -> Chunk
|
|
768
|
+
definesRelations.push({
|
|
769
|
+
from: fileRecord.id,
|
|
770
|
+
to: chunkId
|
|
771
|
+
});
|
|
772
|
+
|
|
773
|
+
// IMPORTS relations: Chunk -> File
|
|
774
|
+
for (const importPath of chunk.imports || []) {
|
|
775
|
+
// Normalize relative imports to absolute paths
|
|
776
|
+
if (importPath.startsWith(".")) {
|
|
777
|
+
const dirName = path.dirname(fileRecord.path);
|
|
778
|
+
const resolvedImport = path.posix.normalize(path.posix.join(dirName, importPath));
|
|
779
|
+
const targetFileId = `file:${resolvedImport}`;
|
|
780
|
+
importsRelations.push({
|
|
781
|
+
from: chunkId,
|
|
782
|
+
to: targetFileId,
|
|
783
|
+
import_name: importPath
|
|
784
|
+
});
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
const seenCallEdges = new Set();
|
|
790
|
+
for (const { chunk, chunkId } of parsedChunks) {
|
|
791
|
+
// CALLS relations: Chunk -> Chunk (within same file)
|
|
792
|
+
for (const calledName of chunk.calls || []) {
|
|
793
|
+
const targetChunkIds = chunkIdsByName.get(calledName) || [];
|
|
794
|
+
for (const targetChunkId of targetChunkIds) {
|
|
795
|
+
const callKey = `${chunkId}|${targetChunkId}|direct`;
|
|
796
|
+
if (seenCallEdges.has(callKey)) {
|
|
797
|
+
continue;
|
|
798
|
+
}
|
|
799
|
+
seenCallEdges.add(callKey);
|
|
800
|
+
callsRelations.push({
|
|
801
|
+
from: chunkId,
|
|
802
|
+
to: targetChunkId,
|
|
803
|
+
call_type: "direct"
|
|
804
|
+
});
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
} catch (error) {
|
|
809
|
+
if (verbose) {
|
|
810
|
+
console.log(`[ingest] failed to parse ${fileRecord.path}: ${error instanceof Error ? error.message : String(error)}`);
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
// Filter CALLS relations to only valid targets (chunks that actually exist)
|
|
816
|
+
const chunkIdSet = new Set(chunkRecords.map(c => c.id));
|
|
817
|
+
const validCallsRelations = callsRelations.filter(rel => chunkIdSet.has(rel.to));
|
|
818
|
+
|
|
819
|
+
if (verbose && chunkRecords.length > 0) {
|
|
820
|
+
console.log(`[ingest] extracted ${chunkRecords.length} chunks from ${fileRecords.filter(f => f.kind === "CODE").length} code files`);
|
|
821
|
+
console.log(`[ingest] ${validCallsRelations.length} call relations (${callsRelations.length - validCallsRelations.length} filtered)`);
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
const ruleRecords = rules.map((rule) => ({
|
|
825
|
+
id: rule.id,
|
|
826
|
+
title: rule.id,
|
|
827
|
+
body: rule.description,
|
|
828
|
+
scope: "global",
|
|
829
|
+
updated_at: new Date().toISOString(),
|
|
830
|
+
source_of_truth: true,
|
|
831
|
+
trust_level: 95,
|
|
832
|
+
status: rule.enforce ? "active" : "draft",
|
|
833
|
+
priority: rule.priority
|
|
834
|
+
}));
|
|
835
|
+
|
|
836
|
+
const adrTokenIndex = new Map();
|
|
837
|
+
for (const adrRecord of adrRecords) {
|
|
838
|
+
for (const token of adrTokens(adrRecord)) {
|
|
839
|
+
if (!adrTokenIndex.has(token)) {
|
|
840
|
+
adrTokenIndex.set(token, adrRecord.id);
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
const supersedesRelations = [];
|
|
846
|
+
for (const adrRecord of adrRecords) {
|
|
847
|
+
const refs = findSupersedesReferences(adrRecord.body);
|
|
848
|
+
for (const ref of refs) {
|
|
849
|
+
const target = adrTokenIndex.get(normalizeToken(ref));
|
|
850
|
+
if (!target || target === adrRecord.id) {
|
|
851
|
+
continue;
|
|
852
|
+
}
|
|
853
|
+
adrRecord.supersedes_id = target;
|
|
854
|
+
supersedesRelations.push({
|
|
855
|
+
from: adrRecord.id,
|
|
856
|
+
to: target,
|
|
857
|
+
reason: `Supersedes ${ref}`
|
|
858
|
+
});
|
|
859
|
+
}
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
const constrainsRelations = [];
|
|
863
|
+
const implementsRelations = [];
|
|
864
|
+
const constrainsSeen = new Set();
|
|
865
|
+
const implementsSeen = new Set();
|
|
866
|
+
const lowerContentByFileId = new Map(
|
|
867
|
+
fileRecords.map((fileRecord) => [fileRecord.id, fileRecord.content.toLowerCase()])
|
|
868
|
+
);
|
|
869
|
+
const tokenByFileId = new Map(fileRecords.map((fileRecord) => [fileRecord.id, fileTokenSet(fileRecord)]));
|
|
870
|
+
|
|
871
|
+
for (const ruleRecord of ruleRecords) {
|
|
872
|
+
const needle = ruleRecord.id.toLowerCase();
|
|
873
|
+
const ruleKeywords = normalizeRuleTokens(ruleRecord);
|
|
874
|
+
|
|
875
|
+
for (const fileRecord of fileRecords) {
|
|
876
|
+
const lower = lowerContentByFileId.get(fileRecord.id) ?? "";
|
|
877
|
+
const explicitMention = lower.includes(needle);
|
|
878
|
+
const tokens = tokenByFileId.get(fileRecord.id) ?? new Set();
|
|
879
|
+
const matchedKeywords = ruleKeywords.filter((keyword) => tokens.has(keyword));
|
|
880
|
+
const minimumMatches = fileRecord.kind === "CODE" ? 1 : 2;
|
|
881
|
+
const keywordMatch = matchedKeywords.length >= Math.min(minimumMatches, Math.max(1, ruleKeywords.length));
|
|
882
|
+
|
|
883
|
+
if (!explicitMention && !keywordMatch) {
|
|
884
|
+
continue;
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
const constrainsKey = `${ruleRecord.id}|${fileRecord.id}`;
|
|
888
|
+
if (!constrainsSeen.has(constrainsKey)) {
|
|
889
|
+
constrainsSeen.add(constrainsKey);
|
|
890
|
+
constrainsRelations.push({
|
|
891
|
+
from: ruleRecord.id,
|
|
892
|
+
to: fileRecord.id,
|
|
893
|
+
note: explicitMention
|
|
894
|
+
? `Mentions ${ruleRecord.id}`
|
|
895
|
+
: `Keyword match ${matchedKeywords.slice(0, 5).join(", ")}`
|
|
896
|
+
});
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
if (fileRecord.kind === "CODE") {
|
|
900
|
+
const implementsKey = `${fileRecord.id}|${ruleRecord.id}`;
|
|
901
|
+
if (!implementsSeen.has(implementsKey)) {
|
|
902
|
+
implementsSeen.add(implementsKey);
|
|
903
|
+
implementsRelations.push({
|
|
904
|
+
from: fileRecord.id,
|
|
905
|
+
to: ruleRecord.id,
|
|
906
|
+
note: explicitMention
|
|
907
|
+
? `Code references ${ruleRecord.id}`
|
|
908
|
+
: `Code keywords ${matchedKeywords.slice(0, 5).join(", ")}`
|
|
909
|
+
});
|
|
910
|
+
}
|
|
911
|
+
}
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
|
|
915
|
+
writeJsonl(path.join(CACHE_DIR, "documents.jsonl"), fileRecords);
|
|
916
|
+
writeJsonl(path.join(CACHE_DIR, "entities.file.jsonl"), fileRecords);
|
|
917
|
+
writeJsonl(path.join(CACHE_DIR, "entities.adr.jsonl"), adrRecords);
|
|
918
|
+
writeJsonl(path.join(CACHE_DIR, "entities.rule.jsonl"), ruleRecords);
|
|
919
|
+
writeJsonl(path.join(CACHE_DIR, "entities.chunk.jsonl"), chunkRecords);
|
|
920
|
+
writeJsonl(path.join(CACHE_DIR, "relations.supersedes.jsonl"), supersedesRelations);
|
|
921
|
+
writeJsonl(path.join(CACHE_DIR, "relations.constrains.jsonl"), constrainsRelations);
|
|
922
|
+
writeJsonl(path.join(CACHE_DIR, "relations.implements.jsonl"), implementsRelations);
|
|
923
|
+
writeJsonl(path.join(CACHE_DIR, "relations.defines.jsonl"), definesRelations);
|
|
924
|
+
writeJsonl(path.join(CACHE_DIR, "relations.calls.jsonl"), validCallsRelations);
|
|
925
|
+
writeJsonl(path.join(CACHE_DIR, "relations.imports.jsonl"), importsRelations);
|
|
926
|
+
|
|
927
|
+
writeTsv(
|
|
928
|
+
path.join(DB_IMPORT_DIR, "file_nodes.tsv"),
|
|
929
|
+
[
|
|
930
|
+
"id",
|
|
931
|
+
"path",
|
|
932
|
+
"kind",
|
|
933
|
+
"excerpt",
|
|
934
|
+
"checksum",
|
|
935
|
+
"updated_at",
|
|
936
|
+
"source_of_truth",
|
|
937
|
+
"trust_level",
|
|
938
|
+
"status"
|
|
939
|
+
],
|
|
940
|
+
fileRecords.map((record) => [
|
|
941
|
+
record.id,
|
|
942
|
+
record.path,
|
|
943
|
+
record.kind,
|
|
944
|
+
record.excerpt,
|
|
945
|
+
record.checksum,
|
|
946
|
+
record.updated_at,
|
|
947
|
+
record.source_of_truth,
|
|
948
|
+
record.trust_level,
|
|
949
|
+
record.status
|
|
950
|
+
])
|
|
951
|
+
);
|
|
952
|
+
|
|
953
|
+
writeTsv(
|
|
954
|
+
path.join(DB_IMPORT_DIR, "rule_nodes.tsv"),
|
|
955
|
+
[
|
|
956
|
+
"id",
|
|
957
|
+
"title",
|
|
958
|
+
"body",
|
|
959
|
+
"scope",
|
|
960
|
+
"priority",
|
|
961
|
+
"updated_at",
|
|
962
|
+
"source_of_truth",
|
|
963
|
+
"trust_level",
|
|
964
|
+
"status"
|
|
965
|
+
],
|
|
966
|
+
ruleRecords.map((record) => [
|
|
967
|
+
record.id,
|
|
968
|
+
record.title,
|
|
969
|
+
record.body,
|
|
970
|
+
record.scope,
|
|
971
|
+
record.priority,
|
|
972
|
+
record.updated_at,
|
|
973
|
+
record.source_of_truth,
|
|
974
|
+
record.trust_level,
|
|
975
|
+
record.status
|
|
976
|
+
])
|
|
977
|
+
);
|
|
978
|
+
|
|
979
|
+
writeTsv(
|
|
980
|
+
path.join(DB_IMPORT_DIR, "adr_nodes.tsv"),
|
|
981
|
+
[
|
|
982
|
+
"id",
|
|
983
|
+
"path",
|
|
984
|
+
"title",
|
|
985
|
+
"body",
|
|
986
|
+
"decision_date",
|
|
987
|
+
"supersedes_id",
|
|
988
|
+
"source_of_truth",
|
|
989
|
+
"trust_level",
|
|
990
|
+
"status"
|
|
991
|
+
],
|
|
992
|
+
adrRecords.map((record) => [
|
|
993
|
+
record.id,
|
|
994
|
+
record.path,
|
|
995
|
+
record.title,
|
|
996
|
+
record.body,
|
|
997
|
+
record.decision_date,
|
|
998
|
+
record.supersedes_id,
|
|
999
|
+
record.source_of_truth,
|
|
1000
|
+
record.trust_level,
|
|
1001
|
+
record.status
|
|
1002
|
+
])
|
|
1003
|
+
);
|
|
1004
|
+
|
|
1005
|
+
writeTsv(
|
|
1006
|
+
path.join(DB_IMPORT_DIR, "constrains_rel.tsv"),
|
|
1007
|
+
["from", "to", "note"],
|
|
1008
|
+
constrainsRelations.map((record) => [record.from, record.to, record.note])
|
|
1009
|
+
);
|
|
1010
|
+
|
|
1011
|
+
writeTsv(
|
|
1012
|
+
path.join(DB_IMPORT_DIR, "implements_rel.tsv"),
|
|
1013
|
+
["from", "to", "note"],
|
|
1014
|
+
implementsRelations.map((record) => [record.from, record.to, record.note])
|
|
1015
|
+
);
|
|
1016
|
+
|
|
1017
|
+
writeTsv(
|
|
1018
|
+
path.join(DB_IMPORT_DIR, "supersedes_rel.tsv"),
|
|
1019
|
+
["from", "to", "reason"],
|
|
1020
|
+
supersedesRelations.map((record) => [record.from, record.to, record.reason])
|
|
1021
|
+
);
|
|
1022
|
+
|
|
1023
|
+
writeTsv(
|
|
1024
|
+
path.join(DB_IMPORT_DIR, "chunk_nodes.tsv"),
|
|
1025
|
+
[
|
|
1026
|
+
"id",
|
|
1027
|
+
"file_id",
|
|
1028
|
+
"name",
|
|
1029
|
+
"kind",
|
|
1030
|
+
"signature",
|
|
1031
|
+
"body",
|
|
1032
|
+
"start_line",
|
|
1033
|
+
"end_line",
|
|
1034
|
+
"language",
|
|
1035
|
+
"checksum",
|
|
1036
|
+
"updated_at",
|
|
1037
|
+
"trust_level"
|
|
1038
|
+
],
|
|
1039
|
+
chunkRecords.map((record) => [
|
|
1040
|
+
record.id,
|
|
1041
|
+
record.file_id,
|
|
1042
|
+
record.name,
|
|
1043
|
+
record.kind,
|
|
1044
|
+
record.signature,
|
|
1045
|
+
record.body,
|
|
1046
|
+
record.start_line,
|
|
1047
|
+
record.end_line,
|
|
1048
|
+
record.language,
|
|
1049
|
+
record.checksum,
|
|
1050
|
+
record.updated_at,
|
|
1051
|
+
record.trust_level
|
|
1052
|
+
])
|
|
1053
|
+
);
|
|
1054
|
+
|
|
1055
|
+
writeTsv(
|
|
1056
|
+
path.join(DB_IMPORT_DIR, "defines_rel.tsv"),
|
|
1057
|
+
["from", "to"],
|
|
1058
|
+
definesRelations.map((record) => [record.from, record.to])
|
|
1059
|
+
);
|
|
1060
|
+
|
|
1061
|
+
writeTsv(
|
|
1062
|
+
path.join(DB_IMPORT_DIR, "calls_rel.tsv"),
|
|
1063
|
+
["from", "to", "call_type"],
|
|
1064
|
+
validCallsRelations.map((record) => [record.from, record.to, record.call_type])
|
|
1065
|
+
);
|
|
1066
|
+
|
|
1067
|
+
writeTsv(
|
|
1068
|
+
path.join(DB_IMPORT_DIR, "imports_rel.tsv"),
|
|
1069
|
+
["from", "to", "import_name"],
|
|
1070
|
+
importsRelations.map((record) => [record.from, record.to, record.import_name])
|
|
1071
|
+
);
|
|
1072
|
+
|
|
1073
|
+
const manifest = {
|
|
1074
|
+
generated_at: new Date().toISOString(),
|
|
1075
|
+
mode,
|
|
1076
|
+
source_paths: sourcePaths,
|
|
1077
|
+
counts: {
|
|
1078
|
+
files: fileRecords.length,
|
|
1079
|
+
adrs: adrRecords.length,
|
|
1080
|
+
rules: ruleRecords.length,
|
|
1081
|
+
chunks: chunkRecords.length,
|
|
1082
|
+
relations_constrains: constrainsRelations.length,
|
|
1083
|
+
relations_implements: implementsRelations.length,
|
|
1084
|
+
relations_supersedes: supersedesRelations.length,
|
|
1085
|
+
relations_defines: definesRelations.length,
|
|
1086
|
+
relations_calls: validCallsRelations.length,
|
|
1087
|
+
relations_imports: importsRelations.length
|
|
1088
|
+
},
|
|
1089
|
+
skipped,
|
|
1090
|
+
incremental_mode: incrementalMode,
|
|
1091
|
+
changed_candidates: candidates.size,
|
|
1092
|
+
deleted_paths: deletedRelPaths.length
|
|
1093
|
+
};
|
|
1094
|
+
|
|
1095
|
+
fs.writeFileSync(path.join(CACHE_DIR, "manifest.json"), `${JSON.stringify(manifest, null, 2)}\n`);
|
|
1096
|
+
|
|
1097
|
+
console.log(`[ingest] mode=${mode}`);
|
|
1098
|
+
if (incrementalMode) {
|
|
1099
|
+
console.log(
|
|
1100
|
+
`[ingest] incremental changed_candidates=${manifest.changed_candidates} deleted_paths=${manifest.deleted_paths}`
|
|
1101
|
+
);
|
|
1102
|
+
} else if (mode === "changed") {
|
|
1103
|
+
console.log("[ingest] incremental diff unavailable; processed full source set");
|
|
1104
|
+
}
|
|
1105
|
+
console.log(`[ingest] files=${manifest.counts.files} adrs=${manifest.counts.adrs} rules=${manifest.counts.rules} chunks=${manifest.counts.chunks}`);
|
|
1106
|
+
console.log(
|
|
1107
|
+
`[ingest] rels constrains=${manifest.counts.relations_constrains} implements=${manifest.counts.relations_implements} supersedes=${manifest.counts.relations_supersedes}`
|
|
1108
|
+
);
|
|
1109
|
+
console.log(
|
|
1110
|
+
`[ingest] rels defines=${manifest.counts.relations_defines} calls=${manifest.counts.relations_calls} imports=${manifest.counts.relations_imports}`
|
|
1111
|
+
);
|
|
1112
|
+
console.log(
|
|
1113
|
+
`[ingest] skipped unsupported=${skipped.unsupported} too_large=${skipped.tooLarge} binary=${skipped.binary}`
|
|
1114
|
+
);
|
|
1115
|
+
console.log(`[ingest] wrote cache + db import files under .context/`);
|
|
1116
|
+
}
|
|
1117
|
+
|
|
1118
|
+
// Script entry point: run the ingest pipeline as soon as this module is loaded.
// NOTE(review): the call's result is not captured or awaited; if `main` is
// declared async, a rejection here would go unhandled — confirm against its
// declaration.
main();
|