@danielblomma/cortex-mcp 0.4.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +125 -42
  2. package/bin/cortex.mjs +36 -63
  3. package/bin/wsl.mjs +30 -0
  4. package/package.json +15 -3
  5. package/scaffold/.context/ontology.cypher +47 -0
  6. package/scaffold/.githooks/post-commit +14 -0
  7. package/scaffold/.githooks/post-rewrite +23 -0
  8. package/scaffold/mcp/package-lock.json +16 -16
  9. package/scaffold/mcp/package.json +4 -1
  10. package/scaffold/mcp/src/contextEntities.ts +311 -0
  11. package/scaffold/mcp/src/defaults.ts +6 -0
  12. package/scaffold/mcp/src/embed.ts +163 -37
  13. package/scaffold/mcp/src/frontmatter.ts +39 -0
  14. package/scaffold/mcp/src/graph.ts +253 -130
  15. package/scaffold/mcp/src/graphMetrics.ts +12 -0
  16. package/scaffold/mcp/src/impactPresentation.ts +202 -0
  17. package/scaffold/mcp/src/impactRanking.ts +237 -0
  18. package/scaffold/mcp/src/impactResponse.ts +47 -0
  19. package/scaffold/mcp/src/impactResults.ts +173 -0
  20. package/scaffold/mcp/src/impactSeed.ts +33 -0
  21. package/scaffold/mcp/src/impactTraversal.ts +83 -0
  22. package/scaffold/mcp/src/jsonl.ts +34 -0
  23. package/scaffold/mcp/src/loadGraph.ts +345 -86
  24. package/scaffold/mcp/src/paths.ts +33 -2
  25. package/scaffold/mcp/src/presets.ts +137 -0
  26. package/scaffold/mcp/src/relatedResponse.ts +30 -0
  27. package/scaffold/mcp/src/relatedTraversal.ts +101 -0
  28. package/scaffold/mcp/src/rules.ts +27 -0
  29. package/scaffold/mcp/src/search.ts +186 -455
  30. package/scaffold/mcp/src/searchCore.ts +274 -0
  31. package/scaffold/mcp/src/searchResults.ts +133 -0
  32. package/scaffold/mcp/src/server.ts +95 -3
  33. package/scaffold/mcp/src/types.ts +82 -3
  34. package/scaffold/scripts/context.sh +12 -46
  35. package/scaffold/scripts/dashboard.mjs +797 -0
  36. package/scaffold/scripts/dashboard.sh +13 -0
  37. package/scaffold/scripts/ingest.mjs +2227 -59
  38. package/scaffold/scripts/install-git-hooks.sh +3 -1
  39. package/scaffold/scripts/memory-compile.mjs +241 -0
  40. package/scaffold/scripts/memory-compile.sh +20 -0
  41. package/scaffold/scripts/memory-lint.mjs +384 -0
  42. package/scaffold/scripts/memory-lint.sh +20 -0
  43. package/scaffold/scripts/parsers/config.mjs +178 -0
  44. package/scaffold/scripts/parsers/cpp.mjs +316 -0
  45. package/scaffold/scripts/parsers/dotnet/VbNetParser/Program.cs +374 -0
  46. package/scaffold/scripts/parsers/dotnet/VbNetParser/VbNetParser.csproj +13 -0
  47. package/scaffold/scripts/parsers/javascript/ast.mjs +61 -0
  48. package/scaffold/scripts/parsers/javascript/calls.mjs +53 -0
  49. package/scaffold/scripts/parsers/javascript/chunks.mjs +388 -0
  50. package/scaffold/scripts/parsers/javascript/imports.mjs +162 -0
  51. package/scaffold/scripts/parsers/javascript/patterns.mjs +82 -0
  52. package/scaffold/scripts/parsers/javascript/scope-analysis.mjs +3 -0
  53. package/scaffold/scripts/parsers/javascript/scope-builder.mjs +305 -0
  54. package/scaffold/scripts/parsers/javascript/scope-resolver.mjs +82 -0
  55. package/scaffold/scripts/parsers/javascript.mjs +27 -350
  56. package/scaffold/scripts/parsers/resources.mjs +166 -0
  57. package/scaffold/scripts/parsers/rust.mjs +515 -0
  58. package/scaffold/scripts/parsers/sql.mjs +137 -0
  59. package/scaffold/scripts/parsers/vbnet.mjs +143 -0
  60. package/scaffold/scripts/status.sh +0 -7
  61. package/scaffold/scripts/watch.sh +9 -1
  62. package/scaffold/scripts/capture-note.sh +0 -55
  63. package/scaffold/scripts/plan-state-engine.cjs +0 -310
  64. package/scaffold/scripts/plan-state.sh +0 -71
@@ -0,0 +1,311 @@
1
+ import type { ContextData, JsonObject, RelationRecord, SearchEntity } from "./types.js";
2
+
3
// Reports whether a chunk id contains the ":window:" marker.
// buildChunkPartOfRelations uses this to recognise overlap-window chunks.
+ function isWindowChunkId(id: string): boolean {
4
+ return id.includes(":window:");
5
+ }
6
+
7
// Returns the part of the id that precedes the first ":window:" marker,
// or the id unchanged when the marker is absent.
+ function baseChunkId(id: string): string {
8
+ const markerIndex = id.indexOf(":window:");
9
+ return markerIndex === -1 ? id : id.slice(0, markerIndex);
10
+ }
11
+
12
/**
 * Groups rule links by the entity they apply to.
 *
 * - `CONSTRAINS`: `relation.from` is recorded under the key `relation.to`.
 * - `IMPLEMENTS`: `relation.to` is recorded under the key `relation.from`.
 *
 * Every other relation type is ignored. Each linked id appears at most once
 * per key, in first-seen order, so a pair that is connected by both relation
 * types (or by a repeated relation) does not produce duplicate entries.
 *
 * @param relations Relation records to scan.
 * @returns Map from entity id to the ids linked to it.
 */
function groupRuleLinks(relations: RelationRecord[]): Map<string, string[]> {
  const links = new Map<string, string[]>();

  for (const relation of relations) {
    let key: string;
    let linkedId: string;

    if (relation.relation === "CONSTRAINS") {
      key = relation.to;
      linkedId = relation.from;
    } else if (relation.relation === "IMPLEMENTS") {
      key = relation.from;
      linkedId = relation.to;
    } else {
      continue;
    }

    const existing = links.get(key);
    if (existing === undefined) {
      links.set(key, [linkedId]);
    } else if (!existing.includes(linkedId)) {
      // Lists stay small (links per entity), so a linear membership check is fine.
      existing.push(linkedId);
    }
  }

  return links;
}
31
+
32
// Builds, per entity id, extra search text derived from the relations listed in
// `supportedRelations`. For each such relation:
//   - the `from` entity gets "<relation lowercased> <note> <target label>"
//   - the `to` entity gets "affected_by <relation lowercased> <note> <source label>"
// Empty parts are dropped before joining, and all signals of one entity are
// joined with newlines. Entities without a supported relation have no entry.
+ function buildRelationSearchSignals(data: ContextData): Map<string, string> {
33
+ const labelsById = new Map<string, string>();
34
+ for (const document of data.documents) {
35
+ labelsById.set(document.id, document.path);
36
+ }
37
+ for (const chunk of data.chunks) {
38
+ labelsById.set(chunk.id, chunk.name || chunk.id);
39
+ }
40
+ for (const module of data.modules) {
41
+ labelsById.set(module.id, module.name || module.path);
42
+ }
43
+ for (const project of data.projects) {
44
+ labelsById.set(project.id, project.name || project.path);
45
+ }
46
+
47
+ const supportedRelations = new Set([
48
+ "CALLS_SQL",
49
+ "USES_CONFIG_KEY",
50
+ "USES_RESOURCE_KEY",
51
+ "USES_SETTING_KEY",
52
+ "USES_CONFIG",
53
+ "TRANSFORMS_CONFIG"
54
+ ]);
55
+ const signalsByEntity = new Map<string, string[]>();
56
+
57
+ for (const relation of data.relations) {
58
+ if (!supportedRelations.has(relation.relation)) {
59
+ continue;
60
+ }
// Endpoints that have no label entry (rules and ADRs are never added to
// labelsById above) fall back to their raw id.
61
+ const sourceLabel = labelsById.get(relation.from) ?? relation.from;
62
+ const targetLabel = labelsById.get(relation.to) ?? relation.to;
63
+ const outgoingSignal = [relation.relation.toLowerCase(), relation.note, targetLabel].filter(Boolean).join(" ");
64
+ const incomingSignal = ["affected_by", relation.relation.toLowerCase(), relation.note, sourceLabel]
65
+ .filter(Boolean)
66
+ .join(" ");
67
+ const outgoing = signalsByEntity.get(relation.from) ?? [];
68
+ outgoing.push(outgoingSignal);
69
+ signalsByEntity.set(relation.from, outgoing);
70
+ const incoming = signalsByEntity.get(relation.to) ?? [];
71
+ incoming.push(incomingSignal);
72
+ signalsByEntity.set(relation.to, incoming);
73
+ }
74
+
75
+ return new Map([...signalsByEntity.entries()].map(([id, parts]) => [id, parts.join("\n")]));
76
+ }
77
+
78
/**
 * Flattens the loaded context into the list of entities used for search.
 *
 * Entities are emitted in this order: documents, rules, ADRs, chunks, modules,
 * projects. A document of kind "ADR" is skipped when an ADR record exists for
 * the same path (compared trimmed and lower-cased), so the same decision is
 * not listed both as a File and as an ADR.
 *
 * Document and chunk `text` additionally carries the relation-derived signals
 * from `buildRelationSearchSignals`; document and chunk `matched_rules` come
 * from `groupRuleLinks` (chunks inherit the links of their file).
 *
 * @param data Loaded context records.
 * @param includeContent When true each entity carries its full `content`;
 *   otherwise `content` is `undefined`.
 * @returns One search entity per remaining record.
 */
export function buildSearchEntities(data: ContextData, includeContent: boolean): SearchEntity[] {
  const entities: SearchEntity[] = [];
  const fileRuleLinks = groupRuleLinks(data.relations);
  const relationSignals = buildRelationSearchSignals(data);
  const adrPathSet = new Set(
    data.adrs
      .map((adr) => adr.path.trim().toLowerCase())
      .filter((adrPath) => adrPath.length > 0)
  );

  for (const document of data.documents) {
    const normalizedPath = document.path.trim().toLowerCase();
    if (document.kind === "ADR" && adrPathSet.has(normalizedPath)) {
      // Represented by the dedicated ADR entity below.
      continue;
    }

    entities.push({
      id: document.id,
      entity_type: "File",
      kind: document.kind,
      label: document.path,
      path: document.path,
      text: `${document.path}\n${document.excerpt}\n${document.content}\n${relationSignals.get(document.id) ?? ""}`,
      status: document.status,
      source_of_truth: document.source_of_truth,
      trust_level: document.trust_level,
      updated_at: document.updated_at,
      snippet: document.excerpt,
      matched_rules: fileRuleLinks.get(document.id) ?? [],
      content: includeContent ? document.content : undefined
    });
  }

  for (const rule of data.rules) {
    entities.push({
      id: rule.id,
      entity_type: "Rule",
      kind: "RULE",
      label: rule.title || rule.id,
      path: "",
      text: `${rule.id}\n${rule.title}\n${rule.body}`,
      status: rule.status,
      source_of_truth: rule.source_of_truth,
      trust_level: rule.trust_level,
      updated_at: rule.updated_at,
      snippet: rule.body.slice(0, 500),
      matched_rules: [rule.id],
      content: includeContent ? rule.body : undefined
    });
  }

  for (const adr of data.adrs) {
    entities.push({
      id: adr.id,
      entity_type: "ADR",
      kind: "ADR",
      label: adr.title || adr.id,
      path: adr.path,
      text: `${adr.path}\n${adr.title}\n${adr.body}`,
      status: adr.status,
      source_of_truth: adr.source_of_truth,
      trust_level: adr.trust_level,
      // ADRs are dated by their decision date.
      updated_at: adr.decision_date,
      snippet: adr.body.slice(0, 500),
      matched_rules: [],
      content: includeContent ? adr.body : undefined
    });
  }

  // NOTE(review): only documents of kind "CODE" are used to resolve chunk
  // paths here, whereas entityCatalog resolves against every document.
  // Confirm the narrower filter is intended.
  const filePathById = new Map(
    data.documents.filter((document) => document.kind === "CODE").map((document) => [document.id, document.path])
  );

  for (const chunk of data.chunks) {
    const filePath = filePathById.get(chunk.file_id) ?? "";
    entities.push({
      id: chunk.id,
      entity_type: "Chunk",
      kind: chunk.kind || "chunk",
      label: chunk.name || chunk.id,
      path: filePath,
      text: `${filePath}\n${chunk.name}\n${chunk.signature}\n${chunk.description}\n${chunk.body}\n${relationSignals.get(chunk.id) ?? ""}`,
      status: chunk.status,
      source_of_truth: chunk.source_of_truth,
      trust_level: chunk.trust_level,
      updated_at: chunk.updated_at,
      snippet: chunk.description || chunk.body.slice(0, 500),
      matched_rules: fileRuleLinks.get(chunk.file_id) ?? [],
      content: includeContent ? chunk.body : undefined
    });
  }

  for (const module of data.modules) {
    entities.push({
      id: module.id,
      entity_type: "Module",
      kind: "MODULE",
      // Fall back to the path so an unnamed module still gets a non-empty
      // label, matching the name-or-path fallback used for projects.
      label: module.name || module.path,
      path: module.path,
      text: `${module.path}\n${module.name}\n${module.summary}\n${module.exported_symbols}`,
      status: module.status,
      source_of_truth: module.source_of_truth,
      trust_level: module.trust_level,
      updated_at: module.updated_at,
      snippet: (module.summary || "").slice(0, 500),
      matched_rules: [],
      content: includeContent ? module.summary : undefined
    });
  }

  for (const project of data.projects) {
    entities.push({
      id: project.id,
      entity_type: "Project",
      kind: project.kind.toUpperCase() || "PROJECT",
      label: project.name || project.path,
      path: project.path,
      text: `${project.path}\n${project.name}\n${project.kind}\n${project.language}\n${project.target_framework}\n${project.summary}`,
      status: project.status,
      source_of_truth: project.source_of_truth,
      trust_level: project.trust_level,
      updated_at: project.updated_at,
      snippet: (project.summary || "").slice(0, 500),
      matched_rules: [],
      content: includeContent ? project.summary : undefined
    });
  }

  return entities;
}
208
+
209
// Derives PART_OF relations for chunks:
//   - a chunk whose id contains ":window:" is linked to its base chunk id
//     (the id text before the marker);
//   - every chunk with a non-empty file_id is linked to that file.
// A window chunk with a file_id therefore yields two relations.
// NOTE(review): the window -> base link is emitted without checking that the
// base chunk id actually exists in data.chunks — confirm consumers tolerate that.
+ export function buildChunkPartOfRelations(data: ContextData): RelationRecord[] {
210
+ const relations: RelationRecord[] = [];
211
+ for (const chunk of data.chunks) {
212
+ if (isWindowChunkId(chunk.id)) {
213
+ relations.push({
214
+ from: chunk.id,
215
+ to: baseChunkId(chunk.id),
216
+ relation: "PART_OF",
217
+ note: "Overlap window belongs to base chunk"
218
+ });
219
+ }
220
+
221
+ if (!chunk.file_id) {
222
+ continue;
223
+ }
224
+
225
+ relations.push({
226
+ from: chunk.id,
227
+ to: chunk.file_id,
228
+ relation: "PART_OF",
229
+ note: "Chunk belongs to file"
230
+ });
231
+ }
232
+ return relations;
233
+ }
234
+
235
/**
 * Builds a lookup from entity id to a compact descriptor
 * (`id`, `type`, `label`, `status`, `source_of_truth`, and `path` where known).
 *
 * Records are added in the order files, rules, ADRs, chunks, modules,
 * projects; if two records share an id, the later one replaces the earlier.
 * Every label falls back to a stable identifier (id or path) so that no
 * catalog entry ends up with an empty label.
 *
 * @param data Loaded context records.
 * @returns Map keyed by entity id.
 */
export function entityCatalog(data: ContextData): Map<string, JsonObject> {
  const catalog = new Map<string, JsonObject>();
  const fileById = new Map(data.documents.map((document) => [document.id, document]));

  for (const file of data.documents) {
    catalog.set(file.id, {
      id: file.id,
      type: "File",
      label: file.path,
      status: file.status,
      source_of_truth: file.source_of_truth
    });
  }

  for (const rule of data.rules) {
    catalog.set(rule.id, {
      id: rule.id,
      type: "Rule",
      // Same fallback buildSearchEntities uses for rule labels.
      label: rule.title || rule.id,
      status: rule.status,
      source_of_truth: rule.source_of_truth
    });
  }

  for (const adr of data.adrs) {
    catalog.set(adr.id, {
      id: adr.id,
      type: "ADR",
      label: adr.title || adr.id,
      status: adr.status,
      source_of_truth: adr.source_of_truth
    });
  }

  for (const chunk of data.chunks) {
    const filePath = fileById.get(chunk.file_id)?.path ?? "";
    const chunkEntity: JsonObject = {
      id: chunk.id,
      type: "Chunk",
      label: chunk.name || chunk.id,
      status: chunk.status,
      source_of_truth: chunk.source_of_truth
    };
    // `path` is only present when the owning file could be resolved.
    if (filePath) {
      chunkEntity.path = filePath;
    }
    catalog.set(chunk.id, chunkEntity);
  }

  for (const module of data.modules) {
    catalog.set(module.id, {
      id: module.id,
      type: "Module",
      // Unnamed modules are labelled by path, like projects below.
      label: module.name || module.path,
      status: module.status,
      source_of_truth: module.source_of_truth,
      path: module.path
    });
  }

  for (const project of data.projects) {
    catalog.set(project.id, {
      id: project.id,
      type: "Project",
      label: project.name || project.path,
      status: project.status,
      source_of_truth: project.source_of_truth,
      path: project.path
    });
  }

  return catalog;
}
308
+
309
// Indexes the search entities by id. Entities are built with includeContent = false,
// so their `content` field is undefined. If two entities share an id, the one
// produced later by buildSearchEntities replaces the earlier one.
+ export function buildEntitySearchMap(data: ContextData): Map<string, SearchEntity> {
310
+ return new Map(buildSearchEntities(data, false).map((entity) => [entity.id, entity]));
311
+ }
@@ -0,0 +1,6 @@
1
// Named trust_level values, one per entity category.
// NOTE(review): the scale and the meaning of each tier are not documented here —
// presumably a higher number means a more trusted source; confirm against the
// code that consumes these. TRUST_RULE and TRUST_ADR intentionally share 95 as written.
+ export const TRUST_DOCUMENT = 50;
2
+ export const TRUST_CHUNK = 60;
3
+ export const TRUST_MEMORY = 70;
4
+ export const TRUST_CHUNK_RYUGRAPH = 80;
5
+ export const TRUST_RULE = 95;
6
+ export const TRUST_ADR = 95;
@@ -3,6 +3,8 @@ import fs from "node:fs";
3
3
  import path from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
5
  import { env, pipeline } from "@xenova/transformers";
6
+ import { readJsonl, asString, asNumber, asBoolean } from "./jsonl.js";
7
+ import type { JsonObject, JsonValue } from "./types.js";
6
8
 
7
9
  const __filename = fileURLToPath(import.meta.url);
8
10
  const __dirname = path.dirname(__filename);
@@ -16,9 +18,7 @@ const MODEL_CACHE_DIR = path.join(EMBEDDINGS_DIR, "models");
16
18
 
17
19
  const DEFAULT_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
18
20
  const DEFAULT_MAX_TEXT_CHARS = 7000;
19
-
20
- type JsonValue = string | number | boolean | null | JsonObject | JsonValue[];
21
- type JsonObject = { [key: string]: JsonValue };
21
+ const CHUNK_BODY_PREVIEW_CHARS = 2000;
22
22
 
23
23
  type FileEntity = {
24
24
  id: string;
@@ -62,7 +62,51 @@ type AdrEntity = {
62
62
  signature: string;
63
63
  };
64
64
 
65
- type SearchEntity = FileEntity | RuleEntity | AdrEntity;
65
+ // Embedding-specific entity types intentionally different from types.ts records
66
+ // because they carry `text` and `signature` fields used for embedding generation.
67
// Embedding input for one module record, as produced by parseModuleEntities.
// `text` is the clamped string built from path, name, summary and exported symbols;
// `signature` is a hash over the id, updated_at and that text.
+ type ModuleEntity = {
68
+ id: string;
69
+ type: "Module";
70
+ kind: "MODULE";
71
+ label: string;
72
+ path: string;
73
+ status: string;
74
+ source_of_truth: boolean;
75
+ trust_level: number;
76
+ updated_at: string;
77
+ text: string;
78
+ signature: string;
79
+ };
80
+
81
// Embedding input for one project record, as produced by parseProjectEntities.
// Unlike ModuleEntity, `kind` is a free-form string (the upper-cased project kind,
// or "PROJECT" when that is empty).
+ type ProjectEntity = {
82
+ id: string;
83
+ type: "Project";
84
+ kind: string;
85
+ label: string;
86
+ path: string;
87
+ status: string;
88
+ source_of_truth: boolean;
89
+ trust_level: number;
90
+ updated_at: string;
91
+ text: string;
92
+ signature: string;
93
+ };
94
+
95
// Embedding input for one chunk record, as produced by parseChunkEntities.
// `path` is the path of the owning file ("" when the file id is unknown) and
// `label` is the chunk name, or the chunk id when the name is empty.
+ type ChunkEntity = {
96
+ id: string;
97
+ type: "Chunk";
98
+ kind: string;
99
+ label: string;
100
+ path: string;
101
+ status: string;
102
+ source_of_truth: boolean;
103
+ trust_level: number;
104
+ updated_at: string;
105
+ text: string;
106
+ signature: string;
107
+ };
108
+
109
// Union of every entity shape that main() collects into the list to embed.
+ type SearchEntity = FileEntity | RuleEntity | AdrEntity | ModuleEntity | ProjectEntity | ChunkEntity;
66
110
 
67
111
  type EmbeddingRecord = {
68
112
  id: string;
@@ -87,18 +131,6 @@ function parseArgs(argv: string[]): { mode: "full" | "changed" } {
87
131
  };
88
132
  }
89
133
 
90
- function asString(value: JsonValue | undefined, fallback = ""): string {
91
- return typeof value === "string" ? value : fallback;
92
- }
93
-
94
- function asNumber(value: JsonValue | undefined, fallback = 0): number {
95
- return typeof value === "number" && Number.isFinite(value) ? value : fallback;
96
- }
97
-
98
- function asBoolean(value: JsonValue | undefined, fallback = false): boolean {
99
- return typeof value === "boolean" ? value : fallback;
100
- }
101
-
102
134
  function hashText(value: string): string {
103
135
  return crypto.createHash("sha256").update(value).digest("hex");
104
136
  }
@@ -111,26 +143,6 @@ function clampText(value: string, maxChars: number): string {
111
143
  return value.slice(0, maxChars);
112
144
  }
113
145
 
114
- function readJsonl(filePath: string): JsonObject[] {
115
- if (!fs.existsSync(filePath)) {
116
- return [];
117
- }
118
-
119
- return fs
120
- .readFileSync(filePath, "utf8")
121
- .split(/\r?\n/)
122
- .map((line) => line.trim())
123
- .filter(Boolean)
124
- .map((line) => {
125
- try {
126
- return JSON.parse(line) as JsonObject;
127
- } catch {
128
- return null;
129
- }
130
- })
131
- .filter((value): value is JsonObject => value !== null);
132
- }
133
-
134
146
  function writeJsonl(filePath: string, records: EmbeddingRecord[]): void {
135
147
  const body = records.map((record) => JSON.stringify(record)).join("\n");
136
148
  fs.writeFileSync(filePath, body ? `${body}\n` : "", "utf8");
@@ -243,6 +255,110 @@ function parseAdrEntities(raw: JsonObject[], maxChars: number): AdrEntity[] {
243
255
  .filter((value): value is AdrEntity => value !== null);
244
256
  }
245
257
 
258
// Converts raw JSONL records into ModuleEntity values for embedding.
// Records for which asString(item.id) is empty are dropped.
// The embedding text is "path\nname\nsummary\nexported_symbols", passed through
// clampText with maxChars. Missing status / source_of_truth / trust_level fall
// back to "active" / false / 75. The signature hashes id, updated_at and the
// hash of the text.
+ function parseModuleEntities(raw: JsonObject[], maxChars: number): ModuleEntity[] {
259
+ return raw
260
+ .map((item) => {
261
+ const id = asString(item.id);
262
+ if (!id) {
263
+ return null;
264
+ }
265
+
266
+ const modulePath = asString(item.path);
267
+ const name = asString(item.name);
268
+ const summary = asString(item.summary);
269
+ const exportedSymbols = asString(item.exported_symbols);
270
+ const updatedAt = asString(item.updated_at);
271
+ const text = clampText(`${modulePath}\n${name}\n${summary}\n${exportedSymbols}`, maxChars);
272
+
273
+ return {
274
+ id,
275
+ type: "Module" as const,
276
+ kind: "MODULE" as const,
277
+ label: name || modulePath,
278
+ path: modulePath,
279
+ status: asString(item.status, "active"),
280
+ source_of_truth: asBoolean(item.source_of_truth, false),
281
+ trust_level: asNumber(item.trust_level, 75),
282
+ updated_at: updatedAt,
283
+ text,
284
+ signature: hashText(`module|${id}|${updatedAt}|${hashText(text)}`)
285
+ };
286
+ })
287
+ .filter((value): value is ModuleEntity => value !== null);
288
+ }
289
+
290
// Converts raw JSONL chunk records into ChunkEntity values for embedding.
// Records for which asString(item.id) is empty are dropped. The owning file's
// path is looked up in filePathById ("" when the file id is unknown).
// The embedding text is "filePath\nname\nsignature\ndescription\nbody", where the
// body is first cut to CHUNK_BODY_PREVIEW_CHARS and the whole string is then
// passed through clampText with maxChars.
// Unlike the module and project signatures, the chunk signature is built from
// the checksum rather than the id.
+ function parseChunkEntities(raw: JsonObject[], filePathById: Map<string, string>, maxChars: number): ChunkEntity[] {
291
+ return raw
292
+ .map((item) => {
293
+ const id = asString(item.id);
294
+ if (!id) {
295
+ return null;
296
+ }
297
+
298
+ const fileId = asString(item.file_id);
299
+ const filePath = filePathById.get(fileId) ?? "";
300
+ const name = asString(item.name);
301
+ const sig = asString(item.signature);
302
+ const description = asString(item.description);
303
+ const body = asString(item.body);
304
+ const updatedAt = asString(item.updated_at);
// NOTE(review): hashText(body) is evaluated for every record because it is
// passed as an argument, even when item.checksum is present and the fallback
// is unused. Consider computing it only when the checksum is missing.
305
+ const checksum = asString(item.checksum, hashText(body));
306
+ const text = clampText(`${filePath}\n${name}\n${sig}\n${description}\n${body.slice(0, CHUNK_BODY_PREVIEW_CHARS)}`, maxChars);
307
+
308
+ return {
309
+ id,
310
+ type: "Chunk" as const,
311
+ kind: asString(item.kind, "chunk"),
312
+ label: name || id,
313
+ path: filePath,
314
+ status: asString(item.status, "active"),
315
+ source_of_truth: asBoolean(item.source_of_truth, false),
316
+ trust_level: asNumber(item.trust_level, 60),
317
+ updated_at: updatedAt,
318
+ text,
319
+ signature: hashText(`chunk|${checksum}|${updatedAt}|${hashText(text)}`)
320
+ };
321
+ })
322
+ .filter((value): value is ChunkEntity => value !== null);
323
+ }
324
+
325
// Converts raw JSONL project records into ProjectEntity values for embedding.
// Records for which asString(item.id) is empty are dropped.
// The embedding text is "path\nname\nkind\nlanguage\ntarget_framework\nsummary",
// passed through clampText with maxChars. A missing kind falls back to "project"
// and a missing language to "dotnet"; missing status / source_of_truth /
// trust_level fall back to "active" / false / 80.
+ function parseProjectEntities(raw: JsonObject[], maxChars: number): ProjectEntity[] {
326
+ return raw
327
+ .map((item) => {
328
+ const id = asString(item.id);
329
+ if (!id) {
330
+ return null;
331
+ }
332
+
333
+ const projectPath = asString(item.path);
334
+ const name = asString(item.name);
335
+ const kind = asString(item.kind, "project");
336
+ const language = asString(item.language, "dotnet");
337
+ const targetFramework = asString(item.target_framework);
338
+ const summary = asString(item.summary);
339
+ const updatedAt = asString(item.updated_at);
340
+ const text = clampText(
341
+ `${projectPath}\n${name}\n${kind}\n${language}\n${targetFramework}\n${summary}`,
342
+ maxChars
343
+ );
344
+
345
+ return {
346
+ id,
347
+ type: "Project" as const,
// An empty kind string upper-cases to "" and therefore becomes "PROJECT".
348
+ kind: kind.toUpperCase() || "PROJECT",
349
+ label: name || projectPath,
350
+ path: projectPath,
351
+ status: asString(item.status, "active"),
352
+ source_of_truth: asBoolean(item.source_of_truth, false),
353
+ trust_level: asNumber(item.trust_level, 80),
354
+ updated_at: updatedAt,
355
+ text,
356
+ signature: hashText(`project|${id}|${updatedAt}|${hashText(text)}`)
357
+ };
358
+ })
359
+ .filter((value): value is ProjectEntity => value !== null);
360
+ }
361
+
246
362
  function parseExistingEmbeddings(raw: JsonObject[], modelId: string): Map<string, EmbeddingRecord> {
247
363
  const index = new Map<string, EmbeddingRecord>();
248
364
 
@@ -312,7 +428,17 @@ async function main(): Promise<void> {
312
428
  const documents = parseFileEntities(readJsonl(path.join(CACHE_DIR, "documents.jsonl")), maxTextChars);
313
429
  const rules = parseRuleEntities(readJsonl(path.join(CACHE_DIR, "entities.rule.jsonl")), maxTextChars);
314
430
  const adrs = parseAdrEntities(readJsonl(path.join(CACHE_DIR, "entities.adr.jsonl")), maxTextChars);
315
- const entities: SearchEntity[] = [...documents, ...rules, ...adrs].sort((a, b) => a.id.localeCompare(b.id));
431
+ const modules = parseModuleEntities(readJsonl(path.join(CACHE_DIR, "entities.module.jsonl")), maxTextChars);
432
+ const projects = parseProjectEntities(readJsonl(path.join(CACHE_DIR, "entities.project.jsonl")), maxTextChars);
433
+
434
+ // Build file path lookup for chunk embedding text (reuse already-parsed documents)
435
+ const filePathById = new Map<string, string>();
436
+ for (const doc of documents) {
437
+ filePathById.set(doc.id, doc.path);
438
+ }
439
+ const chunks = parseChunkEntities(readJsonl(path.join(CACHE_DIR, "entities.chunk.jsonl")), filePathById, maxTextChars);
440
+
441
+ const entities: SearchEntity[] = [...documents, ...rules, ...adrs, ...modules, ...projects, ...chunks].sort((a, b) => a.id.localeCompare(b.id));
316
442
 
317
443
  const existing = parseExistingEmbeddings(readJsonl(EMBEDDINGS_PATH), modelId);
318
444
 
@@ -0,0 +1,39 @@
1
/**
 * Splits a markdown document into its leading frontmatter fields and its body.
 *
 * Frontmatter is a block at the very top of the document delimited by two
 * `---` lines. Inside it, every `key: value` line (key made of letters,
 * digits, `_` or `-`) becomes a field with a lower-cased key and a trimmed
 * value; a repeated key keeps the last value, and lines that do not match
 * are ignored.
 *
 * @param markdown Raw document text (LF or CRLF line endings).
 * @returns `fields` and `body`. When the document has no complete frontmatter
 *   block — it does not start with `---`, or the opening `---` is never
 *   closed — `fields` is empty and `body` is the input unchanged. Otherwise
 *   `body` is the trimmed text after the closing delimiter.
 */
export function parseFrontmatter(markdown: string): { fields: Map<string, string>; body: string } {
  const lines = markdown.split(/\r?\n/);
  if (lines[0]?.trim() !== "---") {
    return { fields: new Map(), body: markdown };
  }

  // Find the closing delimiter before reading any field. A leading "---" with
  // no partner is a horizontal rule or a malformed header; treating the rest
  // of the document as frontmatter would silently discard the whole body.
  const closingIndex = lines.findIndex((line, index) => index > 0 && line.trim() === "---");
  if (closingIndex === -1) {
    return { fields: new Map(), body: markdown };
  }

  const fields = new Map<string, string>();
  for (const line of lines.slice(1, closingIndex)) {
    const match = /^([A-Za-z0-9_-]+):\s*(.*)$/.exec(line);
    if (match === null) {
      continue;
    }
    const [, key = "", value = ""] = match;
    fields.set(key.toLowerCase(), value.trim());
  }

  return {
    fields,
    body: lines
      .slice(closingIndex + 1)
      .join("\n")
      .trim()
  };
}
29
+
30
/**
 * Parses a comma-separated frontmatter value into a list of strings.
 *
 * Items are trimmed and empty items are dropped. A value written as a YAML
 * flow list (`[a, b]`) has its surrounding brackets removed, and an item
 * wrapped in matching single or double quotes has the quotes removed, so
 * `[a, "b"]` and `a, b` produce the same result.
 *
 * Commas always separate items, including commas inside quotes.
 *
 * @param value Raw field value; `undefined` or blank yields `[]`.
 * @returns The parsed items in their original order.
 */
export function parseStringList(value: string | undefined): string[] {
  if (!value) {
    return [];
  }

  let text = value.trim();
  if (text.startsWith("[") && text.endsWith("]")) {
    text = text.slice(1, -1);
  }

  return text
    .split(",")
    .map((item) =>
      item
        .trim()
        // Drop one pair of matching quotes around the item, if present.
        .replace(/^(["'])(.*)\1$/, "$2")
        .trim()
    )
    .filter(Boolean);
}