@danielblomma/cortex-mcp 0.4.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -16
- package/bin/cortex.mjs +32 -60
- package/package.json +17 -3
- package/scaffold/.context/ontology.cypher +47 -0
- package/scaffold/.githooks/post-commit +14 -0
- package/scaffold/.githooks/post-rewrite +23 -0
- package/scaffold/mcp/package-lock.json +19 -23
- package/scaffold/mcp/package.json +3 -1
- package/scaffold/mcp/src/contextEntities.ts +311 -0
- package/scaffold/mcp/src/defaults.ts +6 -0
- package/scaffold/mcp/src/embed.ts +163 -37
- package/scaffold/mcp/src/frontmatter.ts +39 -0
- package/scaffold/mcp/src/graph.ts +330 -109
- package/scaffold/mcp/src/graphMetrics.ts +12 -0
- package/scaffold/mcp/src/impactPresentation.ts +202 -0
- package/scaffold/mcp/src/impactRanking.ts +237 -0
- package/scaffold/mcp/src/impactResponse.ts +47 -0
- package/scaffold/mcp/src/impactResults.ts +173 -0
- package/scaffold/mcp/src/impactSeed.ts +33 -0
- package/scaffold/mcp/src/impactTraversal.ts +83 -0
- package/scaffold/mcp/src/jsonl.ts +34 -0
- package/scaffold/mcp/src/loadGraph.ts +345 -86
- package/scaffold/mcp/src/paths.ts +24 -2
- package/scaffold/mcp/src/presets.ts +137 -0
- package/scaffold/mcp/src/relatedResponse.ts +30 -0
- package/scaffold/mcp/src/relatedTraversal.ts +101 -0
- package/scaffold/mcp/src/rules.ts +27 -0
- package/scaffold/mcp/src/search.ts +191 -355
- package/scaffold/mcp/src/searchCore.ts +274 -0
- package/scaffold/mcp/src/searchResults.ts +133 -0
- package/scaffold/mcp/src/server.ts +95 -3
- package/scaffold/mcp/src/types.ts +99 -3
- package/scaffold/scripts/context.sh +12 -46
- package/scaffold/scripts/dashboard.mjs +797 -0
- package/scaffold/scripts/dashboard.sh +13 -0
- package/scaffold/scripts/ingest.mjs +2219 -59
- package/scaffold/scripts/install-git-hooks.sh +3 -1
- package/scaffold/scripts/memory-compile.mjs +232 -0
- package/scaffold/scripts/memory-compile.sh +20 -0
- package/scaffold/scripts/memory-lint.mjs +375 -0
- package/scaffold/scripts/memory-lint.sh +20 -0
- package/scaffold/scripts/parsers/config.mjs +178 -0
- package/scaffold/scripts/parsers/cpp.mjs +316 -0
- package/scaffold/scripts/parsers/dotnet/VbNetParser/Program.cs +374 -0
- package/scaffold/scripts/parsers/dotnet/VbNetParser/VbNetParser.csproj +13 -0
- package/scaffold/scripts/parsers/javascript/ast.mjs +61 -0
- package/scaffold/scripts/parsers/javascript/calls.mjs +53 -0
- package/scaffold/scripts/parsers/javascript/chunks.mjs +388 -0
- package/scaffold/scripts/parsers/javascript/imports.mjs +162 -0
- package/scaffold/scripts/parsers/javascript/patterns.mjs +82 -0
- package/scaffold/scripts/parsers/javascript/scope-analysis.mjs +3 -0
- package/scaffold/scripts/parsers/javascript/scope-builder.mjs +305 -0
- package/scaffold/scripts/parsers/javascript/scope-resolver.mjs +82 -0
- package/scaffold/scripts/parsers/javascript.mjs +27 -350
- package/scaffold/scripts/parsers/resources.mjs +166 -0
- package/scaffold/scripts/parsers/sql.mjs +137 -0
- package/scaffold/scripts/parsers/vbnet.mjs +143 -0
- package/scaffold/scripts/status.sh +15 -8
- package/scaffold/scripts/capture-note.sh +0 -55
- package/scaffold/scripts/plan-state-engine.cjs +0 -310
- package/scaffold/scripts/plan-state.sh +0 -71
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import type { ContextData, JsonObject, RelationRecord, SearchEntity } from "./types.js";
|
|
2
|
+
|
|
3
|
+
/** Reports whether a chunk id contains the ":window:" marker. */
function isWindowChunkId(id: string): boolean {
  const markerPosition = id.indexOf(":window:");
  return markerPosition !== -1;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Returns the portion of `id` that precedes the first ":window:" marker.
 * Ids without the marker are returned unchanged.
 */
function baseChunkId(id: string): string {
  const cutAt = id.indexOf(":window:");
  if (cutAt === -1) {
    return id;
  }
  return id.slice(0, cutAt);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Groups the endpoints of CONSTRAINS and IMPLEMENTS relations by the entity they attach to.
 *
 * - CONSTRAINS: keyed by `to`, collecting `from`.
 * - IMPLEMENTS: keyed by `from`, collecting `to`.
 *
 * Every other relation kind is ignored. Values keep the input order and are not de-duplicated.
 * The collected ids are presumably rule ids (callers expose these lists as `matched_rules`).
 */
function groupRuleLinks(relations: RelationRecord[]): Map<string, string[]> {
  const linkedIdsByEntity = new Map<string, string[]>();

  const append = (entityId: string, linkedId: string): void => {
    const bucket = linkedIdsByEntity.get(entityId);
    if (bucket === undefined) {
      linkedIdsByEntity.set(entityId, [linkedId]);
    } else {
      bucket.push(linkedId);
    }
  };

  for (const { relation: kind, from, to } of relations) {
    switch (kind) {
      case "CONSTRAINS":
        append(to, from);
        break;
      case "IMPLEMENTS":
        append(from, to);
        break;
      default:
        break;
    }
  }

  return linkedIdsByEntity;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Produces, per entity id, extra search text derived from a fixed set of relation kinds.
 *
 * For each supported relation the source entity receives
 * "<relation lower-cased> <note> <target label>" and the target entity receives
 * "affected_by <relation lower-cased> <note> <source label>"; empty parts are dropped.
 * All signals for one entity are joined with newlines.
 */
function buildRelationSearchSignals(data: ContextData): Map<string, string> {
  // Human-readable label per entity id. Later collections overwrite earlier ones on id clashes.
  const labelFor = new Map<string, string>();
  data.documents.forEach((doc) => labelFor.set(doc.id, doc.path));
  data.chunks.forEach((chunk) => labelFor.set(chunk.id, chunk.name || chunk.id));
  data.modules.forEach((mod) => labelFor.set(mod.id, mod.name || mod.path));
  data.projects.forEach((proj) => labelFor.set(proj.id, proj.name || proj.path));

  const searchableKinds = new Set([
    "CALLS_SQL",
    "USES_CONFIG_KEY",
    "USES_RESOURCE_KEY",
    "USES_SETTING_KEY",
    "USES_CONFIG",
    "TRANSFORMS_CONFIG"
  ]);

  const collected = new Map<string, string[]>();
  const record = (entityId: string, signal: string): void => {
    const bucket = collected.get(entityId);
    if (bucket === undefined) {
      collected.set(entityId, [signal]);
    } else {
      bucket.push(signal);
    }
  };

  for (const rel of data.relations) {
    if (!searchableKinds.has(rel.relation)) {
      continue;
    }
    const verb = rel.relation.toLowerCase();
    const fromLabel = labelFor.get(rel.from) ?? rel.from;
    const toLabel = labelFor.get(rel.to) ?? rel.to;

    // Outgoing signal first, then incoming, so map insertion order matches source-then-target.
    record(rel.from, [verb, rel.note, toLabel].filter(Boolean).join(" "));
    record(rel.to, ["affected_by", verb, rel.note, fromLabel].filter(Boolean).join(" "));
  }

  const joined = new Map<string, string>();
  for (const [entityId, parts] of collected) {
    joined.set(entityId, parts.join("\n"));
  }
  return joined;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Flattens every entity collection in `data` into one list of search records.
 *
 * Records are appended in this order: documents (as "File"), rules, ADRs, chunks,
 * modules, projects. Documents of kind "ADR" whose path (trimmed, lower-cased) matches
 * an entry in `data.adrs` are skipped so the ADR is not indexed twice.
 *
 * @param data - Loaded context data.
 * @param includeContent - When true each record carries its full body in `content`;
 *   otherwise `content` is `undefined`.
 * @returns The search records, one per indexed entity.
 */
export function buildSearchEntities(data: ContextData, includeContent: boolean): SearchEntity[] {
  const entities: SearchEntity[] = [];
  const fileRuleLinks = groupRuleLinks(data.relations);
  const relationSignals = buildRelationSearchSignals(data);
  // Normalised ADR paths, used to drop File records that duplicate an ADR record.
  const adrPathSet = new Set(
    data.adrs
      .map((adr) => adr.path.trim().toLowerCase())
      .filter((adrPath) => adrPath.length > 0)
  );

  for (const document of data.documents) {
    const normalizedPath = document.path.trim().toLowerCase();
    if (document.kind === "ADR" && adrPathSet.has(normalizedPath)) {
      continue;
    }

    entities.push({
      id: document.id,
      entity_type: "File",
      kind: document.kind,
      label: document.path,
      path: document.path,
      text: `${document.path}\n${document.excerpt}\n${document.content}\n${relationSignals.get(document.id) ?? ""}`,
      status: document.status,
      source_of_truth: document.source_of_truth,
      trust_level: document.trust_level,
      updated_at: document.updated_at,
      snippet: document.excerpt,
      matched_rules: fileRuleLinks.get(document.id) ?? [],
      content: includeContent ? document.content : undefined
    });
  }

  for (const rule of data.rules) {
    entities.push({
      id: rule.id,
      entity_type: "Rule",
      kind: "RULE",
      label: rule.title || rule.id,
      path: "",
      text: `${rule.id}\n${rule.title}\n${rule.body}`,
      status: rule.status,
      source_of_truth: rule.source_of_truth,
      trust_level: rule.trust_level,
      updated_at: rule.updated_at,
      snippet: rule.body.slice(0, 500),
      matched_rules: [rule.id],
      content: includeContent ? rule.body : undefined
    });
  }

  for (const adr of data.adrs) {
    entities.push({
      id: adr.id,
      entity_type: "ADR",
      kind: "ADR",
      label: adr.title || adr.id,
      path: adr.path,
      text: `${adr.path}\n${adr.title}\n${adr.body}`,
      status: adr.status,
      source_of_truth: adr.source_of_truth,
      trust_level: adr.trust_level,
      // ADR records expose their decision date as the update timestamp.
      updated_at: adr.decision_date,
      snippet: adr.body.slice(0, 500),
      matched_rules: [],
      content: includeContent ? adr.body : undefined
    });
  }

  // NOTE(review): only CODE documents contribute paths here, whereas entityCatalog resolves
  // chunk paths against every document — confirm whether chunks of non-CODE files should
  // also receive a path.
  const filePathById = new Map(
    data.documents.filter((document) => document.kind === "CODE").map((document) => [document.id, document.path])
  );

  for (const chunk of data.chunks) {
    const filePath = filePathById.get(chunk.file_id) ?? "";
    entities.push({
      id: chunk.id,
      entity_type: "Chunk",
      kind: chunk.kind || "chunk",
      label: chunk.name || chunk.id,
      path: filePath,
      text: `${filePath}\n${chunk.name}\n${chunk.signature}\n${chunk.description}\n${chunk.body}\n${relationSignals.get(chunk.id) ?? ""}`,
      status: chunk.status,
      source_of_truth: chunk.source_of_truth,
      trust_level: chunk.trust_level,
      updated_at: chunk.updated_at,
      snippet: chunk.description || chunk.body.slice(0, 500),
      // Chunks inherit the rule links of the file they belong to.
      matched_rules: fileRuleLinks.get(chunk.file_id) ?? [],
      content: includeContent ? chunk.body : undefined
    });
  }

  for (const module of data.modules) {
    entities.push({
      id: module.id,
      entity_type: "Module",
      kind: "MODULE",
      // Fall back to the path so an unnamed module never gets an empty label
      // (same fallback buildRelationSearchSignals uses for module labels).
      label: module.name || module.path,
      path: module.path,
      text: `${module.path}\n${module.name}\n${module.summary}\n${module.exported_symbols}`,
      status: module.status,
      source_of_truth: module.source_of_truth,
      trust_level: module.trust_level,
      updated_at: module.updated_at,
      snippet: (module.summary || "").slice(0, 500),
      matched_rules: [],
      content: includeContent ? module.summary : undefined
    });
  }

  for (const project of data.projects) {
    entities.push({
      id: project.id,
      entity_type: "Project",
      kind: project.kind.toUpperCase() || "PROJECT",
      label: project.name || project.path,
      path: project.path,
      text: `${project.path}\n${project.name}\n${project.kind}\n${project.language}\n${project.target_framework}\n${project.summary}`,
      status: project.status,
      source_of_truth: project.source_of_truth,
      trust_level: project.trust_level,
      updated_at: project.updated_at,
      snippet: (project.summary || "").slice(0, 500),
      matched_rules: [],
      content: includeContent ? project.summary : undefined
    });
  }

  return entities;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Derives PART_OF relations for every chunk in `data`.
 *
 * A chunk whose id is a window id is linked to its base chunk, and any chunk with a
 * non-empty `file_id` is additionally linked to that file. For a single chunk the
 * window relation (if any) precedes the file relation.
 */
export function buildChunkPartOfRelations(data: ContextData): RelationRecord[] {
  return data.chunks.flatMap((chunk): RelationRecord[] => {
    const derived: RelationRecord[] = [];

    if (isWindowChunkId(chunk.id)) {
      derived.push({
        from: chunk.id,
        to: baseChunkId(chunk.id),
        relation: "PART_OF",
        note: "Overlap window belongs to base chunk"
      });
    }

    if (chunk.file_id) {
      derived.push({
        from: chunk.id,
        to: chunk.file_id,
        relation: "PART_OF",
        note: "Chunk belongs to file"
      });
    }

    return derived;
  });
}
|
|
234
|
+
|
|
235
|
+
/**
 * Builds a lookup from entity id to a compact descriptor
 * (`id`, `type`, `label`, `status`, `source_of_truth`, and `path` where available).
 *
 * Collections are written in the order documents, rules, ADRs, chunks, modules, projects,
 * so on an id clash the later collection wins. Chunk descriptors carry `path` only when
 * their `file_id` resolves to a known document with a non-empty path.
 *
 * Labels never come out empty when a fallback exists: rules and ADRs fall back to their id,
 * chunks to their id, modules and projects to their path.
 */
export function entityCatalog(data: ContextData): Map<string, JsonObject> {
  const catalog = new Map<string, JsonObject>();
  const fileById = new Map(data.documents.map((document) => [document.id, document]));

  for (const file of data.documents) {
    catalog.set(file.id, {
      id: file.id,
      type: "File",
      label: file.path,
      status: file.status,
      source_of_truth: file.source_of_truth
    });
  }

  for (const rule of data.rules) {
    catalog.set(rule.id, {
      id: rule.id,
      type: "Rule",
      // Same fallback buildSearchEntities applies to rule labels.
      label: rule.title || rule.id,
      status: rule.status,
      source_of_truth: rule.source_of_truth
    });
  }

  for (const adr of data.adrs) {
    catalog.set(adr.id, {
      id: adr.id,
      type: "ADR",
      label: adr.title || adr.id,
      status: adr.status,
      source_of_truth: adr.source_of_truth
    });
  }

  for (const chunk of data.chunks) {
    const filePath = fileById.get(chunk.file_id)?.path ?? "";
    const chunkEntity: JsonObject = {
      id: chunk.id,
      type: "Chunk",
      label: chunk.name || chunk.id,
      status: chunk.status,
      source_of_truth: chunk.source_of_truth
    };
    // Omit the key entirely rather than emitting an empty path.
    if (filePath) {
      chunkEntity.path = filePath;
    }
    catalog.set(chunk.id, chunkEntity);
  }

  for (const module of data.modules) {
    catalog.set(module.id, {
      id: module.id,
      type: "Module",
      // Unnamed modules are labelled by path, matching buildRelationSearchSignals.
      label: module.name || module.path,
      status: module.status,
      source_of_truth: module.source_of_truth,
      path: module.path
    });
  }

  for (const project of data.projects) {
    catalog.set(project.id, {
      id: project.id,
      type: "Project",
      label: project.name || project.path,
      status: project.status,
      source_of_truth: project.source_of_truth,
      path: project.path
    });
  }

  return catalog;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Indexes the search records of `data` by entity id.
 * Records are built without content; when two records share an id the later one wins.
 */
export function buildEntitySearchMap(data: ContextData): Map<string, SearchEntity> {
  const byId = new Map<string, SearchEntity>();
  for (const entity of buildSearchEntities(data, false)) {
    byId.set(entity.id, entity);
  }
  return byId;
}
|
|
@@ -3,6 +3,8 @@ import fs from "node:fs";
|
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import { fileURLToPath } from "node:url";
|
|
5
5
|
import { env, pipeline } from "@xenova/transformers";
|
|
6
|
+
import { readJsonl, asString, asNumber, asBoolean } from "./jsonl.js";
|
|
7
|
+
import type { JsonObject, JsonValue } from "./types.js";
|
|
6
8
|
|
|
7
9
|
const __filename = fileURLToPath(import.meta.url);
|
|
8
10
|
const __dirname = path.dirname(__filename);
|
|
@@ -16,9 +18,7 @@ const MODEL_CACHE_DIR = path.join(EMBEDDINGS_DIR, "models");
|
|
|
16
18
|
|
|
17
19
|
const DEFAULT_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
|
|
18
20
|
const DEFAULT_MAX_TEXT_CHARS = 7000;
|
|
19
|
-
|
|
20
|
-
type JsonValue = string | number | boolean | null | JsonObject | JsonValue[];
|
|
21
|
-
type JsonObject = { [key: string]: JsonValue };
|
|
21
|
+
const CHUNK_BODY_PREVIEW_CHARS = 2000;
|
|
22
22
|
|
|
23
23
|
type FileEntity = {
|
|
24
24
|
id: string;
|
|
@@ -62,7 +62,51 @@ type AdrEntity = {
|
|
|
62
62
|
signature: string;
|
|
63
63
|
};
|
|
64
64
|
|
|
65
|
-
|
|
65
|
+
// Embedding-specific entity types — intentionally different from types.ts records
// because they carry `text` and `signature` fields used for embedding generation.

/** Fields shared by the Module, Project and Chunk embedding entities. */
type EmbeddingEntityBase = {
  id: string;
  label: string;
  path: string;
  status: string;
  source_of_truth: boolean;
  trust_level: number;
  updated_at: string;
  text: string;
  signature: string;
};

/** Module entity; `type` and `kind` are fixed literals. */
type ModuleEntity = EmbeddingEntityBase & {
  type: "Module";
  kind: "MODULE";
};

/** Project entity; `kind` is a free-form string. */
type ProjectEntity = EmbeddingEntityBase & {
  type: "Project";
  kind: string;
};

/** Chunk entity; `kind` is a free-form string. */
type ChunkEntity = EmbeddingEntityBase & {
  type: "Chunk";
  kind: string;
};
|
|
108
|
+
|
|
109
|
+
/** Any entity kind that can be turned into an embedding record. */
type SearchEntity =
  | FileEntity
  | RuleEntity
  | AdrEntity
  | ModuleEntity
  | ProjectEntity
  | ChunkEntity;
|
|
66
110
|
|
|
67
111
|
type EmbeddingRecord = {
|
|
68
112
|
id: string;
|
|
@@ -87,18 +131,6 @@ function parseArgs(argv: string[]): { mode: "full" | "changed" } {
|
|
|
87
131
|
};
|
|
88
132
|
}
|
|
89
133
|
|
|
90
|
-
function asString(value: JsonValue | undefined, fallback = ""): string {
|
|
91
|
-
return typeof value === "string" ? value : fallback;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
function asNumber(value: JsonValue | undefined, fallback = 0): number {
|
|
95
|
-
return typeof value === "number" && Number.isFinite(value) ? value : fallback;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
function asBoolean(value: JsonValue | undefined, fallback = false): boolean {
|
|
99
|
-
return typeof value === "boolean" ? value : fallback;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
134
|
function hashText(value: string): string {
|
|
103
135
|
return crypto.createHash("sha256").update(value).digest("hex");
|
|
104
136
|
}
|
|
@@ -111,26 +143,6 @@ function clampText(value: string, maxChars: number): string {
|
|
|
111
143
|
return value.slice(0, maxChars);
|
|
112
144
|
}
|
|
113
145
|
|
|
114
|
-
function readJsonl(filePath: string): JsonObject[] {
|
|
115
|
-
if (!fs.existsSync(filePath)) {
|
|
116
|
-
return [];
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
return fs
|
|
120
|
-
.readFileSync(filePath, "utf8")
|
|
121
|
-
.split(/\r?\n/)
|
|
122
|
-
.map((line) => line.trim())
|
|
123
|
-
.filter(Boolean)
|
|
124
|
-
.map((line) => {
|
|
125
|
-
try {
|
|
126
|
-
return JSON.parse(line) as JsonObject;
|
|
127
|
-
} catch {
|
|
128
|
-
return null;
|
|
129
|
-
}
|
|
130
|
-
})
|
|
131
|
-
.filter((value): value is JsonObject => value !== null);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
146
|
function writeJsonl(filePath: string, records: EmbeddingRecord[]): void {
|
|
135
147
|
const body = records.map((record) => JSON.stringify(record)).join("\n");
|
|
136
148
|
fs.writeFileSync(filePath, body ? `${body}\n` : "", "utf8");
|
|
@@ -243,6 +255,110 @@ function parseAdrEntities(raw: JsonObject[], maxChars: number): AdrEntity[] {
|
|
|
243
255
|
.filter((value): value is AdrEntity => value !== null);
|
|
244
256
|
}
|
|
245
257
|
|
|
258
|
+
/**
 * Converts raw module records into embedding entities.
 *
 * Records without a string `id` are dropped. The embedding text is
 * path, name, summary and exported symbols joined by newlines, clamped to `maxChars`.
 * Missing fields default to: status "active", source_of_truth false, trust_level 75.
 * The signature hashes the id, the update timestamp and a hash of the text.
 */
function parseModuleEntities(raw: JsonObject[], maxChars: number): ModuleEntity[] {
  const parsed: ModuleEntity[] = [];

  for (const record of raw) {
    const id = asString(record.id);
    if (!id) {
      continue;
    }

    const modulePath = asString(record.path);
    const name = asString(record.name);
    const summary = asString(record.summary);
    const exportedSymbols = asString(record.exported_symbols);
    const updatedAt = asString(record.updated_at);
    const text = clampText([modulePath, name, summary, exportedSymbols].join("\n"), maxChars);

    parsed.push({
      id,
      type: "Module",
      kind: "MODULE",
      label: name || modulePath,
      path: modulePath,
      status: asString(record.status, "active"),
      source_of_truth: asBoolean(record.source_of_truth, false),
      trust_level: asNumber(record.trust_level, 75),
      updated_at: updatedAt,
      text,
      signature: hashText(`module|${id}|${updatedAt}|${hashText(text)}`)
    });
  }

  return parsed;
}
|
|
289
|
+
|
|
290
|
+
/**
 * Converts raw chunk records into embedding entities.
 *
 * Records without a string `id` are dropped. The chunk's file path is resolved through
 * `filePathById` (empty string when the `file_id` is unknown). The embedding text is
 * file path, name, signature, description and the first CHUNK_BODY_PREVIEW_CHARS characters
 * of the body joined by newlines, clamped to `maxChars`.
 * Missing fields default to: kind "chunk", status "active", source_of_truth false, trust_level 60.
 *
 * @param raw - Parsed JSONL records.
 * @param filePathById - Lookup from file id to file path.
 * @param maxChars - Upper bound on the embedding text length.
 */
function parseChunkEntities(raw: JsonObject[], filePathById: Map<string, string>, maxChars: number): ChunkEntity[] {
  return raw
    .map((item) => {
      const id = asString(item.id);
      if (!id) {
        return null;
      }

      const fileId = asString(item.file_id);
      const filePath = filePathById.get(fileId) ?? "";
      const name = asString(item.name);
      const sig = asString(item.signature);
      const description = asString(item.description);
      const body = asString(item.body);
      const updatedAt = asString(item.updated_at);
      // Hash the body only when the record has no string checksum. Passing hashText(body) as an
      // eager default would hash every chunk body even when the value is thrown away.
      // Assumes asString returns its fallback exactly when the value is not a string — confirm
      // against jsonl.ts.
      const rawChecksum = item.checksum;
      const checksum = typeof rawChecksum === "string" ? rawChecksum : hashText(body);
      const text = clampText(`${filePath}\n${name}\n${sig}\n${description}\n${body.slice(0, CHUNK_BODY_PREVIEW_CHARS)}`, maxChars);

      return {
        id,
        type: "Chunk" as const,
        kind: asString(item.kind, "chunk"),
        label: name || id,
        path: filePath,
        status: asString(item.status, "active"),
        source_of_truth: asBoolean(item.source_of_truth, false),
        trust_level: asNumber(item.trust_level, 60),
        updated_at: updatedAt,
        text,
        // Changes whenever the checksum, the timestamp or the embedding text changes.
        signature: hashText(`chunk|${checksum}|${updatedAt}|${hashText(text)}`)
      };
    })
    .filter((value): value is ChunkEntity => value !== null);
}
|
|
324
|
+
|
|
325
|
+
/**
 * Converts raw project records into embedding entities.
 *
 * Records without a string `id` are dropped. The embedding text is path, name, kind,
 * language, target framework and summary joined by newlines, clamped to `maxChars`.
 * Missing fields default to: kind "project", language "dotnet", status "active",
 * source_of_truth false, trust_level 80. The emitted `kind` is upper-cased, or "PROJECT"
 * when the upper-cased value is empty.
 */
function parseProjectEntities(raw: JsonObject[], maxChars: number): ProjectEntity[] {
  const parsed: ProjectEntity[] = [];

  for (const record of raw) {
    const id = asString(record.id);
    if (!id) {
      continue;
    }

    const projectPath = asString(record.path);
    const name = asString(record.name);
    const kind = asString(record.kind, "project");
    const language = asString(record.language, "dotnet");
    const targetFramework = asString(record.target_framework);
    const summary = asString(record.summary);
    const updatedAt = asString(record.updated_at);
    const text = clampText(
      [projectPath, name, kind, language, targetFramework, summary].join("\n"),
      maxChars
    );

    parsed.push({
      id,
      type: "Project",
      kind: kind.toUpperCase() || "PROJECT",
      label: name || projectPath,
      path: projectPath,
      status: asString(record.status, "active"),
      source_of_truth: asBoolean(record.source_of_truth, false),
      trust_level: asNumber(record.trust_level, 80),
      updated_at: updatedAt,
      text,
      signature: hashText(`project|${id}|${updatedAt}|${hashText(text)}`)
    });
  }

  return parsed;
}
|
|
361
|
+
|
|
246
362
|
function parseExistingEmbeddings(raw: JsonObject[], modelId: string): Map<string, EmbeddingRecord> {
|
|
247
363
|
const index = new Map<string, EmbeddingRecord>();
|
|
248
364
|
|
|
@@ -312,7 +428,17 @@ async function main(): Promise<void> {
|
|
|
312
428
|
const documents = parseFileEntities(readJsonl(path.join(CACHE_DIR, "documents.jsonl")), maxTextChars);
|
|
313
429
|
const rules = parseRuleEntities(readJsonl(path.join(CACHE_DIR, "entities.rule.jsonl")), maxTextChars);
|
|
314
430
|
const adrs = parseAdrEntities(readJsonl(path.join(CACHE_DIR, "entities.adr.jsonl")), maxTextChars);
|
|
315
|
-
const
|
|
431
|
+
const modules = parseModuleEntities(readJsonl(path.join(CACHE_DIR, "entities.module.jsonl")), maxTextChars);
|
|
432
|
+
const projects = parseProjectEntities(readJsonl(path.join(CACHE_DIR, "entities.project.jsonl")), maxTextChars);
|
|
433
|
+
|
|
434
|
+
// Build file path lookup for chunk embedding text (reuse already-parsed documents)
|
|
435
|
+
const filePathById = new Map<string, string>();
|
|
436
|
+
for (const doc of documents) {
|
|
437
|
+
filePathById.set(doc.id, doc.path);
|
|
438
|
+
}
|
|
439
|
+
const chunks = parseChunkEntities(readJsonl(path.join(CACHE_DIR, "entities.chunk.jsonl")), filePathById, maxTextChars);
|
|
440
|
+
|
|
441
|
+
const entities: SearchEntity[] = [...documents, ...rules, ...adrs, ...modules, ...projects, ...chunks].sort((a, b) => a.id.localeCompare(b.id));
|
|
316
442
|
|
|
317
443
|
const existing = parseExistingEmbeddings(readJsonl(EMBEDDINGS_PATH), modelId);
|
|
318
444
|
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Splits a markdown document into a leading `---` delimited frontmatter block and its body.
 *
 * Frontmatter is recognised only when the first line is `---` AND a later line is `---`.
 * Inside the block, lines shaped like `key: value` (key made of letters, digits, `_`, `-`)
 * are collected with the key lower-cased and the value trimmed; other lines are ignored.
 * Repeated keys keep the last value.
 *
 * @param markdown - Full document text.
 * @returns `fields` parsed from the frontmatter and the remaining `body`. When a frontmatter
 *   block is present the body is trimmed. When none is present (including an opening `---`
 *   that is never closed) `fields` is empty and `body` is the input unchanged.
 */
export function parseFrontmatter(markdown: string): { fields: Map<string, string>; body: string } {
  const lines = markdown.split(/\r?\n/);
  if (lines[0]?.trim() !== "---") {
    return { fields: new Map(), body: markdown };
  }

  // An opening delimiter with no closing one is not frontmatter (e.g. a document that merely
  // starts with a horizontal rule). Without this guard the whole document would be consumed
  // as fields and the body would come back empty.
  const closingIndex = lines.findIndex((line, position) => position > 0 && line.trim() === "---");
  if (closingIndex === -1) {
    return { fields: new Map(), body: markdown };
  }

  const fields = new Map<string, string>();
  for (const line of lines.slice(1, closingIndex)) {
    const match = /^([A-Za-z0-9_-]+):\s*(.*)$/.exec(line);
    if (!match) {
      continue;
    }
    const [, key = "", rawValue = ""] = match;
    fields.set(key.toLowerCase(), rawValue.trim());
  }

  return {
    fields,
    body: lines.slice(closingIndex + 1).join("\n").trim()
  };
}
|
|
29
|
+
|
|
30
|
+
/**
 * Splits a comma-separated string into trimmed, non-empty items.
 * Returns an empty array for `undefined`, empty or whitespace-only input.
 */
export function parseStringList(value: string | undefined): string[] {
  const items: string[] = [];
  if (value === undefined) {
    return items;
  }

  for (const piece of value.split(",")) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      items.push(trimmed);
    }
  }
  return items;
}
|