@danielblomma/cortex-mcp 0.4.2 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -0
- package/package.json +3 -1
- package/scaffold/mcp/package-lock.json +9 -13
- package/scaffold/mcp/src/graph.ts +103 -5
- package/scaffold/mcp/src/paths.ts +8 -2
- package/scaffold/mcp/src/search.ts +120 -15
- package/scaffold/mcp/src/types.ts +19 -2
- package/scaffold/scripts/status.sh +15 -1
package/README.md
CHANGED
|
@@ -30,6 +30,29 @@ Cortex can extract function-level chunks and build call graphs in experimental b
|
|
|
30
30
|
|
|
31
31
|
These APIs are experimental and may not be exposed in every installation.
|
|
32
32
|
|
|
33
|
+
## Chunking Strategy (Code)
|
|
34
|
+
|
|
35
|
+
When semantic chunking is enabled, large function/method chunks are split into overlapping windows during ingest.
|
|
36
|
+
|
|
37
|
+
Defaults:
|
|
38
|
+
|
|
39
|
+
- `CORTEX_CHUNK_WINDOW_LINES=80`
|
|
40
|
+
- `CORTEX_CHUNK_OVERLAP_LINES=16`
|
|
41
|
+
- `CORTEX_CHUNK_SPLIT_MIN_LINES=120`
|
|
42
|
+
- `CORTEX_CHUNK_MAX_WINDOWS=8`
|
|
43
|
+
|
|
44
|
+
Behavior:
|
|
45
|
+
|
|
46
|
+
- Chunks are split only when the chunk body exceeds the split threshold.
|
|
47
|
+
- Windows slide forward using the configured overlap (`next_start = previous_end - overlap`).
|
|
48
|
+
- The last allowed window always stretches to the end of the chunk body.
|
|
49
|
+
- Window chunks inherit metadata (`status`, `source_of_truth`) from their parent chunk.
|
|
50
|
+
- Window chunks inherit parent graph edges for `CALLS` and `IMPORTS` to keep traversal/ranking consistent.
|
|
51
|
+
|
|
52
|
+
Verification:
|
|
53
|
+
|
|
54
|
+
- Overlap and windowing regressions are covered in `tests/context-regressions.test.mjs`.
|
|
55
|
+
|
|
33
56
|
## Requirements
|
|
34
57
|
|
|
35
58
|
- Node.js 18+
|
|
@@ -63,6 +86,14 @@ Disable watcher setup:
|
|
|
63
86
|
cortex init --bootstrap --no-watch
|
|
64
87
|
```
|
|
65
88
|
|
|
89
|
+
Check semantic search readiness:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
cortex status
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Look for `semantic_search=embedding+lexical (ready)` to confirm full semantic mode.
|
|
96
|
+
|
|
66
97
|
## Verify MCP Connection
|
|
67
98
|
|
|
68
99
|
Claude:
|
|
@@ -189,6 +220,27 @@ cortex todo [text|list|done <id>|reopen <id>|remove <id>]
|
|
|
189
220
|
cortex help
|
|
190
221
|
```
|
|
191
222
|
|
|
223
|
+
## Automated Release
|
|
224
|
+
|
|
225
|
+
This repository includes two GitHub Actions workflows:
|
|
226
|
+
|
|
227
|
+
- `Release Bump` (`.github/workflows/release-bump.yml`)
|
|
228
|
+
- Manual `workflow_dispatch` from `main`
|
|
229
|
+
- Bumps semver (`patch`/`minor`/`major`)
|
|
230
|
+
- Syncs release metadata files (`package.json`, `server.json`, plugin manifests)
|
|
231
|
+
- Runs tests
|
|
232
|
+
- Commits and tags `vX.Y.Z`
|
|
233
|
+
|
|
234
|
+
- `Release Publish` (`.github/workflows/release-publish.yml`)
|
|
235
|
+
- Triggers on tag push `v*.*.*`
|
|
236
|
+
- Verifies tag/version sync
|
|
237
|
+
- Runs root tests + MCP build/tests
|
|
238
|
+
- Publishes `@danielblomma/cortex-mcp` to npm
|
|
239
|
+
|
|
240
|
+
Required GitHub secret:
|
|
241
|
+
|
|
242
|
+
- `NPM_TOKEN` (npm automation token with publish rights for `@danielblomma/cortex-mcp`)
|
|
243
|
+
|
|
192
244
|
## Limitations
|
|
193
245
|
|
|
194
246
|
- Requires repo initialization (`cortex init --bootstrap`).
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@danielblomma/cortex-mcp",
|
|
3
3
|
"mcpName": "io.github.DanielBlomma/cortex",
|
|
4
|
-
"version": "0.4.
|
|
4
|
+
"version": "0.4.5",
|
|
5
5
|
"description": "Local, repo-scoped context platform for coding assistants. Semantic search, graph relationships, and architectural rule context.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"author": "Daniel Blomma",
|
|
@@ -35,6 +35,8 @@
|
|
|
35
35
|
],
|
|
36
36
|
"scripts": {
|
|
37
37
|
"test": "node tests/plan-state.test.mjs",
|
|
38
|
+
"release:sync-version": "node scripts/sync-release-version.mjs",
|
|
39
|
+
"release:check-version-sync": "node scripts/sync-release-version.mjs --check",
|
|
38
40
|
"prepublishOnly": "echo 'Ready to publish to npm'"
|
|
39
41
|
},
|
|
40
42
|
"engines": {
|
|
@@ -33,9 +33,9 @@
|
|
|
33
33
|
}
|
|
34
34
|
},
|
|
35
35
|
"node_modules/@hono/node-server": {
|
|
36
|
-
"version": "1.19.
|
|
37
|
-
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.
|
|
38
|
-
"integrity": "sha512-
|
|
36
|
+
"version": "1.19.10",
|
|
37
|
+
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.10.tgz",
|
|
38
|
+
"integrity": "sha512-hZ7nOssGqRgyV3FVVQdfi+U4q02uB23bpnYpdvNXkYTRRyWx84b7yf1ans+dnJ/7h41sGL3CeQTfO+ZGxuO+Iw==",
|
|
39
39
|
"license": "MIT",
|
|
40
40
|
"engines": {
|
|
41
41
|
"node": ">=18.14.1"
|
|
@@ -236,7 +236,6 @@
|
|
|
236
236
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.13.tgz",
|
|
237
237
|
"integrity": "sha512-akNQMv0wW5uyRpD2v2IEyRSZiR+BeGuoB6L310EgGObO44HSMNT8z1xzio28V8qOrgYaopIDNA18YgdXd+qTiw==",
|
|
238
238
|
"license": "MIT",
|
|
239
|
-
"peer": true,
|
|
240
239
|
"dependencies": {
|
|
241
240
|
"undici-types": "~6.21.0"
|
|
242
241
|
}
|
|
@@ -1285,11 +1284,10 @@
|
|
|
1285
1284
|
}
|
|
1286
1285
|
},
|
|
1287
1286
|
"node_modules/hono": {
|
|
1288
|
-
"version": "4.12.
|
|
1289
|
-
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.
|
|
1290
|
-
"integrity": "sha512-
|
|
1287
|
+
"version": "4.12.5",
|
|
1288
|
+
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.5.tgz",
|
|
1289
|
+
"integrity": "sha512-3qq+FUBtlTHhtYxbxheZgY8NIFnkkC/MR8u5TTsr7YZ3wixryQ3cCwn3iZbg8p8B88iDBBAYSfZDS75t8MN7Vg==",
|
|
1291
1290
|
"license": "MIT",
|
|
1292
|
-
"peer": true,
|
|
1293
1291
|
"engines": {
|
|
1294
1292
|
"node": ">=16.9.0"
|
|
1295
1293
|
}
|
|
@@ -2277,9 +2275,9 @@
|
|
|
2277
2275
|
}
|
|
2278
2276
|
},
|
|
2279
2277
|
"node_modules/tar": {
|
|
2280
|
-
"version": "7.5.
|
|
2281
|
-
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.
|
|
2282
|
-
"integrity": "sha512-
|
|
2278
|
+
"version": "7.5.10",
|
|
2279
|
+
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.10.tgz",
|
|
2280
|
+
"integrity": "sha512-8mOPs1//5q/rlkNSPcCegA6hiHJYDmSLEI8aMH/CdSQJNWztHC9WHNam5zdQlfpTwB9Xp7IBEsHfV5LKMJGVAw==",
|
|
2283
2281
|
"license": "BlueOak-1.0.0",
|
|
2284
2282
|
"dependencies": {
|
|
2285
2283
|
"@isaacs/fs-minipass": "^4.0.0",
|
|
@@ -2446,7 +2444,6 @@
|
|
|
2446
2444
|
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
|
2447
2445
|
"dev": true,
|
|
2448
2446
|
"license": "Apache-2.0",
|
|
2449
|
-
"peer": true,
|
|
2450
2447
|
"bin": {
|
|
2451
2448
|
"tsc": "bin/tsc",
|
|
2452
2449
|
"tsserver": "bin/tsserver"
|
|
@@ -2605,7 +2602,6 @@
|
|
|
2605
2602
|
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
|
2606
2603
|
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
|
|
2607
2604
|
"license": "MIT",
|
|
2608
|
-
"peer": true,
|
|
2609
2605
|
"funding": {
|
|
2610
2606
|
"url": "https://github.com/sponsors/colinhacks"
|
|
2611
2607
|
}
|
|
@@ -4,6 +4,7 @@ import ryugraph, { type Connection, type Database, type QueryResult } from "ryug
|
|
|
4
4
|
import { DB_PATH, DEFAULT_RANKING, PATHS } from "./paths.js";
|
|
5
5
|
import type {
|
|
6
6
|
AdrRecord,
|
|
7
|
+
ChunkRecord,
|
|
7
8
|
ContextData,
|
|
8
9
|
DocumentRecord,
|
|
9
10
|
JsonObject,
|
|
@@ -148,6 +149,33 @@ function parseAdrs(raw: JsonObject[]): AdrRecord[] {
|
|
|
148
149
|
.filter((item): item is AdrRecord => item !== null);
|
|
149
150
|
}
|
|
150
151
|
|
|
152
|
+
function parseChunkEntities(raw: JsonObject[]): ChunkRecord[] {
|
|
153
|
+
return raw
|
|
154
|
+
.map((item) => {
|
|
155
|
+
const id = asString(item.id);
|
|
156
|
+
if (!id) {
|
|
157
|
+
return null;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return {
|
|
161
|
+
id,
|
|
162
|
+
file_id: asString(item.file_id),
|
|
163
|
+
name: asString(item.name),
|
|
164
|
+
kind: asString(item.kind, "chunk"),
|
|
165
|
+
signature: asString(item.signature),
|
|
166
|
+
body: asString(item.body),
|
|
167
|
+
start_line: asNumber(item.start_line),
|
|
168
|
+
end_line: asNumber(item.end_line),
|
|
169
|
+
language: asString(item.language),
|
|
170
|
+
updated_at: asString(item.updated_at),
|
|
171
|
+
source_of_truth: asBoolean(item.source_of_truth, false),
|
|
172
|
+
trust_level: asNumber(item.trust_level, 60),
|
|
173
|
+
status: asString(item.status, "active")
|
|
174
|
+
};
|
|
175
|
+
})
|
|
176
|
+
.filter((item): item is ChunkRecord => item !== null);
|
|
177
|
+
}
|
|
178
|
+
|
|
151
179
|
function parseRuleEntities(raw: JsonObject[]): RuleRecord[] {
|
|
152
180
|
return raw
|
|
153
181
|
.map((item) => {
|
|
@@ -243,7 +271,11 @@ function parseRulesYaml(yamlText: string | null): RuleRecord[] {
|
|
|
243
271
|
return rules;
|
|
244
272
|
}
|
|
245
273
|
|
|
246
|
-
function parseRelations(
|
|
274
|
+
function parseRelations(
|
|
275
|
+
raw: JsonObject[],
|
|
276
|
+
relation: RelationRecord["relation"],
|
|
277
|
+
noteFields: string[] = ["note", "reason"]
|
|
278
|
+
): RelationRecord[] {
|
|
247
279
|
return raw
|
|
248
280
|
.map((item) => {
|
|
249
281
|
const from = asString(item.from);
|
|
@@ -252,11 +284,20 @@ function parseRelations(raw: JsonObject[], relation: RelationRecord["relation"])
|
|
|
252
284
|
return null;
|
|
253
285
|
}
|
|
254
286
|
|
|
287
|
+
let note = "";
|
|
288
|
+
for (const fieldName of noteFields) {
|
|
289
|
+
const candidate = asString(item[fieldName]);
|
|
290
|
+
if (candidate) {
|
|
291
|
+
note = candidate;
|
|
292
|
+
break;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
255
296
|
return {
|
|
256
297
|
from,
|
|
257
298
|
to,
|
|
258
299
|
relation,
|
|
259
|
-
note
|
|
300
|
+
note
|
|
260
301
|
};
|
|
261
302
|
})
|
|
262
303
|
.filter((item): item is RelationRecord => item !== null);
|
|
@@ -483,6 +524,33 @@ function parseRyuGraphAdrs(rows: UnknownRow[]): AdrRecord[] {
|
|
|
483
524
|
.filter((value): value is AdrRecord => value !== null);
|
|
484
525
|
}
|
|
485
526
|
|
|
527
|
+
function parseRyuGraphChunks(rows: UnknownRow[]): ChunkRecord[] {
|
|
528
|
+
return rows
|
|
529
|
+
.map((row) => {
|
|
530
|
+
const id = asStringUnknown(row.id);
|
|
531
|
+
if (!id) {
|
|
532
|
+
return null;
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
return {
|
|
536
|
+
id,
|
|
537
|
+
file_id: asStringUnknown(row.file_id),
|
|
538
|
+
name: asStringUnknown(row.name),
|
|
539
|
+
kind: asStringUnknown(row.kind, "chunk"),
|
|
540
|
+
signature: asStringUnknown(row.signature),
|
|
541
|
+
body: asStringUnknown(row.body),
|
|
542
|
+
start_line: asNumberUnknown(row.start_line),
|
|
543
|
+
end_line: asNumberUnknown(row.end_line),
|
|
544
|
+
language: asStringUnknown(row.language),
|
|
545
|
+
updated_at: asStringUnknown(row.updated_at),
|
|
546
|
+
source_of_truth: asBooleanUnknown(row.source_of_truth, false),
|
|
547
|
+
trust_level: asNumberUnknown(row.trust_level, 60),
|
|
548
|
+
status: asStringUnknown(row.status, "active")
|
|
549
|
+
};
|
|
550
|
+
})
|
|
551
|
+
.filter((value): value is ChunkRecord => value !== null);
|
|
552
|
+
}
|
|
553
|
+
|
|
486
554
|
function parseRyuGraphRelations(
|
|
487
555
|
rows: UnknownRow[],
|
|
488
556
|
relation: RelationRecord["relation"],
|
|
@@ -509,10 +577,16 @@ export async function loadContextData(): Promise<ContextData> {
|
|
|
509
577
|
const ranking = parseRankingFromConfig(readFileIfExists(PATHS.config));
|
|
510
578
|
const cachedDocuments = parseDocuments(readJsonl(PATHS.documents));
|
|
511
579
|
const cachedAdrs = parseAdrs(readJsonl(PATHS.adrEntities));
|
|
580
|
+
const cachedChunks = parseChunkEntities(readJsonl(PATHS.chunkEntities));
|
|
581
|
+
const cachedChunkRelations = [
|
|
582
|
+
...parseRelations(readJsonl(PATHS.callsRelations), "CALLS", ["call_type"]),
|
|
583
|
+
...parseRelations(readJsonl(PATHS.importsRelations), "IMPORTS", ["import_name"])
|
|
584
|
+
];
|
|
512
585
|
const cachedRelations = [
|
|
513
586
|
...parseRelations(readJsonl(PATHS.constrainsRelations), "CONSTRAINS"),
|
|
514
587
|
...parseRelations(readJsonl(PATHS.implementsRelations), "IMPLEMENTS"),
|
|
515
|
-
...parseRelations(readJsonl(PATHS.supersedesRelations), "SUPERSEDES")
|
|
588
|
+
...parseRelations(readJsonl(PATHS.supersedesRelations), "SUPERSEDES"),
|
|
589
|
+
...cachedChunkRelations
|
|
516
590
|
];
|
|
517
591
|
|
|
518
592
|
const yamlRules = parseRulesYaml(readFileIfExists(PATHS.rulesYaml));
|
|
@@ -525,6 +599,7 @@ export async function loadContextData(): Promise<ContextData> {
|
|
|
525
599
|
documents: cachedDocuments,
|
|
526
600
|
adrs: cachedAdrs,
|
|
527
601
|
rules: cachedRules,
|
|
602
|
+
chunks: cachedChunks,
|
|
528
603
|
relations: cachedRelations,
|
|
529
604
|
ranking,
|
|
530
605
|
source: "cache",
|
|
@@ -533,7 +608,7 @@ export async function loadContextData(): Promise<ContextData> {
|
|
|
533
608
|
}
|
|
534
609
|
|
|
535
610
|
try {
|
|
536
|
-
const [fileRows, ruleRows, adrRows, constrainsRows, implementsRows, supersedesRows] =
|
|
611
|
+
const [fileRows, ruleRows, adrRows, chunkRows, constrainsRows, implementsRows, supersedesRows] =
|
|
537
612
|
await Promise.all([
|
|
538
613
|
queryRows(
|
|
539
614
|
connection,
|
|
@@ -582,6 +657,26 @@ export async function loadContextData(): Promise<ContextData> {
|
|
|
582
657
|
a.status AS status;
|
|
583
658
|
`
|
|
584
659
|
),
|
|
660
|
+
queryRows(
|
|
661
|
+
connection,
|
|
662
|
+
`
|
|
663
|
+
MATCH (c:Chunk)
|
|
664
|
+
RETURN
|
|
665
|
+
c.id AS id,
|
|
666
|
+
c.file_id AS file_id,
|
|
667
|
+
c.name AS name,
|
|
668
|
+
c.kind AS kind,
|
|
669
|
+
c.signature AS signature,
|
|
670
|
+
c.body AS body,
|
|
671
|
+
c.start_line AS start_line,
|
|
672
|
+
c.end_line AS end_line,
|
|
673
|
+
c.language AS language,
|
|
674
|
+
c.updated_at AS updated_at,
|
|
675
|
+
c.source_of_truth AS source_of_truth,
|
|
676
|
+
c.trust_level AS trust_level,
|
|
677
|
+
c.status AS status;
|
|
678
|
+
`
|
|
679
|
+
),
|
|
585
680
|
queryRows(
|
|
586
681
|
connection,
|
|
587
682
|
`
|
|
@@ -610,6 +705,7 @@ export async function loadContextData(): Promise<ContextData> {
|
|
|
610
705
|
const ryuDocuments = parseRyuGraphDocuments(fileRows, contentById);
|
|
611
706
|
const ryuRules = parseRyuGraphRules(ruleRows);
|
|
612
707
|
const ryuAdrs = parseRyuGraphAdrs(adrRows);
|
|
708
|
+
const ryuChunks = parseRyuGraphChunks(chunkRows);
|
|
613
709
|
const ryuRelations = [
|
|
614
710
|
...parseRyuGraphRelations(constrainsRows, "CONSTRAINS", "note"),
|
|
615
711
|
...parseRyuGraphRelations(implementsRows, "IMPLEMENTS", "note"),
|
|
@@ -620,7 +716,8 @@ export async function loadContextData(): Promise<ContextData> {
|
|
|
620
716
|
documents: ryuDocuments.length > 0 ? ryuDocuments : cachedDocuments,
|
|
621
717
|
adrs: ryuAdrs.length > 0 ? ryuAdrs : cachedAdrs,
|
|
622
718
|
rules: ryuRules.length > 0 ? ryuRules : cachedRules,
|
|
623
|
-
|
|
719
|
+
chunks: ryuChunks.length > 0 ? ryuChunks : cachedChunks,
|
|
720
|
+
relations: ryuRelations.length > 0 ? [...ryuRelations, ...cachedChunkRelations] : cachedRelations,
|
|
624
721
|
ranking,
|
|
625
722
|
source: "ryu"
|
|
626
723
|
};
|
|
@@ -634,6 +731,7 @@ export async function loadContextData(): Promise<ContextData> {
|
|
|
634
731
|
documents: cachedDocuments,
|
|
635
732
|
adrs: cachedAdrs,
|
|
636
733
|
rules: cachedRules,
|
|
734
|
+
chunks: cachedChunks,
|
|
637
735
|
relations: cachedRelations,
|
|
638
736
|
ranking,
|
|
639
737
|
source: "cache",
|
|
@@ -5,7 +5,10 @@ import type { RankingWeights } from "./types.js";
|
|
|
5
5
|
const __filename = fileURLToPath(import.meta.url);
|
|
6
6
|
const __dirname = path.dirname(__filename);
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
const PROJECT_ROOT_OVERRIDE = process.env.CORTEX_PROJECT_ROOT?.trim();
|
|
9
|
+
export const REPO_ROOT = PROJECT_ROOT_OVERRIDE
|
|
10
|
+
? path.resolve(PROJECT_ROOT_OVERRIDE)
|
|
11
|
+
: path.resolve(__dirname, "../..");
|
|
9
12
|
export const CONTEXT_DIR = path.join(REPO_ROOT, ".context");
|
|
10
13
|
export const CACHE_DIR = path.join(CONTEXT_DIR, "cache");
|
|
11
14
|
export const DB_PATH = path.join(CONTEXT_DIR, "db", "graph.ryu");
|
|
@@ -20,9 +23,12 @@ export const PATHS = {
|
|
|
20
23
|
documents: path.join(CACHE_DIR, "documents.jsonl"),
|
|
21
24
|
adrEntities: path.join(CACHE_DIR, "entities.adr.jsonl"),
|
|
22
25
|
ruleEntities: path.join(CACHE_DIR, "entities.rule.jsonl"),
|
|
26
|
+
chunkEntities: path.join(CACHE_DIR, "entities.chunk.jsonl"),
|
|
23
27
|
constrainsRelations: path.join(CACHE_DIR, "relations.constrains.jsonl"),
|
|
24
28
|
implementsRelations: path.join(CACHE_DIR, "relations.implements.jsonl"),
|
|
25
|
-
supersedesRelations: path.join(CACHE_DIR, "relations.supersedes.jsonl")
|
|
29
|
+
supersedesRelations: path.join(CACHE_DIR, "relations.supersedes.jsonl"),
|
|
30
|
+
callsRelations: path.join(CACHE_DIR, "relations.calls.jsonl"),
|
|
31
|
+
importsRelations: path.join(CACHE_DIR, "relations.imports.jsonl")
|
|
26
32
|
};
|
|
27
33
|
|
|
28
34
|
export const DEFAULT_RANKING: RankingWeights = {
|
|
@@ -11,14 +11,45 @@ import type {
|
|
|
11
11
|
ToolPayload
|
|
12
12
|
} from "./types.js";
|
|
13
13
|
|
|
14
|
+
const MIN_LEXICAL_RELEVANCE = 0.05;
|
|
15
|
+
const MIN_VECTOR_RELEVANCE = 0.2;
|
|
16
|
+
|
|
17
|
+
const QUERY_TOKEN_EXPANSIONS: Record<string, string[]> = {
|
|
18
|
+
semantisk: ["semantic"],
|
|
19
|
+
sökning: ["search"],
|
|
20
|
+
sokning: ["search"],
|
|
21
|
+
regel: ["rule"],
|
|
22
|
+
regler: ["rules"],
|
|
23
|
+
relaterad: ["related"],
|
|
24
|
+
meddelande: ["message"],
|
|
25
|
+
avvikelse: ["deviation"]
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
function normalizeText(value: string): string {
|
|
29
|
+
return value.normalize("NFKC").toLowerCase();
|
|
30
|
+
}
|
|
31
|
+
|
|
14
32
|
function tokenize(value: string): string[] {
|
|
15
|
-
return value
|
|
16
|
-
.
|
|
17
|
-
.split(/[^a-z0-9]+/g)
|
|
33
|
+
return normalizeText(value)
|
|
34
|
+
.split(/[^\p{L}\p{N}]+/gu)
|
|
18
35
|
.map((part) => part.trim())
|
|
19
36
|
.filter((part) => part.length >= 2);
|
|
20
37
|
}
|
|
21
38
|
|
|
39
|
+
function expandQueryTokens(tokens: string[]): string[] {
|
|
40
|
+
const expanded = new Set<string>(tokens);
|
|
41
|
+
for (const token of tokens) {
|
|
42
|
+
const aliases = QUERY_TOKEN_EXPANSIONS[token];
|
|
43
|
+
if (!aliases) {
|
|
44
|
+
continue;
|
|
45
|
+
}
|
|
46
|
+
for (const alias of aliases) {
|
|
47
|
+
expanded.add(alias);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
return Array.from(expanded);
|
|
51
|
+
}
|
|
52
|
+
|
|
22
53
|
function daysSince(isoDate: string): number {
|
|
23
54
|
const timestamp = Date.parse(isoDate);
|
|
24
55
|
if (Number.isNaN(timestamp)) {
|
|
@@ -34,22 +65,30 @@ function recencyScore(isoDate: string): number {
|
|
|
34
65
|
return 1 / (1 + days / 30);
|
|
35
66
|
}
|
|
36
67
|
|
|
37
|
-
function semanticScore(
|
|
38
|
-
const queryTokens = tokenize(query);
|
|
68
|
+
function semanticScore(queryTokens: string[], queryPhrase: string, text: string): number {
|
|
39
69
|
if (queryTokens.length === 0) {
|
|
40
70
|
return 0;
|
|
41
71
|
}
|
|
42
72
|
|
|
43
|
-
const
|
|
73
|
+
const textTokenSet = new Set(tokenize(text));
|
|
74
|
+
if (textTokenSet.size === 0) {
|
|
75
|
+
return 0;
|
|
76
|
+
}
|
|
77
|
+
|
|
44
78
|
let matched = 0;
|
|
45
79
|
for (const token of queryTokens) {
|
|
46
|
-
if (
|
|
80
|
+
if (textTokenSet.has(token)) {
|
|
47
81
|
matched += 1;
|
|
48
82
|
}
|
|
49
83
|
}
|
|
50
84
|
|
|
51
85
|
const overlap = matched / queryTokens.length;
|
|
52
|
-
|
|
86
|
+
if (overlap <= 0) {
|
|
87
|
+
return 0;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
const normalizedText = normalizeText(text);
|
|
91
|
+
const phraseBonus = queryPhrase && normalizedText.includes(queryPhrase) ? 0.15 : 0;
|
|
53
92
|
return Math.min(1, overlap * 0.85 + phraseBonus);
|
|
54
93
|
}
|
|
55
94
|
|
|
@@ -97,7 +136,7 @@ function groupRuleLinks(relations: RelationRecord[]): Map<string, string[]> {
|
|
|
97
136
|
|
|
98
137
|
function buildSearchEntities(data: ContextData, includeContent: boolean): SearchEntity[] {
|
|
99
138
|
const entities: SearchEntity[] = [];
|
|
100
|
-
const
|
|
139
|
+
const fileRuleLinks = groupRuleLinks(data.relations);
|
|
101
140
|
const adrPathSet = new Set(
|
|
102
141
|
data.adrs
|
|
103
142
|
.map((adr) => adr.path.trim().toLowerCase())
|
|
@@ -123,7 +162,7 @@ function buildSearchEntities(data: ContextData, includeContent: boolean): Search
|
|
|
123
162
|
trust_level: document.trust_level,
|
|
124
163
|
updated_at: document.updated_at,
|
|
125
164
|
snippet: document.excerpt,
|
|
126
|
-
matched_rules:
|
|
165
|
+
matched_rules: fileRuleLinks.get(document.id) ?? [],
|
|
127
166
|
content: includeContent ? document.content : undefined
|
|
128
167
|
});
|
|
129
168
|
}
|
|
@@ -164,6 +203,31 @@ function buildSearchEntities(data: ContextData, includeContent: boolean): Search
|
|
|
164
203
|
});
|
|
165
204
|
}
|
|
166
205
|
|
|
206
|
+
const filePathById = new Map(
|
|
207
|
+
data.documents
|
|
208
|
+
.filter((document) => document.kind === "CODE")
|
|
209
|
+
.map((document) => [document.id, document.path])
|
|
210
|
+
);
|
|
211
|
+
|
|
212
|
+
for (const chunk of data.chunks) {
|
|
213
|
+
const filePath = filePathById.get(chunk.file_id) ?? "";
|
|
214
|
+
entities.push({
|
|
215
|
+
id: chunk.id,
|
|
216
|
+
entity_type: "Chunk",
|
|
217
|
+
kind: chunk.kind || "chunk",
|
|
218
|
+
label: chunk.name || chunk.id,
|
|
219
|
+
path: filePath,
|
|
220
|
+
text: `${filePath}\n${chunk.name}\n${chunk.signature}\n${chunk.body}`,
|
|
221
|
+
status: chunk.status,
|
|
222
|
+
source_of_truth: chunk.source_of_truth,
|
|
223
|
+
trust_level: chunk.trust_level,
|
|
224
|
+
updated_at: chunk.updated_at,
|
|
225
|
+
snippet: chunk.body.slice(0, 500),
|
|
226
|
+
matched_rules: fileRuleLinks.get(chunk.file_id) ?? [],
|
|
227
|
+
content: includeContent ? chunk.body : undefined
|
|
228
|
+
});
|
|
229
|
+
}
|
|
230
|
+
|
|
167
231
|
return entities;
|
|
168
232
|
}
|
|
169
233
|
|
|
@@ -178,8 +242,26 @@ function relationDegree(relations: RelationRecord[]): Map<string, number> {
|
|
|
178
242
|
return degrees;
|
|
179
243
|
}
|
|
180
244
|
|
|
245
|
+
function buildChunkPartOfRelations(data: ContextData): RelationRecord[] {
|
|
246
|
+
const relations: RelationRecord[] = [];
|
|
247
|
+
for (const chunk of data.chunks) {
|
|
248
|
+
if (!chunk.file_id) {
|
|
249
|
+
continue;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
relations.push({
|
|
253
|
+
from: chunk.id,
|
|
254
|
+
to: chunk.file_id,
|
|
255
|
+
relation: "PART_OF",
|
|
256
|
+
note: "Chunk belongs to file"
|
|
257
|
+
});
|
|
258
|
+
}
|
|
259
|
+
return relations;
|
|
260
|
+
}
|
|
261
|
+
|
|
181
262
|
function entityCatalog(data: ContextData): Map<string, JsonObject> {
|
|
182
263
|
const catalog = new Map<string, JsonObject>();
|
|
264
|
+
const fileById = new Map(data.documents.map((document) => [document.id, document]));
|
|
183
265
|
|
|
184
266
|
for (const file of data.documents) {
|
|
185
267
|
catalog.set(file.id, {
|
|
@@ -211,12 +293,29 @@ function entityCatalog(data: ContextData): Map<string, JsonObject> {
|
|
|
211
293
|
});
|
|
212
294
|
}
|
|
213
295
|
|
|
296
|
+
for (const chunk of data.chunks) {
|
|
297
|
+
const filePath = fileById.get(chunk.file_id)?.path ?? "";
|
|
298
|
+
const chunkEntity: JsonObject = {
|
|
299
|
+
id: chunk.id,
|
|
300
|
+
type: "Chunk",
|
|
301
|
+
label: chunk.name || chunk.id,
|
|
302
|
+
status: chunk.status,
|
|
303
|
+
source_of_truth: chunk.source_of_truth
|
|
304
|
+
};
|
|
305
|
+
if (filePath) {
|
|
306
|
+
chunkEntity.path = filePath;
|
|
307
|
+
}
|
|
308
|
+
catalog.set(chunk.id, chunkEntity);
|
|
309
|
+
}
|
|
310
|
+
|
|
214
311
|
return catalog;
|
|
215
312
|
}
|
|
216
313
|
|
|
217
314
|
export async function runContextSearch(parsed: SearchParams): Promise<ToolPayload> {
|
|
218
315
|
const data = await loadContextData();
|
|
219
316
|
const degreeByEntity = relationDegree(data.relations);
|
|
317
|
+
const queryTokens = expandQueryTokens(Array.from(new Set(tokenize(parsed.query))));
|
|
318
|
+
const queryPhrase = normalizeText(parsed.query).trim();
|
|
220
319
|
const candidates = buildSearchEntities(data, parsed.include_content).filter(
|
|
221
320
|
(entity) => parsed.include_deprecated || entity.status.toLowerCase() !== "deprecated"
|
|
222
321
|
);
|
|
@@ -228,12 +327,17 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
|
|
|
228
327
|
|
|
229
328
|
const results = candidates
|
|
230
329
|
.map((entity) => {
|
|
231
|
-
const lexicalSemantic = semanticScore(
|
|
330
|
+
const lexicalSemantic = semanticScore(queryTokens, queryPhrase, entity.text);
|
|
232
331
|
const entityVector = embeddings.vectors.get(entity.id);
|
|
233
332
|
const vectorSemantic =
|
|
234
333
|
queryVector && entityVector
|
|
235
|
-
? Math.max(0, Math.min(1,
|
|
334
|
+
? Math.max(0, Math.min(1, cosineSimilarity(queryVector, entityVector)))
|
|
236
335
|
: 0;
|
|
336
|
+
const hasRelevanceSignal =
|
|
337
|
+
lexicalSemantic >= MIN_LEXICAL_RELEVANCE || vectorSemantic >= MIN_VECTOR_RELEVANCE;
|
|
338
|
+
if (!hasRelevanceSignal) {
|
|
339
|
+
return null;
|
|
340
|
+
}
|
|
237
341
|
const semantic =
|
|
238
342
|
vectorSemantic > 0 ? vectorSemantic * 0.75 + lexicalSemantic * 0.25 : lexicalSemantic;
|
|
239
343
|
const graphScore = Math.min(1, (degreeByEntity.get(entity.id) ?? 0) / 4);
|
|
@@ -247,7 +351,7 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
|
|
|
247
351
|
score += data.ranking.recency * dateScore;
|
|
248
352
|
|
|
249
353
|
if (entity.source_of_truth) {
|
|
250
|
-
score += 0.1;
|
|
354
|
+
score += 0.1 * semantic;
|
|
251
355
|
}
|
|
252
356
|
|
|
253
357
|
return {
|
|
@@ -269,7 +373,7 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
|
|
|
269
373
|
content: parsed.include_content ? entity.content : undefined
|
|
270
374
|
};
|
|
271
375
|
})
|
|
272
|
-
.filter((result) => result
|
|
376
|
+
.filter((result): result is NonNullable<typeof result> => result !== null)
|
|
273
377
|
.sort((a, b) => b.score - a.score)
|
|
274
378
|
.slice(0, parsed.top_k);
|
|
275
379
|
|
|
@@ -291,6 +395,7 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
|
|
|
291
395
|
export async function runContextRelated(parsed: RelatedParams): Promise<ToolPayload> {
|
|
292
396
|
const data = await loadContextData();
|
|
293
397
|
const catalog = entityCatalog(data);
|
|
398
|
+
const relations = [...data.relations, ...buildChunkPartOfRelations(data)];
|
|
294
399
|
|
|
295
400
|
if (!catalog.has(parsed.entity_id)) {
|
|
296
401
|
return {
|
|
@@ -306,7 +411,7 @@ export async function runContextRelated(parsed: RelatedParams): Promise<ToolPayl
|
|
|
306
411
|
const outgoing = new Map<string, RelationRecord[]>();
|
|
307
412
|
const incoming = new Map<string, RelationRecord[]>();
|
|
308
413
|
|
|
309
|
-
for (const relation of
|
|
414
|
+
for (const relation of relations) {
|
|
310
415
|
const outList = outgoing.get(relation.from) ?? [];
|
|
311
416
|
outList.push(relation);
|
|
312
417
|
outgoing.set(relation.from, outList);
|
|
@@ -41,10 +41,26 @@ export type AdrRecord = {
|
|
|
41
41
|
export type RelationRecord = {
|
|
42
42
|
from: string;
|
|
43
43
|
to: string;
|
|
44
|
-
relation: "CONSTRAINS" | "IMPLEMENTS" | "SUPERSEDES";
|
|
44
|
+
relation: "CONSTRAINS" | "IMPLEMENTS" | "SUPERSEDES" | "CALLS" | "IMPORTS" | "PART_OF";
|
|
45
45
|
note: string;
|
|
46
46
|
};
|
|
47
47
|
|
|
48
|
+
export type ChunkRecord = {
|
|
49
|
+
id: string;
|
|
50
|
+
file_id: string;
|
|
51
|
+
name: string;
|
|
52
|
+
kind: string;
|
|
53
|
+
signature: string;
|
|
54
|
+
body: string;
|
|
55
|
+
start_line: number;
|
|
56
|
+
end_line: number;
|
|
57
|
+
language: string;
|
|
58
|
+
updated_at: string;
|
|
59
|
+
source_of_truth: boolean;
|
|
60
|
+
trust_level: number;
|
|
61
|
+
status: string;
|
|
62
|
+
};
|
|
63
|
+
|
|
48
64
|
export type RankingWeights = {
|
|
49
65
|
semantic: number;
|
|
50
66
|
graph: number;
|
|
@@ -56,6 +72,7 @@ export type ContextData = {
|
|
|
56
72
|
documents: DocumentRecord[];
|
|
57
73
|
adrs: AdrRecord[];
|
|
58
74
|
rules: RuleRecord[];
|
|
75
|
+
chunks: ChunkRecord[];
|
|
59
76
|
relations: RelationRecord[];
|
|
60
77
|
ranking: RankingWeights;
|
|
61
78
|
source: "cache" | "ryu";
|
|
@@ -64,7 +81,7 @@ export type ContextData = {
|
|
|
64
81
|
|
|
65
82
|
export type SearchEntity = {
|
|
66
83
|
id: string;
|
|
67
|
-
entity_type: "File" | "Rule" | "ADR";
|
|
84
|
+
entity_type: "File" | "Rule" | "ADR" | "Chunk";
|
|
68
85
|
kind: string;
|
|
69
86
|
label: string;
|
|
70
87
|
path: string;
|
|
@@ -49,12 +49,26 @@ const fs = require("node:fs");
|
|
|
49
49
|
const embedManifestPath = process.argv[1];
|
|
50
50
|
const data = JSON.parse(fs.readFileSync(embedManifestPath, "utf8"));
|
|
51
51
|
const c = data.counts || {};
|
|
52
|
+
const entities = Number(c.entities ?? 0);
|
|
53
|
+
const output = Number(c.output ?? 0);
|
|
54
|
+
const embedded = Number(c.embedded ?? 0);
|
|
55
|
+
const failed = Number(c.failed ?? 0);
|
|
52
56
|
console.log(`[status] embeddings generated_at=${data.generated_at}`);
|
|
53
57
|
console.log(`[status] embeddings model=${data.model} dim=${data.dimensions ?? 0}`);
|
|
54
|
-
console.log(`[status] embeddings entities=${
|
|
58
|
+
console.log(`[status] embeddings entities=${entities} output=${output} embedded=${embedded} reused=${c.reused ?? 0} failed=${failed}`);
|
|
59
|
+
if (embedded > 0 && output > 0 && failed === 0) {
|
|
60
|
+
console.log("[status] semantic_search=embedding+lexical (ready)");
|
|
61
|
+
} else if (embedded > 0 && output > 0) {
|
|
62
|
+
console.log(`[status] semantic_search=embedding+lexical-partial (failed=${failed})`);
|
|
63
|
+
} else if (entities > 0) {
|
|
64
|
+
console.log("[status] semantic_search=lexical-only (run: ./scripts/context.sh embed)");
|
|
65
|
+
} else {
|
|
66
|
+
console.log("[status] semantic_search=lexical-only (no indexed entities)");
|
|
67
|
+
}
|
|
55
68
|
' "$EMBED_MANIFEST"
|
|
56
69
|
else
|
|
57
70
|
echo "[status] embeddings manifest missing (run: ./scripts/context.sh embed)"
|
|
71
|
+
echo "[status] semantic_search=lexical-only (embeddings manifest missing)"
|
|
58
72
|
fi
|
|
59
73
|
|
|
60
74
|
node -e '
|