@danielblomma/cortex-mcp 0.4.2 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,6 +30,29 @@ Cortex can extract function-level chunks and build call graphs in experimental b
 
 These APIs are experimental and may not be exposed in every installation.
 
+## Chunking Strategy (Code)
+
+When semantic chunking is enabled, large function/method chunks are split into overlapping windows during ingest.
+
+Defaults:
+
+- `CORTEX_CHUNK_WINDOW_LINES=80`
+- `CORTEX_CHUNK_OVERLAP_LINES=16`
+- `CORTEX_CHUNK_SPLIT_MIN_LINES=120`
+- `CORTEX_CHUNK_MAX_WINDOWS=8`
+
+Behavior:
+
+- Chunks are split only when the chunk body exceeds the split threshold.
+- Windows slide forward using the configured overlap (`next_start = previous_end - overlap`); see the sketch below.
+- The last allowed window always stretches to the end of the chunk body.
+- Window chunks inherit metadata (`status`, `source_of_truth`) from their parent chunk.
+- Window chunks inherit the parent's `CALLS` and `IMPORTS` graph edges to keep traversal and ranking consistent.
+
+Verification:
+
+- Overlap and windowing regressions are covered in `tests/context-regressions.test.mjs`.
+
 ## Requirements
 
 - Node.js 18+
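
The windowing rule above is compact enough to restate in code. A minimal TypeScript sketch, assuming 1-indexed inclusive line ranges; the constants mirror the documented environment variables, but the function itself is illustrative and not part of the package:

```ts
// Illustrative only: restates the documented windowing behavior.
const WINDOW_LINES = 80;      // CORTEX_CHUNK_WINDOW_LINES
const OVERLAP_LINES = 16;     // CORTEX_CHUNK_OVERLAP_LINES
const SPLIT_MIN_LINES = 120;  // CORTEX_CHUNK_SPLIT_MIN_LINES
const MAX_WINDOWS = 8;        // CORTEX_CHUNK_MAX_WINDOWS

type Window = { startLine: number; endLine: number };

function splitIntoWindows(totalLines: number): Window[] {
  // Split only when the chunk body exceeds the split threshold.
  if (totalLines <= SPLIT_MIN_LINES) {
    return [{ startLine: 1, endLine: totalLines }];
  }

  const windows: Window[] = [];
  let start = 1;
  for (;;) {
    const lastAllowed = windows.length === MAX_WINDOWS - 1;
    // The last allowed window always stretches to the end of the body.
    const end = lastAllowed ? totalLines : Math.min(start + WINDOW_LINES - 1, totalLines);
    windows.push({ startLine: start, endLine: end });
    if (end >= totalLines) {
      break;
    }
    start = end - OVERLAP_LINES; // next_start = previous_end - overlap
  }
  return windows;
}

// For a 200-line body this yields [1-80], [64-143], [127-200].
```
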
@@ -63,6 +86,14 @@ Disable watcher setup:
 cortex init --bootstrap --no-watch
 ```
 
+Check semantic search readiness:
+
+```bash
+cortex status
+```
+
+Look for `semantic_search=embedding+lexical (ready)` to confirm full semantic mode.
+
 ## Verify MCP Connection
 
 Claude:
@@ -189,6 +220,27 @@ cortex todo [text|list|done <id>|reopen <id>|remove <id>]
 cortex help
 ```
 
+## Automated Release
+
+This repository includes two GitHub Actions workflows:
+
+- `Release Bump` (`.github/workflows/release-bump.yml`)
+  - Manual `workflow_dispatch` from `main`
+  - Bumps semver (`patch`/`minor`/`major`)
+  - Syncs release metadata files (`package.json`, `server.json`, plugin manifests)
+  - Runs tests
+  - Commits and tags `vX.Y.Z`
+
+- `Release Publish` (`.github/workflows/release-publish.yml`)
+  - Triggers on tag push `v*.*.*`
+  - Verifies tag/version sync
+  - Runs root tests + MCP build/tests
+  - Publishes `@danielblomma/cortex-mcp` to npm
+
+Required GitHub secret:
+
+- `NPM_TOKEN` (npm automation token with publish rights for `@danielblomma/cortex-mcp`)
+
 ## Limitations
 
 - Requires repo initialization (`cortex init --bootstrap`).
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@danielblomma/cortex-mcp",
   "mcpName": "io.github.DanielBlomma/cortex",
-  "version": "0.4.2",
+  "version": "0.4.5",
   "description": "Local, repo-scoped context platform for coding assistants. Semantic search, graph relationships, and architectural rule context.",
   "type": "module",
   "author": "Daniel Blomma",
@@ -35,6 +35,8 @@
   ],
   "scripts": {
     "test": "node tests/plan-state.test.mjs",
+    "release:sync-version": "node scripts/sync-release-version.mjs",
+    "release:check-version-sync": "node scripts/sync-release-version.mjs --check",
     "prepublishOnly": "echo 'Ready to publish to npm'"
   },
   "engines": {
package/package-lock.json CHANGED
@@ -33,9 +33,9 @@
       }
     },
     "node_modules/@hono/node-server": {
-      "version": "1.19.9",
-      "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.9.tgz",
-      "integrity": "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==",
+      "version": "1.19.10",
+      "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.10.tgz",
+      "integrity": "sha512-hZ7nOssGqRgyV3FVVQdfi+U4q02uB23bpnYpdvNXkYTRRyWx84b7yf1ans+dnJ/7h41sGL3CeQTfO+ZGxuO+Iw==",
       "license": "MIT",
       "engines": {
         "node": ">=18.14.1"
@@ -236,7 +236,6 @@
       "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.13.tgz",
       "integrity": "sha512-akNQMv0wW5uyRpD2v2IEyRSZiR+BeGuoB6L310EgGObO44HSMNT8z1xzio28V8qOrgYaopIDNA18YgdXd+qTiw==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "undici-types": "~6.21.0"
       }
@@ -1285,11 +1284,10 @@
       }
     },
     "node_modules/hono": {
-      "version": "4.12.3",
-      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.3.tgz",
-      "integrity": "sha512-SFsVSjp8sj5UumXOOFlkZOG6XS9SJDKw0TbwFeV+AJ8xlST8kxK5Z/5EYa111UY8732lK2S/xB653ceuaoGwpg==",
+      "version": "4.12.5",
+      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.5.tgz",
+      "integrity": "sha512-3qq+FUBtlTHhtYxbxheZgY8NIFnkkC/MR8u5TTsr7YZ3wixryQ3cCwn3iZbg8p8B88iDBBAYSfZDS75t8MN7Vg==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=16.9.0"
       }
@@ -2277,9 +2275,9 @@
       }
     },
     "node_modules/tar": {
-      "version": "7.5.9",
-      "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.9.tgz",
-      "integrity": "sha512-BTLcK0xsDh2+PUe9F6c2TlRp4zOOBMTkoQHQIWSIzI0R7KG46uEwq4OPk2W7bZcprBMsuaeFsqwYr7pjh6CuHg==",
+      "version": "7.5.10",
+      "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.10.tgz",
+      "integrity": "sha512-8mOPs1//5q/rlkNSPcCegA6hiHJYDmSLEI8aMH/CdSQJNWztHC9WHNam5zdQlfpTwB9Xp7IBEsHfV5LKMJGVAw==",
       "license": "BlueOak-1.0.0",
       "dependencies": {
         "@isaacs/fs-minipass": "^4.0.0",
@@ -2446,7 +2444,6 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -2605,7 +2602,6 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
       "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
       "license": "MIT",
-      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
@@ -4,6 +4,7 @@ import ryugraph, { type Connection, type Database, type QueryResult } from "ryug
 import { DB_PATH, DEFAULT_RANKING, PATHS } from "./paths.js";
 import type {
   AdrRecord,
+  ChunkRecord,
   ContextData,
   DocumentRecord,
   JsonObject,
@@ -148,6 +149,33 @@ function parseAdrs(raw: JsonObject[]): AdrRecord[] {
     .filter((item): item is AdrRecord => item !== null);
 }
 
+function parseChunkEntities(raw: JsonObject[]): ChunkRecord[] {
+  return raw
+    .map((item) => {
+      const id = asString(item.id);
+      if (!id) {
+        return null;
+      }
+
+      return {
+        id,
+        file_id: asString(item.file_id),
+        name: asString(item.name),
+        kind: asString(item.kind, "chunk"),
+        signature: asString(item.signature),
+        body: asString(item.body),
+        start_line: asNumber(item.start_line),
+        end_line: asNumber(item.end_line),
+        language: asString(item.language),
+        updated_at: asString(item.updated_at),
+        source_of_truth: asBoolean(item.source_of_truth, false),
+        trust_level: asNumber(item.trust_level, 60),
+        status: asString(item.status, "active")
+      };
+    })
+    .filter((item): item is ChunkRecord => item !== null);
+}
+
 function parseRuleEntities(raw: JsonObject[]): RuleRecord[] {
   return raw
     .map((item) => {
@@ -243,7 +271,11 @@ function parseRulesYaml(yamlText: string | null): RuleRecord[] {
   return rules;
 }
 
-function parseRelations(raw: JsonObject[], relation: RelationRecord["relation"]): RelationRecord[] {
+function parseRelations(
+  raw: JsonObject[],
+  relation: RelationRecord["relation"],
+  noteFields: string[] = ["note", "reason"]
+): RelationRecord[] {
   return raw
     .map((item) => {
       const from = asString(item.from);
@@ -252,11 +284,20 @@ function parseRelations(raw: JsonObject[], relation: RelationRecord["relation"])
         return null;
       }
 
+      let note = "";
+      for (const fieldName of noteFields) {
+        const candidate = asString(item[fieldName]);
+        if (candidate) {
+          note = candidate;
+          break;
+        }
+      }
+
       return {
         from,
         to,
         relation,
-        note: asString(item.note) || asString(item.reason)
+        note
       };
     })
     .filter((item): item is RelationRecord => item !== null);
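
The new `noteFields` parameter generalizes the old `note`/`reason` fallback: the first non-empty field in the list becomes the relation's `note`. Later in this diff, the cache loader passes `["call_type"]` for `CALLS` and `["import_name"]` for `IMPORTS`. A hypothetical input/output sketch (the row values are invented for illustration):

```ts
// A CALLS row as it might appear in relations.calls.jsonl (values invented):
const row = { from: "chunk-a", to: "chunk-b", call_type: "direct" };

// parseRelations([row], "CALLS", ["call_type"]) would then yield:
// [{ from: "chunk-a", to: "chunk-b", relation: "CALLS", note: "direct" }]
```
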
@@ -483,6 +524,33 @@ function parseRyuGraphAdrs(rows: UnknownRow[]): AdrRecord[] {
     .filter((value): value is AdrRecord => value !== null);
 }
 
+function parseRyuGraphChunks(rows: UnknownRow[]): ChunkRecord[] {
+  return rows
+    .map((row) => {
+      const id = asStringUnknown(row.id);
+      if (!id) {
+        return null;
+      }
+
+      return {
+        id,
+        file_id: asStringUnknown(row.file_id),
+        name: asStringUnknown(row.name),
+        kind: asStringUnknown(row.kind, "chunk"),
+        signature: asStringUnknown(row.signature),
+        body: asStringUnknown(row.body),
+        start_line: asNumberUnknown(row.start_line),
+        end_line: asNumberUnknown(row.end_line),
+        language: asStringUnknown(row.language),
+        updated_at: asStringUnknown(row.updated_at),
+        source_of_truth: asBooleanUnknown(row.source_of_truth, false),
+        trust_level: asNumberUnknown(row.trust_level, 60),
+        status: asStringUnknown(row.status, "active")
+      };
+    })
+    .filter((value): value is ChunkRecord => value !== null);
+}
+
 function parseRyuGraphRelations(
   rows: UnknownRow[],
   relation: RelationRecord["relation"],
@@ -509,10 +577,16 @@ export async function loadContextData(): Promise<ContextData> {
   const ranking = parseRankingFromConfig(readFileIfExists(PATHS.config));
   const cachedDocuments = parseDocuments(readJsonl(PATHS.documents));
   const cachedAdrs = parseAdrs(readJsonl(PATHS.adrEntities));
+  const cachedChunks = parseChunkEntities(readJsonl(PATHS.chunkEntities));
+  const cachedChunkRelations = [
+    ...parseRelations(readJsonl(PATHS.callsRelations), "CALLS", ["call_type"]),
+    ...parseRelations(readJsonl(PATHS.importsRelations), "IMPORTS", ["import_name"])
+  ];
   const cachedRelations = [
     ...parseRelations(readJsonl(PATHS.constrainsRelations), "CONSTRAINS"),
     ...parseRelations(readJsonl(PATHS.implementsRelations), "IMPLEMENTS"),
-    ...parseRelations(readJsonl(PATHS.supersedesRelations), "SUPERSEDES")
+    ...parseRelations(readJsonl(PATHS.supersedesRelations), "SUPERSEDES"),
+    ...cachedChunkRelations
   ];
 
   const yamlRules = parseRulesYaml(readFileIfExists(PATHS.rulesYaml));
@@ -525,6 +599,7 @@ export async function loadContextData(): Promise<ContextData> {
       documents: cachedDocuments,
       adrs: cachedAdrs,
       rules: cachedRules,
+      chunks: cachedChunks,
       relations: cachedRelations,
       ranking,
       source: "cache",
@@ -533,7 +608,7 @@ export async function loadContextData(): Promise<ContextData> {
   }
 
   try {
-    const [fileRows, ruleRows, adrRows, constrainsRows, implementsRows, supersedesRows] =
+    const [fileRows, ruleRows, adrRows, chunkRows, constrainsRows, implementsRows, supersedesRows] =
       await Promise.all([
        queryRows(
          connection,
@@ -582,6 +657,26 @@ export async function loadContextData(): Promise<ContextData> {
             a.status AS status;
           `
         ),
+        queryRows(
+          connection,
+          `
+          MATCH (c:Chunk)
+          RETURN
+            c.id AS id,
+            c.file_id AS file_id,
+            c.name AS name,
+            c.kind AS kind,
+            c.signature AS signature,
+            c.body AS body,
+            c.start_line AS start_line,
+            c.end_line AS end_line,
+            c.language AS language,
+            c.updated_at AS updated_at,
+            c.source_of_truth AS source_of_truth,
+            c.trust_level AS trust_level,
+            c.status AS status;
+          `
+        ),
         queryRows(
           connection,
           `
@@ -610,6 +705,7 @@ export async function loadContextData(): Promise<ContextData> {
     const ryuDocuments = parseRyuGraphDocuments(fileRows, contentById);
     const ryuRules = parseRyuGraphRules(ruleRows);
     const ryuAdrs = parseRyuGraphAdrs(adrRows);
+    const ryuChunks = parseRyuGraphChunks(chunkRows);
     const ryuRelations = [
       ...parseRyuGraphRelations(constrainsRows, "CONSTRAINS", "note"),
       ...parseRyuGraphRelations(implementsRows, "IMPLEMENTS", "note"),
@@ -620,7 +716,8 @@ export async function loadContextData(): Promise<ContextData> {
       documents: ryuDocuments.length > 0 ? ryuDocuments : cachedDocuments,
       adrs: ryuAdrs.length > 0 ? ryuAdrs : cachedAdrs,
       rules: ryuRules.length > 0 ? ryuRules : cachedRules,
-      relations: ryuRelations.length > 0 ? ryuRelations : cachedRelations,
+      chunks: ryuChunks.length > 0 ? ryuChunks : cachedChunks,
+      relations: ryuRelations.length > 0 ? [...ryuRelations, ...cachedChunkRelations] : cachedRelations,
       ranking,
       source: "ryu"
     };
@@ -634,6 +731,7 @@ export async function loadContextData(): Promise<ContextData> {
     documents: cachedDocuments,
     adrs: cachedAdrs,
     rules: cachedRules,
+    chunks: cachedChunks,
    relations: cachedRelations,
    ranking,
    source: "cache",
@@ -5,7 +5,10 @@ import type { RankingWeights } from "./types.js";
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 
-export const REPO_ROOT = path.resolve(__dirname, "../..");
+const PROJECT_ROOT_OVERRIDE = process.env.CORTEX_PROJECT_ROOT?.trim();
+export const REPO_ROOT = PROJECT_ROOT_OVERRIDE
+  ? path.resolve(PROJECT_ROOT_OVERRIDE)
+  : path.resolve(__dirname, "../..");
 export const CONTEXT_DIR = path.join(REPO_ROOT, ".context");
 export const CACHE_DIR = path.join(CONTEXT_DIR, "cache");
 export const DB_PATH = path.join(CONTEXT_DIR, "db", "graph.ryu");
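
A quick sketch of what the override changes, assuming the environment variable is set before this module loads (the paths below are invented examples):

```ts
// Illustrative values only: with the override set, every derived path
// resolves under the override instead of the package install directory.
process.env.CORTEX_PROJECT_ROOT = "/work/my-repo";

// After import:
//   REPO_ROOT   -> "/work/my-repo"
//   CONTEXT_DIR -> "/work/my-repo/.context"
//   DB_PATH     -> "/work/my-repo/.context/db/graph.ryu"
```
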
@@ -20,9 +23,12 @@ export const PATHS = {
   documents: path.join(CACHE_DIR, "documents.jsonl"),
   adrEntities: path.join(CACHE_DIR, "entities.adr.jsonl"),
   ruleEntities: path.join(CACHE_DIR, "entities.rule.jsonl"),
+  chunkEntities: path.join(CACHE_DIR, "entities.chunk.jsonl"),
   constrainsRelations: path.join(CACHE_DIR, "relations.constrains.jsonl"),
   implementsRelations: path.join(CACHE_DIR, "relations.implements.jsonl"),
-  supersedesRelations: path.join(CACHE_DIR, "relations.supersedes.jsonl")
+  supersedesRelations: path.join(CACHE_DIR, "relations.supersedes.jsonl"),
+  callsRelations: path.join(CACHE_DIR, "relations.calls.jsonl"),
+  importsRelations: path.join(CACHE_DIR, "relations.imports.jsonl")
 };
 
 export const DEFAULT_RANKING: RankingWeights = {
@@ -11,14 +11,45 @@ import type {
   ToolPayload
 } from "./types.js";
 
+const MIN_LEXICAL_RELEVANCE = 0.05;
+const MIN_VECTOR_RELEVANCE = 0.2;
+
+const QUERY_TOKEN_EXPANSIONS: Record<string, string[]> = {
+  semantisk: ["semantic"],
+  sökning: ["search"],
+  sokning: ["search"],
+  regel: ["rule"],
+  regler: ["rules"],
+  relaterad: ["related"],
+  meddelande: ["message"],
+  avvikelse: ["deviation"]
+};
+
+function normalizeText(value: string): string {
+  return value.normalize("NFKC").toLowerCase();
+}
+
 function tokenize(value: string): string[] {
-  return value
-    .toLowerCase()
-    .split(/[^a-z0-9]+/g)
+  return normalizeText(value)
+    .split(/[^\p{L}\p{N}]+/gu)
     .map((part) => part.trim())
     .filter((part) => part.length >= 2);
 }
 
+function expandQueryTokens(tokens: string[]): string[] {
+  const expanded = new Set<string>(tokens);
+  for (const token of tokens) {
+    const aliases = QUERY_TOKEN_EXPANSIONS[token];
+    if (!aliases) {
+      continue;
+    }
+    for (const alias of aliases) {
+      expanded.add(alias);
+    }
+  }
+  return Array.from(expanded);
+}
+
 function daysSince(isoDate: string): number {
   const timestamp = Date.parse(isoDate);
   if (Number.isNaN(timestamp)) {
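
To make the tokenizer change concrete: the old `/[^a-z0-9]+/` split destroyed non-ASCII tokens, while the Unicode-aware split keeps them intact, and the expansion table then bridges the Swedish query terms to their English counterparts. Expected values, following directly from the logic above:

```ts
// "ö" matches \p{L}, so "sökning" survives as one token; the old ASCII
// split would have broken it into "s" (dropped, length < 2) and "kning".
tokenize("Semantisk sökning i regler");
// -> ["semantisk", "sökning", "regler"]   ("i" is dropped: length < 2)

expandQueryTokens(["semantisk", "sökning", "regler"]);
// -> ["semantisk", "sökning", "regler", "semantic", "search", "rules"]
```
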
@@ -34,22 +65,30 @@ function recencyScore(isoDate: string): number {
   return 1 / (1 + days / 30);
 }
 
-function semanticScore(query: string, text: string): number {
-  const queryTokens = tokenize(query);
+function semanticScore(queryTokens: string[], queryPhrase: string, text: string): number {
   if (queryTokens.length === 0) {
     return 0;
   }
 
-  const haystack = text.toLowerCase();
+  const textTokenSet = new Set(tokenize(text));
+  if (textTokenSet.size === 0) {
+    return 0;
+  }
+
   let matched = 0;
   for (const token of queryTokens) {
-    if (haystack.includes(token)) {
+    if (textTokenSet.has(token)) {
       matched += 1;
     }
   }
 
   const overlap = matched / queryTokens.length;
-  const phraseBonus = haystack.includes(query.toLowerCase()) ? 0.25 : 0;
+  if (overlap <= 0) {
+    return 0;
+  }
+
+  const normalizedText = normalizeText(text);
+  const phraseBonus = queryPhrase && normalizedText.includes(queryPhrase) ? 0.15 : 0;
   return Math.min(1, overlap * 0.85 + phraseBonus);
 }
 
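Worked through with the constants above, using invented query/text values: each matched token contributes `0.85 / |queryTokens|`, and an exact normalized-phrase hit adds `0.15` (down from the previous `0.25`):

```ts
// One of two tokens matches, no phrase hit:
semanticScore(["semantic", "search"], "semantic search", "full-text search index");
// -> (1/2) * 0.85 + 0 = 0.425

// Both tokens match and the exact phrase appears:
semanticScore(["semantic", "search"], "semantic search", "semantic search over chunks");
// -> 1 * 0.85 + 0.15 = 1.0
```
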
@@ -97,7 +136,7 @@ function groupRuleLinks(relations: RelationRecord[]): Map<string, string[]> {
 
 function buildSearchEntities(data: ContextData, includeContent: boolean): SearchEntity[] {
   const entities: SearchEntity[] = [];
-  const ruleLinks = groupRuleLinks(data.relations);
+  const fileRuleLinks = groupRuleLinks(data.relations);
   const adrPathSet = new Set(
     data.adrs
       .map((adr) => adr.path.trim().toLowerCase())
@@ -123,7 +162,7 @@ function buildSearchEntities(data: ContextData, includeContent: boolean): Search
         trust_level: document.trust_level,
         updated_at: document.updated_at,
         snippet: document.excerpt,
-        matched_rules: ruleLinks.get(document.id) ?? [],
+        matched_rules: fileRuleLinks.get(document.id) ?? [],
         content: includeContent ? document.content : undefined
       });
     }
@@ -164,6 +203,31 @@ function buildSearchEntities(data: ContextData, includeContent: boolean): Search
     });
   }
 
+  const filePathById = new Map(
+    data.documents
+      .filter((document) => document.kind === "CODE")
+      .map((document) => [document.id, document.path])
+  );
+
+  for (const chunk of data.chunks) {
+    const filePath = filePathById.get(chunk.file_id) ?? "";
+    entities.push({
+      id: chunk.id,
+      entity_type: "Chunk",
+      kind: chunk.kind || "chunk",
+      label: chunk.name || chunk.id,
+      path: filePath,
+      text: `${filePath}\n${chunk.name}\n${chunk.signature}\n${chunk.body}`,
+      status: chunk.status,
+      source_of_truth: chunk.source_of_truth,
+      trust_level: chunk.trust_level,
+      updated_at: chunk.updated_at,
+      snippet: chunk.body.slice(0, 500),
+      matched_rules: fileRuleLinks.get(chunk.file_id) ?? [],
+      content: includeContent ? chunk.body : undefined
+    });
+  }
+
   return entities;
 }
 
@@ -178,8 +242,26 @@ function relationDegree(relations: RelationRecord[]): Map<string, number> {
   return degrees;
 }
 
+function buildChunkPartOfRelations(data: ContextData): RelationRecord[] {
+  const relations: RelationRecord[] = [];
+  for (const chunk of data.chunks) {
+    if (!chunk.file_id) {
+      continue;
+    }
+
+    relations.push({
+      from: chunk.id,
+      to: chunk.file_id,
+      relation: "PART_OF",
+      note: "Chunk belongs to file"
+    });
+  }
+  return relations;
+}
+
 function entityCatalog(data: ContextData): Map<string, JsonObject> {
   const catalog = new Map<string, JsonObject>();
+  const fileById = new Map(data.documents.map((document) => [document.id, document]));
 
   for (const file of data.documents) {
     catalog.set(file.id, {
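
For a chunk with a known parent file, `buildChunkPartOfRelations` synthesizes one edge per chunk; the IDs below are invented for illustration:

```ts
// Illustrative only: given a chunk with these (invented) IDs...
const chunk = { id: "chunk-1", file_id: "file-1" };
// ...the function above produces this edge; chunks with an empty
// file_id are skipped entirely.
const edge = {
  from: chunk.id,      // "chunk-1"
  to: chunk.file_id,   // "file-1"
  relation: "PART_OF",
  note: "Chunk belongs to file"
};
```
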
@@ -211,12 +293,29 @@ function entityCatalog(data: ContextData): Map<string, JsonObject> {
     });
   }
 
+  for (const chunk of data.chunks) {
+    const filePath = fileById.get(chunk.file_id)?.path ?? "";
+    const chunkEntity: JsonObject = {
+      id: chunk.id,
+      type: "Chunk",
+      label: chunk.name || chunk.id,
+      status: chunk.status,
+      source_of_truth: chunk.source_of_truth
+    };
+    if (filePath) {
+      chunkEntity.path = filePath;
+    }
+    catalog.set(chunk.id, chunkEntity);
+  }
+
   return catalog;
 }
 
 export async function runContextSearch(parsed: SearchParams): Promise<ToolPayload> {
   const data = await loadContextData();
   const degreeByEntity = relationDegree(data.relations);
+  const queryTokens = expandQueryTokens(Array.from(new Set(tokenize(parsed.query))));
+  const queryPhrase = normalizeText(parsed.query).trim();
   const candidates = buildSearchEntities(data, parsed.include_content).filter(
     (entity) => parsed.include_deprecated || entity.status.toLowerCase() !== "deprecated"
   );
@@ -228,12 +327,17 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
 
   const results = candidates
     .map((entity) => {
-      const lexicalSemantic = semanticScore(parsed.query, entity.text);
+      const lexicalSemantic = semanticScore(queryTokens, queryPhrase, entity.text);
       const entityVector = embeddings.vectors.get(entity.id);
       const vectorSemantic =
         queryVector && entityVector
-          ? Math.max(0, Math.min(1, (cosineSimilarity(queryVector, entityVector) + 1) / 2))
+          ? Math.max(0, Math.min(1, cosineSimilarity(queryVector, entityVector)))
           : 0;
+      const hasRelevanceSignal =
+        lexicalSemantic >= MIN_LEXICAL_RELEVANCE || vectorSemantic >= MIN_VECTOR_RELEVANCE;
+      if (!hasRelevanceSignal) {
+        return null;
+      }
       const semantic =
         vectorSemantic > 0 ? vectorSemantic * 0.75 + lexicalSemantic * 0.25 : lexicalSemantic;
       const graphScore = Math.min(1, (degreeByEntity.get(entity.id) ?? 0) / 4);
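
Two things changed here. Cosine similarity is now clamped to `[0, 1]` instead of remapped from `[-1, 1]`, so orthogonal or opposed vectors score zero rather than about 0.5, and a candidate must clear at least one threshold (`lexical >= 0.05` or `vector >= 0.2`) before it is scored at all. A numeric sketch with invented scores:

```ts
// Candidate A: 0.03 lexical, 0.15 vector -> fails both gates, dropped early.
// Candidate B: 0.03 lexical, 0.40 vector -> passes via the vector gate.
const lexicalSemantic = 0.03;
const vectorSemantic = 0.4;
const passesGate = lexicalSemantic >= 0.05 || vectorSemantic >= 0.2; // true
const semantic = vectorSemantic > 0
  ? vectorSemantic * 0.75 + lexicalSemantic * 0.25 // 0.3075
  : lexicalSemantic;
```
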
@@ -247,7 +351,7 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
       score += data.ranking.recency * dateScore;
 
       if (entity.source_of_truth) {
-        score += 0.1;
+        score += 0.1 * semantic;
       }
 
       return {
@@ -269,7 +373,7 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
         content: parsed.include_content ? entity.content : undefined
       };
     })
-    .filter((result) => result.score > 0)
+    .filter((result): result is NonNullable<typeof result> => result !== null)
     .sort((a, b) => b.score - a.score)
     .slice(0, parsed.top_k);
 
@@ -291,6 +395,7 @@ export async function runContextSearch(parsed: SearchParams): Promise<ToolPayloa
 
 export async function runContextRelated(parsed: RelatedParams): Promise<ToolPayload> {
   const data = await loadContextData();
   const catalog = entityCatalog(data);
+  const relations = [...data.relations, ...buildChunkPartOfRelations(data)];
 
   if (!catalog.has(parsed.entity_id)) {
     return {
@@ -306,7 +411,7 @@ export async function runContextRelated(parsed: RelatedParams): Promise<ToolPayl
   const outgoing = new Map<string, RelationRecord[]>();
   const incoming = new Map<string, RelationRecord[]>();
 
-  for (const relation of data.relations) {
+  for (const relation of relations) {
     const outList = outgoing.get(relation.from) ?? [];
     outList.push(relation);
     outgoing.set(relation.from, outList);
@@ -41,10 +41,26 @@ export type AdrRecord = {
 export type RelationRecord = {
   from: string;
   to: string;
-  relation: "CONSTRAINS" | "IMPLEMENTS" | "SUPERSEDES";
+  relation: "CONSTRAINS" | "IMPLEMENTS" | "SUPERSEDES" | "CALLS" | "IMPORTS" | "PART_OF";
   note: string;
 };
 
+export type ChunkRecord = {
+  id: string;
+  file_id: string;
+  name: string;
+  kind: string;
+  signature: string;
+  body: string;
+  start_line: number;
+  end_line: number;
+  language: string;
+  updated_at: string;
+  source_of_truth: boolean;
+  trust_level: number;
+  status: string;
+};
+
 export type RankingWeights = {
   semantic: number;
   graph: number;
@@ -56,6 +72,7 @@ export type ContextData = {
   documents: DocumentRecord[];
   adrs: AdrRecord[];
   rules: RuleRecord[];
+  chunks: ChunkRecord[];
   relations: RelationRecord[];
   ranking: RankingWeights;
   source: "cache" | "ryu";
@@ -64,7 +81,7 @@ export type ContextData = {
 
 export type SearchEntity = {
   id: string;
-  entity_type: "File" | "Rule" | "ADR";
+  entity_type: "File" | "Rule" | "ADR" | "Chunk";
   kind: string;
   label: string;
   path: string;
@@ -49,12 +49,26 @@ const fs = require("node:fs");
 const embedManifestPath = process.argv[1];
 const data = JSON.parse(fs.readFileSync(embedManifestPath, "utf8"));
 const c = data.counts || {};
+const entities = Number(c.entities ?? 0);
+const output = Number(c.output ?? 0);
+const embedded = Number(c.embedded ?? 0);
+const failed = Number(c.failed ?? 0);
 console.log(`[status] embeddings generated_at=${data.generated_at}`);
 console.log(`[status] embeddings model=${data.model} dim=${data.dimensions ?? 0}`);
-console.log(`[status] embeddings entities=${c.entities ?? 0} output=${c.output ?? 0} embedded=${c.embedded ?? 0} reused=${c.reused ?? 0} failed=${c.failed ?? 0}`);
+console.log(`[status] embeddings entities=${entities} output=${output} embedded=${embedded} reused=${c.reused ?? 0} failed=${failed}`);
+if (embedded > 0 && output > 0 && failed === 0) {
+  console.log("[status] semantic_search=embedding+lexical (ready)");
+} else if (embedded > 0 && output > 0) {
+  console.log(`[status] semantic_search=embedding+lexical-partial (failed=${failed})`);
+} else if (entities > 0) {
+  console.log("[status] semantic_search=lexical-only (run: ./scripts/context.sh embed)");
+} else {
+  console.log("[status] semantic_search=lexical-only (no indexed entities)");
+}
 ' "$EMBED_MANIFEST"
 else
   echo "[status] embeddings manifest missing (run: ./scripts/context.sh embed)"
+  echo "[status] semantic_search=lexical-only (embeddings manifest missing)"
 fi
 
 node -e '
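
The readiness line that the README's `cortex status` section refers to is derived entirely from the embeddings manifest counts. A condensed TypeScript restatement of the branch logic above (the function name is invented; the script itself runs inline via `node -e`):

```ts
// Mirrors the if/else chain in the status script above.
type Counts = { entities: number; output: number; embedded: number; failed: number };

function semanticSearchState(c: Counts): string {
  if (c.embedded > 0 && c.output > 0 && c.failed === 0) {
    return "embedding+lexical (ready)";
  }
  if (c.embedded > 0 && c.output > 0) {
    return `embedding+lexical-partial (failed=${c.failed})`;
  }
  if (c.entities > 0) {
    return "lexical-only (run: ./scripts/context.sh embed)";
  }
  return "lexical-only (no indexed entities)";
}
```
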