@rce-mcp/retrieval-core 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,8 @@ import { mkdtemp, rm } from "node:fs/promises";
2
2
  import { tmpdir } from "node:os";
3
3
  import { join } from "node:path";
4
4
  import { afterEach, describe, expect, it } from "vitest";
5
- import { SqliteIndexRepository, SqliteQueryCache } from "@rce-mcp/data-plane";
5
+ import { SearchContextOutputSchema } from "@rce-mcp/contracts";
6
+ import { InMemoryQueryCache, SqliteIndexRepository, SqliteQueryCache } from "@rce-mcp/data-plane";
6
7
  import { RetrievalCore } from "../src/index.js";
7
8
 
8
9
  function firstRank(results: Array<{ path: string }>, path: string): number {
@@ -71,6 +72,46 @@ function buildLongCircuitBreakerFixture(): string {
71
72
  return lines.join("\n");
72
73
  }
73
74
 
75
/**
 * Builds the TypeScript source text indexed as `src/hotspot.ts` by the
 * overlap-merge regression test.
 *
 * The generated text is one exported function whose body repeats the same
 * `overlapMergeSignal(...)` statement 40 times (each carrying the trailing
 * "overlap chunk merge hotspot coverage" comment), followed by the small
 * helper function it calls.
 *
 * NOTE(review): the leading whitespace inside the literals is reproduced
 * exactly as it appears in this diff rendering, which may have collapsed the
 * original indentation — confirm against the published package.
 *
 * @returns The fixture source, lines joined with "\n" and no trailing newline.
 */
function buildOverlapHotspotFixture(): string {
  // One statement per step; the step index is the only thing that varies.
  const repeatedCalls = Array.from(
    { length: 40 },
    (_, i) => ` total += overlapMergeSignal(seed, ${i}); // overlap chunk merge hotspot coverage`
  );

  return [
    "export function overlapMergeHotspot(seed: number): number {",
    " let total = seed;",
    ...repeatedCalls,
    " return total;",
    "}",
    "",
    "function overlapMergeSignal(seed: number, step: number): number {",
    " return seed + step;",
    "}"
  ].join("\n");
}
93
+
94
/**
 * Builds the TypeScript source text indexed as `src/pressure.ts` by the
 * overlap-selection regression test.
 *
 * The generated text is one exported function whose body repeats the same
 * `overlapPressureStep(...)` statement 220 times (each carrying the trailing
 * "overlap pressure signal token cluster ..." comment), followed by the small
 * helper function it calls.
 *
 * NOTE(review): the leading whitespace inside the literals is reproduced
 * exactly as it appears in this diff rendering, which may have collapsed the
 * original indentation — confirm against the published package.
 *
 * @returns The fixture source, lines joined with "\n" and no trailing newline.
 */
function buildLargeOverlapPressureFixture(): string {
  // One statement per step; the step index is the only thing that varies.
  const repeatedSteps = Array.from(
    { length: 220 },
    (_, i) =>
      ` total += overlapPressureStep(seed, ${i}); // overlap pressure signal token cluster for chunk overlap diagnostics`
  );

  return [
    "export function overlapPressureSignal(seed: number): number {",
    " let total = seed;",
    ...repeatedSteps,
    " return total;",
    "}",
    "",
    "function overlapPressureStep(seed: number, step: number): number {",
    " return seed + step;",
    "}"
  ].join("\n");
}
114
+
74
115
  describe("mcp search quality regressions", () => {
75
116
  const dirs: string[] = [];
76
117
 
@@ -213,7 +254,7 @@ describe("mcp search quality regressions", () => {
213
254
  }
214
255
  });
215
256
 
216
- it("returns larger, more complete snippets with upgraded chunk windows", async () => {
257
+ it("maintains or improves snippet completeness with upgraded chunk windows", async () => {
217
258
  const root = await mkdtemp(join(tmpdir(), "rce-mcp-snippet-quality-"));
218
259
  dirs.push(root);
219
260
  const sqlitePath = join(root, "mcp-snippet-quality.sqlite");
@@ -306,8 +347,8 @@ describe("mcp search quality regressions", () => {
306
347
 
307
348
  const legacySpan = (legacyTop?.end_line ?? 0) - (legacyTop?.start_line ?? 0);
308
349
  const upgradedSpan = (upgradedTop?.end_line ?? 0) - (upgradedTop?.start_line ?? 0);
309
- expect(upgradedSpan).toBeGreaterThan(legacySpan);
310
- expect((upgradedTop?.snippet.length ?? 0)).toBeGreaterThan(legacyTop?.snippet.length ?? 0);
350
+ expect(upgradedSpan).toBeGreaterThanOrEqual(legacySpan);
351
+ expect((upgradedTop?.snippet.length ?? 0)).toBeGreaterThanOrEqual(legacyTop?.snippet.length ?? 0);
311
352
 
312
353
  const tokenCoverage = (snippet: string | undefined): number =>
313
354
  ["circuitActivationSeed", "tripAuditDigest", "freezeOrderEntry"].filter((token) => snippet?.includes(token))
@@ -319,4 +360,650 @@ describe("mcp search quality regressions", () => {
319
360
  repo.close();
320
361
  }
321
362
  });
363
+
364
// Regression: indexes a hotspot file with heavily overlapping sliding chunks
// (target 70 tokens, overlap 60) and compares a top_k=2 search with
// `merge_overlapping_chunks_enabled` off versus on. With merging off, both
// results are expected to come from the hotspot file and overlap each other;
// with merging on, the second slot is expected to surface `src/secondary.ts`.
it("merges overlapping top candidates to recover distinct file coverage", async () => {
  const root = await mkdtemp(join(tmpdir(), "rce-overlap-merge-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "overlap-merge-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  await repo.migrate();
  await repo.upsertWorkspace({
    workspace_id: "ws-overlap",
    tenant_id: "tenant-overlap",
    name: "overlap-merge",
    project_root_path: "/workspace/overlap"
  });

  // Lexical-only weighting shared by both readers, so the merge flag is the
  // only configuration difference between them.
  const lexicalOnlyWeights = {
    lexical_weight: 1,
    vector_weight: 0,
    path_match_boost: 0,
    recency_boost: 0,
    generated_penalty: 0
  };

  // Each core gets its own cache instance.
  const writer = new RetrievalCore(repo, new InMemoryQueryCache(), {
    chunkingConfig: {
      strategy: "sliding",
      target_chunk_tokens: 70,
      chunk_overlap_tokens: 60
    }
  });
  const withoutMerge = new RetrievalCore(repo, new InMemoryQueryCache(), {
    scoringConfig: {
      candidate_weights: { ...lexicalOnlyWeights },
      rerank: {
        merge_overlapping_chunks_enabled: false
      }
    }
  });
  const withMerge = new RetrievalCore(repo, new InMemoryQueryCache(), {
    scoringConfig: {
      candidate_weights: { ...lexicalOnlyWeights },
      rerank: {
        merge_overlapping_chunks_enabled: true
      }
    }
  });

  try {
    await writer.indexArtifact({
      tenant_id: "tenant-overlap",
      workspace_id: "ws-overlap",
      index_version: "idx-overlap-v1",
      files: [
        {
          path: "src/hotspot.ts",
          language: "typescript",
          content: buildOverlapHotspotFixture()
        },
        {
          path: "src/secondary.ts",
          language: "typescript",
          content:
            "export function overlapCoverageFallback(): string { return 'overlap merge hotspot coverage fallback target overlap chunk'; }"
        },
        {
          path: "src/noise.ts",
          language: "typescript",
          content: "export const NOISE = 'unrelated';"
        }
      ]
    });

    const query = "overlapMergeSignal overlap chunk merge hotspot coverage fallback target";
    const baseline = await withoutMerge.searchContext({
      trace_id: "trc-overlap-no-merge",
      tenant_id: "tenant-overlap",
      workspace_id: "ws-overlap",
      request: {
        project_root_path: "/workspace/overlap",
        query,
        top_k: 2
      }
    });
    const merged = await withMerge.searchContext({
      trace_id: "trc-overlap-merge",
      tenant_id: "tenant-overlap",
      workspace_id: "ws-overlap",
      request: {
        project_root_path: "/workspace/overlap",
        query,
        top_k: 2
      }
    });

    // Baseline: both slots are taken by the hotspot file ...
    expect(baseline.results[0]?.path).toBe("src/hotspot.ts");
    expect(baseline.results[1]?.path).toBe("src/hotspot.ts");
    const baselineFirst = baseline.results[0];
    const baselineSecond = baseline.results[1];
    expect(baselineFirst).toBeDefined();
    expect(baselineSecond).toBeDefined();
    // ... and their line ranges intersect (latest start <= earliest end).
    const overlapStart = Math.max(baselineFirst?.start_line ?? 0, baselineSecond?.start_line ?? 0);
    const overlapEnd = Math.min(baselineFirst?.end_line ?? 0, baselineSecond?.end_line ?? 0);
    expect(overlapStart).toBeLessThanOrEqual(overlapEnd);

    // Merged: hotspot stays on top, the secondary file appears, and the
    // number of distinct paths strictly grows relative to the baseline.
    expect(merged.results[0]?.path).toBe("src/hotspot.ts");
    expect(merged.results.some((result) => result.path === "src/secondary.ts")).toBe(true);
    expect(new Set(merged.results.map((result) => result.path)).size).toBeGreaterThan(
      new Set(baseline.results.map((result) => result.path)).size
    );
    // The merged hotspot result must span more than 40 lines.
    const mergedHotspot = merged.results.find((result) => result.path === "src/hotspot.ts");
    expect((mergedHotspot?.end_line ?? 0) - (mergedHotspot?.start_line ?? 0)).toBeGreaterThan(40);
  } finally {
    repo.close();
  }
});
485
+
486
// Regression: with merging enabled but `merge_max_span_lines` set to 8, the
// returned top_k=3 results must still include `src/fallback.ts`, and any two
// results from the same file must overlap by less than 20% of the shorter
// range (line ranges treated as inclusive).
it("avoids heavy same-file overlap even when merge span cap prevents direct consolidation", async () => {
  const root = await mkdtemp(join(tmpdir(), "rce-overlap-selection-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "overlap-selection-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  await repo.migrate();
  await repo.upsertWorkspace({
    workspace_id: "ws-overlap-selection",
    tenant_id: "tenant-overlap-selection",
    name: "overlap-selection",
    project_root_path: "/workspace/overlap-selection"
  });

  const writer = new RetrievalCore(repo, new InMemoryQueryCache(), {
    chunkingConfig: {
      strategy: "sliding",
      target_chunk_tokens: 70,
      chunk_overlap_tokens: 60
    }
  });
  const merged = new RetrievalCore(repo, new InMemoryQueryCache(), {
    scoringConfig: {
      candidate_weights: {
        lexical_weight: 1,
        vector_weight: 0,
        path_match_boost: 0,
        recency_boost: 0,
        generated_penalty: 0
      },
      rerank: {
        merge_overlapping_chunks_enabled: true,
        merge_max_span_lines: 8
      }
    }
  });

  try {
    await writer.indexArtifact({
      tenant_id: "tenant-overlap-selection",
      workspace_id: "ws-overlap-selection",
      index_version: "idx-overlap-selection-v1",
      files: [
        {
          path: "src/pressure.ts",
          language: "typescript",
          content: buildLargeOverlapPressureFixture()
        },
        {
          path: "src/fallback.ts",
          language: "typescript",
          content:
            "export function overlapPressureFallback(): string { return 'overlap pressure signal token cluster fallback target'; }"
        },
        {
          path: "src/extra.ts",
          language: "typescript",
          content:
            "export function overlapPressureExtra(): string { return 'overlap pressure signal token cluster extra coverage'; }"
        }
      ]
    });

    const retrieval = await merged.searchContext({
      trace_id: "trc-overlap-selection",
      tenant_id: "tenant-overlap-selection",
      workspace_id: "ws-overlap-selection",
      request: {
        project_root_path: "/workspace/overlap-selection",
        query: "overlap pressure signal token cluster fallback target",
        top_k: 3
      }
    });

    expect(retrieval.results.some((result) => result.path === "src/fallback.ts")).toBe(true);

    // Group the returned line ranges by file path.
    const byPath = new Map<string, Array<{ start: number; end: number }>>();
    for (const result of retrieval.results) {
      const range = { start: result.start_line, end: result.end_line };
      const rows = byPath.get(result.path);
      if (rows === undefined) {
        byPath.set(result.path, [range]);
      } else {
        rows.push(range);
      }
    }

    // Check every unordered pair of ranges within each file.
    for (const ranges of byPath.values()) {
      for (let i = 0; i < ranges.length; i += 1) {
        const left = ranges[i];
        if (!left) {
          continue;
        }
        for (let j = i + 1; j < ranges.length; j += 1) {
          const right = ranges[j];
          if (!right) {
            continue;
          }
          const overlapStart = Math.max(left.start, right.start);
          const overlapEnd = Math.min(left.end, right.end);
          // Inclusive line count of the intersection; 0 when disjoint.
          const overlapLength = overlapEnd >= overlapStart ? overlapEnd - overlapStart + 1 : 0;
          // Clamp to 1 so a degenerate range cannot cause a division by zero.
          const leftLength = Math.max(1, left.end - left.start + 1);
          const rightLength = Math.max(1, right.end - right.start + 1);
          const smaller = Math.min(leftLength, rightLength);
          const overlapRatio = overlapLength / smaller;
          expect(overlapRatio).toBeLessThan(0.2);
        }
      }
    }
  } finally {
    repo.close();
  }
});
599
+
600
// Regression: with `contextPackingConfig.enabled`, a single top_k=1 result
// for a file whose two relevant functions are separated by 60 filler lines
// must contain both function names, an "..." marker, a reason mentioning
// "contextual spans", and a line span greater than 20.
it("packs same-file contextual spans with elision markers when context packing is enabled", async () => {
  const root = await mkdtemp(join(tmpdir(), "rce-context-packing-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "context-packing-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  await repo.migrate();
  await repo.upsertWorkspace({
    workspace_id: "ws-context-pack",
    tenant_id: "tenant-context-pack",
    name: "context-pack",
    project_root_path: "/workspace/context-pack"
  });

  const cache = new InMemoryQueryCache();
  const core = new RetrievalCore(repo, cache, {
    chunkingConfig: {
      strategy: "sliding",
      target_chunk_tokens: 35,
      chunk_overlap_tokens: 8
    },
    scoringConfig: {
      rerank: {
        // Overlap merging is switched off for this case.
        merge_overlapping_chunks_enabled: false
      }
    },
    contextPackingConfig: {
      enabled: true,
      max_spans_per_result: 2,
      max_gap_lines: 90,
      max_snippet_chars: 3_200,
      enhancer_snippet_char_limit: 2_200
    }
  });

  try {
    // 60 filler declarations placed between the two functions of interest.
    const bridgeNoise = Array.from({ length: 60 }, (_, idx) => `const bridgeNoise${idx} = ${idx};`);
    await core.indexArtifact({
      tenant_id: "tenant-context-pack",
      workspace_id: "ws-context-pack",
      index_version: "idx-context-pack-v1",
      files: [
        {
          path: "packages/oracle/src/estimator.ts",
          language: "typescript",
          content: [
            "export function computeProbability(marketSignal: number): number {",
            " const base = marketSignal * 0.7;",
            " const normalized = Math.max(0, Math.min(1, base));",
            " return normalized;",
            "}",
            "",
            ...bridgeNoise,
            "",
            "export function fetchLlmInputs(llmInputFlow: string[]): string[] {",
            " return llmInputFlow.filter(Boolean);",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-context-pack",
      tenant_id: "tenant-context-pack",
      workspace_id: "ws-context-pack",
      request: {
        project_root_path: "/workspace/context-pack",
        query: "computeProbability marketSignal fetchLlmInputs llmInputFlow",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("packages/oracle/src/estimator.ts");
    // Both distant functions must appear in the one packed snippet.
    expect(top?.snippet.includes("computeProbability")).toBe(true);
    expect(top?.snippet.includes("fetchLlmInputs")).toBe(true);
    // "..." is the elision marker named in the test title.
    expect(top?.snippet.includes("...")).toBe(true);
    expect(top?.reason.includes("contextual spans")).toBe(true);
    expect((top?.end_line ?? 0) - (top?.start_line ?? 0)).toBeGreaterThan(20);
  } finally {
    repo.close();
  }
});
685
+
686
// Regression: with `snippetIntegrityConfig` enabled for TypeScript (and no
// repair options set), the top snippet for a 500-statement function must
// contain a "[truncated:v1" marker line carrying a non-"unknown" symbol,
// `estimated_total_lines=`, `through_line=`, and a positive `omitted_after=`.
// The full response must also validate against `SearchContextOutputSchema`.
it("adds deterministic truncation metadata marker for broken TS function boundaries when enabled", async () => {
  const root = await mkdtemp(join(tmpdir(), "rce-snippet-integrity-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-integrity-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  await repo.migrate();
  await repo.upsertWorkspace({
    workspace_id: "ws-snippet-integrity",
    tenant_id: "tenant-snippet-integrity",
    name: "snippet-integrity",
    project_root_path: "/workspace/snippet-integrity"
  });

  const cache = new InMemoryQueryCache();
  const core = new RetrievalCore(repo, cache, {
    chunkingConfig: {
      strategy: "sliding",
      target_chunk_tokens: 35,
      chunk_overlap_tokens: 8
    },
    scoringConfig: {
      rerank: {
        merge_overlapping_chunks_enabled: false
      }
    },
    snippetIntegrityConfig: {
      enabled: true,
      target_languages: ["typescript"],
      max_contiguous_gap_lines: 6,
      marker_template_version: "v1"
    }
  });

  try {
    // 500 near-identical statements form the body of the indexed function.
    const body = Array.from(
      { length: 500 },
      (_, idx) => ` const checkpoint_${idx} = input + ${idx}; // estimator continuity signal`
    );
    await core.indexArtifact({
      tenant_id: "tenant-snippet-integrity",
      workspace_id: "ws-snippet-integrity",
      index_version: "idx-snippet-integrity-v1",
      files: [
        {
          path: "src/estimator.ts",
          language: "typescript",
          content: [
            "export function estimateProbability(input: number): number {",
            ...body,
            " return input;",
            "}",
            "",
            "export function calibrateProbability(input: number): number {",
            " return estimateProbability(input + 1);",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-integrity",
      tenant_id: "tenant-snippet-integrity",
      workspace_id: "ws-snippet-integrity",
      request: {
        project_root_path: "/workspace/snippet-integrity",
        query: "estimateProbability checkpoint_20 checkpoint_430",
        top_k: 1
      }
    });

    // The annotated output must still satisfy the published output contract.
    expect(() => SearchContextOutputSchema.parse(retrieval)).not.toThrow();
    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("src/estimator.ts");
    const markerLine = top?.snippet.split("\n").find((line) => line.includes("[truncated:v1"));
    expect(markerLine).toBeDefined();
    // The symbol must be a real identifier, not the literal "unknown".
    expect(markerLine).toMatch(/symbol=(?!unknown)[A-Za-z_$][\w$]*/);
    expect(markerLine).toContain("estimated_total_lines=");
    expect(markerLine).toContain("through_line=");
    // A missing `omitted_after=` field falls back to "0" and fails the check below.
    const omittedAfterMatch = markerLine?.match(/omitted_after=(\d+)/);
    expect(Number.parseInt(omittedAfterMatch?.[1] ?? "0", 10)).toBeGreaterThan(0);
  } finally {
    repo.close();
  }
});
773
+
774
// Regression: with `repair_enabled` and generous caps (260 envelope lines,
// 8000 snippet chars), the top snippet for a 120-statement function must
// contain both queried checkpoints, carry no "[truncated:v1" marker, and
// span more than 80 lines.
it("repairs contiguous TS snippets before annotation when repair is enabled", async () => {
  const root = await mkdtemp(join(tmpdir(), "rce-snippet-repair-quality-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-repair-quality.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  await repo.migrate();
  await repo.upsertWorkspace({
    workspace_id: "ws-snippet-repair",
    tenant_id: "tenant-snippet-repair",
    name: "snippet-repair",
    project_root_path: "/workspace/snippet-repair"
  });

  const cache = new InMemoryQueryCache();
  const core = new RetrievalCore(repo, cache, {
    chunkingConfig: {
      strategy: "sliding",
      target_chunk_tokens: 35,
      chunk_overlap_tokens: 8
    },
    scoringConfig: {
      rerank: {
        merge_overlapping_chunks_enabled: false
      }
    },
    snippetIntegrityConfig: {
      enabled: true,
      target_languages: ["typescript"],
      max_contiguous_gap_lines: 6,
      marker_template_version: "v1",
      repair_enabled: true,
      repair_max_envelope_lines: 260,
      repair_max_snippet_chars: 8_000
    }
  });

  try {
    // 120 near-identical statements form the body of the indexed function.
    const body = Array.from(
      { length: 120 },
      (_, idx) => ` const checkpoint_${idx} = input + ${idx}; // repair continuity signal`
    );
    await core.indexArtifact({
      tenant_id: "tenant-snippet-repair",
      workspace_id: "ws-snippet-repair",
      index_version: "idx-snippet-repair-v1",
      files: [
        {
          path: "src/repair.ts",
          language: "typescript",
          content: [
            "export function estimateRepair(input: number): number {",
            ...body,
            " return input;",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-repair",
      tenant_id: "tenant-snippet-repair",
      workspace_id: "ws-snippet-repair",
      request: {
        project_root_path: "/workspace/snippet-repair",
        query: "estimateRepair checkpoint_10 checkpoint_110",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("src/repair.ts");
    // Both widely separated checkpoints must be present in the one snippet.
    expect(top?.snippet.includes("checkpoint_10")).toBe(true);
    expect(top?.snippet.includes("checkpoint_110")).toBe(true);
    // A successful repair leaves no truncation marker behind.
    expect(top?.snippet.includes("[truncated:v1")).toBe(false);
    expect((top?.end_line ?? 0) - (top?.start_line ?? 0)).toBeGreaterThan(80);
  } finally {
    repo.close();
  }
});
856
+
857
// Regression: same setup as the repair case, but `repair_max_snippet_chars`
// is only 220, so the top snippet is expected to carry the "[truncated:v1"
// marker instead of a repaired body.
it("falls back to truncation marker when repair output still exceeds caps", async () => {
  const root = await mkdtemp(join(tmpdir(), "rce-snippet-repair-fallback-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-repair-fallback.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  await repo.migrate();
  await repo.upsertWorkspace({
    workspace_id: "ws-snippet-repair-fallback",
    tenant_id: "tenant-snippet-repair-fallback",
    name: "snippet-repair-fallback",
    project_root_path: "/workspace/snippet-repair-fallback"
  });

  const cache = new InMemoryQueryCache();
  const core = new RetrievalCore(repo, cache, {
    chunkingConfig: {
      strategy: "sliding",
      target_chunk_tokens: 35,
      chunk_overlap_tokens: 8
    },
    scoringConfig: {
      rerank: {
        merge_overlapping_chunks_enabled: false
      }
    },
    snippetIntegrityConfig: {
      enabled: true,
      target_languages: ["typescript"],
      max_contiguous_gap_lines: 6,
      marker_template_version: "v1",
      repair_enabled: true,
      repair_max_envelope_lines: 260,
      // Deliberately tiny character cap for the repaired snippet.
      repair_max_snippet_chars: 220
    }
  });

  try {
    // 120 near-identical statements form the body of the indexed function.
    const body = Array.from(
      { length: 120 },
      (_, idx) => ` const checkpoint_${idx} = input + ${idx}; // repair fallback signal`
    );
    await core.indexArtifact({
      tenant_id: "tenant-snippet-repair-fallback",
      workspace_id: "ws-snippet-repair-fallback",
      index_version: "idx-snippet-repair-fallback-v1",
      files: [
        {
          path: "src/repair-fallback.ts",
          language: "typescript",
          content: [
            "export function estimateRepairFallback(input: number): number {",
            ...body,
            " return input;",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-repair-fallback",
      tenant_id: "tenant-snippet-repair-fallback",
      workspace_id: "ws-snippet-repair-fallback",
      request: {
        project_root_path: "/workspace/snippet-repair-fallback",
        query: "estimateRepairFallback checkpoint_10 checkpoint_110",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("src/repair-fallback.ts");
    expect(top?.snippet.includes("[truncated:v1")).toBe(true);
  } finally {
    repo.close();
  }
});
936
+
937
// Regression: snippet integrity is enabled with `target_languages` limited to
// TypeScript, and a long Go function is indexed. The top snippet for that Go
// file must not contain the "[truncated:v1" marker.
it("does not add truncation marker for non-target languages", async () => {
  const root = await mkdtemp(join(tmpdir(), "rce-snippet-integrity-language-gate-"));
  dirs.push(root);
  const sqlitePath = join(root, "snippet-integrity-language-gate.sqlite");

  const repo = new SqliteIndexRepository(sqlitePath);
  await repo.migrate();
  await repo.upsertWorkspace({
    workspace_id: "ws-snippet-language-gate",
    tenant_id: "tenant-snippet-language-gate",
    name: "snippet-language-gate",
    project_root_path: "/workspace/snippet-language-gate"
  });

  const cache = new InMemoryQueryCache();
  const core = new RetrievalCore(repo, cache, {
    chunkingConfig: {
      strategy: "language_aware",
      target_chunk_tokens: 40,
      chunk_overlap_tokens: 8,
      enabled_languages: ["go"]
    },
    snippetIntegrityConfig: {
      enabled: true,
      // Go is intentionally absent from the integrity target list.
      target_languages: ["typescript"],
      max_contiguous_gap_lines: 6,
      marker_template_version: "v1"
    }
  });

  try {
    // 220 tab-indented statements form the body of the Go function.
    const steps = Array.from({ length: 220 }, (_, idx) => `\ttotal += input + ${idx}`);
    await core.indexArtifact({
      tenant_id: "tenant-snippet-language-gate",
      workspace_id: "ws-snippet-language-gate",
      index_version: "idx-snippet-language-gate-v1",
      files: [
        {
          path: "pkg/runtime/estimate.go",
          language: "go",
          content: [
            "package runtime",
            "",
            "func EstimateProbability(input int) int {",
            "\ttotal := 0",
            ...steps,
            "\treturn total",
            "}"
          ].join("\n")
        }
      ]
    });

    const retrieval = await core.searchContext({
      trace_id: "trc-snippet-language-gate",
      tenant_id: "tenant-snippet-language-gate",
      workspace_id: "ws-snippet-language-gate",
      request: {
        project_root_path: "/workspace/snippet-language-gate",
        query: "EstimateProbability input 200",
        top_k: 1
      }
    });

    const top = retrieval.results[0];
    expect(top).toBeDefined();
    expect(top?.path).toBe("pkg/runtime/estimate.go");
    expect(top?.snippet.includes("[truncated:v1")).toBe(false);
  } finally {
    repo.close();
  }
});
322
1009
  });