@kenkaiiii/ggcoder 4.14.2 → 4.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/dist/core/agent-session.d.ts.map +1 -1
  2. package/dist/core/agent-session.js +8 -2
  3. package/dist/core/agent-session.js.map +1 -1
  4. package/dist/core/code-retrieval.d.ts +13 -0
  5. package/dist/core/code-retrieval.d.ts.map +1 -0
  6. package/dist/core/code-retrieval.js +87 -0
  7. package/dist/core/code-retrieval.js.map +1 -0
  8. package/dist/core/hashline-edit-benchmark.d.ts +55 -0
  9. package/dist/core/hashline-edit-benchmark.d.ts.map +1 -0
  10. package/dist/core/hashline-edit-benchmark.js +342 -0
  11. package/dist/core/hashline-edit-benchmark.js.map +1 -0
  12. package/dist/core/hashline-edit-benchmark.test.d.ts +2 -0
  13. package/dist/core/hashline-edit-benchmark.test.d.ts.map +1 -0
  14. package/dist/core/hashline-edit-benchmark.test.js +141 -0
  15. package/dist/core/hashline-edit-benchmark.test.js.map +1 -0
  16. package/dist/core/hashline.d.ts +50 -0
  17. package/dist/core/hashline.d.ts.map +1 -0
  18. package/dist/core/hashline.js +76 -0
  19. package/dist/core/hashline.js.map +1 -0
  20. package/dist/core/semantic-search-benchmark.d.ts +37 -0
  21. package/dist/core/semantic-search-benchmark.d.ts.map +1 -0
  22. package/dist/core/semantic-search-benchmark.js +211 -0
  23. package/dist/core/semantic-search-benchmark.js.map +1 -0
  24. package/dist/core/semantic-search-benchmark.test.d.ts +2 -0
  25. package/dist/core/semantic-search-benchmark.test.d.ts.map +1 -0
  26. package/dist/core/semantic-search-benchmark.test.js +89 -0
  27. package/dist/core/semantic-search-benchmark.test.js.map +1 -0
  28. package/dist/core/steering.d.ts +25 -0
  29. package/dist/core/steering.d.ts.map +1 -0
  30. package/dist/core/steering.js +29 -0
  31. package/dist/core/steering.js.map +1 -0
  32. package/dist/tools/edit.d.ts +6 -0
  33. package/dist/tools/edit.d.ts.map +1 -1
  34. package/dist/tools/edit.js +34 -2
  35. package/dist/tools/edit.js.map +1 -1
  36. package/dist/tools/edit.test.js +112 -0
  37. package/dist/tools/edit.test.js.map +1 -1
  38. package/dist/tools/index.d.ts +1 -0
  39. package/dist/tools/index.d.ts.map +1 -1
  40. package/dist/tools/index.js +3 -0
  41. package/dist/tools/index.js.map +1 -1
  42. package/dist/tools/prompt-hints.d.ts.map +1 -1
  43. package/dist/tools/prompt-hints.js +8 -0
  44. package/dist/tools/prompt-hints.js.map +1 -1
  45. package/dist/tools/read.d.ts +1 -0
  46. package/dist/tools/read.d.ts.map +1 -1
  47. package/dist/tools/read.js +14 -3
  48. package/dist/tools/read.js.map +1 -1
  49. package/dist/tools/read.test.js +40 -0
  50. package/dist/tools/read.test.js.map +1 -1
  51. package/dist/tools/search-code.d.ts +11 -0
  52. package/dist/tools/search-code.d.ts.map +1 -0
  53. package/dist/tools/search-code.js +95 -0
  54. package/dist/tools/search-code.js.map +1 -0
  55. package/dist/tools/search-code.test.d.ts +2 -0
  56. package/dist/tools/search-code.test.d.ts.map +1 -0
  57. package/dist/tools/search-code.test.js +77 -0
  58. package/dist/tools/search-code.test.js.map +1 -0
  59. package/dist/ui/App.d.ts +2 -1
  60. package/dist/ui/App.d.ts.map +1 -1
  61. package/dist/ui/components/ActivityIndicator.d.ts +1 -1
  62. package/dist/ui/components/ActivityIndicator.d.ts.map +1 -1
  63. package/dist/ui/components/AnimationContext.d.ts +1 -1
  64. package/dist/ui/components/AnimationContext.d.ts.map +1 -1
  65. package/dist/ui/components/AssistantMessage.d.ts +1 -1
  66. package/dist/ui/components/AssistantMessage.d.ts.map +1 -1
  67. package/dist/ui/components/BackgroundTasksBar.d.ts +2 -1
  68. package/dist/ui/components/BackgroundTasksBar.d.ts.map +1 -1
  69. package/dist/ui/components/Banner.d.ts +2 -1
  70. package/dist/ui/components/Banner.d.ts.map +1 -1
  71. package/dist/ui/components/ChatFooterPane.d.ts +2 -1
  72. package/dist/ui/components/ChatFooterPane.d.ts.map +1 -1
  73. package/dist/ui/components/ChatInputStack.d.ts +1 -1
  74. package/dist/ui/components/ChatInputStack.d.ts.map +1 -1
  75. package/dist/ui/components/ChatLayout.d.ts +4 -4
  76. package/dist/ui/components/ChatLayout.d.ts.map +1 -1
  77. package/dist/ui/components/ChatLivePane.d.ts +1 -1
  78. package/dist/ui/components/ChatLivePane.d.ts.map +1 -1
  79. package/dist/ui/components/ChatScreen.d.ts +1 -1
  80. package/dist/ui/components/ChatScreen.d.ts.map +1 -1
  81. package/dist/ui/components/ChatStatusRow.d.ts +1 -1
  82. package/dist/ui/components/ChatStatusRow.d.ts.map +1 -1
  83. package/dist/ui/components/CompactionNotice.d.ts +3 -2
  84. package/dist/ui/components/CompactionNotice.d.ts.map +1 -1
  85. package/dist/ui/components/DiffFrame.d.ts +1 -1
  86. package/dist/ui/components/DiffFrame.d.ts.map +1 -1
  87. package/dist/ui/components/DiffView.d.ts +2 -1
  88. package/dist/ui/components/DiffView.d.ts.map +1 -1
  89. package/dist/ui/components/Footer.d.ts +2 -1
  90. package/dist/ui/components/Footer.d.ts.map +1 -1
  91. package/dist/ui/components/FooterStatusRow.d.ts +2 -1
  92. package/dist/ui/components/FooterStatusRow.d.ts.map +1 -1
  93. package/dist/ui/components/FullScreenOverlayRouter.d.ts +2 -1
  94. package/dist/ui/components/FullScreenOverlayRouter.d.ts.map +1 -1
  95. package/dist/ui/components/IdealHookMessage.d.ts +1 -1
  96. package/dist/ui/components/IdealHookMessage.d.ts.map +1 -1
  97. package/dist/ui/components/InputArea.d.ts +1 -1
  98. package/dist/ui/components/InputArea.d.ts.map +1 -1
  99. package/dist/ui/components/LiveToolPanel.d.ts +2 -1
  100. package/dist/ui/components/LiveToolPanel.d.ts.map +1 -1
  101. package/dist/ui/components/Markdown.d.ts +2 -2
  102. package/dist/ui/components/Markdown.d.ts.map +1 -1
  103. package/dist/ui/components/ModelSelector.d.ts +2 -1
  104. package/dist/ui/components/ModelSelector.d.ts.map +1 -1
  105. package/dist/ui/components/Overlay.d.ts +1 -1
  106. package/dist/ui/components/Overlay.d.ts.map +1 -1
  107. package/dist/ui/components/PixelOverlay.d.ts +2 -1
  108. package/dist/ui/components/PixelOverlay.d.ts.map +1 -1
  109. package/dist/ui/components/PlanApproval.d.ts +2 -1
  110. package/dist/ui/components/PlanApproval.d.ts.map +1 -1
  111. package/dist/ui/components/PlanBanner.d.ts +2 -1
  112. package/dist/ui/components/PlanBanner.d.ts.map +1 -1
  113. package/dist/ui/components/PlanModeLogo.d.ts +2 -1
  114. package/dist/ui/components/PlanModeLogo.d.ts.map +1 -1
  115. package/dist/ui/components/PlanOverlay.d.ts +3 -2
  116. package/dist/ui/components/PlanOverlay.d.ts.map +1 -1
  117. package/dist/ui/components/PlanProgress.d.ts +2 -1
  118. package/dist/ui/components/PlanProgress.d.ts.map +1 -1
  119. package/dist/ui/components/QueueIndicator.d.ts +2 -1
  120. package/dist/ui/components/QueueIndicator.d.ts.map +1 -1
  121. package/dist/ui/components/RewindOverlay.d.ts +2 -1
  122. package/dist/ui/components/RewindOverlay.d.ts.map +1 -1
  123. package/dist/ui/components/SelectList.d.ts +2 -1
  124. package/dist/ui/components/SelectList.d.ts.map +1 -1
  125. package/dist/ui/components/ServerToolExecution.d.ts +2 -1
  126. package/dist/ui/components/ServerToolExecution.d.ts.map +1 -1
  127. package/dist/ui/components/SessionSelector.d.ts +2 -1
  128. package/dist/ui/components/SessionSelector.d.ts.map +1 -1
  129. package/dist/ui/components/SessionSummary.d.ts +2 -1
  130. package/dist/ui/components/SessionSummary.d.ts.map +1 -1
  131. package/dist/ui/components/SettingsSelector.d.ts +2 -1
  132. package/dist/ui/components/SettingsSelector.d.ts.map +1 -1
  133. package/dist/ui/components/SkillsOverlay.d.ts +2 -1
  134. package/dist/ui/components/SkillsOverlay.d.ts.map +1 -1
  135. package/dist/ui/components/SlashCommandMenu.d.ts +2 -1
  136. package/dist/ui/components/SlashCommandMenu.d.ts.map +1 -1
  137. package/dist/ui/components/SlashStyledSelectList.d.ts +2 -1
  138. package/dist/ui/components/SlashStyledSelectList.d.ts.map +1 -1
  139. package/dist/ui/components/Spinner.d.ts +2 -1
  140. package/dist/ui/components/Spinner.d.ts.map +1 -1
  141. package/dist/ui/components/StreamingArea.d.ts +1 -1
  142. package/dist/ui/components/StreamingArea.d.ts.map +1 -1
  143. package/dist/ui/components/SubAgentPanel.d.ts +2 -1
  144. package/dist/ui/components/SubAgentPanel.d.ts.map +1 -1
  145. package/dist/ui/components/TaskPickerMenu.d.ts +2 -1
  146. package/dist/ui/components/TaskPickerMenu.d.ts.map +1 -1
  147. package/dist/ui/components/ThemeSelector.d.ts +2 -1
  148. package/dist/ui/components/ThemeSelector.d.ts.map +1 -1
  149. package/dist/ui/components/ThinkingBlock.d.ts +1 -1
  150. package/dist/ui/components/ThinkingBlock.d.ts.map +1 -1
  151. package/dist/ui/components/ToolExecution.d.ts +2 -1
  152. package/dist/ui/components/ToolExecution.d.ts.map +1 -1
  153. package/dist/ui/components/ToolGroupExecution.d.ts +2 -1
  154. package/dist/ui/components/ToolGroupExecution.d.ts.map +1 -1
  155. package/dist/ui/components/TranscriptViewport.d.ts +1 -1
  156. package/dist/ui/components/TranscriptViewport.d.ts.map +1 -1
  157. package/dist/ui/components/UserMessage.d.ts +2 -1
  158. package/dist/ui/components/UserMessage.d.ts.map +1 -1
  159. package/dist/ui/hooks/useAgentLoop.d.ts.map +1 -1
  160. package/dist/ui/hooks/useAgentLoop.js +5 -1
  161. package/dist/ui/hooks/useAgentLoop.js.map +1 -1
  162. package/dist/ui/transcript/MiscRows.d.ts +8 -7
  163. package/dist/ui/transcript/MiscRows.d.ts.map +1 -1
  164. package/dist/ui/transcript/StatusRow.d.ts +1 -1
  165. package/dist/ui/transcript/StatusRow.d.ts.map +1 -1
  166. package/dist/ui/transcript/ToolRows.d.ts +7 -6
  167. package/dist/ui/transcript/ToolRows.d.ts.map +1 -1
  168. package/dist/ui/transcript/TranscriptItemFrame.d.ts +1 -1
  169. package/dist/ui/transcript/TranscriptItemFrame.d.ts.map +1 -1
  170. package/package.json +6 -4
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Hash-anchored line addressing — the pure, UI-free core shared by the hashline
3
+ * benchmark and the opt-in anchor guard in the read/edit tools.
4
+ *
5
+ * Every line gets a short content+position hash. The line's index is folded into
6
+ * the hash, so blank/repeated lines get distinct anchors unless the truncated
7
+ * 16-bit hash itself collides (such anchors are reported as `ambiguous`). That is
8
+ * what lets an edit either resolve to exactly one location or be rejected — never
9
+ * silently corrupt a file that drifted since the model last read it.
10
+ */
11
+ import { createHash } from "node:crypto";
12
/**
 * Compute the short anchor for a single line of a file.
 *
 * The 0-based `index` is hashed together with the line's trimmed text, so two
 * lines with the same content at different positions normally receive different
 * anchors. The digest is cut to 4 hex characters (16 bits), so unrelated lines
 * can still end up sharing an anchor.
 *
 * @param {string} line - Raw line text; leading/trailing whitespace is ignored.
 * @param {number} index - 0-based position of the line in the file.
 * @returns {string} The first 4 hex characters of the SHA-1 digest.
 */
export function lineHash(line, index) {
    const payload = `${index}:${line.trim()}`;
    const digest = createHash("sha1").update(payload).digest("hex");
    return digest.slice(0, 4);
}
20
/**
 * Render a file for the model to read, prefixing each line with its anchor in
 * the form `anchor│line`.
 *
 * @param {string} file - Full file text; lines are separated by "\n".
 * @returns {string} The annotated text, joined with "\n".
 */
export function renderWithAnchors(file) {
    const annotated = [];
    for (const [position, text] of file.split("\n").entries()) {
        annotated.push(`${lineHash(text, position)}│${text}`);
    }
    return annotated.join("\n");
}
27
/**
 * Build the anchor lookup tables for a file.
 *
 * @param {string} file - Full file text; lines are separated by "\n".
 * @returns {{rendered: string, anchorToIndex: Map<string, number>, ambiguous: Set<string>, lines: string[]}}
 *   `rendered` is the anchor-prefixed text, `anchorToIndex` maps every anchor
 *   held by exactly one line to that line's 0-based index, `ambiguous` holds the
 *   anchors shared by more than one line, and `lines` is the split input.
 */
export function anchorFile(file) {
    const lines = file.split("\n");
    // Group 0-based line indices by anchor so shared anchors can be detected.
    const indicesByAnchor = new Map();
    for (let i = 0; i < lines.length; i++) {
        const anchor = lineHash(lines[i], i);
        const bucket = indicesByAnchor.get(anchor);
        if (bucket === undefined) {
            indicesByAnchor.set(anchor, [i]);
        }
        else {
            bucket.push(i);
        }
    }
    const anchorToIndex = new Map();
    const ambiguous = new Set();
    indicesByAnchor.forEach((indices, anchor) => {
        // Position-folded anchors only clash when the 16-bit truncated sha1 collides.
        if (indices.length > 1) {
            ambiguous.add(anchor);
        }
        else {
            anchorToIndex.set(anchor, indices[0]);
        }
    });
    return { rendered: renderWithAnchors(file), anchorToIndex, ambiguous, lines };
}
47
/**
 * Check whether the line at `index` still hashes to `hash`.
 *
 * `resolveAnchoredEdit` uses this to reject edits against lines that changed
 * since their anchors were produced.
 *
 * @param {string[]} lines - Current file lines.
 * @param {number} index - 0-based line index to check.
 * @param {string} hash - Anchor previously produced for that line.
 * @returns {boolean} True only when `index` is a valid integer position in
 *   `lines` and the line's current anchor equals `hash`.
 */
export function verifyAnchor(lines, index, hash) {
    // NaN and fractional indices pass plain `<` / `>=` comparisons; without the
    // integer check `lines[index]` would be undefined and lineHash would throw.
    if (!Number.isInteger(index) || index < 0 || index >= lines.length) {
        return false;
    }
    return lineHash(lines[index], index) === hash;
}
56
/**
 * Resolve an anchored edit range against the current file lines.
 *
 * `anchor.start_line` / `anchor.end_line` are 1-based and inclusive; they are
 * converted to 0-based indices before checking.
 *
 * @param {string[]} lines - Current file lines.
 * @param {{start_line: number, end_line: number, start_hash: string, end_hash: string}} anchor
 *   The range endpoints and the anchors recorded for them.
 * @returns {{ok: true, startIndex: number, endIndex: number} | {ok: false, reason: string}}
 *   On failure `reason` is "out_of_range" (an endpoint is missing, not an
 *   integer, or outside the file), "reversed" (start after end), or
 *   "hash_mismatch" (an endpoint line no longer matches its anchor).
 */
export function resolveAnchoredEdit(lines, anchor) {
    const startIndex = anchor.start_line - 1;
    const endIndex = anchor.end_line - 1;
    // A missing or non-numeric line number becomes NaN here; NaN passes every
    // `<` / `>=` comparison, so it must be rejected explicitly instead of
    // falling through to the hash check and crashing there.
    const isValidIndex = (i) => Number.isInteger(i) && i >= 0 && i < lines.length;
    if (!isValidIndex(startIndex) || !isValidIndex(endIndex)) {
        return { ok: false, reason: "out_of_range" };
    }
    if (startIndex > endIndex) {
        return { ok: false, reason: "reversed" };
    }
    const startMatches = verifyAnchor(lines, startIndex, anchor.start_hash);
    if (!startMatches || !verifyAnchor(lines, endIndex, anchor.end_hash)) {
        return { ok: false, reason: "hash_mismatch" };
    }
    return { ok: true, startIndex, endIndex };
}
76
+ //# sourceMappingURL=hashline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hashline.js","sourceRoot":"","sources":["../../src/core/hashline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC;;;;GAIG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY,EAAE,KAAa;IAClD,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,GAAG,KAAK,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACxF,CAAC;AAED,uEAAuE;AACvE,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,OAAO,IAAI;SACR,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;SACvC,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAYD,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACrB,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACzB,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACZ,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACrB,CAAC,CAAC,CAAC;IACH,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;IAChD,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IACpC,KAAK,MAAM,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,MAAM,EAAE,CAAC;QAC/B,6EAA6E;QAC7E,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC;;YACjD,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACxB,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,iBAAiB,CAAC,IAAI,CAAC,EAAE,aAAa,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AAChF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,KAAe,EAAE,KAAa,EAAE,IAAY;IACvE,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IACrD,OAAO,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAE,EAAE,KAAK,CAAC,KAAK,IAAI,CAAC;AACjD,CAAC;AAyBD;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAe,EAAE,MAAkB;IACrE,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,GAAG,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC;IACrC,IAAI,UAAU,GAAG,CAAC,IAAI,QAAQ,GAAG,CAAC,IAAI,UAAU,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QAC7F,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,cAAc,
EAAE,CAAC;IAC/C,CAAC;IACD,IAAI,UAAU,GAAG,QAAQ,EAAE,CAAC;QAC1B,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;IAC3C,CAAC;IACD,IACE,CAAC,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC;QACnD,CAAC,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,CAAC,EAC/C,CAAC;QACD,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC;IAChD,CAAC;IACD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AAC5C,CAAC"}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Semantic AST-chunk retrieval vs whole-file reads — real-API measurement of
3
+ * whether Feature #3 is worth building.
4
+ *
5
+ * The claim (cocoindex-code / oh-my-pi): replacing "grep then read whole files"
6
+ * with "retrieve only the relevant AST chunks" cuts ~70% of the tokens an agent
7
+ * spends locating code, with no loss of answer quality. We test that directly on
8
+ * OUR OWN repo against a live model.
9
+ *
10
+ * For a set of natural-language questions about real files in this repo, we build
11
+ * three context strategies and ask the model the same question with each:
12
+ *
13
+ * BASELINE (whole-file): deliver the FULL text of the top files a lexical
14
+ * grep would surface — this is what the agent reads today (read + grep).
15
+ *
16
+ * SEMANTIC (AST chunks): parse every file into top-level declarations
17
+ * (functions / classes / interfaces / consts), rank chunks with a real BM25
18
+ * retriever, and deliver only the top-k chunks. No embedding model needed; a
19
+ * learned embedding retriever would land between BM25 and ORACLE.
20
+ *
21
+ * ORACLE (upper bound): deliver only the hand-labelled answer chunk(s) — the
22
+ * best case any retriever could achieve.
23
+ *
24
+ * We measure, per question and strategy: INPUT tokens delivered (the headline
25
+ * cost) and whether the model's answer was correct (deterministic keyword grade).
26
+ * The verdict: does SEMANTIC reach BASELINE-level correctness at a fraction of
27
+ * the input tokens?
28
+ *
29
+ * Usage:
30
+ * npx tsx src/core/semantic-search-benchmark.ts
31
+ *
32
+ * Env overrides:
33
+ * GG_SS_PROVIDER / GG_SS_MODEL (default openai / gpt-5.5)
34
+ * GG_SS_TOPK (chunks/files delivered, default 3)
35
+ */
36
+ /** Keyword grade: true when the lowercased `answer` contains every string in `mustInclude`. */ export declare function grade(answer: string, mustInclude: string[]): boolean;
37
+ //# sourceMappingURL=semantic-search-benchmark.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-search-benchmark.d.ts","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAwHH,wBAAgB,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAGpE"}
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Semantic AST-chunk retrieval vs whole-file reads — real-API measurement of
3
+ * whether Feature #3 is worth building.
4
+ *
5
+ * The claim (cocoindex-code / oh-my-pi): replacing "grep then read whole files"
6
+ * with "retrieve only the relevant AST chunks" cuts ~70% of the tokens an agent
7
+ * spends locating code, with no loss of answer quality. We test that directly on
8
+ * OUR OWN repo against a live model.
9
+ *
10
+ * For a set of natural-language questions about real files in this repo, we build
11
+ * three context strategies and ask the model the same question with each:
12
+ *
13
+ * BASELINE (whole-file): deliver the FULL text of the top files a lexical
14
+ * grep would surface — this is what the agent reads today (read + grep).
15
+ *
16
+ * SEMANTIC (AST chunks): parse every file into top-level declarations
17
+ * (functions / classes / interfaces / consts), rank chunks with a real BM25
18
+ * retriever, and deliver only the top-k chunks. No embedding model needed; a
19
+ * learned embedding retriever would land between BM25 and ORACLE.
20
+ *
21
+ * ORACLE (upper bound): deliver only the hand-labelled answer chunk(s) — the
22
+ * best case any retriever could achieve.
23
+ *
24
+ * We measure, per question and strategy: INPUT tokens delivered (the headline
25
+ * cost) and whether the model's answer was correct (deterministic keyword grade).
26
+ * The verdict: does SEMANTIC reach BASELINE-level correctness at a fraction of
27
+ * the input tokens?
28
+ *
29
+ * Usage:
30
+ * npx tsx src/core/semantic-search-benchmark.ts
31
+ *
32
+ * Env overrides:
33
+ * GG_SS_PROVIDER / GG_SS_MODEL (default openai / gpt-5.5)
34
+ * GG_SS_TOPK (chunks/files delivered, default 3)
35
+ */
36
+ import fs from "node:fs";
37
+ import path from "node:path";
38
+ import { fileURLToPath } from "node:url";
39
+ import { stream } from "@kenkaiiii/gg-ai";
40
+ import { AuthStorage } from "./auth-storage.js";
41
+ import { chunkFile, bm25Rank, rankFiles } from "./code-retrieval.js";
42
+ const HERE = path.dirname(fileURLToPath(import.meta.url)); // directory that contains this module
43
+ const SRC = path.resolve(HERE, ".."); // parent of HERE; packages/ggcoder/src when run from source — corpus paths below are joined onto it
44
+ const QUESTIONS = [ // benchmark cases, one object per question
45
+ {
46
+ q: "Which method resolves provider credentials and auto-refreshes expired OAuth tokens, and what happens if it is not logged in?", // question text sent to the model
47
+ files: ["core/auth-storage.ts", "core/loop-breaker.ts", "tools/edit-diff.ts"], // corpus files, relative to SRC
48
+ oracle: { file: "core/auth-storage.ts", symbol: "resolveCredentials" }, // hand-labelled answer chunk, matched on chunk file + symbol
49
+ mustInclude: ["resolvecredentials", "refresh"], // terms handed to grade(); written in lowercase
50
+ },
51
+ {
52
+ q: "What function performs fuzzy text matching for the edit tool, and how does it tolerate indentation drift?",
53
+ files: ["tools/edit-diff.ts", "core/auth-storage.ts", "core/checkpoint-store.ts"],
54
+ oracle: { file: "tools/edit-diff.ts", symbol: "fuzzyFindText" },
55
+ mustInclude: ["fuzzyfindtext", "indent"],
56
+ },
57
+ {
58
+ q: "What restore modes does the checkpoint / rewind system support?",
59
+ files: ["core/checkpoint-store.ts", "core/loop-breaker.ts", "tools/edit-diff.ts"],
60
+ oracle: { file: "core/checkpoint-store.ts", symbol: "RestoreMode" },
61
+ mustInclude: ["code", "conversation", "both"],
62
+ },
63
+ ];
64
/**
 * Pause for a while.
 *
 * @param {number} ms - Delay passed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
67
/**
 * Ask the model one question about the supplied context, retrying on failure.
 *
 * The request is streamed; `text_delta` events are accumulated as a fallback in
 * case the final response message carries no text. Up to 4 attempts are made,
 * waiting 2 s, 4 s and 6 s between them.
 *
 * @param {string} provider - Provider id forwarded to `stream`.
 * @param {string} model - Model id forwarded to `stream`.
 * @param {{apiKey: string, baseUrl?: string, accountId?: string}} c - Credentials forwarded to `stream`.
 * @param {string} context - Code context placed in the user message.
 * @param {string} question - Question placed in the user message.
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number}>}
 *   The answer text plus the token usage reported by the response.
 * @throws The error from the last attempt when every attempt fails.
 */
async function ask(provider, model, c, context, question) {
    const MAX_ATTEMPTS = 4;
    const messages = [
        {
            role: "system",
            content: "You answer questions about a codebase using ONLY the provided context. " +
                "Be specific: name the exact functions/types involved. If the context is insufficient, say so. " +
                "Answer in 1-3 sentences.",
        },
        { role: "user", content: `CONTEXT:\n${context}\n\nQUESTION: ${question}` },
    ];
    let lastErr;
    for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
        try {
            let text = "";
            const result = stream({
                provider: provider,
                model,
                messages,
                maxTokens: 512,
                apiKey: c.apiKey,
                baseUrl: c.baseUrl,
                accountId: c.accountId,
            });
            for await (const event of result) {
                if (event.type === "text_delta")
                    text += event.text;
            }
            const response = await result.response;
            const content = response.message.content;
            // Content is either a plain string or a list of parts; keep only the text parts.
            const finalText = typeof content === "string"
                ? content
                : content
                    .filter((p) => p.type === "text")
                    .map((p) => p.text ?? "")
                    .join("");
            return {
                text: finalText || text,
                inputTokens: response.usage.inputTokens,
                outputTokens: response.usage.outputTokens,
            };
        }
        catch (err) {
            lastErr = err;
            // Back off only when another attempt follows; sleeping after the
            // final failure would just delay the throw below.
            if (attempt < MAX_ATTEMPTS - 1) {
                await sleep(2000 * (attempt + 1));
            }
        }
    }
    throw lastErr;
}
115
/**
 * Deterministic keyword grade for a model answer.
 *
 * The comparison is case-insensitive on both sides, so required terms do not
 * have to be supplied in lowercase.
 *
 * @param {string} answer - The model's answer text.
 * @param {string[]} mustInclude - Terms that must all appear in the answer.
 * @returns {boolean} True when every term occurs in `answer` as a substring
 *   (always true for an empty term list).
 */
export function grade(answer, mustInclude) {
    const haystack = answer.toLowerCase();
    return mustInclude.every((term) => haystack.includes(term.toLowerCase()));
}
119
/**
 * Run the benchmark: for every entry in QUESTIONS, build the BASELINE
 * (whole-file), SEMANTIC (top-k chunks) and ORACLE (labelled chunk) contexts,
 * ask the model the question with each one, and print per-question and
 * aggregate input-token counts and keyword-grade results.
 *
 * Reads GG_SS_PROVIDER, GG_SS_MODEL and GG_SS_TOPK from the environment.
 * A question is skipped when any of its corpus files is missing under SRC.
 * The three requests per question are issued one after another with a pause
 * before each.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const provider = process.env.GG_SS_PROVIDER ?? "openai";
    const model = process.env.GG_SS_MODEL ?? "gpt-5.5";
    // A non-numeric GG_SS_TOPK parses to NaN and Math.max(1, NaN) is NaN, so
    // fall back to the documented default of 3 in that case.
    const parsedTopK = Number.parseInt(process.env.GG_SS_TOPK ?? "3", 10);
    const topK = Number.isNaN(parsedTopK) ? 3 : Math.max(1, parsedTopK);
    const auth = new AuthStorage();
    await auth.load();
    const cr = await auth.resolveCredentials(provider);
    const creds = { apiKey: cr.accessToken, baseUrl: cr.baseUrl, accountId: cr.accountId };
    console.log(`\n🔎 Semantic-search benchmark — ${provider}/${model} (top-${topK})\n`);
    const rows = [];
    for (const q of QUESTIONS) {
        // Load the real corpus files; skip the question if any are missing/moved.
        const files = new Map();
        let missing = false;
        for (const rel of q.files) {
            const abs = path.join(SRC, rel);
            if (!fs.existsSync(abs)) {
                console.log(` ⚠ skipping question — missing ${rel}`);
                missing = true;
                break;
            }
            files.set(rel, fs.readFileSync(abs, "utf-8"));
        }
        if (missing) {
            continue;
        }
        // BASELINE context: full text of the top-k files a grep would surface.
        const baseFiles = rankFiles(q.q, files, topK);
        const baseContext = baseFiles.map((f) => `// FILE: ${f}\n${files.get(f)}`).join("\n\n");
        // SEMANTIC context: top-k AST chunks across all corpus files.
        const allChunks = [...files].flatMap(([rel, src]) => chunkFile(rel, src));
        const semChunks = bm25Rank(q.q, allChunks, topK);
        const semContext = semChunks.map((c) => `// ${c.file} → ${c.symbol}\n${c.text}`).join("\n\n");
        // ORACLE context: just the labelled answer chunk.
        const oracleChunk = allChunks.find((c) => c.file === q.oracle.file && c.symbol === q.oracle.symbol);
        if (!oracleChunk) {
            // Without this notice the ORACLE column would silently repeat the
            // SEMANTIC measurement and look like a real upper bound.
            console.log(` ⚠ oracle chunk ${q.oracle.file} → ${q.oracle.symbol} not found — using SEMANTIC context for ORACLE`);
        }
        const oracleContext = oracleChunk
            ? `// ${oracleChunk.file} → ${oracleChunk.symbol}\n${oracleChunk.text}`
            : semContext;
        process.stdout.write(`▶ ${q.q.slice(0, 64)}…\n`);
        await sleep(1200);
        const base = await ask(provider, model, creds, baseContext, q.q);
        await sleep(1200);
        const sem = await ask(provider, model, creds, semContext, q.q);
        await sleep(1200);
        const oracle = await ask(provider, model, creds, oracleContext, q.q);
        const row = {
            q: q.q.slice(0, 40),
            baseInTok: base.inputTokens,
            baseOk: grade(base.text, q.mustInclude),
            semInTok: sem.inputTokens,
            semOk: grade(sem.text, q.mustInclude),
            oracleInTok: oracle.inputTokens,
            oracleOk: grade(oracle.text, q.mustInclude),
        };
        rows.push(row);
        process.stdout.write(` baseline ${row.baseInTok} in tok ${row.baseOk ? "OK" : "FAIL"} | ` +
            `semantic ${row.semInTok} in tok ${row.semOk ? "OK" : "FAIL"} | ` +
            `oracle ${row.oracleInTok} in tok ${row.oracleOk ? "OK" : "FAIL"}\n\n`);
    }
    if (rows.length === 0) {
        console.log("No questions ran (corpus files not found).");
        return;
    }
    // ── Report ──
    console.log("══════════════════════ RESULTS ══════════════════════\n");
    // Pad the first header cell to the same 40 columns the rows use so the table lines up.
    console.log(`${"Question".padEnd(40)} | base in-tok | sem in-tok | oracle | ok b/s/o`);
    for (const r of rows) {
        console.log(`${r.q.padEnd(40)} | ${String(r.baseInTok).padStart(11)} | ${String(r.semInTok).padStart(10)} | ` +
            `${String(r.oracleInTok).padStart(6)} | ${r.baseOk ? "1" : "0"}/${r.semOk ? "1" : "0"}/${r.oracleOk ? "1" : "0"}`);
    }
    const sum = (f) => rows.reduce((s, r) => s + f(r), 0);
    const baseIn = sum((r) => r.baseInTok);
    const semIn = sum((r) => r.semInTok);
    const oracleIn = sum((r) => r.oracleInTok);
    // Guard the ratio: a zero or unknown baseline would otherwise print "NaN%".
    const pctFewer = (tokens) => baseIn > 0 ? `${(((baseIn - tokens) / baseIn) * 100).toFixed(0)}%` : "n/a";
    console.log(`\nInput tokens to answer: baseline ${baseIn} | semantic ${semIn} ` +
        `(${pctFewer(semIn)} fewer) | oracle ${oracleIn} ` +
        `(${pctFewer(oracleIn)} fewer)`);
    console.log(`Correctness: baseline ${rows.filter((r) => r.baseOk).length}/${rows.length} | ` +
        `semantic ${rows.filter((r) => r.semOk).length}/${rows.length} | ` +
        `oracle ${rows.filter((r) => r.oracleOk).length}/${rows.length}`);
    console.log(`\nVerdict: worth building if SEMANTIC keeps correctness ≈ baseline while cutting input tokens. ` +
        `cocoindex claims ~70% fewer; ORACLE shows the ceiling a better (embedding) retriever could reach.\n`);
}
201
// Start the benchmark only when this module is the script Node was launched
// with; importing it (for example from tests) must not trigger a run.
const launchedScript = process.argv[1];
const isDirectRun = [
    "semantic-search-benchmark.ts",
    "semantic-search-benchmark.js",
    "semantic-search-benchmark",
].some((suffix) => launchedScript?.endsWith(suffix));
if (isDirectRun) {
    main().catch((err) => {
        console.error("Benchmark failed:", err);
        process.exit(1);
    });
}
211
+ //# sourceMappingURL=semantic-search-benchmark.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-search-benchmark.js","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,MAAM,EAA8C,MAAM,kBAAkB,CAAC;AACtF,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErE,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,uBAAuB;AAc7D,MAAM,SAAS,GAAe;IAC5B;QACE,CAAC,EAAE,8HAA8H;QACjI,KAAK,EAAE,CAAC,sBAAsB,EAAE,sBAAsB,EAAE,oBAAoB,CAAC;QAC7E,MAAM,EAAE,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,EAAE,oBAAoB,EAAE;QACtE,WAAW,EAAE,CAAC,oBAAoB,EAAE,SAAS,CAAC;KAC/C;IACD;QACE,CAAC,EAAE,2GAA2G;QAC9G,KAAK,EAAE,CAAC,oBAAoB,EAAE,sBAAsB,EAAE,0BAA0B,CAAC;QACjF,MAAM,EAAE,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,eAAe,EAAE;QAC/D,WAAW,EAAE,CAAC,eAAe,EAAE,QAAQ,CAAC;KACzC;IACD;QACE,CAAC,EAAE,iEAAiE;QACpE,KAAK,EAAE,CAAC,0BAA0B,EAAE,sBAAsB,EAAE,oBAAoB,CAAC;QACjF,MAAM,EAAE,EAAE,IAAI,EAAE,0BAA0B,EAAE,MAAM,EAAE,aAAa,EAAE;QACnE,WAAW,EAAE,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;KAC9C;CACF,CAAC;AAYF,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;AAC/C,CAAC;AAQD,KAAK,UAAU,GAAG,CAChB,QAAgB,EAChB,KAAa,EACb,CAAQ,EACR,OAAe,EACf,QAAgB;IAEhB,MAAM,QAAQ,GAAc;QAC1B;YACE,IAAI,EAAE,QAAQ;YACd,OAAO,EACL,yEAAyE;gBACzE,gGAAgG;gBAChG,0BAA0B;SAC7B;QACD,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,OAAO,iBAAiB,QAAQ,EAAE,EAAE;KAC3E,CAAC;IACF,IAAI,OAAgB,CAAC;IACrB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC;YACH,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,MAAM,MAAM,GAAG,MAAM,CAAC;gBACpB,QAAQ,EAAE,QAAiB;gBAC3B,KAAK;gBACL,QAAQ;gBACR,SAAS,EAAE,GAAG;gBACd,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC;YACH,IAAI,KAAK,EAAE
,MAAM,KAAK,IAAI,MAAoC,EAAE,CAAC;gBAC/D,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY;oBAAE,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC;YACtD,CAAC;YACD,MAAM,QAAQ,GAAuC,MAAM,MAAM,CAAC,QAAQ,CAAC;YAC3E,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC;YACzC,MAAM,SAAS,GACb,OAAO,OAAO,KAAK,QAAQ;gBACzB,CAAC,CAAC,OAAO;gBACT,CAAC,CAAE,OAAkD;qBAChD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;qBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;qBACxB,IAAI,CAAC,EAAE,CAAC,CAAC;YAClB,OAAO;gBACL,IAAI,EAAE,SAAS,IAAI,IAAI;gBACvB,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,WAAW;gBACvC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;aAC1C,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,GAAG,GAAG,CAAC;YACd,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IACD,MAAM,OAAO,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,MAAc,EAAE,WAAqB;IACzD,MAAM,CAAC,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;IAC/B,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;AACjD,CAAC;AAcD,KAAK,UAAU,IAAI;IACjB,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,QAAQ,CAAC;IACxD,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC;IACnD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC;IAEtE,MAAM,IAAI,GAAG,IAAI,WAAW,EAAE,CAAC;IAC/B,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;IAClB,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,KAAK,GAAU,EAAE,MAAM,EAAE,EAAE,CAAC,WAAW,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC,SAAS,EAAE,CAAC;IAE9F,OAAO,CAAC,GAAG,CAAC,oCAAoC,QAAQ,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,CAAC;IAErF,MAAM,IAAI,GAAU,EAAE,CAAC;IAEvB,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,0EAA0E;QAC1E,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;QACxC,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,KAAK,MAAM,GAAG,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,OAAO,CAAC,GAAG,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;gBACvD,OAAO,GAAG,IAAI,CAAC;gBACf,MAA
M;YACR,CAAC;YACD,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC;QAChD,CAAC;QACD,IAAI,OAAO;YAAE,SAAS;QAEtB,uEAAuE;QACvE,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;QAC9C,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExF,8DAA8D;QAC9D,MAAM,SAAS,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;QAC1E,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QACjD,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAE9F,kDAAkD;QAClD,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAChC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,CAChE,CAAC;QACF,MAAM,aAAa,GAAG,WAAW;YAC/B,CAAC,CAAC,MAAM,WAAW,CAAC,IAAI,MAAM,WAAW,CAAC,MAAM,KAAK,WAAW,CAAC,IAAI,EAAE;YACvE,CAAC,CAAC,UAAU,CAAC;QAEf,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;QACjD,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;QAClB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;QAClB,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;QAClB,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAErE,MAAM,GAAG,GAAQ;YACf,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;YACnB,SAAS,EAAE,IAAI,CAAC,WAAW;YAC3B,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,CAAC;YACvC,QAAQ,EAAE,GAAG,CAAC,WAAW;YACzB,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,CAAC;YACrC,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,CAAC;SAC5C,CAAC;QACF,IAAI
,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACf,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,eAAe,GAAG,CAAC,SAAS,WAAW,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,KAAK;YACpE,YAAY,GAAG,CAAC,QAAQ,WAAW,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,KAAK;YACjE,UAAU,GAAG,CAAC,WAAW,WAAW,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,MAAM,CACzE,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC,CAAC;QAC1D,OAAO;IACT,CAAC;IAED,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,yDAAyD,CAAC,CAAC;IACvE,OAAO,CAAC,GAAG,CACT,yFAAyF,CAC1F,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAM,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,KAAK;YAC/F,GAAG,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CACpH,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAG,CAAC,CAAqB,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1E,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IACvC,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CACT,sCAAsC,MAAM,eAAe,KAAK,GAAG;QACjE,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,QAAQ,GAAG;QAClF,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,QAAQ,CAAC,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAClE,CAAC;IACF,OAAO,CAAC,GAAG,CACT,yBAAyB,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,KAAK;QAC9E,YAAY,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,KAAK;QAClE,U
AAU,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,EAAE,CACnE,CAAC;IACF,OAAO,CAAC,GAAG,CACT,iGAAiG;QAC/F,qGAAqG,CACxG,CAAC;AACJ,CAAC;AAED,2DAA2D;AAC3D,MAAM,WAAW,GACf,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,8BAA8B,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,8BAA8B,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,2BAA2B,CAAC,CAAC;AAEzD,IAAI,WAAW,EAAE,CAAC;IAChB,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACnB,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=semantic-search-benchmark.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-search-benchmark.test.d.ts","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,89 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { chunkFile, tokenize, bm25Rank, rankFiles } from "./code-retrieval.js";
3
+ import { grade } from "./semantic-search-benchmark.js";
4
+ /**
5
+ * Deterministic tests for the retrieval half of the semantic-search benchmark.
6
+ * The token-saving claim is only meaningful if AST chunking, tokenization, BM25
7
+ * ranking, and the answer grader are correct. We test parsing across every node
8
+ * kind, full-body capture, camelCase splitting, ranking relevance, the top-k
9
+ * cap, and the grader's all-tokens-required strictness.
10
+ */
11
+ const SAMPLE = `
12
+ import { x } from "y";
13
+
14
+ export interface Session { id: string; userId: string; }
15
+
16
+ export type Mode = "code" | "conversation" | "both";
17
+
18
+ export enum Color { Red, Green }
19
+
20
+ const DEFAULT_TTL = 3000;
21
+
22
+ /** Sign a session token with HMAC-SHA256. */
23
+ export function signSessionToken(s: Session, secret: string): string {
24
+ return s.id + secret + "SIGNED_MARKER";
25
+ }
26
+
27
+ export class LruCache {
28
+ evictLeastRecentlyUsed(): void {}
29
+ }
30
+ `;
31
// Chunker coverage: SAMPLE holds one declaration of each supported kind, and
// every one of them must come back as a named chunk (imports must not).
describe("chunkFile (AST chunking)", () => {
  const parsed = chunkFile("sample.ts", SAMPLE);
  const names = parsed.map(({ symbol }) => symbol);

  /** Assert, in order, that each given symbol name was emitted as a chunk. */
  const expectSymbols = (...expected) => {
    for (const name of expected) {
      expect(names).toContain(name);
    }
  };

  it("1. extracts a top-level function declaration by name", () => {
    expectSymbols("signSessionToken");
  });

  it("2. extracts class and interface declarations", () => {
    expectSymbols("LruCache", "Session");
  });

  it("3. extracts type alias, enum, and top-level const", () => {
    expectSymbols("Mode", "Color", "DEFAULT_TTL");
  });

  it("4. captures the FULL body text of a chunk (not just the signature)", () => {
    // The marker string only appears inside the function body, so finding it
    // proves the chunk text extends past the signature line.
    const signer = parsed.find(({ symbol }) => symbol === "signSessionToken");
    expect(signer.text).toContain("SIGNED_MARKER");
    expect(signer.file).toBe("sample.ts");
  });

  it("5. ignores import statements (no spurious chunk)", () => {
    expect(names).not.toContain("x");
    // Session, Mode, Color, DEFAULT_TTL, signSessionToken, LruCache
    expect(parsed.length).toBe(6);
  });
});
56
// Tokenizer coverage: identifier splitting and case normalisation.
describe("tokenize", () => {
  it("6. splits camelCase identifiers into separate terms", () => {
    const terms = tokenize("resolveCredentials");
    for (const part of ["resolve", "credentials"]) {
      expect(terms).toContain(part);
    }
  });

  it("7. lowercases and splits on non-word boundaries (snake/punctuation)", () => {
    const terms = tokenize("DEFAULT_TTL = signSessionToken()");
    const expectedTerms = ["default", "ttl", "sign", "session", "token"];
    expect(terms).toEqual(expect.arrayContaining(expectedTerms));

    // No emitted term may keep an uppercase character.
    const allLowercase = terms.every((term) => term === term.toLowerCase());
    expect(allLowercase).toBe(true);
  });
});
68
// Ranking coverage: BM25 relevance ordering, the top-k cap, file-level
// ranking, and the strictness of the answer grader.
describe("bm25Rank + rankFiles (retrieval)", () => {
  const corpus = chunkFile("sample.ts", SAMPLE);

  it("8. ranks the chunk matching the query's terms first", () => {
    const ranked = bm25Rank("how are session tokens signed", corpus, 1);
    expect(ranked[0].symbol).toBe("signSessionToken");
  });

  it("9. never returns more than top-k chunks", () => {
    const capped = bm25Rank("session", corpus, 2);
    expect(capped.length).toBeLessThanOrEqual(2);

    // A k larger than the corpus returns every chunk.
    const uncapped = bm25Rank("session", corpus, 100);
    expect(uncapped.length).toBe(corpus.length);
  });

  it("10. rankFiles surfaces the relevant file and grade requires ALL tokens", () => {
    const sources = new Map();
    sources.set("auth.ts", SAMPLE);
    sources.set("unrelated.ts", "export const pi = 3.14; // geometry helpers only");

    const bestFile = rankFiles("sign a session token", sources, 1)[0];
    expect(bestFile).toBe("auth.ts");

    // grade is strict: every required token must be present (case-insensitive).
    const completeAnswer = grade("It uses signSessionToken with refresh logic", [
      "signsessiontoken",
      "refresh",
    ]);
    expect(completeAnswer).toBe(true);

    const partialAnswer = grade("It uses signSessionToken", ["signsessiontoken", "refresh"]);
    expect(partialAnswer).toBe(false);
  });
});
89
+ //# sourceMappingURL=semantic-search-benchmark.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-search-benchmark.test.js","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAC/E,OAAO,EAAE,KAAK,EAAE,MAAM,gCAAgC,CAAC;AAEvD;;;;;;GAMG;AAEH,MAAM,MAAM,GAAG;;;;;;;;;;;;;;;;;;;CAmBd,CAAC;AAEF,QAAQ,CAAC,0BAA0B,EAAE,GAAG,EAAE;IACxC,MAAM,MAAM,GAAG,SAAS,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAE5C,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;QACtC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oEAAoE,EAAE,GAAG,EAAE;QAC5E,MAAM,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,kBAAkB,CAAE,CAAC;QAChE,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAC3C,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,gEAAgE;IACjG,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,IAAI,GAAG,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qEAAqE,EAAE,GAAG,EAAE;QAC7E,MAAM,IAAI,GAAG,QAAQ,CAAC,kCAAkC,CAAC,CAAC;QAC1D,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,CA
AC,eAAe,CAAC,CAAC,SAAS,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7F,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,kCAAkC,EAAE,GAAG,EAAE;IAChD,MAAM,MAAM,GAAG,SAAS,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IAE9C,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,GAAG,GAAG,QAAQ,CAAC,+BAA+B,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;QACjE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,CAAC,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;QACrE,MAAM,CAAC,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wEAAwE,EAAE,GAAG,EAAE;QAChF,MAAM,KAAK,GAAG,IAAI,GAAG,CAAiB;YACpC,CAAC,SAAS,EAAE,MAAM,CAAC;YACnB,CAAC,cAAc,EAAE,kDAAkD,CAAC;SACrE,CAAC,CAAC;QACH,MAAM,CAAC,SAAS,CAAC,sBAAsB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEvE,4EAA4E;QAC5E,MAAM,CACJ,KAAK,CAAC,6CAA6C,EAAE,CAAC,kBAAkB,EAAE,SAAS,CAAC,CAAC,CACtF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACb,MAAM,CAAC,KAAK,CAAC,0BAA0B,EAAE,CAAC,kBAAkB,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzF,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,25 @@
1
+ import type { TextContent, ImageContent, VideoContent } from "@kenkaiiii/gg-ai";
2
+ type ContentPart = TextContent | ImageContent | VideoContent;
3
+ type UserContent = string | ContentPart[];
4
+ /**
5
+ * Framing prepended to a mid-run steering message (a prompt the user submitted
6
+ * while the agent was already working).
7
+ *
8
+ * Without this wrapper the queued text arrives as a bare top-level user turn,
9
+ * identical to a brand-new request — so models treat it as the authoritative
10
+ * instruction and silently abandon the original task. The wrapper names the
11
+ * relationship (a second, concurrent instruction) and the one rule that kills
12
+ * the failure mode: don't drop either side. The model already knows how to
13
+ * merge two live instructions once it knows both are in force.
14
+ */
15
+ export declare const STEERING_PREFIX: string;
16
+ /** Wrap a plain-text steering message with the framing prefix. */
17
+ export declare function wrapSteeringText(text: string): string;
18
+ /**
19
+ * Wrap a steering `UserContent` (string or multimodal parts) with the framing
20
+ * prefix. Media blocks pass through untouched; for multimodal input the prefix
21
+ * is added as its own leading text block so attachments still ride the same native-block path.
22
+ */
23
+ export declare function wrapSteeringContent(content: UserContent): UserContent;
24
+ export {};
25
+ //# sourceMappingURL=steering.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"steering.d.ts","sourceRoot":"","sources":["../../src/core/steering.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhF,KAAK,WAAW,GAAG,WAAW,GAAG,YAAY,GAAG,YAAY,CAAC;AAC7D,KAAK,WAAW,GAAG,MAAM,GAAG,WAAW,EAAE,CAAC;AAE1C;;;;;;;;;;GAUG;AACH,eAAO,MAAM,eAAe,QAGd,CAAC;AAEf,kEAAkE;AAClE,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;;;GAIG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,WAAW,GAAG,WAAW,CAGrE"}
@@ -0,0 +1,29 @@
1
/**
 * Framing text placed in front of a mid-run steering message, i.e. a prompt
 * the user submitted while the agent was already working.
 *
 * A queued message that arrives unwrapped looks exactly like a fresh top-level
 * user turn, so models tend to treat it as the only instruction and silently
 * abandon the task in progress. The prefix states the relationship (a second,
 * concurrent instruction) and the single rule that prevents that failure:
 * keep both the original work and the new request.
 *
 * The value ends with a blank line so the user's text starts its own paragraph.
 */
export const STEERING_PREFIX = [
  "[The user added this while you were working — fold it into the current ",
  "task, adjusting or extending as needed. Don't drop your original work or ",
  "this.]\n\n",
].join("");

/**
 * Wrap a plain-text steering message with the framing prefix.
 *
 * @param {string} text - The user's steering message.
 * @returns {string} `STEERING_PREFIX` immediately followed by `text`.
 */
export function wrapSteeringText(text) {
  return STEERING_PREFIX + text;
}

/**
 * Wrap steering content (a plain string or an array of content parts) with the
 * framing prefix.
 *
 * - String input is delegated to `wrapSteeringText`.
 * - Array input yields a NEW array: one `{ type: "text" }` part carrying the
 *   prefix, followed by the caller's parts in their original order. The
 *   caller's parts are neither copied nor modified, and the input array is
 *   left untouched.
 *
 * @param {string | Array<object>} content - Steering message to wrap.
 * @returns {string | Array<object>} Wrapped content of the same kind as the input.
 */
export function wrapSteeringContent(content) {
  if (typeof content !== "string") {
    const framing = { type: "text", text: STEERING_PREFIX };
    return [framing, ...content];
  }
  return wrapSteeringText(content);
}
29
+ //# sourceMappingURL=steering.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"steering.js","sourceRoot":"","sources":["../../src/core/steering.ts"],"names":[],"mappings":"AAKA;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,eAAe,GAC1B,yEAAyE;IACzE,2EAA2E;IAC3E,YAAY,CAAC;AAEf,kEAAkE;AAClE,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,OAAO,eAAe,GAAG,IAAI,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,OAAoB;IACtD,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAClE,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,eAAe,EAAiB,EAAE,GAAG,OAAO,CAAC,CAAC;AAC9E,CAAC"}
@@ -11,6 +11,12 @@ declare const EditParams: z.ZodObject<{
11
11
  old_text: z.ZodString;
12
12
  new_text: z.ZodString;
13
13
  replace_all: z.ZodOptional<z.ZodBoolean>;
14
+ anchor: z.ZodOptional<z.ZodObject<{
15
+ start_line: z.ZodNumber;
16
+ start_hash: z.ZodString;
17
+ end_line: z.ZodNumber;
18
+ end_hash: z.ZodString;
19
+ }, z.core.$strip>>;
14
20
  }, z.core.$strip>>>;
15
21
  atomic: z.ZodOptional<z.ZodBoolean>;
16
22
  }, z.core.$strip>;
@@ -1 +1 @@
1
- {"version":3,"file":"edit.d.ts","sourceRoot":"","sources":["../../src/tools/edit.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAYrD,OAAO,EAAmB,KAAK,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACvE,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAG/E,KAAK,gBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAEnE,uGAAuG;AACvG,KAAK,mBAAmB,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;AAuClF,QAAA,MAAM,UAAU;;;;;;;;iBAgBd,CAAC;AAsDH,wBAAgB,cAAc,CAC5B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,WAAW,EACvB,GAAG,GAAE,cAAgC,EACrC,0BAA0B,CAAC,EAAE;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,GAAG,gBAAgB,EACpE,aAAa,CAAC,EAAE,gBAAgB,EAChC,iBAAiB,CAAC,EAAE,gBAAgB,EACpC,cAAc,CAAC,EAAE,mBAAmB,GACnC,SAAS,CAAC,OAAO,UAAU,CAAC,CAgP9B"}
1
+ {"version":3,"file":"edit.d.ts","sourceRoot":"","sources":["../../src/tools/edit.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAYrD,OAAO,EAAmB,KAAK,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACvE,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAI/E,KAAK,gBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAEnE,uGAAuG;AACvG,KAAK,mBAAmB,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;AAqDlF,QAAA,MAAM,UAAU;;;;;;;;;;;;;;iBAgBd,CAAC;AAuDH,wBAAgB,cAAc,CAC5B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,WAAW,EACvB,GAAG,GAAE,cAAgC,EACrC,0BAA0B,CAAC,EAAE;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,GAAG,gBAAgB,EACpE,aAAa,CAAC,EAAE,gBAAgB,EAChC,iBAAiB,CAAC,EAAE,gBAAgB,EACpC,cAAc,CAAC,EAAE,mBAAmB,GACnC,SAAS,CAAC,OAAO,UAAU,CAAC,CAsQ9B"}
@@ -4,6 +4,7 @@ import { resolvePath, rejectSymlink } from "./path-utils.js";
4
4
  import { fuzzyFindText, countOccurrences, generateDiff, findClosestSnippet, findOccurrenceLines, stripBlankEdges, applyDotdotdots, applyMissingLeadingWhitespace, } from "./edit-diff.js";
5
5
  import { localOperations } from "./operations.js";
6
6
  import { assertFresh, recordWrite } from "./read-tracker.js";
7
+ import { resolveAnchoredEdit } from "../core/hashline.js";
7
8
  import { isPlanModeActive, planModeRestriction } from "../core/runtime-mode.js";
8
9
  function isMutationCallback(value) {
9
10
  return typeof value === "function";
@@ -13,6 +14,14 @@ function isPlanModeRef(value) {
13
14
  value !== null &&
14
15
  typeof value.current === "boolean");
15
16
  }
17
// Schema for the optional per-edit `anchor`: the first and last edited lines,
// each given as a 1-based integer line number plus a content-anchor string.
// The field builders are kept local so no extra module-level names are added.
const EditAnchorSchema = (() => {
  const lineNumber = (description) => z.number().int().min(1).describe(description);
  const contentHash = (description) => z.string().describe(description);

  return z.object({
    start_line: lineNumber("1-based line number of the first edited line"),
    start_hash: contentHash("Content anchor of the first line (from a read with anchors:true)"),
    end_line: lineNumber("1-based line number of the last edited line"),
    end_hash: contentHash("Content anchor of the last line"),
  });
})();
16
25
  const EditItem = z.object({
17
26
  old_text: z.string().describe("The exact text to find and replace"),
18
27
  new_text: z.string().describe("The replacement text"),
@@ -21,6 +30,9 @@ const EditItem = z.object({
21
30
  .optional()
22
31
  .describe("Replace every occurrence of old_text instead of requiring a unique match. " +
23
32
  "Use for renames or repeated tokens. Defaults to false."),
33
+ anchor: EditAnchorSchema.optional().describe("Optional staleness guard. When set (using line+hash anchors from a read with anchors:true), " +
34
+ "the edit is rejected if the file changed since you read it. old_text/new_text still drive the " +
35
+ "actual replacement."),
24
36
  });
25
37
  // Some models (Opus 4.6, GLM-5.1) occasionally send `edits` as a JSON string
26
38
  // instead of a real array, which trips Zod and makes the model fall back to
@@ -91,7 +103,9 @@ export function createEditTool(cwd, readFiles, ops = localOperations, planModeRe
91
103
  "Each old_text should identify one location — include surrounding context; set replace_all: true only for deliberate global replacements/renames. " +
92
104
  "The matcher tolerates safe whitespace/quote/dash drift, but do not paraphrase. For long blocks, a line containing only `...` in BOTH old_text and new_text elides a middle preserved verbatim. " +
93
105
  "Partial-apply by default: failed edits are listed for retry, successful ones are still written — " +
94
- "re-issue ONLY the listed failures, not the whole batch. Returns a unified diff.",
106
+ "re-issue ONLY the listed failures, not the whole batch. " +
107
+ "Optionally pin an edit with `anchor` (line+hash from a read with anchors:true) to reject it if the file changed since you read it. " +
108
+ "Returns a unified diff.",
95
109
  parameters: EditParams,
96
110
  executionMode: "sequential",
97
111
  async execute({ file_path, edits, atomic = false }) {
@@ -105,10 +119,25 @@ export function createEditTool(cwd, readFiles, ops = localOperations, planModeRe
105
119
  const hasCRLF = original.includes("\r\n");
106
120
  const originalNormalized = hasCRLF ? original.replace(/\r\n/g, "\n") : original;
107
121
  let working = originalNormalized;
122
+ // Anchors pin lines in the file AS READ, so they always verify against the
123
+ // original (pre-edit) line array — earlier edits in the batch don't shift
124
+ // what an anchor refers to.
125
+ const originalLines = originalNormalized.split("\n");
108
126
  const fileName = path.basename(resolved);
109
127
  const outcomes = new Array(edits.length);
110
128
  for (let i = 0; i < edits.length; i++) {
111
- const { old_text, new_text, replace_all } = edits[i];
129
+ const { old_text, new_text, replace_all, anchor } = edits[i];
130
+ // Optional staleness guard (opt-in). Runs BEFORE the fuzzy match ladder:
131
+ // if the model supplied an anchor and the file drifted since it read it,
132
+ // reject this edit instead of risking a misplaced fuzzy match. The fuzzy
133
+ // path below is byte-identical to today when `anchor` is absent.
134
+ if (anchor) {
135
+ const res = resolveAnchoredEdit(originalLines, anchor);
136
+ if (!res.ok) {
137
+ outcomes[i] = { ok: false, failure: { reason: "stale_anchor" } };
138
+ continue;
139
+ }
140
+ }
112
141
  const normalizedOld = hasCRLF ? old_text.replace(/\r\n/g, "\n") : old_text;
113
142
  const normalizedNew = hasCRLF ? new_text.replace(/\r\n/g, "\n") : new_text;
114
143
  const replaceAll = replace_all ?? false;
@@ -197,6 +226,9 @@ export function createEditTool(cwd, readFiles, ops = localOperations, planModeRe
197
226
  // and the snippet is its only guidance — keep it.
198
227
  const willPersistSuccesses = successCount > 0 && !atomic;
199
228
  const formatFailureMessage = (f) => {
229
+ if (f.reason === "stale_anchor") {
230
+ return `the file changed since you read it (anchor mismatch) — re-read \`${file_path}\` and retry`;
231
+ }
200
232
  if (f.reason === "noop") {
201
233
  return `old_text and new_text are identical in ${fileName} — this edit would be a no-op. Either fix new_text or drop this edit.`;
202
234
  }