@kenkaiiii/ggcoder 4.14.2 → 4.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +8 -2
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/code-retrieval.d.ts +13 -0
- package/dist/core/code-retrieval.d.ts.map +1 -0
- package/dist/core/code-retrieval.js +87 -0
- package/dist/core/code-retrieval.js.map +1 -0
- package/dist/core/hashline-edit-benchmark.d.ts +55 -0
- package/dist/core/hashline-edit-benchmark.d.ts.map +1 -0
- package/dist/core/hashline-edit-benchmark.js +342 -0
- package/dist/core/hashline-edit-benchmark.js.map +1 -0
- package/dist/core/hashline-edit-benchmark.test.d.ts +2 -0
- package/dist/core/hashline-edit-benchmark.test.d.ts.map +1 -0
- package/dist/core/hashline-edit-benchmark.test.js +141 -0
- package/dist/core/hashline-edit-benchmark.test.js.map +1 -0
- package/dist/core/hashline.d.ts +50 -0
- package/dist/core/hashline.d.ts.map +1 -0
- package/dist/core/hashline.js +76 -0
- package/dist/core/hashline.js.map +1 -0
- package/dist/core/semantic-search-benchmark.d.ts +37 -0
- package/dist/core/semantic-search-benchmark.d.ts.map +1 -0
- package/dist/core/semantic-search-benchmark.js +211 -0
- package/dist/core/semantic-search-benchmark.js.map +1 -0
- package/dist/core/semantic-search-benchmark.test.d.ts +2 -0
- package/dist/core/semantic-search-benchmark.test.d.ts.map +1 -0
- package/dist/core/semantic-search-benchmark.test.js +89 -0
- package/dist/core/semantic-search-benchmark.test.js.map +1 -0
- package/dist/core/steering.d.ts +25 -0
- package/dist/core/steering.d.ts.map +1 -0
- package/dist/core/steering.js +29 -0
- package/dist/core/steering.js.map +1 -0
- package/dist/tools/edit.d.ts +6 -0
- package/dist/tools/edit.d.ts.map +1 -1
- package/dist/tools/edit.js +34 -2
- package/dist/tools/edit.js.map +1 -1
- package/dist/tools/edit.test.js +112 -0
- package/dist/tools/edit.test.js.map +1 -1
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +3 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/prompt-hints.d.ts.map +1 -1
- package/dist/tools/prompt-hints.js +8 -0
- package/dist/tools/prompt-hints.js.map +1 -1
- package/dist/tools/read.d.ts +1 -0
- package/dist/tools/read.d.ts.map +1 -1
- package/dist/tools/read.js +14 -3
- package/dist/tools/read.js.map +1 -1
- package/dist/tools/read.test.js +40 -0
- package/dist/tools/read.test.js.map +1 -1
- package/dist/tools/search-code.d.ts +11 -0
- package/dist/tools/search-code.d.ts.map +1 -0
- package/dist/tools/search-code.js +95 -0
- package/dist/tools/search-code.js.map +1 -0
- package/dist/tools/search-code.test.d.ts +2 -0
- package/dist/tools/search-code.test.d.ts.map +1 -0
- package/dist/tools/search-code.test.js +77 -0
- package/dist/tools/search-code.test.js.map +1 -0
- package/dist/ui/App.d.ts +2 -1
- package/dist/ui/App.d.ts.map +1 -1
- package/dist/ui/components/ActivityIndicator.d.ts +1 -1
- package/dist/ui/components/ActivityIndicator.d.ts.map +1 -1
- package/dist/ui/components/AnimationContext.d.ts +1 -1
- package/dist/ui/components/AnimationContext.d.ts.map +1 -1
- package/dist/ui/components/AssistantMessage.d.ts +1 -1
- package/dist/ui/components/AssistantMessage.d.ts.map +1 -1
- package/dist/ui/components/BackgroundTasksBar.d.ts +2 -1
- package/dist/ui/components/BackgroundTasksBar.d.ts.map +1 -1
- package/dist/ui/components/Banner.d.ts +2 -1
- package/dist/ui/components/Banner.d.ts.map +1 -1
- package/dist/ui/components/ChatFooterPane.d.ts +2 -1
- package/dist/ui/components/ChatFooterPane.d.ts.map +1 -1
- package/dist/ui/components/ChatInputStack.d.ts +1 -1
- package/dist/ui/components/ChatInputStack.d.ts.map +1 -1
- package/dist/ui/components/ChatLayout.d.ts +4 -4
- package/dist/ui/components/ChatLayout.d.ts.map +1 -1
- package/dist/ui/components/ChatLivePane.d.ts +1 -1
- package/dist/ui/components/ChatLivePane.d.ts.map +1 -1
- package/dist/ui/components/ChatScreen.d.ts +1 -1
- package/dist/ui/components/ChatScreen.d.ts.map +1 -1
- package/dist/ui/components/ChatStatusRow.d.ts +1 -1
- package/dist/ui/components/ChatStatusRow.d.ts.map +1 -1
- package/dist/ui/components/CompactionNotice.d.ts +3 -2
- package/dist/ui/components/CompactionNotice.d.ts.map +1 -1
- package/dist/ui/components/DiffFrame.d.ts +1 -1
- package/dist/ui/components/DiffFrame.d.ts.map +1 -1
- package/dist/ui/components/DiffView.d.ts +2 -1
- package/dist/ui/components/DiffView.d.ts.map +1 -1
- package/dist/ui/components/Footer.d.ts +2 -1
- package/dist/ui/components/Footer.d.ts.map +1 -1
- package/dist/ui/components/FooterStatusRow.d.ts +2 -1
- package/dist/ui/components/FooterStatusRow.d.ts.map +1 -1
- package/dist/ui/components/FullScreenOverlayRouter.d.ts +2 -1
- package/dist/ui/components/FullScreenOverlayRouter.d.ts.map +1 -1
- package/dist/ui/components/IdealHookMessage.d.ts +1 -1
- package/dist/ui/components/IdealHookMessage.d.ts.map +1 -1
- package/dist/ui/components/InputArea.d.ts +1 -1
- package/dist/ui/components/InputArea.d.ts.map +1 -1
- package/dist/ui/components/LiveToolPanel.d.ts +2 -1
- package/dist/ui/components/LiveToolPanel.d.ts.map +1 -1
- package/dist/ui/components/Markdown.d.ts +2 -2
- package/dist/ui/components/Markdown.d.ts.map +1 -1
- package/dist/ui/components/ModelSelector.d.ts +2 -1
- package/dist/ui/components/ModelSelector.d.ts.map +1 -1
- package/dist/ui/components/Overlay.d.ts +1 -1
- package/dist/ui/components/Overlay.d.ts.map +1 -1
- package/dist/ui/components/PixelOverlay.d.ts +2 -1
- package/dist/ui/components/PixelOverlay.d.ts.map +1 -1
- package/dist/ui/components/PlanApproval.d.ts +2 -1
- package/dist/ui/components/PlanApproval.d.ts.map +1 -1
- package/dist/ui/components/PlanBanner.d.ts +2 -1
- package/dist/ui/components/PlanBanner.d.ts.map +1 -1
- package/dist/ui/components/PlanModeLogo.d.ts +2 -1
- package/dist/ui/components/PlanModeLogo.d.ts.map +1 -1
- package/dist/ui/components/PlanOverlay.d.ts +3 -2
- package/dist/ui/components/PlanOverlay.d.ts.map +1 -1
- package/dist/ui/components/PlanProgress.d.ts +2 -1
- package/dist/ui/components/PlanProgress.d.ts.map +1 -1
- package/dist/ui/components/QueueIndicator.d.ts +2 -1
- package/dist/ui/components/QueueIndicator.d.ts.map +1 -1
- package/dist/ui/components/RewindOverlay.d.ts +2 -1
- package/dist/ui/components/RewindOverlay.d.ts.map +1 -1
- package/dist/ui/components/SelectList.d.ts +2 -1
- package/dist/ui/components/SelectList.d.ts.map +1 -1
- package/dist/ui/components/ServerToolExecution.d.ts +2 -1
- package/dist/ui/components/ServerToolExecution.d.ts.map +1 -1
- package/dist/ui/components/SessionSelector.d.ts +2 -1
- package/dist/ui/components/SessionSelector.d.ts.map +1 -1
- package/dist/ui/components/SessionSummary.d.ts +2 -1
- package/dist/ui/components/SessionSummary.d.ts.map +1 -1
- package/dist/ui/components/SettingsSelector.d.ts +2 -1
- package/dist/ui/components/SettingsSelector.d.ts.map +1 -1
- package/dist/ui/components/SkillsOverlay.d.ts +2 -1
- package/dist/ui/components/SkillsOverlay.d.ts.map +1 -1
- package/dist/ui/components/SlashCommandMenu.d.ts +2 -1
- package/dist/ui/components/SlashCommandMenu.d.ts.map +1 -1
- package/dist/ui/components/SlashStyledSelectList.d.ts +2 -1
- package/dist/ui/components/SlashStyledSelectList.d.ts.map +1 -1
- package/dist/ui/components/Spinner.d.ts +2 -1
- package/dist/ui/components/Spinner.d.ts.map +1 -1
- package/dist/ui/components/StreamingArea.d.ts +1 -1
- package/dist/ui/components/StreamingArea.d.ts.map +1 -1
- package/dist/ui/components/SubAgentPanel.d.ts +2 -1
- package/dist/ui/components/SubAgentPanel.d.ts.map +1 -1
- package/dist/ui/components/TaskPickerMenu.d.ts +2 -1
- package/dist/ui/components/TaskPickerMenu.d.ts.map +1 -1
- package/dist/ui/components/ThemeSelector.d.ts +2 -1
- package/dist/ui/components/ThemeSelector.d.ts.map +1 -1
- package/dist/ui/components/ThinkingBlock.d.ts +1 -1
- package/dist/ui/components/ThinkingBlock.d.ts.map +1 -1
- package/dist/ui/components/ToolExecution.d.ts +2 -1
- package/dist/ui/components/ToolExecution.d.ts.map +1 -1
- package/dist/ui/components/ToolGroupExecution.d.ts +2 -1
- package/dist/ui/components/ToolGroupExecution.d.ts.map +1 -1
- package/dist/ui/components/TranscriptViewport.d.ts +1 -1
- package/dist/ui/components/TranscriptViewport.d.ts.map +1 -1
- package/dist/ui/components/UserMessage.d.ts +2 -1
- package/dist/ui/components/UserMessage.d.ts.map +1 -1
- package/dist/ui/hooks/useAgentLoop.d.ts.map +1 -1
- package/dist/ui/hooks/useAgentLoop.js +5 -1
- package/dist/ui/hooks/useAgentLoop.js.map +1 -1
- package/dist/ui/transcript/MiscRows.d.ts +8 -7
- package/dist/ui/transcript/MiscRows.d.ts.map +1 -1
- package/dist/ui/transcript/StatusRow.d.ts +1 -1
- package/dist/ui/transcript/StatusRow.d.ts.map +1 -1
- package/dist/ui/transcript/ToolRows.d.ts +7 -6
- package/dist/ui/transcript/ToolRows.d.ts.map +1 -1
- package/dist/ui/transcript/TranscriptItemFrame.d.ts +1 -1
- package/dist/ui/transcript/TranscriptItemFrame.d.ts.map +1 -1
- package/package.json +6 -4
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hash-anchored line addressing — the pure, UI-free core shared by the hashline
|
|
3
|
+
* benchmark and the opt-in anchor guard in the read/edit tools.
|
|
4
|
+
*
|
|
5
|
+
* Every line gets a short content+position hash. Anchors are UNIQUE by
|
|
6
|
+
* construction (the line's index is folded into the hash, so blank/repeated
|
|
7
|
+
* lines never collide), which is what lets an edit either resolve to exactly one
|
|
8
|
+
* location or be rejected — never silently corrupt a file that drifted since the
|
|
9
|
+
* model last read it.
|
|
10
|
+
*/
|
|
11
|
+
import { createHash } from "node:crypto";
|
|
12
|
+
/**
 * Compute the short anchor for one line of a file.
 *
 * The anchor is the first 4 hex characters of the SHA-1 digest of
 * `"<index>:<trimmed line>"`. Folding the 0-based index into the digest means
 * identical lines (blank lines, repeated braces) at different positions hash
 * different inputs. Only 16 bits are kept, so two lines can still share an
 * anchor by chance. Leading/trailing whitespace is trimmed before hashing, so
 * whitespace-only changes at the edges of a line do not change its anchor.
 *
 * @param {string} line - The line's text (without its newline).
 * @param {number} index - 0-based line index.
 * @returns {string} 4 lowercase hex characters.
 */
export function lineHash(line, index) {
  const payload = `${index}:${line.trim()}`;
  const hasher = createHash("sha1");
  hasher.update(payload);
  const digest = hasher.digest("hex");
  return digest.slice(0, 4);
}
|
|
20
|
+
/**
 * Render a file for the model with every line prefixed by its anchor.
 *
 * The text is split on "\n"; each line becomes `<anchor>│<line>` (the
 * separator is the box-drawing character U+2502) and the lines are re-joined
 * with "\n".
 *
 * @param {string} file - Full file contents.
 * @returns {string} The anchored rendering.
 */
export function renderWithAnchors(file) {
  const rendered = [];
  for (const [position, text] of file.split("\n").entries()) {
    const anchor = lineHash(text, position);
    rendered.push(`${anchor}│${text}`);
  }
  return rendered.join("\n");
}
|
|
27
|
+
/**
 * Index a file by line anchor.
 *
 * @param {string} file - Full file contents.
 * @returns {{rendered: string, anchorToIndex: Map<string, number>, ambiguous: Set<string>, lines: string[]}}
 *   `rendered` is the anchored rendering of the file, `anchorToIndex` maps each
 *   anchor that occurs exactly once to its 0-based line index, `ambiguous`
 *   holds anchors produced by more than one line, and `lines` is the file
 *   split on "\n".
 */
export function anchorFile(file) {
  const lines = file.split("\n");

  // Group line indices by anchor, in order of first appearance.
  const indicesByAnchor = new Map();
  for (const [position, text] of lines.entries()) {
    const anchor = lineHash(text, position);
    const bucket = indicesByAnchor.get(anchor);
    if (bucket === undefined) {
      indicesByAnchor.set(anchor, [position]);
    } else {
      bucket.push(position);
    }
  }

  const anchorToIndex = new Map();
  const ambiguous = new Set();
  indicesByAnchor.forEach((positions, anchor) => {
    // The position is part of the hashed input, so a shared anchor only
    // happens when the truncated (16-bit) SHA-1 prefix itself collides.
    if (positions.length > 1) {
      ambiguous.add(anchor);
      return;
    }
    anchorToIndex.set(anchor, positions[0]);
  });

  return { rendered: renderWithAnchors(file), anchorToIndex, ambiguous, lines };
}
|
|
47
|
+
/**
 * Staleness gate: true when the line at `index` (0-based) still hashes to
 * `hash`.
 *
 * Returns false (never throws) when `index` is not an integer, is out of
 * range, or does not address a string entry of `lines`. Previously a NaN or
 * fractional index slipped past the range comparisons and crashed inside
 * `lineHash` on `undefined.trim()`.
 *
 * @param {string[]} lines - Current file lines.
 * @param {number} index - 0-based line index to check.
 * @param {string} hash - Anchor the caller believes belongs to that line.
 * @returns {boolean} Whether the anchor still matches.
 */
export function verifyAnchor(lines, index, hash) {
  // NaN/fractional values compare false against both bounds, so they must be
  // rejected explicitly.
  if (!Number.isInteger(index) || index < 0 || index >= lines.length) {
    return false;
  }
  const line = lines[index];
  if (typeof line !== "string") {
    // Hole in a sparse array or a non-string entry: cannot match any anchor.
    return false;
  }
  return lineHash(line, index) === hash;
}
|
|
56
|
+
/**
 * Resolve an anchored edit against the current file lines.
 *
 * `anchor.start_line` / `anchor.end_line` are 1-based; the returned indices
 * are 0-based. The edit is rejected when either endpoint is not an integer or
 * is out of range ("out_of_range"), when the range is reversed ("reversed"),
 * or when either endpoint's hash no longer matches the line ("hash_mismatch").
 * Rejecting rather than guessing is what prevents editing a file that has
 * drifted since it was read.
 *
 * Non-integer line numbers (NaN, fractions, missing fields) are reported as
 * "out_of_range"; previously they passed every comparison and ended in a
 * TypeError while hashing an undefined line.
 *
 * @param {string[]} lines - Current file lines.
 * @param {{start_line: number, end_line: number, start_hash: string, end_hash: string}} anchor
 * @returns {{ok: true, startIndex: number, endIndex: number} | {ok: false, reason: string}}
 */
export function resolveAnchoredEdit(lines, anchor) {
  const startIndex = anchor.start_line - 1;
  const endIndex = anchor.end_line - 1;

  // Number.isInteger also filters NaN, which compares false against any bound.
  const isValidIndex = (i) => Number.isInteger(i) && i >= 0 && i < lines.length;
  if (!isValidIndex(startIndex) || !isValidIndex(endIndex)) {
    return { ok: false, reason: "out_of_range" };
  }
  if (startIndex > endIndex) {
    return { ok: false, reason: "reversed" };
  }
  if (
    !verifyAnchor(lines, startIndex, anchor.start_hash) ||
    !verifyAnchor(lines, endIndex, anchor.end_hash)
  ) {
    return { ok: false, reason: "hash_mismatch" };
  }
  return { ok: true, startIndex, endIndex };
}
|
|
76
|
+
//# sourceMappingURL=hashline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hashline.js","sourceRoot":"","sources":["../../src/core/hashline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC;;;;GAIG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY,EAAE,KAAa;IAClD,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,GAAG,KAAK,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACxF,CAAC;AAED,uEAAuE;AACvE,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,OAAO,IAAI;SACR,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;SACvC,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAYD,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACrB,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACzB,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACZ,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACrB,CAAC,CAAC,CAAC;IACH,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;IAChD,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IACpC,KAAK,MAAM,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,MAAM,EAAE,CAAC;QAC/B,6EAA6E;QAC7E,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC;;YACjD,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACxB,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,iBAAiB,CAAC,IAAI,CAAC,EAAE,aAAa,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AAChF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,KAAe,EAAE,KAAa,EAAE,IAAY;IACvE,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IACrD,OAAO,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAE,EAAE,KAAK,CAAC,KAAK,IAAI,CAAC;AACjD,CAAC;AAyBD;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAe,EAAE,MAAkB;IACrE,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,GAAG,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC;IACrC,IAAI,UAAU,GAAG,CAAC,IAAI,QAAQ,GAAG,CAAC,IAAI,UAAU,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QAC7F,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,cAAc,EA
AE,CAAC;IAC/C,CAAC;IACD,IAAI,UAAU,GAAG,QAAQ,EAAE,CAAC;QAC1B,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;IAC3C,CAAC;IACD,IACE,CAAC,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC;QACnD,CAAC,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,CAAC,EAC/C,CAAC;QACD,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC;IAChD,CAAC;IACD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AAC5C,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic AST-chunk retrieval vs whole-file reads — real-API measurement of
|
|
3
|
+
* whether Feature #3 is worth building.
|
|
4
|
+
*
|
|
5
|
+
* The claim (cocoindex-code / oh-my-pi): replacing "grep then read whole files"
|
|
6
|
+
* with "retrieve only the relevant AST chunks" cuts ~70% of the tokens an agent
|
|
7
|
+
* spends locating code, with no loss of answer quality. We test that directly on
|
|
8
|
+
* OUR OWN repo against a live model.
|
|
9
|
+
*
|
|
10
|
+
* For a set of natural-language questions about real files in this repo, we build
|
|
11
|
+
* three context strategies and ask the model the same question with each:
|
|
12
|
+
*
|
|
13
|
+
* BASELINE (whole-file): deliver the FULL text of the top files a lexical
|
|
14
|
+
* grep would surface — this is what the agent reads today (read + grep).
|
|
15
|
+
*
|
|
16
|
+
* SEMANTIC (AST chunks): parse every file into top-level declarations
|
|
17
|
+
* (functions / classes / interfaces / consts), rank chunks with a real BM25
|
|
18
|
+
* retriever, and deliver only the top-k chunks. No embedding model needed; a
|
|
19
|
+
* learned embedding retriever would land between BM25 and ORACLE.
|
|
20
|
+
*
|
|
21
|
+
* ORACLE (upper bound): deliver only the hand-labelled answer chunk(s) — the
|
|
22
|
+
* best case any retriever could achieve.
|
|
23
|
+
*
|
|
24
|
+
* We measure, per question and strategy: INPUT tokens delivered (the headline
|
|
25
|
+
* cost) and whether the model's answer was correct (deterministic keyword grade).
|
|
26
|
+
* The verdict: does SEMANTIC reach BASELINE-level correctness at a fraction of
|
|
27
|
+
* the input tokens?
|
|
28
|
+
*
|
|
29
|
+
* Usage:
|
|
30
|
+
* npx tsx src/core/semantic-search-benchmark.ts
|
|
31
|
+
*
|
|
32
|
+
* Env overrides:
|
|
33
|
+
* GG_SS_PROVIDER / GG_SS_MODEL (default openai / gpt-5.5)
|
|
34
|
+
* GG_SS_TOPK (chunks/files delivered, default 3)
|
|
35
|
+
*/
|
|
36
|
+
export declare function grade(answer: string, mustInclude: string[]): boolean;
|
|
37
|
+
//# sourceMappingURL=semantic-search-benchmark.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-search-benchmark.d.ts","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAwHH,wBAAgB,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAGpE"}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic AST-chunk retrieval vs whole-file reads — real-API measurement of
|
|
3
|
+
* whether Feature #3 is worth building.
|
|
4
|
+
*
|
|
5
|
+
* The claim (cocoindex-code / oh-my-pi): replacing "grep then read whole files"
|
|
6
|
+
* with "retrieve only the relevant AST chunks" cuts ~70% of the tokens an agent
|
|
7
|
+
* spends locating code, with no loss of answer quality. We test that directly on
|
|
8
|
+
* OUR OWN repo against a live model.
|
|
9
|
+
*
|
|
10
|
+
* For a set of natural-language questions about real files in this repo, we build
|
|
11
|
+
* three context strategies and ask the model the same question with each:
|
|
12
|
+
*
|
|
13
|
+
* BASELINE (whole-file): deliver the FULL text of the top files a lexical
|
|
14
|
+
* grep would surface — this is what the agent reads today (read + grep).
|
|
15
|
+
*
|
|
16
|
+
* SEMANTIC (AST chunks): parse every file into top-level declarations
|
|
17
|
+
* (functions / classes / interfaces / consts), rank chunks with a real BM25
|
|
18
|
+
* retriever, and deliver only the top-k chunks. No embedding model needed; a
|
|
19
|
+
* learned embedding retriever would land between BM25 and ORACLE.
|
|
20
|
+
*
|
|
21
|
+
* ORACLE (upper bound): deliver only the hand-labelled answer chunk(s) — the
|
|
22
|
+
* best case any retriever could achieve.
|
|
23
|
+
*
|
|
24
|
+
* We measure, per question and strategy: INPUT tokens delivered (the headline
|
|
25
|
+
* cost) and whether the model's answer was correct (deterministic keyword grade).
|
|
26
|
+
* The verdict: does SEMANTIC reach BASELINE-level correctness at a fraction of
|
|
27
|
+
* the input tokens?
|
|
28
|
+
*
|
|
29
|
+
* Usage:
|
|
30
|
+
* npx tsx src/core/semantic-search-benchmark.ts
|
|
31
|
+
*
|
|
32
|
+
* Env overrides:
|
|
33
|
+
* GG_SS_PROVIDER / GG_SS_MODEL (default openai / gpt-5.5)
|
|
34
|
+
* GG_SS_TOPK (chunks/files delivered, default 3)
|
|
35
|
+
*/
|
|
36
|
+
import fs from "node:fs";
|
|
37
|
+
import path from "node:path";
|
|
38
|
+
import { fileURLToPath } from "node:url";
|
|
39
|
+
import { stream } from "@kenkaiiii/gg-ai";
|
|
40
|
+
import { AuthStorage } from "./auth-storage.js";
|
|
41
|
+
import { chunkFile, bm25Rank, rankFiles } from "./code-retrieval.js";
|
|
42
|
+
// Directory containing this module, and its parent directory, which is the
// root the corpus paths below are resolved against (packages/ggcoder/src when
// run from source).
const HERE = path.dirname(fileURLToPath(import.meta.url));
const SRC = path.join(HERE, "..");

// Benchmark questions. Each entry carries the question text (`q`), the corpus
// files offered to the retrievers (`files`), the hand-labelled answer chunk
// (`oracle`), and the lowercase keywords a correct answer must contain
// (`mustInclude`). Written as compact tuples and expanded into objects.
const QUESTIONS = [
  [
    "Which method resolves provider credentials and auto-refreshes expired OAuth tokens, and what happens if it is not logged in?",
    ["core/auth-storage.ts", "core/loop-breaker.ts", "tools/edit-diff.ts"],
    ["core/auth-storage.ts", "resolveCredentials"],
    ["resolvecredentials", "refresh"],
  ],
  [
    "What function performs fuzzy text matching for the edit tool, and how does it tolerate indentation drift?",
    ["tools/edit-diff.ts", "core/auth-storage.ts", "core/checkpoint-store.ts"],
    ["tools/edit-diff.ts", "fuzzyFindText"],
    ["fuzzyfindtext", "indent"],
  ],
  [
    "What restore modes does the checkpoint / rewind system support?",
    ["core/checkpoint-store.ts", "core/loop-breaker.ts", "tools/edit-diff.ts"],
    ["core/checkpoint-store.ts", "RestoreMode"],
    ["code", "conversation", "both"],
  ],
].map(([q, files, [file, symbol], mustInclude]) => ({
  q,
  files,
  oracle: { file, symbol },
  mustInclude,
}));
|
|
64
|
+
/**
 * Pause for `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
67
|
+
/**
 * Ask the model one question over the given context and return its answer
 * together with the token usage reported for the call.
 *
 * The request is attempted up to 4 times. After a failed attempt the function
 * waits 2s, 4s, 6s before retrying; it does NOT wait after the final failure
 * (the original slept a further 8s before throwing).
 *
 * @param {string} provider - Provider id forwarded to `stream`.
 * @param {string} model - Model id forwarded to `stream`.
 * @param {{apiKey: string, baseUrl?: string, accountId?: string}} c - Credentials.
 * @param {string} context - Code context placed in the user message.
 * @param {string} question - The question to answer.
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number}>}
 * @throws The error from the last failed attempt.
 */
async function ask(provider, model, c, context, question) {
  const MAX_ATTEMPTS = 4;
  const messages = [
    {
      role: "system",
      content:
        "You answer questions about a codebase using ONLY the provided context. " +
        "Be specific: name the exact functions/types involved. If the context is insufficient, say so. " +
        "Answer in 1-3 sentences.",
    },
    { role: "user", content: `CONTEXT:\n${context}\n\nQUESTION: ${question}` },
  ];
  let lastErr;
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    try {
      let text = "";
      const result = stream({
        provider: provider,
        model,
        messages,
        maxTokens: 512,
        apiKey: c.apiKey,
        baseUrl: c.baseUrl,
        accountId: c.accountId,
      });
      // Accumulate streamed deltas as a fallback in case the final message
      // carries no text.
      for await (const event of result) {
        if (event.type === "text_delta") {
          text += event.text;
        }
      }
      const response = await result.response;
      const content = response.message.content;
      // The final content is either a plain string or a list of parts; only
      // the text parts are concatenated.
      const finalText =
        typeof content === "string"
          ? content
          : content
              .filter((p) => p.type === "text")
              .map((p) => p.text ?? "")
              .join("");
      return {
        text: finalText || text,
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
      };
    } catch (err) {
      lastErr = err;
      // Back off only when another attempt will follow.
      if (attempt < MAX_ATTEMPTS - 1) {
        await sleep(2000 * (attempt + 1));
      }
    }
  }
  throw lastErr;
}
|
|
115
|
+
/**
 * Deterministic keyword grade: an answer passes when it contains every
 * required term, compared case-insensitively.
 *
 * Both the answer and each term are lowercased. Previously only the answer
 * was, so a term containing any uppercase character could never match.
 *
 * @param {string} answer - The model's answer.
 * @param {string[]} mustInclude - Terms that must all appear in the answer.
 * @returns {boolean} True when every term is present (true for an empty list).
 */
export function grade(answer, mustInclude) {
  const haystack = answer.toLowerCase();
  return mustInclude.every((term) => haystack.includes(term.toLowerCase()));
}
|
|
119
|
+
/**
 * Run the benchmark: for every question build the BASELINE (whole files),
 * SEMANTIC (top-k ranked chunks) and ORACLE (labelled chunk) contexts, ask the
 * model with each, then print per-question rows and aggregate totals.
 *
 * Configuration comes from the environment: GG_SS_PROVIDER (default
 * "openai"), GG_SS_MODEL (default "gpt-5.5") and GG_SS_TOPK (default 3,
 * minimum 1).
 *
 * Changes from the original:
 *  - a non-numeric GG_SS_TOPK falls back to 3 instead of producing NaN
 *    (Math.max(1, NaN) is NaN);
 *  - the "% fewer" figures print "n/a" when the baseline total is 0 rather
 *    than dividing by zero;
 *  - a warning is printed when the labelled oracle chunk cannot be found and
 *    the semantic context is used in its place, since that makes the ORACLE
 *    column a copy of SEMANTIC for that question.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const provider = process.env.GG_SS_PROVIDER ?? "openai";
  const model = process.env.GG_SS_MODEL ?? "gpt-5.5";
  const parsedTopK = Number.parseInt(process.env.GG_SS_TOPK ?? "3", 10);
  const topK = Number.isNaN(parsedTopK) ? 3 : Math.max(1, parsedTopK);
  const auth = new AuthStorage();
  await auth.load();
  const cr = await auth.resolveCredentials(provider);
  const creds = { apiKey: cr.accessToken, baseUrl: cr.baseUrl, accountId: cr.accountId };
  console.log(`\n🔎 Semantic-search benchmark — ${provider}/${model} (top-${topK})\n`);
  const rows = [];
  for (const q of QUESTIONS) {
    // Load the real corpus files; skip the question if any are missing/moved.
    const files = new Map();
    let missing = false;
    for (const rel of q.files) {
      const abs = path.join(SRC, rel);
      if (!fs.existsSync(abs)) {
        console.log(` ⚠ skipping question — missing ${rel}`);
        missing = true;
        break;
      }
      files.set(rel, fs.readFileSync(abs, "utf-8"));
    }
    if (missing) {
      continue;
    }
    // BASELINE context: full text of the top-k files a grep would surface.
    const baseFiles = rankFiles(q.q, files, topK);
    const baseContext = baseFiles.map((f) => `// FILE: ${f}\n${files.get(f)}`).join("\n\n");
    // SEMANTIC context: top-k AST chunks across all corpus files.
    const allChunks = [...files].flatMap(([rel, src]) => chunkFile(rel, src));
    const semChunks = bm25Rank(q.q, allChunks, topK);
    const semContext = semChunks.map((c) => `// ${c.file} → ${c.symbol}\n${c.text}`).join("\n\n");
    // ORACLE context: just the labelled answer chunk.
    const oracleChunk = allChunks.find(
      (c) => c.file === q.oracle.file && c.symbol === q.oracle.symbol,
    );
    if (!oracleChunk) {
      console.log(
        ` ⚠ oracle chunk ${q.oracle.file} → ${q.oracle.symbol} not found — using semantic context instead`,
      );
    }
    const oracleContext = oracleChunk
      ? `// ${oracleChunk.file} → ${oracleChunk.symbol}\n${oracleChunk.text}`
      : semContext;
    process.stdout.write(`▶ ${q.q.slice(0, 64)}…\n`);
    // The three calls stay sequential with a pause before each one.
    await sleep(1200);
    const base = await ask(provider, model, creds, baseContext, q.q);
    await sleep(1200);
    const sem = await ask(provider, model, creds, semContext, q.q);
    await sleep(1200);
    const oracle = await ask(provider, model, creds, oracleContext, q.q);
    const row = {
      q: q.q.slice(0, 40),
      baseInTok: base.inputTokens,
      baseOk: grade(base.text, q.mustInclude),
      semInTok: sem.inputTokens,
      semOk: grade(sem.text, q.mustInclude),
      oracleInTok: oracle.inputTokens,
      oracleOk: grade(oracle.text, q.mustInclude),
    };
    rows.push(row);
    process.stdout.write(
      ` baseline ${row.baseInTok} in tok ${row.baseOk ? "OK" : "FAIL"} | ` +
        `semantic ${row.semInTok} in tok ${row.semOk ? "OK" : "FAIL"} | ` +
        `oracle ${row.oracleInTok} in tok ${row.oracleOk ? "OK" : "FAIL"}\n\n`,
    );
  }
  if (rows.length === 0) {
    console.log("No questions ran (corpus files not found).");
    return;
  }
  // ── Report ──
  console.log("══════════════════════ RESULTS ══════════════════════\n");
  console.log("Question | base in-tok | sem in-tok | oracle | ok b/s/o");
  for (const r of rows) {
    console.log(
      `${r.q.padEnd(40)} | ${String(r.baseInTok).padStart(11)} | ${String(r.semInTok).padStart(10)} | ` +
        `${String(r.oracleInTok).padStart(6)} | ${r.baseOk ? "1" : "0"}/${r.semOk ? "1" : "0"}/${r.oracleOk ? "1" : "0"}`,
    );
  }
  const sum = (f) => rows.reduce((s, r) => s + f(r), 0);
  const baseIn = sum((r) => r.baseInTok);
  const semIn = sum((r) => r.semInTok);
  const oracleIn = sum((r) => r.oracleInTok);
  // Percentage saved relative to the baseline; undefined when the baseline
  // total is zero.
  const pctFewer = (tokens) =>
    baseIn > 0 ? (((baseIn - tokens) / baseIn) * 100).toFixed(0) : "n/a";
  console.log(
    `\nInput tokens to answer: baseline ${baseIn} | semantic ${semIn} ` +
      `(${pctFewer(semIn)}% fewer) | oracle ${oracleIn} ` +
      `(${pctFewer(oracleIn)}% fewer)`,
  );
  console.log(
    `Correctness: baseline ${rows.filter((r) => r.baseOk).length}/${rows.length} | ` +
      `semantic ${rows.filter((r) => r.semOk).length}/${rows.length} | ` +
      `oracle ${rows.filter((r) => r.oracleOk).length}/${rows.length}`,
  );
  console.log(
    `\nVerdict: worth building if SEMANTIC keeps correctness ≈ baseline while cutting input tokens. ` +
      `cocoindex claims ~70% fewer; ORACLE shows the ceiling a better (embedding) retriever could reach.\n`,
  );
}
|
|
201
|
+
// Start the benchmark only when this module is the script being executed
// (its path is process.argv[1]), not when it is imported, e.g. by tests.
const isDirectRun = [
  "semantic-search-benchmark.ts",
  "semantic-search-benchmark.js",
  "semantic-search-benchmark",
].some((suffix) => process.argv[1]?.endsWith(suffix));
if (isDirectRun) {
  main().catch((err) => {
    console.error("Benchmark failed:", err);
    process.exit(1);
  });
}
|
|
211
|
+
//# sourceMappingURL=semantic-search-benchmark.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-search-benchmark.js","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,MAAM,EAA8C,MAAM,kBAAkB,CAAC;AACtF,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErE,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,uBAAuB;AAc7D,MAAM,SAAS,GAAe;IAC5B;QACE,CAAC,EAAE,8HAA8H;QACjI,KAAK,EAAE,CAAC,sBAAsB,EAAE,sBAAsB,EAAE,oBAAoB,CAAC;QAC7E,MAAM,EAAE,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,EAAE,oBAAoB,EAAE;QACtE,WAAW,EAAE,CAAC,oBAAoB,EAAE,SAAS,CAAC;KAC/C;IACD;QACE,CAAC,EAAE,2GAA2G;QAC9G,KAAK,EAAE,CAAC,oBAAoB,EAAE,sBAAsB,EAAE,0BAA0B,CAAC;QACjF,MAAM,EAAE,EAAE,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,eAAe,EAAE;QAC/D,WAAW,EAAE,CAAC,eAAe,EAAE,QAAQ,CAAC;KACzC;IACD;QACE,CAAC,EAAE,iEAAiE;QACpE,KAAK,EAAE,CAAC,0BAA0B,EAAE,sBAAsB,EAAE,oBAAoB,CAAC;QACjF,MAAM,EAAE,EAAE,IAAI,EAAE,0BAA0B,EAAE,MAAM,EAAE,aAAa,EAAE;QACnE,WAAW,EAAE,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;KAC9C;CACF,CAAC;AAYF,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;AAC/C,CAAC;AAQD,KAAK,UAAU,GAAG,CAChB,QAAgB,EAChB,KAAa,EACb,CAAQ,EACR,OAAe,EACf,QAAgB;IAEhB,MAAM,QAAQ,GAAc;QAC1B;YACE,IAAI,EAAE,QAAQ;YACd,OAAO,EACL,yEAAyE;gBACzE,gGAAgG;gBAChG,0BAA0B;SAC7B;QACD,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,OAAO,iBAAiB,QAAQ,EAAE,EAAE;KAC3E,CAAC;IACF,IAAI,OAAgB,CAAC;IACrB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC;YACH,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,MAAM,MAAM,GAAG,MAAM,CAAC;gBACpB,QAAQ,EAAE,QAAiB;gBAC3B,KAAK;gBACL,QAAQ;gBACR,SAAS,EAAE,GAAG;gBACd,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC;YACH,IAAI,KAAK,EAAE,M
AAM,KAAK,IAAI,MAAoC,EAAE,CAAC;gBAC/D,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY;oBAAE,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC;YACtD,CAAC;YACD,MAAM,QAAQ,GAAuC,MAAM,MAAM,CAAC,QAAQ,CAAC;YAC3E,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC;YACzC,MAAM,SAAS,GACb,OAAO,OAAO,KAAK,QAAQ;gBACzB,CAAC,CAAC,OAAO;gBACT,CAAC,CAAE,OAAkD;qBAChD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;qBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;qBACxB,IAAI,CAAC,EAAE,CAAC,CAAC;YAClB,OAAO;gBACL,IAAI,EAAE,SAAS,IAAI,IAAI;gBACvB,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,WAAW;gBACvC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;aAC1C,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,GAAG,GAAG,CAAC;YACd,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IACD,MAAM,OAAO,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,MAAc,EAAE,WAAqB;IACzD,MAAM,CAAC,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;IAC/B,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;AACjD,CAAC;AAcD,KAAK,UAAU,IAAI;IACjB,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,QAAQ,CAAC;IACxD,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC;IACnD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC;IAEtE,MAAM,IAAI,GAAG,IAAI,WAAW,EAAE,CAAC;IAC/B,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;IAClB,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,KAAK,GAAU,EAAE,MAAM,EAAE,EAAE,CAAC,WAAW,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,EAAE,CAAC,SAAS,EAAE,CAAC;IAE9F,OAAO,CAAC,GAAG,CAAC,oCAAoC,QAAQ,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,CAAC;IAErF,MAAM,IAAI,GAAU,EAAE,CAAC;IAEvB,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,0EAA0E;QAC1E,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;QACxC,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,KAAK,MAAM,GAAG,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,OAAO,CAAC,GAAG,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;gBACvD,OAAO,GAAG,IAAI,CAAC;gBACf,MAAM;
YACR,CAAC;YACD,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC;QAChD,CAAC;QACD,IAAI,OAAO;YAAE,SAAS;QAEtB,uEAAuE;QACvE,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;QAC9C,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExF,8DAA8D;QAC9D,MAAM,SAAS,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;QAC1E,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QACjD,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAE9F,kDAAkD;QAClD,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAChC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,CAChE,CAAC;QACF,MAAM,aAAa,GAAG,WAAW;YAC/B,CAAC,CAAC,MAAM,WAAW,CAAC,IAAI,MAAM,WAAW,CAAC,MAAM,KAAK,WAAW,CAAC,IAAI,EAAE;YACvE,CAAC,CAAC,UAAU,CAAC;QAEf,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;QACjD,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;QAClB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;QAClB,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;QAClB,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAErE,MAAM,GAAG,GAAQ;YACf,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;YACnB,SAAS,EAAE,IAAI,CAAC,WAAW;YAC3B,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,CAAC;YACvC,QAAQ,EAAE,GAAG,CAAC,WAAW;YACzB,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,CAAC;YACrC,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,CAAC;SAC5C,CAAC;QACF,IAAI,C
AAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACf,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,eAAe,GAAG,CAAC,SAAS,WAAW,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,KAAK;YACpE,YAAY,GAAG,CAAC,QAAQ,WAAW,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,KAAK;YACjE,UAAU,GAAG,CAAC,WAAW,WAAW,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,MAAM,CACzE,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC,CAAC;QAC1D,OAAO;IACT,CAAC;IAED,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,yDAAyD,CAAC,CAAC;IACvE,OAAO,CAAC,GAAG,CACT,yFAAyF,CAC1F,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAM,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,KAAK;YAC/F,GAAG,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CACpH,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAG,CAAC,CAAqB,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1E,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IACvC,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CACT,sCAAsC,MAAM,eAAe,KAAK,GAAG;QACjE,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,QAAQ,GAAG;QAClF,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,QAAQ,CAAC,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAClE,CAAC;IACF,OAAO,CAAC,GAAG,CACT,yBAAyB,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,KAAK;QAC9E,YAAY,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,KAAK;QAClE,UAA
U,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,EAAE,CACnE,CAAC;IACF,OAAO,CAAC,GAAG,CACT,iGAAiG;QAC/F,qGAAqG,CACxG,CAAC;AACJ,CAAC;AAED,2DAA2D;AAC3D,MAAM,WAAW,GACf,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,8BAA8B,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,8BAA8B,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,2BAA2B,CAAC,CAAC;AAEzD,IAAI,WAAW,EAAE,CAAC;IAChB,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACnB,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-search-benchmark.test.d.ts","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { chunkFile, tokenize, bm25Rank, rankFiles } from "./code-retrieval.js";
|
|
3
|
+
import { grade } from "./semantic-search-benchmark.js";
|
|
4
|
+
/**
|
|
5
|
+
* Deterministic tests for the retrieval half of the semantic-search benchmark.
|
|
6
|
+
* The token-saving claim is only meaningful if AST chunking, tokenization, BM25
|
|
7
|
+
* ranking, and the answer grader are correct. We test parsing across every node
|
|
8
|
+
* kind, full-body capture, camelCase splitting, ranking relevance, the top-k
|
|
9
|
+
* cap, and the grader's all-tokens-required strictness.
|
|
10
|
+
*/
|
|
11
|
+
const SAMPLE = `
|
|
12
|
+
import { x } from "y";
|
|
13
|
+
|
|
14
|
+
export interface Session { id: string; userId: string; }
|
|
15
|
+
|
|
16
|
+
export type Mode = "code" | "conversation" | "both";
|
|
17
|
+
|
|
18
|
+
export enum Color { Red, Green }
|
|
19
|
+
|
|
20
|
+
const DEFAULT_TTL = 3000;
|
|
21
|
+
|
|
22
|
+
/** Sign a session token with HMAC-SHA256. */
|
|
23
|
+
export function signSessionToken(s: Session, secret: string): string {
|
|
24
|
+
return s.id + secret + "SIGNED_MARKER";
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export class LruCache {
|
|
28
|
+
evictLeastRecentlyUsed(): void {}
|
|
29
|
+
}
|
|
30
|
+
`;
|
|
31
|
+
describe("chunkFile (AST chunking)", () => {
|
|
32
|
+
const chunks = chunkFile("sample.ts", SAMPLE);
|
|
33
|
+
const symbols = chunks.map((c) => c.symbol);
|
|
34
|
+
it("1. extracts a top-level function declaration by name", () => {
|
|
35
|
+
expect(symbols).toContain("signSessionToken");
|
|
36
|
+
});
|
|
37
|
+
it("2. extracts class and interface declarations", () => {
|
|
38
|
+
expect(symbols).toContain("LruCache");
|
|
39
|
+
expect(symbols).toContain("Session");
|
|
40
|
+
});
|
|
41
|
+
it("3. extracts type alias, enum, and top-level const", () => {
|
|
42
|
+
expect(symbols).toContain("Mode");
|
|
43
|
+
expect(symbols).toContain("Color");
|
|
44
|
+
expect(symbols).toContain("DEFAULT_TTL");
|
|
45
|
+
});
|
|
46
|
+
it("4. captures the FULL body text of a chunk (not just the signature)", () => {
|
|
47
|
+
const fn = chunks.find((c) => c.symbol === "signSessionToken");
|
|
48
|
+
expect(fn.text).toContain("SIGNED_MARKER");
|
|
49
|
+
expect(fn.file).toBe("sample.ts");
|
|
50
|
+
});
|
|
51
|
+
it("5. ignores import statements (no spurious chunk)", () => {
|
|
52
|
+
expect(symbols).not.toContain("x");
|
|
53
|
+
expect(chunks.length).toBe(6); // Session, Mode, Color, DEFAULT_TTL, signSessionToken, LruCache
|
|
54
|
+
});
|
|
55
|
+
});
|
|
56
|
+
describe("tokenize", () => {
|
|
57
|
+
it("6. splits camelCase identifiers into separate terms", () => {
|
|
58
|
+
const toks = tokenize("resolveCredentials");
|
|
59
|
+
expect(toks).toContain("resolve");
|
|
60
|
+
expect(toks).toContain("credentials");
|
|
61
|
+
});
|
|
62
|
+
it("7. lowercases and splits on non-word boundaries (snake/punctuation)", () => {
|
|
63
|
+
const toks = tokenize("DEFAULT_TTL = signSessionToken()");
|
|
64
|
+
expect(toks).toEqual(expect.arrayContaining(["default", "ttl", "sign", "session", "token"]));
|
|
65
|
+
expect(toks.every((t) => t === t.toLowerCase())).toBe(true);
|
|
66
|
+
});
|
|
67
|
+
});
|
|
68
|
+
describe("bm25Rank + rankFiles (retrieval)", () => {
|
|
69
|
+
const chunks = chunkFile("sample.ts", SAMPLE);
|
|
70
|
+
it("8. ranks the chunk matching the query's terms first", () => {
|
|
71
|
+
const top = bm25Rank("how are session tokens signed", chunks, 1);
|
|
72
|
+
expect(top[0].symbol).toBe("signSessionToken");
|
|
73
|
+
});
|
|
74
|
+
it("9. never returns more than top-k chunks", () => {
|
|
75
|
+
expect(bm25Rank("session", chunks, 2).length).toBeLessThanOrEqual(2);
|
|
76
|
+
expect(bm25Rank("session", chunks, 100).length).toBe(chunks.length);
|
|
77
|
+
});
|
|
78
|
+
it("10. rankFiles surfaces the relevant file and grade requires ALL tokens", () => {
|
|
79
|
+
const files = new Map([
|
|
80
|
+
["auth.ts", SAMPLE],
|
|
81
|
+
["unrelated.ts", "export const pi = 3.14; // geometry helpers only"],
|
|
82
|
+
]);
|
|
83
|
+
expect(rankFiles("sign a session token", files, 1)[0]).toBe("auth.ts");
|
|
84
|
+
// grade is strict: every required token must be present (case-insensitive).
|
|
85
|
+
expect(grade("It uses signSessionToken with refresh logic", ["signsessiontoken", "refresh"])).toBe(true);
|
|
86
|
+
expect(grade("It uses signSessionToken", ["signsessiontoken", "refresh"])).toBe(false);
|
|
87
|
+
});
|
|
88
|
+
});
|
|
89
|
+
//# sourceMappingURL=semantic-search-benchmark.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-search-benchmark.test.js","sourceRoot":"","sources":["../../src/core/semantic-search-benchmark.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAC/E,OAAO,EAAE,KAAK,EAAE,MAAM,gCAAgC,CAAC;AAEvD;;;;;;GAMG;AAEH,MAAM,MAAM,GAAG;;;;;;;;;;;;;;;;;;;CAmBd,CAAC;AAEF,QAAQ,CAAC,0BAA0B,EAAE,GAAG,EAAE;IACxC,MAAM,MAAM,GAAG,SAAS,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAE5C,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;QACtC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oEAAoE,EAAE,GAAG,EAAE;QAC5E,MAAM,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,kBAAkB,CAAE,CAAC;QAChE,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAC3C,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,gEAAgE;IACjG,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,IAAI,GAAG,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QAClC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qEAAqE,EAAE,GAAG,EAAE;QAC7E,MAAM,IAAI,GAAG,QAAQ,CAAC,kCAAkC,CAAC,CAAC;QAC1D,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC
,eAAe,CAAC,CAAC,SAAS,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7F,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,kCAAkC,EAAE,GAAG,EAAE;IAChD,MAAM,MAAM,GAAG,SAAS,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IAE9C,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,GAAG,GAAG,QAAQ,CAAC,+BAA+B,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;QACjE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,CAAC,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;QACrE,MAAM,CAAC,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wEAAwE,EAAE,GAAG,EAAE;QAChF,MAAM,KAAK,GAAG,IAAI,GAAG,CAAiB;YACpC,CAAC,SAAS,EAAE,MAAM,CAAC;YACnB,CAAC,cAAc,EAAE,kDAAkD,CAAC;SACrE,CAAC,CAAC;QACH,MAAM,CAAC,SAAS,CAAC,sBAAsB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEvE,4EAA4E;QAC5E,MAAM,CACJ,KAAK,CAAC,6CAA6C,EAAE,CAAC,kBAAkB,EAAE,SAAS,CAAC,CAAC,CACtF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACb,MAAM,CAAC,KAAK,CAAC,0BAA0B,EAAE,CAAC,kBAAkB,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzF,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { TextContent, ImageContent, VideoContent } from "@kenkaiiii/gg-ai";
|
|
2
|
+
type ContentPart = TextContent | ImageContent | VideoContent;
|
|
3
|
+
type UserContent = string | ContentPart[];
|
|
4
|
+
/**
|
|
5
|
+
* Framing prepended to a mid-run steering message (a prompt the user submitted
|
|
6
|
+
* while the agent was already working).
|
|
7
|
+
*
|
|
8
|
+
* Without this wrapper the queued text arrives as a bare top-level user turn,
|
|
9
|
+
* identical to a brand-new request — so models treat it as the authoritative
|
|
10
|
+
* instruction and silently abandon the original task. The wrapper names the
|
|
11
|
+
* relationship (a second, concurrent instruction) and the one rule that kills
|
|
12
|
+
* the failure mode: don't drop either side. The model already knows how to
|
|
13
|
+
* merge two live instructions once it knows both are in force.
|
|
14
|
+
*/
|
|
15
|
+
export declare const STEERING_PREFIX: string;
|
|
16
|
+
/** Wrap a plain-text steering message with the framing prefix. */
|
|
17
|
+
export declare function wrapSteeringText(text: string): string;
|
|
18
|
+
/**
|
|
19
|
+
* Wrap a steering `UserContent` (string or multimodal parts) with the framing
|
|
20
|
+
* prefix. Media blocks pass through untouched; for a parts array the prefix is
|
|
21
|
+
* added as its own leading text part so attachments still ride the same native-block path.
|
|
22
|
+
*/
|
|
23
|
+
export declare function wrapSteeringContent(content: UserContent): UserContent;
|
|
24
|
+
export {};
|
|
25
|
+
//# sourceMappingURL=steering.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"steering.d.ts","sourceRoot":"","sources":["../../src/core/steering.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhF,KAAK,WAAW,GAAG,WAAW,GAAG,YAAY,GAAG,YAAY,CAAC;AAC7D,KAAK,WAAW,GAAG,MAAM,GAAG,WAAW,EAAE,CAAC;AAE1C;;;;;;;;;;GAUG;AACH,eAAO,MAAM,eAAe,QAGd,CAAC;AAEf,kEAAkE;AAClE,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;;;GAIG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,WAAW,GAAG,WAAW,CAGrE"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Framing prepended to a mid-run steering message (a prompt the user submitted
|
|
3
|
+
* while the agent was already working).
|
|
4
|
+
*
|
|
5
|
+
* Without this wrapper the queued text arrives as a bare top-level user turn,
|
|
6
|
+
* identical to a brand-new request — so models treat it as the authoritative
|
|
7
|
+
* instruction and silently abandon the original task. The wrapper names the
|
|
8
|
+
* relationship (a second, concurrent instruction) and the one rule that kills
|
|
9
|
+
* the failure mode: don't drop either side. The model already knows how to
|
|
10
|
+
* merge two live instructions once it knows both are in force.
|
|
11
|
+
*/
|
|
12
|
+
export const STEERING_PREFIX = "[The user added this while you were working — fold it into the current " +
|
|
13
|
+
"task, adjusting or extending as needed. Don't drop your original work or " +
|
|
14
|
+
"this.]\n\n";
|
|
15
|
+
/** Wrap a plain-text steering message with the framing prefix. */
|
|
16
|
+
export function wrapSteeringText(text) {
|
|
17
|
+
return STEERING_PREFIX + text;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Wrap a steering `UserContent` (string or multimodal parts) with the framing
|
|
21
|
+
* prefix. Media blocks pass through untouched; for a parts array the prefix is
|
|
22
|
+
* added as its own leading text part so attachments still ride the same native-block path.
|
|
23
|
+
*/
|
|
24
|
+
export function wrapSteeringContent(content) {
|
|
25
|
+
if (typeof content === "string")
|
|
26
|
+
return wrapSteeringText(content);
|
|
27
|
+
return [{ type: "text", text: STEERING_PREFIX }, ...content];
|
|
28
|
+
}
|
|
29
|
+
//# sourceMappingURL=steering.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"steering.js","sourceRoot":"","sources":["../../src/core/steering.ts"],"names":[],"mappings":"AAKA;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,eAAe,GAC1B,yEAAyE;IACzE,2EAA2E;IAC3E,YAAY,CAAC;AAEf,kEAAkE;AAClE,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,OAAO,eAAe,GAAG,IAAI,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,OAAoB;IACtD,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAClE,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,eAAe,EAAiB,EAAE,GAAG,OAAO,CAAC,CAAC;AAC9E,CAAC"}
|
package/dist/tools/edit.d.ts
CHANGED
|
@@ -11,6 +11,12 @@ declare const EditParams: z.ZodObject<{
|
|
|
11
11
|
old_text: z.ZodString;
|
|
12
12
|
new_text: z.ZodString;
|
|
13
13
|
replace_all: z.ZodOptional<z.ZodBoolean>;
|
|
14
|
+
anchor: z.ZodOptional<z.ZodObject<{
|
|
15
|
+
start_line: z.ZodNumber;
|
|
16
|
+
start_hash: z.ZodString;
|
|
17
|
+
end_line: z.ZodNumber;
|
|
18
|
+
end_hash: z.ZodString;
|
|
19
|
+
}, z.core.$strip>>;
|
|
14
20
|
}, z.core.$strip>>>;
|
|
15
21
|
atomic: z.ZodOptional<z.ZodBoolean>;
|
|
16
22
|
}, z.core.$strip>;
|
package/dist/tools/edit.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"edit.d.ts","sourceRoot":"","sources":["../../src/tools/edit.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAYrD,OAAO,EAAmB,KAAK,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACvE,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"edit.d.ts","sourceRoot":"","sources":["../../src/tools/edit.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAYrD,OAAO,EAAmB,KAAK,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACvE,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAI/E,KAAK,gBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAEnE,uGAAuG;AACvG,KAAK,mBAAmB,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;AAqDlF,QAAA,MAAM,UAAU;;;;;;;;;;;;;;iBAgBd,CAAC;AAuDH,wBAAgB,cAAc,CAC5B,GAAG,EAAE,MAAM,EACX,SAAS,CAAC,EAAE,WAAW,EACvB,GAAG,GAAE,cAAgC,EACrC,0BAA0B,CAAC,EAAE;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,GAAG,gBAAgB,EACpE,aAAa,CAAC,EAAE,gBAAgB,EAChC,iBAAiB,CAAC,EAAE,gBAAgB,EACpC,cAAc,CAAC,EAAE,mBAAmB,GACnC,SAAS,CAAC,OAAO,UAAU,CAAC,CAsQ9B"}
|
package/dist/tools/edit.js
CHANGED
|
@@ -4,6 +4,7 @@ import { resolvePath, rejectSymlink } from "./path-utils.js";
|
|
|
4
4
|
import { fuzzyFindText, countOccurrences, generateDiff, findClosestSnippet, findOccurrenceLines, stripBlankEdges, applyDotdotdots, applyMissingLeadingWhitespace, } from "./edit-diff.js";
|
|
5
5
|
import { localOperations } from "./operations.js";
|
|
6
6
|
import { assertFresh, recordWrite } from "./read-tracker.js";
|
|
7
|
+
import { resolveAnchoredEdit } from "../core/hashline.js";
|
|
7
8
|
import { isPlanModeActive, planModeRestriction } from "../core/runtime-mode.js";
|
|
8
9
|
function isMutationCallback(value) {
|
|
9
10
|
return typeof value === "function";
|
|
@@ -13,6 +14,14 @@ function isPlanModeRef(value) {
|
|
|
13
14
|
value !== null &&
|
|
14
15
|
typeof value.current === "boolean");
|
|
15
16
|
}
|
|
17
|
+
const EditAnchorSchema = z.object({
|
|
18
|
+
start_line: z.number().int().min(1).describe("1-based line number of the first edited line"),
|
|
19
|
+
start_hash: z
|
|
20
|
+
.string()
|
|
21
|
+
.describe("Content anchor of the first line (from a read with anchors:true)"),
|
|
22
|
+
end_line: z.number().int().min(1).describe("1-based line number of the last edited line"),
|
|
23
|
+
end_hash: z.string().describe("Content anchor of the last line"),
|
|
24
|
+
});
|
|
16
25
|
const EditItem = z.object({
|
|
17
26
|
old_text: z.string().describe("The exact text to find and replace"),
|
|
18
27
|
new_text: z.string().describe("The replacement text"),
|
|
@@ -21,6 +30,9 @@ const EditItem = z.object({
|
|
|
21
30
|
.optional()
|
|
22
31
|
.describe("Replace every occurrence of old_text instead of requiring a unique match. " +
|
|
23
32
|
"Use for renames or repeated tokens. Defaults to false."),
|
|
33
|
+
anchor: EditAnchorSchema.optional().describe("Optional staleness guard. When set (using line+hash anchors from a read with anchors:true), " +
|
|
34
|
+
"the edit is rejected if the file changed since you read it. old_text/new_text still drive the " +
|
|
35
|
+
"actual replacement."),
|
|
24
36
|
});
|
|
25
37
|
// Some models (Opus 4.6, GLM-5.1) occasionally send `edits` as a JSON string
|
|
26
38
|
// instead of a real array, which trips Zod and makes the model fall back to
|
|
@@ -91,7 +103,9 @@ export function createEditTool(cwd, readFiles, ops = localOperations, planModeRe
|
|
|
91
103
|
"Each old_text should identify one location — include surrounding context; set replace_all: true only for deliberate global replacements/renames. " +
|
|
92
104
|
"The matcher tolerates safe whitespace/quote/dash drift, but do not paraphrase. For long blocks, a line containing only `...` in BOTH old_text and new_text elides a middle preserved verbatim. " +
|
|
93
105
|
"Partial-apply by default: failed edits are listed for retry, successful ones are still written — " +
|
|
94
|
-
"re-issue ONLY the listed failures, not the whole batch.
|
|
106
|
+
"re-issue ONLY the listed failures, not the whole batch. " +
|
|
107
|
+
"Optionally pin an edit with `anchor` (line+hash from a read with anchors:true) to reject it if the file changed since you read it. " +
|
|
108
|
+
"Returns a unified diff.",
|
|
95
109
|
parameters: EditParams,
|
|
96
110
|
executionMode: "sequential",
|
|
97
111
|
async execute({ file_path, edits, atomic = false }) {
|
|
@@ -105,10 +119,25 @@ export function createEditTool(cwd, readFiles, ops = localOperations, planModeRe
|
|
|
105
119
|
const hasCRLF = original.includes("\r\n");
|
|
106
120
|
const originalNormalized = hasCRLF ? original.replace(/\r\n/g, "\n") : original;
|
|
107
121
|
let working = originalNormalized;
|
|
122
|
+
// Anchors pin lines in the file AS READ, so they always verify against the
|
|
123
|
+
// original (pre-edit) line array — earlier edits in the batch don't shift
|
|
124
|
+
// what an anchor refers to.
|
|
125
|
+
const originalLines = originalNormalized.split("\n");
|
|
108
126
|
const fileName = path.basename(resolved);
|
|
109
127
|
const outcomes = new Array(edits.length);
|
|
110
128
|
for (let i = 0; i < edits.length; i++) {
|
|
111
|
-
const { old_text, new_text, replace_all } = edits[i];
|
|
129
|
+
const { old_text, new_text, replace_all, anchor } = edits[i];
|
|
130
|
+
// Optional staleness guard (opt-in). Runs BEFORE the fuzzy match ladder:
|
|
131
|
+
// if the model supplied an anchor and the file drifted since it read it,
|
|
132
|
+
// reject this edit instead of risking a misplaced fuzzy match. The fuzzy
|
|
133
|
+
// path below behaves exactly as before when `anchor` is absent.
|
|
134
|
+
if (anchor) {
|
|
135
|
+
const res = resolveAnchoredEdit(originalLines, anchor);
|
|
136
|
+
if (!res.ok) {
|
|
137
|
+
outcomes[i] = { ok: false, failure: { reason: "stale_anchor" } };
|
|
138
|
+
continue;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
112
141
|
const normalizedOld = hasCRLF ? old_text.replace(/\r\n/g, "\n") : old_text;
|
|
113
142
|
const normalizedNew = hasCRLF ? new_text.replace(/\r\n/g, "\n") : new_text;
|
|
114
143
|
const replaceAll = replace_all ?? false;
|
|
@@ -197,6 +226,9 @@ export function createEditTool(cwd, readFiles, ops = localOperations, planModeRe
|
|
|
197
226
|
// and the snippet is its only guidance — keep it.
|
|
198
227
|
const willPersistSuccesses = successCount > 0 && !atomic;
|
|
199
228
|
const formatFailureMessage = (f) => {
|
|
229
|
+
if (f.reason === "stale_anchor") {
|
|
230
|
+
return `the file changed since you read it (anchor mismatch) — re-read \`${file_path}\` and retry`;
|
|
231
|
+
}
|
|
200
232
|
if (f.reason === "noop") {
|
|
201
233
|
return `old_text and new_text are identical in ${fileName} — this edit would be a no-op. Either fix new_text or drop this edit.`;
|
|
202
234
|
}
|