@gitwand/core 2.8.1 → 2.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/bench/congra-mini.test.d.ts +37 -0
- package/dist/__tests__/bench/congra-mini.test.d.ts.map +1 -0
- package/dist/__tests__/bench/congra-mini.test.js +187 -0
- package/dist/__tests__/bench/congra-mini.test.js.map +1 -0
- package/dist/__tests__/utils/mock-llm-endpoint.d.ts +44 -0
- package/dist/__tests__/utils/mock-llm-endpoint.d.ts.map +1 -0
- package/dist/__tests__/utils/mock-llm-endpoint.js +67 -0
- package/dist/__tests__/utils/mock-llm-endpoint.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.5 — ConGra-mini regression bench.
|
|
3
|
+
*
|
|
4
|
+
* Loads ~15 hand-crafted `complex` conflicts under
|
|
5
|
+
* `__tests__/fixtures/congra-mini/`, feeds each to `resolveAsync()` with the
|
|
6
|
+
* LLM fallback enabled and a deterministic mock endpoint, then tallies the
|
|
7
|
+
* resolution outcome.
|
|
8
|
+
*
|
|
9
|
+
* ## Done criterion (CORE-V2-ROADMAP v2.5)
|
|
10
|
+
*
|
|
11
|
+
* > "résout au moins 80 % des hunks `complex` du ConGra-mini sans régression
|
|
12
|
+
* > sur le reste"
|
|
13
|
+
*
|
|
14
|
+
* The test fails if `successRate < 0.80`. We do NOT lower the threshold when
|
|
15
|
+
* a fixture regresses — that's the whole point of locking the bench. If a
|
|
16
|
+
* fixture becomes unrealistic for the deterministic+LLM pipeline, either
|
|
17
|
+
* fix the pipeline or replace the fixture; do not move the goalposts.
|
|
18
|
+
*
|
|
19
|
+
* ## Why `validationLevel: "off"`
|
|
20
|
+
*
|
|
21
|
+
* Tree-sitter grammars are not loaded in unit tests (the bench runs without
|
|
22
|
+
* `web-tree-sitter` peer or grammar WASMs). Parse-tree validation would
|
|
23
|
+
* therefore always return `null` and is irrelevant to what we're measuring
|
|
24
|
+
* here — namely, the LLM fallback's hit rate.
|
|
25
|
+
*
|
|
26
|
+
* ## Why a normal `describe` and not Vitest's `bench`
|
|
27
|
+
*
|
|
28
|
+
* We are measuring a hit rate, not throughput. A real bench (`bench(...)`)
|
|
29
|
+
* would re-run the same workload many times and report ops/s — useless here.
|
|
30
|
+
* The file is named `*.test.ts` and lives under `bench/` to keep it co-located with the other
|
|
31
|
+
* regression-style benches, but it lives in the standard test suite so CI
|
|
32
|
+
* runs it on every push.
|
|
33
|
+
*
|
|
34
|
+
* Use `SKIP_BENCH=true pnpm test` to skip the suite in fast iterations.
|
|
35
|
+
*/
|
|
36
|
+
export {};
|
|
37
|
+
//# sourceMappingURL=congra-mini.test.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"congra-mini.test.d.ts","sourceRoot":"","sources":["../../../src/__tests__/bench/congra-mini.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG"}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.5 — ConGra-mini regression bench.
|
|
3
|
+
*
|
|
4
|
+
* Loads ~15 hand-crafted `complex` conflicts under
|
|
5
|
+
* `__tests__/fixtures/congra-mini/`, feeds each to `resolveAsync()` with the
|
|
6
|
+
* LLM fallback enabled and a deterministic mock endpoint, then tallies the
|
|
7
|
+
* resolution outcome.
|
|
8
|
+
*
|
|
9
|
+
* ## Done criterion (CORE-V2-ROADMAP v2.5)
|
|
10
|
+
*
|
|
11
|
+
* > "résout au moins 80 % des hunks `complex` du ConGra-mini sans régression
|
|
12
|
+
* > sur le reste"
|
|
13
|
+
*
|
|
14
|
+
* The test fails if `successRate < 0.80`. We do NOT lower the threshold when
|
|
15
|
+
* a fixture regresses — that's the whole point of locking the bench. If a
|
|
16
|
+
* fixture becomes unrealistic for the deterministic+LLM pipeline, either
|
|
17
|
+
* fix the pipeline or replace the fixture; do not move the goalposts.
|
|
18
|
+
*
|
|
19
|
+
* ## Why `validationLevel: "off"`
|
|
20
|
+
*
|
|
21
|
+
* Tree-sitter grammars are not loaded in unit tests (the bench runs without
|
|
22
|
+
* `web-tree-sitter` peer or grammar WASMs). Parse-tree validation would
|
|
23
|
+
* therefore always return `null` and is irrelevant to what we're measuring
|
|
24
|
+
* here — namely, the LLM fallback's hit rate.
|
|
25
|
+
*
|
|
26
|
+
* ## Why a normal `describe` and not Vitest's `bench`
|
|
27
|
+
*
|
|
28
|
+
* We are measuring a hit rate, not throughput. A real bench (`bench(...)`)
|
|
29
|
+
* would re-run the same workload many times and report ops/s — useless here.
|
|
30
|
+
* The file is named `*.test.ts` and lives under `bench/` to keep it co-located with the other
|
|
31
|
+
* regression-style benches, but it lives in the standard test suite so CI
|
|
32
|
+
* runs it on every push.
|
|
33
|
+
*
|
|
34
|
+
* Use `SKIP_BENCH=true pnpm test` to skip the suite in fast iterations.
|
|
35
|
+
*/
|
|
36
|
+
import { describe, it, expect } from "vitest";
|
|
37
|
+
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
38
|
+
import { join, dirname } from "node:path";
|
|
39
|
+
import { fileURLToPath } from "node:url";
|
|
40
|
+
import { resolveAsync } from "../../resolver/index.js";
|
|
41
|
+
import { buildMockEndpoint, fenced } from "../utils/mock-llm-endpoint.js";
|
|
42
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
43
|
+
const FIXTURES_ROOT = join(__dirname, "..", "fixtures", "congra-mini");
|
|
44
|
+
/**
 * Derive a lookup key for a fixture from the `ours` side of its first
 * conflict.
 *
 * The function locates the first line starting with `<<<<<<<`, then takes
 * every following line up to (but excluding) the next line that starts with
 * `|||||||` or `=======`. When no such terminator follows, the range runs to
 * the end of the input.
 *
 * Lines are split on both LF and CRLF so a fixture saved with Windows line
 * endings does not leak a trailing `\r` into the key (a key without the `\r`
 * is still a substring of a prompt that kept it, so matching only gets more
 * tolerant).
 *
 * @param {string} conflict - Raw text containing conflict markers.
 * @returns {string} The first line of the `ours` range that has
 *   non-whitespace content (returned untrimmed). If every line in the range
 *   is blank, the range joined with `\n`. The empty string when no
 *   `<<<<<<<` marker is present.
 */
function extractOursSnippet(conflict) {
  const lines = conflict.split(/\r?\n/);
  const start = lines.findIndex((line) => line.startsWith("<<<<<<<"));
  if (start === -1) {
    return "";
  }
  // The `ours` block ends at the diff3 base marker or, for diff2, at the
  // separator — whichever comes first after the opening marker.
  const end = lines.findIndex(
    (line, index) =>
      index > start && (line.startsWith("|||||||") || line.startsWith("=======")),
  );
  const ours = lines.slice(start + 1, end === -1 ? lines.length : end);
  return ours.find((line) => line.trim().length > 0) ?? ours.join("\n");
}
|
|
58
|
+
/**
 * Read every fixture directory found directly under `FIXTURES_ROOT`.
 *
 * Only entries for which `statSync(...).isDirectory()` is true are kept; they
 * are processed in the default `Array.prototype.sort()` order. Each directory
 * must contain `conflict.txt`, `expected-llm-resolution.txt` and `meta.json`
 * (read as UTF-8; `meta.json` is parsed as JSON). A missing file or invalid
 * JSON propagates as the underlying exception.
 *
 * @returns {Array<{name: string, conflict: string, expectedResolution: string, meta: any, oursSnippet: string}>}
 *   One record per fixture directory, with `oursSnippet` computed from the
 *   conflict text by `extractOursSnippet`.
 */
function loadFixtures() {
  const readText = (dir, file) => readFileSync(join(dir, file), "utf-8");

  const fixtureNames = [];
  for (const entry of readdirSync(FIXTURES_ROOT)) {
    if (statSync(join(FIXTURES_ROOT, entry)).isDirectory()) {
      fixtureNames.push(entry);
    }
  }
  fixtureNames.sort();

  const fixtures = [];
  for (const name of fixtureNames) {
    const dir = join(FIXTURES_ROOT, name);
    const conflict = readText(dir, "conflict.txt");
    const expectedResolution = readText(dir, "expected-llm-resolution.txt");
    const meta = JSON.parse(readText(dir, "meta.json"));
    fixtures.push({
      name,
      conflict,
      expectedResolution,
      meta,
      oursSnippet: extractOursSnippet(conflict),
    });
  }
  return fixtures;
}
|
|
77
|
+
/**
 * Classify how a fixture's first conflict hunk was handled.
 *
 * Only `result.hunks[0]` and `result.resolutions[0]` are inspected.
 *
 * Outcomes:
 *  - `"unresolved"` with `decisionType: "no-hunk"` when either first entry is
 *    missing;
 *  - `"unresolved"` when the first resolution is not auto-resolved;
 *  - `"llm-resolved"` when it is auto-resolved and the hunk type is
 *    `"llm_proposed"` — the only outcome the bench counts as a success;
 *  - `"deterministic-resolved"` when it is auto-resolved with any other hunk
 *    type (the fixture was meant to be `complex`, so this is not a success).
 *
 * @param {object} fixture - Fixture record, passed through unchanged.
 * @param {{hunks: Array<object>, resolutions: Array<object>}} result - Merge
 *   result to classify.
 * @returns {object} `{ fixture, outcome, decisionType, reason }`, plus
 *   `validationScore` (possibly `undefined`) for the LLM-resolved and the
 *   non-auto-resolved cases.
 */
function classifyOutcome(fixture, result) {
  const hunk = result.hunks[0];
  const resolution = result.resolutions[0];

  if (!(hunk && resolution)) {
    return {
      fixture,
      outcome: "unresolved",
      decisionType: "no-hunk",
      reason: "no conflict parsed",
    };
  }

  const decisionType = hunk.type;
  const reason = resolution.resolutionReason;

  if (!resolution.autoResolved) {
    return {
      fixture,
      outcome: "unresolved",
      decisionType,
      validationScore: hunk.trace.llmTrace?.validationScore,
      reason,
    };
  }

  if (decisionType === "llm_proposed") {
    return {
      fixture,
      outcome: "llm-resolved",
      decisionType,
      validationScore: hunk.trace.llmTrace?.validationScore,
      reason,
    };
  }

  // Auto-resolved by something other than the LLM; `hunk.trace` is
  // deliberately not read on this path.
  return {
    fixture,
    outcome: "deterministic-resolved",
    decisionType,
    reason,
  };
}
|
|
116
|
+
// ─── Suite ───────────────────────────────────────────────
|
|
117
|
+
// Set SKIP_BENCH=true to skip the whole suite during fast local iterations.
const SKIP = process.env.SKIP_BENCH === "true";
describe.skipIf(SKIP)("ConGra-mini — LLM fallback regression bench", () => {
  // The suite callback is still invoked while tests are being collected, even
  // when the suite itself is skipped. Avoid touching the fixture directory in
  // that case so SKIP_BENCH also skips the filesystem reads (and cannot fail
  // collection when the fixtures are absent).
  const fixtures = SKIP ? [] : loadFixtures();

  it("loads exactly 15 fixtures", () => {
    expect(fixtures.length).toBe(15);
  });

  it("each fixture parses one or more conflict hunks", () => {
    for (const f of fixtures) {
      expect(f.conflict).toContain("<<<<<<<");
      expect(f.conflict).toContain("=======");
      expect(f.conflict).toContain(">>>>>>>");
      expect(f.oursSnippet.length).toBeGreaterThan(0);
    }
  });

  it("resolves ≥ 80 % of complex hunks via the LLM fallback", async () => {
    // Build a single mock endpoint that knows every fixture's expected
    // resolution, keyed by the fixture's ours-snippet.
    const responses = new Map();
    for (const f of fixtures) {
      responses.set(f.oursSnippet, fenced(f.expectedResolution));
    }
    const endpoint = buildMockEndpoint(responses);

    // Fixtures are resolved one at a time, in fixture order.
    const results = [];
    for (const f of fixtures) {
      const merge = await resolveAsync(f.conflict, f.meta.filePath, {
        llmFallback: {
          enabled: true,
          endpoint,
          minPostMergeScore: 80,
          contextLines: 50,
        },
        // Tree-sitter is unavailable in unit-test env — disable parse-tree
        // validation. The LLM resolver still runs `validateMergedContent`
        // (residual markers + JSON/YAML/TOML syntax).
        validationLevel: "off",
      });
      results.push(classifyOutcome(f, merge));
    }

    // Per-fixture log (one line each) + summary.
    const lines = [];
    for (const r of results) {
      const symbol = r.outcome === "llm-resolved" ? "✓"
        : r.outcome === "deterministic-resolved" ? "·"
          : "✗";
      const score = r.validationScore !== undefined ? ` score=${r.validationScore}` : "";
      lines.push(` ${symbol} [${r.fixture.meta.category}/${r.fixture.meta.difficulty}] ${r.fixture.name} → ${r.decisionType}${score}`);
    }

    const llmResolved = results.filter((r) => r.outcome === "llm-resolved").length;
    const deterministic = results.filter((r) => r.outcome === "deterministic-resolved").length;
    const unresolved = results.filter((r) => r.outcome === "unresolved").length;
    const total = results.length;
    // Only LLM-resolved fixtures count as successes; deterministic hits are
    // reported separately because the fixtures are meant to be `complex`.
    const successRate = llmResolved / total;

    const summary = [
      "",
      "ConGra-mini bench summary:",
      ...lines,
      "",
      ` Total fixtures : ${total}`,
      ` LLM-resolved : ${llmResolved} (${(successRate * 100).toFixed(1)} %)`,
      ` Deterministic-resolved: ${deterministic} (regressions — expected to be complex)`,
      ` Unresolved : ${unresolved}`,
      ` Target : ≥ 80 % LLM-resolved`,
      "",
    ].join("\n");

    // eslint-disable-next-line no-console
    console.log(summary);

    expect(successRate, `LLM fallback resolved ${llmResolved}/${total} = ${(successRate * 100).toFixed(1)} % (target ≥ 80 %).\n${summary}`).toBeGreaterThanOrEqual(0.8);
  });
});
|
|
187
|
+
//# sourceMappingURL=congra-mini.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"congra-mini.test.js","sourceRoot":"","sources":["../../../src/__tests__/bench/congra-mini.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,iBAAiB,EAAE,MAAM,EAAE,MAAM,+BAA+B,CAAC;AAoB1E,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;AAEvE;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;IAC9D,IAAI,KAAK,KAAK,CAAC,CAAC;QAAE,OAAO,EAAE,CAAC;IAC5B,MAAM,GAAG,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,KAAK,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IACzG,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACrE,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,YAAY;IACnB,MAAM,OAAO,GAAG,WAAW,CAAC,aAAa,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACtD,MAAM,CAAC,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QACjC,OAAO,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACjC,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC;QAClE,MAAM,kBAAkB,GAAG,YAAY,CACrC,IAAI,CAAC,GAAG,EAAE,6BAA6B,CAAC,EACxC,OAAO,CACR,CAAC;QACF,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CACrB,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,OAAO,CAAC,CAC/B,CAAC
;QACjB,OAAO;YACL,IAAI;YACJ,QAAQ;YACR,kBAAkB;YAClB,IAAI;YACJ,WAAW,EAAE,kBAAkB,CAAC,QAAQ,CAAC;SAC1C,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAcD,SAAS,eAAe,CACtB,OAAgB,EAChB,MAAgD;IAEhD,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAClC,MAAM,eAAe,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;IAE9C,IAAI,CAAC,SAAS,IAAI,CAAC,eAAe,EAAE,CAAC;QACnC,OAAO;YACL,OAAO;YACP,OAAO,EAAE,YAAY;YACrB,YAAY,EAAE,SAAS;YACvB,MAAM,EAAE,oBAAoB;SAC7B,CAAC;IACJ,CAAC;IAED,0CAA0C;IAC1C,IAAI,SAAS,CAAC,IAAI,KAAK,cAAc,IAAI,eAAe,CAAC,YAAY,EAAE,CAAC;QACtE,OAAO;YACL,OAAO;YACP,OAAO,EAAE,cAAc;YACvB,YAAY,EAAE,SAAS,CAAC,IAAI;YAC5B,eAAe,EAAE,SAAS,CAAC,KAAK,CAAC,QAAQ,EAAE,eAAe;YAC1D,MAAM,EAAE,eAAe,CAAC,gBAAgB;SACzC,CAAC;IACJ,CAAC;IAED,0EAA0E;IAC1E,iEAAiE;IACjE,IAAI,eAAe,CAAC,YAAY,EAAE,CAAC;QACjC,OAAO;YACL,OAAO;YACP,OAAO,EAAE,wBAAwB;YACjC,YAAY,EAAE,SAAS,CAAC,IAAI;YAC5B,MAAM,EAAE,eAAe,CAAC,gBAAgB;SACzC,CAAC;IACJ,CAAC;IAED,OAAO;QACL,OAAO;QACP,OAAO,EAAE,YAAY;QACrB,YAAY,EAAE,SAAS,CAAC,IAAI;QAC5B,eAAe,EAAE,SAAS,CAAC,KAAK,CAAC,QAAQ,EAAE,eAAe;QAC1D,MAAM,EAAE,eAAe,CAAC,gBAAgB;KACzC,CAAC;AACJ,CAAC;AAED,4DAA4D;AAE5D,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,MAAM,CAAC;AAE/C,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,6CAA6C,EAAE,GAAG,EAAE;IACxE,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAEhC,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YACxC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YACxC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YACxC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACrE,mEAAmE;QACnE,kEAAkE;QAClE,yBAAyB;QACzB,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC5C,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC;QAC7D,CAAC;Q
ACD,MAAM,QAAQ,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAE9C,MAAM,OAAO,GAAoB,EAAE,CAAC;QAEpC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAC5D,WAAW,EAAE;oBACX,OAAO,EAAE,IAAI;oBACb,QAAQ;oBACR,iBAAiB,EAAE,EAAE;oBACrB,YAAY,EAAE,EAAE;iBACjB;gBACD,mEAAmE;gBACnE,kEAAkE;gBAClE,8CAA8C;gBAC9C,eAAe,EAAE,KAAK;aACvB,CAAC,CAAC;YACH,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;QAC1C,CAAC;QAED,6CAA6C;QAC7C,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,MAAM,GACV,CAAC,CAAC,OAAO,KAAK,cAAc,CAAC,CAAC,CAAC,GAAG;gBAClC,CAAC,CAAC,CAAC,CAAC,OAAO,KAAK,wBAAwB,CAAC,CAAC,CAAC,GAAG;oBAC9C,CAAC,CAAC,GAAG,CAAC;YACR,MAAM,KAAK,GAAG,CAAC,CAAC,eAAe,KAAK,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACnF,KAAK,CAAC,IAAI,CACR,KAAK,MAAM,KAAK,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,CAAC,YAAY,GAAG,KAAK,EAAE,CACtH,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;QAC/E,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,wBAAwB,CAAC,CAAC,MAAM,CAAC;QAC3F,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,YAAY,CAAC,CAAC,MAAM,CAAC;QAC5E,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,MAAM,WAAW,GAAG,WAAW,GAAG,KAAK,CAAC;QAExC,MAAM,OAAO,GAAG;YACd,EAAE;YACF,4BAA4B;YAC5B,GAAG,KAAK;YACR,EAAE;YACF,6BAA6B,KAAK,EAAE;YACpC,6BAA6B,WAAW,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YAChF,6BAA6B,aAAa,yCAAyC;YACnF,6BAA6B,UAAU,EAAE;YACzC,+CAA+C;YAC/C,EAAE;SACH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,sCAAsC;QACtC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAErB,MAAM,CACJ,WAAW,EACX,yBAAyB,WAAW,IAAI,KAAK,MAAM,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,wBAAwB,OAAO,EAAE,CACnH,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.5 — Mock LLM endpoint for deterministic bench / integration tests.
|
|
3
|
+
*
|
|
4
|
+
* `@gitwand/core` never makes a network call: it consumes an injected
|
|
5
|
+
* `LlmEndpoint` whose `call(prompt)` returns the model output as a string.
|
|
6
|
+
* For tests, we want a reproducible endpoint that returns a pre-recorded
|
|
7
|
+
* response for each fixture without any randomness.
|
|
8
|
+
*
|
|
9
|
+
* ## Lookup strategy
|
|
10
|
+
*
|
|
11
|
+
* The pipeline serialises the conflict hunk (ours / base / theirs) verbatim
|
|
12
|
+
* inside the prompt — so we can match a fixture by searching the prompt for
|
|
13
|
+
* a unique substring. Two keys are supported, in order:
|
|
14
|
+
*
|
|
15
|
+
* 1. A literal `[FIXTURE: <name>]` marker injected by the caller (used by
|
|
16
|
+
* future prompt builders that want explicit tagging).
|
|
17
|
+
* 2. The `ours` snippet of the fixture — the prompt always contains the
|
|
18
|
+
* ours block verbatim between `<<<<<<< ours` and `||||||| base` (or
|
|
19
|
+
* `=======` for diff2). A unique substring of `oursLines` is therefore
|
|
20
|
+
* enough to disambiguate.
|
|
21
|
+
*
|
|
22
|
+
* If neither match yields a hit, the endpoint returns the empty string —
|
|
23
|
+
* which the pipeline treats as `CANNOT_RESOLVE` (lines: null, rejected).
|
|
24
|
+
* This is the expected behaviour for "the LLM has nothing useful to say".
|
|
25
|
+
*/
|
|
26
|
+
import type { LlmEndpoint } from "../../types.js";
|
|
27
|
+
/**
|
|
28
|
+
* Build a deterministic mock endpoint that returns pre-recorded responses
|
|
29
|
+
* keyed by a unique substring of the prompt.
|
|
30
|
+
*
|
|
31
|
+
* @param responses - Map of `lookupKey → rawResponseBody`. The raw response
|
|
32
|
+
* is returned as-is to the pipeline, which then parses
|
|
33
|
+
* fenced blocks / detects `CANNOT_RESOLVE` / etc.
|
|
34
|
+
* Wrap the resolution in triple backticks like a real
|
|
35
|
+
* LLM would.
|
|
36
|
+
*/
|
|
37
|
+
export declare function buildMockEndpoint(responses: Map<string, string>): LlmEndpoint;
|
|
38
|
+
/**
|
|
39
|
+
* Wrap a raw resolution body in a fenced code block, matching what a real
|
|
40
|
+
* LLM would output. The pipeline's `parseResponse()` extracts the first
|
|
41
|
+
* fenced block and uses it as the resolved lines.
|
|
42
|
+
*/
|
|
43
|
+
export declare function fenced(body: string): string;
|
|
44
|
+
//# sourceMappingURL=mock-llm-endpoint.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mock-llm-endpoint.d.ts","sourceRoot":"","sources":["../../../src/__tests__/utils/mock-llm-endpoint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAElD;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAC/B,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAC7B,WAAW,CAsBb;AAED;;;;GAIG;AACH,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE3C"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2.5 — Mock LLM endpoint for deterministic bench / integration tests.
|
|
3
|
+
*
|
|
4
|
+
* `@gitwand/core` never makes a network call: it consumes an injected
|
|
5
|
+
* `LlmEndpoint` whose `call(prompt)` returns the model output as a string.
|
|
6
|
+
* For tests, we want a reproducible endpoint that returns a pre-recorded
|
|
7
|
+
* response for each fixture without any randomness.
|
|
8
|
+
*
|
|
9
|
+
* ## Lookup strategy
|
|
10
|
+
*
|
|
11
|
+
* The pipeline serialises the conflict hunk (ours / base / theirs) verbatim
|
|
12
|
+
* inside the prompt — so we can match a fixture by searching the prompt for
|
|
13
|
+
* a unique substring. Two keys are supported, in order:
|
|
14
|
+
*
|
|
15
|
+
* 1. A literal `[FIXTURE: <name>]` marker injected by the caller (used by
|
|
16
|
+
* future prompt builders that want explicit tagging).
|
|
17
|
+
* 2. The `ours` snippet of the fixture — the prompt always contains the
|
|
18
|
+
* ours block verbatim between `<<<<<<< ours` and `||||||| base` (or
|
|
19
|
+
* `=======` for diff2). A unique substring of `oursLines` is therefore
|
|
20
|
+
* enough to disambiguate.
|
|
21
|
+
*
|
|
22
|
+
* If neither match yields a hit, the endpoint returns the empty string —
|
|
23
|
+
* which the pipeline treats as `CANNOT_RESOLVE` (lines: null, rejected).
|
|
24
|
+
* This is the expected behaviour for "the LLM has nothing useful to say".
|
|
25
|
+
*/
|
|
26
|
+
/**
 * Create a mock endpoint whose `call(prompt)` answers from a fixed table of
 * pre-recorded responses, with no randomness.
 *
 * Lookup happens in two passes:
 *  1. If the prompt contains a `[FIXTURE: <name>]` tag and `<name>` is a key
 *     of `responses` (with a value other than `undefined`), that value is
 *     returned.
 *  2. Otherwise the entries are scanned in iteration order and the value of
 *     the first non-empty key that occurs as a substring of the prompt is
 *     returned.
 *
 * When neither pass matches, the empty string is returned.
 *
 * @param responses - Map of `lookupKey → rawResponseBody`. The body is
 *                    returned to the caller unmodified.
 * @returns An object with a single async `call(prompt)` method resolving to
 *          the selected response string.
 */
export function buildMockEndpoint(responses) {
  const call = async (prompt) => {
    // Pass 1 — explicit fixture tag. An unknown tag falls through to pass 2.
    const tagged = prompt.match(/\[FIXTURE: ([^\]]+)\]/);
    if (tagged) {
      const byTag = responses.get(tagged[1]);
      if (byTag !== undefined) {
        return byTag;
      }
    }

    // Pass 2 — first entry (iteration order) whose non-empty key appears in
    // the prompt. Empty keys are skipped: they would match every prompt.
    const hit = [...responses].find(([key]) => key && prompt.includes(key));
    if (hit) {
      return hit[1];
    }

    // Nothing matched.
    return "";
  };

  return { call };
}
|
|
59
|
+
/**
 * Surround a resolution body with a bare triple-backtick fence: an opening
 * fence line, the body, then a closing fence line, joined by `\n`.
 *
 * @param body - Text to place between the fences.
 * @returns The fenced text.
 */
export function fenced(body) {
  return `\`\`\`\n${body}\n\`\`\``;
}
|
|
67
|
+
//# sourceMappingURL=mock-llm-endpoint.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mock-llm-endpoint.js","sourceRoot":"","sources":["../../../src/__tests__/utils/mock-llm-endpoint.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAIH;;;;;;;;;GASG;AACH,MAAM,UAAU,iBAAiB,CAC/B,SAA8B;IAE9B,OAAO;QACL,KAAK,CAAC,IAAI,CAAC,MAAc;YACvB,oDAAoD;YACpD,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;YACvD,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1C,IAAI,MAAM,KAAK,SAAS;oBAAE,OAAO,MAAM,CAAC;YAC1C,CAAC;YAED,sEAAsE;YACtE,mEAAmE;YACnE,wDAAwD;YACxD,KAAK,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,SAAS,EAAE,CAAC;gBACxC,IAAI,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,OAAO,QAAQ,CAAC;YACnD,CAAC;YAED,8DAA8D;YAC9D,kEAAkE;YAClE,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,MAAM,CAAC,IAAY;IACjC,OAAO,OAAO,GAAG,IAAI,GAAG,OAAO,CAAC;AAClC,CAAC"}
|
package/package.json
CHANGED