dialekt 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -10
- package/TESTING.md +29 -29
- package/dist/cli/main.d.mts +1 -1
- package/dist/cli/main.mjs +549 -362
- package/dist/formatters-De4Q-X1d.mjs +516 -435
- package/dist/index.d.mts +162 -25
- package/dist/index.mjs +119 -34
- package/package.json +3 -3
- package/pnpm-workspace.yaml +3 -3
- package/src/adapter/types.test.ts +57 -57
- package/src/adapter/types.ts +7 -4
- package/src/benchmark/metrics.test.ts +141 -69
- package/src/benchmark/metrics.ts +6 -6
- package/src/benchmark/report.test.ts +38 -38
- package/src/benchmark/report.ts +6 -6
- package/src/benchmark/runner.test.ts +70 -72
- package/src/benchmark/runner.ts +4 -4
- package/src/cli/commands/add.test.ts +90 -109
- package/src/cli/commands/add.ts +40 -28
- package/src/cli/commands/benchmark.test.ts +77 -64
- package/src/cli/commands/benchmark.ts +64 -41
- package/src/cli/commands/languages.test.ts +45 -42
- package/src/cli/commands/languages.ts +16 -12
- package/src/cli/commands/missing.test.ts +143 -92
- package/src/cli/commands/missing.ts +24 -17
- package/src/cli/commands/translate.test.ts +79 -79
- package/src/cli/commands/translate.ts +41 -31
- package/src/cli/commands/unused.test.ts +62 -51
- package/src/cli/commands/unused.ts +18 -14
- package/src/cli/commands/validate.test.ts +130 -72
- package/src/cli/commands/validate.ts +25 -20
- package/src/cli/config-resolution.test.ts +169 -49
- package/src/cli/config-resolution.ts +5 -7
- package/src/cli/format.test.ts +50 -50
- package/src/cli/format.ts +57 -60
- package/src/cli/formatters.test.ts +128 -106
- package/src/cli/formatters.ts +72 -95
- package/src/cli/main.ts +13 -13
- package/src/config/define-config.test.ts +44 -29
- package/src/config/define-config.ts +1 -1
- package/src/config/load-config.test.ts +21 -18
- package/src/config/load-config.ts +5 -5
- package/src/config/types.test.ts +50 -44
- package/src/config/types.ts +2 -2
- package/src/index.ts +22 -26
- package/src/keys/flatten.test.ts +52 -52
- package/src/keys/flatten.ts +7 -9
- package/src/sdk/file-io.test.ts +47 -59
- package/src/sdk/file-io.ts +2 -2
- package/src/sdk/node-layer.test.ts +18 -18
- package/src/sdk/node-layer.ts +2 -2
- package/src/sdk/php-array-reader.test.ts +49 -40
- package/src/sdk/php-array-reader.ts +5 -5
- package/src/translation/chunking.test.ts +52 -44
- package/src/translation/chunking.ts +1 -1
- package/src/translation/missing-keys.test.ts +86 -93
- package/src/translation/missing-keys.ts +4 -6
- package/src/translation/model-registry.test.ts +41 -32
- package/src/translation/model-registry.ts +9 -9
- package/src/translation/one-shot-strategy.test.ts +105 -86
- package/src/translation/one-shot-strategy.ts +10 -12
- package/src/translation/orchestrator.test.ts +90 -101
- package/src/translation/orchestrator.ts +26 -26
- package/src/translation/prompt.test.ts +76 -76
- package/src/translation/prompt.ts +2 -2
- package/src/translation/tool-loop-strategy.test.ts +134 -107
- package/src/translation/tool-loop-strategy.ts +14 -18
- package/src/translation/types.test.ts +22 -22
- package/src/translation/types.ts +3 -3
- package/tsdown.config.ts +3 -3
- package/vitest.config.ts +3 -3
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
import { describe, expect, it } from
|
|
2
|
-
import { Effect, Option } from
|
|
3
|
-
import { runBenchmarkCommand, benchmarkCommand } from
|
|
4
|
-
import type { DialektConfig } from
|
|
5
|
-
import type { TranslationAdapter, ResourceRef } from
|
|
6
|
-
import type { TranslationStrategy, TranslationContext } from
|
|
7
|
-
import type { StrategyBenchmarkSummary } from
|
|
8
|
-
|
|
9
|
-
describe(
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { Effect, Option } from "effect";
|
|
3
|
+
import { runBenchmarkCommand, benchmarkCommand } from "./benchmark.js";
|
|
4
|
+
import type { DialektConfig } from "../../config/types.js";
|
|
5
|
+
import type { TranslationAdapter, ResourceRef } from "../../adapter/types.js";
|
|
6
|
+
import type { TranslationStrategy, TranslationContext } from "../../translation/types.js";
|
|
7
|
+
import type { StrategyBenchmarkSummary } from "../../benchmark/metrics.js";
|
|
8
|
+
|
|
9
|
+
describe("runBenchmarkCommand", () => {
|
|
10
10
|
const baseConfig: DialektConfig = {
|
|
11
|
-
sourceLocale:
|
|
12
|
-
targetLocales: [
|
|
13
|
-
strategy:
|
|
14
|
-
model: { provider:
|
|
15
|
-
fastModel: { provider:
|
|
11
|
+
sourceLocale: "en",
|
|
12
|
+
targetLocales: ["de"],
|
|
13
|
+
strategy: "one-shot",
|
|
14
|
+
model: { provider: "openai", modelId: "gpt-4o" },
|
|
15
|
+
fastModel: { provider: "openai", modelId: "gpt-4o-mini" },
|
|
16
16
|
chunking: { maxTokens: 3000, charsPerToken: 3.0, concurrency: 3 },
|
|
17
17
|
retry: { maxAttempts: 3, baseDelayMs: 1000 },
|
|
18
18
|
adapters: [],
|
|
@@ -29,10 +29,11 @@ describe('runBenchmarkCommand', () => {
|
|
|
29
29
|
return {
|
|
30
30
|
name: opts.name,
|
|
31
31
|
capabilities: { canCreateResource: true, unusedKeyDetection: false },
|
|
32
|
-
listLocales: () => Effect.succeed(opts.locales ?? [
|
|
33
|
-
listResources: () =>
|
|
32
|
+
listLocales: () => Effect.succeed(opts.locales ?? ["en", "de"]),
|
|
33
|
+
listResources: () =>
|
|
34
|
+
Effect.succeed(opts.resources ?? [{ key: "messages", label: "messages" }]),
|
|
34
35
|
readResource: (_locale: string) =>
|
|
35
|
-
Effect.succeed(_locale ===
|
|
36
|
+
Effect.succeed(_locale === "en" ? (opts.sourceMap ?? {}) : (opts.targetMap ?? {})),
|
|
36
37
|
writeResource: () => Effect.void,
|
|
37
38
|
};
|
|
38
39
|
}
|
|
@@ -44,11 +45,18 @@ describe('runBenchmarkCommand', () => {
|
|
|
44
45
|
configLoader: () => Effect.succeed(baseConfig),
|
|
45
46
|
modelResolver: () => Effect.succeed({} as unknown),
|
|
46
47
|
missingKeysComputer: () =>
|
|
47
|
-
Effect.succeed([
|
|
48
|
+
Effect.succeed([
|
|
49
|
+
{
|
|
50
|
+
adapter: "test",
|
|
51
|
+
locale: "de",
|
|
52
|
+
resource: { key: "messages", label: "messages" } as ResourceRef,
|
|
53
|
+
missing: ["hello"] as readonly string[],
|
|
54
|
+
},
|
|
55
|
+
]),
|
|
48
56
|
benchmarkRunner: () =>
|
|
49
57
|
Effect.succeed([
|
|
50
58
|
{
|
|
51
|
-
strategyName:
|
|
59
|
+
strategyName: "one-shot",
|
|
52
60
|
totalChunks: 1,
|
|
53
61
|
succeededChunks: 1,
|
|
54
62
|
failedChunks: 0,
|
|
@@ -63,7 +71,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
63
71
|
};
|
|
64
72
|
}
|
|
65
73
|
|
|
66
|
-
it(
|
|
74
|
+
it("logs a cost warning", async () => {
|
|
67
75
|
const errors: string[] = [];
|
|
68
76
|
const deps = makeDeps({
|
|
69
77
|
errorLogger: (msg) => Effect.sync(() => errors.push(msg)),
|
|
@@ -71,7 +79,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
71
79
|
|
|
72
80
|
const program = runBenchmarkCommand(
|
|
73
81
|
{
|
|
74
|
-
config:
|
|
82
|
+
config: "./config.ts",
|
|
75
83
|
adapter: Option.none(),
|
|
76
84
|
strategies: Option.none(),
|
|
77
85
|
sampleSize: Option.none(),
|
|
@@ -80,10 +88,10 @@ describe('runBenchmarkCommand', () => {
|
|
|
80
88
|
);
|
|
81
89
|
|
|
82
90
|
await Effect.runPromise(program);
|
|
83
|
-
expect(errors.some((e) => e.includes(
|
|
91
|
+
expect(errors.some((e) => e.includes("Warning") && e.includes("cost"))).toBe(true);
|
|
84
92
|
});
|
|
85
93
|
|
|
86
|
-
it(
|
|
94
|
+
it("uses default strategies when none specified", async () => {
|
|
87
95
|
let usedStrategies: readonly string[] | undefined;
|
|
88
96
|
const deps = makeDeps({
|
|
89
97
|
benchmarkRunner: (opts) =>
|
|
@@ -95,7 +103,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
95
103
|
|
|
96
104
|
const program = runBenchmarkCommand(
|
|
97
105
|
{
|
|
98
|
-
config:
|
|
106
|
+
config: "./config.ts",
|
|
99
107
|
adapter: Option.none(),
|
|
100
108
|
strategies: Option.none(),
|
|
101
109
|
sampleSize: Option.none(),
|
|
@@ -104,10 +112,10 @@ describe('runBenchmarkCommand', () => {
|
|
|
104
112
|
);
|
|
105
113
|
|
|
106
114
|
await Effect.runPromise(program);
|
|
107
|
-
expect(usedStrategies).toEqual([
|
|
115
|
+
expect(usedStrategies).toEqual(["one-shot", "tool-loop-agent"]);
|
|
108
116
|
});
|
|
109
117
|
|
|
110
|
-
it(
|
|
118
|
+
it("uses custom strategies when --strategies is passed", async () => {
|
|
111
119
|
let usedStrategies: readonly string[] | undefined;
|
|
112
120
|
const deps = makeDeps({
|
|
113
121
|
benchmarkRunner: (opts) =>
|
|
@@ -119,26 +127,26 @@ describe('runBenchmarkCommand', () => {
|
|
|
119
127
|
|
|
120
128
|
const program = runBenchmarkCommand(
|
|
121
129
|
{
|
|
122
|
-
config:
|
|
130
|
+
config: "./config.ts",
|
|
123
131
|
adapter: Option.none(),
|
|
124
|
-
strategies: Option.some(
|
|
132
|
+
strategies: Option.some("tool-loop-agent"),
|
|
125
133
|
sampleSize: Option.none(),
|
|
126
134
|
},
|
|
127
135
|
deps,
|
|
128
136
|
);
|
|
129
137
|
|
|
130
138
|
await Effect.runPromise(program);
|
|
131
|
-
expect(usedStrategies).toEqual([
|
|
139
|
+
expect(usedStrategies).toEqual(["tool-loop-agent"]);
|
|
132
140
|
});
|
|
133
141
|
|
|
134
|
-
it(
|
|
142
|
+
it("limits sample size with --sample-size", async () => {
|
|
135
143
|
let usedChunks: readonly TranslationContext[] | undefined;
|
|
136
144
|
const adapter = makeAdapter({
|
|
137
|
-
name:
|
|
138
|
-
sourceMap: { a:
|
|
145
|
+
name: "test",
|
|
146
|
+
sourceMap: { a: "A", b: "B", c: "C" },
|
|
139
147
|
targetMap: {},
|
|
140
148
|
});
|
|
141
|
-
const config = { ...baseConfig, adapters: [adapter] as unknown as DialektConfig[
|
|
149
|
+
const config = { ...baseConfig, adapters: [adapter] as unknown as DialektConfig["adapters"] };
|
|
142
150
|
const deps = makeDeps({
|
|
143
151
|
configLoader: () => Effect.succeed(config),
|
|
144
152
|
benchmarkRunner: (opts) =>
|
|
@@ -148,13 +156,18 @@ describe('runBenchmarkCommand', () => {
|
|
|
148
156
|
}),
|
|
149
157
|
missingKeysComputer: () =>
|
|
150
158
|
Effect.succeed([
|
|
151
|
-
{
|
|
159
|
+
{
|
|
160
|
+
adapter: "test",
|
|
161
|
+
locale: "de",
|
|
162
|
+
resource: { key: "messages", label: "messages" } as ResourceRef,
|
|
163
|
+
missing: ["a", "b", "c"] as readonly string[],
|
|
164
|
+
},
|
|
152
165
|
]),
|
|
153
166
|
});
|
|
154
167
|
|
|
155
168
|
const program = runBenchmarkCommand(
|
|
156
169
|
{
|
|
157
|
-
config:
|
|
170
|
+
config: "./config.ts",
|
|
158
171
|
adapter: Option.none(),
|
|
159
172
|
strategies: Option.none(),
|
|
160
173
|
sampleSize: Option.some(1),
|
|
@@ -166,7 +179,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
166
179
|
expect(usedChunks).toHaveLength(1);
|
|
167
180
|
});
|
|
168
181
|
|
|
169
|
-
it(
|
|
182
|
+
it("outputs JSON report when --format json is passed", async () => {
|
|
170
183
|
const logs: string[] = [];
|
|
171
184
|
const deps = makeDeps({
|
|
172
185
|
logger: (msg) => Effect.sync(() => logs.push(msg)),
|
|
@@ -174,11 +187,11 @@ describe('runBenchmarkCommand', () => {
|
|
|
174
187
|
|
|
175
188
|
const program = runBenchmarkCommand(
|
|
176
189
|
{
|
|
177
|
-
config:
|
|
190
|
+
config: "./config.ts",
|
|
178
191
|
adapter: Option.none(),
|
|
179
192
|
strategies: Option.none(),
|
|
180
193
|
sampleSize: Option.none(),
|
|
181
|
-
format: Option.some(
|
|
194
|
+
format: Option.some("json"),
|
|
182
195
|
},
|
|
183
196
|
deps,
|
|
184
197
|
);
|
|
@@ -187,10 +200,10 @@ describe('runBenchmarkCommand', () => {
|
|
|
187
200
|
expect(logs).toHaveLength(1);
|
|
188
201
|
const parsed = JSON.parse(logs[0]!);
|
|
189
202
|
expect(parsed).toBeInstanceOf(Array);
|
|
190
|
-
expect(parsed[0]).toMatchObject({ strategyName:
|
|
203
|
+
expect(parsed[0]).toMatchObject({ strategyName: "one-shot" });
|
|
191
204
|
});
|
|
192
205
|
|
|
193
|
-
it(
|
|
206
|
+
it("outputs benchmark data by default", async () => {
|
|
194
207
|
const logs: string[] = [];
|
|
195
208
|
const deps = makeDeps({
|
|
196
209
|
logger: (msg) => Effect.sync(() => logs.push(msg)),
|
|
@@ -198,7 +211,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
198
211
|
|
|
199
212
|
const program = runBenchmarkCommand(
|
|
200
213
|
{
|
|
201
|
-
config:
|
|
214
|
+
config: "./config.ts",
|
|
202
215
|
adapter: Option.none(),
|
|
203
216
|
strategies: Option.none(),
|
|
204
217
|
sampleSize: Option.none(),
|
|
@@ -212,11 +225,11 @@ describe('runBenchmarkCommand', () => {
|
|
|
212
225
|
expect(parsed).toBeInstanceOf(Array);
|
|
213
226
|
});
|
|
214
227
|
|
|
215
|
-
it(
|
|
228
|
+
it("filters adapters by --adapter flag", async () => {
|
|
216
229
|
let queriedAdapter: string | undefined;
|
|
217
|
-
const a1 = makeAdapter({ name:
|
|
218
|
-
const a2 = makeAdapter({ name:
|
|
219
|
-
const config = { ...baseConfig, adapters: [a1, a2] as unknown as DialektConfig[
|
|
230
|
+
const a1 = makeAdapter({ name: "a1" });
|
|
231
|
+
const a2 = makeAdapter({ name: "a2" });
|
|
232
|
+
const config = { ...baseConfig, adapters: [a1, a2] as unknown as DialektConfig["adapters"] };
|
|
220
233
|
const deps = makeDeps({
|
|
221
234
|
configLoader: () => Effect.succeed(config),
|
|
222
235
|
missingKeysComputer: (a) =>
|
|
@@ -228,8 +241,8 @@ describe('runBenchmarkCommand', () => {
|
|
|
228
241
|
|
|
229
242
|
const program = runBenchmarkCommand(
|
|
230
243
|
{
|
|
231
|
-
config:
|
|
232
|
-
adapter: Option.some(
|
|
244
|
+
config: "./config.ts",
|
|
245
|
+
adapter: Option.some("a2"),
|
|
233
246
|
strategies: Option.none(),
|
|
234
247
|
sampleSize: Option.none(),
|
|
235
248
|
},
|
|
@@ -237,17 +250,17 @@ describe('runBenchmarkCommand', () => {
|
|
|
237
250
|
);
|
|
238
251
|
|
|
239
252
|
await Effect.runPromise(program);
|
|
240
|
-
expect(queriedAdapter).toBe(
|
|
253
|
+
expect(queriedAdapter).toBe("a2");
|
|
241
254
|
});
|
|
242
255
|
|
|
243
|
-
it(
|
|
256
|
+
it("fails when configLoader fails", async () => {
|
|
244
257
|
const deps = makeDeps({
|
|
245
|
-
configLoader: () => Effect.fail(new Error(
|
|
258
|
+
configLoader: () => Effect.fail(new Error("Config not found")),
|
|
246
259
|
});
|
|
247
260
|
|
|
248
261
|
const program = runBenchmarkCommand(
|
|
249
262
|
{
|
|
250
|
-
config:
|
|
263
|
+
config: "./missing.ts",
|
|
251
264
|
adapter: Option.none(),
|
|
252
265
|
strategies: Option.none(),
|
|
253
266
|
sampleSize: Option.none(),
|
|
@@ -255,17 +268,17 @@ describe('runBenchmarkCommand', () => {
|
|
|
255
268
|
deps,
|
|
256
269
|
);
|
|
257
270
|
|
|
258
|
-
await expect(Effect.runPromise(program)).rejects.toThrow(
|
|
271
|
+
await expect(Effect.runPromise(program)).rejects.toThrow("Config not found");
|
|
259
272
|
});
|
|
260
273
|
|
|
261
|
-
it(
|
|
274
|
+
it("fails when modelResolver fails", async () => {
|
|
262
275
|
const deps = makeDeps({
|
|
263
|
-
modelResolver: () => Effect.fail(new Error(
|
|
276
|
+
modelResolver: () => Effect.fail(new Error("Model unavailable")),
|
|
264
277
|
});
|
|
265
278
|
|
|
266
279
|
const program = runBenchmarkCommand(
|
|
267
280
|
{
|
|
268
|
-
config:
|
|
281
|
+
config: "./config.ts",
|
|
269
282
|
adapter: Option.none(),
|
|
270
283
|
strategies: Option.none(),
|
|
271
284
|
sampleSize: Option.none(),
|
|
@@ -273,17 +286,17 @@ describe('runBenchmarkCommand', () => {
|
|
|
273
286
|
deps,
|
|
274
287
|
);
|
|
275
288
|
|
|
276
|
-
await expect(Effect.runPromise(program)).rejects.toThrow(
|
|
289
|
+
await expect(Effect.runPromise(program)).rejects.toThrow("Model unavailable");
|
|
277
290
|
});
|
|
278
291
|
|
|
279
|
-
it(
|
|
292
|
+
it("fails when benchmarkRunner fails", async () => {
|
|
280
293
|
const deps = makeDeps({
|
|
281
|
-
benchmarkRunner: () => Effect.fail(new Error(
|
|
294
|
+
benchmarkRunner: () => Effect.fail(new Error("Benchmark crashed")),
|
|
282
295
|
});
|
|
283
296
|
|
|
284
297
|
const program = runBenchmarkCommand(
|
|
285
298
|
{
|
|
286
|
-
config:
|
|
299
|
+
config: "./config.ts",
|
|
287
300
|
adapter: Option.none(),
|
|
288
301
|
strategies: Option.none(),
|
|
289
302
|
sampleSize: Option.none(),
|
|
@@ -291,10 +304,10 @@ describe('runBenchmarkCommand', () => {
|
|
|
291
304
|
deps,
|
|
292
305
|
);
|
|
293
306
|
|
|
294
|
-
await expect(Effect.runPromise(program)).rejects.toThrow(
|
|
307
|
+
await expect(Effect.runPromise(program)).rejects.toThrow("Benchmark crashed");
|
|
295
308
|
});
|
|
296
309
|
|
|
297
|
-
it(
|
|
310
|
+
it("handles empty adapter list", async () => {
|
|
298
311
|
const logs: string[] = [];
|
|
299
312
|
const config = { ...baseConfig, adapters: [] };
|
|
300
313
|
const deps = makeDeps({
|
|
@@ -305,7 +318,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
305
318
|
|
|
306
319
|
const program = runBenchmarkCommand(
|
|
307
320
|
{
|
|
308
|
-
config:
|
|
321
|
+
config: "./config.ts",
|
|
309
322
|
adapter: Option.none(),
|
|
310
323
|
strategies: Option.none(),
|
|
311
324
|
sampleSize: Option.none(),
|
|
@@ -319,7 +332,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
319
332
|
expect(parsed).toEqual([]);
|
|
320
333
|
});
|
|
321
334
|
|
|
322
|
-
it(
|
|
335
|
+
it("handles adapter with no missing keys", async () => {
|
|
323
336
|
let usedChunks: readonly TranslationContext[] | undefined;
|
|
324
337
|
const deps = makeDeps({
|
|
325
338
|
missingKeysComputer: () => Effect.succeed([]),
|
|
@@ -332,7 +345,7 @@ describe('runBenchmarkCommand', () => {
|
|
|
332
345
|
|
|
333
346
|
const program = runBenchmarkCommand(
|
|
334
347
|
{
|
|
335
|
-
config:
|
|
348
|
+
config: "./config.ts",
|
|
336
349
|
adapter: Option.none(),
|
|
337
350
|
strategies: Option.none(),
|
|
338
351
|
sampleSize: Option.none(),
|
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
import { Command, Options } from
|
|
2
|
-
import { Effect, Console, Option } from
|
|
3
|
-
import { loadConfig } from
|
|
4
|
-
import { resolveEffectiveConfig } from
|
|
5
|
-
import { resolveModel } from
|
|
6
|
-
import { createOneShotStrategy } from
|
|
7
|
-
import { createToolLoopStrategy } from
|
|
8
|
-
import { chunkKeys } from
|
|
9
|
-
import { computeMissingKeys } from
|
|
10
|
-
import { runBenchmark } from
|
|
11
|
-
import { detectFormat, type OutputFormat } from
|
|
12
|
-
import { formatBenchmark, formatError } from
|
|
13
|
-
import type { DialektConfig } from
|
|
14
|
-
import type { TranslationAdapter, ResourceRef } from
|
|
15
|
-
import type { TranslationStrategy, TranslationContext } from
|
|
16
|
-
import type { StrategyBenchmarkSummary } from
|
|
1
|
+
import { Command, Options } from "@effect/cli";
|
|
2
|
+
import { Effect, Console, Option } from "effect";
|
|
3
|
+
import { loadConfig } from "../../config/load-config.js";
|
|
4
|
+
import { resolveEffectiveConfig } from "../config-resolution.js";
|
|
5
|
+
import { resolveModel } from "../../translation/model-registry.js";
|
|
6
|
+
import { createOneShotStrategy } from "../../translation/one-shot-strategy.js";
|
|
7
|
+
import { createToolLoopStrategy } from "../../translation/tool-loop-strategy.js";
|
|
8
|
+
import { chunkKeys } from "../../translation/chunking.js";
|
|
9
|
+
import { computeMissingKeys } from "../../translation/missing-keys.js";
|
|
10
|
+
import { runBenchmark } from "../../benchmark/runner.js";
|
|
11
|
+
import { detectFormat, type OutputFormat } from "../format.js";
|
|
12
|
+
import { formatBenchmark, formatError } from "../formatters.js";
|
|
13
|
+
import type { DialektConfig } from "../../config/types.js";
|
|
14
|
+
import type { TranslationAdapter, ResourceRef } from "../../adapter/types.js";
|
|
15
|
+
import type { TranslationStrategy, TranslationContext } from "../../translation/types.js";
|
|
16
|
+
import type { StrategyBenchmarkSummary } from "../../benchmark/metrics.js";
|
|
17
17
|
|
|
18
18
|
export interface BenchmarkFlags {
|
|
19
19
|
readonly config: string;
|
|
@@ -25,18 +25,32 @@ export interface BenchmarkFlags {
|
|
|
25
25
|
|
|
26
26
|
export interface BenchmarkDeps {
|
|
27
27
|
readonly configLoader: (path: string) => Effect.Effect<DialektConfig, unknown>;
|
|
28
|
-
readonly modelResolver: (config: {
|
|
28
|
+
readonly modelResolver: (config: {
|
|
29
|
+
provider: string;
|
|
30
|
+
modelId: string;
|
|
31
|
+
}) => Effect.Effect<unknown, unknown>;
|
|
29
32
|
readonly missingKeysComputer: (
|
|
30
33
|
adapter: TranslationAdapter,
|
|
31
34
|
sourceLocale: string,
|
|
32
35
|
targetLocales: readonly string[],
|
|
33
|
-
) => Effect.Effect<
|
|
36
|
+
) => Effect.Effect<
|
|
37
|
+
readonly {
|
|
38
|
+
adapter: string;
|
|
39
|
+
locale: string;
|
|
40
|
+
resource: ResourceRef;
|
|
41
|
+
missing: readonly string[];
|
|
42
|
+
}[],
|
|
43
|
+
unknown
|
|
44
|
+
>;
|
|
34
45
|
readonly benchmarkRunner: (opts: {
|
|
35
46
|
strategies: readonly TranslationStrategy[];
|
|
36
47
|
chunks: readonly TranslationContext[];
|
|
37
48
|
concurrency: number;
|
|
38
49
|
}) => Effect.Effect<readonly StrategyBenchmarkSummary[], unknown>;
|
|
39
|
-
readonly reportFormatter?: (
|
|
50
|
+
readonly reportFormatter?: (
|
|
51
|
+
summaries: readonly StrategyBenchmarkSummary[],
|
|
52
|
+
format: "table" | "json",
|
|
53
|
+
) => string;
|
|
40
54
|
readonly logger: (msg: string) => Effect.Effect<void>;
|
|
41
55
|
readonly errorLogger: (msg: string) => Effect.Effect<void>;
|
|
42
56
|
}
|
|
@@ -48,7 +62,7 @@ export function runBenchmarkCommand(
|
|
|
48
62
|
return Effect.gen(function* () {
|
|
49
63
|
yield* deps.errorLogger(
|
|
50
64
|
formatError(
|
|
51
|
-
|
|
65
|
+
"Warning: This will make real API calls to the configured model provider(s) and may incur cost.",
|
|
52
66
|
detectFormat(
|
|
53
67
|
flags.format !== undefined
|
|
54
68
|
? (Option.getOrUndefined(flags.format) as OutputFormat | undefined)
|
|
@@ -63,14 +77,17 @@ export function runBenchmarkCommand(
|
|
|
63
77
|
loaded,
|
|
64
78
|
);
|
|
65
79
|
|
|
66
|
-
const strategyNames = Option.getOrElse(flags.strategies, () =>
|
|
67
|
-
.split(
|
|
68
|
-
.map((s: string) => s.trim()) as Array<
|
|
80
|
+
const strategyNames = Option.getOrElse(flags.strategies, () => "one-shot,tool-loop-agent")
|
|
81
|
+
.split(",")
|
|
82
|
+
.map((s: string) => s.trim()) as Array<"one-shot" | "tool-loop-agent">;
|
|
69
83
|
|
|
70
|
-
const model = yield* deps.modelResolver(effective.model) as Effect.Effect<
|
|
84
|
+
const model = yield* deps.modelResolver(effective.model) as Effect.Effect<
|
|
85
|
+
import("ai").LanguageModel,
|
|
86
|
+
unknown
|
|
87
|
+
>;
|
|
71
88
|
|
|
72
89
|
const strategyList = strategyNames.map((name) =>
|
|
73
|
-
name ===
|
|
90
|
+
name === "tool-loop-agent"
|
|
74
91
|
? createToolLoopStrategy({ model, retry: effective.retry })
|
|
75
92
|
: createOneShotStrategy({ model, retry: effective.retry }),
|
|
76
93
|
);
|
|
@@ -102,7 +119,10 @@ export function runBenchmarkCommand(
|
|
|
102
119
|
}
|
|
103
120
|
}
|
|
104
121
|
|
|
105
|
-
const sampled = allChunks.slice(
|
|
122
|
+
const sampled = allChunks.slice(
|
|
123
|
+
0,
|
|
124
|
+
Option.getOrElse(flags.sampleSize, () => 20),
|
|
125
|
+
);
|
|
106
126
|
|
|
107
127
|
const summaries = yield* deps.benchmarkRunner({
|
|
108
128
|
strategies: strategyList,
|
|
@@ -130,19 +150,22 @@ export function runBenchmarkCommand(
|
|
|
130
150
|
});
|
|
131
151
|
}
|
|
132
152
|
|
|
133
|
-
export const benchmarkCommand = Command.make(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
153
|
+
export const benchmarkCommand = Command.make(
|
|
154
|
+
"benchmark",
|
|
155
|
+
{
|
|
156
|
+
config: Options.text("config").pipe(Options.withDefault("./dialekt.config.ts")),
|
|
157
|
+
adapter: Options.optional(Options.text("adapter")),
|
|
158
|
+
strategies: Options.optional(Options.text("strategies")),
|
|
159
|
+
sampleSize: Options.optional(Options.integer("sample-size")),
|
|
160
|
+
format: Options.optional(Options.text("format")),
|
|
161
|
+
},
|
|
162
|
+
(flags) =>
|
|
163
|
+
runBenchmarkCommand(flags, {
|
|
164
|
+
configLoader: loadConfig,
|
|
165
|
+
modelResolver: resolveModel,
|
|
166
|
+
missingKeysComputer: computeMissingKeys as unknown as BenchmarkDeps["missingKeysComputer"],
|
|
167
|
+
benchmarkRunner: runBenchmark,
|
|
168
|
+
logger: (msg: string) => Console.log(msg),
|
|
169
|
+
errorLogger: (msg: string) => Console.error(msg),
|
|
170
|
+
}),
|
|
148
171
|
);
|