@flotorch/loadtest 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/index.js +2163 -0
- package/package.json +45 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,2163 @@
|
|
|
1
|
+
#!/usr/bin/env node
// --- esbuild/tsup bundler runtime helpers (generated) ---
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
// Shim for dynamic require() inside an ESM bundle: when a real CJS `require`
// exists it is used directly; otherwise a Proxy defers the lookup and the
// fallback function throws at call time.
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
  get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
}) : x)(function(x) {
  if (typeof require !== "undefined") return require.apply(this, arguments);
  throw Error('Dynamic require of "' + x + '" is not supported');
});
// Lazy module initializer: runs the single factory in `fn` exactly once
// (`fn = 0` clears it after the first call) and caches the result in `res`.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Define live, enumerable getter re-exports on a namespace object.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
|
|
17
|
+
|
|
18
|
+
// node_modules/.pnpm/tsup@8.5.1_tsx@4.21.0_typescript@5.9.3/node_modules/tsup/assets/esm_shims.js
// NOTE(review): `path` and `fileURLToPath` are imported but unused by the
// visible bundle — they belong to tsup's __dirname/__filename shim, whose
// factory body here is empty.
import path from "path";
import { fileURLToPath } from "url";
var init_esm_shims = __esm({
  "node_modules/.pnpm/tsup@8.5.1_tsx@4.21.0_typescript@5.9.3/node_modules/tsup/assets/esm_shims.js"() {
    "use strict";
  }
});
|
|
26
|
+
|
|
27
|
+
// src/generator/tokenizer.ts
import { encode } from "gpt-tokenizer";
// Count GPT-style BPE tokens in `text` (length of the encoded id sequence).
function countTokens(text) {
  const tokenIds = encode(text);
  return tokenIds.length;
}
// Lazy module initializer (tsup/esbuild bundling pattern).
var init_tokenizer = __esm({
  "src/generator/tokenizer.ts"() {
    "use strict";
    init_esm_shims();
  }
});
|
|
38
|
+
|
|
39
|
+
// src/generator/file.ts
var file_exports = {};
__export(file_exports, {
  FileGenerator: () => FileGenerator
});
import { readFileSync as readFileSync3 } from "fs";
var FileGenerator;
var init_file = __esm({
  "src/generator/file.ts"() {
    "use strict";
    init_esm_shims();
    init_tokenizer();
    // Replays prompts from a JSONL file. Each line may carry `text`/`prompt`,
    // an optional precomputed `tokenCount`, and an output-token target
    // (`outputTokenTarget` or `max_tokens`, default 256).
    FileGenerator = class {
      records;
      constructor(filePath) {
        const content = readFileSync3(filePath, "utf-8");
        this.records = content.split("\n").filter((line) => line.trim().length > 0).map((line) => {
          const parsed = JSON.parse(line);
          const text = parsed.text ?? parsed.prompt ?? "";
          return {
            text,
            // Tokenizing is relatively expensive, so honor a precomputed count.
            tokenCount: parsed.tokenCount ?? countTokens(text),
            outputTokenTarget: parsed.outputTokenTarget ?? parsed.max_tokens ?? 256
          };
        });
        // Fail fast on an empty/blank file: otherwise generate() pushes
        // `undefined` entries (i % 0 is NaN, so records[NaN] is undefined)
        // and generateOne() throws an opaque TypeError when spreading a
        // missing record.
        if (this.records.length === 0) {
          throw new Error(`No prompt records found in input file: ${filePath}`);
        }
      }
      // Return `count` records, cycling through the file in order.
      generate(count) {
        const results = [];
        for (let i = 0; i < count; i++) {
          results.push(this.records[i % this.records.length]);
        }
        return results;
      }
      // Pick a random record; the input-token target is ignored (the prompt
      // text is fixed) but the output-token target is overridden.
      generateOne(_targetInputTokens, targetOutputTokens) {
        const record = this.records[Math.floor(Math.random() * this.records.length)];
        return { ...record, outputTokenTarget: targetOutputTokens };
      }
    };
  }
});
|
|
79
|
+
|
|
80
|
+
// src/cli/ui/components/header.tsx
import { Box, Text } from "ink";
import { jsx, jsxs } from "react/jsx-runtime";
// Run banner: title line plus model, concurrency, streaming flag, and the
// request target on a second line.
function Header({ modelName, maxConcurrency, streaming, totalTarget }) {
  // "\u221E" is the infinity symbol, shown for open-ended runs.
  const targetStr = totalTarget === Infinity ? "\u221E" : String(totalTarget);
  return /* @__PURE__ */ jsxs(Box, { flexDirection: "column", children: [
    /* @__PURE__ */ jsx(Text, { bold: true, color: "cyan", children: "FLOTorch Load Tester" }),
    /* @__PURE__ */ jsxs(Text, { children: [
      /* @__PURE__ */ jsx(Text, { dimColor: true, children: "Model: " }),
      /* @__PURE__ */ jsx(Text, { bold: true, children: modelName }),
      /* @__PURE__ */ jsx(Text, { dimColor: true, children: " Concurrency: " }),
      /* @__PURE__ */ jsx(Text, { bold: true, children: maxConcurrency }),
      /* @__PURE__ */ jsx(Text, { dimColor: true, children: " Streaming: " }),
      /* @__PURE__ */ jsx(Text, { bold: true, children: streaming ? "yes" : "no" }),
      /* @__PURE__ */ jsx(Text, { dimColor: true, children: " Requests: " }),
      /* @__PURE__ */ jsx(Text, { bold: true, children: targetStr })
    ] })
  ] });
}
// Lazy module initializer (tsup/esbuild bundling pattern).
var init_header = __esm({
  "src/cli/ui/components/header.tsx"() {
    "use strict";
    init_esm_shims();
  }
});
|
|
105
|
+
|
|
106
|
+
// src/cli/ui/components/phase-bar.tsx
import { Box as Box2, Text as Text2 } from "ink";
import { jsx as jsx2, jsxs as jsxs2 } from "react/jsx-runtime";
// Shows the current load phase and a 20-cell bar of active vs. allowed slots.
function PhaseBar({ phase, activeSlots, allowedConcurrency }) {
  const color = phaseColors[phase];
  const barWidth = 20;
  // Clamp to [0, barWidth]: if activeSlots momentarily exceeds the allowed
  // concurrency the unclamped ratio drives `empty` negative, and
  // "\u2591".repeat(empty) throws a RangeError, crashing the UI.
  const rawFilled = allowedConcurrency > 0 ? Math.round(activeSlots / allowedConcurrency * barWidth) : 0;
  const filled = Math.min(barWidth, Math.max(0, rawFilled));
  const empty = barWidth - filled;
  return /* @__PURE__ */ jsx2(Box2, { children: /* @__PURE__ */ jsxs2(Text2, { children: [
    /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: "Phase: " }),
    /* @__PURE__ */ jsx2(Text2, { color, bold: true, children: "\u25CF " }),
    /* @__PURE__ */ jsx2(Text2, { color, children: phaseLabels[phase] }),
    /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: " Slots: " }),
    /* @__PURE__ */ jsx2(Text2, { color: "cyan", children: "\u2588".repeat(filled) }),
    /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: "\u2591".repeat(empty) }),
    /* @__PURE__ */ jsxs2(Text2, { children: [
      " ",
      activeSlots,
      "/",
      allowedConcurrency
    ] })
  ] }) });
}
var phaseColors, phaseLabels;
var init_phase_bar = __esm({
  "src/cli/ui/components/phase-bar.tsx"() {
    "use strict";
    init_esm_shims();
    // Phase id -> ink color name.
    phaseColors = {
      "ramp-up": "yellow",
      steady: "green",
      "ramp-down": "magenta"
    };
    // Phase id -> human-readable label.
    phaseLabels = {
      "ramp-up": "Ramp Up",
      steady: "Steady State",
      "ramp-down": "Ramp Down"
    };
  }
});
|
|
146
|
+
|
|
147
|
+
// src/cli/ui/components/progress-bar.tsx
|
|
148
|
+
import { Box as Box3, Text as Text3 } from "ink";
|
|
149
|
+
import { jsx as jsx3, jsxs as jsxs3 } from "react/jsx-runtime";
|
|
150
|
+
// Render a second count as "3m05s" (or just "42s" when under a minute).
function formatDuration2(seconds) {
  const wholeMinutes = Math.floor(seconds / 60);
  const wholeSeconds = Math.floor(seconds % 60);
  if (wholeMinutes > 0) {
    return `${wholeMinutes}m${String(wholeSeconds).padStart(2, "0")}s`;
  }
  return `${wholeSeconds}s`;
}
|
|
155
|
+
// Overall progress: 30-cell bar, completed/target, elapsed time, rate, ETA.
function ProgressBar({ completed, totalTarget, elapsedMs, rps }) {
  const barWidth = 30;
  // Open-ended runs (no request cap) cannot show a percentage or ETA.
  const isInfinite = totalTarget === Infinity || totalTarget <= 0;
  const pct = isInfinite ? 0 : Math.min(1, completed / totalTarget);
  const filled = Math.round(pct * barWidth);
  const empty = barWidth - filled;
  const elapsedSec = elapsedMs / 1e3;
  let eta = "";
  if (!isInfinite && rps > 0) {
    // Naive ETA: remaining requests at the current average rate (seconds).
    const remaining = (totalTarget - completed) / rps;
    eta = `ETA: ${formatDuration2(remaining)}`;
  }
  const targetStr = isInfinite ? "?" : String(totalTarget);
  const pctStr = isInfinite ? "" : ` (${(pct * 100).toFixed(1)}%)`;
  return /* @__PURE__ */ jsxs3(Box3, { flexDirection: "column", children: [
    /* @__PURE__ */ jsxs3(Text3, { children: [
      /* @__PURE__ */ jsx3(Text3, { dimColor: true, children: "Progress " }),
      /* @__PURE__ */ jsx3(Text3, { color: "green", children: "\u2588".repeat(filled) }),
      /* @__PURE__ */ jsx3(Text3, { dimColor: true, children: "\u2591".repeat(empty) }),
      /* @__PURE__ */ jsxs3(Text3, { children: [
        " ",
        completed,
        "/",
        targetStr,
        pctStr
      ] })
    ] }),
    /* @__PURE__ */ jsxs3(Text3, { dimColor: true, children: [
      "Elapsed: ",
      formatDuration2(elapsedSec),
      " ",
      rps.toFixed(1),
      " req/s",
      " ",
      eta
    ] })
  ] });
}
// Lazy module initializer (tsup/esbuild bundling pattern).
var init_progress_bar = __esm({
  "src/cli/ui/components/progress-bar.tsx"() {
    "use strict";
    init_esm_shims();
  }
});
|
|
199
|
+
|
|
200
|
+
// src/cli/ui/components/stats-panel.tsx
|
|
201
|
+
import { Box as Box4, Text as Text4 } from "ink";
|
|
202
|
+
import { jsx as jsx4, jsxs as jsxs4 } from "react/jsx-runtime";
|
|
203
|
+
// Nearest-rank percentile over an ascending-sorted array; 0 when empty.
function percentile(sorted, p) {
  const n = sorted.length;
  if (n === 0) {
    return 0;
  }
  const rank = Math.ceil((p / 100) * n) - 1;
  // p === 0 yields rank -1; clamp back to the first element.
  return sorted[rank < 0 ? 0 : rank];
}
|
|
208
|
+
// Arithmetic mean of an array of numbers; 0 when empty.
function mean2(arr) {
  if (arr.length === 0) {
    return 0;
  }
  const total = arr.reduce((acc, value) => acc + value, 0);
  return total / arr.length;
}
|
|
214
|
+
// Format a millisecond value: seconds with one decimal at >= 1s, else whole ms.
function fmtMs(ms) {
  const showSeconds = ms >= 1e3;
  return showSeconds ? `${(ms / 1e3).toFixed(1)}s` : `${Math.round(ms)}ms`;
}
|
|
218
|
+
// Live stats table: request/token rates, TTFT and E2E latency percentiles,
// and the running error count with its rate.
function StatsPanel({
  rps,
  outputTps,
  inputTps,
  recentTtft,
  recentE2eLatency,
  errors,
  completed
}) {
  // Copy before sorting: sort() mutates and these arrays arrive as props.
  // percentile() expects ascending order.
  const sortedTtft = [...recentTtft].sort((a, b) => a - b);
  const sortedE2e = [...recentE2eLatency].sort((a, b) => a - b);
  const errRate = completed > 0 ? (errors / completed * 100).toFixed(1) : "0.0";
  // Fixed column widths keep the label/value grid aligned.
  const labelW = 14;
  const valW = 10;
  return /* @__PURE__ */ jsxs4(Box4, { flexDirection: "column", children: [
    /* @__PURE__ */ jsx4(Text4, { bold: true, dimColor: true, children: "Live Stats" }),
    /* @__PURE__ */ jsxs4(Box4, { children: [
      /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Requests/s" }) }),
      /* @__PURE__ */ jsx4(Box4, { width: valW, children: /* @__PURE__ */ jsx4(Text4, { bold: true, children: rps.toFixed(1) }) }),
      /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Out tok/s" }) }),
      /* @__PURE__ */ jsx4(Box4, { width: valW, children: /* @__PURE__ */ jsx4(Text4, { bold: true, children: outputTps.toFixed(0) }) }),
      /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "In tok/s" }) }),
      /* @__PURE__ */ jsx4(Box4, { width: valW, children: /* @__PURE__ */ jsx4(Text4, { bold: true, children: inputTps.toFixed(0) }) })
    ] }),
    /* @__PURE__ */ jsxs4(Box4, { children: [
      /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "TTFT" }) }),
      /* @__PURE__ */ jsxs4(Text4, { children: [
        /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "mean=" }),
        /* @__PURE__ */ jsx4(Text4, { children: fmtMs(mean2(sortedTtft)) }),
        /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p50=" }),
        /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedTtft, 50)) }),
        /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p95=" }),
        /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedTtft, 95)) })
      ] })
    ] }),
    /* @__PURE__ */ jsxs4(Box4, { children: [
      /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "E2E" }) }),
      /* @__PURE__ */ jsxs4(Text4, { children: [
        /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "mean=" }),
        /* @__PURE__ */ jsx4(Text4, { children: fmtMs(mean2(sortedE2e)) }),
        /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p50=" }),
        /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedE2e, 50)) }),
        /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p95=" }),
        /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedE2e, 95)) })
      ] })
    ] }),
    /* @__PURE__ */ jsxs4(Box4, { children: [
      /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Errors" }) }),
      /* @__PURE__ */ jsxs4(Text4, { color: errors > 0 ? "red" : void 0, bold: errors > 0, children: [
        errors,
        " (",
        errRate,
        "%)"
      ] })
    ] })
  ] });
}
// Lazy module initializer (tsup/esbuild bundling pattern).
var init_stats_panel = __esm({
  "src/cli/ui/components/stats-panel.tsx"() {
    "use strict";
    init_esm_shims();
  }
});
|
|
281
|
+
|
|
282
|
+
// src/cli/ui/components/error-panel.tsx
import { Box as Box5, Text as Text5 } from "ink";
import { jsx as jsx5, jsxs as jsxs5 } from "react/jsx-runtime";
// List of the most recent error messages; renders nothing when there are none.
function ErrorPanel({ recentErrors }) {
  if (recentErrors.length === 0) return null;
  return /* @__PURE__ */ jsxs5(Box5, { flexDirection: "column", marginTop: 1, children: [
    /* @__PURE__ */ jsx5(Text5, { color: "red", bold: true, children: "Recent Errors:" }),
    recentErrors.map((err, i) => /* @__PURE__ */ jsxs5(Text5, { color: "red", dimColor: true, children: [
      " ",
      " ",
      // Truncate to keep each message on one 80-char line.
      err.length > 80 ? err.slice(0, 77) + "..." : err
    ] }, i))
  ] });
}
// Lazy module initializer (tsup/esbuild bundling pattern).
var init_error_panel = __esm({
  "src/cli/ui/components/error-panel.tsx"() {
    "use strict";
    init_esm_shims();
  }
});
|
|
302
|
+
|
|
303
|
+
// src/cli/ui/app.tsx
var app_exports = {};
__export(app_exports, {
  App: () => App
});
import { useState, useEffect } from "react";
import { Box as Box6 } from "ink";
import { jsx as jsx6, jsxs as jsxs6 } from "react/jsx-runtime";
// Root ink component: polls the shared stats store every 200ms and renders
// header, phase bar, progress bar, live stats, and recent errors.
function App({ store }) {
  const [snap, setSnap] = useState(() => store.snapshot());
  useEffect(() => {
    const id = setInterval(() => {
      setSnap(store.snapshot());
    }, 200);
    // Stop polling on unmount (or when `store` changes).
    return () => clearInterval(id);
  }, [store]);
  // Derive average rates from wall-clock time since the run started
  // (snap.startTime is a performance.now() timestamp; 0 means not started).
  const elapsedMs = snap.startTime > 0 ? performance.now() - snap.startTime : 0;
  const elapsedSec = elapsedMs / 1e3;
  const rps = elapsedSec > 0 ? snap.completed / elapsedSec : 0;
  const outputTps = elapsedSec > 0 ? snap.totalOutputTokens / elapsedSec : 0;
  const inputTps = elapsedSec > 0 ? snap.totalInputTokens / elapsedSec : 0;
  return /* @__PURE__ */ jsxs6(Box6, { flexDirection: "column", paddingLeft: 2, children: [
    /* @__PURE__ */ jsx6(
      Header,
      {
        modelName: snap.modelName,
        maxConcurrency: snap.maxConcurrency,
        streaming: snap.streaming,
        totalTarget: snap.totalTarget
      }
    ),
    /* @__PURE__ */ jsx6(Box6, { marginTop: 1, children: /* @__PURE__ */ jsx6(
      PhaseBar,
      {
        phase: snap.phase,
        activeSlots: snap.activeSlots,
        allowedConcurrency: snap.allowedConcurrency
      }
    ) }),
    /* @__PURE__ */ jsx6(Box6, { marginTop: 1, flexDirection: "column", children: /* @__PURE__ */ jsx6(
      ProgressBar,
      {
        completed: snap.completed,
        totalTarget: snap.totalTarget,
        elapsedMs,
        rps
      }
    ) }),
    /* @__PURE__ */ jsx6(Box6, { marginTop: 1, children: /* @__PURE__ */ jsx6(
      StatsPanel,
      {
        rps,
        outputTps,
        inputTps,
        recentTtft: snap.recentTtft,
        recentE2eLatency: snap.recentE2eLatency,
        errors: snap.errors,
        completed: snap.completed
      }
    ) }),
    /* @__PURE__ */ jsx6(ErrorPanel, { recentErrors: snap.recentErrors })
  ] });
}
// Lazy module initializer; pulls in all child component modules.
var init_app = __esm({
  "src/cli/ui/app.tsx"() {
    "use strict";
    init_esm_shims();
    init_header();
    init_phase_bar();
    init_progress_bar();
    init_stats_panel();
    init_error_panel();
  }
});
|
|
377
|
+
|
|
378
|
+
// index.ts
|
|
379
|
+
init_esm_shims();
|
|
380
|
+
|
|
381
|
+
// src/cli/args.ts
|
|
382
|
+
init_esm_shims();
|
|
383
|
+
import { parseArgs } from "util";
|
|
384
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
385
|
+
import { join } from "path";
|
|
386
|
+
|
|
387
|
+
// src/schemas/config.zod.ts
init_esm_shims();
import { z } from "zod";
// Ramp (up/down) spec: at least one of a request count or duration (seconds).
var rampSchema = z.object({
  requests: z.number().optional(),
  duration: z.number().optional()
}).refine((d) => d.requests || d.duration, {
  message: "At least one of requests or duration required"
});
// Top-level load-test configuration. The validated/defaulted result is what
// gets written to config.resolved.json by resolveConfig().
var ConfigSchema = z.object({
  generator: z.object({
    enabled: z.boolean().default(false),
    prompt: z.string().optional(),
    corpus: z.string().optional()
  }).default(() => ({ enabled: false })),
  benchmark: z.object({
    inputFile: z.string().optional(),
    outputDir: z.string().default("./results"),
    // Target prompt sizes (gaussian mean/stddev, in tokens).
    inputTokens: z.object({
      mean: z.number(),
      stddev: z.number().optional()
    }),
    outputTokens: z.object({
      mean: z.number(),
      stddev: z.number().optional()
    }),
    maxRequests: z.number().optional(),
    maxDuration: z.number().optional(),
    timeout: z.number().default(600),
    concurrency: z.number(),
    rampUp: rampSchema.optional(),
    rampDown: rampSchema.optional(),
    cachePercentage: z.number().min(0).max(100).default(0),
    streaming: z.boolean().default(true)
  }).refine((d) => d.maxRequests || d.maxDuration, {
    message: "At least one of maxRequests or maxDuration required"
  }),
  provider: z.object({
    adapter: z.enum(["openai", "sagemaker"]).default("openai"),
    model: z.string(),
    baseURL: z.string().optional(),
    systemPrompt: z.string().optional(),
    // Adapter-specific passthrough options.
    config: z.record(z.string(), z.any()).optional()
  }),
  reporter: z.object({
    adapters: z.array(z.enum(["json", "csv"])).default(["json"])
  }).default(() => ({ adapters: ["json"] }))
});
|
|
435
|
+
// Validate process.env against a zod schema; on failure throw a single error
// listing every missing/invalid variable for the named backend.
function validateEnv(schema, adapterName) {
  const parsed = schema.safeParse(process.env);
  if (parsed.success) {
    return parsed.data;
  }
  const errors = parsed.error.issues.map((i) => ` ${i.path.join(".")}: ${i.message}`).join("\n");
  throw new Error(`Missing/invalid env vars for "${adapterName}" backend:
${errors}`);
}
|
|
444
|
+
|
|
445
|
+
// src/cli/ansi.ts
init_esm_shims();
// Color only when stdout is a TTY and neither NO_COLOR nor TERM=dumb opts out
// (https://no-color.org convention).
var enabled = process.env.NO_COLOR === void 0 && process.env.TERM !== "dumb" && (process.stdout.isTTY ?? false);
// Build an SGR wrapper for the given open/close codes; identity when disabled.
var wrap = (code, close) => enabled ? (s) => `\x1B[${code}m${s}\x1B[${close}m` : (s) => s;
var bold = wrap("1", "22");
var dim = wrap("2", "22");
var red = wrap("31", "39");
var green = wrap("32", "39");
var yellow = wrap("33", "39");
var cyan = wrap("36", "39");
var magenta = wrap("35", "39");
|
|
457
|
+
// src/cli/args.ts
// Subcommands accepted as the first positional argument.
var VALID_COMMANDS = /* @__PURE__ */ new Set(["run", "generate", "bench", "report", "init"]);
// CLI help, printed for -h/--help (colorized via the ansi helpers above).
var HELP_TEXT = `
${bold("FLOTorch Load Tester")}

${yellow("USAGE")}
  flotorch ${dim("<command>")} ${dim("[options]")}

${yellow("COMMANDS")}
  ${cyan("run")} ${dim("Run full pipeline: generate \u2192 bench \u2192 report (default)")}
  ${cyan("generate")} ${dim("Generate prompts only")}
  ${cyan("bench")} ${dim("Run benchmark using existing prompts")}
  ${cyan("report")} ${dim("Generate report from existing results")}
  ${cyan("init")} ${dim("[path]")} ${dim("Interactively create a config file (default: config.json)")}

${yellow("OPTIONS")}
  ${cyan("-c, --config")} ${dim("<path>")} Path to config JSON ${dim("(required for run/generate/bench/report)")}
  ${cyan("--run-id")} ${dim("<id>")} Custom run ID ${dim("(default: ISO timestamp)")}
  ${cyan("-m, --model")} ${dim("<name>")} Override provider.model
  ${cyan("-n, --concurrency")} ${dim("<n>")} Override benchmark.concurrency
  ${cyan("--max-requests")} ${dim("<n>")} Override benchmark.maxRequests
  ${cyan("--max-duration")} ${dim("<n>")} Override benchmark.maxDuration ${dim("(seconds)")}
  ${cyan("-o, --output-dir")} ${dim("<p>")} Override benchmark.outputDir
  ${cyan("--base-url")} ${dim("<url>")} Override provider.baseURL
  ${cyan("--streaming")} Enable streaming
  ${cyan("--no-streaming")} Disable streaming
  ${cyan("-h, --help")} Show this help message
`.trimStart();
|
|
485
|
+
// Parse argv into { command, configPath, runId, overrides } — plus
// `initOutputPath` when the command is `init`. Prints help and exits on -h.
function parseCliArgs(argv) {
  const rest = argv.slice(2);
  let command = "run";
  if (rest.length > 0 && VALID_COMMANDS.has(rest[0])) {
    command = rest.shift();
  }
  const { values, positionals } = parseArgs({
    args: rest,
    options: {
      config: { type: "string", short: "c" },
      "run-id": { type: "string" },
      model: { type: "string", short: "m" },
      concurrency: { type: "string", short: "n" },
      "max-requests": { type: "string" },
      "max-duration": { type: "string" },
      "output-dir": { type: "string", short: "o" },
      "base-url": { type: "string" },
      streaming: { type: "boolean" },
      "no-streaming": { type: "boolean" },
      help: { type: "boolean", short: "h" }
    },
    allowPositionals: true
  });
  if (values.help) {
    console.log(HELP_TEXT);
    process.exit(0);
  }
  // `init` needs no config file; its only input is an optional output path.
  if (command === "init") {
    const initOutputPath = positionals[0] ?? "config.json";
    return { command, configPath: "", runId: "", overrides: {}, initOutputPath };
  }
  if (!values.config) {
    throw new Error("--config / -c is required");
  }
  // Default run id: filesystem-safe ISO timestamp.
  const runId = values["run-id"] ?? (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  return {
    command,
    configPath: values.config,
    runId,
    overrides: collectOverrides(values)
  };
}
|
|
524
|
+
// Load the config JSON, layer previously-saved overrides and fresh CLI
// overrides on top, validate with ConfigSchema, and persist both the
// overrides and the fully-resolved config under <outputDir>/<runId>.
function resolveConfig(configPath, runId, cliOverrides) {
  if (!existsSync(configPath)) {
    throw new Error(`Config file not found: ${configPath}`);
  }
  const rawConfig = JSON.parse(readFileSync(configPath, "utf-8"));
  const benchRaw = rawConfig.benchmark ?? {};
  const baseOutputDir = benchRaw.outputDir ?? "./results";
  const outputDir = join(baseOutputDir, runId);
  // Later deepMerge sources win: file < saved overrides < CLI overrides.
  const savedOverrides = loadSavedOverrides(outputDir);
  const merged = deepMerge(rawConfig, savedOverrides, cliOverrides);
  const combinedOverrides = deepMerge(savedOverrides, cliOverrides);
  // Directory must exist before saveOverrides writes into it.
  mkdirSync(outputDir, { recursive: true });
  saveOverrides(outputDir, combinedOverrides);
  const result = ConfigSchema.safeParse(merged);
  if (!result.success) {
    const errors = result.error.issues.map((i) => ` ${i.path.join(".")}: ${i.message}`).join("\n");
    throw new Error(`Invalid configuration:
${errors}`);
  }
  // Snapshot the resolved config for reproducibility of this run.
  writeFileSync(
    join(outputDir, "config.resolved.json"),
    JSON.stringify(result.data, null, 2) + "\n"
  );
  return { config: result.data, outputDir };
}
|
|
549
|
+
// Translate parsed CLI flag values into a partial-config overrides object.
// Sections are only included when at least one of their flags was given.
function collectOverrides(values) {
  const provider = {};
  const benchmark = {};
  if (values.model) {
    provider.model = values.model;
  }
  if (values["base-url"]) {
    provider.baseURL = values["base-url"];
  }
  // Numeric benchmark flags arrive as strings from parseArgs.
  const numericFlags = [
    ["concurrency", "concurrency"],
    ["max-requests", "maxRequests"],
    ["max-duration", "maxDuration"]
  ];
  for (const [flag, key] of numericFlags) {
    if (values[flag]) {
      benchmark[key] = Number(values[flag]);
    }
  }
  if (values["output-dir"]) {
    benchmark.outputDir = values["output-dir"];
  }
  // --streaming / --no-streaming are independent booleans; the latter wins
  // when both are passed (matches the original evaluation order).
  if (values.streaming === true) {
    benchmark.streaming = true;
  }
  if (values["no-streaming"] === true) {
    benchmark.streaming = false;
  }
  const overrides = {};
  if (Object.keys(provider).length > 0) {
    overrides.provider = provider;
  }
  if (Object.keys(benchmark).length > 0) {
    overrides.benchmark = benchmark;
  }
  return overrides;
}
|
|
565
|
+
// Read overrides.json saved by an earlier invocation of the same run;
// an absent file means no saved overrides ({}).
function loadSavedOverrides(outputDir) {
  const overridesPath = join(outputDir, "overrides.json");
  if (!existsSync(overridesPath)) {
    return {};
  }
  return JSON.parse(readFileSync(overridesPath, "utf-8"));
}
|
|
570
|
+
// Persist non-empty overrides as pretty-printed overrides.json; writing
// nothing for an empty object keeps loadSavedOverrides returning {}.
function saveOverrides(outputDir, overrides) {
  if (Object.keys(overrides).length === 0) {
    return;
  }
  const body = `${JSON.stringify(overrides, null, 2)}\n`;
  writeFileSync(join(outputDir, "overrides.json"), body);
}
|
|
574
|
+
// True for non-null, non-array objects (the values deepMerge descends into).
function isPlainObject(val) {
  if (val === null || Array.isArray(val)) {
    return false;
  }
  return typeof val === "object";
}
|
|
577
|
+
// Merge plain objects left-to-right (later sources win); nested plain
// objects are merged recursively, everything else (arrays, scalars) is
// replaced wholesale. Always returns a fresh object.
function deepMerge(...sources) {
  // Local check: non-null, non-array object (mirrors isPlainObject).
  const mergeable = (val) => typeof val === "object" && val !== null && !Array.isArray(val);
  const result = {};
  for (const source of sources) {
    for (const key of Object.keys(source)) {
      // Prototype-pollution guard: JSON.parse produces an own "__proto__"
      // key, and `result[key] = ...` for that key would mutate the result's
      // prototype instead of defining a property. Config/override files are
      // external input, so skip it outright.
      if (key === "__proto__") {
        continue;
      }
      if (mergeable(result[key]) && mergeable(source[key])) {
        result[key] = deepMerge(
          result[key],
          source[key]
        );
      } else {
        result[key] = source[key];
      }
    }
  }
  return result;
}
|
|
593
|
+
|
|
594
|
+
// src/cli/init.ts
|
|
595
|
+
init_esm_shims();
|
|
596
|
+
import { createInterface } from "readline/promises";
|
|
597
|
+
import { writeFileSync as writeFileSync2, existsSync as existsSync2 } from "fs";
|
|
598
|
+
// Ask one question on the given readline interface; show the default in
// parentheses and fall back to it (then "") when the answer is blank.
async function prompt(rl, question, defaultValue) {
  const hint = defaultValue != null ? ` (${defaultValue})` : "";
  const raw = await rl.question(`${question}${hint}: `);
  const answer = raw.trim();
  if (answer) {
    return answer;
  }
  return defaultValue || "";
}
|
|
603
|
+
// Interactive `flotorch init`: prompt for the minimal settings and write a
// starter config JSON to `outputPath`.
async function runInit(outputPath) {
  // Confirm before clobbering an existing file (default answer: no).
  if (existsSync2(outputPath)) {
    const rl2 = createInterface({ input: process.stdin, output: process.stdout });
    const overwrite = await prompt(rl2, `${outputPath} already exists. Overwrite? [y/N]`, "n");
    if (overwrite.toLowerCase() !== "y") {
      rl2.close();
      console.log("Aborted.");
      return;
    }
    rl2.close();
  }
  const rl = createInterface({ input: process.stdin, output: process.stdout });
  console.log("\nFLOTorch Load Tester \u2014 Config Generator\n");
  const adapter = await prompt(rl, "Provider adapter [openai/sagemaker]", "openai");
  const model = await prompt(rl, "Model name (required)");
  if (!model) {
    rl.close();
    throw new Error("Model name is required");
  }
  let baseURL;
  if (adapter === "openai") {
    // Only record a baseURL when it differs from the OpenAI default.
    const url = await prompt(rl, "Base URL", "https://api.openai.com/v1");
    if (url !== "https://api.openai.com/v1") {
      baseURL = url;
    }
  }
  const concurrency = Number(await prompt(rl, "Concurrency", "10"));
  const inputMean = Number(await prompt(rl, "Input tokens mean", "512"));
  const outputMean = Number(await prompt(rl, "Output tokens mean", "256"));
  const maxRequests = Number(await prompt(rl, "Max requests", "100"));
  const streamingAnswer = await prompt(rl, "Streaming? [y/n]", "y");
  const streaming = streamingAnswer.toLowerCase() === "y";
  rl.close();
  // Assemble a config matching ConfigSchema's required fields plus defaults.
  const config = {
    provider: {
      adapter,
      model,
      // Spread only includes baseURL when it was set above.
      ...baseURL && { baseURL }
    },
    benchmark: {
      concurrency,
      inputTokens: { mean: inputMean },
      outputTokens: { mean: outputMean },
      maxRequests,
      streaming,
      outputDir: "./results",
      timeout: 600,
      cachePercentage: 0
    },
    generator: {
      enabled: false
    },
    reporter: {
      adapters: ["json"]
    }
  };
  writeFileSync2(outputPath, JSON.stringify(config, null, 2) + "\n");
  console.log(`
Config written to ${outputPath}`);
}
|
|
663
|
+
|
|
664
|
+
// src/generator/generator.ts
|
|
665
|
+
init_esm_shims();
|
|
666
|
+
|
|
667
|
+
// src/generator/synthetic.ts
|
|
668
|
+
init_esm_shims();
|
|
669
|
+
init_tokenizer();
|
|
670
|
+
|
|
671
|
+
// src/utils/random.ts
|
|
672
|
+
init_esm_shims();
|
|
673
|
+
// Draw one sample from N(mean, stddev) via the Box-Muller transform.
function gaussianRandom(mean3, stddev2) {
  const u2 = Math.random();
  // u1 must be non-zero: Math.log(0) is -Infinity.
  let u1 = Math.random();
  while (u1 === 0) {
    u1 = Math.random();
  }
  const standardNormal = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
  return mean3 + standardNormal * stddev2;
}
// Gaussian sample rounded to an integer and clamped into [min, max].
function clampedGaussian(mean3, stddev2, min, max) {
  const sample = Math.round(gaussianRandom(mean3, stddev2));
  return Math.max(min, Math.min(max, sample));
}
|
|
684
|
+
|
|
685
|
+
// src/generator/synthetic.ts
|
|
686
|
+
import { readFileSync as readFileSync2 } from "fs";
|
|
687
|
+
|
|
688
|
+
// src/generator/corpus/default.ts
init_esm_shims();
// Default synthetic-prompt corpus: a concatenation of several Shakespeare
// sonnets. SyntheticGenerator splits this text on newlines and samples
// individual lines to build prompts of a target input-token length when no
// custom corpus file is configured.
var default_default = `Shall I compare thee to a summer's day?
Thou art more lovely and more temperate:
Rough winds do shake the darling buds of May,
And summer's lease hath all too short a date:
Sometime too hot the eye of heaven shines,
And often is his gold complexion dimm'd;
And every fair from fair sometime declines,
By chance or nature's changing course untrimm'd;
But thy eternal summer shall not fade
Nor lose possession of that fair thou owest;
Nor shall Death brag thou wander'st in his shade,
When in eternal lines to time thou growest:
So long as men can breathe or eyes can see,
So long lives this and this gives life to thee.
Then let not winter's ragged hand deface
In thee thy summer, ere thou be distill'd:
Make sweet some vial; treasure thou some place
With beauty's treasure, ere it be self-kill'd.
That use is not forbidden usury,
Which happies those that pay the willing loan;
That's for thyself to breed another thee,
Or ten times happier, be it ten for one;
Ten times thyself were happier than thou art,
If ten of thine ten times refigured thee:
Then what could death do, if thou shouldst depart,
Leaving thee living in posterity?
Be not self-will'd, for thou art much too fair
To be death's conquest and make worms thine heir.
Where art thou, Muse, that thou forget'st so long
To speak of that which gives thee all thy might?
Spend'st thou thy fury on some worthless song,
Darkening thy power to lend base subjects light?
Return, forgetful Muse, and straight redeem
In gentle numbers time so idly spent;
Sing to the ear that doth thy lays esteem
And gives thy pen both skill and argument.
Rise, resty Muse, my love's sweet face survey,
If Time have any wrinkle graven there;
If any, be a satire to decay,
And make Time's spoils despised every where.
Give my love fame faster than Time wastes life;
So thou prevent'st his scythe and crooked knife.
My glass shall not persuade me I am old,
So long as youth and thou are of one date;
But when in thee time's furrows I behold,
Then look I death my days should expiate.
For all that beauty that doth cover thee
Is but the seemly raiment of my heart,
Which in thy breast doth live, as thine in me:
How can I then be elder than thou art?
O, therefore, love, be of thyself so wary
As I, not for myself, but for thee will;
Bearing thy heart, which I will keep so chary
As tender nurse her babe from faring ill.
Presume not on thy heart when mine is slain;
Thou gavest me thine, not to give back again.
So am I as the rich, whose blessed key
Can bring him to his sweet up-locked treasure,
The which he will not every hour survey,
For blunting the fine point of seldom pleasure.
Therefore are feasts so solemn and so rare,
Since, seldom coming, in the long year set,
Like stones of worth they thinly placed are,
Or captain jewels in the carcanet.
So is the time that keeps you as my chest,
Or as the wardrobe which the robe doth hide,
To make some special instant special blest,
By new unfolding his imprison'd pride.
Blessed are you, whose worthiness gives scope,
Being had, to triumph, being lack'd, to hope.
If there be nothing new, but that which is
Hath been before, how are our brains beguiled,
Which, labouring for invention, bear amiss
The second burden of a former child!
O, that record could with a backward look,
Even of five hundred courses of the sun,
Show me your image in some antique book,
Since mind at first in character was done!
That I might see what the old world could say
To this composed wonder of your frame;
Whether we are mended, or whether better they,
Or whether revolution be the same.
O, sure I am, the wits of former days
To subjects worse have given admiring praise.`;
|
|
774
|
+
|
|
775
|
+
// src/generator/synthetic.ts
|
|
776
|
+
/**
 * Builds synthetic benchmark prompts by sampling lines from a text corpus
 * until a Gaussian-drawn input-token target is reached.
 */
var SyntheticGenerator = class {
  lines;
  config;
  /**
   * Load the corpus (a user-supplied file when `generator.corpus` is set,
   * otherwise the bundled default text) and keep its non-blank lines.
   */
  constructor(config) {
    this.config = config;
    const corpusPath = config.generator.corpus;
    const rawCorpus = corpusPath ? readFileSync2(corpusPath, "utf-8") : default_default;
    this.lines = rawCorpus
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line.length > 0);
  }
  /**
   * Produce `count` prompt records. Input/output token targets are drawn
   * per record from clamped Gaussians around the configured means; stddev
   * defaults to 10% of the mean, and targets are clamped to [1, 3 * mean].
   */
  generate(count) {
    const inputMean = this.config.benchmark.inputTokens.mean;
    const inputStddev = this.config.benchmark.inputTokens.stddev ?? inputMean * 0.1;
    const outputMean = this.config.benchmark.outputTokens.mean;
    const outputStddev = this.config.benchmark.outputTokens.stddev ?? outputMean * 0.1;
    const records = [];
    for (let i = 0; i < count; i++) {
      const inTarget = clampedGaussian(inputMean, inputStddev, 1, inputMean * 3);
      const outTarget = clampedGaussian(outputMean, outputStddev, 1, outputMean * 3);
      records.push(this.generateOne(inTarget, outTarget));
    }
    return records;
  }
  /**
   * Build one prompt of roughly `targetInputTokens` tokens from a shuffled
   * copy of the corpus, then prepend the instruction header and append the
   * optional configured suffix prompt.
   */
  generateOne(targetInputTokens, targetOutputTokens) {
    const pool = [...this.lines].sort(() => Math.random() - 0.5);
    let text = "";
    let tokens = 0;
    let idx = 0;
    // First sweep: add lines until the target would be exceeded (unless
    // nothing has been accumulated yet, in which case take the line anyway).
    while (tokens < targetInputTokens && idx < pool.length) {
      const candidate = text ? `${text}\n${pool[idx]}` : pool[idx];
      const candidateTokens = countTokens(candidate);
      if (candidateTokens > targetInputTokens && text.length > 0) break;
      text = candidate;
      tokens = candidateTokens;
      idx++;
    }
    // Second sweep: cycle the pool again, tolerating up to 10% overshoot.
    while (tokens < targetInputTokens) {
      const candidate = `${text}\n${pool[idx % pool.length]}`;
      const candidateTokens = countTokens(candidate);
      if (candidateTokens > targetInputTokens * 1.1) break;
      text = candidate;
      tokens = candidateTokens;
      idx++;
    }
    const suffix = this.config.generator.prompt ?? "";
    const header = `Randomly stream lines from the following text with ${targetOutputTokens} output tokens. Don't generate eos tokens:\n\n`;
    const fullText = header + text + (suffix ? `\n${suffix}` : "");
    return {
      text: fullText,
      tokenCount: countTokens(fullText),
      outputTokenTarget: targetOutputTokens
    };
  }
};
|
|
835
|
+
|
|
836
|
+
// src/generator/generator.ts
|
|
837
|
+
init_file();
|
|
838
|
+
/**
 * Select a prompt generator for this run.
 *
 * Synthetic generation wins when explicitly enabled; otherwise a configured
 * input file takes precedence, falling back to synthetic generation.
 *
 * @param {object} config - Full loadtest config.
 * @returns {SyntheticGenerator|FileGenerator} The chosen generator.
 */
function createGenerator(config) {
  const syntheticRequested = config.generator.enabled;
  if (!syntheticRequested && config.benchmark.inputFile) {
    return new FileGenerator(config.benchmark.inputFile);
  }
  return new SyntheticGenerator(config);
}
|
|
847
|
+
|
|
848
|
+
// src/runner/backend.ts
|
|
849
|
+
init_esm_shims();
|
|
850
|
+
|
|
851
|
+
// src/runner/backends/openai.ts
|
|
852
|
+
init_esm_shims();
|
|
853
|
+
init_tokenizer();
|
|
854
|
+
import { z as z2 } from "zod";
|
|
855
|
+
// Environment contract for the OpenAI backend; checked by validateEnv()
// in OpenAIBackend.create before any request is made.
var EnvSchema = z2.object({
  OPENAI_API_KEY: z2.string().min(1, "OPENAI_API_KEY is required")
});
|
|
858
|
+
/**
 * Backend that speaks the OpenAI chat-completions protocol (streaming via
 * SSE or non-streaming JSON) against api.openai.com or any compatible host.
 */
var OpenAIBackend = class _OpenAIBackend {
  name = "openai";
  // Fully-resolved endpoint URL, always ending in /chat/completions.
  url;
  // Bearer token sent in the Authorization header on every request.
  apiKey;
  /**
   * Factory: validate OPENAI_API_KEY from the environment and resolve the
   * base URL (defaults to the public OpenAI API) before constructing.
   */
  static create(baseURL) {
    const env = validateEnv(EnvSchema, "openai");
    const url = baseURL ?? "https://api.openai.com/v1";
    return new _OpenAIBackend(url, env.OPENAI_API_KEY);
  }
  // Normalizes the URL: trailing slashes are stripped and /chat/completions
  // is appended unless the caller already supplied the full path.
  constructor(baseURL, apiKey) {
    this.url = baseURL.endsWith("/chat/completions") ? baseURL : `${baseURL.replace(/\/+$/, "")}/chat/completions`;
    this.apiKey = apiKey;
  }
  /**
   * Issue one chat-completion request and parse the reply.
   *
   * FIXME(review): `maxTokens` is accepted for interface parity with other
   * backends but is never placed into the request body here — a token cap
   * only takes effect if supplied through `params` (e.g. params.max_tokens).
   * Confirm whether that is intentional.
   *
   * Throws an Error (with `.code` set to the HTTP status string) on any
   * non-2xx response.
   */
  async request(prompt2, model, maxTokens, systemPrompt, params, streaming, signal) {
    const messages = [];
    if (systemPrompt) {
      messages.push({ role: "system", content: systemPrompt });
    }
    messages.push({ role: "user", content: prompt2 });
    const body = {
      model,
      messages,
      stream: streaming,
      ...params
    };
    // Newer OpenAI-hosted models expect max_completion_tokens instead of
    // max_tokens; rewrite the key only when talking to api.openai.com.
    if (body.max_tokens && this.isOpenAIHost()) {
      body.max_completion_tokens = body.max_tokens;
      delete body.max_tokens;
    }
    const response = await fetch(this.url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body),
      signal
    });
    if (!response.ok) {
      const text = await response.text();
      const error = new Error(`HTTP ${response.status}: ${text}`);
      error.code = String(response.status);
      throw error;
    }
    if (streaming) {
      return this.parseStream(response);
    }
    return this.parseResponse(response);
  }
  // Heuristic: only the real OpenAI host needs the max_tokens key rewrite.
  isOpenAIHost() {
    return this.url.includes("api.openai.com");
  }
  /**
   * Consume an SSE stream of chat-completion chunks.
   *
   * Timing notes: `requestStart` is taken on entry to this method (after
   * response headers arrived), so ttftMs measures headers-to-first-content,
   * not the full network round trip. Inter-token latencies are gaps between
   * successive content-bearing chunks.
   *
   * NOTE(review): any bytes left in `buffer` when the stream ends (a final
   * line without a trailing newline) are discarded, and usage is only seen
   * if the server emits it in a chunk; otherwise tokens are re-counted
   * locally with countTokens.
   */
  async parseStream(response) {
    const body = response.body;
    if (!body) throw new Error("No response body");
    const reader = body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
    let generatedText = "";
    let ttftMs = 0;
    const requestStart = performance.now();
    let lastChunkTime = requestStart;
    const interTokenLatencies = [];
    let firstToken = true;
    let outputTokens = 0;
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        // Process complete lines; keep the trailing partial line buffered.
        const lines = buffer.split("\n");
        buffer = lines.pop();
        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed.startsWith("data: ")) continue;
          const data = trimmed.slice(6);
          if (data === "[DONE]") continue;
          let chunk;
          try {
            chunk = JSON.parse(data);
          } catch {
            // Malformed/partial JSON events are skipped silently.
            continue;
          }
          const content = chunk.choices?.[0]?.delta?.content;
          if (content) {
            const now = performance.now();
            if (firstToken) {
              ttftMs = now - requestStart;
              firstToken = false;
            } else {
              interTokenLatencies.push(now - lastChunkTime);
            }
            lastChunkTime = now;
            generatedText += content;
          }
          if (chunk.usage?.completion_tokens) {
            outputTokens = chunk.usage.completion_tokens;
          }
        }
      }
    } finally {
      reader.releaseLock();
    }
    // Server-reported usage wins; otherwise count tokens locally.
    if (outputTokens === 0) {
      outputTokens = countTokens(generatedText);
    }
    return { generatedText, outputTokens, ttftMs, interTokenLatencies };
  }
  /**
   * Parse a non-streaming JSON completion.
   *
   * NOTE(review): ttftMs here only measures body download + JSON parse time
   * (headers were already received before this method runs) — confirm this
   * is the intended metric for the non-streaming path.
   */
  async parseResponse(response) {
    const requestStart = performance.now();
    const json = await response.json();
    const ttftMs = performance.now() - requestStart;
    const generatedText = json.choices?.[0]?.message?.content ?? "";
    const outputTokens = json.usage?.completion_tokens ?? countTokens(generatedText);
    return { generatedText, outputTokens, ttftMs, interTokenLatencies: [] };
  }
};
|
|
975
|
+
|
|
976
|
+
// src/runner/backends/sagemaker.ts
|
|
977
|
+
init_esm_shims();
|
|
978
|
+
init_tokenizer();
|
|
979
|
+
import { z as z3 } from "zod";
|
|
980
|
+
import { SignatureV4 } from "@smithy/signature-v4";
|
|
981
|
+
import { Hash } from "@smithy/hash-node";
|
|
982
|
+
import { HttpRequest } from "@smithy/protocol-http";
|
|
983
|
+
// Environment contract for the SageMaker backend; checked by validateEnv()
// in SageMakerBackend.create. Region defaults to us-east-1; the session
// token is optional (only needed for temporary credentials).
var EnvSchema2 = z3.object({
  AWS_REGION: z3.string().default("us-east-1"),
  AWS_ACCESS_KEY_ID: z3.string().min(1, "AWS_ACCESS_KEY_ID is required"),
  AWS_SECRET_ACCESS_KEY: z3.string().min(1, "AWS_SECRET_ACCESS_KEY is required"),
  AWS_SESSION_TOKEN: z3.string().optional()
});
|
|
989
|
+
// Adapter pinning @smithy/hash-node's generic Hash to SHA-256, as required
// by SignatureV4's `sha256` constructor option below.
var Sha256Hash = class extends Hash {
  // `secret`, when provided, is forwarded to Hash — presumably enabling
  // HMAC mode for signing; verify against @smithy/hash-node docs.
  constructor(secret) {
    super("sha256", secret);
  }
};
|
|
994
|
+
/**
 * Backend for Amazon SageMaker runtime endpoints. Requests are SigV4-signed
 * and can be shaped either as OpenAI-style chat bodies or as the native
 * SageMaker/TGI `inputs`/`parameters` format. Streaming responses arrive as
 * AWS eventstream frames whose payloads are parsed per request format.
 */
var SageMakerBackend = class _SageMakerBackend {
  name = "sagemaker";
  // SigV4 signer used for every request.
  signer;
  // Runtime endpoint origin, e.g. https://runtime.sagemaker.<region>.amazonaws.com
  baseURL;
  // "openai" or "sagemaker" — controls request body shape and stream parsing.
  requestFormat;
  /**
   * Factory: validate AWS credentials/region from the environment and
   * construct a signed backend instance.
   */
  static create(baseURL, requestFormat) {
    const env = validateEnv(EnvSchema2, "sagemaker");
    const region = env.AWS_REGION;
    return new _SageMakerBackend({
      region,
      accessKeyId: env.AWS_ACCESS_KEY_ID,
      secretAccessKey: env.AWS_SECRET_ACCESS_KEY,
      sessionToken: env.AWS_SESSION_TOKEN,
      baseURL,
      requestFormat
    });
  }
  constructor(config) {
    this.baseURL = config.baseURL ?? `https://runtime.sagemaker.${config.region}.amazonaws.com`;
    this.requestFormat = config.requestFormat ?? "sagemaker" /* Sagemaker */;
    this.signer = new SignatureV4({
      service: "sagemaker",
      region: config.region,
      credentials: {
        accessKeyId: config.accessKeyId,
        secretAccessKey: config.secretAccessKey,
        sessionToken: config.sessionToken
      },
      sha256: Sha256Hash
    });
  }
  /**
   * Invoke the endpoint named by `model`. Streaming uses the
   * invocations-response-stream path; otherwise plain invocations.
   * Throws an Error (with `.code` set to the HTTP status string) on any
   * non-2xx response.
   */
  async request(prompt2, model, maxTokens, systemPrompt, params, streaming, signal) {
    const path2 = streaming ? `/endpoints/${model}/invocations-response-stream` : `/endpoints/${model}/invocations`;
    const body = this.buildRequestBody(prompt2, maxTokens, systemPrompt, params, streaming);
    const bodyStr = JSON.stringify(body);
    const url = new URL(path2, this.baseURL);
    const headers = {
      "Content-Type": "application/json"
    };
    if (streaming) {
      headers["X-Amzn-SageMaker-InferenceComponent-Inference-Code-Accepts"] = "application/jsonlines";
    }
    // Build a smithy HttpRequest purely so SignatureV4 can sign it; the
    // actual transfer is done with fetch using the signed headers.
    const httpRequest = new HttpRequest({
      method: "POST",
      protocol: url.protocol,
      hostname: url.hostname,
      port: url.port ? Number(url.port) : void 0,
      path: url.pathname,
      headers,
      body: bodyStr
    });
    const signed = await this.signer.sign(httpRequest);
    const response = await fetch(url.toString(), {
      method: "POST",
      headers: signed.headers,
      body: bodyStr,
      signal
    });
    if (!response.ok) {
      const text = await response.text();
      const error = new Error(`HTTP ${response.status}: ${text}`);
      error.code = String(response.status);
      throw error;
    }
    if (streaming) {
      return this.parseEventStream(response);
    }
    return this.parseResponse(response);
  }
  // Shape the JSON body per requestFormat: OpenAI-style chat messages, or
  // the native `inputs`/`parameters` format (note: no `stream` flag there —
  // streaming is selected by the URL path instead).
  buildRequestBody(prompt2, maxTokens, systemPrompt, params, streaming) {
    const messages = [];
    if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
    messages.push({ role: "user", content: prompt2 });
    if (this.requestFormat === "openai" /* OpenAI */) {
      return {
        messages,
        max_tokens: maxTokens,
        stream: streaming,
        ...params
      };
    }
    return {
      inputs: [messages],
      parameters: {
        max_new_tokens: maxTokens,
        ...params
      }
    };
  }
  // ---- Streaming: eventstream binary parser ----
  /**
   * Decode the AWS eventstream framing, then parse each PayloadPart:
   * as SSE "data:" lines for the OpenAI format, or as JSON-lines chunks
   * (TGI-style `token.text` / `generated_text` / `details`) otherwise.
   *
   * Timing note: ttftMs is measured from entry to this method (after
   * response headers), and `now` is sampled once per PayloadPart, so all
   * SSE lines inside one frame share a timestamp.
   */
  async parseEventStream(response) {
    const body = response.body;
    if (!body) throw new Error("No response body");
    const reader = body.getReader();
    let buffer = new Uint8Array(0);
    let generatedText = "";
    let ttftMs = 0;
    const requestStart = performance.now();
    let lastChunkTime = requestStart;
    const interTokenLatencies = [];
    let firstToken = true;
    let outputTokens = 0;
    let sseBuffer = "";
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer = concatBytes(buffer, value);
        // Drain every complete eventstream frame currently buffered.
        while (true) {
          const parsed = readEventStreamMessage(buffer);
          if (!parsed) break;
          const { message, bytesConsumed } = parsed;
          buffer = buffer.slice(bytesConsumed);
          if (message.headers[":message-type"] === "exception") {
            const errText = new TextDecoder().decode(message.payload);
            throw new Error(
              `SageMaker stream exception (${message.headers[":event-type"]}): ${errText}`
            );
          }
          if (message.headers[":event-type"] !== "PayloadPart") continue;
          const payloadText = new TextDecoder().decode(message.payload);
          const now = performance.now();
          if (this.requestFormat === "openai" /* OpenAI */) {
            // SSE lines may span frame boundaries; keep the partial tail.
            sseBuffer += payloadText;
            const lines = sseBuffer.split("\n");
            sseBuffer = lines.pop();
            for (const line of lines) {
              const trimmed = line.trim();
              if (!trimmed.startsWith("data: ")) continue;
              const data = trimmed.slice(6);
              if (data === "[DONE]") continue;
              let chunk;
              try {
                chunk = JSON.parse(data);
              } catch {
                continue;
              }
              const content = chunk.choices?.[0]?.delta?.content;
              if (content) {
                if (firstToken) {
                  ttftMs = now - requestStart;
                  firstToken = false;
                } else {
                  interTokenLatencies.push(now - lastChunkTime);
                }
                lastChunkTime = now;
                generatedText += content;
              }
              if (chunk.usage?.completion_tokens) {
                outputTokens = chunk.usage.completion_tokens;
              }
            }
          } else {
            // jsonlines format: one JSON object per line.
            const jsonLines = payloadText.split("\n").filter((l) => l.trim());
            for (const jsonLine of jsonLines) {
              let chunk;
              try {
                chunk = JSON.parse(jsonLine);
              } catch {
                continue;
              }
              const tokenText = chunk.token?.text;
              if (tokenText) {
                if (firstToken) {
                  ttftMs = now - requestStart;
                  firstToken = false;
                } else {
                  interTokenLatencies.push(now - lastChunkTime);
                }
                lastChunkTime = now;
                generatedText += tokenText;
              } else if (typeof chunk.generated_text === "string" && !generatedText) {
                // Some endpoints send the whole text in one final object.
                generatedText = chunk.generated_text;
              }
              const details = chunk.details;
              if (details?.generated_tokens) {
                outputTokens = details.generated_tokens;
              }
            }
          }
        }
      }
    } finally {
      reader.releaseLock();
    }
    // Server-reported counts win; otherwise count tokens locally.
    if (outputTokens === 0) {
      outputTokens = countTokens(generatedText);
    }
    return { generatedText, outputTokens, ttftMs, interTokenLatencies };
  }
  // ---- Non-streaming ----
  /**
   * Parse a non-streaming invocation result for either request format.
   * NOTE(review): ttftMs here only measures body download + JSON parse time.
   */
  async parseResponse(response) {
    const requestStart = performance.now();
    const json = await response.json();
    const ttftMs = performance.now() - requestStart;
    let generatedText = "";
    let outputTokens = 0;
    if (this.requestFormat === "openai" /* OpenAI */) {
      const data = json;
      generatedText = data.choices?.[0]?.message?.content ?? "";
      outputTokens = data.usage?.completion_tokens ?? 0;
    } else {
      // Native format may be a bare object or a one-element array.
      if (Array.isArray(json)) {
        generatedText = json[0]?.generated_text ?? "";
      } else {
        const data = json;
        generatedText = data.generated_text ?? "";
      }
    }
    if (outputTokens === 0) {
      outputTokens = countTokens(generatedText);
    }
    return {
      generatedText,
      outputTokens,
      ttftMs,
      interTokenLatencies: []
    };
  }
};
|
|
1214
|
+
/**
 * Concatenate two byte arrays into a freshly allocated Uint8Array.
 *
 * @param {Uint8Array} a - Leading bytes.
 * @param {Uint8Array} b - Trailing bytes.
 * @returns {Uint8Array} New array containing `a` followed by `b`.
 */
function concatBytes(a, b) {
  const joined = new Uint8Array(a.length + b.length);
  joined.set(a);
  joined.set(b, a.length);
  return joined;
}
|
|
1220
|
+
/**
 * Try to decode one AWS eventstream frame from the front of `buf`.
 *
 * Frame layout: 4-byte total length, 4-byte header-block length, 4-byte
 * prelude CRC, header block, payload, 4-byte message CRC (CRCs are not
 * verified here). Only string-typed headers (wire type 7) are decoded;
 * an unknown header type stops header parsing early.
 *
 * @param {Uint8Array} buf - Accumulated stream bytes.
 * @returns {{message: {headers: Object<string,string>, payload: Uint8Array}, bytesConsumed: number}|null}
 *   The decoded frame and its size, or null when a full frame is not yet buffered.
 */
function readEventStreamMessage(buf) {
  // 16 = 12-byte prelude + 4-byte trailing CRC: the smallest possible frame.
  if (buf.length < 16) return null;
  const dv = new DataView(buf.buffer, buf.byteOffset);
  const frameLength = dv.getUint32(0);
  const headerBlockLength = dv.getUint32(4);
  if (buf.length < frameLength) return null;
  const decoder = new TextDecoder();
  const headers = {};
  const headerBlockEnd = 12 + headerBlockLength;
  let cursor = 12;
  while (cursor < headerBlockEnd) {
    const nameLength = buf[cursor];
    cursor += 1;
    const headerName = decoder.decode(buf.slice(cursor, cursor + nameLength));
    cursor += nameLength;
    const headerType = buf[cursor];
    cursor += 1;
    // Only string headers (type 7) are understood; bail out on anything else.
    if (headerType !== 7) break;
    const valueLength = new DataView(buf.buffer, buf.byteOffset + cursor).getUint16(0);
    cursor += 2;
    headers[headerName] = decoder.decode(buf.slice(cursor, cursor + valueLength));
    cursor += valueLength;
  }
  const payloadStart = 12 + headerBlockLength;
  const payloadLength = frameLength - headerBlockLength - 16;
  const payload = buf.slice(payloadStart, payloadStart + payloadLength);
  return { message: { headers, payload }, bytesConsumed: frameLength };
}
|
|
1250
|
+
|
|
1251
|
+
// src/runner/backend.ts
|
|
1252
|
+
/**
 * Instantiate the backend named by `config.provider.adapter`.
 *
 * @param {object} config - Full loadtest config.
 * @returns {OpenAIBackend|SageMakerBackend} The constructed backend.
 * @throws {Error} When the adapter name is not recognized.
 */
function createBackend(config) {
  const { adapter, baseURL } = config.provider;
  if (adapter === "openai") {
    return OpenAIBackend.create(baseURL);
  }
  if (adapter === "sagemaker") {
    const requestFormat = config.provider.config?.["requestFormat"] ?? "sagemaker" /* Sagemaker */;
    return SageMakerBackend.create(baseURL, requestFormat);
  }
  throw new Error(`Unknown backend adapter: ${adapter}`);
}
|
|
1265
|
+
|
|
1266
|
+
// src/runner/orchestrator.ts
|
|
1267
|
+
init_esm_shims();
|
|
1268
|
+
|
|
1269
|
+
// src/runner/phase.ts
|
|
1270
|
+
init_esm_shims();
|
|
1271
|
+
/**
 * Tracks the benchmark's lifecycle phase ("ramp-up" | "steady" | "ramp-down")
 * and derives how many concurrent request slots are currently allowed.
 * Ramps can be defined either by elapsed time (duration, seconds) or by
 * completed-request counts; a duration-based ramp takes precedence.
 */
var PhaseController = class {
  maxConcurrency;
  rampUpRequests;
  rampUpDuration;
  rampDownRequests;
  rampDownDuration;
  startTime = 0;
  completedRequests = 0;
  totalRequests;
  maxDuration;
  constructor(config) {
    const bench = config.benchmark;
    this.maxConcurrency = bench.concurrency;
    this.rampUpRequests = bench.rampUp?.requests ?? 0;
    // Durations are configured in seconds; stored in milliseconds.
    this.rampUpDuration = (bench.rampUp?.duration ?? 0) * 1e3;
    this.rampDownRequests = bench.rampDown?.requests ?? 0;
    this.rampDownDuration = (bench.rampDown?.duration ?? 0) * 1e3;
    this.totalRequests = bench.maxRequests ?? Infinity;
    this.maxDuration = (bench.maxDuration ?? Infinity) * 1e3;
  }
  /** Mark the benchmark start; all elapsed-time checks are relative to this. */
  start() {
    this.startTime = performance.now();
  }
  /** Record one finished request (successful or not). */
  recordCompletion() {
    this.completedRequests += 1;
  }
  /** Current phase, computed from elapsed time and completion counts. */
  get phase() {
    const elapsed = performance.now() - this.startTime;
    const inTimedRampUp = this.rampUpDuration > 0 && elapsed < this.rampUpDuration;
    const inCountedRampUp = this.rampUpRequests > 0 && this.completedRequests < this.rampUpRequests;
    if (inTimedRampUp || inCountedRampUp) {
      return "ramp-up";
    }
    if (this.rampDownDuration > 0 && this.maxDuration - elapsed <= this.rampDownDuration) {
      return "ramp-down";
    }
    if (this.rampDownRequests > 0 && this.totalRequests - this.completedRequests <= this.rampDownRequests) {
      return "ramp-down";
    }
    return "steady";
  }
  /**
   * Concurrency cap for the current phase: scales linearly with ramp
   * progress (never below 1), and equals maxConcurrency while steady.
   */
  get allowedConcurrency() {
    const currentPhase = this.phase;
    if (currentPhase === "steady") {
      return this.maxConcurrency;
    }
    const elapsed = performance.now() - this.startTime;
    let fraction;
    if (currentPhase === "ramp-up") {
      fraction = this.rampUpDuration > 0
        ? Math.min(1, elapsed / this.rampUpDuration)
        : Math.min(1, this.completedRequests / this.rampUpRequests);
    } else {
      fraction = this.rampDownDuration > 0
        ? Math.max(0, (this.maxDuration - elapsed) / this.rampDownDuration)
        : Math.max(0, (this.totalRequests - this.completedRequests) / this.rampDownRequests);
    }
    return Math.max(1, Math.ceil(fraction * this.maxConcurrency));
  }
  /** True when the run should end: aborted, request budget spent, or time up. */
  shouldStop(aborted) {
    if (aborted || this.completedRequests >= this.totalRequests) {
      return true;
    }
    return performance.now() - this.startTime >= this.maxDuration;
  }
};
|
|
1347
|
+
|
|
1348
|
+
// src/runner/wal.ts
|
|
1349
|
+
init_esm_shims();
|
|
1350
|
+
import { appendFileSync, mkdirSync as mkdirSync2, writeFileSync as writeFileSync3 } from "fs";
|
|
1351
|
+
import { join as join2 } from "path";
|
|
1352
|
+
/**
 * Write-ahead log for request metrics: every result is appended to
 * run_log.jsonl (one JSON object per line) and also written as a
 * pretty-printed per-request file under individual_responses/.
 */
var WAL = class {
  logPath;
  responsesDir;
  /** Resolve paths under `outputDir` and ensure the responses dir exists. */
  constructor(outputDir) {
    this.logPath = join2(outputDir, "run_log.jsonl");
    this.responsesDir = join2(outputDir, "individual_responses");
    mkdirSync2(this.responsesDir, { recursive: true });
  }
  /** Persist one metrics record to both the JSONL log and its own file. */
  write(metrics) {
    const compactLine = JSON.stringify(metrics);
    appendFileSync(this.logPath, `${compactLine}\n`);
    const responsePath = join2(this.responsesDir, `${metrics.requestId}.json`);
    writeFileSync3(responsePath, JSON.stringify(metrics, null, 2));
  }
  /** Read back every metrics record previously logged under `outputDir`. */
  static readLog(outputDir) {
    const logPath = join2(outputDir, "run_log.jsonl");
    const raw = __require("fs").readFileSync(logPath, "utf-8");
    const entries = [];
    for (const line of raw.split("\n")) {
      if (line.trim().length === 0) continue;
      entries.push(JSON.parse(line));
    }
    return entries;
  }
};
|
|
1373
|
+
|
|
1374
|
+
// src/runner/request.ts
|
|
1375
|
+
init_esm_shims();
|
|
1376
|
+
/**
 * Execute a single benchmark request against `backend` and return a
 * metrics record. Never throws: backend failures (including timeout or
 * external abort) are captured as a metrics record with `error` /
 * `errorCode` set and zeroed token/latency fields.
 *
 * @param backend   Object exposing request(text, model, maxTokens, systemPrompt, params, streaming, signal).
 * @param prompt2   Prompt record: { text, tokenCount, outputTokenTarget }.
 * @param config    Full loadtest config (provider + benchmark sections used).
 * @param requestId Unique id for this request.
 * @param phase     Lifecycle phase label recorded on the metrics.
 * @param cacheHit  Whether this prompt was reused to provoke a cache hit.
 * @param signal    External AbortSignal; combined with a per-request timeout.
 */
async function executeRequest(backend, prompt2, config, requestId, phase, cacheHit, signal) {
  const startTime = performance.now();
  try {
    // Per-request timeout (seconds -> ms), merged with the external signal.
    const timeoutSignal = AbortSignal.timeout(config.benchmark.timeout * 1e3);
    const combinedSignal = AbortSignal.any([signal, timeoutSignal]);
    const response = await backend.request(
      prompt2.text,
      config.provider.model,
      prompt2.outputTokenTarget,
      config.provider.systemPrompt,
      config.provider.config,
      config.benchmark.streaming,
      combinedSignal
    );
    const endTime = performance.now();
    const e2eLatencyMs = endTime - startTime;
    const throughputTps = e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0;
    return {
      requestId,
      startTime,
      endTime,
      ttftMs: response.ttftMs,
      e2eLatencyMs,
      interTokenLatencies: response.interTokenLatencies,
      inputText: prompt2.text,
      inputTokens: prompt2.tokenCount,
      outputTokens: response.outputTokens,
      outputThroughputTps: throughputTps,
      generatedText: response.generatedText,
      phase,
      cacheHit
    };
  } catch (err) {
    const endTime = performance.now();
    return {
      requestId,
      startTime,
      endTime,
      ttftMs: 0,
      e2eLatencyMs: endTime - startTime,
      interTokenLatencies: [],
      inputText: prompt2.text,
      inputTokens: prompt2.tokenCount,
      outputTokens: 0,
      outputThroughputTps: 0,
      generatedText: "",
      error: err instanceof Error ? err.message : String(err),
      errorCode: err instanceof Error && "code" in err ? String(err.code) : void 0,
      phase,
      cacheHit
    };
  }
}
|
|
1429
|
+
|
|
1430
|
+
// src/runner/orchestrator.ts
|
|
1431
|
+
// src/runner/orchestrator.ts
// Drives the load test: runs up to `concurrency` worker loops, each of which
// pulls prompts, executes a request, records the result to the WAL, and
// reports progress. Ramp phases are delegated to PhaseController.
var ConcurrencyOrchestrator = class {
  config;
  backend;
  prompts;
  wal;                    // write-ahead log; every result is persisted as it completes
  phaseController;        // decides allowed concurrency per phase and when to stop
  promptIndex = 0;        // monotonically increasing; wraps over prompts via modulo
  completedRequests = 0;
  activeSlots = 0;        // workers currently executing a request (not merely alive)
  results = [];
  aborted = false;        // set by abort(); checked at the top of each worker iteration
  onProgress;             // optional callback invoked after each completed request
  constructor(config, backend, prompts, outputDir, onProgress) {
    this.config = config;
    this.backend = backend;
    this.prompts = prompts;
    this.wal = new WAL(outputDir);
    this.phaseController = new PhaseController(config);
    this.onProgress = onProgress;
  }
  // Request a graceful stop; in-flight requests finish, no new ones start.
  abort() {
    this.aborted = true;
  }
  // Launch all worker loops and wait for every one to settle.
  // allSettled (not all) so one worker's unexpected rejection cannot hide
  // the results collected by the others. Returns the accumulated metrics.
  async run(signal) {
    this.phaseController.start();
    const maxConcurrency = this.config.benchmark.concurrency;
    const workers = Array.from({ length: maxConcurrency }, (_, i) => this.workerLoop(i, signal));
    await Promise.allSettled(workers);
    return this.results;
  }
  // One worker slot. Slots with id >= allowedConcurrency park (50ms poll) so
  // ramp-up/ramp-down can throttle parallelism without killing workers.
  async workerLoop(slotId, signal) {
    while (!this.phaseController.shouldStop(this.aborted || signal.aborted)) {
      if (slotId >= this.phaseController.allowedConcurrency) {
        await sleep(50);
        continue;
      }
      const prompt2 = this.getNextPrompt();
      if (!prompt2) break;
      this.activeSlots++;
      const requestId = crypto.randomUUID();
      const phase = this.phaseController.phase;
      const cacheHit = this.isCacheHit();
      // executeRequest catches its own errors and returns error metrics,
      // so this await is not expected to throw in the normal path.
      const metrics = await executeRequest(
        this.backend,
        prompt2,
        this.config,
        requestId,
        phase,
        cacheHit,
        signal
      );
      this.results.push(metrics);
      this.wal.write(metrics);
      this.completedRequests++;
      this.phaseController.recordCompletion();
      this.onProgress?.(
        metrics,
        this.activeSlots,
        this.completedRequests,
        this.phaseController.phase,
        this.phaseController.allowedConcurrency
      );
      this.activeSlots--;
    }
  }
  // Next prompt, cycling through the pool; null once maxRequests prompts
  // have been handed out (or the pool is empty).
  getNextPrompt() {
    if (this.prompts.length === 0) return null;
    const maxReqs = this.config.benchmark.maxRequests ?? Infinity;
    if (this.promptIndex >= maxReqs) return null;
    const prompt2 = this.prompts[this.promptIndex % this.prompts.length];
    this.promptIndex++;
    return prompt2;
  }
  // Bernoulli draw: mark roughly cachePercentage% of requests as cache hits.
  // NOTE(review): this only labels the request; whether the backend actually
  // reuses a cached prompt is decided elsewhere — confirm against executeRequest.
  isCacheHit() {
    const pct = this.config.benchmark.cachePercentage;
    if (pct <= 0) return false;
    return Math.random() * 100 < pct;
  }
};
|
|
1510
|
+
/** Resolve after approximately `ms` milliseconds (setTimeout-based). */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
1513
|
+
|
|
1514
|
+
// src/reporter/aggregator.ts
|
|
1515
|
+
init_esm_shims();
|
|
1516
|
+
|
|
1517
|
+
// src/reporter/statistics.ts
|
|
1518
|
+
init_esm_shims();
|
|
1519
|
+
// src/reporter/statistics.ts
/** Arithmetic mean; 0 for an empty list. */
function mean(values) {
  if (values.length === 0) return 0;
  let total = 0;
  for (const v of values) total += v;
  return total / values.length;
}
/** Sample standard deviation (n-1 denominator); 0 when fewer than 2 samples. */
function stddev(values) {
  if (values.length < 2) return 0;
  const avg = mean(values);
  let sumSq = 0;
  for (const v of values) sumSq += (v - avg) ** 2;
  return Math.sqrt(sumSq / (values.length - 1));
}
/**
 * Linearly interpolated quantile over an ascending-sorted array.
 * q is in [0, 1]; returns 0 for an empty array.
 */
function quantile(sorted, q) {
  if (sorted.length === 0) return 0;
  const pos = (sorted.length - 1) * q;
  const lo = Math.floor(pos);
  const hi = Math.ceil(pos);
  if (lo === hi) return sorted[lo];
  return sorted[lo] + (sorted[hi] - sorted[lo]) * (pos - lo);
}
/** Full descriptive-statistics bundle for a list of numbers. */
function aggregate(values) {
  const ascending = [...values].sort((x, y) => x - y);
  return {
    mean: mean(values),
    min: ascending[0] ?? 0,
    max: ascending[ascending.length - 1] ?? 0,
    stddev: stddev(values),
    p25: quantile(ascending, 0.25),
    p50: quantile(ascending, 0.5),
    p75: quantile(ascending, 0.75),
    p90: quantile(ascending, 0.9),
    p95: quantile(ascending, 0.95),
    p99: quantile(ascending, 0.99)
  };
}
|
|
1552
|
+
|
|
1553
|
+
// src/reporter/aggregator.ts
|
|
1554
|
+
// src/reporter/aggregator.ts
/**
 * Reduce per-request metrics into the benchmark summary: counts, error and
 * cache-hit rates, throughput, latency aggregates, and per-phase breakdown.
 * Latency/token aggregates are computed over successful requests only.
 *
 * Fix: start/end were computed with Math.min(...spread)/Math.max(...spread),
 * which returns Infinity/-Infinity for an empty run and can throw a
 * RangeError (argument-count limit) for very large runs. Now computed with
 * reduce, and an empty run reports a zero-length window.
 */
function computeSummary(requests) {
  const successful = requests.filter((r) => !r.error);
  const failed = requests.filter((r) => !!r.error);
  const startTime = requests.length > 0 ? requests.reduce((min, r) => Math.min(min, r.startTime), Infinity) : 0;
  const endTime = requests.length > 0 ? requests.reduce((max, r) => Math.max(max, r.endTime), -Infinity) : 0;
  const durationMs = endTime - startTime;
  const durationMin = durationMs / 6e4;
  const totalOutputTokens = successful.reduce((sum, r) => sum + r.outputTokens, 0);
  // Histogram of error codes; requests without a code bucket under "unknown".
  const errorCodeFrequency = {};
  for (const r of failed) {
    const code = r.errorCode ?? "unknown";
    errorCodeFrequency[code] = (errorCodeFrequency[code] ?? 0) + 1;
  }
  const cacheHits = requests.filter((r) => r.cacheHit).length;
  const phases = ["ramp-up", "steady", "ramp-down"];
  const phaseBreakdown = {};
  for (const phase of phases) {
    const phaseReqs = requests.filter((r) => r.phase === phase);
    if (phaseReqs.length > 0) {
      const phaseErrors = phaseReqs.filter((r) => !!r.error).length;
      phaseBreakdown[phase] = {
        requests: phaseReqs.length,
        errorRate: phaseErrors / phaseReqs.length
      };
    }
  }
  // ttfntMs is optional on request metrics (not set by every code path in
  // this bundle) — presumably time-to-first-non-trivial-token from a
  // streaming backend; aggregate only when present and positive.
  const ttfntValues = successful.map((r) => r.ttfntMs).filter((v) => v !== void 0 && v > 0);
  const itlValues = successful.flatMap((r) => r.interTokenLatencies);
  return {
    startTime,
    endTime,
    totalRequests: requests.length,
    successfulRequests: successful.length,
    failedRequests: failed.length,
    errorRate: requests.length > 0 ? failed.length / requests.length : 0,
    rpm: durationMin > 0 ? requests.length / durationMin : 0,
    overallTps: durationMs > 0 ? totalOutputTokens / (durationMs / 1e3) : 0,
    ttft: aggregate(successful.map((r) => r.ttftMs)),
    ttfnt: ttfntValues.length > 0 ? aggregate(ttfntValues) : void 0,
    e2eLatency: aggregate(successful.map((r) => r.e2eLatencyMs)),
    outputThroughput: aggregate(successful.map((r) => r.outputThroughputTps)),
    interTokenLatency: aggregate(itlValues),
    inputTokens: aggregate(successful.map((r) => r.inputTokens)),
    outputTokens: aggregate(successful.map((r) => r.outputTokens)),
    errorCodeFrequency,
    cacheHitRate: requests.length > 0 ? cacheHits / requests.length : 0,
    phaseBreakdown
  };
}
|
|
1603
|
+
|
|
1604
|
+
// src/reporter/exporter.ts
|
|
1605
|
+
init_esm_shims();
|
|
1606
|
+
|
|
1607
|
+
// src/reporter/exporters/json.ts
|
|
1608
|
+
init_esm_shims();
|
|
1609
|
+
import { writeFileSync as writeFileSync4, mkdirSync as mkdirSync3 } from "fs";
|
|
1610
|
+
import { join as join3 } from "path";
|
|
1611
|
+
// src/reporter/exporters/json.ts
var JsonExporter = class {
  name = "json";
  /**
   * Serialize the summary as pretty-printed JSON to <outputDir>/summary.json.
   * Per-request data is intentionally ignored by this exporter.
   */
  async export(summary, _requests, outputDir) {
    mkdirSync3(outputDir, { recursive: true });
    const payload = JSON.stringify(summary, null, 2);
    const target = join3(outputDir, "summary.json");
    writeFileSync4(target, payload);
  }
};
|
|
1618
|
+
|
|
1619
|
+
// src/reporter/exporters/csv.ts
|
|
1620
|
+
init_esm_shims();
|
|
1621
|
+
import { writeFileSync as writeFileSync5, mkdirSync as mkdirSync4 } from "fs";
|
|
1622
|
+
import { join as join4 } from "path";
|
|
1623
|
+
// src/reporter/exporters/csv.ts
var CsvExporter = class {
  name = "csv";
  /**
   * Write summary.csv (one wide row of aggregate stats) and requests.csv
   * (one row per request) into outputDir.
   */
  async export(summary, requests, outputDir) {
    mkdirSync4(outputDir, { recursive: true });
    this.writeSummary(summary, outputDir);
    this.writeRequests(requests, outputDir);
  }
  /**
   * RFC 4180-style field escaping: quote any field containing a comma,
   * double quote, or newline, doubling embedded quotes.
   * Fix: request fields (notably error messages, which routinely contain
   * commas) were previously joined unescaped, corrupting the column layout.
   */
  escapeField(value) {
    const s = String(value);
    if (/[",\n\r]/.test(s)) {
      return `"${s.replace(/"/g, '""')}"`;
    }
    return s;
  }
  // One header row + one value row; scalar counters first, then every
  // stat (mean..p99) of every aggregate metric as <metric>_<stat> columns.
  writeSummary(summary, outputDir) {
    const metricFields = [
      "ttft",
      "e2eLatency",
      "outputThroughput",
      "interTokenLatency",
      "inputTokens",
      "outputTokens"
    ];
    const statFields = [
      "mean",
      "min",
      "max",
      "stddev",
      "p25",
      "p50",
      "p75",
      "p90",
      "p95",
      "p99"
    ];
    const headers = [
      "totalRequests",
      "successfulRequests",
      "failedRequests",
      "errorRate",
      "rpm",
      "overallTps",
      "cacheHitRate"
    ];
    const values = [
      summary.totalRequests,
      summary.successfulRequests,
      summary.failedRequests,
      summary.errorRate,
      summary.rpm,
      summary.overallTps,
      summary.cacheHitRate
    ];
    for (const metric of metricFields) {
      const agg = summary[metric];
      for (const stat of statFields) {
        headers.push(`${metric}_${stat}`);
        values.push(agg[stat]);
      }
    }
    const csv = [headers.join(","), values.join(",")].join("\n");
    writeFileSync5(join4(outputDir, "summary.csv"), csv);
  }
  // One row per request; free-text fields go through escapeField so the
  // file stays parseable. No-op when there are no requests.
  writeRequests(requests, outputDir) {
    if (requests.length === 0) return;
    const headers = [
      "requestId",
      "startTime",
      "endTime",
      "ttftMs",
      "e2eLatencyMs",
      "inputTokens",
      "outputTokens",
      "outputThroughputTps",
      "phase",
      "cacheHit",
      "error",
      "errorCode"
    ];
    const rows = requests.map(
      (r) => [
        r.requestId,
        r.startTime,
        r.endTime,
        r.ttftMs,
        r.e2eLatencyMs,
        r.inputTokens,
        r.outputTokens,
        r.outputThroughputTps,
        r.phase,
        r.cacheHit,
        r.error ?? "",
        r.errorCode ?? ""
      ].map((field) => this.escapeField(field)).join(",")
    );
    const csv = [headers.join(","), ...rows].join("\n");
    writeFileSync5(join4(outputDir, "requests.csv"), csv);
  }
};
|
|
1715
|
+
|
|
1716
|
+
// src/reporter/exporter.ts
|
|
1717
|
+
// src/reporter/exporter.ts
/**
 * Instantiate one exporter per configured reporter adapter.
 * Unrecognized adapter names are silently skipped (matching config
 * validation done elsewhere).
 */
function createExporters(config) {
  const exporters = [];
  for (const adapter of config.reporter.adapters) {
    if (adapter === "json") {
      exporters.push(new JsonExporter());
    } else if (adapter === "csv") {
      exporters.push(new CsvExporter());
    }
  }
  return exporters;
}
|
|
1732
|
+
|
|
1733
|
+
// src/utils/signal.ts
|
|
1734
|
+
init_esm_shims();
|
|
1735
|
+
// src/utils/signal.ts
/**
 * Wire SIGINT/SIGTERM to a shared AbortController.
 * First signal: announce graceful shutdown, abort, and fire every
 * registered shutdown callback. Any further signal force-exits with code 1.
 */
function createAbortController() {
  const controller = new AbortController();
  const shutdownCallbacks = [];
  let shutdownCount = 0;
  const handler = () => {
    shutdownCount += 1;
    if (shutdownCount > 1) {
      // Second Ctrl+C: user wants out now.
      process.exit(1);
    } else {
      console.log("\nGraceful shutdown initiated... (press Ctrl+C again to force)");
      controller.abort();
      shutdownCallbacks.forEach((cb) => cb());
    }
  };
  process.on("SIGINT", handler);
  process.on("SIGTERM", handler);
  return {
    controller,
    onShutdown: (fn) => shutdownCallbacks.push(fn)
  };
}
|
|
1756
|
+
|
|
1757
|
+
// src/cli/progress.ts
|
|
1758
|
+
init_esm_shims();
|
|
1759
|
+
|
|
1760
|
+
// src/cli/ui/store.ts
|
|
1761
|
+
init_esm_shims();
|
|
1762
|
+
// src/cli/ui/store.ts
// Rolling-window size for recent latency samples shown in the UI.
var ROLLING_CAP = 50;
// Mutable progress state shared between the orchestrator callbacks and the
// terminal UI. snapshot() hands out a defensive copy for rendering.
var BenchmarkStore = class {
  phase = "steady";
  stage = "idle";
  activeSlots = 0;
  allowedConcurrency = 0;
  maxConcurrency;
  completed = 0;
  totalTarget;
  errors = 0;
  totalOutputTokens = 0;
  totalInputTokens = 0;
  startTime = 0;
  recentTtft = [];
  recentE2eLatency = [];
  recentErrors = [];
  modelName;
  streaming;
  constructor(opts) {
    this.totalTarget = opts.totalTarget;
    this.maxConcurrency = opts.maxConcurrency;
    this.modelName = opts.modelName;
    this.streaming = opts.streaming;
  }
  /** Fold one completed request's metrics into the running state. */
  update(metrics, activeSlots, completed, phase, allowedConcurrency) {
    this.activeSlots = activeSlots;
    this.completed = completed;
    this.phase = phase;
    this.allowedConcurrency = allowedConcurrency;
    this.totalOutputTokens += metrics.outputTokens;
    this.totalInputTokens += metrics.inputTokens;
    if (metrics.error) {
      this.errors += 1;
      this.recentErrors.push(metrics.error);
      while (this.recentErrors.length > 5) this.recentErrors.shift();
    } else {
      this.recentTtft.push(metrics.ttftMs);
      while (this.recentTtft.length > ROLLING_CAP) this.recentTtft.shift();
      this.recentE2eLatency.push(metrics.e2eLatencyMs);
      while (this.recentE2eLatency.length > ROLLING_CAP) this.recentE2eLatency.shift();
    }
  }
  setStage(stage) {
    this.stage = stage;
  }
  setStartTime(t) {
    this.startTime = t;
  }
  /** Immutable view of the current state (arrays copied). */
  snapshot() {
    return {
      phase: this.phase,
      stage: this.stage,
      activeSlots: this.activeSlots,
      allowedConcurrency: this.allowedConcurrency,
      maxConcurrency: this.maxConcurrency,
      completed: this.completed,
      totalTarget: this.totalTarget,
      errors: this.errors,
      totalOutputTokens: this.totalOutputTokens,
      totalInputTokens: this.totalInputTokens,
      startTime: this.startTime,
      recentTtft: this.recentTtft.slice(),
      recentE2eLatency: this.recentE2eLatency.slice(),
      recentErrors: this.recentErrors.slice(),
      modelName: this.modelName,
      streaming: this.streaming
    };
  }
};
|
|
1832
|
+
|
|
1833
|
+
// src/cli/ui/fallback.ts
|
|
1834
|
+
init_esm_shims();
|
|
1835
|
+
// src/cli/ui/fallback.ts
/** Format elapsed seconds as "3m07s" or, under a minute, "42s". */
function formatDuration(seconds) {
  const mins = Math.floor(seconds / 60);
  const secs = Math.floor(seconds % 60);
  if (mins > 0) {
    return `${mins}m${String(secs).padStart(2, "0")}s`;
  }
  return `${secs}s`;
}
// Plain-text progress printer for non-TTY stderr (CI logs, pipes).
// Prints a two-line status every 2 seconds while started, and once more
// on stop().
var FallbackDisplay = class {
  store;
  intervalId = null;
  constructor(store) {
    this.store = store;
  }
  start() {
    this.intervalId = setInterval(() => this.print(), 2e3);
  }
  stop() {
    if (this.intervalId !== null) {
      clearInterval(this.intervalId);
      this.intervalId = null;
    }
    this.print();
  }
  print() {
    const snap = this.store;
    const elapsedSec = snap.startTime > 0 ? (performance.now() - snap.startTime) / 1e3 : 0;
    const reqPerSec = elapsedSec > 0 ? snap.completed / elapsedSec : 0;
    const tokPerSec = elapsedSec > 0 ? snap.totalOutputTokens / elapsedSec : 0;
    // totalTarget may be Infinity when maxRequests is unset; hide the
    // percentage and bar fill in that case.
    const unbounded = snap.totalTarget === Infinity;
    const totalStr = unbounded ? "?" : String(snap.totalTarget);
    const fraction = unbounded ? 0 : Math.min(1, snap.completed / snap.totalTarget);
    const pctStr = unbounded ? "" : ` (${(fraction * 100).toFixed(1)}%)`;
    const barWidth = 20;
    const filled = Math.round(fraction * barWidth);
    const bar = "=".repeat(filled) + ".".repeat(barWidth - filled);
    process.stderr.write(
      ` [${bar}] ${snap.completed}/${totalStr}${pctStr}\n${formatDuration(elapsedSec)} | ${reqPerSec.toFixed(1)} req/s | ${tokPerSec.toFixed(0)} tok/s | ${snap.errors} err\n`
    );
  }
};
|
|
1875
|
+
|
|
1876
|
+
// src/cli/progress.ts
|
|
1877
|
+
// src/cli/progress.ts
// Facade over the two progress UIs: an Ink (React) renderer when stderr is
// a TTY, otherwise the plain-text FallbackDisplay. Both read from a shared
// BenchmarkStore.
var ProgressDisplay = class {
  store;
  inkInstance = null;   // Ink render handle; non-null only in TTY mode
  fallback = null;      // FallbackDisplay; non-null only in non-TTY mode
  constructor(opts) {
    this.store = new BenchmarkStore(opts);
  }
  // Choose and start a renderer. ink/react are imported lazily so non-TTY
  // runs never load them; App comes from the bundle's lazy module init.
  async start() {
    this.store.setStartTime(performance.now());
    if (process.stderr.isTTY) {
      const { render } = await import("ink");
      const { createElement } = await import("react");
      const { App: App2 } = await Promise.resolve().then(() => (init_app(), app_exports));
      this.inkInstance = render(createElement(App2, { store: this.store }), {
        stdout: process.stderr,   // render to stderr so stdout stays clean for data
        patchConsole: false,
        exitOnCtrlC: false        // Ctrl+C is handled by createAbortController
      });
    } else {
      this.fallback = new FallbackDisplay(this.store);
      this.fallback.start();
    }
  }
  // Forward one completed request's metrics into the store; the active
  // renderer picks the change up on its own schedule.
  update(metrics, activeSlots, completed, phase, allowedConcurrency) {
    this.store.update(metrics, activeSlots, completed, phase, allowedConcurrency);
  }
  setStage(stage) {
    this.store.setStage(stage);
  }
  // Tear down whichever renderer is active; safe to call more than once.
  stop() {
    if (this.inkInstance) {
      this.inkInstance.unmount();
      this.inkInstance = null;
    }
    if (this.fallback) {
      this.fallback.stop();
      this.fallback = null;
    }
  }
};
|
|
1917
|
+
|
|
1918
|
+
// index.ts
|
|
1919
|
+
import { mkdirSync as mkdirSync5, writeFileSync as writeFileSync6 } from "fs";
|
|
1920
|
+
import { join as join5 } from "path";
|
|
1921
|
+
// CLI entry point: parse argv, resolve config, and dispatch to the
// subcommand. `init` runs before config resolution since it creates the
// config file itself.
async function main() {
  const cliArgs = parseCliArgs(process.argv);
  if (cliArgs.command === "init") {
    await runInit(cliArgs.initOutputPath);
    return;
  }
  const { command, configPath, runId, overrides } = cliArgs;
  const { config, outputDir } = resolveConfig(configPath, runId, overrides);
  switch (command) {
    case "run":
      // generate prompts + benchmark + report in one pass
      await runFullPipeline(config, outputDir);
      break;
    case "generate":
      await runGenerate(config, outputDir);
      break;
    case "bench":
      await runBench(config, outputDir);
      break;
    case "report":
      await runReport(config);
      break;
  }
}
|
|
1944
|
+
// Total outer width (in columns) of every box drawn by printBox/printBanner.
var BOX_W = 56;
/**
 * Print a rounded-corner box with the title embedded in the top border.
 * Line padding is computed on the ANSI-stripped text so colored content
 * still aligns with the right border.
 */
function printBox(title, lines) {
  const inner = BOX_W - 2;
  const titleStr = `\u2500\u2500 ${title} `;
  const topPad = "\u2500".repeat(Math.max(0, inner - titleStr.length));
  console.log(` ${dim("\u256D")}${dim(titleStr)}${dim(topPad)}${dim("\u256E")}`);
  lines.forEach((line) => {
    const visibleLen = stripAnsi(line).length;
    const rightPad = " ".repeat(Math.max(0, inner - visibleLen - 2));
    console.log(` ${dim("\u2502")} ${line}${rightPad}${dim("\u2502")}`);
  });
  console.log(` ${dim("\u2570")}${dim("\u2500".repeat(inner))}${dim("\u256F")}`);
}
|
|
1957
|
+
/** Strip ANSI SGR (color/style) escape sequences for visible-width math. */
function stripAnsi(s) {
  return s.replaceAll(/\x1b\[[0-9;]*m/g, "");
}
|
|
1960
|
+
// Print the startup banner: a boxed title followed by a left-aligned table
// of the key run settings. Optional rows (Requests/Duration) appear only
// when configured.
function printBanner(config, outputDir) {
  const inner = BOX_W - 2;
  const title = "FLOTorch Load Tester";
  // Center the title inside the box; extra column (odd widths) goes right.
  const titlePad = Math.floor((inner - title.length) / 2);
  console.log();
  console.log(` ${dim("\u256D")}${dim("\u2500".repeat(inner))}${dim("\u256E")}`);
  console.log(` ${dim("\u2502")}${" ".repeat(titlePad)}${bold(title)}${" ".repeat(inner - titlePad - title.length)}${dim("\u2502")}`);
  console.log(` ${dim("\u2570")}${dim("\u2500".repeat(inner))}${dim("\u256F")}`);
  const maxReqs = config.benchmark.maxRequests;
  const maxDur = config.benchmark.maxDuration;
  const rows = [
    [dim("Model"), bold(config.provider.model)],
    [dim("Concurrency"), bold(String(config.benchmark.concurrency))],
    [dim("Streaming"), bold(config.benchmark.streaming ? "yes" : "no")]
  ];
  if (maxReqs) rows.push([dim("Requests"), bold(String(maxReqs))]);
  if (maxDur) rows.push([dim("Duration"), bold(`${maxDur}s`)]);
  rows.push([dim("Output"), cyan(outputDir)]);
  // Fixed label column; pad on the ANSI-stripped label so colored labels
  // still line up.
  const labelW = 14;
  for (const [label, value] of rows) {
    const stripped = stripAnsi(label);
    const pad = " ".repeat(Math.max(0, labelW - stripped.length));
    console.log(` ${label}${pad}${value}`);
  }
  console.log();
}
|
|
1986
|
+
/** Print a completed-stage line: green check, message, optional dim detail. */
function stageOk(msg, detail) {
  let line = ` ${green("\u2714")} ${msg}`;
  if (detail) {
    line += ` ${dim(detail)}`;
  }
  console.log(line);
}
|
|
1990
|
+
// Print an in-progress stage line ("▸ message"); typically erased and
// replaced by stageOk once the stage completes.
function stageRun(msg) {
  console.log(` ${bold("\u25B8")} ${msg}`);
}
|
|
1993
|
+
/**
 * Format a number for a fixed-width table column: values >= 100 get no
 * decimal places, everything else gets one; right-padded to width w.
 *
 * Fix: the original ternary chain had two identical branches (`n >= 10`
 * and the fallback both produced toFixed(1)); collapsed the dead branch.
 */
function fmtNum(n, w) {
  const s = n >= 100 ? n.toFixed(0) : n.toFixed(1);
  return s.padStart(w);
}
|
|
1997
|
+
/** One latency-table row: bold label plus mean/p50/p95/p99/max columns. */
function fmtMetricTable(label, agg, labelW, colW) {
  const cells = [agg.mean, agg.p50, agg.p95, agg.p99, agg.max]
    .map((v) => fmtNum(v, colW))
    .join("");
  return `${bold(label.padEnd(labelW))}${cells}`;
}
|
|
2001
|
+
// Render the end-of-run report to stdout: a summary box, a latency table,
// and — only when relevant — error-code and phase-breakdown boxes.
function printSummary(summary) {
  const durationSec = (summary.endTime - summary.startTime) / 1e3;
  // Error count rendered red only when there were failures.
  const errColor = summary.failedRequests > 0 ? red : green;
  console.log();
  printBox("Summary", [
    `${dim("Duration")} ${bold(durationSec.toFixed(1) + "s")}`,
    `${dim("Requests")} ${bold(String(summary.successfulRequests))}/${summary.totalRequests} ${errColor(`(${summary.failedRequests} errors)`)}`,
    `${dim("Throughput")} ${bold(summary.rpm.toFixed(1))} ${dim("req/min")} ${bold(summary.overallTps.toFixed(1))} ${dim("tok/s")}`
  ]);
  const labelW = 6;
  const colW = 9;
  const header = dim(" ".repeat(labelW)) + dim("mean".padStart(colW)) + dim("p50".padStart(colW)) + dim("p95".padStart(colW)) + dim("p99".padStart(colW)) + dim("max".padStart(colW));
  const latencyLines = [
    header,
    fmtMetricTable("TTFT", summary.ttft, labelW, colW),
    fmtMetricTable("E2E", summary.e2eLatency, labelW, colW),
    fmtMetricTable("ITL", summary.interTokenLatency, labelW, colW)
  ];
  console.log();
  printBox("Latency (ms)", latencyLines);
  // Error-code histogram only when at least one request failed.
  if (Object.keys(summary.errorCodeFrequency).length > 0) {
    console.log();
    const errLines = [];
    for (const [code, count] of Object.entries(summary.errorCodeFrequency)) {
      errLines.push(`${red(code)} ${bold(String(count))}`);
    }
    printBox("Errors", errLines);
  }
  // Phase table only when more than one phase actually ran (a steady-only
  // run has a single entry and the table would be redundant).
  if (Object.keys(summary.phaseBreakdown).length > 1) {
    console.log();
    const phaseLines = [];
    for (const [phase, data] of Object.entries(summary.phaseBreakdown)) {
      phaseLines.push(
        `${bold(phase.padEnd(12))} ${String(data.requests).padStart(4)} reqs ${(data.errorRate * 100).toFixed(1)}% err`
      );
    }
    printBox("Phases", phaseLines);
  }
}
|
|
2040
|
+
// `run` subcommand: generate prompts, persist them, execute the benchmark
// with live progress, then export and print the report.
async function runFullPipeline(config, outputDir) {
  printBanner(config, outputDir);
  stageRun("Generating prompts...");
  const prompts = generatePrompts(config);
  // "\x1B[1A\x1B[2K" = cursor up one line + clear it, replacing the
  // stageRun line with the stageOk line below.
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Generating prompts", `${prompts.length} prompts`);
  mkdirSync5(outputDir, { recursive: true });
  // Persist the prompt set (JSONL) so the run can be replayed with `bench`.
  writeFileSync6(
    join5(outputDir, "prompts.jsonl"),
    prompts.map((p) => JSON.stringify(p)).join("\n") + "\n"
  );
  stageRun("Running benchmark...");
  const { controller, onShutdown } = createAbortController();
  const totalTarget = config.benchmark.maxRequests ?? Infinity;
  const progress = new ProgressDisplay({
    totalTarget,
    maxConcurrency: config.benchmark.concurrency,
    modelName: config.provider.model,
    streaming: config.benchmark.streaming
  });
  progress.setStage("benchmarking");
  const backend = createBackend(config);
  const orchestrator = new ConcurrencyOrchestrator(
    config,
    backend,
    prompts,
    outputDir,
    (metrics, active, completed, phase, allowedConcurrency) => progress.update(metrics, active, completed, phase, allowedConcurrency)
  );
  // Ctrl+C drains in-flight requests instead of killing the process.
  onShutdown(() => orchestrator.abort());
  await progress.start();
  const results = await orchestrator.run(controller.signal);
  progress.stop();
  stageOk("Running benchmark", `${results.length} requests`);
  stageRun("Generating report...");
  const summary = computeSummary(results);
  const exporters = createExporters(config);
  for (const exporter of exporters) {
    await exporter.export(summary, results, outputDir);
  }
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Generating report");
  printSummary(summary);
  console.log(`
${dim("Results saved to:")} ${cyan(outputDir)}
`);
}
|
|
2087
|
+
// `generate` subcommand: produce the prompt set and write it to
// <outputDir>/prompts.jsonl without running a benchmark.
async function runGenerate(config, outputDir) {
  printBanner(config, outputDir);
  stageRun("Generating prompts...");
  const prompts = generatePrompts(config);
  mkdirSync5(outputDir, { recursive: true });
  const outPath = join5(outputDir, "prompts.jsonl");
  // One JSON object per line (JSONL), trailing newline included.
  writeFileSync6(outPath, prompts.map((p) => JSON.stringify(p)).join("\n") + "\n");
  // Cursor up + clear: replace the in-progress line with the done line.
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Generating prompts", `${prompts.length} \u2192 ${cyan(outPath)}`);
}
|
|
2097
|
+
// `bench` subcommand: replay prompts from an existing prompts.jsonl
// (benchmark.inputFile) instead of generating them, then run the benchmark.
async function runBench(config, outputDir) {
  const inputFile = config.benchmark.inputFile;
  if (!inputFile) {
    throw new Error("bench command requires benchmark.inputFile (path to prompts.jsonl)");
  }
  printBanner(config, outputDir);
  stageRun(`Loading prompts from ${cyan(inputFile)}...`);
  // FileGenerator is loaded via the bundle's lazy module init.
  const { FileGenerator: FileGenerator2 } = await Promise.resolve().then(() => (init_file(), file_exports));
  const gen = new FileGenerator2(inputFile);
  // Default to 100 requests when maxRequests is unset.
  const count = config.benchmark.maxRequests ?? 100;
  const prompts = gen.generate(count);
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Loading prompts", `${prompts.length} prompts`);
  stageRun("Running benchmark...");
  const { controller, onShutdown } = createAbortController();
  const progress = new ProgressDisplay({
    totalTarget: count,
    maxConcurrency: config.benchmark.concurrency,
    modelName: config.provider.model,
    streaming: config.benchmark.streaming
  });
  progress.setStage("benchmarking");
  const backend = createBackend(config);
  const orchestrator = new ConcurrencyOrchestrator(
    config,
    backend,
    prompts,
    outputDir,
    (metrics, active, completed, phase, allowedConcurrency) => progress.update(metrics, active, completed, phase, allowedConcurrency)
  );
  mkdirSync5(outputDir, { recursive: true });
  onShutdown(() => orchestrator.abort());
  await progress.start();
  const results = await orchestrator.run(controller.signal);
  progress.stop();
  stageOk("Running benchmark", `${results.length} requests \u2192 ${cyan(outputDir)}`);
}
|
|
2134
|
+
// `report` subcommand: recompute the summary from a previous run's
// write-ahead log (run_log.jsonl) and re-export/print it. inputFile here
// names the run's output DIRECTORY, not a file.
async function runReport(config) {
  const inputDir = config.benchmark.inputFile;
  if (!inputDir) {
    throw new Error(
      "report command requires benchmark.inputFile (path to run output dir containing run_log.jsonl)"
    );
  }
  stageRun(`Reading results from ${cyan(inputDir)}...`);
  const results = WAL.readLog(inputDir);
  // Cursor up + clear: replace the in-progress line with the done line.
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Reading results", `${results.length} entries`);
  stageRun("Computing report...");
  const summary = computeSummary(results);
  const exporters = createExporters(config);
  for (const exporter of exporters) {
    // Exports land next to the original run artifacts.
    await exporter.export(summary, results, inputDir);
  }
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Computing report");
  printSummary(summary);
}
|
|
2155
|
+
/**
 * Build the prompt set for a run using the configured generator.
 * Defaults to 100 prompts when benchmark.maxRequests is unset.
 */
function generatePrompts(config) {
  const howMany = config.benchmark.maxRequests ?? 100;
  return createGenerator(config).generate(howMany);
}
|
|
2160
|
+
// Top-level failure handler: print the error message in red and exit
// non-zero so scripts/CI can detect the failure.
main().catch((err) => {
  console.error(red(err instanceof Error ? err.message : String(err)));
  process.exit(1);
});
|