@flotorch/loadtest 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +15 -0
  2. package/dist/index.js +2163 -0
  3. package/package.json +45 -0
package/dist/index.js ADDED
@@ -0,0 +1,2163 @@
1
+ #!/usr/bin/env node
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropNames = Object.getOwnPropertyNames;
4
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
5
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
6
+ }) : x)(function(x) {
7
+ if (typeof require !== "undefined") return require.apply(this, arguments);
8
+ throw Error('Dynamic require of "' + x + '" is not supported');
9
+ });
10
+ var __esm = (fn, res) => function __init() {
11
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
12
+ };
13
+ var __export = (target, all) => {
14
+ for (var name in all)
15
+ __defProp(target, name, { get: all[name], enumerable: true });
16
+ };
17
+
18
+ // node_modules/.pnpm/tsup@8.5.1_tsx@4.21.0_typescript@5.9.3/node_modules/tsup/assets/esm_shims.js
19
+ import path from "path";
20
+ import { fileURLToPath } from "url";
21
+ var init_esm_shims = __esm({
22
+ "node_modules/.pnpm/tsup@8.5.1_tsx@4.21.0_typescript@5.9.3/node_modules/tsup/assets/esm_shims.js"() {
23
+ "use strict";
24
+ }
25
+ });
26
+
27
+ // src/generator/tokenizer.ts
28
+ import { encode } from "gpt-tokenizer";
29
// Token count of `text` under the GPT BPE vocabulary (via gpt-tokenizer).
function countTokens(text) {
  const tokenIds = encode(text);
  return tokenIds.length;
}
32
+ var init_tokenizer = __esm({
33
+ "src/generator/tokenizer.ts"() {
34
+ "use strict";
35
+ init_esm_shims();
36
+ }
37
+ });
38
+
39
+ // src/generator/file.ts
40
+ var file_exports = {};
41
+ __export(file_exports, {
42
+ FileGenerator: () => FileGenerator
43
+ });
44
+ import { readFileSync as readFileSync3 } from "fs";
45
+ var FileGenerator;
46
+ var init_file = __esm({
47
+ "src/generator/file.ts"() {
48
+ "use strict";
49
+ init_esm_shims();
50
+ init_tokenizer();
51
FileGenerator = class {
  // Prompt records loaded from the JSONL file; each is
  // { text, tokenCount, outputTokenTarget }.
  records;
  /**
   * Load prompt records from a JSONL file.
   * Each non-blank line is parsed as JSON; `text` (or legacy `prompt`)
   * supplies the prompt body, `tokenCount` is computed when absent, and
   * `outputTokenTarget` falls back to `max_tokens`, then 256.
   * @param {string} filePath - path to the JSONL prompt file
   * @throws {Error} when the file contains no usable records — otherwise
   *   generate() would index with `i % 0` (NaN) and hand out undefined.
   */
  constructor(filePath) {
    const content = readFileSync3(filePath, "utf-8");
    this.records = content.split("\n").filter((line) => line.trim().length > 0).map((line) => {
      const parsed = JSON.parse(line);
      const text = parsed.text ?? parsed.prompt ?? "";
      return {
        text,
        tokenCount: parsed.tokenCount ?? countTokens(text),
        outputTokenTarget: parsed.outputTokenTarget ?? parsed.max_tokens ?? 256
      };
    });
    if (this.records.length === 0) {
      throw new Error(`No prompt records found in ${filePath}`);
    }
  }
  // Return `count` records, cycling round-robin through the file.
  generate(count) {
    const results = [];
    for (let i = 0; i < count; i++) {
      results.push(this.records[i % this.records.length]);
    }
    return results;
  }
  // Pick a random record and override its output-token target. The input
  // target is ignored: file-backed prompts have fixed text.
  generateOne(_targetInputTokens, targetOutputTokens) {
    const record = this.records[Math.floor(Math.random() * this.records.length)];
    return { ...record, outputTokenTarget: targetOutputTokens };
  }
};
77
+ }
78
+ });
79
+
80
+ // src/cli/ui/components/header.tsx
81
+ import { Box, Text } from "ink";
82
+ import { jsx, jsxs } from "react/jsx-runtime";
83
+ function Header({ modelName, maxConcurrency, streaming, totalTarget }) {
84
+ const targetStr = totalTarget === Infinity ? "\u221E" : String(totalTarget);
85
+ return /* @__PURE__ */ jsxs(Box, { flexDirection: "column", children: [
86
+ /* @__PURE__ */ jsx(Text, { bold: true, color: "cyan", children: "FLOTorch Load Tester" }),
87
+ /* @__PURE__ */ jsxs(Text, { children: [
88
+ /* @__PURE__ */ jsx(Text, { dimColor: true, children: "Model: " }),
89
+ /* @__PURE__ */ jsx(Text, { bold: true, children: modelName }),
90
+ /* @__PURE__ */ jsx(Text, { dimColor: true, children: " Concurrency: " }),
91
+ /* @__PURE__ */ jsx(Text, { bold: true, children: maxConcurrency }),
92
+ /* @__PURE__ */ jsx(Text, { dimColor: true, children: " Streaming: " }),
93
+ /* @__PURE__ */ jsx(Text, { bold: true, children: streaming ? "yes" : "no" }),
94
+ /* @__PURE__ */ jsx(Text, { dimColor: true, children: " Requests: " }),
95
+ /* @__PURE__ */ jsx(Text, { bold: true, children: targetStr })
96
+ ] })
97
+ ] });
98
+ }
99
+ var init_header = __esm({
100
+ "src/cli/ui/components/header.tsx"() {
101
+ "use strict";
102
+ init_esm_shims();
103
+ }
104
+ });
105
+
106
+ // src/cli/ui/components/phase-bar.tsx
107
+ import { Box as Box2, Text as Text2 } from "ink";
108
+ import { jsx as jsx2, jsxs as jsxs2 } from "react/jsx-runtime";
109
+ function PhaseBar({ phase, activeSlots, allowedConcurrency }) {
110
+ const color = phaseColors[phase];
111
+ const barWidth = 20;
112
+ const filled = allowedConcurrency > 0 ? Math.round(activeSlots / allowedConcurrency * barWidth) : 0;
113
+ const empty = barWidth - filled;
114
+ return /* @__PURE__ */ jsx2(Box2, { children: /* @__PURE__ */ jsxs2(Text2, { children: [
115
+ /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: "Phase: " }),
116
+ /* @__PURE__ */ jsx2(Text2, { color, bold: true, children: "\u25CF " }),
117
+ /* @__PURE__ */ jsx2(Text2, { color, children: phaseLabels[phase] }),
118
+ /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: " Slots: " }),
119
+ /* @__PURE__ */ jsx2(Text2, { color: "cyan", children: "\u2588".repeat(filled) }),
120
+ /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: "\u2591".repeat(empty) }),
121
+ /* @__PURE__ */ jsxs2(Text2, { children: [
122
+ " ",
123
+ activeSlots,
124
+ "/",
125
+ allowedConcurrency
126
+ ] })
127
+ ] }) });
128
+ }
129
+ var phaseColors, phaseLabels;
130
+ var init_phase_bar = __esm({
131
+ "src/cli/ui/components/phase-bar.tsx"() {
132
+ "use strict";
133
+ init_esm_shims();
134
+ phaseColors = {
135
+ "ramp-up": "yellow",
136
+ steady: "green",
137
+ "ramp-down": "magenta"
138
+ };
139
+ phaseLabels = {
140
+ "ramp-up": "Ramp Up",
141
+ steady: "Steady State",
142
+ "ramp-down": "Ramp Down"
143
+ };
144
+ }
145
+ });
146
+
147
+ // src/cli/ui/components/progress-bar.tsx
148
+ import { Box as Box3, Text as Text3 } from "ink";
149
+ import { jsx as jsx3, jsxs as jsxs3 } from "react/jsx-runtime";
150
// Render a second count as "3m05s", or just "42s" under a minute.
function formatDuration2(seconds) {
  const mins = Math.floor(seconds / 60);
  const secs = Math.floor(seconds % 60);
  if (mins === 0) {
    return `${secs}s`;
  }
  return `${mins}m${String(secs).padStart(2, "0")}s`;
}
155
+ function ProgressBar({ completed, totalTarget, elapsedMs, rps }) {
156
+ const barWidth = 30;
157
+ const isInfinite = totalTarget === Infinity || totalTarget <= 0;
158
+ const pct = isInfinite ? 0 : Math.min(1, completed / totalTarget);
159
+ const filled = Math.round(pct * barWidth);
160
+ const empty = barWidth - filled;
161
+ const elapsedSec = elapsedMs / 1e3;
162
+ let eta = "";
163
+ if (!isInfinite && rps > 0) {
164
+ const remaining = (totalTarget - completed) / rps;
165
+ eta = `ETA: ${formatDuration2(remaining)}`;
166
+ }
167
+ const targetStr = isInfinite ? "?" : String(totalTarget);
168
+ const pctStr = isInfinite ? "" : ` (${(pct * 100).toFixed(1)}%)`;
169
+ return /* @__PURE__ */ jsxs3(Box3, { flexDirection: "column", children: [
170
+ /* @__PURE__ */ jsxs3(Text3, { children: [
171
+ /* @__PURE__ */ jsx3(Text3, { dimColor: true, children: "Progress " }),
172
+ /* @__PURE__ */ jsx3(Text3, { color: "green", children: "\u2588".repeat(filled) }),
173
+ /* @__PURE__ */ jsx3(Text3, { dimColor: true, children: "\u2591".repeat(empty) }),
174
+ /* @__PURE__ */ jsxs3(Text3, { children: [
175
+ " ",
176
+ completed,
177
+ "/",
178
+ targetStr,
179
+ pctStr
180
+ ] })
181
+ ] }),
182
+ /* @__PURE__ */ jsxs3(Text3, { dimColor: true, children: [
183
+ "Elapsed: ",
184
+ formatDuration2(elapsedSec),
185
+ " ",
186
+ rps.toFixed(1),
187
+ " req/s",
188
+ " ",
189
+ eta
190
+ ] })
191
+ ] });
192
+ }
193
+ var init_progress_bar = __esm({
194
+ "src/cli/ui/components/progress-bar.tsx"() {
195
+ "use strict";
196
+ init_esm_shims();
197
+ }
198
+ });
199
+
200
+ // src/cli/ui/components/stats-panel.tsx
201
+ import { Box as Box4, Text as Text4 } from "ink";
202
+ import { jsx as jsx4, jsxs as jsxs4 } from "react/jsx-runtime";
203
// Nearest-rank percentile of an ascending-sorted array; 0 when empty.
// p is in [0, 100].
function percentile(sorted, p) {
  if (sorted.length === 0) {
    return 0;
  }
  let idx = Math.ceil((p / 100) * sorted.length) - 1;
  if (idx < 0) {
    idx = 0;
  }
  return sorted[idx];
}
208
// Arithmetic mean; 0 for an empty array (keeps the UI free of NaN).
function mean2(arr) {
  if (arr.length === 0) {
    return 0;
  }
  return arr.reduce((acc, v) => acc + v, 0) / arr.length;
}
214
// Human-readable latency: sub-second values as rounded "Nms", one
// decimal of seconds otherwise (850 -> "850ms", 1500 -> "1.5s").
function fmtMs(ms) {
  return ms >= 1e3 ? `${(ms / 1e3).toFixed(1)}s` : `${Math.round(ms)}ms`;
}
218
+ function StatsPanel({
219
+ rps,
220
+ outputTps,
221
+ inputTps,
222
+ recentTtft,
223
+ recentE2eLatency,
224
+ errors,
225
+ completed
226
+ }) {
227
+ const sortedTtft = [...recentTtft].sort((a, b) => a - b);
228
+ const sortedE2e = [...recentE2eLatency].sort((a, b) => a - b);
229
+ const errRate = completed > 0 ? (errors / completed * 100).toFixed(1) : "0.0";
230
+ const labelW = 14;
231
+ const valW = 10;
232
+ return /* @__PURE__ */ jsxs4(Box4, { flexDirection: "column", children: [
233
+ /* @__PURE__ */ jsx4(Text4, { bold: true, dimColor: true, children: "Live Stats" }),
234
+ /* @__PURE__ */ jsxs4(Box4, { children: [
235
+ /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Requests/s" }) }),
236
+ /* @__PURE__ */ jsx4(Box4, { width: valW, children: /* @__PURE__ */ jsx4(Text4, { bold: true, children: rps.toFixed(1) }) }),
237
+ /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Out tok/s" }) }),
238
+ /* @__PURE__ */ jsx4(Box4, { width: valW, children: /* @__PURE__ */ jsx4(Text4, { bold: true, children: outputTps.toFixed(0) }) }),
239
+ /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "In tok/s" }) }),
240
+ /* @__PURE__ */ jsx4(Box4, { width: valW, children: /* @__PURE__ */ jsx4(Text4, { bold: true, children: inputTps.toFixed(0) }) })
241
+ ] }),
242
+ /* @__PURE__ */ jsxs4(Box4, { children: [
243
+ /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "TTFT" }) }),
244
+ /* @__PURE__ */ jsxs4(Text4, { children: [
245
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "mean=" }),
246
+ /* @__PURE__ */ jsx4(Text4, { children: fmtMs(mean2(sortedTtft)) }),
247
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p50=" }),
248
+ /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedTtft, 50)) }),
249
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p95=" }),
250
+ /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedTtft, 95)) })
251
+ ] })
252
+ ] }),
253
+ /* @__PURE__ */ jsxs4(Box4, { children: [
254
+ /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "E2E" }) }),
255
+ /* @__PURE__ */ jsxs4(Text4, { children: [
256
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "mean=" }),
257
+ /* @__PURE__ */ jsx4(Text4, { children: fmtMs(mean2(sortedE2e)) }),
258
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p50=" }),
259
+ /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedE2e, 50)) }),
260
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: " p95=" }),
261
+ /* @__PURE__ */ jsx4(Text4, { children: fmtMs(percentile(sortedE2e, 95)) })
262
+ ] })
263
+ ] }),
264
+ /* @__PURE__ */ jsxs4(Box4, { children: [
265
+ /* @__PURE__ */ jsx4(Box4, { width: labelW, children: /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Errors" }) }),
266
+ /* @__PURE__ */ jsxs4(Text4, { color: errors > 0 ? "red" : void 0, bold: errors > 0, children: [
267
+ errors,
268
+ " (",
269
+ errRate,
270
+ "%)"
271
+ ] })
272
+ ] })
273
+ ] });
274
+ }
275
+ var init_stats_panel = __esm({
276
+ "src/cli/ui/components/stats-panel.tsx"() {
277
+ "use strict";
278
+ init_esm_shims();
279
+ }
280
+ });
281
+
282
+ // src/cli/ui/components/error-panel.tsx
283
+ import { Box as Box5, Text as Text5 } from "ink";
284
+ import { jsx as jsx5, jsxs as jsxs5 } from "react/jsx-runtime";
285
+ function ErrorPanel({ recentErrors }) {
286
+ if (recentErrors.length === 0) return null;
287
+ return /* @__PURE__ */ jsxs5(Box5, { flexDirection: "column", marginTop: 1, children: [
288
+ /* @__PURE__ */ jsx5(Text5, { color: "red", bold: true, children: "Recent Errors:" }),
289
+ recentErrors.map((err, i) => /* @__PURE__ */ jsxs5(Text5, { color: "red", dimColor: true, children: [
290
+ " ",
291
+ " ",
292
+ err.length > 80 ? err.slice(0, 77) + "..." : err
293
+ ] }, i))
294
+ ] });
295
+ }
296
+ var init_error_panel = __esm({
297
+ "src/cli/ui/components/error-panel.tsx"() {
298
+ "use strict";
299
+ init_esm_shims();
300
+ }
301
+ });
302
+
303
+ // src/cli/ui/app.tsx
304
+ var app_exports = {};
305
+ __export(app_exports, {
306
+ App: () => App
307
+ });
308
+ import { useState, useEffect } from "react";
309
+ import { Box as Box6 } from "ink";
310
+ import { jsx as jsx6, jsxs as jsxs6 } from "react/jsx-runtime";
311
+ function App({ store }) {
312
+ const [snap, setSnap] = useState(() => store.snapshot());
313
+ useEffect(() => {
314
+ const id = setInterval(() => {
315
+ setSnap(store.snapshot());
316
+ }, 200);
317
+ return () => clearInterval(id);
318
+ }, [store]);
319
+ const elapsedMs = snap.startTime > 0 ? performance.now() - snap.startTime : 0;
320
+ const elapsedSec = elapsedMs / 1e3;
321
+ const rps = elapsedSec > 0 ? snap.completed / elapsedSec : 0;
322
+ const outputTps = elapsedSec > 0 ? snap.totalOutputTokens / elapsedSec : 0;
323
+ const inputTps = elapsedSec > 0 ? snap.totalInputTokens / elapsedSec : 0;
324
+ return /* @__PURE__ */ jsxs6(Box6, { flexDirection: "column", paddingLeft: 2, children: [
325
+ /* @__PURE__ */ jsx6(
326
+ Header,
327
+ {
328
+ modelName: snap.modelName,
329
+ maxConcurrency: snap.maxConcurrency,
330
+ streaming: snap.streaming,
331
+ totalTarget: snap.totalTarget
332
+ }
333
+ ),
334
+ /* @__PURE__ */ jsx6(Box6, { marginTop: 1, children: /* @__PURE__ */ jsx6(
335
+ PhaseBar,
336
+ {
337
+ phase: snap.phase,
338
+ activeSlots: snap.activeSlots,
339
+ allowedConcurrency: snap.allowedConcurrency
340
+ }
341
+ ) }),
342
+ /* @__PURE__ */ jsx6(Box6, { marginTop: 1, flexDirection: "column", children: /* @__PURE__ */ jsx6(
343
+ ProgressBar,
344
+ {
345
+ completed: snap.completed,
346
+ totalTarget: snap.totalTarget,
347
+ elapsedMs,
348
+ rps
349
+ }
350
+ ) }),
351
+ /* @__PURE__ */ jsx6(Box6, { marginTop: 1, children: /* @__PURE__ */ jsx6(
352
+ StatsPanel,
353
+ {
354
+ rps,
355
+ outputTps,
356
+ inputTps,
357
+ recentTtft: snap.recentTtft,
358
+ recentE2eLatency: snap.recentE2eLatency,
359
+ errors: snap.errors,
360
+ completed: snap.completed
361
+ }
362
+ ) }),
363
+ /* @__PURE__ */ jsx6(ErrorPanel, { recentErrors: snap.recentErrors })
364
+ ] });
365
+ }
366
+ var init_app = __esm({
367
+ "src/cli/ui/app.tsx"() {
368
+ "use strict";
369
+ init_esm_shims();
370
+ init_header();
371
+ init_phase_bar();
372
+ init_progress_bar();
373
+ init_stats_panel();
374
+ init_error_panel();
375
+ }
376
+ });
377
+
378
+ // index.ts
379
+ init_esm_shims();
380
+
381
+ // src/cli/args.ts
382
+ init_esm_shims();
383
+ import { parseArgs } from "util";
384
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
385
+ import { join } from "path";
386
+
387
+ // src/schemas/config.zod.ts
388
+ init_esm_shims();
389
+ import { z } from "zod";
390
+ var rampSchema = z.object({
391
+ requests: z.number().optional(),
392
+ duration: z.number().optional()
393
+ }).refine((d) => d.requests || d.duration, {
394
+ message: "At least one of requests or duration required"
395
+ });
396
+ var ConfigSchema = z.object({
397
+ generator: z.object({
398
+ enabled: z.boolean().default(false),
399
+ prompt: z.string().optional(),
400
+ corpus: z.string().optional()
401
+ }).default(() => ({ enabled: false })),
402
+ benchmark: z.object({
403
+ inputFile: z.string().optional(),
404
+ outputDir: z.string().default("./results"),
405
+ inputTokens: z.object({
406
+ mean: z.number(),
407
+ stddev: z.number().optional()
408
+ }),
409
+ outputTokens: z.object({
410
+ mean: z.number(),
411
+ stddev: z.number().optional()
412
+ }),
413
+ maxRequests: z.number().optional(),
414
+ maxDuration: z.number().optional(),
415
+ timeout: z.number().default(600),
416
+ concurrency: z.number(),
417
+ rampUp: rampSchema.optional(),
418
+ rampDown: rampSchema.optional(),
419
+ cachePercentage: z.number().min(0).max(100).default(0),
420
+ streaming: z.boolean().default(true)
421
+ }).refine((d) => d.maxRequests || d.maxDuration, {
422
+ message: "At least one of maxRequests or maxDuration required"
423
+ }),
424
+ provider: z.object({
425
+ adapter: z.enum(["openai", "sagemaker"]).default("openai"),
426
+ model: z.string(),
427
+ baseURL: z.string().optional(),
428
+ systemPrompt: z.string().optional(),
429
+ config: z.record(z.string(), z.any()).optional()
430
+ }),
431
+ reporter: z.object({
432
+ adapters: z.array(z.enum(["json", "csv"])).default(["json"])
433
+ }).default(() => ({ adapters: ["json"] }))
434
+ });
435
// Validate process.env against an adapter's zod schema. Returns the
// parsed env on success; on failure throws one error listing every
// missing/invalid variable for the named backend.
function validateEnv(schema, adapterName) {
  const result = schema.safeParse(process.env);
  if (result.success) {
    return result.data;
  }
  const errors = result.error.issues.map((i) => ` ${i.path.join(".")}: ${i.message}`).join("\n");
  throw new Error(`Missing/invalid env vars for "${adapterName}" backend:\n${errors}`);
}
444
+
445
+ // src/cli/ansi.ts
446
+ init_esm_shims();
447
// ANSI colour helpers. Output stays plain when NO_COLOR is set, when
// TERM=dumb, or when stdout is not a TTY (e.g. piped to a file).
var enabled = process.env.NO_COLOR === void 0 && process.env.TERM !== "dumb" && (process.stdout.isTTY ?? false);
// Build an SGR wrapper for an open/close code pair; identity when colour
// is disabled so call sites never need to branch.
var wrap = (code, close) => {
  if (!enabled) {
    return (s) => s;
  }
  return (s) => `\x1B[${code}m${s}\x1B[${close}m`;
};
var bold = wrap("1", "22");
var dim = wrap("2", "22");
var red = wrap("31", "39");
var green = wrap("32", "39");
var yellow = wrap("33", "39");
var cyan = wrap("36", "39");
var magenta = wrap("35", "39");
456
+
457
+ // src/cli/args.ts
458
+ var VALID_COMMANDS = /* @__PURE__ */ new Set(["run", "generate", "bench", "report", "init"]);
459
+ var HELP_TEXT = `
460
+ ${bold("FLOTorch Load Tester")}
461
+
462
+ ${yellow("USAGE")}
463
+ flotorch ${dim("<command>")} ${dim("[options]")}
464
+
465
+ ${yellow("COMMANDS")}
466
+ ${cyan("run")} ${dim("Run full pipeline: generate \u2192 bench \u2192 report (default)")}
467
+ ${cyan("generate")} ${dim("Generate prompts only")}
468
+ ${cyan("bench")} ${dim("Run benchmark using existing prompts")}
469
+ ${cyan("report")} ${dim("Generate report from existing results")}
470
+ ${cyan("init")} ${dim("[path]")} ${dim("Interactively create a config file (default: config.json)")}
471
+
472
+ ${yellow("OPTIONS")}
473
+ ${cyan("-c, --config")} ${dim("<path>")} Path to config JSON ${dim("(required for run/generate/bench/report)")}
474
+ ${cyan("--run-id")} ${dim("<id>")} Custom run ID ${dim("(default: ISO timestamp)")}
475
+ ${cyan("-m, --model")} ${dim("<name>")} Override provider.model
476
+ ${cyan("-n, --concurrency")} ${dim("<n>")} Override benchmark.concurrency
477
+ ${cyan("--max-requests")} ${dim("<n>")} Override benchmark.maxRequests
478
+ ${cyan("--max-duration")} ${dim("<n>")} Override benchmark.maxDuration ${dim("(seconds)")}
479
+ ${cyan("-o, --output-dir")} ${dim("<p>")} Override benchmark.outputDir
480
+ ${cyan("--base-url")} ${dim("<url>")} Override provider.baseURL
481
+ ${cyan("--streaming")} Enable streaming
482
+ ${cyan("--no-streaming")} Disable streaming
483
+ ${cyan("-h, --help")} Show this help message
484
+ `.trimStart();
485
/**
 * Parse raw process.argv into { command, configPath, runId, overrides }
 * (plus initOutputPath for the `init` command).
 *
 * The first positional selects the subcommand when it is a known command,
 * otherwise "run" is assumed. `-h/--help` prints the help text and exits.
 * `init` short-circuits: it needs no config file or run ID.
 *
 * @param {string[]} argv - full process.argv (node, script, ...args)
 * @throws {Error} when a command other than `init` is missing --config
 */
function parseCliArgs(argv) {
  const rest = argv.slice(2);
  let command = "run";
  if (rest.length > 0 && VALID_COMMANDS.has(rest[0])) {
    command = rest.shift();
  }
  const { values, positionals } = parseArgs({
    args: rest,
    options: {
      config: { type: "string", short: "c" },
      "run-id": { type: "string" },
      model: { type: "string", short: "m" },
      concurrency: { type: "string", short: "n" },
      "max-requests": { type: "string" },
      "max-duration": { type: "string" },
      "output-dir": { type: "string", short: "o" },
      "base-url": { type: "string" },
      streaming: { type: "boolean" },
      "no-streaming": { type: "boolean" },
      help: { type: "boolean", short: "h" }
    },
    allowPositionals: true
  });
  if (values.help) {
    console.log(HELP_TEXT);
    process.exit(0);
  }
  if (command === "init") {
    // `init` writes a fresh config, so no config path is required.
    const initOutputPath = positionals[0] ?? "config.json";
    return { command, configPath: "", runId: "", overrides: {}, initOutputPath };
  }
  const configPath = values.config;
  if (!configPath) {
    throw new Error("--config / -c is required");
  }
  // Default run ID: filesystem-safe ISO timestamp (colons/dots stripped).
  const runId = values["run-id"] ?? new Date().toISOString().replace(/[:.]/g, "-");
  return { command, configPath, runId, overrides: collectOverrides(values) };
}
524
+ function resolveConfig(configPath, runId, cliOverrides) {
525
+ if (!existsSync(configPath)) {
526
+ throw new Error(`Config file not found: ${configPath}`);
527
+ }
528
+ const rawConfig = JSON.parse(readFileSync(configPath, "utf-8"));
529
+ const benchRaw = rawConfig.benchmark ?? {};
530
+ const baseOutputDir = benchRaw.outputDir ?? "./results";
531
+ const outputDir = join(baseOutputDir, runId);
532
+ const savedOverrides = loadSavedOverrides(outputDir);
533
+ const merged = deepMerge(rawConfig, savedOverrides, cliOverrides);
534
+ const combinedOverrides = deepMerge(savedOverrides, cliOverrides);
535
+ mkdirSync(outputDir, { recursive: true });
536
+ saveOverrides(outputDir, combinedOverrides);
537
+ const result = ConfigSchema.safeParse(merged);
538
+ if (!result.success) {
539
+ const errors = result.error.issues.map((i) => ` ${i.path.join(".")}: ${i.message}`).join("\n");
540
+ throw new Error(`Invalid configuration:
541
+ ${errors}`);
542
+ }
543
+ writeFileSync(
544
+ join(outputDir, "config.resolved.json"),
545
+ JSON.stringify(result.data, null, 2) + "\n"
546
+ );
547
+ return { config: result.data, outputDir };
548
+ }
549
// Translate parsed CLI flag values into a partial config override object.
// Only keys the user actually supplied appear in the result; numeric
// flags arrive as strings from parseArgs and are converted here.
function collectOverrides(values) {
  const provider = {};
  const benchmark = {};
  // Copy a flag into `target` only when it was supplied (non-empty).
  const setIf = (target, key, raw, transform) => {
    if (raw) target[key] = transform ? transform(raw) : raw;
  };
  setIf(provider, "model", values.model);
  setIf(provider, "baseURL", values["base-url"]);
  setIf(benchmark, "concurrency", values.concurrency, Number);
  setIf(benchmark, "maxRequests", values["max-requests"], Number);
  setIf(benchmark, "maxDuration", values["max-duration"], Number);
  setIf(benchmark, "outputDir", values["output-dir"]);
  // --streaming / --no-streaming are independent booleans; when both are
  // passed, --no-streaming wins (matches original evaluation order).
  if (values.streaming === true) benchmark.streaming = true;
  if (values["no-streaming"] === true) benchmark.streaming = false;
  const overrides = {};
  if (Object.keys(provider).length > 0) overrides.provider = provider;
  if (Object.keys(benchmark).length > 0) overrides.benchmark = benchmark;
  return overrides;
}
565
// Load overrides.json persisted by a previous invocation of the same run
// ID; an absent file means no saved overrides.
function loadSavedOverrides(outputDir) {
  const overridesFile = join(outputDir, "overrides.json");
  if (!existsSync(overridesFile)) {
    return {};
  }
  return JSON.parse(readFileSync(overridesFile, "utf-8"));
}
570
// Persist non-empty overrides beside the run's results so re-running the
// same run ID picks them up again; an empty object writes nothing.
function saveOverrides(outputDir, overrides) {
  const hasAny = Object.keys(overrides).length > 0;
  if (!hasAny) {
    return;
  }
  const body = JSON.stringify(overrides, null, 2) + "\n";
  writeFileSync(join(outputDir, "overrides.json"), body);
}
574
// True for non-null, non-array objects (the only things deepMerge
// descends into).
function isPlainObject(val) {
  if (val === null || Array.isArray(val)) {
    return false;
  }
  return typeof val === "object";
}
577
/**
 * Recursively merge plain objects left-to-right (later sources win).
 * Nested plain objects are merged key-by-key; arrays and primitives are
 * replaced wholesale. Returns a fresh object; sources are not mutated.
 *
 * Security: "__proto__", "constructor" and "prototype" keys are skipped.
 * The merged inputs come from user-editable JSON (the config file and
 * overrides.json), and JSON.parse can produce an own "__proto__"
 * property — assigning it via `result[key] = ...` would otherwise
 * pollute Object.prototype.
 *
 * @param {...object} sources - objects in increasing precedence order
 * @returns {object} the merged result
 */
function deepMerge(...sources) {
  // Local plain-object test keeps the merge self-contained.
  const mergeable = (v) => typeof v === "object" && v !== null && !Array.isArray(v);
  const result = {};
  for (const source of sources) {
    for (const key of Object.keys(source)) {
      if (key === "__proto__" || key === "constructor" || key === "prototype") {
        continue;
      }
      if (mergeable(result[key]) && mergeable(source[key])) {
        result[key] = deepMerge(result[key], source[key]);
      } else {
        result[key] = source[key];
      }
    }
  }
  return result;
}
593
+
594
+ // src/cli/init.ts
595
+ init_esm_shims();
596
+ import { createInterface } from "readline/promises";
597
+ import { writeFileSync as writeFileSync2, existsSync as existsSync2 } from "fs";
598
// Ask one question on the given readline interface. The default (when
// provided) is shown in parentheses, and an empty/whitespace answer
// falls back to it (or to "").
async function prompt(rl, question, defaultValue) {
  const hint = defaultValue != null ? ` (${defaultValue})` : "";
  const raw = await rl.question(`${question}${hint}: `);
  const trimmed = raw.trim();
  return trimmed || defaultValue || "";
}
603
+ async function runInit(outputPath) {
604
+ if (existsSync2(outputPath)) {
605
+ const rl2 = createInterface({ input: process.stdin, output: process.stdout });
606
+ const overwrite = await prompt(rl2, `${outputPath} already exists. Overwrite? [y/N]`, "n");
607
+ if (overwrite.toLowerCase() !== "y") {
608
+ rl2.close();
609
+ console.log("Aborted.");
610
+ return;
611
+ }
612
+ rl2.close();
613
+ }
614
+ const rl = createInterface({ input: process.stdin, output: process.stdout });
615
+ console.log("\nFLOTorch Load Tester \u2014 Config Generator\n");
616
+ const adapter = await prompt(rl, "Provider adapter [openai/sagemaker]", "openai");
617
+ const model = await prompt(rl, "Model name (required)");
618
+ if (!model) {
619
+ rl.close();
620
+ throw new Error("Model name is required");
621
+ }
622
+ let baseURL;
623
+ if (adapter === "openai") {
624
+ const url = await prompt(rl, "Base URL", "https://api.openai.com/v1");
625
+ if (url !== "https://api.openai.com/v1") {
626
+ baseURL = url;
627
+ }
628
+ }
629
+ const concurrency = Number(await prompt(rl, "Concurrency", "10"));
630
+ const inputMean = Number(await prompt(rl, "Input tokens mean", "512"));
631
+ const outputMean = Number(await prompt(rl, "Output tokens mean", "256"));
632
+ const maxRequests = Number(await prompt(rl, "Max requests", "100"));
633
+ const streamingAnswer = await prompt(rl, "Streaming? [y/n]", "y");
634
+ const streaming = streamingAnswer.toLowerCase() === "y";
635
+ rl.close();
636
+ const config = {
637
+ provider: {
638
+ adapter,
639
+ model,
640
+ ...baseURL && { baseURL }
641
+ },
642
+ benchmark: {
643
+ concurrency,
644
+ inputTokens: { mean: inputMean },
645
+ outputTokens: { mean: outputMean },
646
+ maxRequests,
647
+ streaming,
648
+ outputDir: "./results",
649
+ timeout: 600,
650
+ cachePercentage: 0
651
+ },
652
+ generator: {
653
+ enabled: false
654
+ },
655
+ reporter: {
656
+ adapters: ["json"]
657
+ }
658
+ };
659
+ writeFileSync2(outputPath, JSON.stringify(config, null, 2) + "\n");
660
+ console.log(`
661
+ Config written to ${outputPath}`);
662
+ }
663
+
664
+ // src/generator/generator.ts
665
+ init_esm_shims();
666
+
667
+ // src/generator/synthetic.ts
668
+ init_esm_shims();
669
+ init_tokenizer();
670
+
671
+ // src/utils/random.ts
672
+ init_esm_shims();
673
// Sample from N(mean, stddev) using the Box–Muller transform: two
// uniform deviates produce one standard-normal deviate.
function gaussianRandom(mean3, stddev2) {
  let u1;
  do {
    u1 = Math.random();
  } while (u1 === 0); // log(0) would yield -Infinity
  const u2 = Math.random();
  const standardNormal = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
  return mean3 + standardNormal * stddev2;
}
// Gaussian sample rounded to the nearest integer and clamped to
// [min, max] — used for token-count targets, which must stay positive.
function clampedGaussian(mean3, stddev2, min, max) {
  const rounded = Math.round(gaussianRandom(mean3, stddev2));
  if (rounded < min) return min;
  if (rounded > max) return max;
  return rounded;
}
684
+
685
+ // src/generator/synthetic.ts
686
+ import { readFileSync as readFileSync2 } from "fs";
687
+
688
+ // src/generator/corpus/default.ts
689
+ init_esm_shims();
690
+ var default_default = `Shall I compare thee to a summer's day?
691
+ Thou art more lovely and more temperate:
692
+ Rough winds do shake the darling buds of May,
693
+ And summer's lease hath all too short a date:
694
+ Sometime too hot the eye of heaven shines,
695
+ And often is his gold complexion dimm'd;
696
+ And every fair from fair sometime declines,
697
+ By chance or nature's changing course untrimm'd;
698
+ But thy eternal summer shall not fade
699
+ Nor lose possession of that fair thou owest;
700
+ Nor shall Death brag thou wander'st in his shade,
701
+ When in eternal lines to time thou growest:
702
+ So long as men can breathe or eyes can see,
703
+ So long lives this and this gives life to thee.
704
+ Then let not winter's ragged hand deface
705
+ In thee thy summer, ere thou be distill'd:
706
+ Make sweet some vial; treasure thou some place
707
+ With beauty's treasure, ere it be self-kill'd.
708
+ That use is not forbidden usury,
709
+ Which happies those that pay the willing loan;
710
+ That's for thyself to breed another thee,
711
+ Or ten times happier, be it ten for one;
712
+ Ten times thyself were happier than thou art,
713
+ If ten of thine ten times refigured thee:
714
+ Then what could death do, if thou shouldst depart,
715
+ Leaving thee living in posterity?
716
+ Be not self-will'd, for thou art much too fair
717
+ To be death's conquest and make worms thine heir.
718
+ Where art thou, Muse, that thou forget'st so long
719
+ To speak of that which gives thee all thy might?
720
+ Spend'st thou thy fury on some worthless song,
721
+ Darkening thy power to lend base subjects light?
722
+ Return, forgetful Muse, and straight redeem
723
+ In gentle numbers time so idly spent;
724
+ Sing to the ear that doth thy lays esteem
725
+ And gives thy pen both skill and argument.
726
+ Rise, resty Muse, my love's sweet face survey,
727
+ If Time have any wrinkle graven there;
728
+ If any, be a satire to decay,
729
+ And make Time's spoils despised every where.
730
+ Give my love fame faster than Time wastes life;
731
+ So thou prevent'st his scythe and crooked knife.
732
+ My glass shall not persuade me I am old,
733
+ So long as youth and thou are of one date;
734
+ But when in thee time's furrows I behold,
735
+ Then look I death my days should expiate.
736
+ For all that beauty that doth cover thee
737
+ Is but the seemly raiment of my heart,
738
+ Which in thy breast doth live, as thine in me:
739
+ How can I then be elder than thou art?
740
+ O, therefore, love, be of thyself so wary
741
+ As I, not for myself, but for thee will;
742
+ Bearing thy heart, which I will keep so chary
743
+ As tender nurse her babe from faring ill.
744
+ Presume not on thy heart when mine is slain;
745
+ Thou gavest me thine, not to give back again.
746
+ So am I as the rich, whose blessed key
747
+ Can bring him to his sweet up-locked treasure,
748
+ The which he will not every hour survey,
749
+ For blunting the fine point of seldom pleasure.
750
+ Therefore are feasts so solemn and so rare,
751
+ Since, seldom coming, in the long year set,
752
+ Like stones of worth they thinly placed are,
753
+ Or captain jewels in the carcanet.
754
+ So is the time that keeps you as my chest,
755
+ Or as the wardrobe which the robe doth hide,
756
+ To make some special instant special blest,
757
+ By new unfolding his imprison'd pride.
758
+ Blessed are you, whose worthiness gives scope,
759
+ Being had, to triumph, being lack'd, to hope.
760
+ If there be nothing new, but that which is
761
+ Hath been before, how are our brains beguiled,
762
+ Which, labouring for invention, bear amiss
763
+ The second burden of a former child!
764
+ O, that record could with a backward look,
765
+ Even of five hundred courses of the sun,
766
+ Show me your image in some antique book,
767
+ Since mind at first in character was done!
768
+ That I might see what the old world could say
769
+ To this composed wonder of your frame;
770
+ Whether we are mended, or whether better they,
771
+ Or whether revolution be the same.
772
+ O, sure I am, the wits of former days
773
+ To subjects worse have given admiring praise.`;
774
+
775
+ // src/generator/synthetic.ts
776
var SyntheticGenerator = class {
  lines;
  config;
  /**
   * Prompt generator that assembles synthetic inputs from a text corpus.
   * Uses the file at config.generator.corpus when provided, otherwise the
   * bundled default corpus; blank lines are dropped.
   */
  constructor(config) {
    this.config = config;
    const corpus = config.generator.corpus ? readFileSync2(config.generator.corpus, "utf-8") : default_default;
    this.lines = corpus.split("\n").map((l) => l.trim()).filter((l) => l.length > 0);
  }
  /**
   * Generates `count` prompt records. Input/output token targets are drawn
   * per record from clamped Gaussians around the configured means (stddev
   * defaults to 10% of the mean, clamped to [1, 3 * mean]).
   */
  generate(count) {
    const records = [];
    const inputMean = this.config.benchmark.inputTokens.mean;
    const inputStddev = this.config.benchmark.inputTokens.stddev ?? inputMean * 0.1;
    const outputMean = this.config.benchmark.outputTokens.mean;
    const outputStddev = this.config.benchmark.outputTokens.stddev ?? outputMean * 0.1;
    for (let i = 0; i < count; i++) {
      const targetInput = clampedGaussian(inputMean, inputStddev, 1, inputMean * 3);
      const targetOutput = clampedGaussian(outputMean, outputStddev, 1, outputMean * 3);
      records.push(this.generateOne(targetInput, targetOutput));
    }
    return records;
  }
  /**
   * Builds one prompt by concatenating shuffled corpus lines until the
   * target input token count is reached; cycles the corpus (allowing 10%
   * overshoot) when a single pass is not enough.
   */
  generateOne(targetInputTokens, targetOutputTokens) {
    // Fisher-Yates shuffle: uniform, unlike sort(() => Math.random() - 0.5),
    // which is biased and relies on unspecified comparator behavior.
    const shuffled = [...this.lines];
    for (let i = shuffled.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    let text = "";
    let tokens = 0;
    let lineIdx = 0;
    // First pass: append whole lines until the target is met or exceeded.
    while (tokens < targetInputTokens && lineIdx < shuffled.length) {
      const candidate = text ? `${text}\n${shuffled[lineIdx]}` : shuffled[lineIdx];
      const candidateTokens = countTokens(candidate);
      if (candidateTokens > targetInputTokens && text.length > 0) break;
      text = candidate;
      tokens = candidateTokens;
      lineIdx++;
    }
    // Second pass: cycle the corpus if still short, with a 10% overshoot budget.
    while (tokens < targetInputTokens) {
      const line = shuffled[lineIdx % shuffled.length];
      const candidate = `${text}\n${line}`;
      const candidateTokens = countTokens(candidate);
      if (candidateTokens > targetInputTokens * 1.1) break;
      text = candidate;
      tokens = candidateTokens;
      lineIdx++;
    }
    const suffix = this.config.generator.prompt ?? "";
    const header = `Randomly stream lines from the following text with ${targetOutputTokens} output tokens. Don't generate eos tokens:\n\n`;
    const fullText = header + text + (suffix ? `\n${suffix}` : "");
    const finalTokens = countTokens(fullText);
    return {
      text: fullText,
      tokenCount: finalTokens,
      outputTokenTarget: targetOutputTokens
    };
  }
};
835
+
836
+ // src/generator/generator.ts
837
+ init_file();
838
/**
 * Selects the prompt generator for this run. Synthetic generation wins when
 * explicitly enabled; otherwise a configured input file is replayed; the
 * synthetic generator is the final fallback.
 */
function createGenerator(config) {
  const useFile = !config.generator.enabled && config.benchmark.inputFile;
  if (useFile) {
    return new FileGenerator(config.benchmark.inputFile);
  }
  return new SyntheticGenerator(config);
}
847
+
848
+ // src/runner/backend.ts
849
+ init_esm_shims();
850
+
851
+ // src/runner/backends/openai.ts
852
+ init_esm_shims();
853
+ init_tokenizer();
854
+ import { z as z2 } from "zod";
855
// Environment contract for the OpenAI backend: validation fails fast with a
// clear message when the API key is missing or empty.
var EnvSchema = z2.object({
  OPENAI_API_KEY: z2.string().min(1, "OPENAI_API_KEY is required")
});
858
// Backend for OpenAI-compatible chat-completions APIs (streaming and not).
var OpenAIBackend = class _OpenAIBackend {
  name = "openai";
  url;
  apiKey;
  // Factory: validates OPENAI_API_KEY from the environment, defaulting the
  // base URL to the public OpenAI endpoint.
  static create(baseURL) {
    const env = validateEnv(EnvSchema, "openai");
    const url = baseURL ?? "https://api.openai.com/v1";
    return new _OpenAIBackend(url, env.OPENAI_API_KEY);
  }
  // Normalizes the base URL so this.url always ends in /chat/completions.
  constructor(baseURL, apiKey) {
    this.url = baseURL.endsWith("/chat/completions") ? baseURL : `${baseURL.replace(/\/+$/, "")}/chat/completions`;
    this.apiKey = apiKey;
  }
  // Sends one chat-completion request. `params` is spread into the body, so
  // callers can set sampling options (and max_tokens) through it. Throws an
  // Error with `code` set to the HTTP status string on non-2xx responses.
  // NOTE(review): the `maxTokens` parameter is never read here — the limit
  // appears to travel via `params.max_tokens`; confirm against callers.
  async request(prompt2, model, maxTokens, systemPrompt, params, streaming, signal) {
    const messages = [];
    if (systemPrompt) {
      messages.push({ role: "system", content: systemPrompt });
    }
    messages.push({ role: "user", content: prompt2 });
    const body = {
      model,
      messages,
      stream: streaming,
      ...params
    };
    // api.openai.com expects max_completion_tokens; other OpenAI-compatible
    // servers still take max_tokens, so only rewrite for the real host.
    if (body.max_tokens && this.isOpenAIHost()) {
      body.max_completion_tokens = body.max_tokens;
      delete body.max_tokens;
    }
    const response = await fetch(this.url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(body),
      signal
    });
    if (!response.ok) {
      const text = await response.text();
      const error = new Error(`HTTP ${response.status}: ${text}`);
      error.code = String(response.status);
      throw error;
    }
    if (streaming) {
      return this.parseStream(response);
    }
    return this.parseResponse(response);
  }
  // True when targeting the official OpenAI API host.
  isOpenAIHost() {
    return this.url.includes("api.openai.com");
  }
  // Consumes an SSE stream, accumulating generated text, TTFT, and
  // inter-chunk latencies. NOTE(review): TTFT is measured from when stream
  // parsing begins (after response headers arrived), not from request send.
  async parseStream(response) {
    const body = response.body;
    if (!body) throw new Error("No response body");
    const reader = body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
    let generatedText = "";
    let ttftMs = 0;
    const requestStart = performance.now();
    let lastChunkTime = requestStart;
    const interTokenLatencies = [];
    let firstToken = true;
    let outputTokens = 0;
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        // Keep the trailing (possibly partial) line for the next chunk.
        buffer = lines.pop();
        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed.startsWith("data: ")) continue;
          const data = trimmed.slice(6);
          if (data === "[DONE]") continue;
          let chunk;
          try {
            chunk = JSON.parse(data);
          } catch {
            // Malformed SSE data lines are skipped rather than failing the run.
            continue;
          }
          const content = chunk.choices?.[0]?.delta?.content;
          if (content) {
            const now = performance.now();
            if (firstToken) {
              ttftMs = now - requestStart;
              firstToken = false;
            } else {
              interTokenLatencies.push(now - lastChunkTime);
            }
            lastChunkTime = now;
            generatedText += content;
          }
          if (chunk.usage?.completion_tokens) {
            outputTokens = chunk.usage.completion_tokens;
          }
        }
      }
    } finally {
      reader.releaseLock();
    }
    // Fall back to local tokenization when the server omitted usage info.
    if (outputTokens === 0) {
      outputTokens = countTokens(generatedText);
    }
    return { generatedText, outputTokens, ttftMs, interTokenLatencies };
  }
  // Non-streaming path: ttftMs here is only the body read/parse time, since
  // headers were already received before this method runs.
  async parseResponse(response) {
    const requestStart = performance.now();
    const json = await response.json();
    const ttftMs = performance.now() - requestStart;
    const generatedText = json.choices?.[0]?.message?.content ?? "";
    const outputTokens = json.usage?.completion_tokens ?? countTokens(generatedText);
    return { generatedText, outputTokens, ttftMs, interTokenLatencies: [] };
  }
};
975
+
976
+ // src/runner/backends/sagemaker.ts
977
+ init_esm_shims();
978
+ init_tokenizer();
979
+ import { z as z3 } from "zod";
980
+ import { SignatureV4 } from "@smithy/signature-v4";
981
+ import { Hash } from "@smithy/hash-node";
982
+ import { HttpRequest } from "@smithy/protocol-http";
983
// Environment contract for the SageMaker backend. Region defaults to
// us-east-1; static credentials are required, and the session token is
// optional (present when using temporary credentials).
var EnvSchema2 = z3.object({
  AWS_REGION: z3.string().default("us-east-1"),
  AWS_ACCESS_KEY_ID: z3.string().min(1, "AWS_ACCESS_KEY_ID is required"),
  AWS_SECRET_ACCESS_KEY: z3.string().min(1, "AWS_SECRET_ACCESS_KEY is required"),
  AWS_SESSION_TOKEN: z3.string().optional()
});
989
// Adapter giving SignatureV4 the `sha256` constructor it expects: a class
// taking an optional HMAC secret, pre-bound to the sha256 algorithm.
var Sha256Hash = class extends Hash {
  constructor(secret) {
    super("sha256", secret);
  }
};
994
// Backend for SageMaker runtime endpoints. Signs requests with SigV4 and
// supports two wire formats: native SageMaker (TGI-style jsonlines) and
// OpenAI-compatible SSE framed inside the AWS eventstream envelope.
var SageMakerBackend = class _SageMakerBackend {
  name = "sagemaker";
  signer;
  baseURL;
  requestFormat;
  // Factory: validates AWS credentials/region from the environment.
  static create(baseURL, requestFormat) {
    const env = validateEnv(EnvSchema2, "sagemaker");
    const region = env.AWS_REGION;
    return new _SageMakerBackend({
      region,
      accessKeyId: env.AWS_ACCESS_KEY_ID,
      secretAccessKey: env.AWS_SECRET_ACCESS_KEY,
      sessionToken: env.AWS_SESSION_TOKEN,
      baseURL,
      requestFormat
    });
  }
  constructor(config) {
    // Default to the regional SageMaker runtime host when no override given.
    this.baseURL = config.baseURL ?? `https://runtime.sagemaker.${config.region}.amazonaws.com`;
    this.requestFormat = config.requestFormat ?? "sagemaker" /* Sagemaker */;
    this.signer = new SignatureV4({
      service: "sagemaker",
      region: config.region,
      credentials: {
        accessKeyId: config.accessKeyId,
        secretAccessKey: config.secretAccessKey,
        sessionToken: config.sessionToken
      },
      sha256: Sha256Hash
    });
  }
  // Invokes the endpoint named by `model`. Streaming uses the
  // invocations-response-stream route; errors carry the HTTP status in `code`.
  async request(prompt2, model, maxTokens, systemPrompt, params, streaming, signal) {
    const path2 = streaming ? `/endpoints/${model}/invocations-response-stream` : `/endpoints/${model}/invocations`;
    const body = this.buildRequestBody(prompt2, maxTokens, systemPrompt, params, streaming);
    const bodyStr = JSON.stringify(body);
    const url = new URL(path2, this.baseURL);
    const headers = {
      "Content-Type": "application/json"
    };
    if (streaming) {
      headers["X-Amzn-SageMaker-InferenceComponent-Inference-Code-Accepts"] = "application/jsonlines";
    }
    // Sign an equivalent @smithy HttpRequest, then replay the signed headers
    // through fetch (the signature covers method, path, headers, and body).
    const httpRequest = new HttpRequest({
      method: "POST",
      protocol: url.protocol,
      hostname: url.hostname,
      port: url.port ? Number(url.port) : void 0,
      path: url.pathname,
      headers,
      body: bodyStr
    });
    const signed = await this.signer.sign(httpRequest);
    const response = await fetch(url.toString(), {
      method: "POST",
      headers: signed.headers,
      body: bodyStr,
      signal
    });
    if (!response.ok) {
      const text = await response.text();
      const error = new Error(`HTTP ${response.status}: ${text}`);
      error.code = String(response.status);
      throw error;
    }
    if (streaming) {
      return this.parseEventStream(response);
    }
    return this.parseResponse(response);
  }
  // Shapes the JSON body per wire format: OpenAI chat messages vs the native
  // SageMaker/TGI `inputs` + `parameters` envelope.
  buildRequestBody(prompt2, maxTokens, systemPrompt, params, streaming) {
    const messages = [];
    if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
    messages.push({ role: "user", content: prompt2 });
    if (this.requestFormat === "openai" /* OpenAI */) {
      return {
        messages,
        max_tokens: maxTokens,
        stream: streaming,
        ...params
      };
    }
    return {
      inputs: [messages],
      parameters: {
        max_new_tokens: maxTokens,
        ...params
      }
    };
  }
  // ---- Streaming: eventstream binary parser ----
  // Unwraps AWS eventstream frames, then decodes the inner payload either as
  // SSE (OpenAI format) or as jsonlines token events (native format).
  // NOTE(review): TTFT is measured from when stream parsing starts, not from
  // request send.
  async parseEventStream(response) {
    const body = response.body;
    if (!body) throw new Error("No response body");
    const reader = body.getReader();
    let buffer = new Uint8Array(0);
    let generatedText = "";
    let ttftMs = 0;
    const requestStart = performance.now();
    let lastChunkTime = requestStart;
    const interTokenLatencies = [];
    let firstToken = true;
    let outputTokens = 0;
    let sseBuffer = "";
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer = concatBytes(buffer, value);
        // Drain every complete eventstream frame currently buffered.
        while (true) {
          const parsed = readEventStreamMessage(buffer);
          if (!parsed) break;
          const { message, bytesConsumed } = parsed;
          buffer = buffer.slice(bytesConsumed);
          if (message.headers[":message-type"] === "exception") {
            const errText = new TextDecoder().decode(message.payload);
            throw new Error(
              `SageMaker stream exception (${message.headers[":event-type"]}): ${errText}`
            );
          }
          if (message.headers[":event-type"] !== "PayloadPart") continue;
          const payloadText = new TextDecoder().decode(message.payload);
          const now = performance.now();
          if (this.requestFormat === "openai" /* OpenAI */) {
            // OpenAI format: payload parts carry SSE lines; buffer partials.
            sseBuffer += payloadText;
            const lines = sseBuffer.split("\n");
            sseBuffer = lines.pop();
            for (const line of lines) {
              const trimmed = line.trim();
              if (!trimmed.startsWith("data: ")) continue;
              const data = trimmed.slice(6);
              if (data === "[DONE]") continue;
              let chunk;
              try {
                chunk = JSON.parse(data);
              } catch {
                continue;
              }
              const content = chunk.choices?.[0]?.delta?.content;
              if (content) {
                if (firstToken) {
                  ttftMs = now - requestStart;
                  firstToken = false;
                } else {
                  interTokenLatencies.push(now - lastChunkTime);
                }
                lastChunkTime = now;
                generatedText += content;
              }
              if (chunk.usage?.completion_tokens) {
                outputTokens = chunk.usage.completion_tokens;
              }
            }
          } else {
            // Native format: one JSON object per line (TGI-style token events).
            const jsonLines = payloadText.split("\n").filter((l) => l.trim());
            for (const jsonLine of jsonLines) {
              let chunk;
              try {
                chunk = JSON.parse(jsonLine);
              } catch {
                continue;
              }
              const tokenText = chunk.token?.text;
              if (tokenText) {
                if (firstToken) {
                  ttftMs = now - requestStart;
                  firstToken = false;
                } else {
                  interTokenLatencies.push(now - lastChunkTime);
                }
                lastChunkTime = now;
                generatedText += tokenText;
              } else if (typeof chunk.generated_text === "string" && !generatedText) {
                // Final summary event: use its full text only if no tokens seen.
                generatedText = chunk.generated_text;
              }
              const details = chunk.details;
              if (details?.generated_tokens) {
                outputTokens = details.generated_tokens;
              }
            }
          }
        }
      }
    } finally {
      reader.releaseLock();
    }
    // Fall back to local tokenization when the stream omitted token counts.
    if (outputTokens === 0) {
      outputTokens = countTokens(generatedText);
    }
    return { generatedText, outputTokens, ttftMs, interTokenLatencies };
  }
  // ---- Non-streaming ----
  // ttftMs here is only the body read/parse time, not true time-to-first-token.
  async parseResponse(response) {
    const requestStart = performance.now();
    const json = await response.json();
    const ttftMs = performance.now() - requestStart;
    let generatedText = "";
    let outputTokens = 0;
    if (this.requestFormat === "openai" /* OpenAI */) {
      const data = json;
      generatedText = data.choices?.[0]?.message?.content ?? "";
      outputTokens = data.usage?.completion_tokens ?? 0;
    } else {
      // Native responses may be a bare object or a single-element array.
      if (Array.isArray(json)) {
        generatedText = json[0]?.generated_text ?? "";
      } else {
        const data = json;
        generatedText = data.generated_text ?? "";
      }
    }
    if (outputTokens === 0) {
      outputTokens = countTokens(generatedText);
    }
    return {
      generatedText,
      outputTokens,
      ttftMs,
      interTokenLatencies: []
    };
  }
};
1214
/**
 * Returns a new Uint8Array holding the bytes of `a` followed by the bytes
 * of `b`. Neither input is modified.
 */
function concatBytes(a, b) {
  const joined = new Uint8Array(a.length + b.length);
  joined.set(a);
  joined.set(b, a.length);
  return joined;
}
1220
/**
 * Attempts to decode one AWS eventstream frame from the front of `buf`.
 * Returns `{ message: { headers, payload }, bytesConsumed }`, or null when
 * the buffer does not yet contain a complete frame.
 *
 * Frame layout: 4-byte total length, 4-byte headers length, 4-byte prelude
 * CRC, headers, payload, 4-byte message CRC (CRCs are not verified here).
 * Only string-valued headers (wire type 7) are understood; an unknown value
 * type stops header parsing early.
 */
function readEventStreamMessage(buf) {
  const FRAMING_BYTES = 16; // 12-byte prelude + 4-byte trailing CRC
  if (buf.length < FRAMING_BYTES) return null;
  const view = new DataView(buf.buffer, buf.byteOffset);
  const totalLength = view.getUint32(0);
  const headersLength = view.getUint32(4);
  if (buf.length < totalLength) return null;
  const headers = {};
  const headersEnd = 12 + headersLength;
  let offset = 12;
  while (offset < headersEnd) {
    const nameLen = buf[offset++];
    const name = new TextDecoder().decode(buf.slice(offset, offset + nameLen));
    offset += nameLen;
    const valueType = buf[offset++];
    if (valueType !== 7) break; // only UTF-8 string header values supported
    const valueLen = new DataView(buf.buffer, buf.byteOffset + offset).getUint16(0);
    offset += 2;
    headers[name] = new TextDecoder().decode(buf.slice(offset, offset + valueLen));
    offset += valueLen;
  }
  const payloadLength = totalLength - headersLength - FRAMING_BYTES;
  const payloadOffset = 12 + headersLength;
  const payload = buf.slice(payloadOffset, payloadOffset + payloadLength);
  return { message: { headers, payload }, bytesConsumed: totalLength };
}
1250
+
1251
+ // src/runner/backend.ts
1252
/**
 * Instantiates the backend named by config.provider.adapter.
 * @throws Error for adapter names other than "openai" or "sagemaker".
 */
function createBackend(config) {
  const { adapter, baseURL } = config.provider;
  if (adapter === "openai") {
    return OpenAIBackend.create(baseURL);
  }
  if (adapter === "sagemaker") {
    const requestFormat = config.provider.config?.["requestFormat"] ?? "sagemaker" /* Sagemaker */;
    return SageMakerBackend.create(baseURL, requestFormat);
  }
  throw new Error(`Unknown backend adapter: ${adapter}`);
}
1265
+
1266
+ // src/runner/orchestrator.ts
1267
+ init_esm_shims();
1268
+
1269
+ // src/runner/phase.ts
1270
+ init_esm_shims();
1271
// Drives ramp-up / steady / ramp-down transitions and the number of worker
// slots allowed at any instant. Each ramp may be defined by a request count
// or by a duration; when both are set, the duration-based check wins.
var PhaseController = class {
  maxConcurrency;
  rampUpRequests;
  rampUpDuration;
  rampDownRequests;
  rampDownDuration;
  startTime = 0;
  completedRequests = 0;
  totalRequests;
  maxDuration;
  constructor(config) {
    const bench = config.benchmark;
    this.maxConcurrency = bench.concurrency;
    this.rampUpRequests = bench.rampUp?.requests ?? 0;
    this.rampUpDuration = (bench.rampUp?.duration ?? 0) * 1e3;
    this.rampDownRequests = bench.rampDown?.requests ?? 0;
    this.rampDownDuration = (bench.rampDown?.duration ?? 0) * 1e3;
    this.totalRequests = bench.maxRequests ?? Infinity;
    this.maxDuration = (bench.maxDuration ?? Infinity) * 1e3;
  }
  /** Marks the benchmark start; elapsed time is measured from here. */
  start() {
    this.startTime = performance.now();
  }
  /** Records one finished request (successful or failed). */
  recordCompletion() {
    this.completedRequests++;
  }
  /** Current phase, derived from elapsed time and completion counts. */
  get phase() {
    const elapsed = performance.now() - this.startTime;
    const pending = this.totalRequests - this.completedRequests;
    const timedRampUp = this.rampUpDuration > 0 && elapsed < this.rampUpDuration;
    const countedRampUp = this.rampUpRequests > 0 && this.completedRequests < this.rampUpRequests;
    if (timedRampUp || countedRampUp) return "ramp-up";
    if (this.rampDownDuration > 0 && this.maxDuration - elapsed <= this.rampDownDuration) {
      return "ramp-down";
    }
    if (this.rampDownRequests > 0 && pending <= this.rampDownRequests) {
      return "ramp-down";
    }
    return "steady";
  }
  /** Number of worker slots that may be active right now (always >= 1). */
  get allowedConcurrency() {
    const elapsed = performance.now() - this.startTime;
    switch (this.phase) {
      case "ramp-up": {
        // Scale up linearly with time (or completions when no duration set).
        const fraction = this.rampUpDuration > 0
          ? Math.min(1, elapsed / this.rampUpDuration)
          : Math.min(1, this.completedRequests / this.rampUpRequests);
        return Math.max(1, Math.ceil(fraction * this.maxConcurrency));
      }
      case "ramp-down": {
        // Scale down as the end of the run approaches.
        const fraction = this.rampDownDuration > 0
          ? Math.max(0, (this.maxDuration - elapsed) / this.rampDownDuration)
          : Math.max(0, (this.totalRequests - this.completedRequests) / this.rampDownRequests);
        return Math.max(1, Math.ceil(fraction * this.maxConcurrency));
      }
      default:
        return this.maxConcurrency;
    }
  }
  /** True once the run should end: aborted, request budget met, or time up. */
  shouldStop(aborted) {
    if (aborted) return true;
    if (this.completedRequests >= this.totalRequests) return true;
    return performance.now() - this.startTime >= this.maxDuration;
  }
};
1347
+
1348
+ // src/runner/wal.ts
1349
+ init_esm_shims();
1350
+ import { appendFileSync, mkdirSync as mkdirSync2, writeFileSync as writeFileSync3 } from "fs";
1351
+ import { join as join2 } from "path";
1352
// Write-ahead log for per-request metrics: every result is appended to a
// JSONL log and mirrored as a standalone pretty-printed JSON file, so a
// crashed run still leaves complete per-request data on disk.
var WAL = class {
  logPath;
  responsesDir;
  constructor(outputDir) {
    this.logPath = join2(outputDir, "run_log.jsonl");
    this.responsesDir = join2(outputDir, "individual_responses");
    mkdirSync2(this.responsesDir, { recursive: true });
  }
  /** Durably records one request's metrics (append + individual file). */
  write(metrics) {
    appendFileSync(this.logPath, `${JSON.stringify(metrics)}\n`);
    const responsePath = join2(this.responsesDir, `${metrics.requestId}.json`);
    writeFileSync3(responsePath, JSON.stringify(metrics, null, 2));
  }
  /** Reads every metrics record back out of a run's JSONL log. */
  static readLog(outputDir) {
    const logPath = join2(outputDir, "run_log.jsonl");
    const content = __require("fs").readFileSync(logPath, "utf-8");
    const rows = content.split("\n").filter((row) => row.trim().length > 0);
    return rows.map((row) => JSON.parse(row));
  }
};
1373
+
1374
+ // src/runner/request.ts
1375
+ init_esm_shims();
1376
/**
 * Executes one benchmark request and always resolves to a metrics record:
 * successes carry the backend response plus derived throughput; failures
 * (including timeout/abort) carry error/errorCode with zeroed token fields.
 * Never throws.
 */
async function executeRequest(backend, prompt2, config, requestId, phase, cacheHit, signal) {
  const startTime = performance.now();
  try {
    // Per-request timeout combined with the run-level abort signal.
    const timeoutMs = config.benchmark.timeout * 1e3;
    const combinedSignal = AbortSignal.any([signal, AbortSignal.timeout(timeoutMs)]);
    const response = await backend.request(
      prompt2.text,
      config.provider.model,
      prompt2.outputTokenTarget,
      config.provider.systemPrompt,
      config.provider.config,
      config.benchmark.streaming,
      combinedSignal
    );
    const endTime = performance.now();
    const e2eLatencyMs = endTime - startTime;
    const outputThroughputTps = e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0;
    return {
      requestId,
      startTime,
      endTime,
      ttftMs: response.ttftMs,
      e2eLatencyMs,
      interTokenLatencies: response.interTokenLatencies,
      inputText: prompt2.text,
      inputTokens: prompt2.tokenCount,
      outputTokens: response.outputTokens,
      outputThroughputTps,
      generatedText: response.generatedText,
      phase,
      cacheHit
    };
  } catch (error) {
    const endTime = performance.now();
    const message = error instanceof Error ? error.message : String(error);
    const errorCode = error instanceof Error && "code" in error ? String(error.code) : void 0;
    return {
      requestId,
      startTime,
      endTime,
      ttftMs: 0,
      e2eLatencyMs: endTime - startTime,
      interTokenLatencies: [],
      inputText: prompt2.text,
      inputTokens: prompt2.tokenCount,
      outputTokens: 0,
      outputThroughputTps: 0,
      generatedText: "",
      error: message,
      errorCode,
      phase,
      cacheHit
    };
  }
}
1429
+
1430
+ // src/runner/orchestrator.ts
1431
// Runs the benchmark: spawns `concurrency` worker loops that each pull the
// next prompt, execute it, and persist metrics through the WAL. Phase gating
// is cooperative — a worker whose slot id exceeds the currently allowed
// concurrency idles until permitted.
var ConcurrencyOrchestrator = class {
  config;
  backend;
  prompts;
  wal;
  phaseController;
  promptIndex = 0;
  completedRequests = 0;
  activeSlots = 0;
  results = [];
  aborted = false;
  onProgress;
  constructor(config, backend, prompts, outputDir, onProgress) {
    this.config = config;
    this.backend = backend;
    this.prompts = prompts;
    this.wal = new WAL(outputDir);
    this.phaseController = new PhaseController(config);
    this.onProgress = onProgress;
  }
  /** Requests a graceful stop; in-flight requests finish normally. */
  abort() {
    this.aborted = true;
  }
  /** Launches all worker loops and resolves with every collected result. */
  async run(signal) {
    this.phaseController.start();
    const slotCount = this.config.benchmark.concurrency;
    const loops = [];
    for (let slot = 0; slot < slotCount; slot++) {
      loops.push(this.workerLoop(slot, signal));
    }
    await Promise.allSettled(loops);
    return this.results;
  }
  async workerLoop(slotId, signal) {
    for (;;) {
      if (this.phaseController.shouldStop(this.aborted || signal.aborted)) return;
      // Slots above the phase-allowed concurrency idle until permitted.
      if (slotId >= this.phaseController.allowedConcurrency) {
        await sleep(50);
        continue;
      }
      const prompt2 = this.getNextPrompt();
      if (!prompt2) return;
      this.activeSlots++;
      const metrics = await executeRequest(
        this.backend,
        prompt2,
        this.config,
        crypto.randomUUID(),
        this.phaseController.phase,
        this.isCacheHit(),
        signal
      );
      this.results.push(metrics);
      this.wal.write(metrics);
      this.completedRequests++;
      this.phaseController.recordCompletion();
      this.onProgress?.(
        metrics,
        this.activeSlots,
        this.completedRequests,
        this.phaseController.phase,
        this.phaseController.allowedConcurrency
      );
      this.activeSlots--;
    }
  }
  /** Next prompt (cycling the pool), or null when the budget is exhausted. */
  getNextPrompt() {
    if (this.prompts.length === 0) return null;
    const budget = this.config.benchmark.maxRequests ?? Infinity;
    if (this.promptIndex >= budget) return null;
    return this.prompts[this.promptIndex++ % this.prompts.length];
  }
  /** Bernoulli draw against benchmark.cachePercentage (0-100). */
  isCacheHit() {
    const pct = this.config.benchmark.cachePercentage;
    return pct > 0 && Math.random() * 100 < pct;
  }
};
1510
/** Promise-based delay of `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
1513
+
1514
+ // src/reporter/aggregator.ts
1515
+ init_esm_shims();
1516
+
1517
+ // src/reporter/statistics.ts
1518
+ init_esm_shims();
1519
/** Arithmetic mean of `values`; 0 for an empty list. */
function mean(values) {
  if (values.length === 0) return 0;
  let total = 0;
  for (const v of values) total += v;
  return total / values.length;
}
1523
/** Sample standard deviation (n-1 denominator); 0 with fewer than 2 values. */
function stddev(values) {
  if (values.length < 2) return 0;
  let total = 0;
  for (const v of values) total += v;
  const avg = total / values.length;
  let sumSq = 0;
  for (const v of values) sumSq += (v - avg) ** 2;
  return Math.sqrt(sumSq / (values.length - 1));
}
1529
/**
 * Linear-interpolation quantile of an ascending-sorted array; returns 0 for
 * an empty input. `q` is in [0, 1].
 */
function quantile(sorted, q) {
  if (sorted.length === 0) return 0;
  const pos = (sorted.length - 1) * q;
  const lo = Math.floor(pos);
  const hi = Math.ceil(pos);
  if (lo === hi) return sorted[lo];
  return sorted[lo] + (sorted[hi] - sorted[lo]) * (pos - lo);
}
1537
/** Builds the full summary-statistics bundle for a list of samples. */
function aggregate(values) {
  const sorted = [...values].sort((a, b) => a - b);
  const pct = (q) => quantile(sorted, q);
  return {
    mean: mean(values),
    min: sorted[0] ?? 0,
    max: sorted[sorted.length - 1] ?? 0,
    stddev: stddev(values),
    p25: pct(0.25),
    p50: pct(0.5),
    p75: pct(0.75),
    p90: pct(0.9),
    p95: pct(0.95),
    p99: pct(0.99)
  };
}
1552
+
1553
+ // src/reporter/aggregator.ts
1554
/**
 * Collapses per-request metrics into a run-level summary: counts, error
 * rates, request/token throughput, latency aggregates, per-phase breakdown,
 * and cache-hit rate. Latency and token aggregates use successful requests
 * only; error-code frequencies come from failed requests.
 */
function computeSummary(requests) {
  const successful = requests.filter((r) => !r.error);
  const failed = requests.filter((r) => !!r.error);
  // Min/max via a loop rather than Math.min(...spread): spreading a very
  // large run can exceed the engine's argument-count limit, and an empty
  // run would otherwise produce +/-Infinity timestamps.
  let startTime = Infinity;
  let endTime = -Infinity;
  for (const r of requests) {
    if (r.startTime < startTime) startTime = r.startTime;
    if (r.endTime > endTime) endTime = r.endTime;
  }
  if (requests.length === 0) {
    startTime = 0;
    endTime = 0;
  }
  const durationMs = endTime - startTime;
  const durationMin = durationMs / 6e4;
  const totalOutputTokens = successful.reduce((sum, r) => sum + r.outputTokens, 0);
  const errorCodeFrequency = {};
  for (const r of failed) {
    const code = r.errorCode ?? "unknown";
    errorCodeFrequency[code] = (errorCodeFrequency[code] ?? 0) + 1;
  }
  const cacheHits = requests.filter((r) => r.cacheHit).length;
  const phases = ["ramp-up", "steady", "ramp-down"];
  const phaseBreakdown = {};
  for (const phase of phases) {
    const phaseReqs = requests.filter((r) => r.phase === phase);
    if (phaseReqs.length > 0) {
      const phaseErrors = phaseReqs.filter((r) => !!r.error).length;
      phaseBreakdown[phase] = {
        requests: phaseReqs.length,
        errorRate: phaseErrors / phaseReqs.length
      };
    }
  }
  // ttfntMs is optional on metrics records; aggregate only positive samples.
  const ttfntValues = successful.map((r) => r.ttfntMs).filter((v) => v !== void 0 && v > 0);
  const itlValues = successful.flatMap((r) => r.interTokenLatencies);
  return {
    startTime,
    endTime,
    totalRequests: requests.length,
    successfulRequests: successful.length,
    failedRequests: failed.length,
    errorRate: requests.length > 0 ? failed.length / requests.length : 0,
    rpm: durationMin > 0 ? requests.length / durationMin : 0,
    overallTps: durationMs > 0 ? totalOutputTokens / (durationMs / 1e3) : 0,
    ttft: aggregate(successful.map((r) => r.ttftMs)),
    ttfnt: ttfntValues.length > 0 ? aggregate(ttfntValues) : void 0,
    e2eLatency: aggregate(successful.map((r) => r.e2eLatencyMs)),
    outputThroughput: aggregate(successful.map((r) => r.outputThroughputTps)),
    interTokenLatency: aggregate(itlValues),
    inputTokens: aggregate(successful.map((r) => r.inputTokens)),
    outputTokens: aggregate(successful.map((r) => r.outputTokens)),
    errorCodeFrequency,
    cacheHitRate: requests.length > 0 ? cacheHits / requests.length : 0,
    phaseBreakdown
  };
}
1603
+
1604
+ // src/reporter/exporter.ts
1605
+ init_esm_shims();
1606
+
1607
+ // src/reporter/exporters/json.ts
1608
+ init_esm_shims();
1609
+ import { writeFileSync as writeFileSync4, mkdirSync as mkdirSync3 } from "fs";
1610
+ import { join as join3 } from "path";
1611
// Persists the run summary as pretty-printed JSON (summary.json).
var JsonExporter = class {
  name = "json";
  async export(summary, _requests, outputDir) {
    mkdirSync3(outputDir, { recursive: true });
    const target = join3(outputDir, "summary.json");
    writeFileSync4(target, JSON.stringify(summary, null, 2));
  }
};
1618
+
1619
+ // src/reporter/exporters/csv.ts
1620
+ init_esm_shims();
1621
+ import { writeFileSync as writeFileSync5, mkdirSync as mkdirSync4 } from "fs";
1622
+ import { join as join4 } from "path";
1623
// Persists run results as CSV: a one-row summary.csv plus a per-request
// requests.csv. Values are joined naively, so free-text fields containing
// commas would need quoting upstream.
var CsvExporter = class {
  name = "csv";
  async export(summary, requests, outputDir) {
    mkdirSync4(outputDir, { recursive: true });
    this.writeSummary(summary, outputDir);
    this.writeRequests(requests, outputDir);
  }
  /** Emits summary.csv: scalar fields, then <metric>_<stat> columns. */
  writeSummary(summary, outputDir) {
    const scalarFields = [
      "totalRequests",
      "successfulRequests",
      "failedRequests",
      "errorRate",
      "rpm",
      "overallTps",
      "cacheHitRate"
    ];
    const metricFields = [
      "ttft",
      "e2eLatency",
      "outputThroughput",
      "interTokenLatency",
      "inputTokens",
      "outputTokens"
    ];
    const statFields = [
      "mean",
      "min",
      "max",
      "stddev",
      "p25",
      "p50",
      "p75",
      "p90",
      "p95",
      "p99"
    ];
    const headers = [...scalarFields];
    const values = scalarFields.map((field) => summary[field]);
    for (const metric of metricFields) {
      const agg = summary[metric];
      for (const stat of statFields) {
        headers.push(`${metric}_${stat}`);
        values.push(agg[stat]);
      }
    }
    const csv = `${headers.join(",")}\n${values.join(",")}`;
    writeFileSync5(join4(outputDir, "summary.csv"), csv);
  }
  /** Emits requests.csv, one row per request; skipped when there are none. */
  writeRequests(requests, outputDir) {
    if (requests.length === 0) return;
    const columns = [
      "requestId",
      "startTime",
      "endTime",
      "ttftMs",
      "e2eLatencyMs",
      "inputTokens",
      "outputTokens",
      "outputThroughputTps",
      "phase",
      "cacheHit",
      "error",
      "errorCode"
    ];
    const toRow = (r) => [
      r.requestId,
      r.startTime,
      r.endTime,
      r.ttftMs,
      r.e2eLatencyMs,
      r.inputTokens,
      r.outputTokens,
      r.outputThroughputTps,
      r.phase,
      r.cacheHit,
      r.error ?? "",
      r.errorCode ?? ""
    ].join(",");
    const csv = [columns.join(","), ...requests.map(toRow)].join("\n");
    writeFileSync5(join4(outputDir, "requests.csv"), csv);
  }
};
1715
+
1716
+ // src/reporter/exporter.ts
1717
/**
 * Builds the exporter instances named in config.reporter.adapters.
 * Unrecognized adapter names are silently ignored (matching prior behavior).
 */
function createExporters(config) {
  const exporters = [];
  for (const adapter of config.reporter.adapters) {
    if (adapter === "json") {
      exporters.push(new JsonExporter());
    } else if (adapter === "csv") {
      exporters.push(new CsvExporter());
    }
  }
  return exporters;
}
1732
+
1733
+ // src/utils/signal.ts
1734
+ init_esm_shims();
1735
/**
 * Wire SIGINT/SIGTERM to a shared AbortController.
 * First signal: abort and run all registered shutdown callbacks (graceful).
 * Any further signal: hard exit with status 1.
 * Returns the controller plus an `onShutdown(fn)` registrar.
 */
function createAbortController() {
  const controller = new AbortController();
  const callbacks = [];
  let interrupts = 0;
  const onSignal = () => {
    interrupts += 1;
    if (interrupts > 1) {
      // Second Ctrl+C: the user wants out now.
      process.exit(1);
    }
    console.log("\nGraceful shutdown initiated... (press Ctrl+C again to force)");
    controller.abort();
    callbacks.forEach((cb) => cb());
  };
  for (const signal of ["SIGINT", "SIGTERM"]) {
    process.on(signal, onSignal);
  }
  return {
    controller,
    onShutdown: (fn) => callbacks.push(fn)
  };
}
1756
+
1757
+ // src/cli/progress.ts
1758
+ init_esm_shims();
1759
+
1760
+ // src/cli/ui/store.ts
1761
+ init_esm_shims();
1762
// Size of the rolling latency windows kept for the live UI.
var ROLLING_CAP = 50;
/**
 * Mutable run-state accumulator sampled by the progress displays.
 * `update()` folds one finished request in; `snapshot()` hands out a copy.
 */
var BenchmarkStore = class {
  phase = "steady";
  stage = "idle";
  activeSlots = 0;
  allowedConcurrency = 0;
  maxConcurrency;
  completed = 0;
  totalTarget;
  errors = 0;
  totalOutputTokens = 0;
  totalInputTokens = 0;
  startTime = 0;
  recentTtft = [];
  recentE2eLatency = [];
  recentErrors = [];
  modelName;
  streaming;
  constructor(opts) {
    const { totalTarget, maxConcurrency, modelName, streaming } = opts;
    this.totalTarget = totalTarget;
    this.maxConcurrency = maxConcurrency;
    this.modelName = modelName;
    this.streaming = streaming;
  }
  /** Record one completed request plus the orchestrator's current state. */
  update(metrics, activeSlots, completed, phase, allowedConcurrency) {
    this.activeSlots = activeSlots;
    this.completed = completed;
    this.phase = phase;
    this.allowedConcurrency = allowedConcurrency;
    // Token totals accumulate for failed requests too.
    this.totalOutputTokens += metrics.outputTokens;
    this.totalInputTokens += metrics.inputTokens;
    if (metrics.error) {
      this.errors += 1;
      // Keep only the five most recent error messages.
      if (this.recentErrors.length >= 5) this.recentErrors.shift();
      this.recentErrors.push(metrics.error);
    } else {
      // Successful requests feed the rolling latency windows.
      if (this.recentTtft.length >= ROLLING_CAP) this.recentTtft.shift();
      this.recentTtft.push(metrics.ttftMs);
      if (this.recentE2eLatency.length >= ROLLING_CAP) this.recentE2eLatency.shift();
      this.recentE2eLatency.push(metrics.e2eLatencyMs);
    }
  }
  setStage(stage) {
    this.stage = stage;
  }
  setStartTime(t) {
    this.startTime = t;
  }
  /** Point-in-time copy of all state; arrays are cloned so callers cannot mutate us. */
  snapshot() {
    return {
      phase: this.phase,
      stage: this.stage,
      activeSlots: this.activeSlots,
      allowedConcurrency: this.allowedConcurrency,
      maxConcurrency: this.maxConcurrency,
      completed: this.completed,
      totalTarget: this.totalTarget,
      errors: this.errors,
      totalOutputTokens: this.totalOutputTokens,
      totalInputTokens: this.totalInputTokens,
      startTime: this.startTime,
      recentTtft: [...this.recentTtft],
      recentE2eLatency: [...this.recentE2eLatency],
      recentErrors: [...this.recentErrors],
      modelName: this.modelName,
      streaming: this.streaming
    };
  }
};
1832
+
1833
+ // src/cli/ui/fallback.ts
1834
+ init_esm_shims();
1835
/** Format a second count as "Ns" under one minute, else "MmSSs" (e.g. "2m05s"). */
function formatDuration(seconds) {
  const mins = Math.floor(seconds / 60);
  const secs = Math.floor(seconds % 60);
  if (mins <= 0) return `${secs}s`;
  return `${mins}m${String(secs).padStart(2, "0")}s`;
}
1840
// Plain-text progress printer, used when stderr is not a TTY (no Ink UI).
var FallbackDisplay = class {
  store;
  // Timer handle for the periodic print; null when not running.
  intervalId = null;
  constructor(store) {
    this.store = store;
  }
  // Start printing a progress line to stderr every 2 seconds.
  start() {
    this.intervalId = setInterval(() => this.print(), 2e3);
  }
  // Stop the timer (idempotent) and emit one final progress line.
  stop() {
    if (this.intervalId) {
      clearInterval(this.intervalId);
      this.intervalId = null;
    }
    this.print();
  }
  // Render "[bar] completed/total (pct)" plus elapsed time, request/token
  // rates, and error count.
  print() {
    const s = this.store;
    // Elapsed wall-clock seconds since setStartTime(); 0 before the run starts.
    const elapsed = s.startTime > 0 ? (performance.now() - s.startTime) / 1e3 : 0;
    const rps = elapsed > 0 ? s.completed / elapsed : 0;
    const tps = elapsed > 0 ? s.totalOutputTokens / elapsed : 0;
    // totalTarget === Infinity means a duration-bounded run: show "?" and no bar fill.
    const isInfinite = s.totalTarget === Infinity;
    const totalStr = isInfinite ? "?" : String(s.totalTarget);
    const pct = isInfinite ? 0 : Math.min(1, s.completed / s.totalTarget);
    const pctStr = isInfinite ? "" : ` (${(pct * 100).toFixed(1)}%)`;
    const barWidth = 20;
    const filled = Math.round(pct * barWidth);
    const bar = "=".repeat(filled) + ".".repeat(barWidth - filled);
    // NOTE(review): this template literal spans physical lines; the exact
    // leading whitespace of the continuation lines was lost in the bundle
    // rendering and is reconstructed here -- confirm against the source.
    process.stderr.write(
      ` [${bar}] ${s.completed}/${totalStr}${pctStr}
${formatDuration(elapsed)} | ${rps.toFixed(1)} req/s | ${tps.toFixed(0)} tok/s | ${s.errors} err
`
    );
  }
};
1875
+
1876
+ // src/cli/progress.ts
1877
/**
 * Facade over the two progress renderers: the Ink React UI when stderr is a
 * TTY, otherwise the plain-text FallbackDisplay. Both read from one
 * BenchmarkStore that this class owns.
 */
var ProgressDisplay = class {
  store;
  inkInstance = null;
  fallback = null;
  constructor(opts) {
    this.store = new BenchmarkStore(opts);
  }
  /** Start the clock and bring up whichever renderer fits the terminal. */
  async start() {
    this.store.setStartTime(performance.now());
    if (!process.stderr.isTTY) {
      this.fallback = new FallbackDisplay(this.store);
      this.fallback.start();
      return;
    }
    // Ink and React are loaded lazily so non-TTY runs never pay for them.
    const { render } = await import("ink");
    const { createElement } = await import("react");
    const { App: App2 } = await Promise.resolve().then(() => (init_app(), app_exports));
    this.inkInstance = render(createElement(App2, { store: this.store }), {
      stdout: process.stderr,
      patchConsole: false,
      exitOnCtrlC: false
    });
  }
  /** Forward one finished request into the shared store. */
  update(metrics, activeSlots, completed, phase, allowedConcurrency) {
    this.store.update(metrics, activeSlots, completed, phase, allowedConcurrency);
  }
  setStage(stage) {
    this.store.setStage(stage);
  }
  /** Tear down whichever renderer is active. */
  stop() {
    if (this.inkInstance !== null) {
      this.inkInstance.unmount();
      this.inkInstance = null;
    }
    if (this.fallback !== null) {
      this.fallback.stop();
      this.fallback = null;
    }
  }
};
1917
+
1918
+ // index.ts
1919
+ import { mkdirSync as mkdirSync5, writeFileSync as writeFileSync6 } from "fs";
1920
+ import { join as join5 } from "path";
1921
/**
 * CLI entry: parse argv, resolve config, and dispatch to the requested
 * subcommand. `init` short-circuits before config resolution because it
 * creates the config file itself. Unknown commands are a silent no-op,
 * matching the original switch with no default case.
 */
async function main() {
  const cliArgs = parseCliArgs(process.argv);
  if (cliArgs.command === "init") {
    await runInit(cliArgs.initOutputPath);
    return;
  }
  const { command, configPath, runId, overrides } = cliArgs;
  const { config, outputDir } = resolveConfig(configPath, runId, overrides);
  if (command === "run") {
    await runFullPipeline(config, outputDir);
  } else if (command === "generate") {
    await runGenerate(config, outputDir);
  } else if (command === "bench") {
    await runBench(config, outputDir);
  } else if (command === "report") {
    await runReport(config);
  }
}
1944
// Fixed outer width of every box drawn by the console reporter.
var BOX_W = 56;
/**
 * Draw a rounded box with a title embedded in the top border and each
 * content line padded to the box width (ANSI codes excluded from the
 * width calculation via stripAnsi).
 */
function printBox(title, lines) {
  const inner = BOX_W - 2;
  const heading = `\u2500\u2500 ${title} `;
  const headingFill = "\u2500".repeat(Math.max(0, inner - heading.length));
  console.log(` ${dim("\u256D")}${dim(heading)}${dim(headingFill)}${dim("\u256E")}`);
  for (const line of lines) {
    const visible = stripAnsi(line);
    const padding = Math.max(0, inner - visible.length - 2);
    console.log(` ${dim("\u2502")} ${line}${" ".repeat(padding)}${dim("\u2502")}`);
  }
  console.log(` ${dim("\u2570")}${dim("\u2500".repeat(inner))}${dim("\u256F")}`);
}
1957
/** Remove ANSI SGR escape sequences so visible string width can be measured. */
function stripAnsi(s) {
  return s.split(/\x1b\[[0-9;]*m/).join("");
}
1960
/**
 * Print the startup banner: a boxed, centered product title followed by a
 * label/value table of the run configuration (model, concurrency, streaming,
 * optional request/duration caps, and the output directory).
 */
function printBanner(config, outputDir) {
  const inner = BOX_W - 2;
  const title = "FLOTorch Load Tester";
  const leftPad = Math.floor((inner - title.length) / 2);
  const rightPad = inner - leftPad - title.length;
  console.log();
  console.log(` ${dim("\u256D")}${dim("\u2500".repeat(inner))}${dim("\u256E")}`);
  console.log(` ${dim("\u2502")}${" ".repeat(leftPad)}${bold(title)}${" ".repeat(rightPad)}${dim("\u2502")}`);
  console.log(` ${dim("\u2570")}${dim("\u2500".repeat(inner))}${dim("\u256F")}`);
  const rows = [
    [dim("Model"), bold(config.provider.model)],
    [dim("Concurrency"), bold(String(config.benchmark.concurrency))],
    [dim("Streaming"), bold(config.benchmark.streaming ? "yes" : "no")]
  ];
  // Caps are optional; only show the ones that are configured.
  const maxReqs = config.benchmark.maxRequests;
  if (maxReqs) rows.push([dim("Requests"), bold(String(maxReqs))]);
  const maxDur = config.benchmark.maxDuration;
  if (maxDur) rows.push([dim("Duration"), bold(`${maxDur}s`)]);
  rows.push([dim("Output"), cyan(outputDir)]);
  const labelW = 14;
  for (const [label, value] of rows) {
    // Pad by visible width: the label carries ANSI codes that take no columns.
    const visibleLabel = stripAnsi(label);
    const pad = " ".repeat(Math.max(0, labelW - visibleLabel.length));
    console.log(` ${label}${pad}${value}`);
  }
  console.log();
}
1986
/** Print a completed-stage line: green check, message, optional dim detail. */
function stageOk(msg, detail) {
  const extra = detail ? ` ${dim(detail)}` : "";
  console.log(` ${green("\u2714")} ${msg}${extra}`);
}
1990
/** Print an in-progress stage line: bold arrow followed by the message. */
function stageRun(msg) {
  const marker = bold("\u25B8");
  console.log(` ${marker} ${msg}`);
}
1993
/**
 * Format a number right-aligned in a column of width `w`:
 * no decimals at >= 100, one decimal place below that.
 * (The original had an `n >= 10` branch whose arm was identical to the
 * final `n.toFixed(1)` arm — dead condition collapsed; output unchanged.)
 */
function fmtNum(n, w) {
  const s = n >= 100 ? n.toFixed(0) : n.toFixed(1);
  return s.padStart(w);
}
1997
/**
 * One latency-table row: bold label padded to `labelW`, then the
 * mean/p50/p95/p99/max aggregates each right-aligned in a `colW` column.
 */
function fmtMetricTable(label, agg, labelW, colW) {
  const cells = [agg.mean, agg.p50, agg.p95, agg.p99, agg.max]
    .map((v) => fmtNum(v, colW))
    .join("");
  return `${bold(label.padEnd(labelW))}${cells}`;
}
2001
// Render the end-of-run console report: a Summary box, a latency table,
// and (when present) Errors and Phases boxes.
function printSummary(summary) {
  const durationSec = (summary.endTime - summary.startTime) / 1e3;
  // Error count is green when zero, red otherwise.
  const errColor = summary.failedRequests > 0 ? red : green;
  console.log();
  printBox("Summary", [
    `${dim("Duration")} ${bold(durationSec.toFixed(1) + "s")}`,
    `${dim("Requests")} ${bold(String(summary.successfulRequests))}/${summary.totalRequests} ${errColor(`(${summary.failedRequests} errors)`)}`,
    `${dim("Throughput")} ${bold(summary.rpm.toFixed(1))} ${dim("req/min")} ${bold(summary.overallTps.toFixed(1))} ${dim("tok/s")}`
  ]);
  // Column layout shared by the header and every fmtMetricTable row.
  const labelW = 6;
  const colW = 9;
  const header = dim(" ".repeat(labelW)) + dim("mean".padStart(colW)) + dim("p50".padStart(colW)) + dim("p95".padStart(colW)) + dim("p99".padStart(colW)) + dim("max".padStart(colW));
  const latencyLines = [
    header,
    fmtMetricTable("TTFT", summary.ttft, labelW, colW),
    fmtMetricTable("E2E", summary.e2eLatency, labelW, colW),
    fmtMetricTable("ITL", summary.interTokenLatency, labelW, colW)
  ];
  console.log();
  printBox("Latency (ms)", latencyLines);
  // Errors box only when at least one error code was recorded.
  if (Object.keys(summary.errorCodeFrequency).length > 0) {
    console.log();
    const errLines = [];
    for (const [code, count] of Object.entries(summary.errorCodeFrequency)) {
      errLines.push(`${red(code)} ${bold(String(count))}`);
    }
    printBox("Errors", errLines);
  }
  // Phases box only when the run had more than one phase (e.g. ramp + steady).
  if (Object.keys(summary.phaseBreakdown).length > 1) {
    console.log();
    const phaseLines = [];
    for (const [phase, data] of Object.entries(summary.phaseBreakdown)) {
      phaseLines.push(
        `${bold(phase.padEnd(12))} ${String(data.requests).padStart(4)} reqs ${(data.errorRate * 100).toFixed(1)}% err`
      );
    }
    printBox("Phases", phaseLines);
  }
}
2040
// Full `run` command: generate prompts, benchmark them, then export and
// print the report — all artifacts land in `outputDir`.
async function runFullPipeline(config, outputDir) {
  printBanner(config, outputDir);
  stageRun("Generating prompts...");
  const prompts = generatePrompts(config);
  // Erase the transient "Generating..." line (cursor up + clear line).
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Generating prompts", `${prompts.length} prompts`);
  mkdirSync5(outputDir, { recursive: true });
  // Persist the generated prompts as JSONL alongside the results.
  writeFileSync6(
    join5(outputDir, "prompts.jsonl"),
    prompts.map((p) => JSON.stringify(p)).join("\n") + "\n"
  );
  stageRun("Running benchmark...");
  // First Ctrl+C aborts the orchestrator gracefully; a second force-exits.
  const { controller, onShutdown } = createAbortController();
  // No request cap configured means the run is bounded by duration only.
  const totalTarget = config.benchmark.maxRequests ?? Infinity;
  const progress = new ProgressDisplay({
    totalTarget,
    maxConcurrency: config.benchmark.concurrency,
    modelName: config.provider.model,
    streaming: config.benchmark.streaming
  });
  progress.setStage("benchmarking");
  const backend = createBackend(config);
  const orchestrator = new ConcurrencyOrchestrator(
    config,
    backend,
    prompts,
    outputDir,
    // Per-request callback feeds the live progress display.
    (metrics, active, completed, phase, allowedConcurrency) => progress.update(metrics, active, completed, phase, allowedConcurrency)
  );
  onShutdown(() => orchestrator.abort());
  await progress.start();
  const results = await orchestrator.run(controller.signal);
  progress.stop();
  stageOk("Running benchmark", `${results.length} requests`);
  stageRun("Generating report...");
  const summary = computeSummary(results);
  const exporters = createExporters(config);
  for (const exporter of exporters) {
    await exporter.export(summary, results, outputDir);
  }
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Generating report");
  printSummary(summary);
  // NOTE(review): multiline template reconstructed from the bundle; the exact
  // leading whitespace of the continuation lines was normalized away -- confirm.
  console.log(`
${dim("Results saved to:")} ${cyan(outputDir)}
`);
}
2087
// `generate` command: produce prompts only and write them to
// `<outputDir>/prompts.jsonl` (no benchmarking).
async function runGenerate(config, outputDir) {
  printBanner(config, outputDir);
  stageRun("Generating prompts...");
  const prompts = generatePrompts(config);
  mkdirSync5(outputDir, { recursive: true });
  const outPath = join5(outputDir, "prompts.jsonl");
  // One JSON object per line, trailing newline included.
  writeFileSync6(outPath, prompts.map((p) => JSON.stringify(p)).join("\n") + "\n");
  // Erase the transient "Generating..." line (cursor up + clear line).
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Generating prompts", `${prompts.length} \u2192 ${cyan(outPath)}`);
}
2097
// `bench` command: benchmark pre-generated prompts loaded from
// `config.benchmark.inputFile` (a prompts.jsonl path); results go to outputDir.
async function runBench(config, outputDir) {
  const inputFile = config.benchmark.inputFile;
  if (!inputFile) {
    throw new Error("bench command requires benchmark.inputFile (path to prompts.jsonl)");
  }
  printBanner(config, outputDir);
  stageRun(`Loading prompts from ${cyan(inputFile)}...`);
  // Lazily initialize and import the file-backed prompt generator module.
  const { FileGenerator: FileGenerator2 } = await Promise.resolve().then(() => (init_file(), file_exports));
  const gen = new FileGenerator2(inputFile);
  // Default to 100 prompts when no request cap is configured.
  const count = config.benchmark.maxRequests ?? 100;
  const prompts = gen.generate(count);
  // Erase the transient "Loading..." line (cursor up + clear line).
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Loading prompts", `${prompts.length} prompts`);
  stageRun("Running benchmark...");
  // First Ctrl+C aborts gracefully; a second force-exits.
  const { controller, onShutdown } = createAbortController();
  const progress = new ProgressDisplay({
    totalTarget: count,
    maxConcurrency: config.benchmark.concurrency,
    modelName: config.provider.model,
    streaming: config.benchmark.streaming
  });
  progress.setStage("benchmarking");
  const backend = createBackend(config);
  const orchestrator = new ConcurrencyOrchestrator(
    config,
    backend,
    prompts,
    outputDir,
    // Per-request callback feeds the live progress display.
    (metrics, active, completed, phase, allowedConcurrency) => progress.update(metrics, active, completed, phase, allowedConcurrency)
  );
  mkdirSync5(outputDir, { recursive: true });
  onShutdown(() => orchestrator.abort());
  await progress.start();
  const results = await orchestrator.run(controller.signal);
  progress.stop();
  stageOk("Running benchmark", `${results.length} requests \u2192 ${cyan(outputDir)}`);
}
2134
// `report` command: recompute and re-export the summary from an existing
// run directory's write-ahead log (run_log.jsonl); no new requests are made.
async function runReport(config) {
  // For this command, benchmark.inputFile holds a run output DIRECTORY.
  const inputDir = config.benchmark.inputFile;
  if (!inputDir) {
    throw new Error(
      "report command requires benchmark.inputFile (path to run output dir containing run_log.jsonl)"
    );
  }
  stageRun(`Reading results from ${cyan(inputDir)}...`);
  const results = WAL.readLog(inputDir);
  // Erase the transient "Reading..." line (cursor up + clear line).
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Reading results", `${results.length} entries`);
  stageRun("Computing report...");
  const summary = computeSummary(results);
  const exporters = createExporters(config);
  for (const exporter of exporters) {
    // Exporters write into the same directory the results were read from.
    await exporter.export(summary, results, inputDir);
  }
  process.stdout.write("\x1B[1A\x1B[2K");
  stageOk("Computing report");
  printSummary(summary);
}
2155
/** Build the configured prompt generator and produce the requested number of prompts (default 100 when no request cap is set). */
function generatePrompts(config) {
  const howMany = config.benchmark.maxRequests ?? 100;
  return createGenerator(config).generate(howMany);
}
2160
// Entry point: run the CLI, print any fatal error in red, and exit non-zero.
main().catch((err) => {
  console.error(red(err instanceof Error ? err.message : String(err)));
  process.exit(1);
});