@flotorch/loadtest 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +52 -29
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -222,6 +222,7 @@ function StatsPanel({
|
|
|
222
222
|
recentTtft,
|
|
223
223
|
recentE2eLatency,
|
|
224
224
|
errors,
|
|
225
|
+
emptyResponses,
|
|
225
226
|
completed
|
|
226
227
|
}) {
|
|
227
228
|
const sortedTtft = [...recentTtft].sort((a, b) => a - b);
|
|
@@ -268,6 +269,14 @@ function StatsPanel({
|
|
|
268
269
|
" (",
|
|
269
270
|
errRate,
|
|
270
271
|
"%)"
|
|
272
|
+
] }),
|
|
273
|
+
/* @__PURE__ */ jsx4(Text4, { children: " " }),
|
|
274
|
+
/* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Empty " }),
|
|
275
|
+
/* @__PURE__ */ jsxs4(Text4, { color: emptyResponses > 0 ? "yellow" : void 0, bold: emptyResponses > 0, children: [
|
|
276
|
+
emptyResponses,
|
|
277
|
+
" (",
|
|
278
|
+
completed > 0 ? (emptyResponses / completed * 100).toFixed(1) : "0.0",
|
|
279
|
+
"%)"
|
|
271
280
|
] })
|
|
272
281
|
] })
|
|
273
282
|
] });
|
|
@@ -357,6 +366,7 @@ function App({ store }) {
|
|
|
357
366
|
recentTtft: snap.recentTtft,
|
|
358
367
|
recentE2eLatency: snap.recentE2eLatency,
|
|
359
368
|
errors: snap.errors,
|
|
369
|
+
emptyResponses: snap.emptyResponses,
|
|
360
370
|
completed: snap.completed
|
|
361
371
|
}
|
|
362
372
|
) }),
|
|
@@ -455,7 +465,7 @@ var cyan = wrap("36", "39");
|
|
|
455
465
|
var magenta = wrap("35", "39");
|
|
456
466
|
|
|
457
467
|
// src/cli/args.ts
|
|
458
|
-
var VERSION = true ? "0.2.2" : "dev";
|
|
468
|
+
var VERSION = true ? "0.2.4" : "dev";
|
|
459
469
|
var VALID_COMMANDS = /* @__PURE__ */ new Set(["run", "generate", "bench", "report", "init"]);
|
|
460
470
|
var HELP_TEXT = `
|
|
461
471
|
${bold("FLOTorch Load Tester")} ${dim(`v${VERSION}`)}
|
|
@@ -882,12 +892,13 @@ var OpenAIBackend = class _OpenAIBackend {
|
|
|
882
892
|
}
|
|
883
893
|
messages.push({ role: "user", content: prompt2 });
|
|
884
894
|
const body = {
|
|
885
|
-
model,
|
|
886
895
|
messages,
|
|
887
896
|
stream: streaming,
|
|
888
|
-
...params
|
|
897
|
+
...params,
|
|
898
|
+
model,
|
|
899
|
+
max_tokens: maxTokens
|
|
889
900
|
};
|
|
890
|
-
if (
|
|
901
|
+
if (this.isOpenAIHost()) {
|
|
891
902
|
body.max_completion_tokens = body.max_tokens;
|
|
892
903
|
delete body.max_tokens;
|
|
893
904
|
}
|
|
@@ -1017,7 +1028,7 @@ var SageMakerBackend = class _SageMakerBackend {
|
|
|
1017
1028
|
}
|
|
1018
1029
|
constructor(config) {
|
|
1019
1030
|
this.baseURL = config.baseURL ?? `https://runtime.sagemaker.${config.region}.amazonaws.com`;
|
|
1020
|
-
this.requestFormat = config.requestFormat ?? "
|
|
1031
|
+
this.requestFormat = config.requestFormat ?? "openai" /* OpenAI */;
|
|
1021
1032
|
this.signer = new SignatureV4({
|
|
1022
1033
|
service: "sagemaker",
|
|
1023
1034
|
region: config.region,
|
|
@@ -1068,22 +1079,25 @@ var SageMakerBackend = class _SageMakerBackend {
|
|
|
1068
1079
|
return this.parseResponse(response);
|
|
1069
1080
|
}
|
|
1070
1081
|
buildRequestBody(prompt2, maxTokens, systemPrompt, params, streaming) {
|
|
1071
|
-
const messages = [];
|
|
1072
|
-
if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
|
|
1073
|
-
messages.push({ role: "user", content: prompt2 });
|
|
1074
1082
|
if (this.requestFormat === "openai" /* OpenAI */) {
|
|
1083
|
+
const messages = [];
|
|
1084
|
+
if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
|
|
1085
|
+
messages.push({ role: "user", content: prompt2 });
|
|
1075
1086
|
return {
|
|
1076
1087
|
messages,
|
|
1077
|
-
max_tokens: maxTokens,
|
|
1078
1088
|
stream: streaming,
|
|
1079
|
-
...params
|
|
1089
|
+
...params,
|
|
1090
|
+
max_tokens: maxTokens
|
|
1080
1091
|
};
|
|
1081
1092
|
}
|
|
1093
|
+
const rawPrompt = systemPrompt ? `${systemPrompt}
|
|
1094
|
+
|
|
1095
|
+
${prompt2}` : prompt2;
|
|
1082
1096
|
return {
|
|
1083
|
-
inputs:
|
|
1097
|
+
inputs: rawPrompt,
|
|
1084
1098
|
parameters: {
|
|
1085
|
-
|
|
1086
|
-
|
|
1099
|
+
...params,
|
|
1100
|
+
max_new_tokens: maxTokens
|
|
1087
1101
|
}
|
|
1088
1102
|
};
|
|
1089
1103
|
}
|
|
@@ -1262,7 +1276,7 @@ function createBackend(config) {
|
|
|
1262
1276
|
case "openai":
|
|
1263
1277
|
return OpenAIBackend.create(baseURL);
|
|
1264
1278
|
case "sagemaker": {
|
|
1265
|
-
const requestFormat = config.provider.config?.["requestFormat"] ?? "
|
|
1279
|
+
const requestFormat = config.provider.config?.["requestFormat"] ?? "openai" /* OpenAI */;
|
|
1266
1280
|
return SageMakerBackend.create(baseURL, requestFormat);
|
|
1267
1281
|
}
|
|
1268
1282
|
default:
|
|
@@ -1397,17 +1411,18 @@ async function executeRequest(backend, prompt2, config, requestId, phase, cacheH
|
|
|
1397
1411
|
);
|
|
1398
1412
|
const endTime = performance.now();
|
|
1399
1413
|
const e2eLatencyMs = endTime - startTime;
|
|
1414
|
+
const isEmpty = response.outputTokens === 0 && response.generatedText === "";
|
|
1400
1415
|
return {
|
|
1401
1416
|
requestId,
|
|
1402
1417
|
startTime,
|
|
1403
1418
|
endTime,
|
|
1404
|
-
ttftMs: response.ttftMs,
|
|
1419
|
+
ttftMs: isEmpty ? -1 : response.ttftMs,
|
|
1405
1420
|
e2eLatencyMs,
|
|
1406
1421
|
interTokenLatencies: response.interTokenLatencies,
|
|
1407
1422
|
inputText: prompt2.text,
|
|
1408
1423
|
inputTokens: prompt2.tokenCount,
|
|
1409
1424
|
outputTokens: response.outputTokens,
|
|
1410
|
-
outputThroughputTps: e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0,
|
|
1425
|
+
outputThroughputTps: isEmpty ? 0 : e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0,
|
|
1411
1426
|
generatedText: response.generatedText,
|
|
1412
1427
|
phase,
|
|
1413
1428
|
cacheHit
|
|
@@ -1559,13 +1574,15 @@ function aggregate(values) {
|
|
|
1559
1574
|
|
|
1560
1575
|
// src/reporter/aggregator.ts
|
|
1561
1576
|
function computeSummary(requests) {
|
|
1562
|
-
const successful = requests.filter((r) => !r.error);
|
|
1563
1577
|
const failed = requests.filter((r) => !!r.error);
|
|
1578
|
+
const successful = requests.filter((r) => !r.error);
|
|
1579
|
+
const empty = successful.filter((r) => r.ttftMs === -1);
|
|
1580
|
+
const completed = successful.filter((r) => r.ttftMs !== -1);
|
|
1564
1581
|
const startTime = Math.min(...requests.map((r) => r.startTime));
|
|
1565
1582
|
const endTime = Math.max(...requests.map((r) => r.endTime));
|
|
1566
1583
|
const durationMs = endTime - startTime;
|
|
1567
1584
|
const durationMin = durationMs / 6e4;
|
|
1568
|
-
const totalOutputTokens =
|
|
1585
|
+
const totalOutputTokens = completed.reduce((sum, r) => sum + r.outputTokens, 0);
|
|
1569
1586
|
const errorCodeFrequency = {};
|
|
1570
1587
|
for (const r of failed) {
|
|
1571
1588
|
const code = r.errorCode ?? "unknown";
|
|
@@ -1584,24 +1601,25 @@ function computeSummary(requests) {
|
|
|
1584
1601
|
};
|
|
1585
1602
|
}
|
|
1586
1603
|
}
|
|
1587
|
-
const ttfntValues =
|
|
1588
|
-
const itlValues =
|
|
1604
|
+
const ttfntValues = completed.map((r) => r.ttfntMs).filter((v) => v !== void 0 && v > 0);
|
|
1605
|
+
const itlValues = completed.flatMap((r) => r.interTokenLatencies);
|
|
1589
1606
|
return {
|
|
1590
1607
|
startTime,
|
|
1591
1608
|
endTime,
|
|
1592
1609
|
totalRequests: requests.length,
|
|
1593
|
-
successfulRequests:
|
|
1610
|
+
successfulRequests: completed.length,
|
|
1594
1611
|
failedRequests: failed.length,
|
|
1612
|
+
emptyResponses: empty.length,
|
|
1595
1613
|
errorRate: requests.length > 0 ? failed.length / requests.length : 0,
|
|
1596
1614
|
rpm: durationMin > 0 ? requests.length / durationMin : 0,
|
|
1597
1615
|
overallTps: durationMs > 0 ? totalOutputTokens / (durationMs / 1e3) : 0,
|
|
1598
|
-
ttft: aggregate(
|
|
1616
|
+
ttft: aggregate(completed.map((r) => r.ttftMs)),
|
|
1599
1617
|
ttfnt: ttfntValues.length > 0 ? aggregate(ttfntValues) : void 0,
|
|
1600
|
-
e2eLatency: aggregate(
|
|
1601
|
-
outputThroughput: aggregate(
|
|
1618
|
+
e2eLatency: aggregate(completed.map((r) => r.e2eLatencyMs)),
|
|
1619
|
+
outputThroughput: aggregate(completed.map((r) => r.outputThroughputTps)),
|
|
1602
1620
|
interTokenLatency: aggregate(itlValues),
|
|
1603
|
-
inputTokens: aggregate(
|
|
1604
|
-
outputTokens: aggregate(
|
|
1621
|
+
inputTokens: aggregate(completed.map((r) => r.inputTokens)),
|
|
1622
|
+
outputTokens: aggregate(completed.map((r) => r.outputTokens)),
|
|
1605
1623
|
errorCodeFrequency,
|
|
1606
1624
|
cacheHitRate: requests.length > 0 ? cacheHits / requests.length : 0,
|
|
1607
1625
|
phaseBreakdown
|
|
@@ -1659,6 +1677,7 @@ var CsvExporter = class {
|
|
|
1659
1677
|
"totalRequests",
|
|
1660
1678
|
"successfulRequests",
|
|
1661
1679
|
"failedRequests",
|
|
1680
|
+
"emptyResponses",
|
|
1662
1681
|
"errorRate",
|
|
1663
1682
|
"rpm",
|
|
1664
1683
|
"overallTps",
|
|
@@ -1668,6 +1687,7 @@ var CsvExporter = class {
|
|
|
1668
1687
|
summary.totalRequests,
|
|
1669
1688
|
summary.successfulRequests,
|
|
1670
1689
|
summary.failedRequests,
|
|
1690
|
+
summary.emptyResponses,
|
|
1671
1691
|
summary.errorRate,
|
|
1672
1692
|
summary.rpm,
|
|
1673
1693
|
summary.overallTps,
|
|
@@ -1776,6 +1796,7 @@ var BenchmarkStore = class {
|
|
|
1776
1796
|
completed = 0;
|
|
1777
1797
|
totalTarget;
|
|
1778
1798
|
errors = 0;
|
|
1799
|
+
emptyResponses = 0;
|
|
1779
1800
|
totalOutputTokens = 0;
|
|
1780
1801
|
totalInputTokens = 0;
|
|
1781
1802
|
startTime = 0;
|
|
@@ -1801,8 +1822,9 @@ var BenchmarkStore = class {
|
|
|
1801
1822
|
this.errors++;
|
|
1802
1823
|
if (this.recentErrors.length >= 5) this.recentErrors.shift();
|
|
1803
1824
|
this.recentErrors.push(metrics.error);
|
|
1804
|
-
}
|
|
1805
|
-
|
|
1825
|
+
} else if (metrics.ttftMs === -1) {
|
|
1826
|
+
this.emptyResponses++;
|
|
1827
|
+
} else {
|
|
1806
1828
|
if (this.recentTtft.length >= ROLLING_CAP) this.recentTtft.shift();
|
|
1807
1829
|
this.recentTtft.push(metrics.ttftMs);
|
|
1808
1830
|
if (this.recentE2eLatency.length >= ROLLING_CAP) this.recentE2eLatency.shift();
|
|
@@ -1825,6 +1847,7 @@ var BenchmarkStore = class {
|
|
|
1825
1847
|
completed: this.completed,
|
|
1826
1848
|
totalTarget: this.totalTarget,
|
|
1827
1849
|
errors: this.errors,
|
|
1850
|
+
emptyResponses: this.emptyResponses,
|
|
1828
1851
|
totalOutputTokens: this.totalOutputTokens,
|
|
1829
1852
|
totalInputTokens: this.totalInputTokens,
|
|
1830
1853
|
startTime: this.startTime,
|
|
@@ -1874,7 +1897,7 @@ var FallbackDisplay = class {
|
|
|
1874
1897
|
const bar = "=".repeat(filled) + ".".repeat(barWidth - filled);
|
|
1875
1898
|
process.stderr.write(
|
|
1876
1899
|
` [${bar}] ${s.completed}/${totalStr}${pctStr}
|
|
1877
|
-
${formatDuration(elapsed)} | ${rps.toFixed(1)} req/s | ${tps.toFixed(0)} tok/s | ${s.errors} err
|
|
1900
|
+
${formatDuration(elapsed)} | ${rps.toFixed(1)} req/s | ${tps.toFixed(0)} tok/s | ${s.errors} err | ${s.emptyResponses} empty
|
|
1878
1901
|
`
|
|
1879
1902
|
);
|
|
1880
1903
|
}
|