@flotorch/loadtest 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +52 -29
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -222,6 +222,7 @@ function StatsPanel({
222
222
  recentTtft,
223
223
  recentE2eLatency,
224
224
  errors,
225
+ emptyResponses,
225
226
  completed
226
227
  }) {
227
228
  const sortedTtft = [...recentTtft].sort((a, b) => a - b);
@@ -268,6 +269,14 @@ function StatsPanel({
268
269
  " (",
269
270
  errRate,
270
271
  "%)"
272
+ ] }),
273
+ /* @__PURE__ */ jsx4(Text4, { children: " " }),
274
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Empty " }),
275
+ /* @__PURE__ */ jsxs4(Text4, { color: emptyResponses > 0 ? "yellow" : void 0, bold: emptyResponses > 0, children: [
276
+ emptyResponses,
277
+ " (",
278
+ completed > 0 ? (emptyResponses / completed * 100).toFixed(1) : "0.0",
279
+ "%)"
271
280
  ] })
272
281
  ] })
273
282
  ] });
@@ -357,6 +366,7 @@ function App({ store }) {
357
366
  recentTtft: snap.recentTtft,
358
367
  recentE2eLatency: snap.recentE2eLatency,
359
368
  errors: snap.errors,
369
+ emptyResponses: snap.emptyResponses,
360
370
  completed: snap.completed
361
371
  }
362
372
  ) }),
@@ -455,7 +465,7 @@ var cyan = wrap("36", "39");
455
465
  var magenta = wrap("35", "39");
456
466
 
457
467
  // src/cli/args.ts
458
- var VERSION = true ? "0.2.2" : "dev";
468
+ var VERSION = true ? "0.2.4" : "dev";
459
469
  var VALID_COMMANDS = /* @__PURE__ */ new Set(["run", "generate", "bench", "report", "init"]);
460
470
  var HELP_TEXT = `
461
471
  ${bold("FLOTorch Load Tester")} ${dim(`v${VERSION}`)}
@@ -882,12 +892,13 @@ var OpenAIBackend = class _OpenAIBackend {
882
892
  }
883
893
  messages.push({ role: "user", content: prompt2 });
884
894
  const body = {
885
- model,
886
895
  messages,
887
896
  stream: streaming,
888
- ...params
897
+ ...params,
898
+ model,
899
+ max_tokens: maxTokens
889
900
  };
890
- if (body.max_tokens && this.isOpenAIHost()) {
901
+ if (this.isOpenAIHost()) {
891
902
  body.max_completion_tokens = body.max_tokens;
892
903
  delete body.max_tokens;
893
904
  }
@@ -1017,7 +1028,7 @@ var SageMakerBackend = class _SageMakerBackend {
1017
1028
  }
1018
1029
  constructor(config) {
1019
1030
  this.baseURL = config.baseURL ?? `https://runtime.sagemaker.${config.region}.amazonaws.com`;
1020
- this.requestFormat = config.requestFormat ?? "sagemaker" /* Sagemaker */;
1031
+ this.requestFormat = config.requestFormat ?? "openai" /* OpenAI */;
1021
1032
  this.signer = new SignatureV4({
1022
1033
  service: "sagemaker",
1023
1034
  region: config.region,
@@ -1068,22 +1079,25 @@ var SageMakerBackend = class _SageMakerBackend {
1068
1079
  return this.parseResponse(response);
1069
1080
  }
1070
1081
  buildRequestBody(prompt2, maxTokens, systemPrompt, params, streaming) {
1071
- const messages = [];
1072
- if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
1073
- messages.push({ role: "user", content: prompt2 });
1074
1082
  if (this.requestFormat === "openai" /* OpenAI */) {
1083
+ const messages = [];
1084
+ if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
1085
+ messages.push({ role: "user", content: prompt2 });
1075
1086
  return {
1076
1087
  messages,
1077
- max_tokens: maxTokens,
1078
1088
  stream: streaming,
1079
- ...params
1089
+ ...params,
1090
+ max_tokens: maxTokens
1080
1091
  };
1081
1092
  }
1093
+ const rawPrompt = systemPrompt ? `${systemPrompt}
1094
+
1095
+ ${prompt2}` : prompt2;
1082
1096
  return {
1083
- inputs: [messages],
1097
+ inputs: rawPrompt,
1084
1098
  parameters: {
1085
- max_new_tokens: maxTokens,
1086
- ...params
1099
+ ...params,
1100
+ max_new_tokens: maxTokens
1087
1101
  }
1088
1102
  };
1089
1103
  }
@@ -1262,7 +1276,7 @@ function createBackend(config) {
1262
1276
  case "openai":
1263
1277
  return OpenAIBackend.create(baseURL);
1264
1278
  case "sagemaker": {
1265
- const requestFormat = config.provider.config?.["requestFormat"] ?? "sagemaker" /* Sagemaker */;
1279
+ const requestFormat = config.provider.config?.["requestFormat"] ?? "openai" /* OpenAI */;
1266
1280
  return SageMakerBackend.create(baseURL, requestFormat);
1267
1281
  }
1268
1282
  default:
@@ -1397,17 +1411,18 @@ async function executeRequest(backend, prompt2, config, requestId, phase, cacheH
1397
1411
  );
1398
1412
  const endTime = performance.now();
1399
1413
  const e2eLatencyMs = endTime - startTime;
1414
+ const isEmpty = response.outputTokens === 0 && response.generatedText === "";
1400
1415
  return {
1401
1416
  requestId,
1402
1417
  startTime,
1403
1418
  endTime,
1404
- ttftMs: response.ttftMs,
1419
+ ttftMs: isEmpty ? -1 : response.ttftMs,
1405
1420
  e2eLatencyMs,
1406
1421
  interTokenLatencies: response.interTokenLatencies,
1407
1422
  inputText: prompt2.text,
1408
1423
  inputTokens: prompt2.tokenCount,
1409
1424
  outputTokens: response.outputTokens,
1410
- outputThroughputTps: e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0,
1425
+ outputThroughputTps: isEmpty ? 0 : e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0,
1411
1426
  generatedText: response.generatedText,
1412
1427
  phase,
1413
1428
  cacheHit
@@ -1559,13 +1574,15 @@ function aggregate(values) {
1559
1574
 
1560
1575
  // src/reporter/aggregator.ts
1561
1576
  function computeSummary(requests) {
1562
- const successful = requests.filter((r) => !r.error);
1563
1577
  const failed = requests.filter((r) => !!r.error);
1578
+ const successful = requests.filter((r) => !r.error);
1579
+ const empty = successful.filter((r) => r.ttftMs === -1);
1580
+ const completed = successful.filter((r) => r.ttftMs !== -1);
1564
1581
  const startTime = Math.min(...requests.map((r) => r.startTime));
1565
1582
  const endTime = Math.max(...requests.map((r) => r.endTime));
1566
1583
  const durationMs = endTime - startTime;
1567
1584
  const durationMin = durationMs / 6e4;
1568
- const totalOutputTokens = successful.reduce((sum, r) => sum + r.outputTokens, 0);
1585
+ const totalOutputTokens = completed.reduce((sum, r) => sum + r.outputTokens, 0);
1569
1586
  const errorCodeFrequency = {};
1570
1587
  for (const r of failed) {
1571
1588
  const code = r.errorCode ?? "unknown";
@@ -1584,24 +1601,25 @@ function computeSummary(requests) {
1584
1601
  };
1585
1602
  }
1586
1603
  }
1587
- const ttfntValues = successful.map((r) => r.ttfntMs).filter((v) => v !== void 0 && v > 0);
1588
- const itlValues = successful.flatMap((r) => r.interTokenLatencies);
1604
+ const ttfntValues = completed.map((r) => r.ttfntMs).filter((v) => v !== void 0 && v > 0);
1605
+ const itlValues = completed.flatMap((r) => r.interTokenLatencies);
1589
1606
  return {
1590
1607
  startTime,
1591
1608
  endTime,
1592
1609
  totalRequests: requests.length,
1593
- successfulRequests: successful.length,
1610
+ successfulRequests: completed.length,
1594
1611
  failedRequests: failed.length,
1612
+ emptyResponses: empty.length,
1595
1613
  errorRate: requests.length > 0 ? failed.length / requests.length : 0,
1596
1614
  rpm: durationMin > 0 ? requests.length / durationMin : 0,
1597
1615
  overallTps: durationMs > 0 ? totalOutputTokens / (durationMs / 1e3) : 0,
1598
- ttft: aggregate(successful.map((r) => r.ttftMs)),
1616
+ ttft: aggregate(completed.map((r) => r.ttftMs)),
1599
1617
  ttfnt: ttfntValues.length > 0 ? aggregate(ttfntValues) : void 0,
1600
- e2eLatency: aggregate(successful.map((r) => r.e2eLatencyMs)),
1601
- outputThroughput: aggregate(successful.map((r) => r.outputThroughputTps)),
1618
+ e2eLatency: aggregate(completed.map((r) => r.e2eLatencyMs)),
1619
+ outputThroughput: aggregate(completed.map((r) => r.outputThroughputTps)),
1602
1620
  interTokenLatency: aggregate(itlValues),
1603
- inputTokens: aggregate(successful.map((r) => r.inputTokens)),
1604
- outputTokens: aggregate(successful.map((r) => r.outputTokens)),
1621
+ inputTokens: aggregate(completed.map((r) => r.inputTokens)),
1622
+ outputTokens: aggregate(completed.map((r) => r.outputTokens)),
1605
1623
  errorCodeFrequency,
1606
1624
  cacheHitRate: requests.length > 0 ? cacheHits / requests.length : 0,
1607
1625
  phaseBreakdown
@@ -1659,6 +1677,7 @@ var CsvExporter = class {
1659
1677
  "totalRequests",
1660
1678
  "successfulRequests",
1661
1679
  "failedRequests",
1680
+ "emptyResponses",
1662
1681
  "errorRate",
1663
1682
  "rpm",
1664
1683
  "overallTps",
@@ -1668,6 +1687,7 @@ var CsvExporter = class {
1668
1687
  summary.totalRequests,
1669
1688
  summary.successfulRequests,
1670
1689
  summary.failedRequests,
1690
+ summary.emptyResponses,
1671
1691
  summary.errorRate,
1672
1692
  summary.rpm,
1673
1693
  summary.overallTps,
@@ -1776,6 +1796,7 @@ var BenchmarkStore = class {
1776
1796
  completed = 0;
1777
1797
  totalTarget;
1778
1798
  errors = 0;
1799
+ emptyResponses = 0;
1779
1800
  totalOutputTokens = 0;
1780
1801
  totalInputTokens = 0;
1781
1802
  startTime = 0;
@@ -1801,8 +1822,9 @@ var BenchmarkStore = class {
1801
1822
  this.errors++;
1802
1823
  if (this.recentErrors.length >= 5) this.recentErrors.shift();
1803
1824
  this.recentErrors.push(metrics.error);
1804
- }
1805
- if (!metrics.error) {
1825
+ } else if (metrics.ttftMs === -1) {
1826
+ this.emptyResponses++;
1827
+ } else {
1806
1828
  if (this.recentTtft.length >= ROLLING_CAP) this.recentTtft.shift();
1807
1829
  this.recentTtft.push(metrics.ttftMs);
1808
1830
  if (this.recentE2eLatency.length >= ROLLING_CAP) this.recentE2eLatency.shift();
@@ -1825,6 +1847,7 @@ var BenchmarkStore = class {
1825
1847
  completed: this.completed,
1826
1848
  totalTarget: this.totalTarget,
1827
1849
  errors: this.errors,
1850
+ emptyResponses: this.emptyResponses,
1828
1851
  totalOutputTokens: this.totalOutputTokens,
1829
1852
  totalInputTokens: this.totalInputTokens,
1830
1853
  startTime: this.startTime,
@@ -1874,7 +1897,7 @@ var FallbackDisplay = class {
1874
1897
  const bar = "=".repeat(filled) + ".".repeat(barWidth - filled);
1875
1898
  process.stderr.write(
1876
1899
  ` [${bar}] ${s.completed}/${totalStr}${pctStr}
1877
- ${formatDuration(elapsed)} | ${rps.toFixed(1)} req/s | ${tps.toFixed(0)} tok/s | ${s.errors} err
1900
+ ${formatDuration(elapsed)} | ${rps.toFixed(1)} req/s | ${tps.toFixed(0)} tok/s | ${s.errors} err | ${s.emptyResponses} empty
1878
1901
  `
1879
1902
  );
1880
1903
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flotorch/loadtest",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "description": "LLM inference load testing and benchmarking tool",
5
5
  "license": "MIT",
6
6
  "repository": {