@flotorch/loadtest 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/index.js +35 -16
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -222,6 +222,7 @@ function StatsPanel({
222
222
  recentTtft,
223
223
  recentE2eLatency,
224
224
  errors,
225
+ emptyResponses,
225
226
  completed
226
227
  }) {
227
228
  const sortedTtft = [...recentTtft].sort((a, b) => a - b);
@@ -268,6 +269,14 @@ function StatsPanel({
268
269
  " (",
269
270
  errRate,
270
271
  "%)"
272
+ ] }),
273
+ /* @__PURE__ */ jsx4(Text4, { children: " " }),
274
+ /* @__PURE__ */ jsx4(Text4, { dimColor: true, children: "Empty " }),
275
+ /* @__PURE__ */ jsxs4(Text4, { color: emptyResponses > 0 ? "yellow" : void 0, bold: emptyResponses > 0, children: [
276
+ emptyResponses,
277
+ " (",
278
+ completed > 0 ? (emptyResponses / completed * 100).toFixed(1) : "0.0",
279
+ "%)"
271
280
  ] })
272
281
  ] })
273
282
  ] });
@@ -357,6 +366,7 @@ function App({ store }) {
357
366
  recentTtft: snap.recentTtft,
358
367
  recentE2eLatency: snap.recentE2eLatency,
359
368
  errors: snap.errors,
369
+ emptyResponses: snap.emptyResponses,
360
370
  completed: snap.completed
361
371
  }
362
372
  ) }),
@@ -455,7 +465,7 @@ var cyan = wrap("36", "39");
455
465
  var magenta = wrap("35", "39");
456
466
 
457
467
  // src/cli/args.ts
458
- var VERSION = true ? "0.2.3" : "dev";
468
+ var VERSION = true ? "0.2.4" : "dev";
459
469
  var VALID_COMMANDS = /* @__PURE__ */ new Set(["run", "generate", "bench", "report", "init"]);
460
470
  var HELP_TEXT = `
461
471
  ${bold("FLOTorch Load Tester")} ${dim(`v${VERSION}`)}
@@ -1401,17 +1411,18 @@ async function executeRequest(backend, prompt2, config, requestId, phase, cacheH
1401
1411
  );
1402
1412
  const endTime = performance.now();
1403
1413
  const e2eLatencyMs = endTime - startTime;
1414
+ const isEmpty = response.outputTokens === 0 && response.generatedText === "";
1404
1415
  return {
1405
1416
  requestId,
1406
1417
  startTime,
1407
1418
  endTime,
1408
- ttftMs: response.ttftMs,
1419
+ ttftMs: isEmpty ? -1 : response.ttftMs,
1409
1420
  e2eLatencyMs,
1410
1421
  interTokenLatencies: response.interTokenLatencies,
1411
1422
  inputText: prompt2.text,
1412
1423
  inputTokens: prompt2.tokenCount,
1413
1424
  outputTokens: response.outputTokens,
1414
- outputThroughputTps: e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0,
1425
+ outputThroughputTps: isEmpty ? 0 : e2eLatencyMs > 0 ? response.outputTokens / (e2eLatencyMs / 1e3) : 0,
1415
1426
  generatedText: response.generatedText,
1416
1427
  phase,
1417
1428
  cacheHit
@@ -1563,13 +1574,15 @@ function aggregate(values) {
1563
1574
 
1564
1575
  // src/reporter/aggregator.ts
1565
1576
  function computeSummary(requests) {
1566
- const successful = requests.filter((r) => !r.error);
1567
1577
  const failed = requests.filter((r) => !!r.error);
1578
+ const successful = requests.filter((r) => !r.error);
1579
+ const empty = successful.filter((r) => r.ttftMs === -1);
1580
+ const completed = successful.filter((r) => r.ttftMs !== -1);
1568
1581
  const startTime = Math.min(...requests.map((r) => r.startTime));
1569
1582
  const endTime = Math.max(...requests.map((r) => r.endTime));
1570
1583
  const durationMs = endTime - startTime;
1571
1584
  const durationMin = durationMs / 6e4;
1572
- const totalOutputTokens = successful.reduce((sum, r) => sum + r.outputTokens, 0);
1585
+ const totalOutputTokens = completed.reduce((sum, r) => sum + r.outputTokens, 0);
1573
1586
  const errorCodeFrequency = {};
1574
1587
  for (const r of failed) {
1575
1588
  const code = r.errorCode ?? "unknown";
@@ -1588,24 +1601,25 @@ function computeSummary(requests) {
1588
1601
  };
1589
1602
  }
1590
1603
  }
1591
- const ttfntValues = successful.map((r) => r.ttfntMs).filter((v) => v !== void 0 && v > 0);
1592
- const itlValues = successful.flatMap((r) => r.interTokenLatencies);
1604
+ const ttfntValues = completed.map((r) => r.ttfntMs).filter((v) => v !== void 0 && v > 0);
1605
+ const itlValues = completed.flatMap((r) => r.interTokenLatencies);
1593
1606
  return {
1594
1607
  startTime,
1595
1608
  endTime,
1596
1609
  totalRequests: requests.length,
1597
- successfulRequests: successful.length,
1610
+ successfulRequests: completed.length,
1598
1611
  failedRequests: failed.length,
1612
+ emptyResponses: empty.length,
1599
1613
  errorRate: requests.length > 0 ? failed.length / requests.length : 0,
1600
1614
  rpm: durationMin > 0 ? requests.length / durationMin : 0,
1601
1615
  overallTps: durationMs > 0 ? totalOutputTokens / (durationMs / 1e3) : 0,
1602
- ttft: aggregate(successful.map((r) => r.ttftMs)),
1616
+ ttft: aggregate(completed.map((r) => r.ttftMs)),
1603
1617
  ttfnt: ttfntValues.length > 0 ? aggregate(ttfntValues) : void 0,
1604
- e2eLatency: aggregate(successful.map((r) => r.e2eLatencyMs)),
1605
- outputThroughput: aggregate(successful.map((r) => r.outputThroughputTps)),
1618
+ e2eLatency: aggregate(completed.map((r) => r.e2eLatencyMs)),
1619
+ outputThroughput: aggregate(completed.map((r) => r.outputThroughputTps)),
1606
1620
  interTokenLatency: aggregate(itlValues),
1607
- inputTokens: aggregate(successful.map((r) => r.inputTokens)),
1608
- outputTokens: aggregate(successful.map((r) => r.outputTokens)),
1621
+ inputTokens: aggregate(completed.map((r) => r.inputTokens)),
1622
+ outputTokens: aggregate(completed.map((r) => r.outputTokens)),
1609
1623
  errorCodeFrequency,
1610
1624
  cacheHitRate: requests.length > 0 ? cacheHits / requests.length : 0,
1611
1625
  phaseBreakdown
@@ -1663,6 +1677,7 @@ var CsvExporter = class {
1663
1677
  "totalRequests",
1664
1678
  "successfulRequests",
1665
1679
  "failedRequests",
1680
+ "emptyResponses",
1666
1681
  "errorRate",
1667
1682
  "rpm",
1668
1683
  "overallTps",
@@ -1672,6 +1687,7 @@ var CsvExporter = class {
1672
1687
  summary.totalRequests,
1673
1688
  summary.successfulRequests,
1674
1689
  summary.failedRequests,
1690
+ summary.emptyResponses,
1675
1691
  summary.errorRate,
1676
1692
  summary.rpm,
1677
1693
  summary.overallTps,
@@ -1780,6 +1796,7 @@ var BenchmarkStore = class {
1780
1796
  completed = 0;
1781
1797
  totalTarget;
1782
1798
  errors = 0;
1799
+ emptyResponses = 0;
1783
1800
  totalOutputTokens = 0;
1784
1801
  totalInputTokens = 0;
1785
1802
  startTime = 0;
@@ -1805,8 +1822,9 @@ var BenchmarkStore = class {
1805
1822
  this.errors++;
1806
1823
  if (this.recentErrors.length >= 5) this.recentErrors.shift();
1807
1824
  this.recentErrors.push(metrics.error);
1808
- }
1809
- if (!metrics.error) {
1825
+ } else if (metrics.ttftMs === -1) {
1826
+ this.emptyResponses++;
1827
+ } else {
1810
1828
  if (this.recentTtft.length >= ROLLING_CAP) this.recentTtft.shift();
1811
1829
  this.recentTtft.push(metrics.ttftMs);
1812
1830
  if (this.recentE2eLatency.length >= ROLLING_CAP) this.recentE2eLatency.shift();
@@ -1829,6 +1847,7 @@ var BenchmarkStore = class {
1829
1847
  completed: this.completed,
1830
1848
  totalTarget: this.totalTarget,
1831
1849
  errors: this.errors,
1850
+ emptyResponses: this.emptyResponses,
1832
1851
  totalOutputTokens: this.totalOutputTokens,
1833
1852
  totalInputTokens: this.totalInputTokens,
1834
1853
  startTime: this.startTime,
@@ -1878,7 +1897,7 @@ var FallbackDisplay = class {
1878
1897
  const bar = "=".repeat(filled) + ".".repeat(barWidth - filled);
1879
1898
  process.stderr.write(
1880
1899
  ` [${bar}] ${s.completed}/${totalStr}${pctStr}
1881
- ${formatDuration(elapsed)} | ${rps.toFixed(1)} req/s | ${tps.toFixed(0)} tok/s | ${s.errors} err
1900
+ ${formatDuration(elapsed)} | ${rps.toFixed(1)} req/s | ${tps.toFixed(0)} tok/s | ${s.errors} err | ${s.emptyResponses} empty
1882
1901
  `
1883
1902
  );
1884
1903
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flotorch/loadtest",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "LLM inference load testing and benchmarking tool",
5
5
  "license": "MIT",
6
6
  "repository": {