@durable-streams/client-conformance-tests 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +451 -0
  2. package/dist/adapters/typescript-adapter.d.ts +1 -0
  3. package/dist/adapters/typescript-adapter.js +586 -0
  4. package/dist/benchmark-runner-C_Yghc8f.js +1333 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +265 -0
  7. package/dist/index.d.ts +508 -0
  8. package/dist/index.js +4 -0
  9. package/dist/protocol-DyEvTHPF.d.ts +472 -0
  10. package/dist/protocol-qb83AeUH.js +120 -0
  11. package/dist/protocol.d.ts +2 -0
  12. package/dist/protocol.js +3 -0
  13. package/package.json +53 -0
  14. package/src/adapters/typescript-adapter.ts +848 -0
  15. package/src/benchmark-runner.ts +860 -0
  16. package/src/benchmark-scenarios.ts +311 -0
  17. package/src/cli.ts +294 -0
  18. package/src/index.ts +50 -0
  19. package/src/protocol.ts +656 -0
  20. package/src/runner.ts +1191 -0
  21. package/src/test-cases.ts +475 -0
  22. package/test-cases/consumer/cache-headers.yaml +150 -0
  23. package/test-cases/consumer/error-handling.yaml +108 -0
  24. package/test-cases/consumer/message-ordering.yaml +209 -0
  25. package/test-cases/consumer/offset-handling.yaml +209 -0
  26. package/test-cases/consumer/offset-resumption.yaml +197 -0
  27. package/test-cases/consumer/read-catchup.yaml +173 -0
  28. package/test-cases/consumer/read-longpoll.yaml +132 -0
  29. package/test-cases/consumer/read-sse.yaml +145 -0
  30. package/test-cases/consumer/retry-resilience.yaml +160 -0
  31. package/test-cases/consumer/streaming-equivalence.yaml +226 -0
  32. package/test-cases/lifecycle/dynamic-headers.yaml +147 -0
  33. package/test-cases/lifecycle/headers-params.yaml +117 -0
  34. package/test-cases/lifecycle/stream-lifecycle.yaml +148 -0
  35. package/test-cases/producer/append-data.yaml +142 -0
  36. package/test-cases/producer/batching.yaml +112 -0
  37. package/test-cases/producer/create-stream.yaml +87 -0
  38. package/test-cases/producer/error-handling.yaml +90 -0
  39. package/test-cases/producer/sequence-ordering.yaml +148 -0
@@ -0,0 +1,860 @@
1
+ /**
2
+ * Benchmark runner for client performance testing.
3
+ *
4
+ * Orchestrates:
5
+ * - Reference server lifecycle
6
+ * - Client adapter process spawning
7
+ * - Benchmark scenario execution
8
+ * - Statistical analysis and reporting
9
+ */
10
+
11
+ import { spawn } from "node:child_process"
12
+ import { createInterface } from "node:readline"
13
+ import { randomUUID } from "node:crypto"
14
+ import { DurableStreamTestServer } from "@durable-streams/server"
15
+ import { DurableStream } from "@durable-streams/client"
16
+ import {
17
+ calculateStats,
18
+ formatStats,
19
+ parseResult,
20
+ serializeCommand,
21
+ } from "./protocol.js"
22
+ import { allScenarios, getScenarioById } from "./benchmark-scenarios.js"
23
+ import type { Interface as ReadlineInterface } from "node:readline"
24
+ import type { ChildProcess } from "node:child_process"
25
+ import type {
26
+ BenchmarkOperation,
27
+ BenchmarkResult,
28
+ BenchmarkStats,
29
+ TestCommand,
30
+ TestResult,
31
+ } from "./protocol.js"
32
+ import type {
33
+ BenchmarkScenario,
34
+ ScenarioContext,
35
+ } from "./benchmark-scenarios.js"
36
+
37
+ // =============================================================================
38
+ // Types
39
+ // =============================================================================
40
+
41
+ export interface BenchmarkRunnerOptions {
42
+ /** Path to client adapter executable, or "ts" for built-in TypeScript adapter */
43
+ clientAdapter: string
44
+ /** Arguments to pass to client adapter */
45
+ clientArgs?: Array<string>
46
+ /** Specific scenarios to run (default: all) */
47
+ scenarios?: Array<string>
48
+ /** Categories to run (default: all) */
49
+ categories?: Array<`latency` | `throughput` | `streaming`>
50
+ /** Verbose output */
51
+ verbose?: boolean
52
+ /** Port for reference server (0 for random) */
53
+ serverPort?: number
54
+ /** Output format */
55
+ format?: `console` | `json` | `markdown`
56
+ }
57
+
58
+ export interface ScenarioResult {
59
+ scenario: BenchmarkScenario
60
+ stats: BenchmarkStats
61
+ criteriaMet: boolean
62
+ criteriaDetails: Array<CriteriaResult>
63
+ skipped: boolean
64
+ skipReason?: string
65
+ error?: string
66
+ /** Computed ops/sec for throughput scenarios */
67
+ opsPerSec?: number
68
+ /** Computed MB/sec for throughput scenarios */
69
+ mbPerSec?: number
70
+ }
71
+
72
+ export interface CriteriaResult {
73
+ criterion: string
74
+ met: boolean
75
+ actual: number
76
+ expected: number
77
+ }
78
+
79
+ export interface BenchmarkSummary {
80
+ adapter: string
81
+ adapterVersion: string
82
+ serverUrl: string
83
+ timestamp: string
84
+ duration: number
85
+ results: Array<ScenarioResult>
86
+ passed: number
87
+ failed: number
88
+ skipped: number
89
+ }
90
+
91
+ /** Client feature flags reported by the adapter */
92
+ interface ClientFeatures {
93
+ batching?: boolean
94
+ sse?: boolean
95
+ longPoll?: boolean
96
+ streaming?: boolean
97
+ }
98
+
99
+ // =============================================================================
100
+ // Client Adapter Communication
101
+ // =============================================================================
102
+
103
+ class BenchmarkClientAdapter {
104
+ private process: ChildProcess
105
+ private readline: ReadlineInterface
106
+ private pendingResponse: {
107
+ resolve: (result: TestResult) => void
108
+ reject: (error: Error) => void
109
+ } | null = null
110
+ private initialized = false
111
+
112
+ constructor(executable: string, args: Array<string> = []) {
113
+ this.process = spawn(executable, args, {
114
+ stdio: [`pipe`, `pipe`, `pipe`],
115
+ })
116
+
117
+ if (!this.process.stdout || !this.process.stdin) {
118
+ throw new Error(`Failed to create client adapter process`)
119
+ }
120
+
121
+ this.readline = createInterface({
122
+ input: this.process.stdout,
123
+ crlfDelay: Infinity,
124
+ })
125
+
126
+ this.readline.on(`line`, (line) => {
127
+ if (this.pendingResponse) {
128
+ try {
129
+ const result = parseResult(line)
130
+ this.pendingResponse.resolve(result)
131
+ } catch {
132
+ this.pendingResponse.reject(
133
+ new Error(`Failed to parse client response: ${line}`)
134
+ )
135
+ }
136
+ this.pendingResponse = null
137
+ }
138
+ })
139
+
140
+ this.process.stderr?.on(`data`, (data) => {
141
+ console.error(`[client stderr] ${data.toString().trim()}`)
142
+ })
143
+
144
+ this.process.on(`error`, (err) => {
145
+ if (this.pendingResponse) {
146
+ this.pendingResponse.reject(err)
147
+ this.pendingResponse = null
148
+ }
149
+ })
150
+
151
+ this.process.on(`exit`, (code) => {
152
+ if (this.pendingResponse) {
153
+ this.pendingResponse.reject(
154
+ new Error(`Client adapter exited with code ${code}`)
155
+ )
156
+ this.pendingResponse = null
157
+ }
158
+ })
159
+ }
160
+
161
+ async send(command: TestCommand, timeoutMs = 60000): Promise<TestResult> {
162
+ if (!this.process.stdin) {
163
+ throw new Error(`Client adapter stdin not available`)
164
+ }
165
+
166
+ return new Promise((resolve, reject) => {
167
+ const timeout = setTimeout(() => {
168
+ this.pendingResponse = null
169
+ reject(
170
+ new Error(`Command timed out after ${timeoutMs}ms: ${command.type}`)
171
+ )
172
+ }, timeoutMs)
173
+
174
+ this.pendingResponse = {
175
+ resolve: (result) => {
176
+ clearTimeout(timeout)
177
+ resolve(result)
178
+ },
179
+ reject: (error) => {
180
+ clearTimeout(timeout)
181
+ reject(error)
182
+ },
183
+ }
184
+
185
+ const line = serializeCommand(command) + `\n`
186
+ this.process.stdin!.write(line)
187
+ })
188
+ }
189
+
190
+ async init(serverUrl: string): Promise<TestResult> {
191
+ const result = await this.send({ type: `init`, serverUrl })
192
+ if (result.success) {
193
+ this.initialized = true
194
+ }
195
+ return result
196
+ }
197
+
198
+ async benchmark(
199
+ iterationId: string,
200
+ operation: BenchmarkOperation
201
+ ): Promise<BenchmarkResult | null> {
202
+ const result = await this.send({
203
+ type: `benchmark`,
204
+ iterationId,
205
+ operation,
206
+ })
207
+
208
+ if (result.type === `benchmark`) {
209
+ return result
210
+ }
211
+
212
+ return null
213
+ }
214
+
215
+ async shutdown(): Promise<void> {
216
+ if (this.initialized) {
217
+ try {
218
+ await this.send({ type: `shutdown` }, 5000)
219
+ } catch {
220
+ // Ignore shutdown errors
221
+ }
222
+ }
223
+ this.process.kill()
224
+ this.readline.close()
225
+ }
226
+
227
+ isInitialized(): boolean {
228
+ return this.initialized
229
+ }
230
+ }
231
+
232
+ // =============================================================================
233
+ // Scenario Execution
234
+ // =============================================================================
235
+
236
+ async function runScenario(
237
+ scenario: BenchmarkScenario,
238
+ client: BenchmarkClientAdapter,
239
+ serverUrl: string,
240
+ clientFeatures: ClientFeatures,
241
+ verbose: boolean,
242
+ log: (message: string) => void
243
+ ): Promise<ScenarioResult> {
244
+ // Check required features
245
+ if (scenario.requires) {
246
+ const missing = scenario.requires.filter((f) => !clientFeatures[f])
247
+ if (missing.length > 0) {
248
+ return {
249
+ scenario,
250
+ stats: calculateStats([]),
251
+ criteriaMet: true,
252
+ criteriaDetails: [],
253
+ skipped: true,
254
+ skipReason: `missing features: ${missing.join(`, `)}`,
255
+ }
256
+ }
257
+ }
258
+
259
+ const basePath = `/bench-${randomUUID()}`
260
+ const durations: Array<bigint> = []
261
+
262
+ try {
263
+ // Create the base stream for the scenario if needed
264
+ const setupCtx: ScenarioContext = {
265
+ basePath,
266
+ iteration: 0,
267
+ setupData: {},
268
+ }
269
+
270
+ // Run setup
271
+ if (scenario.setup) {
272
+ await scenario.setup(setupCtx)
273
+ }
274
+
275
+ // Pre-create stream for append/read scenarios
276
+ if (
277
+ scenario.id === `latency-append` ||
278
+ scenario.id === `latency-read` ||
279
+ scenario.id.startsWith(`throughput-`)
280
+ ) {
281
+ const streamUrl = `${serverUrl}${basePath}/stream`
282
+ await DurableStream.create({
283
+ url: streamUrl,
284
+ contentType: `application/octet-stream`,
285
+ })
286
+
287
+ // For read latency, pre-populate with some data so we're measuring actual reads
288
+ if (scenario.id === `latency-read`) {
289
+ const stream = new DurableStream({
290
+ url: streamUrl,
291
+ contentType: `application/octet-stream`,
292
+ })
293
+ // Add 10 chunks of 1KB each (10KB total)
294
+ const chunk = new Uint8Array(1024).fill(42)
295
+ for (let i = 0; i < 10; i++) {
296
+ await stream.append(chunk)
297
+ }
298
+ }
299
+
300
+ // For read throughput, pre-populate with JSON messages
301
+ if (scenario.id === `throughput-read`) {
302
+ const url = `${serverUrl}${basePath}/throughput-read`
303
+ // Create the stream with JSON content type
304
+ await DurableStream.create({
305
+ url,
306
+ contentType: `application/json`,
307
+ })
308
+ const ds = new DurableStream({
309
+ url,
310
+ contentType: `application/json`,
311
+ })
312
+ // Populate with 10000 JSON messages (each ~100 bytes)
313
+ // Using batching for speed - append many at once
314
+ const messages = []
315
+ for (let i = 0; i < 10000; i++) {
316
+ messages.push({ n: i, data: `message-${i}-padding-for-size` })
317
+ }
318
+ // Batch append for speed
319
+ await Promise.all(messages.map((msg) => ds.append(msg)))
320
+ }
321
+ }
322
+
323
+ // Warmup iterations
324
+ if (verbose) {
325
+ log(` Warmup: ${scenario.config.warmupIterations} iterations...`)
326
+ }
327
+
328
+ for (let i = 0; i < scenario.config.warmupIterations; i++) {
329
+ const ctx: ScenarioContext = {
330
+ basePath,
331
+ iteration: i,
332
+ setupData: setupCtx.setupData,
333
+ }
334
+ const operation = scenario.createOperation(ctx)
335
+ await client.benchmark(`warmup-${i}`, operation)
336
+ }
337
+
338
+ // Measured iterations
339
+ if (verbose) {
340
+ log(` Measuring: ${scenario.config.measureIterations} iterations...`)
341
+ }
342
+
343
+ let totalMessagesProcessed = 0
344
+ let totalBytesTransferred = 0
345
+
346
+ for (let i = 0; i < scenario.config.measureIterations; i++) {
347
+ const ctx: ScenarioContext = {
348
+ basePath,
349
+ iteration: i,
350
+ setupData: setupCtx.setupData,
351
+ }
352
+ const operation = scenario.createOperation(ctx)
353
+ const result = await client.benchmark(`measure-${i}`, operation)
354
+
355
+ if (result) {
356
+ durations.push(BigInt(result.durationNs))
357
+ // Track metrics for throughput calculations
358
+ if (result.metrics) {
359
+ totalMessagesProcessed += result.metrics.messagesProcessed ?? 0
360
+ totalBytesTransferred += result.metrics.bytesTransferred ?? 0
361
+ }
362
+ }
363
+ }
364
+
365
+ // Cleanup
366
+ if (scenario.cleanup) {
367
+ await scenario.cleanup(setupCtx)
368
+ }
369
+
370
+ // Fail if no samples were collected (adapter errors or missing results)
371
+ if (durations.length === 0) {
372
+ return {
373
+ scenario,
374
+ stats: calculateStats([]),
375
+ criteriaMet: false,
376
+ criteriaDetails: [],
377
+ skipped: false,
378
+ error: `No benchmark samples collected (adapter returned no results)`,
379
+ }
380
+ }
381
+
382
+ // Calculate statistics
383
+ const stats = calculateStats(durations)
384
+
385
+ // Calculate total time in seconds for throughput
386
+ const totalTimeMs = durations.reduce(
387
+ (sum, ns) => sum + Number(ns) / 1_000_000,
388
+ 0
389
+ )
390
+ const totalTimeSec = totalTimeMs / 1000
391
+
392
+ // Compute ops/sec for throughput scenarios
393
+ // Use actual messages processed if available, otherwise estimate from mean latency
394
+ let computedOpsPerSec: number | undefined
395
+ let computedMbPerSec: number | undefined
396
+ if (scenario.category === `throughput`) {
397
+ if (totalMessagesProcessed > 0 && totalTimeSec > 0) {
398
+ computedOpsPerSec = totalMessagesProcessed / totalTimeSec
399
+ } else if (stats.mean > 0) {
400
+ computedOpsPerSec = 1000 / stats.mean
401
+ }
402
+ // Compute MB/sec from bytes transferred
403
+ if (totalBytesTransferred > 0 && totalTimeSec > 0) {
404
+ computedMbPerSec = totalBytesTransferred / 1024 / 1024 / totalTimeSec
405
+ }
406
+ }
407
+
408
+ // Check criteria
409
+ const criteriaDetails: Array<CriteriaResult> = []
410
+ let criteriaMet = true
411
+
412
+ if (scenario.criteria) {
413
+ if (scenario.criteria.maxP50Ms !== undefined) {
414
+ const met = stats.median <= scenario.criteria.maxP50Ms
415
+ criteriaDetails.push({
416
+ criterion: `p50 <= ${scenario.criteria.maxP50Ms}ms`,
417
+ met,
418
+ actual: stats.median,
419
+ expected: scenario.criteria.maxP50Ms,
420
+ })
421
+ if (!met) criteriaMet = false
422
+ }
423
+
424
+ if (scenario.criteria.maxP99Ms !== undefined) {
425
+ const met = stats.p99 <= scenario.criteria.maxP99Ms
426
+ criteriaDetails.push({
427
+ criterion: `p99 <= ${scenario.criteria.maxP99Ms}ms`,
428
+ met,
429
+ actual: stats.p99,
430
+ expected: scenario.criteria.maxP99Ms,
431
+ })
432
+ if (!met) criteriaMet = false
433
+ }
434
+
435
+ if (scenario.criteria.minOpsPerSecond !== undefined) {
436
+ const opsPerSec = computedOpsPerSec ?? 0
437
+ const met = opsPerSec >= scenario.criteria.minOpsPerSecond
438
+ criteriaDetails.push({
439
+ criterion: `ops/sec >= ${scenario.criteria.minOpsPerSecond}`,
440
+ met,
441
+ actual: opsPerSec,
442
+ expected: scenario.criteria.minOpsPerSecond,
443
+ })
444
+ if (!met) criteriaMet = false
445
+ }
446
+
447
+ if (scenario.criteria.minMBPerSecond !== undefined) {
448
+ const mbPerSec =
449
+ totalTimeSec > 0
450
+ ? totalBytesTransferred / 1024 / 1024 / totalTimeSec
451
+ : 0
452
+ const met = mbPerSec >= scenario.criteria.minMBPerSecond
453
+ criteriaDetails.push({
454
+ criterion: `MB/sec >= ${scenario.criteria.minMBPerSecond}`,
455
+ met,
456
+ actual: mbPerSec,
457
+ expected: scenario.criteria.minMBPerSecond,
458
+ })
459
+ if (!met) criteriaMet = false
460
+ }
461
+ }
462
+
463
+ return {
464
+ scenario,
465
+ stats,
466
+ criteriaMet,
467
+ criteriaDetails,
468
+ skipped: false,
469
+ opsPerSec: computedOpsPerSec,
470
+ mbPerSec: computedMbPerSec,
471
+ }
472
+ } catch (err) {
473
+ return {
474
+ scenario,
475
+ stats: calculateStats([]),
476
+ criteriaMet: false,
477
+ criteriaDetails: [],
478
+ skipped: false,
479
+ error: err instanceof Error ? err.message : String(err),
480
+ }
481
+ }
482
+ }
483
+
484
+ // =============================================================================
485
+ // Output Formatting
486
+ // =============================================================================
487
+
488
+ function printConsoleResults(summary: BenchmarkSummary): void {
489
+ console.log(`\n${`=`.repeat(60)}`)
490
+ console.log(`CLIENT BENCHMARK RESULTS`)
491
+ console.log(`${`=`.repeat(60)}`)
492
+ console.log(`Adapter: ${summary.adapter} v${summary.adapterVersion}`)
493
+ console.log(`Server: ${summary.serverUrl}`)
494
+ console.log(`Timestamp: ${summary.timestamp}`)
495
+ console.log(`Duration: ${(summary.duration / 1000).toFixed(2)}s`)
496
+ console.log()
497
+
498
+ // Group by category
499
+ const byCategory = {
500
+ latency: summary.results.filter((r) => r.scenario.category === `latency`),
501
+ throughput: summary.results.filter(
502
+ (r) => r.scenario.category === `throughput`
503
+ ),
504
+ streaming: summary.results.filter(
505
+ (r) => r.scenario.category === `streaming`
506
+ ),
507
+ }
508
+
509
+ for (const [category, results] of Object.entries(byCategory)) {
510
+ if (results.length === 0) continue
511
+
512
+ console.log(`\n${category.toUpperCase()}`)
513
+ console.log(`${`-`.repeat(40)}`)
514
+
515
+ for (const result of results) {
516
+ const icon = result.skipped ? `○` : result.criteriaMet ? `✓` : `✗`
517
+ const status = result.skipped
518
+ ? `skipped: ${result.skipReason}`
519
+ : result.error
520
+ ? `error: ${result.error}`
521
+ : result.criteriaMet
522
+ ? `passed`
523
+ : `failed`
524
+
525
+ console.log(`${icon} ${result.scenario.name} (${status})`)
526
+
527
+ if (!result.skipped && !result.error) {
528
+ const formatted = formatStats(result.stats)
529
+ // Show ops/sec and MB/sec for throughput scenarios, latency for others
530
+ // For read throughput, only show MB/sec (ops/sec is not meaningful)
531
+ if (result.scenario.category === `throughput`) {
532
+ const mbStr = result.mbPerSec
533
+ ? result.mbPerSec.toLocaleString(`en-US`, {
534
+ minimumFractionDigits: 1,
535
+ maximumFractionDigits: 1,
536
+ })
537
+ : `N/A`
538
+ if (result.scenario.id === `throughput-read`) {
539
+ console.log(` MB/sec: ${mbStr}`)
540
+ } else {
541
+ const opsStr = result.opsPerSec
542
+ ? result.opsPerSec.toLocaleString(`en-US`, {
543
+ maximumFractionDigits: 0,
544
+ })
545
+ : `N/A`
546
+ console.log(` Ops/sec: ${opsStr} MB/sec: ${mbStr}`)
547
+ }
548
+ } else {
549
+ console.log(` Median: ${formatted.Median} P99: ${formatted.P99}`)
550
+ }
551
+
552
+ if (!result.criteriaMet) {
553
+ for (const c of result.criteriaDetails.filter((d) => !d.met)) {
554
+ console.log(
555
+ ` ✗ ${c.criterion}: got ${c.actual.toFixed(2)}, expected ${c.expected}`
556
+ )
557
+ }
558
+ }
559
+ }
560
+ }
561
+ }
562
+
563
+ console.log(`\n${`=`.repeat(60)}`)
564
+ console.log(
565
+ `Summary: ${summary.passed} passed, ${summary.failed} failed, ${summary.skipped} skipped`
566
+ )
567
+ console.log(`${`=`.repeat(60)}\n`)
568
+ }
569
+
570
+ function generateMarkdownReport(summary: BenchmarkSummary): string {
571
+ const lines: Array<string> = []
572
+
573
+ // Create collapsible section with summary status
574
+ const statusIcon = summary.failed === 0 ? `✓` : `✗`
575
+ const statusText =
576
+ summary.failed === 0
577
+ ? `${summary.passed} passed`
578
+ : `${summary.passed} passed, ${summary.failed} failed`
579
+
580
+ lines.push(`<details>`)
581
+ lines.push(
582
+ `<summary><strong>${summary.adapter}</strong>: ${statusText} ${statusIcon}</summary>`
583
+ )
584
+ lines.push(``)
585
+ lines.push(`### ${summary.adapter} v${summary.adapterVersion}`)
586
+ lines.push(``)
587
+ lines.push(`**Server**: ${summary.serverUrl}`)
588
+ lines.push(`**Date**: ${summary.timestamp}`)
589
+ lines.push(`**Duration**: ${(summary.duration / 1000).toFixed(2)}s`)
590
+ lines.push(``)
591
+
592
+ // Latency section
593
+ const latencyResults = summary.results.filter(
594
+ (r) => r.scenario.category === `latency` && !r.skipped && !r.error
595
+ )
596
+ if (latencyResults.length > 0) {
597
+ lines.push(`#### Latency`)
598
+ lines.push(``)
599
+ lines.push(
600
+ `Single-operation latency tests measure the time for individual operations to complete.`
601
+ )
602
+ lines.push(``)
603
+ lines.push(
604
+ `| Scenario | Description | Min | Median | P95 | P99 | Max | Status |`
605
+ )
606
+ lines.push(
607
+ `|----------|-------------|-----|--------|-----|-----|-----|--------|`
608
+ )
609
+ for (const r of latencyResults) {
610
+ const s = r.stats
611
+ const status = r.criteriaMet ? `Pass` : `Fail`
612
+ lines.push(
613
+ `| ${r.scenario.name} | ${r.scenario.description} | ${s.min.toFixed(2)}ms | ${s.median.toFixed(2)}ms | ${s.p95.toFixed(2)}ms | ${s.p99.toFixed(2)}ms | ${s.max.toFixed(2)}ms | ${status} |`
614
+ )
615
+ }
616
+ lines.push(``)
617
+ }
618
+
619
+ // Throughput section
620
+ const throughputResults = summary.results.filter(
621
+ (r) => r.scenario.category === `throughput` && !r.skipped && !r.error
622
+ )
623
+ if (throughputResults.length > 0) {
624
+ lines.push(`#### Throughput`)
625
+ lines.push(``)
626
+ lines.push(
627
+ `Throughput tests measure how quickly the client can batch and send/receive data.`
628
+ )
629
+ lines.push(
630
+ `Writes use automatic batching to maximize operations per second.`
631
+ )
632
+ lines.push(`Reads measure JSON parsing and iteration speed.`)
633
+ lines.push(``)
634
+ lines.push(`| Scenario | Description | Ops/sec | MB/sec | Status |`)
635
+ lines.push(`|----------|-------------|---------|--------|--------|`)
636
+ for (const r of throughputResults) {
637
+ // For read throughput, ops/sec is not meaningful - show "-"
638
+ const opsPerSec =
639
+ r.scenario.id === `throughput-read`
640
+ ? `-`
641
+ : r.opsPerSec !== undefined
642
+ ? r.opsPerSec.toLocaleString(`en-US`, { maximumFractionDigits: 0 })
643
+ : `N/A`
644
+ const mbPerSec =
645
+ r.mbPerSec !== undefined
646
+ ? r.mbPerSec.toLocaleString(`en-US`, {
647
+ minimumFractionDigits: 1,
648
+ maximumFractionDigits: 1,
649
+ })
650
+ : `N/A`
651
+ const status = r.criteriaMet ? `Pass` : `Fail`
652
+ lines.push(
653
+ `| ${r.scenario.name} | ${r.scenario.description} | ${opsPerSec} | ${mbPerSec} | ${status} |`
654
+ )
655
+ }
656
+ lines.push(``)
657
+ }
658
+
659
+ // Streaming section
660
+ const streamingResults = summary.results.filter(
661
+ (r) => r.scenario.category === `streaming` && !r.skipped && !r.error
662
+ )
663
+ if (streamingResults.length > 0) {
664
+ lines.push(`#### Streaming`)
665
+ lines.push(``)
666
+ lines.push(
667
+ `Streaming tests measure real-time event delivery via SSE (Server-Sent Events).`
668
+ )
669
+ lines.push(``)
670
+ lines.push(
671
+ `| Scenario | Description | Min | Median | P95 | P99 | Max | Status |`
672
+ )
673
+ lines.push(
674
+ `|----------|-------------|-----|--------|-----|-----|-----|--------|`
675
+ )
676
+ for (const r of streamingResults) {
677
+ const s = r.stats
678
+ const status = r.criteriaMet ? `Pass` : `Fail`
679
+ lines.push(
680
+ `| ${r.scenario.name} | ${r.scenario.description} | ${s.min.toFixed(2)}ms | ${s.median.toFixed(2)}ms | ${s.p95.toFixed(2)}ms | ${s.p99.toFixed(2)}ms | ${s.max.toFixed(2)}ms | ${status} |`
681
+ )
682
+ }
683
+ lines.push(``)
684
+ }
685
+
686
+ // Summary
687
+ lines.push(`#### Summary`)
688
+ lines.push(``)
689
+ lines.push(`- **Passed**: ${summary.passed}`)
690
+ lines.push(`- **Failed**: ${summary.failed}`)
691
+ lines.push(`- **Skipped**: ${summary.skipped}`)
692
+ lines.push(``)
693
+ lines.push(`</details>`)
694
+
695
+ return lines.join(`\n`)
696
+ }
697
+
698
+ // =============================================================================
699
+ // Public API
700
+ // =============================================================================
701
+
702
+ export async function runBenchmarks(
703
+ options: BenchmarkRunnerOptions
704
+ ): Promise<BenchmarkSummary> {
705
+ const startTime = Date.now()
706
+ const results: Array<ScenarioResult> = []
707
+
708
+ // When format is json or markdown, progress output goes to stderr so only the report goes to stdout
709
+ const log = (message: string): void => {
710
+ if (options.format === `json` || options.format === `markdown`) {
711
+ process.stderr.write(message + `\n`)
712
+ } else {
713
+ console.log(message)
714
+ }
715
+ }
716
+
717
+ // Filter scenarios
718
+ let scenarios = allScenarios
719
+ if (options.scenarios && options.scenarios.length > 0) {
720
+ scenarios = options.scenarios
721
+ .map((id) => getScenarioById(id))
722
+ .filter((s): s is BenchmarkScenario => s !== undefined)
723
+ }
724
+ if (options.categories && options.categories.length > 0) {
725
+ scenarios = scenarios.filter((s) =>
726
+ options.categories!.includes(s.category)
727
+ )
728
+ }
729
+
730
+ log(`\nRunning ${scenarios.length} benchmark scenarios...\n`)
731
+
732
+ // Start reference server
733
+ const server = new DurableStreamTestServer({ port: options.serverPort ?? 0 })
734
+ await server.start()
735
+ const serverUrl = server.url
736
+
737
+ log(`Reference server started at ${serverUrl}\n`)
738
+
739
+ // Resolve client adapter path
740
+ let adapterPath = options.clientAdapter
741
+ let adapterArgs = options.clientArgs ?? []
742
+
743
+ if (adapterPath === `ts` || adapterPath === `typescript`) {
744
+ adapterPath = `npx`
745
+ adapterArgs = [
746
+ `tsx`,
747
+ new URL(`./adapters/typescript-adapter.ts`, import.meta.url).pathname,
748
+ ]
749
+ }
750
+
751
+ // Start client adapter
752
+ const client = new BenchmarkClientAdapter(adapterPath, adapterArgs)
753
+
754
+ let adapterName = `unknown`
755
+ let adapterVersion = `0.0.0`
756
+ let clientFeatures: ClientFeatures = {}
757
+
758
+ try {
759
+ // Initialize client
760
+ const initResult = await client.init(serverUrl)
761
+ if (!initResult.success) {
762
+ throw new Error(`Failed to initialize client adapter`)
763
+ }
764
+
765
+ if (initResult.type === `init`) {
766
+ adapterName = initResult.clientName
767
+ adapterVersion = initResult.clientVersion
768
+ clientFeatures = initResult.features ?? {}
769
+
770
+ log(`Client: ${adapterName} v${adapterVersion}`)
771
+ const featureList = Object.entries(clientFeatures)
772
+ .filter(([, v]) => v)
773
+ .map(([k]) => k)
774
+ log(`Features: ${featureList.join(`, `) || `none`}\n`)
775
+ }
776
+
777
+ // Run each scenario
778
+ for (const scenario of scenarios) {
779
+ log(`\n${scenario.name}`)
780
+ log(`${`─`.repeat(scenario.name.length)}`)
781
+ log(`${scenario.description}`)
782
+
783
+ const result = await runScenario(
784
+ scenario,
785
+ client,
786
+ serverUrl,
787
+ clientFeatures,
788
+ options.verbose ?? false,
789
+ log
790
+ )
791
+
792
+ results.push(result)
793
+
794
+ if (result.skipped) {
795
+ log(` Skipped: ${result.skipReason}`)
796
+ } else if (result.error) {
797
+ log(` Error: ${result.error}`)
798
+ } else {
799
+ const icon = result.criteriaMet ? `✓` : `✗`
800
+ // Show ops/sec and MB/sec for throughput, latency for others
801
+ // For read throughput, only show MB/sec (ops/sec is not meaningful)
802
+ if (result.scenario.category === `throughput`) {
803
+ const mbStr = result.mbPerSec
804
+ ? result.mbPerSec.toLocaleString(`en-US`, {
805
+ minimumFractionDigits: 1,
806
+ maximumFractionDigits: 1,
807
+ })
808
+ : `N/A`
809
+ if (result.scenario.id === `throughput-read`) {
810
+ log(` ${icon} MB/sec: ${mbStr}`)
811
+ } else {
812
+ const opsStr = result.opsPerSec
813
+ ? result.opsPerSec.toLocaleString(`en-US`, {
814
+ maximumFractionDigits: 0,
815
+ })
816
+ : `N/A`
817
+ log(` ${icon} Ops/sec: ${opsStr}, MB/sec: ${mbStr}`)
818
+ }
819
+ } else {
820
+ log(
821
+ ` ${icon} Median: ${result.stats.median.toFixed(2)}ms, P99: ${result.stats.p99.toFixed(2)}ms`
822
+ )
823
+ }
824
+ }
825
+ }
826
+ } finally {
827
+ await client.shutdown()
828
+ await server.stop()
829
+ }
830
+
831
+ const summary: BenchmarkSummary = {
832
+ adapter: adapterName,
833
+ adapterVersion,
834
+ serverUrl,
835
+ timestamp: new Date().toISOString(),
836
+ duration: Date.now() - startTime,
837
+ results,
838
+ passed: results.filter((r) => !r.skipped && !r.error && r.criteriaMet)
839
+ .length,
840
+ failed: results.filter((r) => !r.skipped && (!r.criteriaMet || r.error))
841
+ .length,
842
+ skipped: results.filter((r) => r.skipped).length,
843
+ }
844
+
845
+ // Output results
846
+ switch (options.format) {
847
+ case `json`:
848
+ console.log(JSON.stringify(summary, null, 2))
849
+ break
850
+ case `markdown`:
851
+ console.log(generateMarkdownReport(summary))
852
+ break
853
+ default:
854
+ printConsoleResults(summary)
855
+ }
856
+
857
+ return summary
858
+ }
859
+
860
+ export { allScenarios, getScenarioById }