isage-control-plane-benchmark 0.1.0.1__cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. isage_control_plane_benchmark-0.1.0.1.dist-info/METADATA +596 -0
  2. isage_control_plane_benchmark-0.1.0.1.dist-info/RECORD +46 -0
  3. isage_control_plane_benchmark-0.1.0.1.dist-info/WHEEL +5 -0
  4. isage_control_plane_benchmark-0.1.0.1.dist-info/entry_points.txt +2 -0
  5. isage_control_plane_benchmark-0.1.0.1.dist-info/licenses/LICENSE +21 -0
  6. isage_control_plane_benchmark-0.1.0.1.dist-info/top_level.txt +1 -0
  7. sage/__init__.py +0 -0
  8. sage/benchmark_control_plane/__init__.py +149 -0
  9. sage/benchmark_control_plane/_version.py +4 -0
  10. sage/benchmark_control_plane/cli.py +1273 -0
  11. sage/benchmark_control_plane/client.py +457 -0
  12. sage/benchmark_control_plane/common/__init__.py +48 -0
  13. sage/benchmark_control_plane/common/base_config.py +221 -0
  14. sage/benchmark_control_plane/common/base_metrics.py +368 -0
  15. sage/benchmark_control_plane/common/gpu_monitor.py +531 -0
  16. sage/benchmark_control_plane/common/strategy_adapter.py +365 -0
  17. sage/benchmark_control_plane/config.py +246 -0
  18. sage/benchmark_control_plane/experiments/__init__.py +61 -0
  19. sage/benchmark_control_plane/experiments/base_experiment.py +332 -0
  20. sage/benchmark_control_plane/experiments/latency_exp.py +348 -0
  21. sage/benchmark_control_plane/experiments/mixed_ratio_exp.py +387 -0
  22. sage/benchmark_control_plane/experiments/slo_compliance_exp.py +386 -0
  23. sage/benchmark_control_plane/experiments/throughput_exp.py +269 -0
  24. sage/benchmark_control_plane/hybrid_scheduler/__init__.py +54 -0
  25. sage/benchmark_control_plane/hybrid_scheduler/client.py +596 -0
  26. sage/benchmark_control_plane/hybrid_scheduler/config.py +349 -0
  27. sage/benchmark_control_plane/hybrid_scheduler/metrics.py +457 -0
  28. sage/benchmark_control_plane/hybrid_scheduler/reporter.py +530 -0
  29. sage/benchmark_control_plane/hybrid_scheduler/runner.py +560 -0
  30. sage/benchmark_control_plane/hybrid_scheduler/workload.py +595 -0
  31. sage/benchmark_control_plane/llm_scheduler/__init__.py +61 -0
  32. sage/benchmark_control_plane/llm_scheduler/client.py +423 -0
  33. sage/benchmark_control_plane/llm_scheduler/config.py +185 -0
  34. sage/benchmark_control_plane/llm_scheduler/metrics.py +200 -0
  35. sage/benchmark_control_plane/llm_scheduler/reporter.py +366 -0
  36. sage/benchmark_control_plane/llm_scheduler/runner.py +471 -0
  37. sage/benchmark_control_plane/llm_scheduler/workload.py +404 -0
  38. sage/benchmark_control_plane/metrics.py +339 -0
  39. sage/benchmark_control_plane/reporter.py +338 -0
  40. sage/benchmark_control_plane/runner.py +368 -0
  41. sage/benchmark_control_plane/visualization/__init__.py +49 -0
  42. sage/benchmark_control_plane/visualization/charts.py +1134 -0
  43. sage/benchmark_control_plane/visualization/report_generator.py +625 -0
  44. sage/benchmark_control_plane/visualization/templates/benchmark_report.html +492 -0
  45. sage/benchmark_control_plane/visualization/templates/comparison_report.html +514 -0
  46. sage/benchmark_control_plane/workload.py +387 -0
@@ -0,0 +1,596 @@
1
+ Metadata-Version: 2.4
2
+ Name: isage-control-plane-benchmark
3
+ Version: 0.1.0.1
4
+ Summary: Control Plane scheduling benchmark for the SAGE ecosystem
5
+ Author-email: IntelliStream Team <shuhao_zhang@hust.edu.cn>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/intellistream/sage-control-plane-benchmark
8
+ Project-URL: Documentation, https://github.com/intellistream/sage-control-plane-benchmark#readme
9
+ Project-URL: Repository, https://github.com/intellistream/sage-control-plane-benchmark
10
+ Project-URL: Issues, https://github.com/intellistream/sage-control-plane-benchmark/issues
11
+ Keywords: sage,benchmark,control-plane,scheduling,evaluation,intellistream
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3 :: Only
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: isage-common
25
+ Requires-Dist: isage-kernel
26
+ Requires-Dist: isage-middleware>=0.2.4.0
27
+ Requires-Dist: isage-libs
28
+ Requires-Dist: aiohttp>=3.9.0
29
+ Requires-Dist: numpy<2.3.0,>=1.26.0
30
+ Requires-Dist: pandas>=2.0.0
31
+ Requires-Dist: pyyaml>=6.0
32
+ Requires-Dist: typer<1.0.0,>=0.15.0
33
+ Requires-Dist: rich<14.0.0,>=13.0.0
34
+ Requires-Dist: matplotlib>=3.7.0
35
+ Requires-Dist: seaborn>=0.12.0
36
+ Requires-Dist: jinja2>=3.1.0
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
39
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
40
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
41
+ Requires-Dist: black>=23.0.0; extra == "dev"
42
+ Requires-Dist: ruff==0.14.6; extra == "dev"
43
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
44
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
45
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
46
+ Dynamic: license-file
47
+
48
+ # sageLLM Control Plane Benchmark
49
+
50
+ This module provides comprehensive benchmarking tools for evaluating different scheduling policies
51
+ in sageLLM's Control Plane. It supports both **LLM-only** and **Hybrid (LLM + Embedding)**
52
+ workloads.
53
+
54
+ ## Overview
55
+
56
+ The benchmark measures key performance metrics across various scheduling strategies:
57
+
58
+ - **Throughput**: Requests per second and tokens per second
59
+ - **Latency**: End-to-end latency, Time to First Token (TTFT), Time Between Tokens (TBT)
60
+ - **SLO Compliance**: Percentage of requests meeting their SLO deadlines
61
+ - **Error Rates**: Failed requests and timeout rates
62
+ - **Resource Utilization**: GPU memory and compute utilization (optional)
63
+
64
+ ## Architecture
65
+
66
+ ```
67
+ ┌─────────────────────────────────────────────┐
68
+ │ Control Plane │
69
+ ┌─────────────┐ HTTP │ ┌─────────────────────────────────────┐ │
70
+ │ Benchmark │ ───────────────► │ │ Scheduler (Policy: X) │ │
71
+ │ Client │ │ │ ┌───────────┬───────────────────┐ │ │
72
+ │ │ │ │ │ LLM Queue │ Embedding Queue │ │ │
73
+ └─────────────┘ │ │ └───────────┴───────────────────┘ │ │
74
+ │ │ └─────────────────────────────────────┘ │
75
+ │ └──────────────────┬──────────────────────────┘
76
+ │ │
77
+ │ ┌──────────────────┴──────────────────────┐
78
+ │ │ │
79
+ ▼ ▼ ▼
80
+ ┌─────────────┐ ┌──────────────┐ ┌──────────────┐
81
+ │ Metrics │ │ vLLM Inst 1 │ │ Embedding │
82
+ │ Collector │ │ (Qwen-7B) │ │ Server │
83
+ └─────────────┘ ├──────────────┤ │ (BGE-M3) │
84
+ │ vLLM Inst 2 │ └──────────────┘
85
+ │ (Llama-13B) │
86
+ └──────────────┘
87
+ ```
88
+
89
+ ## Quick Start
90
+
91
+ ### Installation
92
+
93
+ ```bash
94
+ # Install the control plane benchmark package
95
+ pip install isage-control-plane-benchmark
96
+
97
+ # Or for development:
98
+ pip install -e "packages/sage-benchmark[dev]"
99
+
100
+ # CLI dependencies
101
+ pip install typer aiohttp pyyaml
102
+
103
+ # Visualization dependencies (optional)
104
+ pip install matplotlib jinja2
105
+ ```
106
+
107
+ ### Running Your First Benchmark
108
+
109
+ ```bash
110
+ # 1. Run a simple LLM benchmark
111
+ sage-cp-bench run --mode llm --policy fifo --requests 100 --rate 10
112
+
113
+ # 2. Run a hybrid (LLM + Embedding) benchmark
114
+ sage-cp-bench run --mode hybrid --policy hybrid_slo --llm-ratio 0.7 --requests 100
115
+
116
+ # 3. Compare multiple policies
117
+ sage-cp-bench compare --mode llm --policies fifo,priority,slo_aware --requests 500
118
+
119
+ # 4. Run a predefined experiment
120
+ sage-cp-bench experiment --name throughput --policies fifo,priority
121
+ ```
122
+
123
+ ## CLI Reference
124
+
125
+ ### Commands Overview
126
+
127
+ | Command | Description |
128
+ | ------------ | -------------------------------------------- |
129
+ | `run` | Run benchmark for a single scheduling policy |
130
+ | `compare` | Compare multiple scheduling policies |
131
+ | `sweep` | Sweep across multiple request rates |
132
+ | `experiment` | Run predefined experiments |
133
+ | `visualize` | Generate charts from existing results |
134
+ | `config` | Show/save example configuration |
135
+ | `validate` | Validate a configuration file |
136
+
137
+ ### `run` Command
138
+
139
+ ```bash
140
+ sage-cp-bench run [OPTIONS]
141
+
142
+ Options:
143
+ --mode -m [llm|hybrid] Benchmark mode (default: llm)
144
+ --control-plane -c TEXT Control Plane URL (default: http://localhost:8080)
145
+ --policy -p TEXT Scheduling policy (default: fifo)
146
+ --requests -n INTEGER Number of requests (default: 100)
147
+ --rate -r FLOAT Request rate req/s (default: 10.0)
148
+ --llm-ratio FLOAT LLM ratio for hybrid mode (default: 0.7)
149
+ --output -o TEXT Output directory (default: ./benchmark_results)
150
+ --warmup -w INTEGER Warmup requests (default: 10)
151
+ --timeout -t FLOAT Request timeout seconds (default: 60.0)
152
+ --no-visualize Disable auto visualization
153
+ --config TEXT Load config from YAML/JSON file
154
+ --quiet -q Suppress progress output
155
+ ```
156
+
157
+ **Examples:**
158
+
159
+ ```bash
160
+ # LLM-only benchmark
161
+ sage-cp-bench run --mode llm --policy fifo --requests 100 --rate 10
162
+
163
+ # Hybrid benchmark with 70% LLM, 30% Embedding
164
+ sage-cp-bench run --mode hybrid --policy hybrid_slo --llm-ratio 0.7 --requests 100
165
+
166
+ # Load configuration from file
167
+ sage-cp-bench run --config benchmark_config.yaml
168
+ ```
169
+
170
+ ### `compare` Command
171
+
172
+ ```bash
173
+ sage-cp-bench compare [OPTIONS]
174
+
175
+ Options:
176
+ --mode -m [llm|hybrid] Benchmark mode (default: llm)
177
+ --policies -p TEXT Comma-separated policy list (default: fifo,priority,slo_aware)
178
+ --requests -n INTEGER Requests per policy (default: 100)
179
+ --rate -r FLOAT Request rate (default: 10.0)
180
+ --llm-ratio FLOAT LLM ratio for hybrid mode (default: 0.7)
181
+ --output -o TEXT Output directory
182
+ --no-visualize Disable comparison charts
183
+ ```
184
+
185
+ **Examples:**
186
+
187
+ ```bash
188
+ # Compare LLM scheduling policies
189
+ sage-cp-bench compare --mode llm --policies fifo,priority,slo_aware
190
+
191
+ # Compare hybrid scheduling policies
192
+ sage-cp-bench compare --mode hybrid --policies fifo,hybrid_slo --llm-ratio 0.7
193
+ ```
194
+
195
+ ### `sweep` Command
196
+
197
+ ```bash
198
+ sage-cp-bench sweep [OPTIONS]
199
+
200
+ Options:
201
+ --mode -m [llm|hybrid] Benchmark mode (default: llm)
202
+ --policy -p TEXT Policy to test (default: fifo)
203
+ --rates TEXT Comma-separated rates (default: 10,50,100,200)
204
+ --requests -n INTEGER Requests per rate (default: 100)
205
+ --output -o TEXT Output directory
206
+ ```
207
+
208
+ **Examples:**
209
+
210
+ ```bash
211
+ # Sweep request rates for LLM benchmark
212
+ sage-cp-bench sweep --mode llm --policy fifo --rates 10,50,100,200
213
+
214
+ # Sweep rates for hybrid benchmark
215
+ sage-cp-bench sweep --mode hybrid --policy hybrid_slo --rates 10,50,100
216
+ ```
217
+
218
+ ### `experiment` Command
219
+
220
+ ```bash
221
+ sage-cp-bench experiment [OPTIONS]
222
+
223
+ Options:
224
+ --name -e TEXT Experiment: throughput|latency|slo|mixed_ratio [required]
225
+ --control-plane -c TEXT Control Plane URL
226
+ --requests -n INTEGER Requests per test (default: 500)
227
+ --rate -r INTEGER Request rate (default: 100)
228
+ --llm-ratio FLOAT LLM ratio (default: 0.5)
229
+ --policies -p TEXT Policies to test (default: fifo,priority,slo_aware)
230
+ --output -o TEXT Output directory
231
+ --no-visualize Skip visualization
232
+ ```
233
+
234
+ **Available Experiments:**
235
+
236
+ | Experiment | Description |
237
+ | ------------- | --------------------------------------------- |
238
+ | `throughput` | Sweep request rates to find max throughput |
239
+ | `latency` | Analyze latency distribution under fixed load |
240
+ | `slo` | Compare SLO compliance across policies |
241
+ | `mixed_ratio` | Test different LLM/Embedding ratios |
242
+
243
+ **Examples:**
244
+
245
+ ```bash
246
+ # Run throughput experiment
247
+ sage-cp-bench experiment --name throughput --policies fifo,priority
248
+
249
+ # Run latency analysis
250
+ sage-cp-bench experiment --name latency --rate 100 --requests 1000
251
+
252
+ # Run SLO compliance comparison
253
+ sage-cp-bench experiment --name slo --policies fifo,slo_aware
254
+
255
+ # Run mixed ratio sweep (hybrid only)
256
+ sage-cp-bench experiment --name mixed_ratio --rate 100
257
+ ```
258
+
259
+ ### `visualize` Command
260
+
261
+ ```bash
262
+ sage-cp-bench visualize [OPTIONS]
263
+
264
+ Options:
265
+ --input -i TEXT Results JSON file [required]
266
+ --output -o TEXT Output directory (default: ./visualizations)
267
+ --format -f TEXT Output format: charts|html|markdown|all (default: all)
268
+ ```
269
+
270
+ **Examples:**
271
+
272
+ ```bash
273
+ # Generate all visualizations
274
+ sage-cp-bench visualize --input results.json --output ./charts
275
+
276
+ # Generate only HTML report
277
+ sage-cp-bench visualize --input results.json --format html
278
+ ```
279
+
280
+ ### `config` and `validate` Commands
281
+
282
+ ```bash
283
+ # Show example LLM configuration
284
+ sage-cp-bench config --mode llm
285
+
286
+ # Show and save hybrid configuration
287
+ sage-cp-bench config --mode hybrid --output config.yaml
288
+
289
+ # Validate configuration file
290
+ sage-cp-bench validate config.json --mode llm
291
+ sage-cp-bench validate config.yaml --mode hybrid
292
+ ```
293
+
294
+ ## Python API
295
+
296
+ ### LLM-only Benchmark
297
+
298
+ ```python
299
+ import asyncio
300
+ from sage.benchmark_control_plane import (
301
+ BenchmarkConfig,
302
+ BenchmarkRunner,
303
+ BenchmarkReporter,
304
+ )
305
+
306
+ # Configure benchmark
307
+ config = BenchmarkConfig(
308
+ control_plane_url="http://localhost:8080",
309
+ policies=["fifo", "priority", "slo_aware"],
310
+ num_requests=1000,
311
+ request_rate=100.0,
312
+ )
313
+
314
+ # Run benchmark
315
+ runner = BenchmarkRunner(config)
316
+ result = asyncio.run(runner.run())
317
+
318
+ # Generate report
319
+ reporter = BenchmarkReporter(result)
320
+ reporter.print_summary()
321
+ reporter.save_all("./benchmark_results")
322
+ ```
323
+
324
+ ### Hybrid Benchmark (LLM + Embedding)
325
+
326
+ ```python
327
+ import asyncio
328
+ from sage.benchmark_control_plane.hybrid_scheduler import (
329
+ HybridBenchmarkConfig,
330
+ HybridBenchmarkRunner,
331
+ HybridBenchmarkReporter,
332
+ )
333
+
334
+ # Configure hybrid benchmark
335
+ config = HybridBenchmarkConfig(
336
+ control_plane_url="http://localhost:8080",
337
+ num_requests=1000,
338
+ request_rate=100.0,
339
+ llm_ratio=0.7, # 70% LLM, 30% Embedding
340
+ embedding_ratio=0.3,
341
+ policies=["fifo", "hybrid_slo"],
342
+ )
343
+
344
+ # Run benchmark
345
+ runner = HybridBenchmarkRunner(config)
346
+ result = asyncio.run(runner.run())
347
+
348
+ # Generate report
349
+ reporter = HybridBenchmarkReporter(result)
350
+ reporter.print_summary()
351
+ reporter.save_json("./results/hybrid_benchmark.json")
352
+ ```
353
+
354
+ ### Running Predefined Experiments
355
+
356
+ ```python
357
+ import asyncio
358
+ from sage.benchmark_control_plane.experiments import (
359
+ ThroughputExperiment,
360
+ LatencyExperiment,
361
+ SLOComplianceExperiment,
362
+ MixedRatioExperiment,
363
+ )
364
+ from sage.benchmark_control_plane.common.base_config import SchedulingPolicy
365
+
366
+ # Throughput experiment
367
+ exp = ThroughputExperiment(
368
+ name="throughput_sweep",
369
+ control_plane_url="http://localhost:8080",
370
+ policies=[SchedulingPolicy.FIFO, SchedulingPolicy.PRIORITY],
371
+ request_rates=[50, 100, 200, 500],
372
+ )
373
+ result = asyncio.run(exp.run_full()) # Includes visualization
374
+ print(f"Best policy: {result.summary['best_policy']}")
375
+
376
+ # Latency experiment
377
+ exp = LatencyExperiment(
378
+ name="latency_analysis",
379
+ control_plane_url="http://localhost:8080",
380
+ request_rate=100,
381
+ num_requests=1000,
382
+ )
383
+ result = asyncio.run(exp.run_full())
384
+
385
+ # Mixed ratio experiment (hybrid)
386
+ exp = MixedRatioExperiment(
387
+ name="ratio_sweep",
388
+ control_plane_url="http://localhost:8080",
389
+ llm_ratios=[0.0, 0.25, 0.5, 0.75, 1.0],
390
+ )
391
+ result = asyncio.run(exp.run_full())
392
+ ```
393
+
394
+ ### Generating Visualizations
395
+
396
+ ```python
397
+ from pathlib import Path
398
+ from sage.benchmark_control_plane.visualization import (
399
+ BenchmarkCharts,
400
+ ReportGenerator,
401
+ )
402
+
403
+ # Generate charts
404
+ charts = BenchmarkCharts(output_dir=Path("./charts"))
405
+ charts.plot_throughput_comparison(policy_metrics)
406
+ charts.plot_latency_distribution(latency_data)
407
+ charts.plot_slo_compliance(slo_data)
408
+
409
+ # Generate reports
410
+ report_gen = ReportGenerator(result=benchmark_result, charts_dir=Path("./charts"))
411
+ report_gen.generate_html_report(Path("./report.html"))
412
+ report_gen.generate_markdown_report(Path("./report.md"))
413
+ ```
414
+
415
+ ## Supported Scheduling Policies
416
+
417
+ | Policy | Mode | Description |
418
+ | ---------------- | ------ | ----------------------------------------------- |
419
+ | `fifo` | Both | First-In-First-Out scheduling |
420
+ | `priority` | Both | Priority-based scheduling |
421
+ | `slo_aware` | Both | SLO-deadline aware scheduling |
422
+ | `cost_optimized` | LLM | Cost-optimized scheduling |
423
+ | `adaptive` | LLM | Adaptive scheduling based on system state |
424
+ | `aegaeon` | LLM | Advanced scheduling with multiple optimizations |
425
+ | `hybrid` | Hybrid | Hybrid LLM/Embedding scheduling |
426
+ | `hybrid_slo` | Hybrid | Hybrid with SLO awareness |
427
+
428
+ ## Configuration Options
429
+
430
+ ### LLM Benchmark Configuration
431
+
432
+ | Option | Description | Default |
433
+ | ----------------------- | ---------------------------------- | ----------------------------------- |
434
+ | `control_plane_url` | Control Plane HTTP address | `http://localhost:8080` |
435
+ | `policies` | List of policies to benchmark | `["fifo", "priority", "slo_aware"]` |
436
+ | `num_requests` | Total requests per policy | `100` |
437
+ | `request_rate` | Target request rate (req/s) | `10.0` |
438
+ | `arrival_pattern` | Request arrival pattern | `poisson` |
439
+ | `model_distribution` | Request distribution across models | `{"default": 1.0}` |
440
+ | `priority_distribution` | Request priority distribution | `{"NORMAL": 1.0}` |
441
+ | `timeout_seconds` | Request timeout | `60.0` |
442
+ | `warmup_requests` | Warmup requests before measurement | `10` |
443
+
444
+ ### Hybrid Benchmark Configuration
445
+
446
+ | Option | Description | Default |
447
+ | --------------------------- | --------------------------------- | ------------- |
448
+ | `llm_ratio` | Ratio of LLM requests (0.0-1.0) | `0.5` |
449
+ | `embedding_ratio` | Ratio of Embedding requests | `0.5` |
450
+ | `embedding_model` | Embedding model name | `BAAI/bge-m3` |
451
+ | `embedding_batch_size` | Batch size for embedding requests | `32` |
452
+ | `llm_slo_deadline_ms` | SLO deadline for LLM requests | `5000` |
453
+ | `embedding_slo_deadline_ms` | SLO deadline for embedding | `500` |
454
+
455
+ ## Output Formats
456
+
457
+ ### Terminal Output
458
+
459
+ ```
460
+ ============================================================
461
+ sageLLM Hybrid Scheduling Benchmark Report
462
+ ============================================================
463
+ Config: 1000 requests @ 100 req/s | LLM: 70% | Embedding: 30%
464
+ ------------------------------------------------------------
465
+
466
+ | Policy | Throughput | LLM Avg | Emb Avg | LLM SLO | Emb SLO | Errors |
467
+ |------------|------------|---------|---------|---------|---------|--------|
468
+ | fifo | 95.2 req/s | 156 ms | 23 ms | 71.2% | 92.1% | 0.3% |
469
+ | hybrid_slo | 98.5 req/s | 132 ms | 18 ms | 93.7% | 98.2% | 0.1% |
470
+
471
+ Best Throughput: hybrid_slo (98.5 req/s)
472
+ Best LLM SLO: hybrid_slo (93.7%)
473
+ Best Embedding SLO: hybrid_slo (98.2%)
474
+ ```
475
+
476
+ ### JSON Report
477
+
478
+ Full results saved to `report_<timestamp>.json` including:
479
+
480
+ - Configuration summary
481
+ - Per-policy metrics
482
+ - Raw request results
483
+ - Summary statistics
484
+
485
+ ### HTML Report
486
+
487
+ Interactive HTML report with embedded charts and tables.
488
+
489
+ ### Markdown Report
490
+
491
+ Markdown format suitable for documentation and GitHub.
492
+
493
+ ## Module Structure
494
+
495
+ ```
496
+ benchmark_control_plane/
497
+ ├── __init__.py # Module exports (backward compatible)
498
+ ├── cli.py # CLI interface (sage-cp-bench)
499
+ ├── config.py # Legacy config (→ llm_scheduler)
500
+ ├── workload.py # Legacy workload (→ llm_scheduler)
501
+ ├── client.py # Legacy client (→ llm_scheduler)
502
+ ├── metrics.py # Legacy metrics (→ llm_scheduler)
503
+ ├── runner.py # Legacy runner (→ llm_scheduler)
504
+ ├── reporter.py # Legacy reporter (→ llm_scheduler)
505
+ ├── README.md # This file
506
+
507
+ ├── common/ # Shared components
508
+ │ ├── __init__.py
509
+ │ ├── base_config.py # Base configuration classes
510
+ │ ├── base_metrics.py # Base metrics classes
511
+ │ ├── gpu_monitor.py # GPU resource monitoring
512
+ │ └── strategy_adapter.py # Scheduling strategy adapter
513
+
514
+ ├── llm_scheduler/ # LLM-only benchmark
515
+ │ ├── __init__.py
516
+ │ ├── config.py # LLM benchmark config
517
+ │ ├── workload.py # LLM workload generation
518
+ │ ├── client.py # LLM HTTP client
519
+ │ ├── metrics.py # LLM metrics collection
520
+ │ ├── runner.py # LLM benchmark runner
521
+ │ └── reporter.py # LLM result reporting
522
+
523
+ ├── hybrid_scheduler/ # Hybrid LLM+Embedding benchmark
524
+ │ ├── __init__.py
525
+ │ ├── config.py # Hybrid benchmark config
526
+ │ ├── workload.py # Hybrid workload generation
527
+ │ ├── client.py # Hybrid HTTP client
528
+ │ ├── metrics.py # Hybrid metrics collection
529
+ │ ├── runner.py # Hybrid benchmark runner
530
+ │ └── reporter.py # Hybrid result reporting
531
+
532
+ ├── visualization/ # Charts and reports
533
+ │ ├── __init__.py
534
+ │ ├── charts.py # Matplotlib chart generation
535
+ │ ├── report_generator.py # HTML/Markdown reports
536
+ │ └── templates/ # Report templates
537
+ │ ├── benchmark_report.html
538
+ │ └── comparison_report.html
539
+
540
+ └── experiments/ # Predefined experiments
541
+ ├── __init__.py
542
+ ├── base_experiment.py # Experiment base class
543
+ ├── throughput_exp.py # Throughput sweep
544
+ ├── latency_exp.py # Latency analysis
545
+ ├── slo_compliance_exp.py # SLO compliance
546
+ └── mixed_ratio_exp.py # LLM/Embedding ratio sweep
547
+ ```
548
+
549
+ ## Related Documentation
550
+
551
+ - [DATA_PATHS.md](./DATA_PATHS.md) - Data directory structure and formats
552
+ - [VISUALIZATION.md](./VISUALIZATION.md) - Chart types and report formats
553
+ - [examples/run_llm_benchmark.py](../../../../examples/benchmark/run_llm_benchmark.py) - LLM
554
+ benchmark example
555
+ - [examples/run_hybrid_benchmark.py](../../../../examples/benchmark/run_hybrid_benchmark.py) -
556
+ Hybrid benchmark example
557
+
558
+ ## Control Plane Integration
559
+
560
+ ### Required API Endpoints
561
+
562
+ | Endpoint | Method | Description |
563
+ | ---------------------- | ------ | ------------------------------------ |
564
+ | `/health` | GET | Health check |
565
+ | `/v1/chat/completions` | POST | OpenAI-compatible LLM endpoint |
566
+ | `/v1/embeddings` | POST | OpenAI-compatible embedding endpoint |
567
+ | `/admin/set_policy` | POST | Switch scheduling policy |
568
+ | `/admin/metrics` | GET | Get Control Plane metrics |
569
+
570
+ ### Request Headers
571
+
572
+ - `X-Request-ID`: Unique request identifier
573
+ - `X-Request-Priority`: Request priority (HIGH, NORMAL, LOW)
574
+ - `X-SLO-Deadline-Ms`: SLO deadline in milliseconds
575
+ - `X-Request-Type`: Request type (llm_chat, llm_generate, embedding)
576
+
577
+ ## Troubleshooting
578
+
579
+ ### Common Issues
580
+
581
+ 1. **Connection refused**: Ensure Control Plane is running at the specified URL
582
+ 2. **Timeout errors**: Increase `--timeout` or reduce `--rate`
583
+ 3. **No visualization**: Install matplotlib: `pip install matplotlib`
584
+ 4. **YAML config error**: Install pyyaml: `pip install pyyaml`
585
+
586
+ ### Debug Mode
587
+
588
+ ```bash
589
+ # Enable verbose logging
590
+ export SAGE_LOG_LEVEL=DEBUG
591
+ sage-cp-bench run --mode llm --policy fifo --requests 10
592
+ ```
593
+
594
+ ______________________________________________________________________
595
+
596
+ *Updated: 2025-11-28*
@@ -0,0 +1,46 @@
1
+ isage_control_plane_benchmark-0.1.0.1.dist-info/licenses/LICENSE,sha256=vBNVIGkYYZY0B8f0Ui1ITYwRu7WNtSwyxvIAVGYS6jU,1075
2
+ sage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ sage/benchmark_control_plane/__init__.py,sha256=Zp1bFy7qeiIGaXOtPL_uImkKoy7MFtuj4Df4UKL5uK8,5350
4
+ sage/benchmark_control_plane/_version.py,sha256=Z1QHTVqqrdwKiMdqUj-e4F0qu5MMUIw8jhHVJSYgGT8,157
5
+ sage/benchmark_control_plane/cli.py,sha256=ITXtS1UyPrWz7DHbM8iKSqggjHcxJ4hMkLRoxRBAYaQ,42855
6
+ sage/benchmark_control_plane/client.py,sha256=tFbafZOGj7I4ouPqoflRL3WAJ_ggiFRXVbEdbxPQQGE,15128
7
+ sage/benchmark_control_plane/config.py,sha256=_Li77HAEMGNuk6EU8IcNaTQPHWJNn3dRLyS3I0gG7WM,9295
8
+ sage/benchmark_control_plane/metrics.py,sha256=s8Vn6dCCz4h1YM52aMGLZh4xHSxIXq43W5PFL3GhGlY,11991
9
+ sage/benchmark_control_plane/reporter.py,sha256=qSkCHhQHQkEpKqDJNhfAnMoxc0r34KuPoDkQ8SEypPQ,11367
10
+ sage/benchmark_control_plane/runner.py,sha256=JH3a4prTxlgs7I3aLv2Y6shGtn95rH54QgxCCn-rHLk,11873
11
+ sage/benchmark_control_plane/workload.py,sha256=gNMCWLgXrzTZ-PE275m6aIqcHG4v95ssSuVHHLLY9-c,12653
12
+ sage/benchmark_control_plane/common/__init__.py,sha256=1xrrrYTOVAPNuNHLxzrSf4VQKcHH4LOZWvpYKb-1GcY,1158
13
+ sage/benchmark_control_plane/common/base_config.py,sha256=lmSWFiB5m-oy2kugsed989kxiU4msY_-b0NWsgMFxAw,7185
14
+ sage/benchmark_control_plane/common/base_metrics.py,sha256=XFlVsX4SP0noDBdeGp7gqmyOvvZMZARq_080YBlU9xk,12357
15
+ sage/benchmark_control_plane/common/gpu_monitor.py,sha256=smU8bDUmHq6jQCy-jeFIKoELNKlsjdF2aON-MP-o_1I,18144
16
+ sage/benchmark_control_plane/common/strategy_adapter.py,sha256=QPXf3MLBTJS7yGUGZ3UwfSZGT_-QIBJl1O5ShU_nPQc,11655
17
+ sage/benchmark_control_plane/experiments/__init__.py,sha256=GzH5YZ-evPy3TELxNAYPnn2oUt4MvjnQ7ms6kNOjkcY,1816
18
+ sage/benchmark_control_plane/experiments/base_experiment.py,sha256=wa3HwRFtUVEv9pEgzQfV0b8Z3TxIdPK52Zvz5d87zbw,9505
19
+ sage/benchmark_control_plane/experiments/latency_exp.py,sha256=8dbq-p9O0qJ26LjrAfc_wjPSQlmx4cfV3NXw9qJlRnY,12299
20
+ sage/benchmark_control_plane/experiments/mixed_ratio_exp.py,sha256=KrYxj3Kln7nt4umONkFwoApO2sqPO5Zbaq14_-7b5PA,14286
21
+ sage/benchmark_control_plane/experiments/slo_compliance_exp.py,sha256=kvzZrok5mIYH6xu0XQJpiR41lk2YhwLED0wo_T_nEDQ,14513
22
+ sage/benchmark_control_plane/experiments/throughput_exp.py,sha256=eQ1Sc02IXBGErwM-4KRYtEhqa136iPCY8fFzahGVXjI,9828
23
+ sage/benchmark_control_plane/hybrid_scheduler/__init__.py,sha256=2rvc376TWkGy5NQoHmb_aTjJXMRGRC7f8IKQK_mFSEY,2037
24
+ sage/benchmark_control_plane/hybrid_scheduler/client.py,sha256=cSHPtd5PA8nTn1WATUqzy1xmojF-RsQC22Gtj2tQAzs,19803
25
+ sage/benchmark_control_plane/hybrid_scheduler/config.py,sha256=Ms-HgV8cU3jXntox0kHsr0Z3QamRJVx_V2WWXLWcmnk,13444
26
+ sage/benchmark_control_plane/hybrid_scheduler/metrics.py,sha256=B87ivvSPdHsL0dRhJpVKa0zaff9QKf47WxnRDlnKYgU,18296
27
+ sage/benchmark_control_plane/hybrid_scheduler/reporter.py,sha256=p4GE8v9gszsrLwM5_DIlqwjCqcnNsru0KelKol8iCLw,18102
28
+ sage/benchmark_control_plane/hybrid_scheduler/runner.py,sha256=zDfNYnLP70ls9Unws2q2B5PKns0FlVmXWWObldiwdnQ,20050
29
+ sage/benchmark_control_plane/hybrid_scheduler/workload.py,sha256=-irkMjJAXzR3KaEullcZVdyCpmKtlCJmW5r-jechBqU,20607
30
+ sage/benchmark_control_plane/llm_scheduler/__init__.py,sha256=OWYYnsil5dIoiciRWW9xUZrSYR0cGz52yBzaYszFuqM,1754
31
+ sage/benchmark_control_plane/llm_scheduler/client.py,sha256=gXl096l2O99Mq5U-pO69upMdwScovUC_h8Cye4YvXlY,13777
32
+ sage/benchmark_control_plane/llm_scheduler/config.py,sha256=Yum_cB3kqhuqwLd4YRUSNktIkt-m33_r7tUJJ_lgC58,6245
33
+ sage/benchmark_control_plane/llm_scheduler/metrics.py,sha256=lls_-fKtdIHiHBwQAFjrJVasIKpDbHA55Q8cmbYgbVE,6950
34
+ sage/benchmark_control_plane/llm_scheduler/reporter.py,sha256=DTP7uI-YBzC1SMT0y3mstk53MV5UWeH8j1ZTz7Bk5nw,12434
35
+ sage/benchmark_control_plane/llm_scheduler/runner.py,sha256=ZIyTQweZQBT8vxmilOmfAzSmMGbqh8dAkU0yNWqcPxo,15834
36
+ sage/benchmark_control_plane/llm_scheduler/workload.py,sha256=ioqV5p0vx9KiMz66wQqVZAuGcM4KeTOEDltzfGfg5x8,13058
37
+ sage/benchmark_control_plane/visualization/__init__.py,sha256=7qE5ghCkceOH8wIT4c8076TFboXJeCoZN0U7yZHGsoA,1496
38
+ sage/benchmark_control_plane/visualization/charts.py,sha256=dbD20L1omtpPv9XZJh-obBQXrZa4d55Hr0mqM50phhE,36132
39
+ sage/benchmark_control_plane/visualization/report_generator.py,sha256=cO712ZgbVER8Cc4bdgNOX3w3DoQ3jigyEvXC20c_MO8,25078
40
+ sage/benchmark_control_plane/visualization/templates/benchmark_report.html,sha256=rxjE9lSePfSaCR_qsTSPlNWh9JpC6dqwMvnD3EUz9d8,16616
41
+ sage/benchmark_control_plane/visualization/templates/comparison_report.html,sha256=pmsC1HiT1MIotody_Hy5P40y6tr6w_rVVJjNS2vhncE,15217
42
+ isage_control_plane_benchmark-0.1.0.1.dist-info/METADATA,sha256=hhm-ypZm__VFDpsBYgBa-5M6uQBp_RUFkta13cQ37ho,23128
43
+ isage_control_plane_benchmark-0.1.0.1.dist-info/WHEEL,sha256=yk-B4c9kYsinhQ_MzhPAVcDm9mhkAVmdo0rg0jgFCmo,94
44
+ isage_control_plane_benchmark-0.1.0.1.dist-info/entry_points.txt,sha256=Vz3Grx2gvseJda-U9vCB-vRNJGfGtUnw4uYXWlm2xqQ,72
45
+ isage_control_plane_benchmark-0.1.0.1.dist-info/top_level.txt,sha256=hibFyzQHiLOMK68qL1OWsNKaXOmSXqZjeLTBem6Yy7I,5
46
+ isage_control_plane_benchmark-0.1.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: cp311-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ sage-cp-bench = sage.benchmark_control_plane.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 IntelliStream Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
sage/__init__.py ADDED
File without changes