clawperf 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clawperf/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """ClawPerf - Performance benchmarking tool for LLM Serving backends.
2
+
3
+ Reuses EvalScope's perf infrastructure for HTTP, streaming, and timing,
4
+ and adds multi-turn long-context workloads with append-mode compaction,
5
+ user arrival scheduling, and system metrics polling.
6
+ """
7
+
8
# Package version string; the wheel this module ships in is tagged 0.1.0.
__version__ = "0.1.0"
clawperf/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for running ClawPerfBench as a module: python -m clawperf"""
2
+
3
+ from clawperf.cli import main
4
+
5
# Dispatch to the CLI only when this module is executed (python -m clawperf),
# not when it is merely imported.
if __name__ == "__main__":
    main()
clawperf/cli.py ADDED
@@ -0,0 +1,107 @@
1
+ """CLI argument parser for ClawPerf."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import asyncio
7
+
8
+ from clawperf.config import BenchmarkConfig
9
+
10
+
11
def build_parser() -> argparse.ArgumentParser:
    """Build the ``clawperf`` command-line parser.

    Options are grouped by concern (users, context, run, API, system
    metrics, output). Every option carries a help string: the parser uses
    ``ArgumentDefaultsHelpFormatter``, which only renders a default for
    options that have help text, so options without ``help=`` would show
    neither a description nor their default in ``--help``.

    Returns:
        The configured parser. ``--endpoint`` and ``--model`` are required;
        every other option has a default.
    """
    parser = argparse.ArgumentParser(
        prog="clawperf",
        description=(
            "ClawPerf - Performance testing tool for LLM Serving backends. "
            "Simulates multi-user, multi-turn, long-context workloads against "
            "vLLM, SGLang, and MindIE backends. "
            "Built on EvalScope's perf infrastructure."
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # ── User configuration ──
    g = parser.add_argument_group("User Configuration")
    g.add_argument("--num-users", type=int, default=1, help="Total concurrent users.")
    g.add_argument(
        "--user-arrival", type=str, default="burst",
        help="'burst', 'steady:<seconds>', or 'poisson:<lambda>'.",
    )

    # ── Context configuration ──
    g = parser.add_argument_group("Context Configuration")
    g.add_argument(
        "--system-prefix-tokens", type=int, default=15000,
        help="Token count of the system prefix.",
    )
    g.add_argument(
        "--system-prefix-source", type=str, default="random",
        help="System prefix source: 'random' or a file path.",
    )
    g.add_argument(
        "--user-prefix-tokens", type=int, default=5000,
        help="Initial token count of the user prefix.",
    )
    g.add_argument(
        "--input-tokens-per-turn", type=int, default=5000,
        help="Input tokens per turn.",
    )
    g.add_argument(
        "--output-tokens-per-turn", type=int, default=1000,
        help="Maximum output tokens per turn.",
    )
    g.add_argument(
        "--max-context-tokens", type=int, default=128000,
        help="Context token limit; reaching it triggers compaction.",
    )
    g.add_argument(
        "--compaction-prefix-increment", type=int, default=5000,
        help="Tokens added to the user prefix at each compaction.",
    )

    # ── Run configuration ──
    g = parser.add_argument_group("Run Configuration")
    g.add_argument("--max-turns", type=int, default=100, help="Maximum number of turns.")

    # ── API configuration ──
    g = parser.add_argument_group("API Configuration")
    g.add_argument("--endpoint", type=str, required=True, help="Serving endpoint URL.")
    g.add_argument("--model", type=str, required=True, help="Model name.")
    g.add_argument(
        "--api-key", type=str, default="",
        help="API key; sent as a Bearer token when non-empty.",
    )
    g.add_argument(
        "--tokenizer", type=str, default="",
        help="Tokenizer to use; falls back to --model when empty.",
    )
    g.add_argument(
        "--ignore-eos", action="store_true", default=True,
        help="Set ignore_eos in the request extra args.",
    )
    # The default for ``ignore_eos`` is owned by --ignore-eos above (the first
    # action registered for a dest supplies the namespace default). SUPPRESS
    # here only keeps a misleading "(default: True)" out of this flag's help.
    g.add_argument(
        "--no-ignore-eos", action="store_false", dest="ignore_eos",
        default=argparse.SUPPRESS,
        help="Do not set ignore_eos.",
    )
    g.add_argument("--request-timeout", type=int, default=600, help="Request timeout.")

    # ── System metrics ──
    g = parser.add_argument_group("System Metrics")
    g.add_argument(
        "--metrics-endpoint", type=str, default=None,
        help="Endpoint polled for system metrics.",
    )
    g.add_argument(
        "--metrics-interval", type=int, default=5,
        help="System metrics polling interval.",
    )
    g.add_argument(
        "--backend", type=str, default="vllm", choices=["vllm", "sglang", "mindie"],
        help="Serving backend type.",
    )

    # ── Output ──
    g = parser.add_argument_group("Output")
    g.add_argument("--output", type=str, default="results.json", help="Results output file.")
    g.add_argument("-v", "--verbose", action="store_true", default=False,
                   help="Print per-turn progress lines (default: tqdm progress bar)")

    return parser
68
+
69
+
70
def parse_args(argv: list[str] | None = None) -> BenchmarkConfig:
    """Parse command-line arguments into a ``BenchmarkConfig``.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        The populated configuration object.

    A ``ValueError`` raised while constructing the config (for example an
    invalid ``--user-arrival`` value) is reported through ``parser.error``,
    so the user gets the usual usage message and exit status 2 instead of a
    traceback, consistent with how argparse reports other bad arguments.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    try:
        return BenchmarkConfig(
            num_users=args.num_users,
            user_arrival=args.user_arrival,
            system_prefix_tokens=args.system_prefix_tokens,
            system_prefix_source=args.system_prefix_source,
            user_prefix_tokens=args.user_prefix_tokens,
            input_tokens_per_turn=args.input_tokens_per_turn,
            output_tokens_per_turn=args.output_tokens_per_turn,
            max_context_tokens=args.max_context_tokens,
            compaction_prefix_increment=args.compaction_prefix_increment,
            max_turns=args.max_turns,
            endpoint=args.endpoint,
            model=args.model,
            api_key=args.api_key,
            tokenizer=args.tokenizer,
            ignore_eos=args.ignore_eos,
            request_timeout=args.request_timeout,
            metrics_endpoint=args.metrics_endpoint,
            metrics_interval=args.metrics_interval,
            backend=args.backend,
            output=args.output,
            verbose=args.verbose,
        )
    except ValueError as exc:
        # parser.error prints usage plus the message and raises SystemExit(2).
        parser.error(str(exc))
96
+
97
+
98
def main():
    """Command-line entry point: parse arguments and run the benchmark.

    On Ctrl-C the runner's ``shutdown_and_save`` coroutine is executed in a
    fresh ``asyncio.run`` call after the interrupt notice is printed.
    """
    cfg = parse_args()

    # Imported only after argument parsing, so `--help` and argument errors
    # exit before the runner module is loaded.
    from clawperf.runner import BenchmarkRunner

    bench = BenchmarkRunner(cfg)
    try:
        asyncio.run(bench.run())
    except KeyboardInterrupt:
        print("\n[ClawPerf] Interrupted. Saving partial results...")
        asyncio.run(bench.shutdown_and_save())
clawperf/config.py ADDED
@@ -0,0 +1,107 @@
1
+ """Configuration for ClawPerfBench.
2
+
3
+ Wraps EvalScope's Arguments where possible, extends with
4
+ ClawPerfBench-specific context/compaction/scheduling parameters.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import dataclasses
10
+ from typing import Optional
11
+
12
+
13
@dataclasses.dataclass
class BenchmarkConfig:
    """All configurable parameters for a benchmark run.

    After construction, ``tokenizer`` falls back to ``model`` when empty, and
    ``arrival_mode`` / ``arrival_param`` are always recomputed from
    ``user_arrival`` (any values passed for those two fields are overwritten).

    Raises:
        ValueError: If ``user_arrival`` is not ``'burst'``,
            ``'steady:<interval>'`` or ``'poisson:<lambda>'``, or if its
            numeric parameter is missing, non-numeric, negative or non-finite.
    """

    # ── User configuration ──
    num_users: int = 1
    user_arrival: str = "burst"  # "burst", "steady:<interval>", "poisson:<lambda>"

    # ── Context configuration ──
    system_prefix_tokens: int = 15000
    system_prefix_source: str = "random"  # "random" or file path
    user_prefix_tokens: int = 5000
    input_tokens_per_turn: int = 5000
    output_tokens_per_turn: int = 1000
    max_context_tokens: int = 128000
    compaction_prefix_increment: int = 5000

    # ── Run configuration ──
    max_turns: int = 100

    # ── API configuration ──
    endpoint: str = ""
    model: str = ""
    api_key: str = ""
    tokenizer: str = ""  # defaults to model if empty
    ignore_eos: bool = True
    request_timeout: int = 600

    # ── System metrics configuration ──
    metrics_endpoint: Optional[str] = None
    metrics_interval: int = 5
    backend: str = "vllm"  # "vllm", "sglang", "mindie"

    # ── Output configuration ──
    output: str = "results.json"
    verbose: bool = False  # per-turn detailed logging

    # ── Derived fields ──
    arrival_mode: str = ""
    arrival_param: float = 0.0

    def __post_init__(self):
        """Fill in the tokenizer fallback and the derived arrival fields."""
        if not self.tokenizer:
            self.tokenizer = self.model
        self._parse_arrival_mode()

    def _parse_arrival_mode(self):
        """Split ``user_arrival`` into ``arrival_mode`` and ``arrival_param``.

        Raises:
            ValueError: On an unknown mode or an unusable numeric parameter.
        """
        if self.user_arrival == "burst":
            self.arrival_mode = "burst"
            self.arrival_param = 0.0
            return

        # partition() keeps everything after the first ':' together, so
        # trailing junk such as "steady:1:2" is rejected instead of being
        # silently truncated to "1".
        mode, sep, raw_param = self.user_arrival.partition(":")
        if not sep or mode not in ("steady", "poisson"):
            raise ValueError(
                f"Invalid user_arrival format: {self.user_arrival!r}. "
                "Expected 'burst', 'steady:<interval>', or 'poisson:<lambda>'."
            )

        try:
            value = float(raw_param)
        except ValueError:
            value = None
        # The chained comparison is False for NaN, negatives and infinity.
        # NOTE(review): zero is still accepted; whether the scheduler copes
        # with 'poisson:0' cannot be determined from this module.
        if value is None or not (0.0 <= value < float("inf")):
            raise ValueError(
                f"Invalid user_arrival parameter in {self.user_arrival!r}: "
                f"expected a finite, non-negative number after '{mode}:'."
            )

        self.arrival_mode = mode
        self.arrival_param = value

    def to_evalscope_args(self):
        """Build an EvalScope Arguments object from this config.

        Reuses EvalScope's connection/timeout/stream/model settings.
        Note: EvalScope's number/parallel are lists after validation.

        Returns:
            An ``evalscope.perf.arguments.Arguments`` instance with streaming
            enabled, ``max_tokens`` set to ``output_tokens_per_turn``, and
            ``number`` / ``parallel`` both set to ``[num_users]``.
        """
        # Imported lazily so that building a config does not require EvalScope.
        from evalscope.perf.arguments import Arguments

        args = Arguments(
            model=self.model,
            url=self.endpoint,
            tokenizer_path=self.tokenizer,
            stream=True,
            max_tokens=self.output_tokens_per_turn,
            number=[self.num_users],
            parallel=[self.num_users],
            total_timeout=self.request_timeout,
            api="openai",
            no_test_connection=True,
        )
        if self.api_key:
            args.headers["Authorization"] = f"Bearer {self.api_key}"

        # Handle ignore_eos via extra_args
        if self.ignore_eos:
            # Copy before editing so an existing extra_args mapping is not
            # mutated in place.
            extra = dict(args.extra_args) if args.extra_args else {}
            extra["ignore_eos"] = True
            args.extra_args = extra

        return args

    def to_dict(self) -> dict:
        """Return every field, including derived ones, as a plain dict.

        NOTE(review): the result includes ``api_key`` verbatim; confirm that
        callers persisting this dict are meant to store the key.
        """
        return dataclasses.asdict(self)
clawperf/context.py ADDED
@@ -0,0 +1,138 @@
1
+ """Context assembly and compaction logic.
2
+
3
+ Implements the per-turn context structure:
4
+ [System Prefix] [User Prefix] [History] [Current Input]
5
+
6
+ And the append-mode compaction mechanism.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from dataclasses import dataclass, field
13
+ from typing import List, Tuple
14
+
15
+ logger = logging.getLogger("clawperf")
16
+
17
+
18
@dataclass
class CompactionEvent:
    """Record of a single compaction applied to one user's context."""

    user_id: int  # id of the user whose context was compacted
    turn: int  # turn id at which the compaction happened
    # UserContext.prepare_turn stores 0.0 here; presumably the caller stamps
    # the real time afterwards — TODO confirm.
    time: float
    old_prefix_len: int  # user-prefix token count before compaction
    new_prefix_len: int  # user-prefix token count after compaction
25
+
26
+
27
@dataclass
class UserContext:
    """Maintains the evolving context state for a single user.

    Each turn is assembled as
    ``[System Prefix] [User Prefix] [History] [Current Input]``. When the
    assembled context reaches ``max_context_tokens``, the history is dropped
    and the user prefix is regenerated ``compaction_prefix_increment`` tokens
    longer (append-mode compaction).
    """

    user_id: int
    system_prefix: str
    user_prefix_tokens: int
    user_prefix_content: str
    input_tokens_per_turn: int
    max_context_tokens: int
    compaction_prefix_increment: int
    max_turns: int

    # (user message, assistant reply) pairs in chronological order.
    history: List[Tuple[str, str]] = field(default_factory=list)
    compaction_events: List["CompactionEvent"] = field(default_factory=list)

    def prepare_turn(
        self,
        turn_id: int,
        current_input_content: str,
        tokenizer_manager,
    ) -> dict:
        """Prepare context for a turn. May trigger compaction.

        Args:
            turn_id: Identifier of the turn, used in logs and events.
            current_input_content: Text of this turn's user input.
            tokenizer_manager: Object providing ``count_chat_tokens(messages)``
                and ``generate_random_content(num_tokens)``.

        Returns:
            dict with: messages, context_tokens, compaction_triggered,
            compaction_event, context_overflow. ``context_overflow`` is True
            when the context is at or over the limit and compaction either
            cannot help or did not bring it back under the limit.
        """
        compaction_event = None
        context_overflow = False
        messages = self._build_messages(current_input_content)
        context_tokens = tokenizer_manager.count_chat_tokens(messages)

        if context_tokens >= self.max_context_tokens:
            # Check if base context (without history) already exceeds limit
            # — compaction can't help if system+prefix+input alone is too large
            if self.history:
                base_messages = self._build_messages(
                    current_input_content, skip_history=True
                )
                base_tokens = tokenizer_manager.count_chat_tokens(base_messages)
            else:
                # With no history the base context is the message list that
                # was just counted; avoid tokenizing it a second time.
                base_tokens = context_tokens

            if base_tokens >= self.max_context_tokens:
                context_overflow = True
                logger.warning(
                    "[User %02d] Base context (%d tokens) already exceeds limit (%d) "
                    "— compaction cannot help, skipping turn %d",
                    self.user_id, base_tokens, self.max_context_tokens, turn_id,
                )
            else:
                compaction_event = self._compact(turn_id, tokenizer_manager)
                # Rebuild against the new prefix and the now-empty history.
                messages = self._build_messages(current_input_content)
                context_tokens = tokenizer_manager.count_chat_tokens(messages)

                if context_tokens >= self.max_context_tokens:
                    context_overflow = True
                    logger.warning(
                        "[User %02d] Context still exceeds limit after compaction: "
                        "%d >= %d tokens",
                        self.user_id, context_tokens, self.max_context_tokens,
                    )

        return {
            "messages": messages,
            "context_tokens": context_tokens,
            "compaction_triggered": compaction_event is not None,
            "compaction_event": compaction_event,
            "context_overflow": context_overflow,
        }

    def _compact(self, turn_id: int, tokenizer_manager) -> "CompactionEvent":
        """Clear the history, grow the user prefix, and record the event."""
        old_prefix_len = self.user_prefix_tokens
        self.history.clear()
        self.user_prefix_tokens += self.compaction_prefix_increment
        self.user_prefix_content = tokenizer_manager.generate_random_content(
            self.user_prefix_tokens
        )
        event = CompactionEvent(
            user_id=self.user_id,
            turn=turn_id,
            time=0.0,
            old_prefix_len=old_prefix_len,
            new_prefix_len=self.user_prefix_tokens,
        )
        self.compaction_events.append(event)
        logger.info(
            "[User %02d] Compaction at turn %d: prefix %d → %d",
            self.user_id, turn_id, old_prefix_len, self.user_prefix_tokens,
        )
        return event

    def _build_messages(self, current_input: str, skip_history: bool = False) -> List[dict]:
        """Assemble the chat message list for one request.

        The user prefix is not sent as its own message: it is joined with a
        newline onto the first user message, which is the first history entry
        if any history is included and the current input otherwise.

        Args:
            current_input: Text of the current user turn.
            skip_history: When True, leave out all history entries.

        Returns:
            A list of ``{"role": ..., "content": ...}`` dicts.
        """
        messages = []
        if self.system_prefix:
            messages.append({"role": "system", "content": self.system_prefix})

        # Prefix text still waiting to be merged into the first user message.
        pending = [self.user_prefix_content] if self.user_prefix_content else []

        # Decide once whether history participates, not on every iteration.
        turns = () if skip_history else self.history
        for user_msg, assistant_msg in turns:
            if pending:
                pending.append(user_msg)
                user_msg = "\n".join(pending)
                pending = []
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

        # If the prefix was already consumed, this is just the input itself.
        pending.append(current_input)
        messages.append({"role": "user", "content": "\n".join(pending)})

        return messages

    def append_history(self, user_message: str, assistant_reply: str):
        """Record a completed (user message, assistant reply) exchange."""
        self.history.append((user_message, assistant_reply))