llama-benchy 0.1.0-py3-none-any.whl → 0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_benchy/__init__.py +0 -16
- llama_benchy/__main__.py +4 -4
- llama_benchy/_version.py +2 -2
- {llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/METADATA +25 -3
- llama_benchy-0.1.1.dist-info/RECORD +8 -0
- llama_benchy-0.1.0.dist-info/RECORD +0 -8
- {llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/WHEEL +0 -0
- {llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/entry_points.txt +0 -0
- {llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/licenses/LICENSE +0 -0
llama_benchy/__init__.py
CHANGED
@@ -6,19 +6,3 @@ generating statistics similar to `llama-bench`.
 """
 
 from ._version import __version__
-
-# Extract build number from the version string
-# Version format is like: '0.1.dev34+g33f03d886.d20260105'
-# We want to extract the git hash part: '33f03d886'
-__build__ = "unknown"
-if "+" in __version__:
-    try:
-        # Extract the part after the '+' and before the '.'
-        build_part = __version__.split("+")[1].split(".")[0]
-        # Remove the 'g' prefix if it exists
-        if build_part.startswith("g"):
-            __build__ = build_part[1:]
-        else:
-            __build__ = build_part
-    except (IndexError, AttributeError):
-        pass
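For reference, the deleted block parsed the git hash out of a setuptools-scm-style dev version. A minimal standalone sketch of the same parsing, using the example string from the deleted comments:

```python
# Example version string from the removed comments; released wheels
# like 0.1.1 carry no "+" local segment, so the hash stays "unknown".
version = "0.1.dev34+g33f03d886.d20260105"

build = "unknown"
if "+" in version:
    # Local segment is "g33f03d886.d20260105"; the first dotted part
    # holds the git hash, prefixed with "g".
    local_part = version.split("+")[1].split(".")[0]
    build = local_part[1:] if local_part.startswith("g") else local_part

print(build)  # 33f03d886
```

Since a released version string never contains a `+`, `__build__` was always `"unknown"` in published wheels, which is presumably why 0.1.1 drops it.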
llama_benchy/__main__.py
CHANGED
@@ -19,12 +19,13 @@ from transformers import AutoTokenizer
 import requests
 
 # Build number is now imported from __init__.py
-from . import __version__, __build__
+from . import __version__
 
 
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description="LLM Benchmark Script")
+    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
     parser.add_argument("--base-url", type=str, required=True, help="OpenAI compatible endpoint URL")
     parser.add_argument("--api-key", type=str, default="EMPTY", help="API Key for the endpoint")
     parser.add_argument("--model", type=str, required=True, help="Model name to use for benchmarking")
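The new flag relies on argparse's built-in `version` action, which prints the formatted string and exits immediately. A minimal reproduction (the `0.1.1` literal stands in for the imported `__version__`):

```python
import argparse

parser = argparse.ArgumentParser(prog="llama-benchy")
# The "version" action prints its string and exits with status 0;
# "%(prog)s" expands to the parser's prog name.
parser.add_argument("--version", action="version", version="%(prog)s 0.1.1")

parser.parse_args(["--version"])  # prints "llama-benchy 0.1.1" and exits
```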
@@ -369,11 +370,10 @@ async def main_async():
         print("Error: --enable-prefix-caching and --no-cache are incompatible.")
         return
 
-    build_number = __build__
     version_number = __version__
 
     current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    print(f"llama-benchy ({version_number}
+    print(f"llama-benchy ({version_number})")
     print(f"Date: {current_time}")
     print(f"Benchmarking model: {args.model} at {args.base_url}")
 
@@ -522,7 +522,7 @@ async def main_async():
         print("No results collected. Check if the model is generating tokens.")
     else:
         print(tabulate(results, headers=["model", "test", "t/s", "ttfr (ms)", "est_ppt (ms)", "e2e_ttft (ms)"], tablefmt="pipe", colalign=("left", "right", "right", "right", "right", "right")))
-    print(f"\nllama-benchy ({version_number}
+    print(f"\nllama-benchy ({version_number})")
     print(f"date: {current_time} | latency mode: {args.latency_mode}")
 
 
llama_benchy/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.0'
-__version_tuple__ = version_tuple = (0, 1, 0)
+__version__ = version = '0.1.1'
+__version_tuple__ = version_tuple = (0, 1, 1)
 
 __commit_id__ = commit_id = None
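The shape of `_version.py` (a static `__version__`/`version_tuple` pair plus a `commit_id` slot) matches a file generated at build time by a VCS-versioning backend such as setuptools-scm; that tooling is an assumption, since the diff doesn't name it. At runtime the same value can also be read from the installed metadata:

```python
from importlib.metadata import version

# Reads the Version field of the installed distribution's METADATA,
# i.e. the same "0.1.1" shown in the METADATA diff below.
print(version("llama-benchy"))
```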
{llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-benchy
-Version: 0.1.0
+Version: 0.1.1
 Summary: llama-bench style benchmarking tool for all OpenAI-compatible LLM endpoints
 Project-URL: Homepage, https://github.com/eugr/llama-benchy
 Project-URL: Bug Tracker, https://github.com/eugr/llama-benchy/issues
@@ -76,12 +76,26 @@ As of January 2nd, 2026, I wasn't able to find any existing benchmarking tool th
 - Supports executing a command after each run (e.g., to clear cache).
 - Configurable latency measurement mode.
 
+# Current Limitations
+
+- Evaluates against `/v1/chat/completions` endpoint only.
+- Doesn't measure throughput in concurrency mode (coming later).
+- Outputs results as a Markdown table only for now.
+
 ## Installation
 
-
+Using `uv` is recommended. You can install `uv` here: https://docs.astral.sh/uv/getting-started/installation/
 
 ### Option 1: Run without installation using `uvx`
 
+Run the release version from PyPI:
+
+```bash
+uvx llama-benchy --base-url <ENDPOINT_URL> --model <MODEL_NAME>
+```
+
+Run the latest version from the main branch:
+
 ```bash
 uvx --from git+https://github.com/eugr/llama-benchy llama-benchy --base-url <ENDPOINT_URL> --model <MODEL_NAME>
 ```
@@ -126,6 +140,14 @@ uv run llama-benchy --base-url <ENDPOINT_URL> --model <MODEL_NAME>
 
 ### Option 3: Install into system path
 
+Release version from PyPI:
+
+```bash
+uv pip install -U llama-benchy
+```
+
+Current version from the main branch:
+
 ```bash
 uv pip install git+https://github.com/eugr/llama-benchy --system
 ```
@@ -233,7 +255,7 @@ When `--enable-prefix-caching` is used (with `--depth` > 0), the script performs
 2. **Inference**: Sends the same context (system message) followed by the actual prompt (user message). The server should reuse the cached context.
 - Reported as standard `pp{tokens} @ d{depth}` and `tg{tokens} @ d{depth}`.
 
-
+In this case, the `pp` and `tg` speeds show the actual prompt processing / token generation speed for a follow-up prompt with the context pre-filled.
 
 ### Example
 
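The warm-up/inference pattern described above can be sketched as two chat-completions calls that share the same system message. Everything concrete here (endpoint URL, model name, context construction) is illustrative, not llama-benchy's actual code:

```python
import requests

BASE_URL = "http://localhost:8000/v1"      # hypothetical endpoint
HEADERS = {"Authorization": "Bearer EMPTY"}
context = "lorem ipsum " * 2000            # stands in for the depth-d context
prompt = "Summarize the context above."

def chat(messages):
    resp = requests.post(f"{BASE_URL}/chat/completions", headers=HEADERS,
                         json={"model": "my-model", "messages": messages,
                               "max_tokens": 128})
    resp.raise_for_status()
    return resp.json()

# 1. Warm-up: send the context alone so the server caches its KV prefix.
chat([{"role": "system", "content": context}])

# 2. Inference: same context plus the real prompt. With prefix caching,
#    the server should only prefill the new user message, so the measured
#    pp/tg reflect follow-up-prompt speeds at that depth.
chat([{"role": "system", "content": context},
      {"role": "user", "content": prompt}])
```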
llama_benchy-0.1.1.dist-info/RECORD
ADDED

@@ -0,0 +1,8 @@
+llama_benchy/__init__.py,sha256=D2TacJCNiAvfxHovv86Cm1kkFfmwgj_Z6QPoWdjJFhs,239
+llama_benchy/__main__.py,sha256=RZalKXmtAAKiCBenE1maVeyvly5fsGQanS5v3YLeDLs,24371
+llama_benchy/_version.py,sha256=m8HxkqoKGw_wAJtc4ZokpJKNLXqp4zwnNhbnfDtro7w,704
+llama_benchy-0.1.1.dist-info/METADATA,sha256=O6DTAZAJta_puufDXqbeFhhlTT-WaeBVoJSfDLOREDo,13439
+llama_benchy-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llama_benchy-0.1.1.dist-info/entry_points.txt,sha256=ZWci87MxOyQtH4tBsuxiLxxnZW7Z-pGiUtmObnXeOv0,60
+llama_benchy-0.1.1.dist-info/licenses/LICENSE,sha256=K71ff-hxnl3muDdvJ3-fbbf5uVgv2dNkzJQXj4G20nk,1075
+llama_benchy-0.1.1.dist-info/RECORD,,
llama_benchy-0.1.0.dist-info/RECORD
REMOVED

@@ -1,8 +0,0 @@
-llama_benchy/__init__.py,sha256=4DTHEmeJShcJJdb6rPhFqv4_zREEQRZGZ3h0ThA6YpU,798
-llama_benchy/__main__.py,sha256=CZEf_36w5iut5RZjAZ4F894PAHF3hoxDWkT6lTFcr_I,24351
-llama_benchy/_version.py,sha256=5jwwVncvCiTnhOedfkzzxmxsggwmTBORdFL_4wq0ZeY,704
-llama_benchy-0.1.0.dist-info/METADATA,sha256=WEL0ASCMSmJA8QPXJzAYA0fAztA_6-D7xGjNdx3o4vY,12943
-llama_benchy-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-llama_benchy-0.1.0.dist-info/entry_points.txt,sha256=ZWci87MxOyQtH4tBsuxiLxxnZW7Z-pGiUtmObnXeOv0,60
-llama_benchy-0.1.0.dist-info/licenses/LICENSE,sha256=K71ff-hxnl3muDdvJ3-fbbf5uVgv2dNkzJQXj4G20nk,1075
-llama_benchy-0.1.0.dist-info/RECORD,,
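Each RECORD row is `path,hash,size`, where the hash is an urlsafe-base64 SHA-256 digest with the `=` padding stripped (per the wheel RECORD format). A small sketch for verifying one entry against the installed file:

```python
import base64
import hashlib

def record_hash(data: bytes) -> str:
    # RECORD stores "sha256=" + urlsafe base64 of the digest, unpadded.
    digest = hashlib.sha256(data).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

# Hypothetical check against the _version.py row shown above:
data = open("llama_benchy/_version.py", "rb").read()
assert record_hash(data) == "sha256=m8HxkqoKGw_wAJtc4ZokpJKNLXqp4zwnNhbnfDtro7w"
assert len(data) == 704
```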
{llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/WHEEL
File without changes

{llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/entry_points.txt
File without changes

{llama_benchy-0.1.0.dist-info → llama_benchy-0.1.1.dist-info}/licenses/LICENSE
File without changes