llama-benchy 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_benchy/__main__.py +22 -4
- llama_benchy/_version.py +2 -2
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/METADATA +1 -1
- llama_benchy-0.1.2.dist-info/RECORD +8 -0
- llama_benchy-0.1.1.dist-info/RECORD +0 -8
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/WHEEL +0 -0
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/entry_points.txt +0 -0
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/licenses/LICENSE +0 -0
llama_benchy/__main__.py
CHANGED
@@ -241,6 +241,9 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
         "est_ppt": None,
         "e2e_ttft": None
     }
+
+    # DEBUG: Buffer to store first few lines of raw response
+    debug_lines = []
 
     try:
         payload = {
@@ -276,12 +279,22 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
                 while "\n" in buffer:
                     line, buffer = buffer.split("\n", 1)
                     line = line.strip()
-                    if not line
+                    if not line:
+                        continue
+
+                    # Capture first 5 lines for debugging if needed
+                    if len(debug_lines) < 5:
+                        debug_lines.append(line)
+
+                    if line == 'data: [DONE]' or line == 'data:[DONE]':
                         continue
 
-                    if line.startswith('data:
+                    if line.startswith('data:'):
                         try:
-
+                            # Strip 'data:' and potential whitespace
+                            json_str = line[5:].strip()
+                            chunk = json.loads(json_str)
+
                             if 'usage' in chunk:
                                 prompt_usage_tokens = chunk['usage'].get('prompt_tokens', 0)
 
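The net effect of this hunk is easier to see outside the diff: blank lines are skipped explicitly, the [DONE] sentinel is matched with and without a space, and the JSON payload is taken as everything after the 5-character 'data:' prefix, so servers that omit the space after the colon no longer break decoding. A minimal self-contained sketch of that logic (the function name parse_sse_line is illustrative, not part of the package):

import json

def parse_sse_line(line):
    """Decode one SSE line from an OpenAI-compatible stream, or return None."""
    line = line.strip()
    if not line or line in ('data: [DONE]', 'data:[DONE]'):
        return None  # blank line or end-of-stream sentinel
    if not line.startswith('data:'):
        return None  # e.g. SSE comments or 'event:' lines
    try:
        # Tolerate both 'data: {...}' and 'data:{...}'
        return json.loads(line[5:].strip())
    except json.JSONDecodeError:
        return None

assert parse_sse_line('data: {"id": 1}') == {'id': 1}
assert parse_sse_line('data:{"id": 1}') == {'id': 1}
assert parse_sse_line('data: [DONE]') is None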
@@ -292,8 +305,9 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
                             delta = chunk['choices'][0].get('delta', {})
                             content = delta.get('content')
                             reasoning_content = delta.get('reasoning_content')
+                            reasoning = delta.get('reasoning')
 
-                            if content or reasoning_content:
+                            if content or reasoning_content or reasoning:
                                 if token_count == 0:
                                     first_token_time = chunk_time
                                     e2e_ttft = first_token_time - start_time
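Some OpenAI-compatible backends stream reasoning tokens under a delta key named reasoning rather than reasoning_content; which key appears varies by server. Without the extra check, a chunk carrying only reasoning tokens would never start the token clock, so TTFT and the token count would be wrong for such models. A sketch of the fallback order this hunk introduces (the helper name delta_text is hypothetical):

def delta_text(delta):
    # Fallback order taken from the diff; a given server typically
    # populates only one of these keys per chunk.
    return (delta.get('content')
            or delta.get('reasoning_content')
            or delta.get('reasoning')
            or '')

print(delta_text({'reasoning': 'thinking...'}))  # now counted as a token
print(delta_text({'content': 'Hello'}))          # unchanged behaviour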
@@ -307,6 +321,10 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
 
         end_time = time.perf_counter()
 
+        # DEBUG: Print warning if no tokens were collected
+        if token_count == 0:
+            print(f"\n[Warning] Run generated 0 tokens. Raw response sample: {debug_lines}")
+
         if token_count > 0:
             # Calculate decode time (time for subsequent tokens)
             # If only 1 token, decode_time is effectively 0, so we can't calculate inter-token speed
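These hunks all feed the timing math visible in the surrounding context: the first streamed token fixes first_token_time, and everything after it counts as decode. A worked sketch of that arithmetic; the variable names mirror the diff context, but the decode-speed formula is an assumption inferred from the "inter-token speed" comment, not read from the source:

# Example values: 100 tokens, first arriving 0.8 s in, last at 5.8 s.
start_time = 0.0
first_token_time = 0.8
end_time = 5.8
token_count = 100

e2e_ttft = first_token_time - start_time        # 0.8 s to first token
decode_time = end_time - first_token_time       # 5.0 s for tokens 2..100
decode_speed = (token_count - 1) / decode_time  # 99 / 5.0 = 19.8 tokens/s
print(f"TTFT {e2e_ttft:.1f}s, decode {decode_speed:.1f} tok/s")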
llama_benchy/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.1'
-__version_tuple__ = version_tuple = (0, 1, 1)
+__version__ = version = '0.1.2'
+__version_tuple__ = version_tuple = (0, 1, 2)
 
 __commit_id__ = commit_id = None
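_version.py has the shape of a module generated at build time (the VERSION_TUPLE and COMMIT_ID annotations match what setuptools-scm emits), so this hunk is just the mechanical version bump. Assuming the 0.1.2 wheel is installed, the new version can be read either way:

from importlib.metadata import version
print(version('llama-benchy'))  # '0.1.2'

from llama_benchy._version import __version__
print(__version__)              # '0.1.2'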
{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-benchy
-Version: 0.1.1
+Version: 0.1.2
 Summary: llama-bench style benchmarking tool for all OpenAI-compatible LLM endpoints
 Project-URL: Homepage, https://github.com/eugr/llama-benchy
 Project-URL: Bug Tracker, https://github.com/eugr/llama-benchy/issues
llama_benchy-0.1.2.dist-info/RECORD
ADDED

@@ -0,0 +1,8 @@
+llama_benchy/__init__.py,sha256=D2TacJCNiAvfxHovv86Cm1kkFfmwgj_Z6QPoWdjJFhs,239
+llama_benchy/__main__.py,sha256=ArgfdkzjgVv-tdoRW0WXxKEGfdbFDzmH6h3w3lay5zI,25120
+llama_benchy/_version.py,sha256=Ok5oAXdWgR9aghaFXTafTeDW6sYO3uVe6d2Nket57R4,704
+llama_benchy-0.1.2.dist-info/METADATA,sha256=oiJHBXHW_74XnVoKPvALBVP5-sXibFPDtELiCcdQaFw,13439
+llama_benchy-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llama_benchy-0.1.2.dist-info/entry_points.txt,sha256=ZWci87MxOyQtH4tBsuxiLxxnZW7Z-pGiUtmObnXeOv0,60
+llama_benchy-0.1.2.dist-info/licenses/LICENSE,sha256=K71ff-hxnl3muDdvJ3-fbbf5uVgv2dNkzJQXj4G20nk,1075
+llama_benchy-0.1.2.dist-info/RECORD,,
llama_benchy-0.1.1.dist-info/RECORD
REMOVED

@@ -1,8 +0,0 @@
-llama_benchy/__init__.py,sha256=D2TacJCNiAvfxHovv86Cm1kkFfmwgj_Z6QPoWdjJFhs,239
-llama_benchy/__main__.py,sha256=RZalKXmtAAKiCBenE1maVeyvly5fsGQanS5v3YLeDLs,24371
-llama_benchy/_version.py,sha256=m8HxkqoKGw_wAJtc4ZokpJKNLXqp4zwnNhbnfDtro7w,704
-llama_benchy-0.1.1.dist-info/METADATA,sha256=O6DTAZAJta_puufDXqbeFhhlTT-WaeBVoJSfDLOREDo,13439
-llama_benchy-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-llama_benchy-0.1.1.dist-info/entry_points.txt,sha256=ZWci87MxOyQtH4tBsuxiLxxnZW7Z-pGiUtmObnXeOv0,60
-llama_benchy-0.1.1.dist-info/licenses/LICENSE,sha256=K71ff-hxnl3muDdvJ3-fbbf5uVgv2dNkzJQXj4G20nk,1075
-llama_benchy-0.1.1.dist-info/RECORD,,
{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/WHEEL
File without changes

{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/entry_points.txt
File without changes

{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/licenses/LICENSE
File without changes