llama-benchy 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_benchy/__main__.py +22 -4
- llama_benchy/_version.py +2 -2
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/METADATA +1 -1
- llama_benchy-0.1.2.dist-info/RECORD +8 -0
- llama_benchy-0.1.1.dist-info/RECORD +0 -8
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/WHEEL +0 -0
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/entry_points.txt +0 -0
- {llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/licenses/LICENSE +0 -0
llama_benchy/__main__.py
CHANGED
@@ -241,6 +241,9 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
         "est_ppt": None,
         "e2e_ttft": None
     }
+
+    # DEBUG: Buffer to store first few lines of raw response
+    debug_lines = []
 
     try:
         payload = {
@@ -276,12 +279,22 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
                 while "\n" in buffer:
                     line, buffer = buffer.split("\n", 1)
                     line = line.strip()
-                    if not line
+                    if not line:
+                        continue
+
+                    # Capture first 5 lines for debugging if needed
+                    if len(debug_lines) < 5:
+                        debug_lines.append(line)
+
+                    if line == 'data: [DONE]' or line == 'data:[DONE]':
                         continue
 
-                    if line.startswith('data:
+                    if line.startswith('data:'):
                         try:
-
+                            # Strip 'data:' and potential whitespace
+                            json_str = line[5:].strip()
+                            chunk = json.loads(json_str)
+
                             if 'usage' in chunk:
                                 prompt_usage_tokens = chunk['usage'].get('prompt_tokens', 0)
 
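The net effect of this hunk is easier to see outside the diff: blank lines are skipped explicitly, the [DONE] sentinel is matched with and without a space, and the JSON payload is taken as everything after the 5-character 'data:' prefix, so servers that omit the space after the colon no longer break decoding. A minimal self-contained sketch of that logic (the function name parse_sse_line is illustrative, not part of the package):

import json

def parse_sse_line(line):
    """Decode one SSE line from an OpenAI-compatible stream, or return None."""
    line = line.strip()
    if not line or line in ('data: [DONE]', 'data:[DONE]'):
        return None  # blank line or end-of-stream sentinel
    if not line.startswith('data:'):
        return None  # e.g. SSE comments or 'event:' lines
    try:
        # Tolerate both 'data: {...}' and 'data:{...}'
        return json.loads(line[5:].strip())
    except json.JSONDecodeError:
        return None

assert parse_sse_line('data: {"id": 1}') == {'id': 1}
assert parse_sse_line('data:{"id": 1}') == {'id': 1}
assert parse_sse_line('data: [DONE]') is None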
@@ -292,8 +305,9 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
                             delta = chunk['choices'][0].get('delta', {})
                             content = delta.get('content')
                             reasoning_content = delta.get('reasoning_content')
+                            reasoning = delta.get('reasoning')
 
-                            if content or reasoning_content:
+                            if content or reasoning_content or reasoning:
                                 if token_count == 0:
                                     first_token_time = chunk_time
                                     e2e_ttft = first_token_time - start_time
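Some OpenAI-compatible backends stream reasoning tokens under a delta key named reasoning rather than reasoning_content; which key appears varies by server. Without the extra check, a chunk carrying only reasoning tokens would never start the token clock, so TTFT and the token count would be wrong for such models. A sketch of the fallback order this hunk introduces (the helper name delta_text is hypothetical):

def delta_text(delta):
    # Fallback order taken from the diff; a given server typically
    # populates only one of these keys per chunk.
    return (delta.get('content')
            or delta.get('reasoning_content')
            or delta.get('reasoning')
            or '')

print(delta_text({'reasoning': 'thinking...'}))  # now counted as a token
print(delta_text({'content': 'Hello'}))          # unchanged behaviour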
@@ -307,6 +321,10 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
 
         end_time = time.perf_counter()
 
+        # DEBUG: Print warning if no tokens were collected
+        if token_count == 0:
+            print(f"\n[Warning] Run generated 0 tokens. Raw response sample: {debug_lines}")
+
         if token_count > 0:
             # Calculate decode time (time for subsequent tokens)
             # If only 1 token, decode_time is effectively 0, so we can't calculate inter-token speed
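These hunks all feed the timing math visible in the surrounding context: the first streamed token fixes first_token_time, and everything after it counts as decode. A worked sketch of that arithmetic; the variable names mirror the diff context, but the decode-speed formula is an assumption inferred from the "inter-token speed" comment, not read from the source:

# Example values: 100 tokens, first arriving 0.8 s in, last at 5.8 s.
start_time = 0.0
first_token_time = 0.8
end_time = 5.8
token_count = 100

e2e_ttft = first_token_time - start_time        # 0.8 s to first token
decode_time = end_time - first_token_time       # 5.0 s for tokens 2..100
decode_speed = (token_count - 1) / decode_time  # 99 / 5.0 = 19.8 tokens/s
print(f"TTFT {e2e_ttft:.1f}s, decode {decode_speed:.1f} tok/s")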
llama_benchy/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.1'
-__version_tuple__ = version_tuple = (0, 1, 1)
+__version__ = version = '0.1.2'
+__version_tuple__ = version_tuple = (0, 1, 2)
 
 __commit_id__ = commit_id = None
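_version.py has the shape of a module generated at build time (the VERSION_TUPLE and COMMIT_ID annotations match what setuptools-scm emits), so this hunk is just the mechanical version bump. Assuming the 0.1.2 wheel is installed, the new version can be read either way:

from importlib.metadata import version
print(version('llama-benchy'))  # '0.1.2'

from llama_benchy._version import __version__
print(__version__)              # '0.1.2'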
{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-benchy
-Version: 0.1.1
+Version: 0.1.2
 Summary: llama-bench style benchmarking tool for all OpenAI-compatible LLM endpoints
 Project-URL: Homepage, https://github.com/eugr/llama-benchy
 Project-URL: Bug Tracker, https://github.com/eugr/llama-benchy/issues
llama_benchy-0.1.2.dist-info/RECORD
ADDED

@@ -0,0 +1,8 @@
+llama_benchy/__init__.py,sha256=D2TacJCNiAvfxHovv86Cm1kkFfmwgj_Z6QPoWdjJFhs,239
+llama_benchy/__main__.py,sha256=ArgfdkzjgVv-tdoRW0WXxKEGfdbFDzmH6h3w3lay5zI,25120
+llama_benchy/_version.py,sha256=Ok5oAXdWgR9aghaFXTafTeDW6sYO3uVe6d2Nket57R4,704
+llama_benchy-0.1.2.dist-info/METADATA,sha256=oiJHBXHW_74XnVoKPvALBVP5-sXibFPDtELiCcdQaFw,13439
+llama_benchy-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llama_benchy-0.1.2.dist-info/entry_points.txt,sha256=ZWci87MxOyQtH4tBsuxiLxxnZW7Z-pGiUtmObnXeOv0,60
+llama_benchy-0.1.2.dist-info/licenses/LICENSE,sha256=K71ff-hxnl3muDdvJ3-fbbf5uVgv2dNkzJQXj4G20nk,1075
+llama_benchy-0.1.2.dist-info/RECORD,,
llama_benchy-0.1.1.dist-info/RECORD
REMOVED

@@ -1,8 +0,0 @@
-llama_benchy/__init__.py,sha256=D2TacJCNiAvfxHovv86Cm1kkFfmwgj_Z6QPoWdjJFhs,239
-llama_benchy/__main__.py,sha256=RZalKXmtAAKiCBenE1maVeyvly5fsGQanS5v3YLeDLs,24371
-llama_benchy/_version.py,sha256=m8HxkqoKGw_wAJtc4ZokpJKNLXqp4zwnNhbnfDtro7w,704
-llama_benchy-0.1.1.dist-info/METADATA,sha256=O6DTAZAJta_puufDXqbeFhhlTT-WaeBVoJSfDLOREDo,13439
-llama_benchy-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-llama_benchy-0.1.1.dist-info/entry_points.txt,sha256=ZWci87MxOyQtH4tBsuxiLxxnZW7Z-pGiUtmObnXeOv0,60
-llama_benchy-0.1.1.dist-info/licenses/LICENSE,sha256=K71ff-hxnl3muDdvJ3-fbbf5uVgv2dNkzJQXj4G20nk,1075
-llama_benchy-0.1.1.dist-info/RECORD,,
{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/WHEEL
File without changes

{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/entry_points.txt
File without changes

{llama_benchy-0.1.1.dist-info → llama_benchy-0.1.2.dist-info}/licenses/LICENSE
File without changes