llama-benchy 0.1.1.tar.gz → 0.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-benchy
-Version: 0.1.1
+Version: 0.1.2
 Summary: llama-bench style benchmarking tool for all OpenAI-compatible LLM endpoints
 Author: eugr
 License: MIT License
@@ -241,6 +241,9 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
         "est_ppt": None,
         "e2e_ttft": None
     }
+
+    # DEBUG: Buffer to store first few lines of raw response
+    debug_lines = []
 
     try:
         payload = {
@@ -276,12 +279,22 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
                 while "\n" in buffer:
                     line, buffer = buffer.split("\n", 1)
                     line = line.strip()
-                    if not line or line == 'data: [DONE]':
+                    if not line:
+                        continue
+
+                    # Capture first 5 lines for debugging if needed
+                    if len(debug_lines) < 5:
+                        debug_lines.append(line)
+
+                    if line == 'data: [DONE]' or line == 'data:[DONE]':
                         continue
 
-                    if line.startswith('data: '):
+                    if line.startswith('data:'):
                         try:
-                            chunk = json.loads(line[6:])
+                            # Strip 'data:' and potential whitespace
+                            json_str = line[5:].strip()
+                            chunk = json.loads(json_str)
+
                             if 'usage' in chunk:
                                 prompt_usage_tokens = chunk['usage'].get('prompt_tokens', 0)
 
@@ -292,8 +305,9 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
                             delta = chunk['choices'][0].get('delta', {})
                             content = delta.get('content')
                             reasoning_content = delta.get('reasoning_content')
+                            reasoning = delta.get('reasoning')
 
-                            if content or reasoning_content:
+                            if content or reasoning_content or reasoning:
                                 if token_count == 0:
                                     first_token_time = chunk_time
                                     e2e_ttft = first_token_time - start_time
@@ -307,6 +321,10 @@ async def run_benchmark(session, base_url, api_key, model_name, context_text, pr
 
             end_time = time.perf_counter()
 
+            # DEBUG: Print warning if no tokens were collected
+            if token_count == 0:
+                print(f"\n[Warning] Run generated 0 tokens. Raw response sample: {debug_lines}")
+
             if token_count > 0:
                 # Calculate decode time (time for subsequent tokens)
                 # If only 1 token, decode_time is effectively 0, so we can't calculate inter-token speed
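
Note on the streaming hunks above: the new parsing accepts an SSE "data:" prefix with or without a trailing space, recognizes both "data: [DONE]" and "data:[DONE]" as the end-of-stream marker, and counts a delta that only carries a "reasoning" field toward the token total. The sketch below is a minimal, self-contained illustration of that line handling, not llama-benchy's own code; parse_sse_line and delta_has_token are hypothetical names.

import json

def parse_sse_line(line: str):
    """Decode one SSE line into a JSON chunk, or return None for lines to skip."""
    line = line.strip()
    if not line:
        return None
    # End-of-stream marker, with or without a space after "data:"
    if line in ('data: [DONE]', 'data:[DONE]'):
        return None
    if not line.startswith('data:'):
        return None
    try:
        # Strip the "data:" prefix and any surrounding whitespace before decoding
        return json.loads(line[5:].strip())
    except json.JSONDecodeError:
        return None

def delta_has_token(chunk: dict) -> bool:
    """True if the first choice's delta carries content, reasoning_content, or reasoning."""
    choices = chunk.get('choices') or []
    if not choices:
        return False
    delta = choices[0].get('delta', {})
    return bool(delta.get('content') or delta.get('reasoning_content') or delta.get('reasoning'))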
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.1'
-__version_tuple__ = version_tuple = (0, 1, 1)
+__version__ = version = '0.1.2'
+__version_tuple__ = version_tuple = (0, 1, 2)
 
 __commit_id__ = commit_id = None