h2loop-bench 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: h2loop-bench
3
+ Version: 0.1.0
4
+ Summary: H2Loop inference contest benchmark submission tool
5
+ Requires-Python: >=3.8
6
+ Requires-Dist: requests>=2.28.0
@@ -0,0 +1,3 @@
1
+ from ._core import result
2
+
3
+ __all__ = ["result"]
@@ -0,0 +1,155 @@
1
+ import json
2
+ import os
3
+ import shutil
4
+ import subprocess
5
+ import requests
6
+
7
+ _SUBMIT_URL = "https://inference-contest-airtable-629464143743.asia-south1.run.app/"
8
+
9
+
10
def _run_benchmark(port: int) -> None:
    """Run the vLLM serving benchmark against a local OpenAI-compatible server.

    Invokes ``vllm bench serve`` pointed at ``http://localhost:<port>/v1`` and
    saves the results to ``results/baseline.json``.

    Args:
        port: Port on which the local vLLM server is listening.

    Raises:
        RuntimeError: If the ``vllm`` CLI is not on PATH, or the benchmark
            process exits with a non-zero status.
    """
    os.makedirs("results", exist_ok=True)

    # Build the benchmark argv; flags with a value of None are bare switches.
    cmd = ["vllm", "bench", "serve"]
    for flag, value in [
        ("--backend", "openai"),
        ("--base-url", f"http://localhost:{port}/v1"),
        ("--endpoint", "/completions"),
        ("--model", "Qwen/Qwen2.5-0.5B"),
        ("--tokenizer", "Qwen/Qwen2.5-0.5B"),
        ("--max-concurrency", "50"),
        ("--num-prompts", "200"),
        ("--ignore-eos", None),
        ("--random-input-len", "512"),
        ("--random-output-len", "512"),
        ("--save-result", None),
        ("--result-dir", "./results"),
        ("--result-filename", "baseline.json"),
        ("--label", "baseline"),
    ]:
        cmd.append(flag)
        if value is not None:
            cmd.append(value)

    if not shutil.which("vllm"):
        raise RuntimeError(
            "'vllm' is not installed or not on PATH. "
            "Install it with: pip install vllm"
        )

    print(f"Running benchmark against http://localhost:{port}/v1 ...")
    proc = subprocess.run(cmd)

    if proc.returncode != 0:
        raise RuntimeError(
            f"Benchmark failed with exit code {proc.returncode}. "
            f"Make sure your vLLM server is running on port {port} and is healthy."
        )
45
+
46
+
47
+ def _load_results(path: str = "results/baseline.json") -> dict:
48
+ if not os.path.exists(path):
49
+ raise FileNotFoundError(
50
+ f"Results file not found at '{path}'. "
51
+ "The benchmark may not have completed successfully."
52
+ )
53
+
54
+ with open(path) as f:
55
+ data = json.load(f)
56
+
57
+ total = data["completed"] + data.get("failed", 0)
58
+
59
+ print("=" * 60)
60
+ print("BASELINE RESULTS")
61
+ print("=" * 60)
62
+ print(f" Output throughput: {data['output_throughput']:.2f} tok/s")
63
+ print(f" Mean TPOT: {data['mean_tpot_ms']:.2f} ms")
64
+ print(f" P99 TPOT: {data['p99_tpot_ms']:.2f} ms (limit: 50 ms)")
65
+ print(f" Mean TTFT: {data['mean_ttft_ms']:.2f} ms")
66
+ print(f" P99 TTFT: {data['p99_ttft_ms']:.2f} ms (limit: 2000 ms)")
67
+ print(f" Completed requests: {data['completed']}/{total}")
68
+ print(f" Failed requests: {data.get('failed', 0)}")
69
+ print("=" * 60)
70
+
71
+ return data
72
+
73
+
74
def _collect_gpu_info() -> list | None:
    """Query GPU details via ``nvidia-smi``.

    Returns:
        A list of dicts with keys ``name``, ``memory_total_mb``,
        ``memory_used_mb``, and ``utilization_percent`` — one per GPU —
        or ``None`` when ``nvidia-smi`` is unavailable or the query fails.
        GPU info is optional metadata, so all failures are swallowed.
    """
    if shutil.which("nvidia-smi") is None:
        return None

    try:
        proc = subprocess.run(
            [
                "nvidia-smi",
                "--query-gpu=name,memory.total,memory.used,utilization.gpu",
                "--format=csv,noheader,nounits",
            ],
            capture_output=True,
            text=True,
            check=True,
        )
        # Each CSV row describes one GPU: name, total MB, used MB, util %.
        return [
            {
                "name": fields[0],
                "memory_total_mb": int(fields[1]),
                "memory_used_mb": int(fields[2]),
                "utilization_percent": int(fields[3]),
            }
            for fields in (
                [col.strip() for col in row.split(",")]
                for row in proc.stdout.strip().split("\n")
            )
        ]
    except Exception:
        # Best effort: any parsing or subprocess failure means "no GPU info".
        return None
102
+
103
+
104
def _submit(
    name: str,
    email: str,
    contact_number: str,
    colab_link: str,
    data: dict,
) -> None:
    """Submit benchmark results and contact details to the H2Loop server.

    Builds an Airtable-style ``{"fields": {...}}`` payload from the benchmark
    metrics (plus best-effort GPU info) and POSTs it to ``_SUBMIT_URL``.

    Args:
        name: Participant name.
        email: Participant email.
        contact_number: Participant phone number.
        colab_link: Link to the Colab notebook used for the run.
        data: Parsed benchmark results (as returned by ``_load_results``).

    Raises:
        ConnectionError: If the submission server is unreachable.
        TimeoutError: If the request times out.
        RuntimeError: If the server responds with an HTTP error status.
    """
    gpu_info = _collect_gpu_info()
    gpu_str = ""
    if gpu_info:
        gpu_str = ", ".join(
            f"{g['name']} ({g['memory_total_mb']} MB)" for g in gpu_info
        )

    # Field names must match the server's schema exactly (note the
    # inconsistent "99 TPOT (ms)" vs "P99 TTFT (ms)" — do not "fix" them).
    payload = {
        "fields": {
            "Name": name,
            "Email": email,
            "Contact number": contact_number,
            "Colab_Link": colab_link,
            "Output Throughput (tok/s)": data["output_throughput"],
            "99 TPOT (ms)": data["p99_tpot_ms"],
            "P99 TTFT (ms)": data["p99_ttft_ms"],
            "GPU Info": gpu_str,
        }
    }

    print("\nSubmitting results to H2Loop...")
    try:
        # Bug fix: without an explicit timeout, requests waits indefinitely,
        # so the Timeout handler below could never fire.
        response = requests.post(_SUBMIT_URL, json=payload, timeout=30)
        response.raise_for_status()
    except requests.exceptions.ConnectionError as e:
        raise ConnectionError(
            "Could not connect to submission server. Check your internet connection."
        ) from e
    except requests.exceptions.Timeout as e:
        raise TimeoutError("Submission request timed out.") from e
    except requests.exceptions.HTTPError as e:
        raise RuntimeError(
            f"Submission failed: {e.response.status_code} {e.response.text}"
        ) from e

    record_id = response.json()["record_id"]
    print(f"Submission successful! Record ID: {record_id}")
144
+
145
+
146
def result(
    name: str,
    email: str,
    contact_number: str,
    colab_link: str,
    port: int,
) -> None:
    """Run the benchmark, load its results, and submit them to H2Loop.

    Args:
        name: Participant name.
        email: Participant email.
        contact_number: Participant phone number.
        colab_link: Link to the Colab notebook used for the run.
        port: Local port of the running vLLM server to benchmark.
    """
    _run_benchmark(port)
    metrics = _load_results()
    _submit(name, email, contact_number, colab_link, metrics)
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: h2loop-bench
3
+ Version: 0.1.0
4
+ Summary: H2Loop inference contest benchmark submission tool
5
+ Requires-Python: >=3.8
6
+ Requires-Dist: requests>=2.28.0
@@ -0,0 +1,8 @@
1
+ pyproject.toml
2
+ h2loop_bench/__init__.py
3
+ h2loop_bench/_core.py
4
+ h2loop_bench.egg-info/PKG-INFO
5
+ h2loop_bench.egg-info/SOURCES.txt
6
+ h2loop_bench.egg-info/dependency_links.txt
7
+ h2loop_bench.egg-info/requires.txt
8
+ h2loop_bench.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ requests>=2.28.0
@@ -0,0 +1 @@
1
+ h2loop_bench
@@ -0,0 +1,16 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "h2loop-bench"
7
+ version = "0.1.0"
8
+ description = "H2Loop inference contest benchmark submission tool"
9
+ requires-python = ">=3.8"
10
+ dependencies = [
11
+ "requests>=2.28.0",
12
+ ]
13
+
14
+ [tool.setuptools.packages.find]
15
+ where = ["."]
16
+ include = ["h2loop_bench*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+