benchgr-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchgr_agent-0.1.0/PKG-INFO +31 -0
- benchgr_agent-0.1.0/README.md +16 -0
- benchgr_agent-0.1.0/benchgr_agent/__init__.py +1 -0
- benchgr_agent-0.1.0/benchgr_agent/benchmark.py +111 -0
- benchgr_agent-0.1.0/benchgr_agent/main.py +96 -0
- benchgr_agent-0.1.0/benchgr_agent.egg-info/PKG-INFO +31 -0
- benchgr_agent-0.1.0/benchgr_agent.egg-info/SOURCES.txt +11 -0
- benchgr_agent-0.1.0/benchgr_agent.egg-info/dependency_links.txt +1 -0
- benchgr_agent-0.1.0/benchgr_agent.egg-info/entry_points.txt +2 -0
- benchgr_agent-0.1.0/benchgr_agent.egg-info/requires.txt +4 -0
- benchgr_agent-0.1.0/benchgr_agent.egg-info/top_level.txt +1 -0
- benchgr_agent-0.1.0/pyproject.toml +27 -0
- benchgr_agent-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: benchgr-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: BenchGR GPU Benchmark Agent - Test your GPU and submit to the leaderboard
|
|
5
|
+
Author: San S
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://benchgr-frontend.vercel.app
|
|
8
|
+
Keywords: gpu,benchmark,leaderboard,cuda,ai
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: torch>=2.0.0
|
|
12
|
+
Requires-Dist: transformers>=4.30.0
|
|
13
|
+
Requires-Dist: requests>=2.28.0
|
|
14
|
+
Requires-Dist: numpy>=1.24.0
|
|
15
|
+
|
|
16
|
+
# benchgr-agent
|
|
17
|
+
|
|
18
|
+
GPU Benchmark Agent for [BenchGR Leaderboard](https://benchgr-frontend.vercel.app).
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
pip install benchgr-agent
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
# Check your GPU
|
|
25
|
+
benchgr info
|
|
26
|
+
|
|
27
|
+
# Run benchmark (no submit)
|
|
28
|
+
benchgr run --no-submit
|
|
29
|
+
|
|
30
|
+
# Run and submit to leaderboard
|
|
31
|
+
benchgr run --api-key YOUR_API_KEY
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# benchgr-agent
|
|
2
|
+
|
|
3
|
+
GPU Benchmark Agent for [BenchGR Leaderboard](https://benchgr-frontend.vercel.app).
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
pip install benchgr-agent
|
|
7
|
+
|
|
8
|
+
## Usage
|
|
9
|
+
# Check your GPU
|
|
10
|
+
benchgr info
|
|
11
|
+
|
|
12
|
+
# Run benchmark (no submit)
|
|
13
|
+
benchgr run --no-submit
|
|
14
|
+
|
|
15
|
+
# Run and submit to leaderboard
|
|
16
|
+
benchgr run --api-key YOUR_API_KEY
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import torch
|
|
3
|
+
import numpy as np
|
|
4
|
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
5
|
+
|
|
6
|
+
BACKEND_URL = "https://benchgr-backend.vercel.app"
|
|
7
|
+
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_gpu_info():
    """Detect the first CUDA device and report its name and VRAM size.

    Returns:
        tuple: ``(info, error)`` — ``info`` is a dict with keys
        ``"gpu_name"`` (str) and ``"vram_gb"`` (float, rounded to 2 dp)
        and ``error`` is ``None`` on success; when no CUDA device is
        available, ``info`` is ``None`` and ``error`` is a message string.
    """
    if not torch.cuda.is_available():
        return None, "No CUDA GPU detected. Please run on a CUDA-enabled GPU."

    props = torch.cuda.get_device_properties(0)
    info = {
        "gpu_name": torch.cuda.get_device_name(0),
        # total_memory is in bytes; convert to GiB.
        "vram_gb": round(props.total_memory / (1024 ** 3), 2),
    }
    return info, None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def run_inference_benchmark():
    """Measure LLM text-generation throughput on the GPU.

    Loads TinyLlama 1.1B in fp16 on CUDA, does one warmup generation,
    then averages throughput over three timed 100-token generations.

    Returns:
        float: generated tokens per second, rounded to 2 decimals.
    """
    print("š„ Loading TinyLlama 1.1B (first run may take 1-2 mins to download ~600MB)...")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="cuda"
    )
    model.eval()

    prompt = "Explain what a GPU does in simple terms."
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    prompt_len = inputs["input_ids"].shape[1]

    # Warmup pass: pays one-time kernel/caching costs so timed runs are stable.
    with torch.no_grad():
        model.generate(**inputs, max_new_tokens=20)

    runs = 3
    token_count = 0
    elapsed = 0.0

    print("ā” Running inference benchmark (3 passes)...")
    for pass_idx in range(runs):
        # generate() launches asynchronously on CUDA; synchronize around the
        # timed region so perf_counter brackets the actual GPU work.
        torch.cuda.synchronize()
        t0 = time.perf_counter()
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=100)
        torch.cuda.synchronize()
        t1 = time.perf_counter()

        new_tokens = output.shape[1] - prompt_len
        token_count += new_tokens
        elapsed += t1 - t0
        print(f" Pass {pass_idx+1}: {new_tokens / (t1 - t0):.1f} tok/s")

    return round(token_count / elapsed, 2)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def run_cuda_benchmark(size=4096, runs=10, device="cuda", dtype=torch.float16):
    """Measure dense matmul throughput in TFLOPS.

    Multiplies two ``size x size`` matrices ``runs`` times and derives
    TFLOPS from the average wall-clock time per multiply. Defaults
    reproduce the original fixed benchmark (4096² fp16 on CUDA); the
    parameters were generalized so the benchmark can also be run with
    other sizes, dtypes, or on CPU.

    Args:
        size: square matrix dimension.
        runs: number of timed multiplies to average over.
        device: torch device string (default "cuda").
        dtype: matrix dtype (default torch.float16).

    Returns:
        float: achieved TFLOPS, rounded to 2 decimals.
    """
    print("š¢ Running CUDA matrix benchmark...")
    a = torch.randn(size, size, device=device, dtype=dtype)
    b = torch.randn(size, size, device=device, dtype=dtype)
    on_cuda = a.is_cuda

    # Warmup: first matmul pays one-time kernel-selection/caching costs.
    torch.matmul(a, b)
    if on_cuda:
        torch.cuda.synchronize()

    start = time.perf_counter()
    for _ in range(runs):
        torch.matmul(a, b)
    if on_cuda:
        # CUDA kernels launch asynchronously; wait for completion so the
        # timer measures actual compute, not just launch overhead.
        torch.cuda.synchronize()
    end = time.perf_counter()

    avg_s = (end - start) / runs
    # A dense N x N matmul performs 2 * N^3 floating-point operations.
    tflops = round((2 * size ** 3) / (avg_s * 1e12), 2)
    return tflops
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def run_memory_bandwidth_benchmark(runs=5, mb=256, device="cuda"):
    """Measure effective memory bandwidth via repeated tensor copies.

    Copies an ``mb``-MiB float32 tensor ``runs`` times; each copy reads
    the source and writes the destination, so ``2 * mb`` MiB of traffic
    are counted per copy. Defaults reproduce the original fixed
    benchmark (5 copies of 256 MiB on CUDA).

    Args:
        runs: number of timed copies.
        mb: tensor size in MiB.
        device: torch device string (default "cuda").

    Returns:
        float: bandwidth in GB/s (GiB-based), rounded to 2 decimals.
    """
    print("š¾ Running memory bandwidth benchmark...")
    size = mb * 1024 * 1024 // 4  # number of float32 elements in `mb` MiB
    a = torch.ones(size, device=device, dtype=torch.float32)
    b = torch.empty(size, device=device, dtype=torch.float32)
    on_cuda = a.is_cuda

    if on_cuda:
        torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(runs):
        b.copy_(a)
    if on_cuda:
        # copy_ is asynchronous on CUDA; block until all copies finish
        # before stopping the clock.
        torch.cuda.synchronize()
    end = time.perf_counter()

    # Bug fix: the original computed (5 * 256 * 2) / t, which is MiB/s,
    # but labeled and returned it as GB/s — overstating bandwidth by
    # a factor of 1024. Convert MiB moved to GiB before dividing.
    gib_moved = (runs * mb * 2) / 1024  # read + write per copy
    mem_bw_gbps = round(gib_moved / (end - start), 2)
    return mem_bw_gbps
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def submit_results(api_key, results):
    """POST benchmark results to the BenchGR backend.

    Best-effort: any network or JSON failure is caught and reported as
    a dict rather than raised, so the CLI never crashes on submission.

    Args:
        api_key: the user's BenchGR API key (sent as a query parameter).
        results: JSON-serializable dict of benchmark metrics.

    Returns:
        dict: the backend's parsed JSON response, or ``{"error": <msg>}``.
    """
    import requests  # local import keeps the hard dependency out of `benchgr info`

    try:
        response = requests.post(
            f"{BACKEND_URL}/api/results/submit",
            params={"api_key": api_key},
            json=results,
            timeout=10,
        )
        return response.json()
    except Exception as exc:
        return {"error": str(exc)}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
from benchgr_agent.benchmark import (
|
|
4
|
+
get_gpu_info,
|
|
5
|
+
run_inference_benchmark,
|
|
6
|
+
run_cuda_benchmark,
|
|
7
|
+
run_memory_bandwidth_benchmark,
|
|
8
|
+
submit_results
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main():
    """Entry point for the ``benchgr`` CLI.

    Commands:
        info — print the detected GPU name and VRAM.
        run  — run all three benchmarks, print a summary, and (unless
               --no-submit is given) submit results with --api-key.
    """
    parser = argparse.ArgumentParser(
        prog="benchgr",
        description="BenchGR ā GPU Benchmark Agent"
    )
    subparsers = parser.add_subparsers(dest="command")

    # benchgr run
    run_parser = subparsers.add_parser("run", help="Run benchmark and optionally submit")
    run_parser.add_argument("--api-key", type=str, help="Your BenchGR API key", default=None)
    run_parser.add_argument("--no-submit", action="store_true", help="Run without submitting results")

    # benchgr info
    subparsers.add_parser("info", help="Show detected GPU info")

    args = parser.parse_args()

    if args.command == "info":
        gpu, err = get_gpu_info()
        if err:
            print(f"ā {err}")
        else:
            print(f"ā GPU: {gpu['gpu_name']}")
            print(f" VRAM: {gpu['vram_gb']} GB")
    elif args.command == "run":
        banner = "=" * 50
        print(banner)
        print(" š BenchGR GPU Benchmark Agent")
        print(banner)

        gpu, err = get_gpu_info()
        if err:
            print(f"ā {err}")
            sys.exit(1)

        print(f"ā Detected GPU: {gpu['gpu_name']} ({gpu['vram_gb']} GB VRAM)\n")

        # Run the three benchmark stages in order.
        tokens_per_sec = run_inference_benchmark()
        tflops = run_cuda_benchmark()
        mem_bw = run_memory_bandwidth_benchmark()

        results = {
            "gpu_name": gpu["gpu_name"],
            "vram_gb": gpu["vram_gb"],
            "tokens_per_sec": tokens_per_sec,
            "tflops_fp16": tflops,
            "memory_bw_gbps": mem_bw,
        }

        print("\n" + banner)
        print("š BENCHMARK RESULTS")
        print(banner)
        print(f" GPU : {gpu['gpu_name']}")
        print(f" Tokens/sec : {tokens_per_sec} tok/s")
        print(f" TFLOPS FP16 : {tflops} TFLOPS")
        print(f" Mem BW : {mem_bw} GB/s")
        print(banner)

        if args.no_submit:
            print("\nā Done! (Results not submitted ā remove --no-submit to submit)")
        elif not args.api_key:
            # Can't submit without a key — point the user at the dashboard.
            print("\nā ļø No API key provided. Get yours at:")
            print(" š https://benchgr-frontend.vercel.app/dashboard")
            print("\nRun again with:")
            print(" benchgr run --api-key YOUR_KEY_HERE")
        else:
            print("\nš¤ Submitting to leaderboard...")
            response = submit_results(args.api_key, results)
            if "error" in response:
                print(f"ā Submission failed: {response['error']}")
            else:
                print(f"ā Submitted! Check your rank at:")
                print(f" š https://benchgr-frontend.vercel.app/leaderboard")
    else:
        # No (or unknown) subcommand: show quick usage.
        print("Usage: benchgr run --api-key YOUR_KEY")
        print(" benchgr info")
        print(" benchgr run --no-submit")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: benchgr-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: BenchGR GPU Benchmark Agent - Test your GPU and submit to the leaderboard
|
|
5
|
+
Author: San S
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://benchgr-frontend.vercel.app
|
|
8
|
+
Keywords: gpu,benchmark,leaderboard,cuda,ai
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: torch>=2.0.0
|
|
12
|
+
Requires-Dist: transformers>=4.30.0
|
|
13
|
+
Requires-Dist: requests>=2.28.0
|
|
14
|
+
Requires-Dist: numpy>=1.24.0
|
|
15
|
+
|
|
16
|
+
# benchgr-agent
|
|
17
|
+
|
|
18
|
+
GPU Benchmark Agent for [BenchGR Leaderboard](https://benchgr-frontend.vercel.app).
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
pip install benchgr-agent
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
# Check your GPU
|
|
25
|
+
benchgr info
|
|
26
|
+
|
|
27
|
+
# Run benchmark (no submit)
|
|
28
|
+
benchgr run --no-submit
|
|
29
|
+
|
|
30
|
+
# Run and submit to leaderboard
|
|
31
|
+
benchgr run --api-key YOUR_API_KEY
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
benchgr_agent/__init__.py
|
|
4
|
+
benchgr_agent/benchmark.py
|
|
5
|
+
benchgr_agent/main.py
|
|
6
|
+
benchgr_agent.egg-info/PKG-INFO
|
|
7
|
+
benchgr_agent.egg-info/SOURCES.txt
|
|
8
|
+
benchgr_agent.egg-info/dependency_links.txt
|
|
9
|
+
benchgr_agent.egg-info/entry_points.txt
|
|
10
|
+
benchgr_agent.egg-info/requires.txt
|
|
11
|
+
benchgr_agent.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
benchgr_agent
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "benchgr-agent"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "BenchGR GPU Benchmark Agent - Test your GPU and submit to the leaderboard"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "San S"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["gpu", "benchmark", "leaderboard", "cuda", "ai"]
|
|
16
|
+
dependencies = [
|
|
17
|
+
"torch>=2.0.0",
|
|
18
|
+
"transformers>=4.30.0",
|
|
19
|
+
"requests>=2.28.0",
|
|
20
|
+
"numpy>=1.24.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
benchgr = "benchgr_agent.main:main"
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://benchgr-frontend.vercel.app"
|