pyaccelerate 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ """
2
+ PyAccelerate — High-performance Python acceleration engine.
3
+
4
+ Modules
5
+ -------
6
+ - **cpu** : CPU detection, core count, frequency, affinity, NUMA topology
7
+ - **threads** : Virtual thread pool, sliding-window executor, async bridge
8
+ - **gpu** : Multi-vendor GPU detection, ranking, dispatch (CUDA/OpenCL/Intel)
9
+ - **virt** : Virtualization detection (Hyper-V, VT-x/AMD-V, WSL2, Docker)
10
+ - **memory** : Memory monitoring, pressure detection, pool allocator
11
+ - **profiler** : Decorator-based profiling & timing utilities
12
+ - **benchmark** : Built-in micro-benchmarks for the current host
13
+ - **engine** : Unified orchestrator that auto-tunes all subsystems
14
+
15
+ Quick start::
16
+
17
+ from pyaccelerate import Engine
18
+
19
+ engine = Engine() # auto-detects hardware
20
+ print(engine.summary()) # human-readable report
21
+
22
+ # Use the shared virtual-thread pool
23
+ from pyaccelerate.threads import get_pool, run_parallel
24
+
25
+ pool = get_pool()
26
+ fut = pool.submit(my_io_func, arg1, arg2)
27
+
28
+ # GPU compute
29
+ from pyaccelerate.gpu import detect_all, best_gpu, dispatch
30
+
31
+ gpus = detect_all()
32
+ result = dispatch(my_kernel, data, gpus=gpus)
33
+ """
34
+
35
+ from importlib.metadata import PackageNotFoundError, version as _version
36
+
37
# Resolve the package version. Installed distribution metadata wins; when the
# distribution is not installed, look for a VERSION file three directories
# above this module and fall back to a development placeholder.
try:
    __version__: str = _version("pyaccelerate")
except PackageNotFoundError:
    # Running from source / not installed
    from pathlib import Path as _Path

    _vf = _Path(__file__).resolve().parent.parent.parent / "VERSION"
    if _vf.exists():
        __version__ = _vf.read_text().strip()
    else:
        __version__ = "0.0.0-dev"
45
+
46
+ # Convenience re-exports
47
+ from pyaccelerate.engine import Engine # noqa: E402, F401
48
+
49
+ __all__ = [
50
+ "__version__",
51
+ "Engine",
52
+ ]
@@ -0,0 +1,308 @@
1
+ """
2
+ pyaccelerate.benchmark — Built-in micro-benchmarks for the current host.
3
+
4
+ Runs quick benchmarks to characterise host performance:
5
+ - CPU single-thread & multi-thread throughput
6
+ - Memory bandwidth
7
+ - I/O thread pool latency
8
+ - GPU compute throughput (if available)
9
+
10
+ Results are returned as structured dicts suitable for logging, dashboards
11
+ or automated tuning decisions.
12
+
13
+ Usage::
14
+
15
+ from pyaccelerate.benchmark import run_all, run_cpu, run_gpu
16
+
17
+ report = run_all()
18
+ print(report)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import hashlib
24
+ import logging
25
+ import math
26
+ import os
27
+ import time
28
+ from concurrent.futures import ThreadPoolExecutor, as_completed
29
+ from typing import Any, Dict, List, Optional
30
+
31
+ log = logging.getLogger("pyaccelerate.benchmark")
32
+
33
+
34
+ # ═══════════════════════════════════════════════════════════════════════════
35
+ # CPU benchmarks
36
+ # ═══════════════════════════════════════════════════════════════════════════
37
+
38
def run_cpu(
    iterations: int = 500_000,
    hash_bytes: int = 4096,
) -> Dict[str, Any]:
    """Time a single-threaded math loop followed by a single-threaded hash loop.

    Parameters
    ----------
    iterations : int
        Number of passes executed by each of the two workloads.
    hash_bytes : int
        Size of the random payload that is hashed on every pass.

    Returns
    -------
    dict
        Wall-clock timings in seconds (rounded to 4 decimals) and the derived
        operations-per-second figure for each workload.
    """
    # Workload 1: accumulate sqrt(n) * sin(n) for n = 1..iterations.
    math_start = time.perf_counter()
    acc = 0.0
    for n in range(1, iterations + 1):
        acc += math.sqrt(n) * math.sin(n)
    math_time = time.perf_counter() - math_start

    # Workload 2: MD5 of one fixed random buffer, repeated `iterations` times.
    payload = os.urandom(hash_bytes)
    hash_start = time.perf_counter()
    for _ in range(iterations):
        hashlib.md5(payload).hexdigest()
    hash_time = time.perf_counter() - hash_start

    # Guard the divisions in case the timer reports no elapsed time.
    math_rate = 0
    if math_time > 0:
        math_rate = int(iterations / math_time)
    hash_rate = 0
    if hash_time > 0:
        hash_rate = int(iterations / hash_time)

    report: Dict[str, Any] = {
        "benchmark": "cpu_single_thread",
        "iterations": iterations,
        "math_time_s": round(math_time, 4),
        "hash_time_s": round(hash_time, 4),
        "total_s": round(math_time + hash_time, 4),
        "math_ops_per_sec": math_rate,
        "hash_ops_per_sec": hash_rate,
    }
    return report
66
+
67
+
68
def run_cpu_multithread(
    iterations: int = 200_000,
    workers: int = 0,
) -> Dict[str, Any]:
    """Multi-thread CPU throughput using a standard thread pool.

    The index range ``[0, iterations)`` is split into ``workers`` contiguous
    slices and each slice is summed (``sqrt(i) * sin(i)``) on its own thread.

    Parameters
    ----------
    iterations : int
        Total number of loop passes, spread across all workers.
    workers : int
        Number of threads. Values ``<= 0`` select ``os.cpu_count()``
        (or 4 when the CPU count is unknown).

    Returns
    -------
    dict
        The benchmark name, the inputs, the elapsed wall-clock time in seconds
        (rounded to 4 decimals) and the resulting operations per second.
    """
    if workers <= 0:
        workers = os.cpu_count() or 4

    def _work(start: int, end: int) -> float:
        total = 0.0
        for i in range(start, end):
            total += math.sqrt(i) * math.sin(i)
        return total

    # Evenly spaced slice boundaries. Using `iterations * k // workers` rather
    # than a fixed `iterations // workers` chunk distributes the remainder, so
    # exactly `iterations` passes run and `ops_per_sec` reflects real work
    # (a fixed chunk would run zero passes whenever iterations < workers).
    bounds = [iterations * k // workers for k in range(workers + 1)]

    t0 = time.perf_counter()
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [
            pool.submit(_work, lo, hi) for lo, hi in zip(bounds, bounds[1:])
        ]
        for fut in futures:
            # Block until the slice is done; re-raises any worker exception.
            fut.result()
    elapsed = time.perf_counter() - t0

    return {
        "benchmark": "cpu_multi_thread",
        "iterations": iterations,
        "workers": workers,
        "time_s": round(elapsed, 4),
        "ops_per_sec": int(iterations / elapsed) if elapsed > 0 else 0,
    }
98
+
99
+
100
+ # ═══════════════════════════════════════════════════════════════════════════
101
+ # Thread pool latency
102
+ # ═══════════════════════════════════════════════════════════════════════════
103
+
104
def run_thread_pool_latency(
    tasks: int = 1000,
    pool_size: int = 0,
) -> Dict[str, Any]:
    """Time the submit-to-result round trip of no-op tasks on the shared pool.

    Tasks are submitted one at a time; each is awaited before the next one is
    submitted, so every sample is an individual round-trip latency.

    Parameters
    ----------
    tasks : int
        Number of no-op tasks to submit.
    pool_size : int
        Value reported in the result. When ``<= 0`` it is taken from
        ``io_pool_size()``. It is not used to size the pool, which always
        comes from ``get_pool()``.

    Returns
    -------
    dict
        Average, 95th-percentile, minimum and maximum latency in microseconds
        (rounded to 1 decimal); all zero when no task was run.
    """
    from pyaccelerate.threads import get_pool, io_pool_size

    if pool_size <= 0:
        pool_size = io_pool_size()

    executor = get_pool()
    samples: list[float] = []
    for _ in range(tasks):
        started = time.perf_counter()
        executor.submit(lambda: None).result()
        samples.append(time.perf_counter() - started)

    def _to_us(seconds: float) -> float:
        # Seconds -> microseconds, one decimal place.
        return round(seconds * 1_000_000, 1)

    ordered = sorted(samples)
    if ordered:
        mean = sum(samples) / len(samples)
        p95 = _to_us(ordered[int(len(samples) * 0.95)])
        fastest = _to_us(ordered[0])
        slowest = _to_us(ordered[-1])
    else:
        mean = 0
        p95 = fastest = slowest = 0

    return {
        "benchmark": "thread_pool_latency",
        "tasks": tasks,
        "pool_size": pool_size,
        "avg_latency_us": _to_us(mean),
        "p95_latency_us": p95,
        "min_latency_us": fastest,
        "max_latency_us": slowest,
    }
136
+
137
+
138
+ # ═══════════════════════════════════════════════════════════════════════════
139
+ # Memory bandwidth
140
+ # ═══════════════════════════════════════════════════════════════════════════
141
+
142
def run_memory_bandwidth(
    size_mb: int = 64,
    iterations: int = 10,
) -> Dict[str, Any]:
    """Estimate sequential memory write and read throughput.

    The write phase allocates a zero-filled ``bytearray`` of ``size_mb`` MiB
    on every iteration. The read phase hashes the last allocated buffer with
    MD5 on every iteration, so the whole buffer is actually traversed.

    Parameters
    ----------
    size_mb : int
        Buffer size in MiB.
    iterations : int
        Number of passes for each phase.

    Returns
    -------
    dict
        ``write_gbps`` / ``read_gbps`` computed as bytes / seconds / 1024**3
        and rounded to 2 decimals, plus the inputs.
    """
    n_bytes = size_mb * 1024 * 1024

    # Write phase.
    write_start = time.perf_counter()
    for _ in range(iterations):
        buf = bytearray(n_bytes)
    write_time = time.perf_counter() - write_start

    # Read phase: hashing forces a full pass over the buffer.
    read_start = time.perf_counter()
    for _ in range(iterations):
        hashlib.md5(buf).digest()
    read_time = time.perf_counter() - read_start

    moved = n_bytes * iterations

    def _rate(seconds: float) -> Any:
        # Throughput for `moved` bytes, or 0 when no time was measured.
        if seconds > 0:
            return round(moved / seconds / (1024 ** 3), 2)
        return 0

    return {
        "benchmark": "memory_bandwidth",
        "size_mb": size_mb,
        "iterations": iterations,
        "write_gbps": _rate(write_time),
        "read_gbps": _rate(read_time),
    }
169
+
170
+
171
+ # ═══════════════════════════════════════════════════════════════════════════
172
+ # GPU benchmark
173
+ # ═══════════════════════════════════════════════════════════════════════════
174
+
175
def run_gpu(
    size: int = 10_000_000,
    iterations: int = 100,
) -> Dict[str, Any]:
    """GPU compute throughput: element-wise operations on a large array.

    Picks the GPU returned by ``best_gpu()`` and hands off to the benchmark
    routine matching its ``backend`` ("cuda", "opencl" or "intel").

    Parameters
    ----------
    size : int
        Number of array elements.
    iterations : int
        Number of times the element-wise expression is evaluated.

    Returns
    -------
    dict
        The backend-specific benchmark result. When ``gpu_available()`` is
        false, or the backend is not one of the three handled here, a dict
        with ``"available": False`` and an explanatory ``"note"`` is returned
        instead; no benchmark is run in that case.
    """
    from pyaccelerate.gpu import gpu_available, best_gpu

    if not gpu_available():
        return {
            "benchmark": "gpu_compute",
            "available": False,
            "note": "No usable GPU — skipped",
        }

    gpu = best_gpu()
    backend = gpu.backend if gpu else "none"

    # Try the known backends in order; the first match runs the benchmark.
    known_backends = (
        ("cuda", _bench_cuda),
        ("opencl", _bench_opencl),
        ("intel", _bench_intel),
    )
    for backend_name, bench in known_backends:
        if backend == backend_name:
            return bench(size, iterations, gpu)

    return {"benchmark": "gpu_compute", "available": False, "note": "Unsupported backend"}
203
+
204
+
205
def _bench_cuda(size: int, iterations: int, gpu: Any) -> Dict[str, Any]:
    """Time ``a * b + a`` on float32 CuPy arrays on the device ``gpu._index``.

    Parameters
    ----------
    size : int
        Number of elements in each operand array.
    iterations : int
        Number of times the expression is evaluated.
    gpu : Any
        Device descriptor; ``_index`` and ``name`` are read from it.

    Returns
    -------
    dict
        Timing and GFLOPS figures (2 operations per element per iteration).
        Any exception, including a missing ``cupy``, is reported as an
        ``"error"`` entry rather than raised.
    """
    try:
        import cupy as cp  # type: ignore[import-untyped]

        with cp.cuda.Device(gpu._index):
            lhs = cp.random.random(size, dtype=cp.float32)
            rhs = cp.random.random(size, dtype=cp.float32)
            # Wait for the device before starting the clock.
            cp.cuda.Device(gpu._index).synchronize()

            started = time.perf_counter()
            for _ in range(iterations):
                product = lhs * rhs + lhs
            # Wait for the device again before stopping the clock.
            cp.cuda.Device(gpu._index).synchronize()
            elapsed = time.perf_counter() - started

        flop_count = size * 2 * iterations  # mul + add
        if elapsed > 0:
            gflops = round(flop_count / elapsed / 1e9, 2)
        else:
            gflops = 0

        return {
            "benchmark": "gpu_compute",
            "available": True,
            "backend": "cuda",
            "device": gpu.name,
            "elements": size,
            "iterations": iterations,
            "time_s": round(elapsed, 4),
            "gflops": gflops,
        }
    except Exception as exc:
        return {"benchmark": "gpu_compute", "available": True, "error": str(exc)}
232
+
233
+
234
def _bench_opencl(size: int, iterations: int, gpu: Any) -> Dict[str, Any]:
    """Time ``a * b + a`` on float32 NumPy arrays as a stand-in for OpenCL.

    The computation runs on the host with NumPy, not on the device; the
    result carries a ``"note"`` saying so.

    Parameters
    ----------
    size : int
        Number of elements in each operand array.
    iterations : int
        Number of times the expression is evaluated.
    gpu : Any
        Device descriptor; only ``name`` is read from it.

    Returns
    -------
    dict
        Timing and GFLOPS figures (2 operations per element per iteration).
        Any exception is reported as an ``"error"`` entry rather than raised.
    """
    try:
        import numpy as np  # type: ignore[import-untyped]

        lhs = np.random.random(size).astype(np.float32)
        rhs = np.random.random(size).astype(np.float32)

        started = time.perf_counter()
        for _ in range(iterations):
            product = lhs * rhs + lhs
        elapsed = time.perf_counter() - started

        flop_count = size * 2 * iterations
        if elapsed > 0:
            gflops = round(flop_count / elapsed / 1e9, 2)
        else:
            gflops = 0

        return {
            "benchmark": "gpu_compute",
            "available": True,
            "backend": "opencl",
            "device": gpu.name,
            "elements": size,
            "iterations": iterations,
            "time_s": round(elapsed, 4),
            "gflops": gflops,
            "note": "OpenCL benchmark uses numpy host-side as proxy",
        }
    except Exception as exc:
        return {"benchmark": "gpu_compute", "available": True, "error": str(exc)}
259
+
260
+
261
def _bench_intel(size: int, iterations: int, gpu: Any) -> Dict[str, Any]:
    """Time ``a * b + a`` on float32 ``dpnp`` arrays.

    Parameters
    ----------
    size : int
        Number of elements in each operand array.
    iterations : int
        Number of times the expression is evaluated.
    gpu : Any
        Device descriptor; only ``name`` is read from it.

    Returns
    -------
    dict
        Timing and GFLOPS figures (2 operations per element per iteration).
        Any exception, including a missing ``dpnp``, is reported as an
        ``"error"`` entry rather than raised.
    """
    try:
        import dpnp  # type: ignore[import-untyped]

        lhs = dpnp.random.random(size).astype(dpnp.float32)
        rhs = dpnp.random.random(size).astype(dpnp.float32)

        started = time.perf_counter()
        for _ in range(iterations):
            product = lhs * rhs + lhs
        elapsed = time.perf_counter() - started

        flop_count = size * 2 * iterations
        if elapsed > 0:
            gflops = round(flop_count / elapsed / 1e9, 2)
        else:
            gflops = 0

        return {
            "benchmark": "gpu_compute",
            "available": True,
            "backend": "intel",
            "device": gpu.name,
            "elements": size,
            "iterations": iterations,
            "time_s": round(elapsed, 4),
            "gflops": gflops,
        }
    except Exception as exc:
        return {"benchmark": "gpu_compute", "available": True, "error": str(exc)}
285
+
286
+
287
+ # ═══════════════════════════════════════════════════════════════════════════
288
+ # Full suite
289
+ # ═══════════════════════════════════════════════════════════════════════════
290
+
291
def run_all(quick: bool = True) -> Dict[str, Any]:
    """Run every benchmark in sequence and collect the results in one dict.

    Parameters
    ----------
    quick : bool
        If True, use the base workload sizes for a faster run (~5 s);
        otherwise every workload size is multiplied by 5.

    Returns
    -------
    dict
        One entry per benchmark, keyed ``cpu_single``, ``cpu_multi``,
        ``thread_latency``, ``memory`` and ``gpu``.
    """
    factor = 1 if quick else 5

    # Dict entries are evaluated top to bottom, so the benchmarks run in the
    # order listed here.
    return {
        "cpu_single": run_cpu(iterations=100_000 * factor),
        "cpu_multi": run_cpu_multithread(iterations=100_000 * factor),
        "thread_latency": run_thread_pool_latency(tasks=200 * factor),
        "memory": run_memory_bandwidth(size_mb=16 * factor, iterations=3 * factor),
        "gpu": run_gpu(size=1_000_000 * factor, iterations=20 * factor),
    }
pyaccelerate/cli.py ADDED
@@ -0,0 +1,156 @@
1
+ """
2
+ pyaccelerate.cli — Command-line interface.
3
+
4
+ Provides quick access to hardware detection, benchmarks and diagnostics::
5
+
6
+ pyaccelerate info # Show full engine report
7
+ pyaccelerate benchmark # Run micro-benchmarks
8
+ pyaccelerate gpu # GPU detection details
9
+ pyaccelerate status # One-line status
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import json
16
+ import logging
17
+ import sys
18
+
19
+
20
def _build_parser() -> argparse.ArgumentParser:
    """Create the top-level parser and register every sub-command."""
    parser = argparse.ArgumentParser(
        prog="pyaccelerate",
        description="PyAccelerate — High-performance Python acceleration engine",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Enable debug logging",
    )

    sub = parser.add_subparsers(dest="command")
    sub.add_parser("info", help="Full engine report")
    sub.add_parser("status", help="One-line status")

    bench_p = sub.add_parser("benchmark", help="Run micro-benchmarks")
    bench_p.add_argument("--full", action="store_true", help="Run full (slower) suite")
    bench_p.add_argument("--json", action="store_true", dest="as_json", help="Output as JSON")

    sub.add_parser("gpu", help="GPU detection details")
    sub.add_parser("cpu", help="CPU detection details")
    sub.add_parser("virt", help="Virtualization detection")
    sub.add_parser("memory", help="Memory stats")
    sub.add_parser("version", help="Show version")
    return parser


def _cmd_version(args: argparse.Namespace) -> None:
    """Print the package version."""
    from pyaccelerate import __version__
    print(f"pyaccelerate {__version__}")


def _cmd_info(args: argparse.Namespace) -> None:
    """Print the full engine report."""
    from pyaccelerate.engine import Engine
    print(Engine().summary())


def _cmd_status(args: argparse.Namespace) -> None:
    """Print the one-line engine status."""
    from pyaccelerate.engine import Engine
    print(Engine().status_line())


def _cmd_benchmark(args: argparse.Namespace) -> None:
    """Run the benchmark suite and print it as JSON or as a plain listing."""
    from pyaccelerate.benchmark import run_all

    # In JSON mode stdout must contain nothing but the JSON document, so the
    # progress line is diverted to stderr; plain mode keeps it on stdout.
    progress_stream = sys.stderr if args.as_json else sys.stdout
    print("Running benchmarks...", file=progress_stream)

    results = run_all(quick=not args.full)
    if args.as_json:
        print(json.dumps(results, indent=2))
        return

    for name, data in results.items():
        print(f"\n{'─' * 50}")
        print(f" {name}")
        print(f"{'─' * 50}")
        if isinstance(data, dict):
            for k, v in data.items():
                print(f" {k}: {v}")


def _cmd_gpu(args: argparse.Namespace) -> None:
    """Print one entry per detected GPU, followed by any install hint."""
    from pyaccelerate.gpu import detect_all, get_install_hint
    gpus = detect_all()
    if not gpus:
        print("No GPU detected.")
    for i, g in enumerate(gpus):
        print(f"\n[{i}] {g.short_label()}")
        print(f" Vendor: {g.vendor} | Backend: {g.backend}")
        print(f" VRAM: {g.memory_gb:.1f} GB | CUs: {g.compute_units}")
        print(f" Discrete: {g.is_discrete} | Score: {g.score}")
        print(f" Usable: {g.usable}")
    hint = get_install_hint()
    if hint:
        print(f"\n{hint}")


def _cmd_cpu(args: argparse.Namespace) -> None:
    """Print the detected CPU characteristics."""
    from pyaccelerate.cpu import detect
    info = detect()
    print(f"Brand: {info.brand}")
    print(f"Architecture: {info.arch}")
    print(f"Physical cores: {info.physical_cores}")
    print(f"Logical cores: {info.logical_cores}")
    print(f"Frequency: {info.frequency_mhz:.0f} MHz (boost: {info.frequency_max_mhz:.0f} MHz)")
    print(f"NUMA nodes: {info.numa_nodes}")
    print(f"SMT ratio: {info.smt_ratio:.1f}x")
    if info.flags:
        print(f"ISA flags: {', '.join(info.flags)}")


def _cmd_virt(args: argparse.Namespace) -> None:
    """Print the detected virtualization features, if any."""
    from pyaccelerate.virt import detect
    vi = detect()
    parts = vi.summary_parts()
    if parts:
        print("Detected:", ", ".join(parts))
    else:
        print("No virtualization features detected.")


def _cmd_memory(args: argparse.Namespace) -> None:
    """Print the memory pressure level and the memory statistics."""
    from pyaccelerate.memory import get_stats, get_pressure
    stats = get_stats()
    pressure = get_pressure()
    print(f"Pressure: {pressure.name}")
    for k, v in stats.items():
        if k == "error":
            continue
        try:
            line = f" {k}: {v:.2f}"
        except (TypeError, ValueError):
            # Value does not accept a float format spec; show it unformatted
            # instead of aborting the whole listing.
            line = f" {k}: {v}"
        print(line)


def main(argv: list[str] | None = None) -> None:
    """Entry point of the ``pyaccelerate`` command-line interface.

    Parameters
    ----------
    argv : list[str] | None
        Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Notes
    -----
    Logging is configured at DEBUG level with ``--verbose`` and at WARNING
    level otherwise. Without a sub-command the help text is printed.
    Project modules are imported lazily inside each command handler, so only
    the selected command's dependencies are loaded.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG, format="%(name)s %(message)s")
    else:
        logging.basicConfig(level=logging.WARNING)

    if args.command is None:
        parser.print_help()
        return

    handlers = {
        "version": _cmd_version,
        "info": _cmd_info,
        "status": _cmd_status,
        "benchmark": _cmd_benchmark,
        "gpu": _cmd_gpu,
        "cpu": _cmd_cpu,
        "virt": _cmd_virt,
        "memory": _cmd_memory,
    }
    # argparse only accepts registered sub-commands, so a handler is normally
    # found; an unmatched command simply does nothing.
    handler = handlers.get(args.command)
    if handler is not None:
        handler(args)


if __name__ == "__main__":
    main()