hip-quant 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {hip_quant-0.2.2/hip_quant.egg-info → hip_quant-0.2.3}/PKG-INFO +1 -1
  2. {hip_quant-0.2.2 → hip_quant-0.2.3}/__init__.py +2 -0
  3. {hip_quant-0.2.2 → hip_quant-0.2.3/hip_quant.egg-info}/PKG-INFO +1 -1
  4. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quantize.cpp +36 -24
  5. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quantize.dll +0 -0
  6. {hip_quant-0.2.2 → hip_quant-0.2.3}/pyproject.toml +1 -1
  7. {hip_quant-0.2.2 → hip_quant-0.2.3}/MANIFEST.in +0 -0
  8. {hip_quant-0.2.2 → hip_quant-0.2.3}/README.md +0 -0
  9. {hip_quant-0.2.2 → hip_quant-0.2.3}/build.ps1 +0 -0
  10. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_iquant_util.h +0 -0
  11. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/SOURCES.txt +0 -0
  12. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/dependency_links.txt +0 -0
  13. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/requires.txt +0 -0
  14. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/top_level.txt +0 -0
  15. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq1s_data.h +0 -0
  16. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq2xs_data.h +0 -0
  17. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq2xxs_data.h +0 -0
  18. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq3s_data.h +0 -0
  19. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq3xxs_data.h +0 -0
  20. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_types.h +0 -0
  21. {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_util.h +0 -0
  22. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/fp8_expand.cu +0 -0
  23. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_f8_e4m3.cu +0 -0
  24. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq1_s.cu +0 -0
  25. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq2_xs.cu +0 -0
  26. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq2_xxs.cu +0 -0
  27. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq3_s.cu +0 -0
  28. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq3_xxs.cu +0 -0
  29. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq4_nl.cu +0 -0
  30. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq4_xs.cu +0 -0
  31. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q2_K.cu +0 -0
  32. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q3_K.cu +0 -0
  33. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q4_0.cu +0 -0
  34. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q4_1.cu +0 -0
  35. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q4_K.cu +0 -0
  36. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q5_0.cu +0 -0
  37. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q5_1.cu +0 -0
  38. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q5_K.cu +0 -0
  39. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q6_K.cu +0 -0
  40. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q8_0.cu +0 -0
  41. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q8_1.cu +0 -0
  42. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_tq1_0.cu +0 -0
  43. {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_tq2_0.cu +0 -0
  44. {hip_quant-0.2.2 → hip_quant-0.2.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hip-quant
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: HIP/ROCm 7.1 Quantization library for AMD GPUs
5
5
  Author-email: Your Name <your.email@example.com>
6
6
  Requires-Python: >=3.8
@@ -128,6 +128,8 @@ class HipQuant:
128
128
  self._dll.get_device_name.restype = ctypes.c_char_p
129
129
  self._dll.get_device_count.restype = ctypes.c_int
130
130
  self._dll.get_device_count.argtypes = []
131
+ self._dll.quantize_reset.restype = None
132
+ self._dll.quantize_reset.argtypes = []
131
133
 
132
134
  @property
133
135
  def device_count(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hip-quant
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: HIP/ROCm 7.1 Quantization library for AMD GPUs
5
5
  Author-email: Your Name <your.email@example.com>
6
6
  Requires-Python: >=3.8
@@ -62,6 +62,15 @@ static int8_t *d_iq3s_grid_data = NULL;
62
62
  static int *d_iq3s_map_data = NULL;
63
63
  static uint16_t *d_iq3s_neighbours_data = NULL;
64
64
 
65
+ // Per-thread cached GPU buffers for quantize_tensor.
66
+ // File-scope so quantize_reset() can reach them; because they are thread_local, a call frees only the calling thread's buffers.
67
+ static thread_local float *g_d_src = NULL;
68
+ static thread_local uint8_t *g_d_dst = NULL;
69
+ static thread_local float *g_d_imatrix = NULL;
70
+ static thread_local size_t g_d_src_cap = 0;
71
+ static thread_local size_t g_d_dst_cap = 0;
72
+ static thread_local size_t g_d_imatrix_cap = 0;
73
+
65
74
  #ifdef __cplusplus
66
75
  extern "C" {
67
76
  #endif
@@ -370,51 +379,45 @@ __declspec(dllexport) size_t quantize_tensor(
370
379
  size_t total_size = row_size * nrows;
371
380
  if (total_size == 0) return 0;
372
381
 
373
- int n_blocks_per_row = get_blocks_per_row(type, n_per_row); // Use thread_local caching to avoid hipMalloc driver leak and overhead on ROCm Windows
374
- static thread_local float *d_src = NULL;
375
- static thread_local uint8_t *d_dst = NULL;
376
- static thread_local float *d_imatrix = NULL;
377
- static thread_local size_t d_src_cap = 0;
378
- static thread_local size_t d_dst_cap = 0;
379
- static thread_local size_t d_imatrix_cap = 0;
382
+ int n_blocks_per_row = get_blocks_per_row(type, n_per_row);
380
383
 
381
384
  size_t src_bytes = (size_t)nrows * n_per_row * sizeof(float);
382
385
  size_t dst_bytes = total_size;
383
386
  size_t imatrix_bytes = imatrix ? src_bytes : 0;
384
387
 
385
- if (src_bytes > d_src_cap) {
386
- if (d_src) hipFree(d_src);
387
- hipError_t e = hipMalloc(&d_src, src_bytes);
388
+ if (src_bytes > g_d_src_cap) {
389
+ if (g_d_src) hipFree(g_d_src);
390
+ hipError_t e = hipMalloc(&g_d_src, src_bytes);
388
391
  if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMalloc(d_src, %zu) failed: %s\n", src_bytes, hipGetErrorString(e)); return 0; }
389
- d_src_cap = src_bytes;
392
+ g_d_src_cap = src_bytes;
390
393
  }
391
- if (dst_bytes > d_dst_cap) {
392
- if (d_dst) hipFree(d_dst);
393
- hipError_t e = hipMalloc(&d_dst, dst_bytes);
394
+ if (dst_bytes > g_d_dst_cap) {
395
+ if (g_d_dst) hipFree(g_d_dst);
396
+ hipError_t e = hipMalloc(&g_d_dst, dst_bytes);
394
397
  if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMalloc(d_dst, %zu) failed: %s\n", dst_bytes, hipGetErrorString(e)); return 0; }
395
- d_dst_cap = dst_bytes;
398
+ g_d_dst_cap = dst_bytes;
396
399
  }
397
- if (imatrix_bytes > d_imatrix_cap) {
398
- if (d_imatrix) hipFree(d_imatrix);
399
- hipError_t e = hipMalloc(&d_imatrix, imatrix_bytes);
400
+ if (imatrix_bytes > g_d_imatrix_cap) {
401
+ if (g_d_imatrix) hipFree(g_d_imatrix);
402
+ hipError_t e = hipMalloc(&g_d_imatrix, imatrix_bytes);
400
403
  if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMalloc(d_imatrix) failed: %s\n", hipGetErrorString(e)); return 0; }
401
- d_imatrix_cap = imatrix_bytes;
404
+ g_d_imatrix_cap = imatrix_bytes;
402
405
  }
403
406
 
404
407
  {
405
- hipError_t e = hipMemcpy(d_src, src, src_bytes, hipMemcpyHostToDevice);
408
+ hipError_t e = hipMemcpy(g_d_src, src, src_bytes, hipMemcpyHostToDevice);
406
409
  if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemcpy(d_src) failed: %s\n", hipGetErrorString(e)); return 0; }
407
410
  }
408
411
  if (imatrix) {
409
- hipError_t e = hipMemcpy(d_imatrix, imatrix, imatrix_bytes, hipMemcpyHostToDevice);
412
+ hipError_t e = hipMemcpy(g_d_imatrix, imatrix, imatrix_bytes, hipMemcpyHostToDevice);
410
413
  if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemcpy(d_imatrix) failed: %s\n", hipGetErrorString(e)); return 0; }
411
414
  }
412
415
  {
413
- hipError_t e = hipMemset(d_dst, 0, dst_bytes);
416
+ hipError_t e = hipMemset(g_d_dst, 0, dst_bytes);
414
417
  if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemset failed: %s\n", hipGetErrorString(e)); return 0; }
415
418
  }
416
419
 
417
- if (!dispatch_quantize_kernel(type, d_src, d_dst, d_imatrix, (int)nrows, (int)n_per_row, n_blocks_per_row)) {
420
+ if (!dispatch_quantize_kernel(type, g_d_src, g_d_dst, g_d_imatrix, (int)nrows, (int)n_per_row, n_blocks_per_row)) {
418
421
  return 0;
419
422
  }
420
423
 
@@ -430,7 +433,7 @@ __declspec(dllexport) size_t quantize_tensor(
430
433
  }
431
434
 
432
435
  {
433
- hipError_t e = hipMemcpy(dst, d_dst, dst_bytes, hipMemcpyDeviceToHost);
436
+ hipError_t e = hipMemcpy(dst, g_d_dst, dst_bytes, hipMemcpyDeviceToHost);
434
437
  if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemcpy(dst) failed: %s\n", hipGetErrorString(e)); return 0; }
435
438
  }
436
439
 
@@ -564,6 +567,15 @@ __declspec(dllexport) size_t quantize_tensor_fp8_input(
564
567
  return total_size;
565
568
  }
566
569
 
570
+ __declspec(dllexport) void quantize_reset() {
571
+ if (g_d_src) { hipFree(g_d_src); g_d_src = NULL; }
572
+ if (g_d_dst) { hipFree(g_d_dst); g_d_dst = NULL; }
573
+ if (g_d_imatrix) { hipFree(g_d_imatrix); g_d_imatrix = NULL; }
574
+ g_d_src_cap = 0;
575
+ g_d_dst_cap = 0;
576
+ g_d_imatrix_cap = 0;
577
+ }
578
+
567
579
  __declspec(dllexport) int get_device_count() {
568
580
  int count = 0;
569
581
  hipGetDeviceCount(&count);
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hip-quant"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "HIP/ROCm 7.1 Quantization library for AMD GPUs"
9
9
  authors = [
10
10
  { name="Your Name", email="your.email@example.com" }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes