hip-quant 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hip_quant-0.2.2/hip_quant.egg-info → hip_quant-0.2.3}/PKG-INFO +1 -1
- {hip_quant-0.2.2 → hip_quant-0.2.3}/__init__.py +2 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3/hip_quant.egg-info}/PKG-INFO +1 -1
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quantize.cpp +36 -24
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quantize.dll +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/pyproject.toml +1 -1
- {hip_quant-0.2.2 → hip_quant-0.2.3}/MANIFEST.in +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/README.md +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/build.ps1 +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_iquant_util.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/SOURCES.txt +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/dependency_links.txt +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/requires.txt +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant.egg-info/top_level.txt +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq1s_data.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq2xs_data.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq2xxs_data.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq3s_data.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_iq3xxs_data.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_types.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/hip_quant_util.h +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/fp8_expand.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_f8_e4m3.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq1_s.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq2_xs.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq2_xxs.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq3_s.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq3_xxs.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq4_nl.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_iq4_xs.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q2_K.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q3_K.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q4_0.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q4_1.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q4_K.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q5_0.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q5_1.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q5_K.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q6_K.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q8_0.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_q8_1.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_tq1_0.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/kernels/quant_tq2_0.cu +0 -0
- {hip_quant-0.2.2 → hip_quant-0.2.3}/setup.cfg +0 -0
|
@@ -128,6 +128,8 @@ class HipQuant:
|
|
|
128
128
|
self._dll.get_device_name.restype = ctypes.c_char_p
|
|
129
129
|
self._dll.get_device_count.restype = ctypes.c_int
|
|
130
130
|
self._dll.get_device_count.argtypes = []
|
|
131
|
+
self._dll.quantize_reset.restype = None
|
|
132
|
+
self._dll.quantize_reset.argtypes = []
|
|
131
133
|
|
|
132
134
|
@property
|
|
133
135
|
def device_count(self):
|
|
@@ -62,6 +62,15 @@ static int8_t *d_iq3s_grid_data = NULL;
|
|
|
62
62
|
static int *d_iq3s_map_data = NULL;
|
|
63
63
|
static uint16_t *d_iq3s_neighbours_data = NULL;
|
|
64
64
|
|
|
65
|
+
// Per-thread cached GPU buffers for quantize_tensor.
|
|
66
|
+
// File-scope so quantize_reset() can reach them; because they are thread_local, it frees only the calling thread's buffers.
|
|
67
|
+
static thread_local float *g_d_src = NULL;
|
|
68
|
+
static thread_local uint8_t *g_d_dst = NULL;
|
|
69
|
+
static thread_local float *g_d_imatrix = NULL;
|
|
70
|
+
static thread_local size_t g_d_src_cap = 0;
|
|
71
|
+
static thread_local size_t g_d_dst_cap = 0;
|
|
72
|
+
static thread_local size_t g_d_imatrix_cap = 0;
|
|
73
|
+
|
|
65
74
|
#ifdef __cplusplus
|
|
66
75
|
extern "C" {
|
|
67
76
|
#endif
|
|
@@ -370,51 +379,45 @@ __declspec(dllexport) size_t quantize_tensor(
|
|
|
370
379
|
size_t total_size = row_size * nrows;
|
|
371
380
|
if (total_size == 0) return 0;
|
|
372
381
|
|
|
373
|
-
int n_blocks_per_row = get_blocks_per_row(type, n_per_row);
|
|
374
|
-
static thread_local float *d_src = NULL;
|
|
375
|
-
static thread_local uint8_t *d_dst = NULL;
|
|
376
|
-
static thread_local float *d_imatrix = NULL;
|
|
377
|
-
static thread_local size_t d_src_cap = 0;
|
|
378
|
-
static thread_local size_t d_dst_cap = 0;
|
|
379
|
-
static thread_local size_t d_imatrix_cap = 0;
|
|
382
|
+
int n_blocks_per_row = get_blocks_per_row(type, n_per_row);
|
|
380
383
|
|
|
381
384
|
size_t src_bytes = (size_t)nrows * n_per_row * sizeof(float);
|
|
382
385
|
size_t dst_bytes = total_size;
|
|
383
386
|
size_t imatrix_bytes = imatrix ? src_bytes : 0;
|
|
384
387
|
|
|
385
|
-
if (src_bytes >
|
|
386
|
-
if (
|
|
387
|
-
hipError_t e = hipMalloc(&
|
|
388
|
+
if (src_bytes > g_d_src_cap) {
|
|
389
|
+
if (g_d_src) hipFree(g_d_src);
|
|
390
|
+
hipError_t e = hipMalloc(&g_d_src, src_bytes);
|
|
388
391
|
if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMalloc(d_src, %zu) failed: %s\n", src_bytes, hipGetErrorString(e)); return 0; }
|
|
389
|
-
|
|
392
|
+
g_d_src_cap = src_bytes;
|
|
390
393
|
}
|
|
391
|
-
if (dst_bytes >
|
|
392
|
-
if (
|
|
393
|
-
hipError_t e = hipMalloc(&
|
|
394
|
+
if (dst_bytes > g_d_dst_cap) {
|
|
395
|
+
if (g_d_dst) hipFree(g_d_dst);
|
|
396
|
+
hipError_t e = hipMalloc(&g_d_dst, dst_bytes);
|
|
394
397
|
if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMalloc(d_dst, %zu) failed: %s\n", dst_bytes, hipGetErrorString(e)); return 0; }
|
|
395
|
-
|
|
398
|
+
g_d_dst_cap = dst_bytes;
|
|
396
399
|
}
|
|
397
|
-
if (imatrix_bytes >
|
|
398
|
-
if (
|
|
399
|
-
hipError_t e = hipMalloc(&
|
|
400
|
+
if (imatrix_bytes > g_d_imatrix_cap) {
|
|
401
|
+
if (g_d_imatrix) hipFree(g_d_imatrix);
|
|
402
|
+
hipError_t e = hipMalloc(&g_d_imatrix, imatrix_bytes);
|
|
400
403
|
if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMalloc(d_imatrix) failed: %s\n", hipGetErrorString(e)); return 0; }
|
|
401
|
-
|
|
404
|
+
g_d_imatrix_cap = imatrix_bytes;
|
|
402
405
|
}
|
|
403
406
|
|
|
404
407
|
{
|
|
405
|
-
hipError_t e = hipMemcpy(
|
|
408
|
+
hipError_t e = hipMemcpy(g_d_src, src, src_bytes, hipMemcpyHostToDevice);
|
|
406
409
|
if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemcpy(d_src) failed: %s\n", hipGetErrorString(e)); return 0; }
|
|
407
410
|
}
|
|
408
411
|
if (imatrix) {
|
|
409
|
-
hipError_t e = hipMemcpy(
|
|
412
|
+
hipError_t e = hipMemcpy(g_d_imatrix, imatrix, imatrix_bytes, hipMemcpyHostToDevice);
|
|
410
413
|
if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemcpy(d_imatrix) failed: %s\n", hipGetErrorString(e)); return 0; }
|
|
411
414
|
}
|
|
412
415
|
{
|
|
413
|
-
hipError_t e = hipMemset(
|
|
416
|
+
hipError_t e = hipMemset(g_d_dst, 0, dst_bytes);
|
|
414
417
|
if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemset failed: %s\n", hipGetErrorString(e)); return 0; }
|
|
415
418
|
}
|
|
416
419
|
|
|
417
|
-
if (!dispatch_quantize_kernel(type,
|
|
420
|
+
if (!dispatch_quantize_kernel(type, g_d_src, g_d_dst, g_d_imatrix, (int)nrows, (int)n_per_row, n_blocks_per_row)) {
|
|
418
421
|
return 0;
|
|
419
422
|
}
|
|
420
423
|
|
|
@@ -430,7 +433,7 @@ __declspec(dllexport) size_t quantize_tensor(
|
|
|
430
433
|
}
|
|
431
434
|
|
|
432
435
|
{
|
|
433
|
-
hipError_t e = hipMemcpy(dst,
|
|
436
|
+
hipError_t e = hipMemcpy(dst, g_d_dst, dst_bytes, hipMemcpyDeviceToHost);
|
|
434
437
|
if (e != hipSuccess) { fprintf(stderr, "hip_quantize: hipMemcpy(dst) failed: %s\n", hipGetErrorString(e)); return 0; }
|
|
435
438
|
}
|
|
436
439
|
|
|
@@ -564,6 +567,15 @@ __declspec(dllexport) size_t quantize_tensor_fp8_input(
|
|
|
564
567
|
return total_size;
|
|
565
568
|
}
|
|
566
569
|
|
|
570
|
+
__declspec(dllexport) void quantize_reset() {
|
|
571
|
+
if (g_d_src) { hipFree(g_d_src); g_d_src = NULL; }
|
|
572
|
+
if (g_d_dst) { hipFree(g_d_dst); g_d_dst = NULL; }
|
|
573
|
+
if (g_d_imatrix) { hipFree(g_d_imatrix); g_d_imatrix = NULL; }
|
|
574
|
+
g_d_src_cap = 0;
|
|
575
|
+
g_d_dst_cap = 0;
|
|
576
|
+
g_d_imatrix_cap = 0;
|
|
577
|
+
}
|
|
578
|
+
|
|
567
579
|
__declspec(dllexport) int get_device_count() {
|
|
568
580
|
int count = 0;
|
|
569
581
|
hipGetDeviceCount(&count);
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|