compressed-tensors 0.9.5a20250425__py3-none-any.whl → 0.9.5a20250428__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/lifecycle/initialize.py +3 -6
- compressed_tensors/utils/offload.py +40 -1
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/METADATA +1 -1
- {compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/RECORD +8 -8
- {compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/top_level.txt +0 -0
@@ -31,7 +31,7 @@ from compressed_tensors.quantization.quant_scheme import QuantizationScheme
|
|
31
31
|
from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme
|
32
32
|
from compressed_tensors.utils import (
|
33
33
|
disable_hf_hook,
|
34
|
-
has_offloaded_params,
|
34
|
+
get_execution_device,
|
35
35
|
register_offload_parameter,
|
36
36
|
)
|
37
37
|
from torch.nn import Module, Parameter
|
@@ -148,11 +148,8 @@ def _initialize_scale_zero_point(
|
|
148
148
|
if quantization_args.dynamic:
|
149
149
|
return
|
150
150
|
|
151
|
-
#
|
152
|
-
|
153
|
-
# if they're going to be immediately offloaded by `register_offload_parameter`
|
154
|
-
params_device = next(module.parameters()).device
|
155
|
-
device = "cpu" if has_offloaded_params(module) else params_device
|
151
|
+
# initialize on execution device to avoid performing quantized ops on cpu
|
152
|
+
device = get_execution_device(module)
|
156
153
|
|
157
154
|
# infer expected scale/zero point shape
|
158
155
|
if quantization_args.strategy == QuantizationStrategy.TOKEN:
|
@@ -28,7 +28,7 @@ Utilities associated with offloading functionality provided by `accelerate`.
|
|
28
28
|
import contextlib
|
29
29
|
import warnings
|
30
30
|
from functools import wraps
|
31
|
-
from typing import Any, Callable, Dict, Literal, Optional, Union
|
31
|
+
from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
|
32
32
|
|
33
33
|
import torch
|
34
34
|
|
@@ -67,6 +67,8 @@ __all__ = [
|
|
67
67
|
"delete_offload_parameter",
|
68
68
|
"has_offloaded_params",
|
69
69
|
"disable_hf_hook",
|
70
|
+
"disable_offload",
|
71
|
+
"align_modules",
|
70
72
|
"align_module_device",
|
71
73
|
]
|
72
74
|
|
@@ -344,6 +346,43 @@ def delete_from_weights_map(
|
|
344
346
|
)
|
345
347
|
|
346
348
|
|
349
|
+
@contextlib.contextmanager
def disable_offload(module: torch.nn.Module):
    """
    Context manager to disable module onloading and offloading. Parameters will stay on
    their current device

    When `has_offloaded_params(module)` is false, nothing is modified and the context
    is a no-op. Otherwise `module._hf_hook.offload` is set to `False` while the context
    is active and set back to `True` on exit, including when the managed body raises

    :param module: module to disable offloading for
    """
    if not has_offloaded_params(module):
        # no offloading hook to toggle; just run the managed body
        yield
        return

    module._hf_hook.offload = False
    try:
        yield
    finally:
        # restore in `finally` so that an exception raised inside the `with` body
        # cannot leave the module with offloading permanently disabled
        module._hf_hook.offload = True
|
363
|
+
|
364
|
+
|
365
|
+
@contextlib.contextmanager
def align_modules(
    modules: Union[torch.nn.Module, Iterable[torch.nn.Module]],
    execution_device: Optional[torch.device] = None,
):
    """
    Context manager which, for every given module, enters `align_module_device`
    followed by `disable_offload`, and keeps all of those contexts open until the
    managed body finishes. Intended for sequential onloading of layers, where
    forward calls should not trigger further onload/offload attempts

    :param modules: a single `torch.nn.Module` or an iterable of `torch.nn.Module`s
    :param execution_device: device forwarded to `align_module_device` for each
        module; defaults to `None`
    """
    # accept a bare module by wrapping it into a one-element tuple
    if isinstance(modules, torch.nn.Module):
        modules = (modules,)

    with contextlib.ExitStack() as exit_stack:
        for target in modules:
            onloaded = align_module_device(target, execution_device)
            exit_stack.enter_context(onloaded)
            # entered after onloading: disables redundant onloading for this module
            exit_stack.enter_context(disable_offload(target))

        # every context is active here; the stack unwinds them in reverse order
        yield
|
384
|
+
|
385
|
+
|
347
386
|
""" Upstreamed Functions """
|
348
387
|
|
349
388
|
|
compressed_tensors/version.py
CHANGED
{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.9.5a20250425
|
3
|
+
Version: 0.9.5a20250428
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
|
2
2
|
compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
|
3
|
-
compressed_tensors/version.py,sha256=
|
3
|
+
compressed_tensors/version.py,sha256=KzQHMtL_lC-Yc3CaKzKPE8g_ZpnqQDe-a-Boi82UqKY,521
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
|
5
5
|
compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
|
6
6
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
@@ -33,20 +33,20 @@ compressed_tensors/quantization/lifecycle/apply.py,sha256=OR-6QmN9pFRGteYMBAatu2
|
|
33
33
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
|
34
34
|
compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVVBdhhSAlgZqFC26vZARxE0ko,12961
|
35
35
|
compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
|
36
|
-
compressed_tensors/quantization/lifecycle/initialize.py,sha256=
|
36
|
+
compressed_tensors/quantization/lifecycle/initialize.py,sha256=7bqFwozBdw5hkJ7pwavJUweJ4nqsaZGtXYMQTb6WJcQ,7767
|
37
37
|
compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
|
38
38
|
compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6BfxbMU_1sqffTf5YUIpPiU,14391
|
39
39
|
compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
|
40
40
|
compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
|
41
41
|
compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
|
42
42
|
compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_GCDj0vg,10415
|
43
|
-
compressed_tensors/utils/offload.py,sha256=
|
43
|
+
compressed_tensors/utils/offload.py,sha256=JNQ66_6vhSsizhlUaMgyEdBuFolYxbgUuT1mAZrCfKY,15436
|
44
44
|
compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
|
45
45
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
46
46
|
compressed_tensors/utils/safetensors_load.py,sha256=rwj0ufU5561ScWDoCG7tzLBRDtiykNno2Iq4PM_JA7E,11499
|
47
47
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
48
|
-
compressed_tensors-0.9.5a20250425.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
49
|
-
compressed_tensors-0.9.
|
50
|
-
compressed_tensors-0.9.5a20250425.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
51
|
-
compressed_tensors-0.9.5a20250425.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
52
|
-
compressed_tensors-0.9.5a20250425.dist-info/RECORD,,
|
48
|
+
compressed_tensors-0.9.5a20250428.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
49
|
+
compressed_tensors-0.9.5a20250428.dist-info/METADATA,sha256=eLy89co_VEi2up7wSW_5HntVVqtokiWBAoXa1rJ2TGQ,7004
|
50
|
+
compressed_tensors-0.9.5a20250428.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
51
|
+
compressed_tensors-0.9.5a20250428.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
52
|
+
compressed_tensors-0.9.5a20250428.dist-info/RECORD,,
|
{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|