compressed-tensors 0.9.5a20250424__py3-none-any.whl → 0.9.5a20250428__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as published to a supported public registry; it is provided for informational purposes only.
compressed_tensors/quantization/lifecycle/initialize.py

@@ -31,7 +31,7 @@ from compressed_tensors.quantization.quant_scheme import QuantizationScheme
 from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme
 from compressed_tensors.utils import (
     disable_hf_hook,
-    has_offloaded_params,
+    get_execution_device,
     register_offload_parameter,
 )
 from torch.nn import Module, Parameter
@@ -148,11 +148,8 @@ def _initialize_scale_zero_point(
     if quantization_args.dynamic:
         return
 
-    # begin on the same device as other parameters or cpu if offloaded.
-    # in the offloaded case, there's no point moving tensors to the execution device
-    # if they're going to be immediately offloaded by `register_offload_parameter`
-    params_device = next(module.parameters()).device
-    device = "cpu" if has_offloaded_params(module) else params_device
+    # initialize on execution device to avoid performing quantized ops on cpu
+    device = get_execution_device(module)
 
     # infer expected scale/zero point shape
     if quantization_args.strategy == QuantizationStrategy.TOKEN:
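
For context, `get_execution_device` (now imported in place of `has_offloaded_params`) resolves the device a module's forward pass runs on: the accelerate hook's execution device for offloaded modules, otherwise the device of the module's parameters. A minimal sketch, not part of the diff, of what the new initialization path does; the plain `torch.nn.Linear` is only an illustrative stand-in:

    import torch

    from compressed_tensors.utils import get_execution_device

    # Illustrative stand-in; in practice this is a quantized submodule that may
    # be offloaded via accelerate, in which case get_execution_device returns
    # the hook's execution device (e.g. cuda:0) rather than "cpu".
    module = torch.nn.Linear(8, 8)
    device = get_execution_device(module)  # torch.device("cpu") for this toy module

    # Scale/zero-point parameters are created directly on that device, so later
    # quantized ops are not forced onto CPU for offloaded modules.
    scale = torch.nn.Parameter(torch.empty(1, device=device), requires_grad=False)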

compressed_tensors/utils/offload.py

@@ -28,7 +28,7 @@ Utilities associated with offloading functionality provided by `accelerate`.
 import contextlib
 import warnings
 from functools import wraps
-from typing import Any, Callable, Dict, Literal, Optional, Union
+from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
 
 import torch
 
@@ -67,6 +67,8 @@ __all__ = [
     "delete_offload_parameter",
     "has_offloaded_params",
     "disable_hf_hook",
+    "disable_offload",
+    "align_modules",
     "align_module_device",
 ]
 
@@ -344,6 +346,43 @@ def delete_from_weights_map(
     )
 
 
+@contextlib.contextmanager
+def disable_offload(module: torch.nn.Module):
+    """
+    Context manager to disable module onloading and offloading. Parameters will stay on
+    their current device
+
+    :param module: module to disable offloading for
+    """
+    if has_offloaded_params(module):
+        module._hf_hook.offload = False
+        yield
+        module._hf_hook.offload = True
+    else:
+        yield
+
+
+@contextlib.contextmanager
+def align_modules(
+    modules: Union[torch.nn.Module, Iterable[torch.nn.Module]],
+    execution_device: Optional[torch.device] = None,
+):
+    """
+    Context manager for onloading modules to a device, and disabling onload and offload
+    attempts triggered by forward calls. Used for sequential onloading of layers
+
+    :param modules: `torch.nn.Module` or iterable of `torch.nn.Module`s to onload
+    :param execution_device: device to onload to
+    """
+    modules = (modules,) if isinstance(modules, torch.nn.Module) else modules
+
+    with contextlib.ExitStack() as stack:
+        for module in modules:
+            stack.enter_context(align_module_device(module, execution_device))
+            stack.enter_context(disable_offload(module))  # disable redundant onloading
+        yield
+
+
 """ Upstreamed Functions """
 
 
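To make the intended use of the two new helpers concrete, here is a short usage sketch, not part of the diff; the toy `layer` and the CPU execution device stand in for an accelerate-offloaded layer and its GPU device:

    import torch

    from compressed_tensors.utils.offload import align_modules, disable_offload

    # Illustrative stand-in; in practice this would be one offloaded layer of a
    # dispatched model (e.g. a single transformer decoder layer).
    layer = torch.nn.Linear(16, 16)

    # Onload the layer's parameters once and keep them in place for the whole
    # block: align_module_device moves them to the execution device, while
    # disable_offload suppresses the hook's per-forward offload round-trips.
    with align_modules(layer, execution_device=torch.device("cpu")):
        y = layer(torch.randn(2, 16))
        y = layer(y)  # repeated forwards reuse the already-onloaded weights

    # Outside the block the module's normal onload/offload behavior resumes.

Because `align_modules` builds on an `ExitStack`, a single `with` block can onload an arbitrary iterable of modules and unwind all of them on exit, which is what enables the sequential, layer-by-layer onloading mentioned in the docstring.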

compressed_tensors/version.py

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.9.5.a20250424'
+__version__ = version = '0.9.5.a20250428'
 __version_tuple__ = version_tuple = (0, 9, 5)

compressed_tensors-0.9.5a20250428.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250424
+Version: 0.9.5a20250428
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
1
1
  compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
2
2
  compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
3
- compressed_tensors/version.py,sha256=fMpLfUNedNFTmTmQeHxGZnMaXAKOKiqpI9xyx46F2gI,521
3
+ compressed_tensors/version.py,sha256=KzQHMtL_lC-Yc3CaKzKPE8g_ZpnqQDe-a-Boi82UqKY,521
4
4
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
5
5
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -33,20 +33,20 @@ compressed_tensors/quantization/lifecycle/apply.py,sha256=OR-6QmN9pFRGteYMBAatu2
33
33
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
34
34
  compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVVBdhhSAlgZqFC26vZARxE0ko,12961
35
35
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
36
- compressed_tensors/quantization/lifecycle/initialize.py,sha256=SY4-FJWpVSupQjuvy7rrIc0pFYU9cRL5Lo1KyfUSvoU,8010
36
+ compressed_tensors/quantization/lifecycle/initialize.py,sha256=7bqFwozBdw5hkJ7pwavJUweJ4nqsaZGtXYMQTb6WJcQ,7767
37
37
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
38
38
  compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6BfxbMU_1sqffTf5YUIpPiU,14391
39
39
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
40
40
  compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
41
41
  compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
42
42
  compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_GCDj0vg,10415
43
- compressed_tensors/utils/offload.py,sha256=Fmb4jBJhH5OdSQFaecFSHK_UreSyZdynEkadZ_oKcvM,14153
43
+ compressed_tensors/utils/offload.py,sha256=JNQ66_6vhSsizhlUaMgyEdBuFolYxbgUuT1mAZrCfKY,15436
44
44
  compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
45
45
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
46
46
  compressed_tensors/utils/safetensors_load.py,sha256=rwj0ufU5561ScWDoCG7tzLBRDtiykNno2Iq4PM_JA7E,11499
47
47
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
48
- compressed_tensors-0.9.5a20250424.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
- compressed_tensors-0.9.5a20250424.dist-info/METADATA,sha256=P0oAhrS28ZU90nUEi9yjIu3CE-968yZTsTLTx1Uj1nM,7004
50
- compressed_tensors-0.9.5a20250424.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
51
- compressed_tensors-0.9.5a20250424.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
- compressed_tensors-0.9.5a20250424.dist-info/RECORD,,
48
+ compressed_tensors-0.9.5a20250428.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
+ compressed_tensors-0.9.5a20250428.dist-info/METADATA,sha256=eLy89co_VEi2up7wSW_5HntVVqtokiWBAoXa1rJ2TGQ,7004
50
+ compressed_tensors-0.9.5a20250428.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
51
+ compressed_tensors-0.9.5a20250428.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
+ compressed_tensors-0.9.5a20250428.dist-info/RECORD,,

compressed_tensors-0.9.5a20250428.dist-info/WHEEL

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (79.0.1)
+Generator: setuptools (80.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 