compressed_tensors-0.10.2a20250616-py3-none-any.whl → compressed_tensors-0.10.2a20250620-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/compressed_tensors/utils/offload.py
+++ b/compressed_tensors/utils/offload.py
@@ -85,6 +85,7 @@ __all__ = [
     "delete_offload_module",
     "offloaded_dispatch",
     "disable_offloading",
+    "remove_dispatch",
 ]
 
 
@@ -170,22 +171,22 @@ def update_parameter_data(
 
 def get_execution_device(module: torch.nn.Module) -> torch.device:
     """
-    Get the device which inputs should be moved to before module execution
+    Get the device which inputs should be moved to before module execution.
+    Assume that modules execute in the same order as returned by `model.modules()`
 
     :param module: module to check, may be offloaded
     :return: onload device of module
     """
-    if has_offloaded_params(module):
-        return module._hf_hook.execution_device
+    for submodule in module.modules():
+        if has_offloaded_params(submodule):
+            return submodule._hf_hook.execution_device
 
-    first_param = next(module.parameters(), None)
-    if first_param is None:
-        warnings.warn(
-            f"Unable able to infer execution device of {module}, falling back to CPU"
-        )
-        return torch.device("cpu")
+        param = next(submodule.parameters(recurse=False), None)
+        if param is not None:
+            return param.device
 
-    return first_param.device
+    warnings.warn(f"Unable to get execution device of {module}, falling back to CPU")
+    return torch.device("cpu")
 
 
 def register_offload_parameter(
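
The rewritten `get_execution_device` walks `module.modules()` instead of checking only the root, so the device is resolved from whichever submodule first has offloaded parameters or directly-owned parameters. A minimal sketch of the new traversal, assuming the function is imported straight from `compressed_tensors.utils.offload` (where this diff defines it); the toy model is purely illustrative:

```python
import torch
from compressed_tensors.utils.offload import get_execution_device

# Neither the root Sequential nor Flatten owns parameters directly, so the
# traversal continues until it reaches the Linear layer. If any submodule
# had been offloaded via accelerate hooks, its `_hf_hook.execution_device`
# would be returned first; the old implementation only checked the root
# module for that case.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(16, 4),
)

print(get_execution_device(model))  # device of Linear's weight, e.g. cpu
```
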
@@ -514,6 +515,9 @@ def offloaded_dispatch(
     if offload_device == "disk":
         raise NotImplementedError("Disk offloading is not currently supported")
 
+    # remove any existing hooks
+    remove_dispatch(module)
+
     # create weights map
     state_dict = module.state_dict()
     state_dict = {key: val.to(offload_device) for key, val in state_dict.items()}
@@ -535,6 +539,33 @@ def offloaded_dispatch(
         weights_map=weights_map,
         tied_params_map=tied_params_map,
     )
+
+    # when saving a model, `PretrainedModel.save_pretrained` will only
+    # onload weights if the following requirements are met
+    # if (
+    #     hasattr(self, "hf_device_map")
+    #     and len(set(self.hf_device_map.values())) > 1
+    #     and ("cpu" in self.hf_device_map.values()
+    #         or "disk" in self.hf_device_map.values())
+    # ):
+    # because this function always offloads, disregard actual devices and
+    # always use `cpu` and `cuda:0` to guarantee this condition passes
+    setattr(module, "hf_device_map", {"fake_offload": "cpu", "fake_exec": "cuda:0"})
+
+    return module
+
+
+def remove_dispatch(module: torch.nn.Module) -> torch.nn.Module:
+    """
+    Remove any existing dispatches from module
+
+    :param module: module which may be dispatched with hf hooks
+    :return: module without dispatch
+    """
+    remove_hook_from_module(module, recurse=True)
+    if hasattr(module, "hf_device_map"):
+        delattr(module, "hf_device_map")
+
     return module
 
 
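
Because `offloaded_dispatch` now calls the new `remove_dispatch` up front, dispatching an already-dispatched module replaces its hooks rather than stacking new ones, and `remove_dispatch` can also be called directly to undo a dispatch. A hedged sketch of the intended flow: the `offload_device` keyword appears in this diff, while `execution_device` and the overall call shape are assumptions about the existing signature rather than something shown here.

```python
import torch
from compressed_tensors.utils.offload import offloaded_dispatch, remove_dispatch

model = torch.nn.Linear(16, 4)

# assumed call shape: compute on cuda:0, keep weights offloaded on cpu
model = offloaded_dispatch(
    model,
    execution_device=torch.device("cuda:0"),  # assumed keyword name
    offload_device=torch.device("cpu"),
)
# the synthetic device map set by this diff, regardless of actual devices
assert model.hf_device_map == {"fake_offload": "cpu", "fake_exec": "cuda:0"}

# re-dispatching is now safe: existing hooks are removed first
model = offloaded_dispatch(
    model,
    execution_device=torch.device("cuda:0"),  # assumed keyword name
    offload_device=torch.device("cpu"),
)

# strip all hooks and the synthetic hf_device_map
model = remove_dispatch(model)
assert not hasattr(model, "hf_device_map")
```
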
@@ -563,7 +594,7 @@ def disable_offloading():
     # update any parameters which may have changed
     for module, (hook, offload) in onloaded_modules.items():
         hook.offload = offload
-        for name, param in module.named_parameters():
+        for name, param in module.named_parameters(recurse=False):
             update_offload_parameter(module, name, param.data)
         hook.post_forward(module, None)
 
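
The switch to `named_parameters(recurse=False)` means that when the context exits, each hooked module writes back only the parameters it owns directly, leaving descendant parameters to their own modules' hooks. A usage sketch, assuming `disable_offloading` is the argument-less context manager its definition here suggests; the plain `Linear` stands in for a module that would normally be dispatched with `offloaded_dispatch`:

```python
import torch
from compressed_tensors.utils.offload import disable_offloading

# placeholder for a module previously dispatched with offloaded_dispatch
model = torch.nn.Linear(16, 4)

with disable_offloading(), torch.no_grad():
    # onloaded weights stay on their execution device for the whole block;
    # on exit, each module's directly-owned parameters are written back to
    # the offload store before offloading resumes
    for _ in range(4):
        model(torch.randn(2, 16))
```
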
--- a/compressed_tensors/version.py
+++ b/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.10.2.a20250616'
+__version__ = version = '0.10.2.a20250620'
 __version_tuple__ = version_tuple = (0, 10, 2)
--- a/compressed_tensors-0.10.2a20250616.dist-info/METADATA
+++ b/compressed_tensors-0.10.2a20250620.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.2a20250616
+Version: 0.10.2a20250620
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- a/compressed_tensors-0.10.2a20250616.dist-info/RECORD
+++ b/compressed_tensors-0.10.2a20250620.dist-info/RECORD
@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=luzxcDdA8EA_-n05GudjzinnLHUkR5eQCjyTTNLxDnw,523
+compressed_tensors/version.py,sha256=fdaxTQXKxDCy4sL7AzY19gLcpfD2GYpzFhuqxROV3Bo,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -54,13 +54,13 @@ compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1
 compressed_tensors/transform/utils/utils.py,sha256=PRPTYwPs2nnNaQMq2GEbC4QYKHFKlZwaRyPgdDhl66g,2992
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
 compressed_tensors/utils/helpers.py,sha256=cPg-ikdeA92aIGwBONg8GmPNvcGlFhozyJVwsRiXBTA,11981
-compressed_tensors/utils/offload.py,sha256=11k8p3y_0WVbrozYsrPjJb_4Ln8YkeO_4Q8W4ibFVlc,22710
+compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.2a20250616.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.10.2a20250616.dist-info/METADATA,sha256=V_L8-0yZM5t7FG0knLacaaIAHNTuEmTe1f7CqmDCAXg,7005
-compressed_tensors-0.10.2a20250616.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.10.2a20250616.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.10.2a20250616.dist-info/RECORD,,
+compressed_tensors-0.10.2a20250620.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.2a20250620.dist-info/METADATA,sha256=hc0YFMpgLrIwHZUEvRbpaShjSDbczmVgRm4pEDst6QM,7005
+compressed_tensors-0.10.2a20250620.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.2a20250620.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.2a20250620.dist-info/RECORD,,