compressed-tensors 0.10.2a20250616__py3-none-any.whl → 0.10.2a20250620__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/utils/offload.py +42 -11
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.10.2a20250616.dist-info → compressed_tensors-0.10.2a20250620.dist-info}/METADATA +1 -1
- {compressed_tensors-0.10.2a20250616.dist-info → compressed_tensors-0.10.2a20250620.dist-info}/RECORD +7 -7
- {compressed_tensors-0.10.2a20250616.dist-info → compressed_tensors-0.10.2a20250620.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.10.2a20250616.dist-info → compressed_tensors-0.10.2a20250620.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.10.2a20250616.dist-info → compressed_tensors-0.10.2a20250620.dist-info}/top_level.txt +0 -0
@@ -85,6 +85,7 @@ __all__ = [
|
|
85
85
|
"delete_offload_module",
|
86
86
|
"offloaded_dispatch",
|
87
87
|
"disable_offloading",
|
88
|
+
"remove_dispatch",
|
88
89
|
]
|
89
90
|
|
90
91
|
|
@@ -170,22 +171,22 @@ def update_parameter_data(
|
|
170
171
|
|
171
172
|
def get_execution_device(module: torch.nn.Module) -> torch.device:
|
172
173
|
"""
|
173
|
-
Get the device which inputs should be moved to before module execution
|
174
|
+
Get the device which inputs should be moved to before module execution.
|
175
|
+
Assume that modules execute in the same order as returned by `model.modules()`
|
174
176
|
|
175
177
|
:param module: module to check, may be offloaded
|
176
178
|
:return: onload device of module
|
177
179
|
"""
|
178
|
-
|
179
|
-
|
180
|
+
for submodule in module.modules():
|
181
|
+
if has_offloaded_params(submodule):
|
182
|
+
return submodule._hf_hook.execution_device
|
180
183
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
f"Unable able to infer execution device of {module}, falling back to CPU"
|
185
|
-
)
|
186
|
-
return torch.device("cpu")
|
184
|
+
param = next(submodule.parameters(recurse=False), None)
|
185
|
+
if param is not None:
|
186
|
+
return param.device
|
187
187
|
|
188
|
-
|
188
|
+
warnings.warn(f"Unable to get execution device of {module}, falling back to CPU")
|
189
|
+
return torch.device("cpu")
|
189
190
|
|
190
191
|
|
191
192
|
def register_offload_parameter(
|
@@ -514,6 +515,9 @@ def offloaded_dispatch(
|
|
514
515
|
if offload_device == "disk":
|
515
516
|
raise NotImplementedError("Disk offloading is not currently supported")
|
516
517
|
|
518
|
+
# remove any existing hooks
|
519
|
+
remove_dispatch(module)
|
520
|
+
|
517
521
|
# create weights map
|
518
522
|
state_dict = module.state_dict()
|
519
523
|
state_dict = {key: val.to(offload_device) for key, val in state_dict.items()}
|
@@ -535,6 +539,33 @@ def offloaded_dispatch(
|
|
535
539
|
weights_map=weights_map,
|
536
540
|
tied_params_map=tied_params_map,
|
537
541
|
)
|
542
|
+
|
543
|
+
# when saving a model, `PretrainedModel.save_pretrained` will only
|
544
|
+
# onload weights if the following requirements are met
|
545
|
+
# if (
|
546
|
+
# hasattr(self, "hf_device_map")
|
547
|
+
# and len(set(self.hf_device_map.values())) > 1
|
548
|
+
# and ("cpu" in self.hf_device_map.values()
|
549
|
+
# or "disk" in self.hf_device_map.values())
|
550
|
+
# ):
|
551
|
+
# because this function always offloads, disregard actual devices and
|
552
|
+
# always use `cpu` and `cuda:0` to guarantee this condition passes
|
553
|
+
setattr(module, "hf_device_map", {"fake_offload": "cpu", "fake_exec": "cuda:0"})
|
554
|
+
|
555
|
+
return module
|
556
|
+
|
557
|
+
|
558
|
+
def remove_dispatch(module: torch.nn.Module) -> torch.nn.Module:
|
559
|
+
"""
|
560
|
+
Remove any existing dispatches from module
|
561
|
+
|
562
|
+
:param module: module which may be dispatched with hf hooks
|
563
|
+
:return: module without dispatch
|
564
|
+
"""
|
565
|
+
remove_hook_from_module(module, recurse=True)
|
566
|
+
if hasattr(module, "hf_device_map"):
|
567
|
+
delattr(module, "hf_device_map")
|
568
|
+
|
538
569
|
return module
|
539
570
|
|
540
571
|
|
@@ -563,7 +594,7 @@ def disable_offloading():
|
|
563
594
|
# update any parameters which may have changed
|
564
595
|
for module, (hook, offload) in onloaded_modules.items():
|
565
596
|
hook.offload = offload
|
566
|
-
for name, param in module.named_parameters():
|
597
|
+
for name, param in module.named_parameters(recurse=False):
|
567
598
|
update_offload_parameter(module, name, param.data)
|
568
599
|
hook.post_forward(module, None)
|
569
600
|
|
compressed_tensors/version.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.10.2a20250616
|
3
|
+
Version: 0.10.2a20250620
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
{compressed_tensors-0.10.2a20250616.dist-info → compressed_tensors-0.10.2a20250620.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
|
2
2
|
compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
|
3
|
-
compressed_tensors/version.py,sha256=
|
3
|
+
compressed_tensors/version.py,sha256=fdaxTQXKxDCy4sL7AzY19gLcpfD2GYpzFhuqxROV3Bo,523
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
|
5
5
|
compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
|
6
6
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
@@ -54,13 +54,13 @@ compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1
|
|
54
54
|
compressed_tensors/transform/utils/utils.py,sha256=PRPTYwPs2nnNaQMq2GEbC4QYKHFKlZwaRyPgdDhl66g,2992
|
55
55
|
compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
|
56
56
|
compressed_tensors/utils/helpers.py,sha256=cPg-ikdeA92aIGwBONg8GmPNvcGlFhozyJVwsRiXBTA,11981
|
57
|
-
compressed_tensors/utils/offload.py,sha256=
|
57
|
+
compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
|
58
58
|
compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
|
59
59
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
60
60
|
compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
|
61
61
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
62
|
-
compressed_tensors-0.10.
|
63
|
-
compressed_tensors-0.10.
|
64
|
-
compressed_tensors-0.10.
|
65
|
-
compressed_tensors-0.10.
|
66
|
-
compressed_tensors-0.10.
|
62
|
+
compressed_tensors-0.10.2a20250620.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
63
|
+
compressed_tensors-0.10.2a20250620.dist-info/METADATA,sha256=hc0YFMpgLrIwHZUEvRbpaShjSDbczmVgRm4pEDst6QM,7005
|
64
|
+
compressed_tensors-0.10.2a20250620.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
65
|
+
compressed_tensors-0.10.2a20250620.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
66
|
+
compressed_tensors-0.10.2a20250620.dist-info/RECORD,,
|
{compressed_tensors-0.10.2a20250616.dist-info → compressed_tensors-0.10.2a20250620.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|