compressed-tensors 0.10.2a20250612__py3-none-any.whl → 0.10.2a20250613__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,7 +59,7 @@ class HadamardFactory(TransformFactory):
59
59
  return HadamardTransform(weight, args)
60
60
 
61
61
  def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
62
- data = deterministic_hadamard_matrix(size)
62
+ data = deterministic_hadamard_matrix(size, dtype, device)
63
63
  data = data.to(dtype=dtype, device=device)
64
64
  return Parameter(data, requires_grad=self.scheme.requires_grad)
65
65
 
@@ -29,6 +29,6 @@ class RandomHadamardFactory(HadamardFactory):
29
29
  """
30
30
 
31
31
  def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
32
- data = random_hadamard_matrix(size, self.generator)
32
+ data = random_hadamard_matrix(size, dtype, device, self.generator)
33
33
  data = data.to(dtype=dtype, device=device)
34
34
  return Parameter(data, requires_grad=self.scheme.requires_grad)
@@ -13,95 +13,133 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import math
16
- from typing import Optional, Tuple
16
+ from pathlib import Path
17
+ from typing import Optional
17
18
 
18
- import numpy
19
19
  import torch
20
+ from safetensors import safe_open
20
21
 
21
22
 
22
- __all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix"]
23
+ REPO_PATH = Path(__file__).parent / "hadamards.safetensors"
23
24
 
24
- # adapted from:
25
- # https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py
26
- def deterministic_hadamard_matrix(size: int) -> torch.Tensor:
25
+
26
+ __all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix", "is_pow2"]
27
+
28
+
29
+ # note that hadamard matrix multiplication can be accelerated using a library such as
30
+ # https://github.com/Dao-AILab/fast-hadamard-transform/tree/master
31
+
32
+
33
+ def deterministic_hadamard_matrix(
34
+ size: int,
35
+ dtype: torch.dtype = torch.bfloat16,
36
+ device: torch.device = torch.device("cpu"),
37
+ ) -> torch.Tensor:
27
38
  """
28
39
  Construct an n-by-n Hadamard matrix, using Sylvester's construction.
29
40
  `n` must be a power of 2.
30
41
 
42
+ Adapted from https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py # noqa: E501
43
+
31
44
  :param size: order of the matrix, must be a power of 2
45
+ :param dtype: data type of matrix
46
+ :param device: device to construct matrix on
32
47
  :return: hadamard matrix of size `size`
33
48
  """
34
49
  if size <= 0:
35
50
  raise ValueError("Cannot construct deterministic hadamard of size <= 0")
36
51
 
37
- log2 = int(math.log(size, 2))
52
+ log2 = int(math.log2(size))
38
53
  if size != 2**log2:
39
54
  raise ValueError("Cannot construct deterministic hadamard of size != 2^n")
40
55
 
41
- H = numpy.array([[1]], dtype=int)
56
+ H = torch.tensor([[1]], dtype=dtype, device=device)
42
57
 
43
58
  # Sylvester's construction
44
- for i in range(0, log2):
45
- H = numpy.vstack((numpy.hstack((H, H)), numpy.hstack((H, -H))))
46
-
47
- return torch.from_numpy(H / math.sqrt(size))
59
+ for _ in range(log2):
60
+ H = torch.vstack((torch.hstack((H, H)), torch.hstack((H, -H))))
48
61
 
49
-
50
- # adapted from:
51
- # https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py
52
-
53
- # TODO: the following library exists for online rotations and should be considered
54
- # in the future:
55
- # https://github.com/Dao-AILab/fast-hadamard-transform/tree/master
62
+ return H / math.sqrt(size)
56
63
 
57
64
 
58
65
  def random_hadamard_matrix(
59
- size: int, gen: Optional[torch.Generator] = None
66
+ size: int,
67
+ dtype: torch.dtype = torch.bfloat16,
68
+ device: torch.device = torch.device("cpu"),
69
+ gen: Optional[torch.Generator] = None,
60
70
  ) -> torch.Tensor:
61
71
  """
62
- Produces a randomly generated Hadamard matrix.
63
- See https://cornell-relaxml.github.io/quip-sharp/ ,
64
- Section "Randomized Hadamard Transformation"
72
+ Produces a randomly generated Hadamard matrix. Differs from
73
+ `deterministic_hadamard_matrix` in that this function supports non powers of 2
74
+ and randomization using a seeded generator
75
+
76
+ Adapted from https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py # noqa: E501
77
+ Known matrices were retrieved from N. J. A. Sloane's Library of Hadamard Matrices http://www.neilsloane.com/hadamard/ # noqa: E501
65
78
 
66
79
  :param size: The dimension of the hadamard matrix
80
+ :param dtype: data type of matrix
81
+ :param device: device to construct matrix on
67
82
  :param gen: Optional generator for random values
68
83
  :return: randomly generated hadamard matrix
69
84
  """
70
- # Benefits: support other shapes / non powers of 2, support randomization
71
- Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=torch.float64)
85
+ Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=dtype) # cpu
86
+ Q = Q.to(device=device)
72
87
  Q = Q * 2 - 1
73
88
  Q = torch.diag(Q)
74
89
  return _matmul_hadU(Q) / math.sqrt(size)
75
90
 
76
91
 
77
- def _get_hadK(n: int, transpose: bool = False) -> Tuple[torch.Tensor, int]:
78
- # NOTE: we can easily extend the list of supported shapes/sizes
79
- # by adding to these methods
80
- hadK, K = None, None
81
- if n % 20 == 0:
82
- assert _is_pow2(n // 20)
83
- K = 20
84
- hadK = _get_had20().T if transpose else _get_had20()
85
- elif n % 12 == 0:
86
- assert _is_pow2(n // 12)
87
- K = 12
88
- hadK = _get_had12().T if transpose else _get_had12()
89
- else:
90
- assert _is_pow2(n)
91
- K = 1
92
+ def is_pow2(n: int) -> bool:
93
+ """
94
+ Check if a number is a power of 2
92
95
 
93
- return hadK, K
96
+ :param n: number to check
97
+ :return: True iff `n` is a power of 2
98
+ """
99
+ return n > 0 and (n & (n - 1) == 0)
100
+
101
+
102
+ def _fetch_hadamard_divisor(
103
+ n: int,
104
+ dtype: torch.dtype,
105
+ device: torch.device = torch.device("cpu"),
106
+ file_path: str = REPO_PATH,
107
+ ) -> Optional[torch.Tensor]:
108
+ """
109
+ Fetch a known hadamard matrix from the given file path. The returned matrix will
110
+ be of size `k` such that `n / k` is a power of two. Return None if no such
111
+ matrix exists.
94
112
 
113
+ Note: This function reopens the safetensors file every time it is called.
114
+ This is technically inefficient, but a very small runtime cost and simpler
115
+ than forcing callers to manage the file open context
116
+
117
+ :param n: size of known hadamard matrix
118
+ :return: a known hadamard matrix of size `n` if one exists, else None
119
+ """
120
+ with safe_open(file_path, framework="pt", device=str(device)) as file:
121
+ divisors = sorted((int(key) for key in file.keys()), reverse=True)
122
+ for divisor in divisors:
123
+ if n % divisor == 0 and is_pow2(n // divisor):
124
+ return file.get_tensor(str(divisor)).to(dtype=dtype)
125
+
126
+ return None
127
+
128
+
129
+ def _matmul_hadU(X: torch.Tensor) -> torch.Tensor:
130
+ size = X.size(0)
131
+ dtype = X.dtype
132
+ device = X.device
95
133
 
96
- def _matmul_hadU(X, transpose=False) -> torch.Tensor:
97
- n = X.shape[-1]
98
134
  # Check if we have the determined hadamard matrix
99
- hadK, K = _get_hadK(n, transpose)
135
+ hadK = _fetch_hadamard_divisor(size, dtype, device=device)
136
+ if hadK is None:
137
+ raise ValueError(f"Cannot construct random hadamard matrix of size {size}")
138
+ K = hadK.size(0)
139
+
100
140
  # Reshape diag matrix with randomized -1/+1
101
- input = X.clone().view(-1, n, 1)
141
+ input = X.clone().view(-1, size, 1)
102
142
  output = input.clone()
103
-
104
- # for cases when hadK is not predetermined, determine hadamard matrix
105
143
  while input.shape[1] > K:
106
144
  input = input.view(input.shape[0], input.shape[1] // 2, 2, input.shape[2])
107
145
  output = output.view(input.shape)
@@ -109,53 +147,14 @@ def _matmul_hadU(X, transpose=False) -> torch.Tensor:
109
147
  output[:, :, 1, :] = input[:, :, 0, :] - input[:, :, 1, :]
110
148
  output = output.view(input.shape[0], input.shape[1], -1)
111
149
  (input, output) = (output, input)
150
+ assert input.shape[1] == K
112
151
  del output
113
152
 
114
- # K == 1 when hadK is None; this happens when the size dim (n)
115
- # is not comaptible with any of the maintained hadamard matrices
116
-
117
- if K > 1:
118
- # Do not explicitly repeat - OOM
119
- # input = torch.bmm(
120
- # hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
121
- # Use bcast instead
122
-
123
- # for cases when hadK is pre-determined
124
- input = hadK.view(1, K, K).to(input) @ input
153
+ # Do not explicitly repeat - OOM
154
+ # input = torch.bmm(
155
+ # hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
156
+ # Use bcast instead
157
+ input = hadK.view(1, K, K).to(input) @ input
125
158
 
126
159
  # normalize
127
160
  return input.view(X.shape)
128
-
129
-
130
- def _is_pow2(n: int) -> bool:
131
- return (n & (n - 1) == 0) and (n > 0)
132
-
133
-
134
- def _reshape_bits(packed_bits: numpy.ndarray, original_size: int) -> numpy.ndarray:
135
- had_unpacked = numpy.unpackbits(packed_bits)
136
- had_unpacked = [1 if x == 1 else -1 for x in had_unpacked]
137
- had_unpacked = numpy.array(had_unpacked).reshape((original_size, original_size))
138
- return had_unpacked
139
-
140
-
141
- # http://www.neilsloane.com/hadamard/index.html
142
- def _get_had12() -> torch.Tensor:
143
- # fmt: off
144
- had_12 = numpy.array([128, 13, 29, 232, 235, 71, 218,
145
- 62, 209, 246, 139, 180, 157, 168, 237, 199, 106, 59], dtype=numpy.uint8)
146
- # fmt: on
147
- # TODO: just unpack during apply
148
- had_12_unpacked = _reshape_bits(had_12, original_size=12)
149
- return torch.tensor(had_12_unpacked)
150
-
151
-
152
- def _get_had20() -> torch.Tensor:
153
- # fmt: off
154
- had_20 = numpy.array([128, 0, 13, 133, 121, 236, 43, 203, 97, 94, 155, 10, 252,
155
- 216, 87, 230, 194, 191, 54, 21, 249, 176, 171, 205, 133, 222, 108, 42, 243,
156
- 97, 215, 155, 10, 188, 216, 149, 230, 200, 175, 54, 133, 121, 188, 43,
157
- 205, 225, 94, 107, 10, 243], dtype=numpy.uint8)
158
- # fmt: on
159
- # TODO: just unpack during apply
160
- had_20_unpacked = _reshape_bits(had_20, original_size=20)
161
- return torch.tensor(had_20_unpacked)
@@ -31,9 +31,10 @@ import contextlib
31
31
  import warnings
32
32
  from functools import wraps
33
33
  from operator import attrgetter
34
- from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
34
+ from typing import Any, Callable, Dict, Iterable, Literal, Optional, Tuple, Union
35
35
 
36
36
  import torch
37
+ from compressed_tensors.utils import patch_attr
37
38
 
38
39
 
39
40
  try:
@@ -83,6 +84,7 @@ __all__ = [
83
84
  "register_offload_module",
84
85
  "delete_offload_module",
85
86
  "offloaded_dispatch",
87
+ "disable_offloading",
86
88
  ]
87
89
 
88
90
 
@@ -214,7 +216,7 @@ def register_offload_parameter(
214
216
  def update_offload_parameter(
215
217
  module: torch.nn.Module,
216
218
  name: str,
217
- data: Optional[torch.Tensor],
219
+ data: torch.Tensor,
218
220
  offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
219
221
  ):
220
222
  """
@@ -227,7 +229,7 @@ def update_offload_parameter(
227
229
  :param offload_device: device on which weight will be offloaded to. If None is
228
230
  provided, then infer device from parameters on module
229
231
  """
230
- param = getattr(module, name)
232
+ param: torch.nn.Parameter = getattr(module, name)
231
233
  if param.data.shape != data.shape:
232
234
  warnings.warn(
233
235
  f"Shape of parameter being updated {param.data.shape} does not match shape "
@@ -235,7 +237,7 @@ def update_offload_parameter(
235
237
  )
236
238
 
237
239
  # copy data into onloaded parameter if applicable
238
- if param.device != torch.device("meta"):
240
+ if param.device != torch.device("meta") and data is not param.data:
239
241
  param.data.copy_(data)
240
242
 
241
243
  # update offload dict
@@ -501,7 +503,9 @@ def offloaded_dispatch(
501
503
  raise NotImplementedError("Disk offloading is not currently supported")
502
504
 
503
505
  # create weights map
504
- weights_map = OffloadedWeightsLoader(state_dict=module.state_dict(), device="cpu")
506
+ state_dict = module.state_dict()
507
+ state_dict = {key: val.to(offload_device) for key, val in state_dict.items()}
508
+ weights_map = OffloadedWeightsLoader(state_dict=state_dict, device=offload_device)
505
509
 
506
510
  # create tied params map
507
511
  tied_params = find_tied_parameters(module)
@@ -522,6 +526,36 @@ def offloaded_dispatch(
522
526
  return module
523
527
 
524
528
 
529
+ @contextlib.contextmanager
530
+ def disable_offloading():
531
+ """
532
+ Keep modules onloaded and disable offloading until this context exits.
533
+ Affects modules which have been hooked with accelerate's `AlignDevicesHook`
534
+ """
535
+ original_pre_forward = AlignDevicesHook.pre_forward
536
+ onloaded_modules: Dict[torch.nn.Module, Tuple[AlignDevicesHook, bool]] = dict()
537
+
538
+ # onload once and disable any future onloading/offloading steps
539
+ def keep_onload_pre_forward(self: AlignDevicesHook, module, *args, **kwargs):
540
+ ret = original_pre_forward(self, module, *args, **kwargs)
541
+ if module not in onloaded_modules:
542
+ onloaded_modules[module] = (self, self.offload)
543
+ self.offload = False
544
+ return ret
545
+
546
+ # use the patched pre_forward function within the context
547
+ with patch_attr(AlignDevicesHook, "pre_forward", keep_onload_pre_forward):
548
+ yield
549
+
550
+ # manually offload all modules that were onloaded
551
+ # update any parameters which may have changed
552
+ for module, (hook, offload) in onloaded_modules.items():
553
+ hook.offload = offload
554
+ for name, param in module.named_parameters():
555
+ update_offload_parameter(module, name, param.data)
556
+ hook.post_forward(module, None)
557
+
558
+
525
559
  """ Upstreamed Functions """
526
560
 
527
561
 
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.2.a20250612'
20
+ __version__ = version = '0.10.2.a20250613'
21
21
  __version_tuple__ = version_tuple = (0, 10, 2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.2a20250612
3
+ Version: 0.10.2a20250613
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
1
1
  compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
2
2
  compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
3
- compressed_tensors/version.py,sha256=F2izwCTRKbiv1mAW6qD3TbJD5cXQrz4zRmew4qZ4Ud0,523
3
+ compressed_tensors/version.py,sha256=W2dIoBkBkOOTKcVgQ7KVgwm7EtQxgrkm_57h8wJ40X0,523
4
4
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
5
5
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -45,21 +45,22 @@ compressed_tensors/transform/transform_config.py,sha256=6JA8VFcoz4EGHOev6thj51Ou
45
45
  compressed_tensors/transform/transform_scheme.py,sha256=c7NAuLDL0itFgUfBMNShegMI9bzKL7s4LR3QJTHsXLs,1733
46
46
  compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
47
47
  compressed_tensors/transform/factory/base.py,sha256=yVrYWEnrr2RFWE5AjSNeXzO9aXc443dTNMVSxuLztz8,5940
48
- compressed_tensors/transform/factory/hadamard.py,sha256=tuFVpKsv__SeDj-QwKxtipLvjb993DOSIFvWcUh42Ww,3124
48
+ compressed_tensors/transform/factory/hadamard.py,sha256=zkq6w8uJXRLokUXajAkFb2fJrH0K3SL6qrR2dARrAr8,3139
49
49
  compressed_tensors/transform/factory/matrix_multiply.py,sha256=0g4sYC_tOmCjOomae2gl54UTXiFdl0mCCkmbqIRX8yw,3613
50
- compressed_tensors/transform/factory/random_hadamard.py,sha256=6kqr9z6kFc-2qRNskhWRsLGTDT_NfNAkFcTLMqQJcWA,1484
50
+ compressed_tensors/transform/factory/random_hadamard.py,sha256=TFInxbHslqREOFFiy_mpR88eEYXQnslxXmyh-ZbN-MU,1499
51
51
  compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
52
- compressed_tensors/transform/utils/hadamard.py,sha256=SmPZmnHtc5N36gJA5EbM1T65uf4w1_flgl7SWBeg_W8,5642
52
+ compressed_tensors/transform/utils/hadamard.py,sha256=U27Kvo-eDebKcVt8oXTSIAaQ5DvPQj9tDv2hdXHCPPQ,5584
53
+ compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1IoH-0nHYzfraCOvrq_dX2zFI1B4,1436901
53
54
  compressed_tensors/transform/utils/utils.py,sha256=PRPTYwPs2nnNaQMq2GEbC4QYKHFKlZwaRyPgdDhl66g,2992
54
55
  compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
55
56
  compressed_tensors/utils/helpers.py,sha256=cPg-ikdeA92aIGwBONg8GmPNvcGlFhozyJVwsRiXBTA,11981
56
- compressed_tensors/utils/offload.py,sha256=myV7iC75gA8A3BGgwR3uoeaJkIC9oigKp9CcqsHsVJc,20686
57
+ compressed_tensors/utils/offload.py,sha256=57TvfCPUYG81q0yyCOWRABaIsg5qIuOrXMI1mpRCLMM,22172
57
58
  compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
58
59
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
59
60
  compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
60
61
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
61
- compressed_tensors-0.10.2a20250612.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
62
- compressed_tensors-0.10.2a20250612.dist-info/METADATA,sha256=541wdYU5905X69fwti-7pubCIzjsENQnbOxpJt4X2qQ,7005
63
- compressed_tensors-0.10.2a20250612.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
- compressed_tensors-0.10.2a20250612.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
65
- compressed_tensors-0.10.2a20250612.dist-info/RECORD,,
62
+ compressed_tensors-0.10.2a20250613.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
63
+ compressed_tensors-0.10.2a20250613.dist-info/METADATA,sha256=E2m2_QlCeFFDbi6cfe4Uf13f7xaF-84jVzfuzywN2No,7005
64
+ compressed_tensors-0.10.2a20250613.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
65
+ compressed_tensors-0.10.2a20250613.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
66
+ compressed_tensors-0.10.2a20250613.dist-info/RECORD,,