keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/__init__.py +1 -0
- keras/_tf_keras/keras/__init__.py +1 -0
- keras/_tf_keras/keras/callbacks/__init__.py +3 -0
- keras/_tf_keras/keras/distillation/__init__.py +16 -0
- keras/_tf_keras/keras/distribution/__init__.py +3 -0
- keras/_tf_keras/keras/dtype_policies/__init__.py +6 -0
- keras/_tf_keras/keras/layers/__init__.py +21 -0
- keras/_tf_keras/keras/ops/__init__.py +16 -0
- keras/_tf_keras/keras/ops/image/__init__.py +1 -0
- keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
- keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +12 -0
- keras/_tf_keras/keras/quantizers/__init__.py +13 -0
- keras/callbacks/__init__.py +3 -0
- keras/distillation/__init__.py +16 -0
- keras/distribution/__init__.py +3 -0
- keras/dtype_policies/__init__.py +6 -0
- keras/layers/__init__.py +21 -0
- keras/ops/__init__.py +16 -0
- keras/ops/image/__init__.py +1 -0
- keras/ops/linalg/__init__.py +1 -0
- keras/ops/nn/__init__.py +3 -0
- keras/ops/numpy/__init__.py +12 -0
- keras/quantizers/__init__.py +13 -0
- keras/src/applications/imagenet_utils.py +4 -1
- keras/src/backend/common/backend_utils.py +30 -6
- keras/src/backend/common/dtypes.py +6 -12
- keras/src/backend/common/name_scope.py +2 -1
- keras/src/backend/common/variables.py +38 -20
- keras/src/backend/jax/core.py +126 -78
- keras/src/backend/jax/distribution_lib.py +16 -2
- keras/src/backend/jax/layer.py +3 -1
- keras/src/backend/jax/linalg.py +4 -0
- keras/src/backend/jax/nn.py +511 -29
- keras/src/backend/jax/numpy.py +109 -23
- keras/src/backend/jax/optimizer.py +3 -2
- keras/src/backend/jax/trainer.py +18 -3
- keras/src/backend/numpy/linalg.py +4 -0
- keras/src/backend/numpy/nn.py +313 -2
- keras/src/backend/numpy/numpy.py +97 -8
- keras/src/backend/openvino/__init__.py +1 -0
- keras/src/backend/openvino/core.py +6 -23
- keras/src/backend/openvino/linalg.py +4 -0
- keras/src/backend/openvino/nn.py +271 -20
- keras/src/backend/openvino/numpy.py +1369 -195
- keras/src/backend/openvino/random.py +7 -14
- keras/src/backend/tensorflow/layer.py +43 -9
- keras/src/backend/tensorflow/linalg.py +24 -0
- keras/src/backend/tensorflow/nn.py +545 -1
- keras/src/backend/tensorflow/numpy.py +351 -56
- keras/src/backend/tensorflow/trainer.py +6 -2
- keras/src/backend/torch/core.py +3 -1
- keras/src/backend/torch/linalg.py +4 -0
- keras/src/backend/torch/nn.py +125 -0
- keras/src/backend/torch/numpy.py +109 -9
- keras/src/backend/torch/trainer.py +8 -2
- keras/src/callbacks/__init__.py +1 -0
- keras/src/callbacks/callback_list.py +45 -11
- keras/src/callbacks/model_checkpoint.py +5 -0
- keras/src/callbacks/orbax_checkpoint.py +332 -0
- keras/src/callbacks/terminate_on_nan.py +54 -5
- keras/src/datasets/cifar10.py +5 -0
- keras/src/distillation/__init__.py +1 -0
- keras/src/distillation/distillation_loss.py +390 -0
- keras/src/distillation/distiller.py +598 -0
- keras/src/distribution/distribution_lib.py +14 -0
- keras/src/dtype_policies/__init__.py +4 -0
- keras/src/dtype_policies/dtype_policy.py +180 -1
- keras/src/export/__init__.py +2 -0
- keras/src/export/export_utils.py +39 -2
- keras/src/export/litert.py +248 -0
- keras/src/export/onnx.py +6 -0
- keras/src/export/openvino.py +1 -1
- keras/src/export/tf2onnx_lib.py +3 -0
- keras/src/layers/__init__.py +13 -0
- keras/src/layers/activations/softmax.py +9 -4
- keras/src/layers/attention/attention.py +1 -1
- keras/src/layers/attention/multi_head_attention.py +4 -1
- keras/src/layers/core/dense.py +406 -102
- keras/src/layers/core/einsum_dense.py +521 -116
- keras/src/layers/core/embedding.py +257 -99
- keras/src/layers/core/input_layer.py +1 -0
- keras/src/layers/core/reversible_embedding.py +399 -0
- keras/src/layers/input_spec.py +17 -17
- keras/src/layers/layer.py +50 -15
- keras/src/layers/merging/concatenate.py +6 -5
- keras/src/layers/merging/dot.py +4 -1
- keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
- keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
- keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
- keras/src/layers/preprocessing/discretization.py +6 -5
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +5 -5
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/layers/preprocessing/index_lookup.py +19 -1
- keras/src/layers/preprocessing/normalization.py +16 -1
- keras/src/layers/preprocessing/string_lookup.py +26 -28
- keras/src/layers/regularization/dropout.py +43 -1
- keras/src/layers/rnn/gru.py +1 -1
- keras/src/layers/rnn/lstm.py +2 -2
- keras/src/layers/rnn/rnn.py +19 -0
- keras/src/layers/rnn/simple_rnn.py +1 -1
- keras/src/legacy/preprocessing/image.py +4 -1
- keras/src/legacy/preprocessing/sequence.py +20 -12
- keras/src/losses/loss.py +1 -1
- keras/src/losses/losses.py +24 -0
- keras/src/metrics/confusion_metrics.py +7 -6
- keras/src/models/cloning.py +4 -0
- keras/src/models/functional.py +11 -3
- keras/src/models/model.py +195 -44
- keras/src/ops/image.py +257 -20
- keras/src/ops/linalg.py +93 -0
- keras/src/ops/nn.py +268 -2
- keras/src/ops/numpy.py +701 -44
- keras/src/ops/operation.py +90 -29
- keras/src/ops/operation_utils.py +2 -0
- keras/src/optimizers/adafactor.py +29 -10
- keras/src/optimizers/base_optimizer.py +22 -3
- keras/src/optimizers/loss_scale_optimizer.py +51 -18
- keras/src/optimizers/muon.py +65 -31
- keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
- keras/src/quantizers/__init__.py +14 -1
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +346 -207
- keras/src/quantizers/gptq_config.py +63 -13
- keras/src/quantizers/gptq_core.py +328 -215
- keras/src/quantizers/quantization_config.py +246 -0
- keras/src/quantizers/quantizers.py +407 -38
- keras/src/quantizers/utils.py +23 -0
- keras/src/random/seed_generator.py +6 -4
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +26 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/saving/saving_lib.py +1 -1
- keras/src/testing/__init__.py +1 -0
- keras/src/testing/test_case.py +45 -5
- keras/src/trainers/compile_utils.py +38 -17
- keras/src/trainers/data_adapters/grain_dataset_adapter.py +1 -5
- keras/src/tree/torchtree_impl.py +215 -0
- keras/src/tree/tree_api.py +6 -1
- keras/src/utils/backend_utils.py +31 -4
- keras/src/utils/dataset_utils.py +234 -35
- keras/src/utils/file_utils.py +49 -11
- keras/src/utils/image_utils.py +14 -2
- keras/src/utils/jax_layer.py +244 -55
- keras/src/utils/module_utils.py +29 -0
- keras/src/utils/progbar.py +10 -12
- keras/src/utils/python_utils.py +5 -0
- keras/src/utils/rng_utils.py +9 -1
- keras/src/utils/tracking.py +70 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +163 -142
- keras/src/quantizers/gptq_quant.py +0 -133
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
from absl import logging
|
|
2
|
-
|
|
3
1
|
from keras.src.api_export import keras_export
|
|
4
|
-
from keras.src.quantizers.gptq_core import quantize_model
|
|
2
|
+
from keras.src.quantizers.quantization_config import QuantizationConfig
|
|
5
3
|
|
|
6
4
|
|
|
7
5
|
@keras_export("keras.quantizers.GPTQConfig")
|
|
8
|
-
class GPTQConfig:
|
|
6
|
+
class GPTQConfig(QuantizationConfig):
|
|
9
7
|
"""Configuration class for the GPTQ (Gradient-based Post-Training
|
|
10
8
|
Quantization) algorithm.
|
|
11
9
|
|
|
@@ -134,36 +132,88 @@ class GPTQConfig:
|
|
|
134
132
|
activation_order: (bool, optional) If `True`, reorders weight columns
|
|
135
133
|
based on activation magnitude, which can improve quantization
|
|
136
134
|
accuracy. Defaults to `False`.
|
|
135
|
+
quantization_layer_structure: (dict, optional) A dictionary defining the
|
|
136
|
+
model's quantization structure. It should contain:
|
|
137
|
+
- "pre_block_layers": list of layers to run before the first block.
|
|
138
|
+
- "sequential_blocks": list of blocks to be quantized sequentially.
|
|
139
|
+
If not provided, the model must implement
|
|
140
|
+
`get_quantization_layer_structure`.
|
|
137
141
|
"""
|
|
138
142
|
|
|
139
143
|
def __init__(
|
|
140
144
|
self,
|
|
141
145
|
dataset,
|
|
142
146
|
tokenizer,
|
|
147
|
+
*,
|
|
143
148
|
weight_bits: int = 4,
|
|
144
149
|
num_samples: int = 128,
|
|
150
|
+
per_channel: bool = True,
|
|
145
151
|
sequence_length: int = 512,
|
|
146
152
|
hessian_damping: float = 0.01,
|
|
147
153
|
group_size: int = 128,
|
|
148
154
|
symmetric: bool = False,
|
|
149
155
|
activation_order: bool = False,
|
|
156
|
+
quantization_layer_structure: dict = None,
|
|
150
157
|
):
|
|
158
|
+
super().__init__()
|
|
159
|
+
if weight_bits not in [2, 3, 4, 8]:
|
|
160
|
+
raise ValueError(
|
|
161
|
+
f"Unsupported weight_bits {weight_bits}. "
|
|
162
|
+
"Supported values are 2, 3, 4, and 8."
|
|
163
|
+
)
|
|
164
|
+
if num_samples <= 0:
|
|
165
|
+
raise ValueError("num_samples must be a positive integer.")
|
|
166
|
+
if sequence_length <= 0:
|
|
167
|
+
raise ValueError("sequence_length must be a positive integer.")
|
|
168
|
+
if hessian_damping < 0 or hessian_damping > 1:
|
|
169
|
+
raise ValueError("hessian_damping must be between 0 and 1.")
|
|
170
|
+
if group_size < -1 or group_size == 0:
|
|
171
|
+
raise ValueError(
|
|
172
|
+
"Invalid group_size. Supported values are -1 (whole-tensor) "
|
|
173
|
+
"or a positive integer, "
|
|
174
|
+
f"but got {group_size}."
|
|
175
|
+
)
|
|
151
176
|
self.dataset = dataset
|
|
152
177
|
self.tokenizer = tokenizer
|
|
153
178
|
self.num_samples = num_samples
|
|
179
|
+
self.per_channel = per_channel
|
|
154
180
|
self.sequence_length = sequence_length
|
|
155
181
|
self.hessian_damping = hessian_damping
|
|
156
182
|
self.weight_bits = weight_bits
|
|
157
183
|
self.group_size = group_size
|
|
158
184
|
self.symmetric = symmetric
|
|
159
185
|
self.activation_order = activation_order
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
186
|
+
self.quantization_layer_structure = quantization_layer_structure
|
|
187
|
+
|
|
188
|
+
def get_config(self):
|
|
189
|
+
return {
|
|
190
|
+
# Dataset and Tokenizer are only required for a one-time
|
|
191
|
+
# calibration and are not saved in the config.
|
|
192
|
+
"dataset": None,
|
|
193
|
+
"tokenizer": None,
|
|
194
|
+
"weight_bits": self.weight_bits,
|
|
195
|
+
"num_samples": self.num_samples,
|
|
196
|
+
"per_channel": self.per_channel,
|
|
197
|
+
"sequence_length": self.sequence_length,
|
|
198
|
+
"hessian_damping": self.hessian_damping,
|
|
199
|
+
"group_size": self.group_size,
|
|
200
|
+
"symmetric": self.symmetric,
|
|
201
|
+
"activation_order": self.activation_order,
|
|
202
|
+
"quantization_layer_structure": self.quantization_layer_structure,
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
@classmethod
|
|
206
|
+
def from_config(cls, config):
|
|
207
|
+
return cls(**config)
|
|
208
|
+
|
|
209
|
+
@property
|
|
210
|
+
def mode(self):
|
|
211
|
+
return "gptq"
|
|
212
|
+
|
|
213
|
+
def dtype_policy_string(self):
|
|
214
|
+
"""Returns the dtype policy string for this configuration.
|
|
215
|
+
|
|
216
|
+
Returns:
|
|
217
|
+
A string representing the dtype policy, e.g. "gptq/4/128".
|
|
165
218
|
"""
|
|
166
|
-
|
|
167
|
-
# The core logic is now delegated to gptqutils, which will handle
|
|
168
|
-
# the dynamic imports and data loading.
|
|
169
|
-
quantize_model(model=model, config=self)
|
|
219
|
+
return f"gptq/{self.weight_bits}/{self.group_size}"
|