keras-hub-nightly 0.23.0.dev202508260411__py3-none-any.whl → 0.23.0.dev202508280418__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- keras_hub/layers/__init__.py +6 -0
- keras_hub/models/__init__.py +21 -0
- keras_hub/src/layers/modeling/position_embedding.py +21 -6
- keras_hub/src/layers/modeling/rotary_embedding.py +16 -6
- keras_hub/src/layers/modeling/sine_position_encoding.py +21 -8
- keras_hub/src/layers/modeling/token_and_position_embedding.py +2 -1
- keras_hub/src/models/backbone.py +10 -15
- keras_hub/src/models/d_fine/__init__.py +0 -0
- keras_hub/src/models/d_fine/d_fine_attention.py +461 -0
- keras_hub/src/models/d_fine/d_fine_backbone.py +891 -0
- keras_hub/src/models/d_fine/d_fine_decoder.py +944 -0
- keras_hub/src/models/d_fine/d_fine_encoder.py +365 -0
- keras_hub/src/models/d_fine/d_fine_hybrid_encoder.py +642 -0
- keras_hub/src/models/d_fine/d_fine_image_converter.py +8 -0
- keras_hub/src/models/d_fine/d_fine_layers.py +1828 -0
- keras_hub/src/models/d_fine/d_fine_loss.py +938 -0
- keras_hub/src/models/d_fine/d_fine_object_detector.py +875 -0
- keras_hub/src/models/d_fine/d_fine_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/d_fine/d_fine_presets.py +2 -0
- keras_hub/src/models/d_fine/d_fine_utils.py +827 -0
- keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +4 -1
- keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +3 -2
- keras_hub/src/models/hgnetv2/hgnetv2_layers.py +27 -11
- keras_hub/src/models/parseq/__init__.py +0 -0
- keras_hub/src/models/parseq/parseq_backbone.py +134 -0
- keras_hub/src/models/parseq/parseq_causal_lm.py +466 -0
- keras_hub/src/models/parseq/parseq_causal_lm_preprocessor.py +168 -0
- keras_hub/src/models/parseq/parseq_decoder.py +418 -0
- keras_hub/src/models/parseq/parseq_image_converter.py +8 -0
- keras_hub/src/models/parseq/parseq_tokenizer.py +221 -0
- keras_hub/src/tests/test_case.py +37 -1
- keras_hub/src/utils/preset_utils.py +49 -0
- keras_hub/src/utils/tensor_utils.py +23 -1
- keras_hub/src/utils/transformers/convert_vit.py +4 -1
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +3 -0
- {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/RECORD +40 -20
- {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/top_level.txt +0 -0
keras_hub/layers/__init__.py
CHANGED
@@ -75,6 +75,9 @@ from keras_hub.src.models.clip.clip_image_converter import (
 from keras_hub.src.models.cspnet.cspnet_image_converter import (
     CSPNetImageConverter as CSPNetImageConverter,
 )
+from keras_hub.src.models.d_fine.d_fine_image_converter import (
+    DFineImageConverter as DFineImageConverter,
+)
 from keras_hub.src.models.deeplab_v3.deeplab_v3_image_converter import (
     DeepLabV3ImageConverter as DeepLabV3ImageConverter,
 )
@@ -108,6 +111,9 @@ from keras_hub.src.models.moonshine.moonshine_audio_converter import (
 from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import (
     PaliGemmaImageConverter as PaliGemmaImageConverter,
 )
+from keras_hub.src.models.parseq.parseq_image_converter import (
+    PARSeqImageConverter as PARSeqImageConverter,
+)
 from keras_hub.src.models.resnet.resnet_image_converter import (
     ResNetImageConverter as ResNetImageConverter,
 )
keras_hub/models/__init__.py
CHANGED
@@ -108,6 +108,15 @@ from keras_hub.src.models.cspnet.cspnet_image_classifier import (
 from keras_hub.src.models.cspnet.cspnet_image_classifier_preprocessor import (
     CSPNetImageClassifierPreprocessor as CSPNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.d_fine.d_fine_backbone import (
+    DFineBackbone as DFineBackbone,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector import (
+    DFineObjectDetector as DFineObjectDetector,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector_preprocessor import (
+    DFineObjectDetectorPreprocessor as DFineObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
     DebertaV3Backbone as DebertaV3Backbone,
 )
@@ -446,6 +455,18 @@ from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm_preprocessor import (
 from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
     PaliGemmaTokenizer as PaliGemmaTokenizer,
 )
+from keras_hub.src.models.parseq.parseq_backbone import (
+    PARSeqBackbone as PARSeqBackbone,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm import (
+    PARSeqCausalLM as PARSeqCausalLM,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm_preprocessor import (
+    PARSeqCausalLMPreprocessor as PARSeqCausalLMPreprocessor,
+)
+from keras_hub.src.models.parseq.parseq_tokenizer import (
+    PARSeqTokenizer as PARSeqTokenizer,
+)
 from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone as Phi3Backbone
 from keras_hub.src.models.phi3.phi3_causal_lm import (
     Phi3CausalLM as Phi3CausalLM,
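Note: a minimal sketch of how the re-exports added above surface in the public API. Only the class names come from this diff; the commented `from_preset` call uses a placeholder preset name, since preset names are not part of these hunks.

```python
# New public symbols re-exported in this release (names taken from the diff).
from keras_hub.layers import DFineImageConverter, PARSeqImageConverter
from keras_hub.models import (
    DFineBackbone,
    DFineObjectDetector,
    DFineObjectDetectorPreprocessor,
    PARSeqBackbone,
    PARSeqCausalLM,
    PARSeqCausalLMPreprocessor,
    PARSeqTokenizer,
)

# Like other keras-hub tasks, the new classes are expected to support the
# standard `from_preset` constructor; "<preset_name>" is a placeholder.
# detector = DFineObjectDetector.from_preset("<preset_name>")
```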
keras_hub/src/layers/modeling/position_embedding.py
CHANGED
@@ -31,6 +31,11 @@ class PositionEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the position embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.
 
     Example:
 
@@ -91,18 +96,28 @@ class PositionEmbedding(keras.layers.Layer):
         )
         self.built = True
 
-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         feature_length = shape[-1]
         sequence_length = shape[-2]
         # trim to match the length of the input sequence, which might be less
         # than the sequence_length of the layer.
         position_embeddings = ops.convert_to_tensor(self.position_embeddings)
-        position_embeddings = ops.slice(
-            position_embeddings,
-            (start_index, 0),
-            (sequence_length, feature_length),
-        )
+        if positions is None:
+            position_embeddings = ops.slice(
+                position_embeddings,
+                (start_index, 0),
+                (sequence_length, feature_length),
+            )
+        else:
+            # Take care of unbatched `positions`.
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
+            position_embeddings = ops.take(
+                position_embeddings, positions, axis=0
+            )
+
         return ops.broadcast_to(position_embeddings, shape)
 
     def compute_output_shape(self, input_shape):
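Note: a small usage sketch of the new `positions` argument on `PositionEmbedding`; shapes and index values are illustrative, not taken from the diff.

```python
import numpy as np

from keras_hub.layers import PositionEmbedding

layer = PositionEmbedding(sequence_length=10)
x = np.zeros((2, 4, 16), dtype="float32")  # (batch, sequence, hidden)

# Previous behaviour: contiguous positions starting at `start_index`.
contiguous = layer(x, start_index=3)  # rows 3, 4, 5, 6 of the embedding table

# New behaviour: explicit, possibly non-contiguous positions; `start_index`
# is ignored. A 1-D tensor is shared across the batch.
gathered = layer(x, positions=np.array([0, 2, 7, 9]))

# Batched positions, one row of indices per example.
per_example = layer(x, positions=np.array([[0, 1, 2, 3], [4, 5, 6, 7]]))
```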
keras_hub/src/layers/modeling/rotary_embedding.py
CHANGED
@@ -37,6 +37,11 @@ class RotaryEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the rotary embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the rotary embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.
 
     Examples:
 
@@ -76,6 +81,11 @@ class RotaryEmbedding(keras.layers.Layer):
         self.built = True
 
     def call(self, inputs, start_index=0, positions=None):
+        # Take care of unbatched `positions`.
+        if positions is not None:
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
         inputs = ops.moveaxis(
             inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
         )
@@ -103,6 +113,7 @@ class RotaryEmbedding(keras.layers.Layer):
         return positions + ops.cast(start_index, dtype="float32")
 
     def _compute_cos_sin_embedding(self, inputs, start_index=0, positions=None):
+        batch_axis = 0
         feature_axis = len(inputs.shape) - 1
         sequence_axis = 1
 
@@ -111,21 +122,20 @@ class RotaryEmbedding(keras.layers.Layer):
 
         if positions is None:
             positions = self._compute_positions(inputs, start_index)
+            positions = ops.expand_dims(positions, axis=batch_axis)
         else:
             positions = ops.cast(positions, "float32")
-
         positions = positions / ops.cast(self.scaling_factor, "float32")
-        freq = ops.einsum("i,j->ij", positions, inverse_freq)
+
+        freq = ops.einsum("bi,j->bij", positions, inverse_freq)
+
         embedding = ops.stack((freq, freq), axis=-2)
         embedding = ops.reshape(
             embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
         )
 
-        # Reshape the embedding to be broadcastable with input shape.
-        if feature_axis < sequence_axis:
-            embedding = ops.transpose(embedding)
         for axis in range(len(inputs.shape)):
-            if axis != sequence_axis and axis != feature_axis:
+            if axis not in (batch_axis, sequence_axis, feature_axis):
                 embedding = ops.expand_dims(embedding, axis)
 
         cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
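Note: `RotaryEmbedding.call` already accepted `positions`; the change above adds a batch axis to the frequency computation (`"bi,j->bij"`) so that per-example positions work, and unbatched positions are expanded automatically. A sketch with illustrative shapes:

```python
import numpy as np

from keras_hub.layers import RotaryEmbedding

layer = RotaryEmbedding()
# (batch, sequence, num_heads, head_dim); rotation acts on the last axis.
q = np.random.rand(2, 4, 8, 64).astype("float32")

rotated = layer(q)                         # positions 0..3
rotated_offset = layer(q, start_index=10)  # cached-decoding style offset

# Unbatched positions are expanded to a leading batch dimension internally.
shared = layer(q, positions=np.array([0.0, 2.0, 4.0, 6.0], dtype="float32"))

# Per-example (batched) positions are now supported as well.
per_example_positions = np.array(
    [[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]], dtype="float32"
)
per_example = layer(q, positions=per_example_positions)
```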
keras_hub/src/layers/modeling/sine_position_encoding.py
CHANGED
@@ -30,6 +30,11 @@ class SinePositionEncoding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the encoding from. This is useful during cached decoding,
             where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.
 
     Example:
     ```python
@@ -58,27 +63,35 @@ class SinePositionEncoding(keras.layers.Layer):
         self.max_wavelength = max_wavelength
         self.built = True
 
-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         seq_length = shape[-2]
         hidden_size = shape[-1]
-        positions = ops.arange(seq_length)
-        positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        if positions is None:
+            positions = ops.arange(seq_length)
+            positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        # Take care of unbatched `positions`.
+        if len(ops.shape(positions)) == 1:
+            positions = ops.expand_dims(positions, axis=0)
+
         min_freq = ops.cast(1 / self.max_wavelength, dtype=self.compute_dtype)
         timescales = ops.power(
             min_freq,
             ops.cast(2 * (ops.arange(hidden_size) // 2), self.compute_dtype)
             / ops.cast(hidden_size, self.compute_dtype),
         )
-        angles = ops.expand_dims(positions, 1) * ops.expand_dims(timescales, 0)
+        angles = ops.einsum("bi,j->bij", positions, timescales)
+
         # even indices are sine, odd are cosine
         cos_mask = ops.cast(ops.arange(hidden_size) % 2, self.compute_dtype)
         sin_mask = 1 - cos_mask
-        # embedding shape is [seq_length, hidden_size]
-        positional_encodings = (
-            ops.sin(angles) * sin_mask + ops.cos(angles) * cos_mask
-        )
 
+        # embedding shape is `[bsz (or 1), seq_length, hidden_size]`.
+        positional_encodings = ops.einsum(
+            "bij,j->bij", ops.sin(angles), sin_mask
+        ) + ops.einsum("bij,j->bij", ops.cos(angles), cos_mask)
         return ops.broadcast_to(positional_encodings, shape)
 
     def get_config(self):
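Note: the same `positions` contract, sketched for `SinePositionEncoding` with illustrative shapes. Float positions are used here because custom positions are not re-cast by the layer, only the default `arange` branch is.

```python
import numpy as np

from keras_hub.layers import SinePositionEncoding

layer = SinePositionEncoding()
x = np.zeros((2, 4, 8), dtype="float32")  # (batch, sequence, hidden)

default_enc = layer(x)                  # positions 0..3
offset_enc = layer(x, start_index=100)  # e.g. for cached decoding

# New: explicit positions, shared (1-D) or per example (2-D).
custom = np.array(
    [[0.0, 2.0, 4.0, 6.0], [1.0, 3.0, 5.0, 7.0]], dtype="float32"
)
custom_enc = layer(x, positions=custom)
```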
keras_hub/src/layers/modeling/token_and_position_embedding.py
CHANGED
@@ -120,11 +120,12 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
         )
         return config
 
-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         embedded_tokens = self.token_embedding(inputs)
         embedded_positions = self.position_embedding(
             embedded_tokens,
             start_index=start_index,
+            positions=positions,
         )
         outputs = embedded_tokens + embedded_positions
         return outputs
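Note: `TokenAndPositionEmbedding` simply forwards the new keyword to its inner `PositionEmbedding`; a sketch with illustrative sizes:

```python
import numpy as np

from keras_hub.layers import TokenAndPositionEmbedding

layer = TokenAndPositionEmbedding(
    vocabulary_size=100,
    sequence_length=10,
    embedding_dim=16,
)
token_ids = np.array([[2, 5, 7, 9]])  # (batch, sequence)

# Non-contiguous token positions can now be embedded in a single call.
out = layer(token_ids, positions=np.array([0, 3, 4, 8]))
```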
keras_hub/src/models/backbone.py
CHANGED
@@ -91,21 +91,16 @@ class Backbone(keras.Model):
         }
 
         # Add quantization support by utilizing `DTypePolicyMap`
-        try:
-            if isinstance(
-                self.dtype_policy, keras.dtype_policies.DTypePolicyMap
-            ):
-                config.update({"dtype": self.dtype_policy})
-            else:
-                policy_map = keras.dtype_policies.DTypePolicyMap()
-                for layer in self._flatten_layers():
-                    if layer.quantization_mode is not None:
-                        policy_map[layer.path] = layer.dtype_policy
-                if len(policy_map) > 0:
-                    config.update({"dtype": policy_map})
-        # Before Keras 3.2, there is no `keras.dtype_policies.get`.
-        except AttributeError:
-            pass
+        dtype = self.dtype_policy
+        if not isinstance(dtype, keras.dtype_policies.DTypePolicyMap):
+            policy_map = keras.dtype_policies.DTypePolicyMap()
+            for layer in self._flatten_layers():
+                if layer.quantization_mode is not None:
+                    policy_map[layer.path] = layer.dtype_policy
+            if len(policy_map) > 0:
+                dtype = policy_map
+
+        config.update({"dtype": keras.dtype_policies.serialize(dtype)})
         return config
 
     @classmethod
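Note: with the change above, `Backbone.get_config()` always serializes a dtype policy: the model-wide policy by default, or a `DTypePolicyMap` when some sub-layers carry a quantized policy. A minimal sketch using a tiny randomly initialized backbone (the architecture sizes are illustrative):

```python
import keras_hub

backbone = keras_hub.models.BertBackbone(
    vocabulary_size=100,
    num_layers=2,
    num_heads=2,
    hidden_dim=32,
    intermediate_dim=64,
    max_sequence_length=16,
)

config = backbone.get_config()
# "dtype" now always holds a serialized policy; after quantization
# (e.g. `backbone.quantize("int8")`) it becomes a serialized
# `DTypePolicyMap`, so per-layer policies survive a from_config round trip.
print(config["dtype"])
```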