keras-hub-nightly 0.16.1.dev202410150342__py3-none-any.whl → 0.16.1.dev202410170342__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/src/models/causal_lm.py +37 -1
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +1 -1
- keras_hub/src/models/gemma/gemma_backbone.py +11 -3
- keras_hub/src/models/llama/llama_backbone.py +118 -0
- keras_hub/src/models/llama/llama_causal_lm.py +3 -1
- keras_hub/src/models/mistral/mistral_causal_lm.py +3 -1
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py +3 -1
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -1
- keras_hub/src/tests/test_case.py +9 -0
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202410150342.dist-info → keras_hub_nightly-0.16.1.dev202410170342.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.16.1.dev202410150342.dist-info → keras_hub_nightly-0.16.1.dev202410170342.dist-info}/RECORD +14 -14
- {keras_hub_nightly-0.16.1.dev202410150342.dist-info → keras_hub_nightly-0.16.1.dev202410170342.dist-info}/WHEEL +1 -1
- {keras_hub_nightly-0.16.1.dev202410150342.dist-info → keras_hub_nightly-0.16.1.dev202410170342.dist-info}/top_level.txt +0 -0
@@ -274,6 +274,7 @@ class CausalLM(Task):
|
|
274
274
|
inputs,
|
275
275
|
max_length=None,
|
276
276
|
stop_token_ids="auto",
|
277
|
+
strip_prompt=False,
|
277
278
|
):
|
278
279
|
"""Generate text given prompt `inputs`.
|
279
280
|
|
@@ -309,6 +310,9 @@ class CausalLM(Task):
|
|
309
310
|
specify a list of token id's the model should stop on. Note that
|
310
311
|
sequences of tokens will each be interpreted as a stop token,
|
311
312
|
multi-token stop sequences are not supported.
|
313
|
+
strip_prompt: Optional. By default, generate() returns the full prompt
|
314
|
+
followed by its completion generated by the model. If this option
|
315
|
+
is set to True, only the newly generated text is returned.
|
312
316
|
"""
|
313
317
|
# Setup our three main passes.
|
314
318
|
# 1. Optionally preprocessing strings to dense integer tensors.
|
@@ -339,6 +343,33 @@ class CausalLM(Task):
|
|
339
343
|
def generate(x):
|
340
344
|
return generate_function(x, stop_token_ids=stop_token_ids)
|
341
345
|
|
346
|
+
def strip_prompt_function(x, prompt):
|
347
|
+
# This function removes the prompt from the generated
|
348
|
+
# response, in a batch-friendly fashion.
|
349
|
+
y = {}
|
350
|
+
prompt_mask = prompt["padding_mask"]
|
351
|
+
seq_len = prompt_mask.shape[1]
|
352
|
+
|
353
|
+
# We need to shift every output sequence by the size of the prompt.
|
354
|
+
shifts = -ops.sum(ops.cast(prompt_mask, "int"), axis=1) % seq_len
|
355
|
+
ix = ops.arange(seq_len, dtype="int")
|
356
|
+
ix = ops.expand_dims(ix, axis=0) - ops.expand_dims(shifts, axis=1)
|
357
|
+
|
358
|
+
# This produces the desired shift (in fact a rollover).
|
359
|
+
def roll_sequence(seq):
|
360
|
+
return ops.take_along_axis(seq, ix, axis=1)
|
361
|
+
|
362
|
+
# The shifting rolls the content over so the prompt is at the end of
|
363
|
+
# the sequence and the generated text is at the beginning. We mask
|
364
|
+
# it to retain the generated text only.
|
365
|
+
y["padding_mask"] = ops.logical_xor(
|
366
|
+
roll_sequence(prompt_mask), roll_sequence(x["padding_mask"])
|
367
|
+
)
|
368
|
+
# we assume the mask is enough and there is no need to zero-out the values
|
369
|
+
y["token_ids"] = roll_sequence(x["token_ids"])
|
370
|
+
|
371
|
+
return y
|
372
|
+
|
342
373
|
def postprocess(x):
|
343
374
|
return self.preprocessor.generate_postprocess(x)
|
344
375
|
|
@@ -347,7 +378,12 @@ class CausalLM(Task):
|
|
347
378
|
|
348
379
|
if self.preprocessor is not None:
|
349
380
|
inputs = [preprocess(x) for x in inputs]
|
350
|
-
|
381
|
+
|
382
|
+
if strip_prompt:
|
383
|
+
outputs = [strip_prompt_function(generate(x), x) for x in inputs]
|
384
|
+
else:
|
385
|
+
outputs = [generate(x) for x in inputs]
|
386
|
+
|
351
387
|
if self.preprocessor is not None:
|
352
388
|
outputs = [postprocess(x) for x in outputs]
|
353
389
|
|
@@ -13,6 +13,6 @@ backbone_presets = {
|
|
13
13
|
"path": "deeplabv3",
|
14
14
|
"model_card": "https://arxiv.org/abs/1802.02611",
|
15
15
|
},
|
16
|
-
"kaggle_handle": "kaggle://keras/deeplabv3/keras/
|
16
|
+
"kaggle_handle": "kaggle://keras/deeplabv3/keras/deeplab_v3_plus_resnet50_pascalvoc/3",
|
17
17
|
},
|
18
18
|
}
|
@@ -224,7 +224,7 @@ class GemmaBackbone(Backbone):
|
|
224
224
|
|
225
225
|
Example:
|
226
226
|
```
|
227
|
-
# Feel free to change the mesh shape to balance data and model
|
227
|
+
# Feel free to change the mesh shape to balance data and model parallelism
|
228
228
|
mesh = keras.distribution.DeviceMesh(
|
229
229
|
shape=(1, 8), axis_names=('batch', 'model'),
|
230
230
|
devices=keras.distribution.list_devices())
|
@@ -232,11 +232,19 @@ class GemmaBackbone(Backbone):
|
|
232
232
|
mesh, model_parallel_dim_name="model")
|
233
233
|
|
234
234
|
distribution = keras.distribution.ModelParallel(
|
235
|
-
|
235
|
+
layout_map=layout_map, batch_dim_name='batch')
|
236
236
|
with distribution.scope():
|
237
237
|
gemma_model = keras_hub.models.GemmaCausalLM.from_preset()
|
238
238
|
```
|
239
239
|
|
240
|
+
To see how the layout map was applied, load the model then run (for one decoder block):
|
241
|
+
```
|
242
|
+
embedding_layer = gemma_model.backbone.get_layer("token_embedding")
|
243
|
+
decoder_block_1 = gemma_model.backbone.get_layer('decoder_block_1')
|
244
|
+
for variable in embedding_layer.weights + decoder_block_1.weights:
|
245
|
+
print(f'{variable.path:<58} {str(variable.shape):<16} {str(variable.value.sharding.spec)}')
|
246
|
+
```
|
247
|
+
|
240
248
|
Args:
|
241
249
|
device_mesh: The `keras.distribution.DeviceMesh` instance for
|
242
250
|
distribution.
|
@@ -246,7 +254,7 @@ class GemmaBackbone(Backbone):
|
|
246
254
|
the data should be partition on.
|
247
255
|
Return:
|
248
256
|
`keras.distribution.LayoutMap` that contains the sharding spec
|
249
|
-
|
257
|
+
for all the model weights.
|
250
258
|
"""
|
251
259
|
# The weight path and shape of the Gemma backbone is like below (for 2G)
|
252
260
|
# token_embedding/embeddings, (256128, 2048), 524550144
|
@@ -175,3 +175,121 @@ class LlamaBackbone(Backbone):
|
|
175
175
|
}
|
176
176
|
)
|
177
177
|
return config
|
178
|
+
|
179
|
+
@staticmethod
|
180
|
+
def get_layout_map(
|
181
|
+
device_mesh,
|
182
|
+
model_parallel_dim_name="model",
|
183
|
+
data_parallel_dim_name="batch",
|
184
|
+
):
|
185
|
+
"""Get a `keras.distribution.LayoutMap` for model parallel distribution.
|
186
|
+
|
187
|
+
The returned `LayoutMap` contains the sharding spec for the Llama
|
188
|
+
backbone weights, so that you can use it to distribute weights across
|
189
|
+
the accelerators.
|
190
|
+
|
191
|
+
Example:
|
192
|
+
```
|
193
|
+
# Feel free to change the mesh shape to balance data and model parallelism
|
194
|
+
mesh = keras.distribution.DeviceMesh(
|
195
|
+
shape=(1, 8),
|
196
|
+
axis_names=('batch', 'model'),
|
197
|
+
devices=keras.distribution.list_devices(),
|
198
|
+
)
|
199
|
+
layout_map = LlamaBackbone.get_layout_map(
|
200
|
+
mesh,
|
201
|
+
model_parallel_dim_name="model",
|
202
|
+
)
|
203
|
+
|
204
|
+
distribution = keras.distribution.ModelParallel(
|
205
|
+
layout_map=layout_map,
|
206
|
+
batch_dim_name='batch',
|
207
|
+
)
|
208
|
+
|
209
|
+
with distribution.scope():
|
210
|
+
llama_model = keras_hub.models.LlamaCausalLM.from_preset()
|
211
|
+
```
|
212
|
+
|
213
|
+
To see how the layout map was applied, load the model then run (for one decoder block):
|
214
|
+
```
|
215
|
+
embedding_layer = llama_model.backbone.get_layer("token_embedding")
|
216
|
+
decoder_block_1 = llama_model.backbone.get_layer('transformer_layer_0')
|
217
|
+
for variable in embedding_layer.weights + decoder_block_1.weights:
|
218
|
+
print(f'{variable.path:<58} {str(variable.shape):<16} {str(variable.value.sharding.spec)}')
|
219
|
+
```
|
220
|
+
|
221
|
+
Args:
|
222
|
+
device_mesh: The `keras.distribution.DeviceMesh` instance for
|
223
|
+
distribution.
|
224
|
+
model_parallel_dim_name: The axis name of the device mesh, where
|
225
|
+
the weights should be partition on.
|
226
|
+
data_parallel_dim_name: The axis name of the device mesh, where
|
227
|
+
the data should be partition on.
|
228
|
+
Return:
|
229
|
+
`keras.distribution.LayoutMap` that contains the sharding spec
|
230
|
+
for all the model weights.
|
231
|
+
"""
|
232
|
+
# The weight path and shape of the Llama backbone is like below
|
233
|
+
# token_embedding/embeddings (128256, 2048)
|
234
|
+
# repeat block for decoder
|
235
|
+
# transformer_layer_0/self_attention/query/kernel (2048, 32, 64)
|
236
|
+
# transformer_layer_0/self_attention/key/kernel (2048, 8, 64)
|
237
|
+
# transformer_layer_0/self_attention/value/kernel (2048, 8, 64)
|
238
|
+
# transformer_layer_0/self_attention/attention_output/kernel (32, 64, 2048)
|
239
|
+
# transformer_layer_0/self_attention_layernorm/scale (2048,)
|
240
|
+
# transformer_layer_0/feedforward_intermediate_dense/kernel (2048, 8192)
|
241
|
+
# transformer_layer_0/feedforward_gate_dense/kernel (2048, 8192)
|
242
|
+
# transformer_layer_0/feedforward_output_dense/kernel (8192, 2048)
|
243
|
+
# transformer_layer_0/feedforward_layernorm/scale (2048,)
|
244
|
+
|
245
|
+
if not isinstance(device_mesh, keras.distribution.DeviceMesh):
|
246
|
+
raise ValueError(
|
247
|
+
"Invalid device_mesh type. Expected `keras.distribution.Device`,"
|
248
|
+
f" got {type(device_mesh)}"
|
249
|
+
)
|
250
|
+
if model_parallel_dim_name not in device_mesh.axis_names:
|
251
|
+
raise ValueError(
|
252
|
+
f"{model_parallel_dim_name} is not found in the "
|
253
|
+
f"device_mesh.axis_names. {device_mesh.axis_name=}"
|
254
|
+
)
|
255
|
+
if data_parallel_dim_name not in device_mesh.axis_names:
|
256
|
+
raise ValueError(
|
257
|
+
f"{data_parallel_dim_name} is not found in the "
|
258
|
+
f"device_mesh.axis_names. {device_mesh.axis_name=}"
|
259
|
+
)
|
260
|
+
# Note that it is possible to further config the mesh to be 3D, eg
|
261
|
+
# (data, seq, model). We leave it as 2D for now for simplicity.
|
262
|
+
data_dim = data_parallel_dim_name
|
263
|
+
model_dim = model_parallel_dim_name
|
264
|
+
# The sharding config is based on the Gemma team training config.
|
265
|
+
# See https://arxiv.org/abs/2403.08295
|
266
|
+
layout_map = keras.distribution.LayoutMap(device_mesh)
|
267
|
+
layout_map["token_embedding/embeddings"] = (model_dim, data_dim)
|
268
|
+
layout_map[
|
269
|
+
"transformer_layer.*self_attention.*(query|key|value).kernel"
|
270
|
+
] = (
|
271
|
+
model_dim,
|
272
|
+
data_dim,
|
273
|
+
None,
|
274
|
+
)
|
275
|
+
layout_map["transformer_layer.*attention_output.kernel"] = (
|
276
|
+
model_dim,
|
277
|
+
None,
|
278
|
+
data_dim,
|
279
|
+
)
|
280
|
+
layout_map[
|
281
|
+
"transformer_layer.*feedforward_intermediate_dense.kernel"
|
282
|
+
] = (
|
283
|
+
data_dim,
|
284
|
+
model_dim,
|
285
|
+
)
|
286
|
+
layout_map["transformer_layer.*feedforward_gate_dense.kernel"] = (
|
287
|
+
data_dim,
|
288
|
+
model_dim,
|
289
|
+
)
|
290
|
+
layout_map["transformer_layer.*feedforward_output_dense.kernel"] = (
|
291
|
+
model_dim,
|
292
|
+
data_dim,
|
293
|
+
)
|
294
|
+
|
295
|
+
return layout_map
|
@@ -42,7 +42,9 @@ class LlamaCausalLM(CausalLM):
|
|
42
42
|
self.preprocessor = preprocessor
|
43
43
|
|
44
44
|
# === Functional Model ===
|
45
|
-
|
45
|
+
# This must be "backbone.input" i.e. the full input structure,
|
46
|
+
# rather than "backbone.inputs" which is the flattened list of inputs.
|
47
|
+
inputs = backbone.input
|
46
48
|
hidden_states = backbone(inputs)
|
47
49
|
outputs = backbone.token_embedding(hidden_states, reverse=True)
|
48
50
|
super().__init__(
|
@@ -42,7 +42,9 @@ class MistralCausalLM(CausalLM):
|
|
42
42
|
self.preprocessor = preprocessor
|
43
43
|
|
44
44
|
# === Functional Model ===
|
45
|
-
|
45
|
+
# This must be "backbone.input" i.e. the full input structure,
|
46
|
+
# rather than "backbone.inputs" which is the flattened list of inputs.
|
47
|
+
inputs = backbone.input
|
46
48
|
hidden_states = backbone(inputs)
|
47
49
|
outputs = backbone.token_embedding(hidden_states, reverse=True)
|
48
50
|
super().__init__(
|
@@ -110,7 +110,9 @@ class PaliGemmaCausalLM(CausalLM):
|
|
110
110
|
self.backbone = backbone
|
111
111
|
|
112
112
|
# === Functional Model ===
|
113
|
-
|
113
|
+
# This must be "backbone.input" i.e. the full input structure,
|
114
|
+
# rather than "backbone.inputs" which is the flattened list of inputs.
|
115
|
+
inputs = backbone.input
|
114
116
|
hidden_state = backbone(inputs=inputs)
|
115
117
|
outputs = backbone.token_embedding(hidden_state, reverse=True)
|
116
118
|
outputs = outputs[:, backbone.image_sequence_length :, :]
|
@@ -41,7 +41,9 @@ class Phi3CausalLM(CausalLM):
|
|
41
41
|
self.preprocessor = preprocessor
|
42
42
|
|
43
43
|
# === Functional Model ===
|
44
|
-
|
44
|
+
# This must be "backbone.input" i.e. the full input structure,
|
45
|
+
# rather than "backbone.inputs" which is the flattened list of inputs.
|
46
|
+
inputs = backbone.input
|
45
47
|
hidden_states = backbone(inputs)
|
46
48
|
outputs = backbone.token_embedding(hidden_states, reverse=True)
|
47
49
|
super().__init__(
|
keras_hub/src/tests/test_case.py
CHANGED
@@ -569,6 +569,15 @@ class TestCase(tf.test.TestCase, parameterized.TestCase):
|
|
569
569
|
ds = tf.data.Dataset.from_tensor_slices(train_data).batch(batch_size)
|
570
570
|
x, y, sw = keras.utils.unpack_x_y_sample_weight(train_data)
|
571
571
|
|
572
|
+
# Test: the tree struct output by the
|
573
|
+
# preprocessor must match what model expects.
|
574
|
+
preprocessed_data = preprocessor(*train_data)[0]
|
575
|
+
tree.assert_same_structure(
|
576
|
+
preprocessed_data,
|
577
|
+
task._inputs_struct,
|
578
|
+
check_types=False,
|
579
|
+
)
|
580
|
+
|
572
581
|
# Test predict.
|
573
582
|
output = task.predict(x)
|
574
583
|
if expected_output_shape is not None:
|
keras_hub/src/version_utils.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: keras-hub-nightly
|
3
|
-
Version: 0.16.1.dev202410150342
|
3
|
+
Version: 0.16.1.dev202410170342
|
4
4
|
Summary: Industry-strength Natural Language Processing extensions for Keras.
|
5
5
|
Home-page: https://github.com/keras-team/keras-hub
|
6
6
|
Author: Keras team
|
@@ -9,7 +9,7 @@ keras_hub/api/tokenizers/__init__.py,sha256=_f-r_cyUM2fjBB7iO84ThOdqqsAxHNIewJ2E
|
|
9
9
|
keras_hub/api/utils/__init__.py,sha256=Gp1E6gG-RtKQS3PBEQEOz9PQvXkXaJ0ySGMqZ7myN7A,215
|
10
10
|
keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
|
12
|
-
keras_hub/src/version_utils.py,sha256=
|
12
|
+
keras_hub/src/version_utils.py,sha256=JKhHcqjvch67-7KPLpPGS3nhs1bP6bpaXaMsSpxp0p4,222
|
13
13
|
keras_hub/src/bounding_box/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
keras_hub/src/bounding_box/converters.py,sha256=a5po8DBm87oz2EXfi-0uEZHCMlCJPIb4-MaZIdYx3Dg,17865
|
15
15
|
keras_hub/src/bounding_box/formats.py,sha256=YmskOz2BOSat7NaE__J9VfpSNGPJJR0znSzA4lp8MMI,3868
|
@@ -50,7 +50,7 @@ keras_hub/src/metrics/rouge_l.py,sha256=JlZhMBV6wS_6zMd57pkTc6yxHkEJT9fVQMlPZKek
|
|
50
50
|
keras_hub/src/metrics/rouge_n.py,sha256=JoFtmgjF4Ic263ny6bfD6vMHKreH9le3HnOOxemupRc,3620
|
51
51
|
keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
52
|
keras_hub/src/models/backbone.py,sha256=2OZx6WAx2q9JK2yue5BoUUipIBjpOJRVNnMjXLVDLRk,11185
|
53
|
-
keras_hub/src/models/causal_lm.py,sha256=
|
53
|
+
keras_hub/src/models/causal_lm.py,sha256=0Asl5v86jm4PnqCoQv7u4Sy8NfMoUvrQxnxQSTQLKog,16721
|
54
54
|
keras_hub/src/models/causal_lm_preprocessor.py,sha256=YY7VJZicdmnjDSWi9g4_pEpd5bdJK166GlWcapvokF0,6663
|
55
55
|
keras_hub/src/models/feature_pyramid_backbone.py,sha256=clEW-TTQSVJ_5qFNdDF0iABkin1p_xlBUFjJrC7T0IA,2247
|
56
56
|
keras_hub/src/models/image_classifier.py,sha256=yt6cjhPfqs8A_eWXBsXdXFzn-aRgH2rVHUq7Zu7CyK8,7804
|
@@ -122,7 +122,7 @@ keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py,sha256=WyFhuLcjFPFVuNL09b
|
|
122
122
|
keras_hub/src/models/deeplab_v3/deeplab_v3_image_converter.py,sha256=mRkH3HdhpV0fCcQcVXEvIX7SNk-bAMb3SAHzgK-FD5c,371
|
123
123
|
keras_hub/src/models/deeplab_v3/deeplab_v3_image_segmeter_preprocessor.py,sha256=hR9S6lNYamY0EBDBo3e1qTCiwtftmLXrN-UYuzfw5Io,581
|
124
124
|
keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py,sha256=qmEiolOOriLAojXB67xXW9IOo717kaCGeDVZJLaGY98,7834
|
125
|
-
keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py,sha256=
|
125
|
+
keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py,sha256=lbkP16g-w2-4RKSnISwW-RfvI6qqbE8yZzjRwgiXUIU,703
|
126
126
|
keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py,sha256=tiMDcCFANHMUx3IVtW3r1P_JTazgPPsbW4IktIytKEU,3650
|
127
127
|
keras_hub/src/models/densenet/__init__.py,sha256=r7StyamnWeeZxOk9r4ZYNbS_YVhu9YGPyXhNxljvdPg,269
|
128
128
|
keras_hub/src/models/densenet/densenet_backbone.py,sha256=dN9lUwKzO3E2HthNV2x54ozeBEQ0ilNs5uYHshFQpT0,6723
|
@@ -164,7 +164,7 @@ keras_hub/src/models/falcon/falcon_tokenizer.py,sha256=2B5vmpakj_iVB7T95_8OVreJb
|
|
164
164
|
keras_hub/src/models/falcon/falcon_transformer_decoder.py,sha256=QqIK6v97uBXZFBG3qS6O8HrP9_93uOFzvHQgOiMO2eY,8125
|
165
165
|
keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
|
166
166
|
keras_hub/src/models/gemma/gemma_attention.py,sha256=1CVN5z9GKoU8TuNMih2_MweDkpd98xSqdic9F8xIBE8,8317
|
167
|
-
keras_hub/src/models/gemma/gemma_backbone.py,sha256=
|
167
|
+
keras_hub/src/models/gemma/gemma_backbone.py,sha256=P5srrrqIrFIBF84KCKKl9vKyYiq0CxjhdcVk76PKVTQ,13377
|
168
168
|
keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=BNBoQIf0HoqCooalYsWE-28v5BGUNvL9YdUB8_NSkBU,16770
|
169
169
|
keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
|
170
170
|
keras_hub/src/models/gemma/gemma_decoder_block.py,sha256=tpBfH86Q48EvV0COkd1g2FJg9zHp7ktZBjegs3ehOYo,7588
|
@@ -187,8 +187,8 @@ keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py,sha256=xSLDgavOhhm3SZc18VN60
|
|
187
187
|
keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py,sha256=aKso-8yGrynn3tZ5xm2egcXIBQo3__sWZDBtjmS3ZgU,1991
|
188
188
|
keras_hub/src/models/llama/__init__.py,sha256=svVZjGi71R3lVbq0AdbqlXj909mr3Rp9EPXdiO0w0G0,251
|
189
189
|
keras_hub/src/models/llama/llama_attention.py,sha256=HzTWtvTjfN_j0vA9-ComstHpI81tzUrJU3RSSvSCaI4,7194
|
190
|
-
keras_hub/src/models/llama/llama_backbone.py,sha256=
|
191
|
-
keras_hub/src/models/llama/llama_causal_lm.py,sha256=
|
190
|
+
keras_hub/src/models/llama/llama_backbone.py,sha256=ElMjhfyTwXcChQPcrKo3bZozeRhzGyCXqOWA_siQFj8,11687
|
191
|
+
keras_hub/src/models/llama/llama_causal_lm.py,sha256=9bP4-XDCMgsZuH1ILIMzmwq2Fyy6vkk1Vsht-lMGCNo,13258
|
192
192
|
keras_hub/src/models/llama/llama_causal_lm_preprocessor.py,sha256=VTboOMiRBoxHrwP343upLUTsv3AG65r2H8h_PNPVphE,3047
|
193
193
|
keras_hub/src/models/llama/llama_decoder.py,sha256=6iERIblED0ZB5w_EUlHks4UvMnsrWONdO_Xdz2OzhWM,8623
|
194
194
|
keras_hub/src/models/llama/llama_layernorm.py,sha256=LfRbePHUJs00Ptf7dvNaw3Aj9n1xBMBpE_rS5zzsYMo,1050
|
@@ -203,7 +203,7 @@ keras_hub/src/models/llama3/llama3_tokenizer.py,sha256=J-KxRc08vGs4olFw_4mtJs0W_
|
|
203
203
|
keras_hub/src/models/mistral/__init__.py,sha256=vjBlzcrIsFSwJKnfwfTNMKstIEKGFTE3kVcdAdfwlnE,263
|
204
204
|
keras_hub/src/models/mistral/mistral_attention.py,sha256=HCkUIc2DVIlYC5hhwomENlqLOsKTvbCKF0lx0_OBAyA,7862
|
205
205
|
keras_hub/src/models/mistral/mistral_backbone.py,sha256=x4BfyfWTCUXcjPSxdPSl8QITXgzUg1oJlAQt2acZfv4,7245
|
206
|
-
keras_hub/src/models/mistral/mistral_causal_lm.py,sha256=
|
206
|
+
keras_hub/src/models/mistral/mistral_causal_lm.py,sha256=gEGUnB6yOib9G71n5Em5X8TPOllJW53UXlUCNJkL_ZU,13234
|
207
207
|
keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py,sha256=_4qq-uKktfIg_i081ZWjZGEIYZpedBwtBGpchQQ-qEk,3079
|
208
208
|
keras_hub/src/models/mistral/mistral_layer_norm.py,sha256=nimMZ5CTPK8v9eflfrGuzqmv-2vd2rGlPvcHOMwYZyg,1063
|
209
209
|
keras_hub/src/models/mistral/mistral_presets.py,sha256=gucgdaFAiU-vRDS1g9zWGHjbDF_jaCiljPibCF4yVqY,1329
|
@@ -227,7 +227,7 @@ keras_hub/src/models/opt/opt_presets.py,sha256=J1IJ5VRcZZ6UZJSLrxpbWXw39YmbRd_WQ
|
|
227
227
|
keras_hub/src/models/opt/opt_tokenizer.py,sha256=oDHeed4xf07tm14hj_C78BkzMuuRwRP2cRHmqYnObrs,2557
|
228
228
|
keras_hub/src/models/pali_gemma/__init__.py,sha256=uODWTlttOOchcTLpiYHCEWMXnDxIz8ZVIeYFQN2bd8o,288
|
229
229
|
keras_hub/src/models/pali_gemma/pali_gemma_backbone.py,sha256=srZyBsA5tulO_Fb03g9FE-vaw2j9ftfxnAy4P8cYB6o,10916
|
230
|
-
keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=
|
230
|
+
keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=AViEs6YltUqWnIVo7J02JkXcanBgLSdwZwF56TVr8gc,11345
|
231
231
|
keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py,sha256=F57y0fZ0wYYxfGIjfrJc1W9uQpViYFx5bvFjj5CqUbI,4814
|
232
232
|
keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py,sha256=Q_sPAULiSo_ZJeXklZjCLhvOMXk8MrPZhEXtL5yNOiI,5175
|
233
233
|
keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py,sha256=5yM_jUtrFsWIieiwfFBoP7mtPmQAwywkeLKbd7fhmzk,371
|
@@ -237,7 +237,7 @@ keras_hub/src/models/pali_gemma/pali_gemma_vit.py,sha256=wP1UtW0WnlRmga-JQRxWTfA
|
|
237
237
|
keras_hub/src/models/phi3/__init__.py,sha256=zIbf1MU-ks91mEkjTRJAsk51N3BBnXDF2JM1vO-13PQ,245
|
238
238
|
keras_hub/src/models/phi3/phi3_attention.py,sha256=dN8QwwTP9TxPBDv0MCvObLF3nHm1H6xbYr3T1K0nmg8,9243
|
239
239
|
keras_hub/src/models/phi3/phi3_backbone.py,sha256=fY-OY2ZrqxDHglYjTM0OCacBdEQHwj-XNmU0MnXL7iU,8885
|
240
|
-
keras_hub/src/models/phi3/phi3_causal_lm.py,sha256=
|
240
|
+
keras_hub/src/models/phi3/phi3_causal_lm.py,sha256=kMMq7fQ8hlb_mLO_nU1lGVqILayulVvzzZgl2EvY9_k,8389
|
241
241
|
keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py,sha256=gNx1k-n7d0XDwpNbcZiO9yLkwdXYCvwGyA3b0QCnPAE,3043
|
242
242
|
keras_hub/src/models/phi3/phi3_decoder.py,sha256=1raVexz1TkpqvMwW1Zs08KSxTs9gDc6VWUKJ9sM1VFY,9587
|
243
243
|
keras_hub/src/models/phi3/phi3_layernorm.py,sha256=Oqu81tGd97Lzx3kG1QEtZ0S6gbfn3GLgRzY8UWGJRBo,1049
|
@@ -333,7 +333,7 @@ keras_hub/src/samplers/serialization.py,sha256=K6FC4AY1sfOLLIk2k4G783XWnQ_Rk3z1Q
|
|
333
333
|
keras_hub/src/samplers/top_k_sampler.py,sha256=WSyrhmOCan55X2JYAnNWE88rkx66sXqdoerl87nOrDQ,2250
|
334
334
|
keras_hub/src/samplers/top_p_sampler.py,sha256=9r29WdqBlrW_2TBma6QqkRps2Uit4a6iZPmq1Gsiuko,3400
|
335
335
|
keras_hub/src/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
336
|
-
keras_hub/src/tests/test_case.py,sha256=
|
336
|
+
keras_hub/src/tests/test_case.py,sha256=KMFdQoTqAGotj8Pt8AxXjTJ_f0qwavIGUh-iqN1nQvA,26304
|
337
337
|
keras_hub/src/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
338
338
|
keras_hub/src/tokenizers/byte_pair_tokenizer.py,sha256=fGFp3WgPNYGTztpSGMl0kKFjn1bCeZB71lSJfT1eqEE,24052
|
339
339
|
keras_hub/src/tokenizers/byte_tokenizer.py,sha256=vjgrTT8FdtZVAlr0mU13alzADcUhtMrzgOs4lYeHvAQ,10648
|
@@ -368,7 +368,7 @@ keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS
|
|
368
368
|
keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYumf66hIid07k5NLqoeWAJgPnaLs,10649
|
369
369
|
keras_hub/src/utils/transformers/preset_loader.py,sha256=GS44hZUuGQCtzsyn8z44ZpHdftd3DFemwV2hx2bQa-U,2738
|
370
370
|
keras_hub/src/utils/transformers/safetensor_utils.py,sha256=rPK-Uw1CG0DX0d_UAD-r2cG9fw8GI8bvAlrcXfQ9g4c,3323
|
371
|
-
keras_hub_nightly-0.16.1.
|
372
|
-
keras_hub_nightly-0.16.1.
|
373
|
-
keras_hub_nightly-0.16.1.
|
374
|
-
keras_hub_nightly-0.16.1.
|
371
|
+
keras_hub_nightly-0.16.1.dev202410170342.dist-info/METADATA,sha256=SEFjDxUxSNwbNsZtCnanXay7BQFJrNDM1zgriF55ATQ,7458
|
372
|
+
keras_hub_nightly-0.16.1.dev202410170342.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
373
|
+
keras_hub_nightly-0.16.1.dev202410170342.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
|
374
|
+
keras_hub_nightly-0.16.1.dev202410170342.dist-info/RECORD,,
|