keras-hub-nightly 0.16.1.dev202410150342__py3-none-any.whl → 0.16.1.dev202410170342__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -274,6 +274,7 @@ class CausalLM(Task):
274
274
  inputs,
275
275
  max_length=None,
276
276
  stop_token_ids="auto",
277
+ strip_prompt=False,
277
278
  ):
278
279
  """Generate text given prompt `inputs`.
279
280
 
@@ -309,6 +310,9 @@ class CausalLM(Task):
309
310
  specify a list of token ids the model should stop on. Note that
310
311
  sequences of tokens will each be interpreted as a stop token,
311
312
  multi-token stop sequences are not supported.
313
+ strip_prompt: Optional. By default, generate() returns the full prompt
314
+ followed by its completion generated by the model. If this option
315
+ is set to True, only the newly generated text is returned.
312
316
  """
313
317
  # Setup our three main passes.
314
318
  # 1. Optionally preprocessing strings to dense integer tensors.
@@ -339,6 +343,33 @@ class CausalLM(Task):
339
343
  def generate(x):
340
344
  return generate_function(x, stop_token_ids=stop_token_ids)
341
345
 
346
def strip_prompt_function(x, prompt):
    """Remove the prompt from generated output, one batch at a time.

    Args:
        x: dict with `"token_ids"` and `"padding_mask"` entries holding
            the generated sequences.
        prompt: dict whose `"padding_mask"` entry marks the prompt
            positions. Its second dimension is used as the sequence
            length, so it is assumed to be rank 2 with the same shape
            as the entries of `x` -- TODO confirm against the
            preprocessor output.

    Returns:
        A dict with `"padding_mask"` and `"token_ids"` entries in which
        every row has been rolled so the generated text comes first and
        only the generated positions remain set in the mask.
    """
    prompt_mask = prompt["padding_mask"]
    seq_len = prompt_mask.shape[1]

    # Per-row prompt length, turned into a shift amount kept in
    # [0, seq_len) by the modulo.
    prompt_lengths = ops.sum(ops.cast(prompt_mask, "int"), axis=1)
    shifts = -prompt_lengths % seq_len

    # One row of gather indices per batch element.
    positions = ops.expand_dims(ops.arange(seq_len, dtype="int"), axis=0)
    gather_ix = positions - ops.expand_dims(shifts, axis=1)

    def roll_sequence(seq):
        # Gathering with the shifted indices yields the rollover.
        return ops.take_along_axis(seq, gather_ix, axis=1)

    # After rolling, the prompt sits at the end of each row and the
    # generated text at the beginning. XOR-ing the two rolled masks keeps
    # only the positions that belong to the generated text.
    rolled_prompt_mask = roll_sequence(prompt_mask)
    rolled_output_mask = roll_sequence(x["padding_mask"])
    stripped_mask = ops.logical_xor(rolled_prompt_mask, rolled_output_mask)

    # The token values under the masked-out positions are left as is;
    # the mask alone is relied upon to hide them.
    return {
        "padding_mask": stripped_mask,
        "token_ids": roll_sequence(x["token_ids"]),
    }
372
+
342
373
  def postprocess(x):
343
374
  return self.preprocessor.generate_postprocess(x)
344
375
 
@@ -347,7 +378,12 @@ class CausalLM(Task):
347
378
 
348
379
  if self.preprocessor is not None:
349
380
  inputs = [preprocess(x) for x in inputs]
350
- outputs = [generate(x) for x in inputs]
381
+
382
+ if strip_prompt:
383
+ outputs = [strip_prompt_function(generate(x), x) for x in inputs]
384
+ else:
385
+ outputs = [generate(x) for x in inputs]
386
+
351
387
  if self.preprocessor is not None:
352
388
  outputs = [postprocess(x) for x in outputs]
353
389
 
@@ -13,6 +13,6 @@ backbone_presets = {
13
13
  "path": "deeplabv3",
14
14
  "model_card": "https://arxiv.org/abs/1802.02611",
15
15
  },
16
- "kaggle_handle": "kaggle://keras/deeplabv3/keras/deeplabv3_plus_resnet50_pascalvoc/3",
16
+ "kaggle_handle": "kaggle://keras/deeplabv3/keras/deeplab_v3_plus_resnet50_pascalvoc/3",
17
17
  },
18
18
  }
@@ -224,7 +224,7 @@ class GemmaBackbone(Backbone):
224
224
 
225
225
  Example:
226
226
  ```
227
- # Feel free to change the mesh shape to balance data and model parallel
227
+ # Feel free to change the mesh shape to balance data and model parallelism
228
228
  mesh = keras.distribution.DeviceMesh(
229
229
  shape=(1, 8), axis_names=('batch', 'model'),
230
230
  devices=keras.distribution.list_devices())
@@ -232,11 +232,19 @@ class GemmaBackbone(Backbone):
232
232
  mesh, model_parallel_dim_name="model")
233
233
 
234
234
  distribution = keras.distribution.ModelParallel(
235
- mesh, layout_map, batch_dim_name='batch')
235
+ layout_map=layout_map, batch_dim_name='batch')
236
236
  with distribution.scope():
237
237
  gemma_model = keras_hub.models.GemmaCausalLM.from_preset()
238
238
  ```
239
239
 
240
+ To see how the layout map was applied, load the model then run (for one decoder block):
241
+ ```
242
+ embedding_layer = gemma_model.backbone.get_layer("token_embedding")
243
+ decoder_block_1 = gemma_model.backbone.get_layer('decoder_block_1')
244
+ for variable in embedding_layer.weights + decoder_block_1.weights:
245
+ print(f'{variable.path:<58} {str(variable.shape):<16} {str(variable.value.sharding.spec)}')
246
+ ```
247
+
240
248
  Args:
241
249
  device_mesh: The `keras.distribution.DeviceMesh` instance for
242
250
  distribution.
@@ -246,7 +254,7 @@ class GemmaBackbone(Backbone):
246
254
  the data should be partitioned on.
247
255
  Return:
248
256
  `keras.distribution.LayoutMap` that contains the sharding spec
249
- of all the model weights.
257
+ for all the model weights.
250
258
  """
251
259
  # The weight path and shape of the Gemma backbone is like below (for 2G)
252
260
  # token_embedding/embeddings, (256128, 2048), 524550144
@@ -175,3 +175,121 @@ class LlamaBackbone(Backbone):
175
175
  }
176
176
  )
177
177
  return config
178
+
179
@staticmethod
def get_layout_map(
    device_mesh,
    model_parallel_dim_name="model",
    data_parallel_dim_name="batch",
):
    """Get a `keras.distribution.LayoutMap` for model parallel distribution.

    The returned `LayoutMap` contains the sharding spec for the Llama
    backbone weights, so that you can use it to distribute weights across
    the accelerators.

    Example:
    ```
    # Feel free to change the mesh shape to balance data and model
    # parallelism
    mesh = keras.distribution.DeviceMesh(
        shape=(1, 8),
        axis_names=('batch', 'model'),
        devices=keras.distribution.list_devices(),
    )
    layout_map = LlamaBackbone.get_layout_map(
        mesh,
        model_parallel_dim_name="model",
    )

    distribution = keras.distribution.ModelParallel(
        layout_map=layout_map,
        batch_dim_name='batch',
    )

    with distribution.scope():
        llama_model = keras_hub.models.LlamaCausalLM.from_preset()
    ```

    To see how the layout map was applied, load the model then run (for
    one decoder block):
    ```
    embedding_layer = llama_model.backbone.get_layer("token_embedding")
    decoder_block_1 = llama_model.backbone.get_layer('transformer_layer_0')
    for variable in embedding_layer.weights + decoder_block_1.weights:
        print(f'{variable.path:<58} {str(variable.shape):<16} {str(variable.value.sharding.spec)}')
    ```

    Args:
        device_mesh: The `keras.distribution.DeviceMesh` instance for
            distribution.
        model_parallel_dim_name: The axis name of the device mesh, where
            the weights should be partitioned on.
        data_parallel_dim_name: The axis name of the device mesh, where
            the data should be partitioned on.
    Return:
        `keras.distribution.LayoutMap` that contains the sharding spec
        for all the model weights.
    Raises:
        ValueError: If `device_mesh` is not a
            `keras.distribution.DeviceMesh`, or if either dimension name
            is not one of `device_mesh.axis_names`.
    """
    # The weight path and shape of the Llama backbone is like below
    # token_embedding/embeddings                              (128256, 2048)
    # repeat block for decoder
    # transformer_layer_0/self_attention/query/kernel         (2048, 32, 64)
    # transformer_layer_0/self_attention/key/kernel           (2048, 8, 64)
    # transformer_layer_0/self_attention/value/kernel         (2048, 8, 64)
    # transformer_layer_0/self_attention/attention_output/kernel
    #                                                         (32, 64, 2048)
    # transformer_layer_0/self_attention_layernorm/scale      (2048,)
    # transformer_layer_0/feedforward_intermediate_dense/kernel
    #                                                         (2048, 8192)
    # transformer_layer_0/feedforward_gate_dense/kernel       (2048, 8192)
    # transformer_layer_0/feedforward_output_dense/kernel     (8192, 2048)
    # transformer_layer_0/feedforward_layernorm/scale         (2048,)

    if not isinstance(device_mesh, keras.distribution.DeviceMesh):
        # Name the type that is actually checked above.
        raise ValueError(
            "Invalid device_mesh type. Expected "
            "`keras.distribution.DeviceMesh`,"
            f" got {type(device_mesh)}"
        )
    # The messages below must read `axis_names` -- the same attribute the
    # membership checks use -- otherwise building the message fails with
    # an AttributeError and hides the intended ValueError.
    if model_parallel_dim_name not in device_mesh.axis_names:
        raise ValueError(
            f"{model_parallel_dim_name} is not found in the "
            f"device_mesh.axis_names. {device_mesh.axis_names=}"
        )
    if data_parallel_dim_name not in device_mesh.axis_names:
        raise ValueError(
            f"{data_parallel_dim_name} is not found in the "
            f"device_mesh.axis_names. {device_mesh.axis_names=}"
        )
    # Note that it is possible to further config the mesh to be 3D, eg
    # (data, seq, model). We leave it as 2D for now for simplicity.
    data_dim = data_parallel_dim_name
    model_dim = model_parallel_dim_name
    # The sharding config is based on the Gemma team training config.
    # See https://arxiv.org/abs/2403.08295
    layout_map = keras.distribution.LayoutMap(device_mesh)
    layout_map["token_embedding/embeddings"] = (model_dim, data_dim)
    layout_map[
        "transformer_layer.*self_attention.*(query|key|value).kernel"
    ] = (
        model_dim,
        data_dim,
        None,
    )
    layout_map["transformer_layer.*attention_output.kernel"] = (
        model_dim,
        None,
        data_dim,
    )
    layout_map[
        "transformer_layer.*feedforward_intermediate_dense.kernel"
    ] = (
        data_dim,
        model_dim,
    )
    layout_map["transformer_layer.*feedforward_gate_dense.kernel"] = (
        data_dim,
        model_dim,
    )
    layout_map["transformer_layer.*feedforward_output_dense.kernel"] = (
        model_dim,
        data_dim,
    )

    return layout_map
@@ -42,7 +42,9 @@ class LlamaCausalLM(CausalLM):
42
42
  self.preprocessor = preprocessor
43
43
 
44
44
  # === Functional Model ===
45
- inputs = backbone.inputs
45
+ # This must be "backbone.input" i.e. the full input structure,
46
+ # rather than "backbone.inputs" which is the flattened list of inputs.
47
+ inputs = backbone.input
46
48
  hidden_states = backbone(inputs)
47
49
  outputs = backbone.token_embedding(hidden_states, reverse=True)
48
50
  super().__init__(
@@ -42,7 +42,9 @@ class MistralCausalLM(CausalLM):
42
42
  self.preprocessor = preprocessor
43
43
 
44
44
  # === Functional Model ===
45
- inputs = backbone.inputs
45
+ # This must be "backbone.input" i.e. the full input structure,
46
+ # rather than "backbone.inputs" which is the flattened list of inputs.
47
+ inputs = backbone.input
46
48
  hidden_states = backbone(inputs)
47
49
  outputs = backbone.token_embedding(hidden_states, reverse=True)
48
50
  super().__init__(
@@ -110,7 +110,9 @@ class PaliGemmaCausalLM(CausalLM):
110
110
  self.backbone = backbone
111
111
 
112
112
  # === Functional Model ===
113
- inputs = backbone.inputs
113
+ # This must be "backbone.input" i.e. the full input structure,
114
+ # rather than "backbone.inputs" which is the flattened list of inputs.
115
+ inputs = backbone.input
114
116
  hidden_state = backbone(inputs=inputs)
115
117
  outputs = backbone.token_embedding(hidden_state, reverse=True)
116
118
  outputs = outputs[:, backbone.image_sequence_length :, :]
@@ -41,7 +41,9 @@ class Phi3CausalLM(CausalLM):
41
41
  self.preprocessor = preprocessor
42
42
 
43
43
  # === Functional Model ===
44
- inputs = backbone.inputs
44
+ # This must be "backbone.input" i.e. the full input structure,
45
+ # rather than "backbone.inputs" which is the flattened list of inputs.
46
+ inputs = backbone.input
45
47
  hidden_states = backbone(inputs)
46
48
  outputs = backbone.token_embedding(hidden_states, reverse=True)
47
49
  super().__init__(
@@ -569,6 +569,15 @@ class TestCase(tf.test.TestCase, parameterized.TestCase):
569
569
  ds = tf.data.Dataset.from_tensor_slices(train_data).batch(batch_size)
570
570
  x, y, sw = keras.utils.unpack_x_y_sample_weight(train_data)
571
571
 
572
+ # Test: the tree struct output by the
573
+ # preprocessor must match what model expects.
574
+ preprocessed_data = preprocessor(*train_data)[0]
575
+ tree.assert_same_structure(
576
+ preprocessed_data,
577
+ task._inputs_struct,
578
+ check_types=False,
579
+ )
580
+
572
581
  # Test predict.
573
582
  output = task.predict(x)
574
583
  if expected_output_shape is not None:
@@ -1,7 +1,7 @@
1
1
  from keras_hub.src.api_export import keras_hub_export
2
2
 
3
3
  # Unique source of truth for the version number.
4
- __version__ = "0.16.1.dev202410150342"
4
+ __version__ = "0.16.1.dev202410170342"
5
5
 
6
6
 
7
7
  @keras_hub_export("keras_hub.version")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: keras-hub-nightly
3
- Version: 0.16.1.dev202410150342
3
+ Version: 0.16.1.dev202410170342
4
4
  Summary: Industry-strength Natural Language Processing extensions for Keras.
5
5
  Home-page: https://github.com/keras-team/keras-hub
6
6
  Author: Keras team
@@ -9,7 +9,7 @@ keras_hub/api/tokenizers/__init__.py,sha256=_f-r_cyUM2fjBB7iO84ThOdqqsAxHNIewJ2E
9
9
  keras_hub/api/utils/__init__.py,sha256=Gp1E6gG-RtKQS3PBEQEOz9PQvXkXaJ0ySGMqZ7myN7A,215
10
10
  keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
12
- keras_hub/src/version_utils.py,sha256=rY0OztXmBo2_0LAdO10JpHYw9H8oOtH9eTkj7k4SVno,222
12
+ keras_hub/src/version_utils.py,sha256=JKhHcqjvch67-7KPLpPGS3nhs1bP6bpaXaMsSpxp0p4,222
13
13
  keras_hub/src/bounding_box/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  keras_hub/src/bounding_box/converters.py,sha256=a5po8DBm87oz2EXfi-0uEZHCMlCJPIb4-MaZIdYx3Dg,17865
15
15
  keras_hub/src/bounding_box/formats.py,sha256=YmskOz2BOSat7NaE__J9VfpSNGPJJR0znSzA4lp8MMI,3868
@@ -50,7 +50,7 @@ keras_hub/src/metrics/rouge_l.py,sha256=JlZhMBV6wS_6zMd57pkTc6yxHkEJT9fVQMlPZKek
50
50
  keras_hub/src/metrics/rouge_n.py,sha256=JoFtmgjF4Ic263ny6bfD6vMHKreH9le3HnOOxemupRc,3620
51
51
  keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  keras_hub/src/models/backbone.py,sha256=2OZx6WAx2q9JK2yue5BoUUipIBjpOJRVNnMjXLVDLRk,11185
53
- keras_hub/src/models/causal_lm.py,sha256=zGUamLuL2HlTgummUhfnA8Uoe4QMsGGLD4uJazxJe-Y,15079
53
+ keras_hub/src/models/causal_lm.py,sha256=0Asl5v86jm4PnqCoQv7u4Sy8NfMoUvrQxnxQSTQLKog,16721
54
54
  keras_hub/src/models/causal_lm_preprocessor.py,sha256=YY7VJZicdmnjDSWi9g4_pEpd5bdJK166GlWcapvokF0,6663
55
55
  keras_hub/src/models/feature_pyramid_backbone.py,sha256=clEW-TTQSVJ_5qFNdDF0iABkin1p_xlBUFjJrC7T0IA,2247
56
56
  keras_hub/src/models/image_classifier.py,sha256=yt6cjhPfqs8A_eWXBsXdXFzn-aRgH2rVHUq7Zu7CyK8,7804
@@ -122,7 +122,7 @@ keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py,sha256=WyFhuLcjFPFVuNL09b
122
122
  keras_hub/src/models/deeplab_v3/deeplab_v3_image_converter.py,sha256=mRkH3HdhpV0fCcQcVXEvIX7SNk-bAMb3SAHzgK-FD5c,371
123
123
  keras_hub/src/models/deeplab_v3/deeplab_v3_image_segmeter_preprocessor.py,sha256=hR9S6lNYamY0EBDBo3e1qTCiwtftmLXrN-UYuzfw5Io,581
124
124
  keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py,sha256=qmEiolOOriLAojXB67xXW9IOo717kaCGeDVZJLaGY98,7834
125
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py,sha256=tfTOz0H_XX1o-0oM7O3j7OyKxPDIesrV8FMO4IfbbBk,702
125
+ keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py,sha256=lbkP16g-w2-4RKSnISwW-RfvI6qqbE8yZzjRwgiXUIU,703
126
126
  keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py,sha256=tiMDcCFANHMUx3IVtW3r1P_JTazgPPsbW4IktIytKEU,3650
127
127
  keras_hub/src/models/densenet/__init__.py,sha256=r7StyamnWeeZxOk9r4ZYNbS_YVhu9YGPyXhNxljvdPg,269
128
128
  keras_hub/src/models/densenet/densenet_backbone.py,sha256=dN9lUwKzO3E2HthNV2x54ozeBEQ0ilNs5uYHshFQpT0,6723
@@ -164,7 +164,7 @@ keras_hub/src/models/falcon/falcon_tokenizer.py,sha256=2B5vmpakj_iVB7T95_8OVreJb
164
164
  keras_hub/src/models/falcon/falcon_transformer_decoder.py,sha256=QqIK6v97uBXZFBG3qS6O8HrP9_93uOFzvHQgOiMO2eY,8125
165
165
  keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
166
166
  keras_hub/src/models/gemma/gemma_attention.py,sha256=1CVN5z9GKoU8TuNMih2_MweDkpd98xSqdic9F8xIBE8,8317
167
- keras_hub/src/models/gemma/gemma_backbone.py,sha256=EttzmZHWXWl95__09reVFJxT__MtUSUtJAw15-Dao28,12914
167
+ keras_hub/src/models/gemma/gemma_backbone.py,sha256=P5srrrqIrFIBF84KCKKl9vKyYiq0CxjhdcVk76PKVTQ,13377
168
168
  keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=BNBoQIf0HoqCooalYsWE-28v5BGUNvL9YdUB8_NSkBU,16770
169
169
  keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
170
170
  keras_hub/src/models/gemma/gemma_decoder_block.py,sha256=tpBfH86Q48EvV0COkd1g2FJg9zHp7ktZBjegs3ehOYo,7588
@@ -187,8 +187,8 @@ keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py,sha256=xSLDgavOhhm3SZc18VN60
187
187
  keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py,sha256=aKso-8yGrynn3tZ5xm2egcXIBQo3__sWZDBtjmS3ZgU,1991
188
188
  keras_hub/src/models/llama/__init__.py,sha256=svVZjGi71R3lVbq0AdbqlXj909mr3Rp9EPXdiO0w0G0,251
189
189
  keras_hub/src/models/llama/llama_attention.py,sha256=HzTWtvTjfN_j0vA9-ComstHpI81tzUrJU3RSSvSCaI4,7194
190
- keras_hub/src/models/llama/llama_backbone.py,sha256=6tkTvAwhFZjnHFIzQbUYlgByMt2qQE2F3sfBluVhON0,6703
191
- keras_hub/src/models/llama/llama_causal_lm.py,sha256=JyTiCt1mxvf6QNxhjCjAW-aopTL4teS1EHTb_K-RGrs,13109
190
+ keras_hub/src/models/llama/llama_backbone.py,sha256=ElMjhfyTwXcChQPcrKo3bZozeRhzGyCXqOWA_siQFj8,11687
191
+ keras_hub/src/models/llama/llama_causal_lm.py,sha256=9bP4-XDCMgsZuH1ILIMzmwq2Fyy6vkk1Vsht-lMGCNo,13258
192
192
  keras_hub/src/models/llama/llama_causal_lm_preprocessor.py,sha256=VTboOMiRBoxHrwP343upLUTsv3AG65r2H8h_PNPVphE,3047
193
193
  keras_hub/src/models/llama/llama_decoder.py,sha256=6iERIblED0ZB5w_EUlHks4UvMnsrWONdO_Xdz2OzhWM,8623
194
194
  keras_hub/src/models/llama/llama_layernorm.py,sha256=LfRbePHUJs00Ptf7dvNaw3Aj9n1xBMBpE_rS5zzsYMo,1050
@@ -203,7 +203,7 @@ keras_hub/src/models/llama3/llama3_tokenizer.py,sha256=J-KxRc08vGs4olFw_4mtJs0W_
203
203
  keras_hub/src/models/mistral/__init__.py,sha256=vjBlzcrIsFSwJKnfwfTNMKstIEKGFTE3kVcdAdfwlnE,263
204
204
  keras_hub/src/models/mistral/mistral_attention.py,sha256=HCkUIc2DVIlYC5hhwomENlqLOsKTvbCKF0lx0_OBAyA,7862
205
205
  keras_hub/src/models/mistral/mistral_backbone.py,sha256=x4BfyfWTCUXcjPSxdPSl8QITXgzUg1oJlAQt2acZfv4,7245
206
- keras_hub/src/models/mistral/mistral_causal_lm.py,sha256=OQ3IbdkVlNIXsByZ5ClJoCs0PA86AbMeG6UESbnlfE8,13085
206
+ keras_hub/src/models/mistral/mistral_causal_lm.py,sha256=gEGUnB6yOib9G71n5Em5X8TPOllJW53UXlUCNJkL_ZU,13234
207
207
  keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py,sha256=_4qq-uKktfIg_i081ZWjZGEIYZpedBwtBGpchQQ-qEk,3079
208
208
  keras_hub/src/models/mistral/mistral_layer_norm.py,sha256=nimMZ5CTPK8v9eflfrGuzqmv-2vd2rGlPvcHOMwYZyg,1063
209
209
  keras_hub/src/models/mistral/mistral_presets.py,sha256=gucgdaFAiU-vRDS1g9zWGHjbDF_jaCiljPibCF4yVqY,1329
@@ -227,7 +227,7 @@ keras_hub/src/models/opt/opt_presets.py,sha256=J1IJ5VRcZZ6UZJSLrxpbWXw39YmbRd_WQ
227
227
  keras_hub/src/models/opt/opt_tokenizer.py,sha256=oDHeed4xf07tm14hj_C78BkzMuuRwRP2cRHmqYnObrs,2557
228
228
  keras_hub/src/models/pali_gemma/__init__.py,sha256=uODWTlttOOchcTLpiYHCEWMXnDxIz8ZVIeYFQN2bd8o,288
229
229
  keras_hub/src/models/pali_gemma/pali_gemma_backbone.py,sha256=srZyBsA5tulO_Fb03g9FE-vaw2j9ftfxnAy4P8cYB6o,10916
230
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=qlcBnFtPgKIRtdHgA4rrhiktBJq4h_uV-HriuuRBVwc,11196
230
+ keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=AViEs6YltUqWnIVo7J02JkXcanBgLSdwZwF56TVr8gc,11345
231
231
  keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py,sha256=F57y0fZ0wYYxfGIjfrJc1W9uQpViYFx5bvFjj5CqUbI,4814
232
232
  keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py,sha256=Q_sPAULiSo_ZJeXklZjCLhvOMXk8MrPZhEXtL5yNOiI,5175
233
233
  keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py,sha256=5yM_jUtrFsWIieiwfFBoP7mtPmQAwywkeLKbd7fhmzk,371
@@ -237,7 +237,7 @@ keras_hub/src/models/pali_gemma/pali_gemma_vit.py,sha256=wP1UtW0WnlRmga-JQRxWTfA
237
237
  keras_hub/src/models/phi3/__init__.py,sha256=zIbf1MU-ks91mEkjTRJAsk51N3BBnXDF2JM1vO-13PQ,245
238
238
  keras_hub/src/models/phi3/phi3_attention.py,sha256=dN8QwwTP9TxPBDv0MCvObLF3nHm1H6xbYr3T1K0nmg8,9243
239
239
  keras_hub/src/models/phi3/phi3_backbone.py,sha256=fY-OY2ZrqxDHglYjTM0OCacBdEQHwj-XNmU0MnXL7iU,8885
240
- keras_hub/src/models/phi3/phi3_causal_lm.py,sha256=a1TVyDgEv3Sd66Cf7xfa28dESGrUX1bM7pHAw1QfTaw,8240
240
+ keras_hub/src/models/phi3/phi3_causal_lm.py,sha256=kMMq7fQ8hlb_mLO_nU1lGVqILayulVvzzZgl2EvY9_k,8389
241
241
  keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py,sha256=gNx1k-n7d0XDwpNbcZiO9yLkwdXYCvwGyA3b0QCnPAE,3043
242
242
  keras_hub/src/models/phi3/phi3_decoder.py,sha256=1raVexz1TkpqvMwW1Zs08KSxTs9gDc6VWUKJ9sM1VFY,9587
243
243
  keras_hub/src/models/phi3/phi3_layernorm.py,sha256=Oqu81tGd97Lzx3kG1QEtZ0S6gbfn3GLgRzY8UWGJRBo,1049
@@ -333,7 +333,7 @@ keras_hub/src/samplers/serialization.py,sha256=K6FC4AY1sfOLLIk2k4G783XWnQ_Rk3z1Q
333
333
  keras_hub/src/samplers/top_k_sampler.py,sha256=WSyrhmOCan55X2JYAnNWE88rkx66sXqdoerl87nOrDQ,2250
334
334
  keras_hub/src/samplers/top_p_sampler.py,sha256=9r29WdqBlrW_2TBma6QqkRps2Uit4a6iZPmq1Gsiuko,3400
335
335
  keras_hub/src/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
- keras_hub/src/tests/test_case.py,sha256=9-yV87k508ciVOJUBiypd8HmnDTHVtWU2m_RbOHMv5Q,26005
336
+ keras_hub/src/tests/test_case.py,sha256=KMFdQoTqAGotj8Pt8AxXjTJ_f0qwavIGUh-iqN1nQvA,26304
337
337
  keras_hub/src/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
338
338
  keras_hub/src/tokenizers/byte_pair_tokenizer.py,sha256=fGFp3WgPNYGTztpSGMl0kKFjn1bCeZB71lSJfT1eqEE,24052
339
339
  keras_hub/src/tokenizers/byte_tokenizer.py,sha256=vjgrTT8FdtZVAlr0mU13alzADcUhtMrzgOs4lYeHvAQ,10648
@@ -368,7 +368,7 @@ keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS
368
368
  keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYumf66hIid07k5NLqoeWAJgPnaLs,10649
369
369
  keras_hub/src/utils/transformers/preset_loader.py,sha256=GS44hZUuGQCtzsyn8z44ZpHdftd3DFemwV2hx2bQa-U,2738
370
370
  keras_hub/src/utils/transformers/safetensor_utils.py,sha256=rPK-Uw1CG0DX0d_UAD-r2cG9fw8GI8bvAlrcXfQ9g4c,3323
371
- keras_hub_nightly-0.16.1.dev202410150342.dist-info/METADATA,sha256=Tj8fIeiKR1xN6oFPr7bWgB_jGSpMm8ZiyE5baY9IC6U,7458
372
- keras_hub_nightly-0.16.1.dev202410150342.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
373
- keras_hub_nightly-0.16.1.dev202410150342.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
374
- keras_hub_nightly-0.16.1.dev202410150342.dist-info/RECORD,,
371
+ keras_hub_nightly-0.16.1.dev202410170342.dist-info/METADATA,sha256=SEFjDxUxSNwbNsZtCnanXay7BQFJrNDM1zgriF55ATQ,7458
372
+ keras_hub_nightly-0.16.1.dev202410170342.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
373
+ keras_hub_nightly-0.16.1.dev202410170342.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
374
+ keras_hub_nightly-0.16.1.dev202410170342.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5