keras-hub-nightly 0.22.0.dev202506020416__py3-none-any.whl → 0.22.0.dev202506050411__py3-none-any.whl

keras_hub/src/models/qwen/qwen_causal_lm.py CHANGED
@@ -17,6 +17,130 @@ from keras_hub.src.utils.tensor_utils import any_equal
     ]
 )
 class QwenCausalLM(CausalLM):
+    """An end-to-end Qwen model for causal language modeling.
+
+    A causal language model (LM) predicts the next token based on previous
+    tokens. This task setup can be used to train the model unsupervised on
+    plain text input, or to autoregressively generate plain text similar to
+    the data used for training. This task can be used for pre-training or
+    fine-tuning a Qwen model, simply by calling `fit()`.
+
+    This model has a `generate()` method, which generates text based on a
+    prompt. The generation strategy used is controlled by an additional
+    `sampler` argument on `compile()`. You can recompile the model with
+    different `keras_hub.samplers` objects to control the generation. By
+    default, `"greedy"` sampling will be used.
+
+    This model can optionally be configured with a `preprocessor` layer, in
+    which case it will automatically apply preprocessing to string inputs
+    during `fit()`, `predict()`, `evaluate()`, and `generate()`. This is
+    done by default when creating the model with `from_preset()`.
+
+    Args:
+        backbone: A `keras_hub.models.QwenBackbone` instance.
+        preprocessor: A `keras_hub.models.QwenCausalLMPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
+
+    Examples:
+
+    Use `generate()` to do text generation.
+    ```python
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.generate("I want to say", max_length=30)
+
+    # Generate with batched prompts.
+    qwen_lm.generate(["This is a", "Where are you"], max_length=30)
+    ```
+
+    Compile the `generate()` function with a custom sampler.
+    ```python
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.compile(sampler="top_k")
+    qwen_lm.generate("I want to say", max_length=30)
+
+    qwen_lm.compile(sampler=keras_hub.samplers.BeamSampler(num_beams=2))
+    qwen_lm.generate("I want to say", max_length=30)
+    ```
+
+    Use `generate()` without preprocessing.
+    ```python
+    prompt = {
+        # Token ids for "<bos> Qwen is".
+        "token_ids": np.array([[2, 12345, 678, 0, 0, 0, 0]] * 2),
+        # Use `"padding_mask"` to indicate values that should not be overridden.
+        "padding_mask": np.array([[1, 1, 1, 0, 0, 0, 0]] * 2),
+    }
+
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset(
+        "qwen2.5_0.5b_en",
+        preprocessor=None,
+    )
+    qwen_lm.generate(prompt)
+    ```
+
+    Call `fit()` on a single batch.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+
+    Call `fit()` with LoRA fine-tuning enabled.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.backbone.enable_lora(rank=4)
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+
+    Call `fit()` without preprocessing.
+    ```python
+    x = {
+        # Token ids for "<bos> Qwen is a language model<eos>"
+        "token_ids": np.array([[2, 12345, 678, 543, 9876, 1, 0, 0]] * 2),
+        "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 0, 0]] * 2),
+    }
+    y = np.array([[12345, 678, 543, 9876, 1, 0, 0, 0]] * 2)
+    sw = np.array([[1, 1, 1, 1, 1, 0, 0, 0]] * 2)
+
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset(
+        "qwen2.5_0.5b_en",
+        preprocessor=None,
+    )
+    qwen_lm.fit(x=x, y=y, sample_weight=sw, batch_size=2)
+    ```
+
+    Custom backbone and vocabulary.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+
+    # Qwen uses a byte-pair encoding tokenizer; the vocabulary and merges
+    # file names here are placeholders for your own assets.
+    tokenizer = keras_hub.models.QwenTokenizer(
+        vocabulary="qwen_vocab.json",
+        merges="qwen_merges.txt",
+    )
+    preprocessor = keras_hub.models.QwenCausalLMPreprocessor(
+        tokenizer=tokenizer,
+        sequence_length=128,
+    )
+    backbone = keras_hub.models.QwenBackbone(
+        vocabulary_size=151936,
+        num_layers=28,
+        num_query_heads=16,
+        num_key_value_heads=8,
+        hidden_dim=2048,
+        intermediate_dim=4096,
+    )
+    qwen_lm = keras_hub.models.QwenCausalLM(
+        backbone=backbone,
+        preprocessor=preprocessor,
+    )
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+    """
+
     backbone_cls = QwenBackbone
     preprocessor_cls = QwenCausalLMPreprocessor
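A usage note on the docstring above: its examples pass `sampler` either as a string or as a `BeamSampler` object. A minimal sketch of tuning a sampler's parameters directly, assuming the standard `keras_hub.samplers.TopKSampler` API (not itself shown in this diff):

```python
import keras_hub

# Load the small Qwen preset named in the docstring examples.
qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")

# Passing a sampler object instead of the "top_k" string exposes its
# parameters, e.g. the candidate pool size and the softmax temperature.
qwen_lm.compile(sampler=keras_hub.samplers.TopKSampler(k=10, temperature=0.7))
qwen_lm.generate("I want to say", max_length=30)
```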
 
keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py CHANGED
@@ -11,6 +11,72 @@ from keras_hub.src.models.qwen.qwen_tokenizer import QwenTokenizer
     ]
 )
 class QwenCausalLMPreprocessor(CausalLMPreprocessor):
+    """Qwen Causal LM preprocessor.
+
+    This preprocessing layer is meant for use with
+    `keras_hub.models.QwenCausalLM`. By default, it will take in batches of
+    strings, and return outputs in a `(x, y, sample_weight)` format, where
+    the `y` label is the next token id in the `x` sequence.
+
+    For use with generation, the layer also exposes two methods
+    `generate_preprocess()` and `generate_postprocess()`. When this
+    preprocessor is attached to a `keras_hub.models.QwenCausalLM` instance,
+    these methods will be called implicitly in `generate()`. They can also
+    be called standalone (e.g. to precompute preprocessing inputs for
+    generation in a separate process).
+
+    Args:
+        tokenizer: A `keras_hub.models.QwenTokenizer` instance.
+        sequence_length: The length of the packed inputs.
+        add_start_token: If `True`, the preprocessor will prepend the
+            tokenizer start token to each input sequence. Default is `True`.
+        add_end_token: If `True`, the preprocessor will append the tokenizer
+            end token to each input sequence. Default is `False`.
+
+    Call arguments:
+        x: A string, `tf.Tensor` or list of python strings.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.
+        sequence_length: Pass to override the configured `sequence_length`
+            of the layer.
+
+    Examples:
+    ```python
+    # Load the preprocessor from a preset.
+    preprocessor = keras_hub.models.QwenCausalLMPreprocessor.from_preset(
+        "qwen2.5_0.5b_en"
+    )
+
+    # Tokenize and pack a single sentence.
+    sentence = tf.constant("League of legends")
+    preprocessor(sentence)
+    # Same output.
+    preprocessor("League of legends")
+
+    # Tokenize a batch of sentences.
+    sentences = tf.constant(["Taco tuesday", "Fish taco please!"])
+    preprocessor(sentences)
+    # Same output.
+    preprocessor(["Taco tuesday", "Fish taco please!"])
+
+    # Map a labeled dataset to preprocess sentences.
+    features = tf.constant(
+        [
+            "Avatar 2 is amazing!",
+            "Well, I am not sure.",
+        ]
+    )
+    labels = tf.constant([1, 0])
+    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map a dataset to preprocess unlabeled sentences.
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
     backbone_cls = QwenBackbone
     tokenizer_cls = QwenTokenizer
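The docstring above notes that `generate_preprocess()` and `generate_postprocess()` can be called standalone. A minimal sketch of that flow, assuming the usual `"token_ids"`/`"padding_mask"` dict layout these methods use (the exact keys are not shown in this diff):

```python
import keras_hub

preprocessor = keras_hub.models.QwenCausalLMPreprocessor.from_preset(
    "qwen2.5_0.5b_en"
)

# Tokenize and pad prompts ahead of time, e.g. in a data-loading process.
batch = preprocessor.generate_preprocess(
    ["I want to say"], sequence_length=30
)
# `batch` holds "token_ids" and "padding_mask" arrays ready for generate().

# After generation, strip padding and detokenize back to strings.
preprocessor.generate_postprocess(batch)
```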
 
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py CHANGED
@@ -4,12 +4,74 @@ from keras_hub.src.models.qwen_moe.qwen_moe_backbone import QwenMoeBackbone
 from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import QwenMoeTokenizer
 
 
-@keras_hub_export(
-    [
-        "keras_hub.models.QwenMoeCausalLMPreprocessor",
-    ]
-)
+@keras_hub_export("keras_hub.models.QwenMoeCausalLMPreprocessor")
 class QwenMoeCausalLMPreprocessor(CausalLMPreprocessor):
+    """Qwen-Moe Causal LM preprocessor.
+
+    This preprocessing layer is meant for use with
+    `keras_hub.models.QwenMoeCausalLM`. By default, it will take in batches
+    of strings, and return outputs in a `(x, y, sample_weight)` format,
+    where the `y` label is the next token id in the `x` sequence.
+
+    For use with generation, the layer also exposes two methods
+    `generate_preprocess()` and `generate_postprocess()`. When this
+    preprocessor is attached to a `keras_hub.models.QwenMoeCausalLM`
+    instance, these methods will be called implicitly in `generate()`. They
+    can also be called standalone (e.g. to precompute preprocessing inputs
+    for generation in a separate process).
+
+    Args:
+        tokenizer: A `keras_hub.models.QwenMoeTokenizer` instance.
+        sequence_length: The length of the packed inputs.
+        add_start_token: If `True`, the preprocessor will prepend the
+            tokenizer start token to each input sequence. Default is `True`.
+        add_end_token: If `True`, the preprocessor will append the tokenizer
+            end token to each input sequence. Default is `False`.
+
+    Call arguments:
+        x: A string, `tf.Tensor` or list of python strings.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.
+        sequence_length: Pass to override the configured `sequence_length`
+            of the layer.
+
+    Examples:
+    ```python
+    # Load the preprocessor from a preset.
+    preprocessor = keras_hub.models.QwenMoeCausalLMPreprocessor.from_preset(
+        "qwen1.5_moe_2.7b_en"
+    )
+
+    # Tokenize and pack a single sentence.
+    sentence = tf.constant("League of legends")
+    preprocessor(sentence)
+    # Same output.
+    preprocessor("League of legends")
+
+    # Tokenize a batch of sentences.
+    sentences = tf.constant(["Taco tuesday", "Fish taco please!"])
+    preprocessor(sentences)
+    # Same output.
+    preprocessor(["Taco tuesday", "Fish taco please!"])
+
+    # Map a labeled dataset to preprocess sentences.
+    features = tf.constant(
+        [
+            "Avatar 2 is amazing!",
+            "Well, I am not sure.",
+        ]
+    )
+    labels = tf.constant([1, 0])
+    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map a dataset to preprocess unlabeled sentences.
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
     backbone_cls = QwenMoeBackbone
     tokenizer_cls = QwenMoeTokenizer
 
keras_hub/src/version.py CHANGED
@@ -1,7 +1,7 @@
 from keras_hub.src.api_export import keras_hub_export
 
 # Unique source of truth for the version number.
-__version__ = "0.22.0.dev202506020416"
+__version__ = "0.22.0.dev202506050411"
 
 
 @keras_hub_export("keras_hub.version")
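Since `__version__` is re-exported through the `keras_hub.version` symbol visible in the context line above, a quick sketch to confirm which nightly is installed:

```python
import keras_hub

# Prints "0.22.0.dev202506050411" when the new wheel is installed.
print(keras_hub.version())
```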
keras_hub_nightly-0.22.0.dev202506050411.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: keras-hub-nightly
-Version: 0.22.0.dev202506020416
+Version: 0.22.0.dev202506050411
 Summary: Pretrained models for Keras.
 Author-email: Keras team <keras-users@googlegroups.com>
 License-Expression: Apache-2.0
keras_hub_nightly-0.22.0.dev202506050411.dist-info/RECORD CHANGED
@@ -5,7 +5,7 @@ keras_hub/models/__init__.py,sha256=7MhCw7S-uIPcko-R6g5a-Jy1idKe7BwlI836PfekhHc,
 keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version.py,sha256=KA5qbLmn8SK7MldRkS5yF9nlAlhQyGwT1bc7bwd9c-c,222
+keras_hub/src/version.py,sha256=B9uwPpXhJdy_zwokYEE5czEMPf0rOU7lAUA6ctNnbqg,222
 keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -314,8 +314,8 @@ keras_hub/src/models/phi3/phi3_tokenizer.py,sha256=bOPH14wTVVHJHq8mgzXLjsgvKMNhf
 keras_hub/src/models/qwen/__init__.py,sha256=C2NncZC-NIg29cQwbHUsrLsA6JAFXtco3hwP7VFgy9M,245
 keras_hub/src/models/qwen/qwen_attention.py,sha256=SrUYESCg27ksuDKZHKJ5Wmnkbr6WZdF7nHv0AHFfWR8,13014
 keras_hub/src/models/qwen/qwen_backbone.py,sha256=i39_LoKu6hcYWV6KFh2OzUDaXjV7g1WLNGF2-JD_tqI,13015
-keras_hub/src/models/qwen/qwen_causal_lm.py,sha256=_f-UHaKHp0ncxknpkpEJiW3jlng3E4CmddjQfz2QzJo,12249
-keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py,sha256=Va-4TLJD3ycEnkS41rF3dVj4_6K0j-gxLTrREFRcyr0,609
+keras_hub/src/models/qwen/qwen_causal_lm.py,sha256=2ugwuJsoxO8WpKX8iuWQPwc7SOAks7YHNvLeaJfLqG4,16722
+keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py,sha256=Va6LPa0GoLSxPripWOngw6jVNI-nQSFMdaja_BcfWmY,3195
 keras_hub/src/models/qwen/qwen_decoder.py,sha256=utmAvZlU7_nP-6pjGPDinK4JaMzsQSwOARG0ote-jAg,11771
 keras_hub/src/models/qwen/qwen_layernorm.py,sha256=DS35r3qd6g5ocL7Nhf_vNzLLMo1aI9VCSmL64dgNOYI,924
 keras_hub/src/models/qwen/qwen_presets.py,sha256=1FkKV6M3yqJz4EP1xa7bEvfIQ721xXT-_ikjWX0xvww,1992
@@ -330,7 +330,7 @@ keras_hub/src/models/qwen_moe/__init__.py,sha256=5D8GUmVDsJs0J4sVZHcXOLkZf12U96l
 keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=pE79_iHUm2LGkoWL6zMJw_pNfzIvmyq3yJaiq47W2TY,13242
 keras_hub/src/models/qwen_moe/qwen_moe_backbone.py,sha256=nrfELvIvRLmrgKrUNXci2CrecmeI6bWzJj7HH-RcWJA,15341
 keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py,sha256=MeP60v7GcN_SmH5_ULRpqgmFVgaYAosSecZiSQVlJvU,13256
-keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py,sha256=uKaXRrJs02vkVudjdehzJPp0B84tPMkxNHlp166kceE,589
+keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py,sha256=9P6TT7W_fqf4HsXcmlHF-DW_anR-XoDrRN2ZFGA7Ai4,3168
 keras_hub/src/models/qwen_moe/qwen_moe_decoder.py,sha256=kmUjLpYTbJQ3J_31qWhLOd0Dg2_9cl_JX_zM8ZMH1Qo,23130
 keras_hub/src/models/qwen_moe/qwen_moe_layernorm.py,sha256=DbkWJo7U0-cwdZwHPeAnFznYwtao6o0fjpoDJ9UWnpc,927
 keras_hub/src/models/qwen_moe/qwen_moe_presets.py,sha256=LhOA3Ow-z3cNTan4AOrtyCXS58EgfvO_gtqiZt5cUQc,455
@@ -517,7 +517,7 @@ keras_hub/src/utils/transformers/preset_loader.py,sha256=K5FzDAtCuXS9rmZc0Zj7UCw
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
 keras_hub/tokenizers/__init__.py,sha256=uMjjm0mzUkRb0e4Ac_JK8aJ9cKGUi5UqmzWoWAFJprE,4164
 keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
-keras_hub_nightly-0.22.0.dev202506020416.dist-info/METADATA,sha256=h5I4Lg2V2DMWjoGGUtHAJBBbuodLp-75oHy8LPo2Ukc,7393
-keras_hub_nightly-0.22.0.dev202506020416.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-keras_hub_nightly-0.22.0.dev202506020416.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
-keras_hub_nightly-0.22.0.dev202506020416.dist-info/RECORD,,
+keras_hub_nightly-0.22.0.dev202506050411.dist-info/METADATA,sha256=jY17YDoa_qupp1lX2XHMifxxDZQWQWjzI1aIUTufxY4,7393
+keras_hub_nightly-0.22.0.dev202506050411.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+keras_hub_nightly-0.22.0.dev202506050411.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.22.0.dev202506050411.dist-info/RECORD,,