keras-hub-nightly 0.16.1.dev202410070341__py3-none-any.whl → 0.16.1.dev202410080341__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- keras_hub/src/models/causal_lm.py
+++ keras_hub/src/models/causal_lm.py
@@ -326,6 +326,10 @@ class CausalLM(Task):
             )
         elif stop_token_ids == "auto":
             stop_token_ids = [self.preprocessor.tokenizer.end_token_id]
+            # Some models like Llama3 use two end tokens: <|eot_id|> in
+            # "instruct" versions and <|end_of_text|> in others.
+            if hasattr(self.preprocessor.tokenizer, "end_token2_id"):
+                stop_token_ids.append(self.preprocessor.tokenizer.end_token2_id)
 
         def preprocess(x):
             return self.preprocessor.generate_preprocess(
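
A minimal usage sketch of what this enables (the preset name is assumed here, not taken from this diff): with the default stop_token_ids="auto", generate() now also halts on <|eot_id|> whenever the tokenizer defines end_token2_id.

    import keras_hub

    # "auto" expands to [end_token_id], plus end_token2_id when present.
    causal_lm = keras_hub.models.Llama3CausalLM.from_preset(
        "llama3_instruct_8b_en"
    )
    causal_lm.generate("What is Keras?", max_length=64)
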
--- keras_hub/src/models/llama3/llama3_tokenizer.py
+++ keras_hub/src/models/llama3/llama3_tokenizer.py
@@ -16,10 +16,33 @@ class Llama3Tokenizer(BytePairTokenizer):
         self,
         vocabulary=None,
         merges=None,
+        bos_token="<|begin_of_text|>",
+        eos_token="<|end_of_text|>",
+        misc_special_tokens={"<|start_header_id|>", "<|end_header_id|>"},
         **kwargs,
     ):
-        self._add_special_token("<|begin_of_text|>", "start_token")
-        self._add_special_token("<|end_of_text|>", "end_token")
+        # Note: all special tokens must also appear in "vocabulary".
+
+        self._add_special_token(bos_token, "start_token")
+        misc_special_tokens -= {bos_token}
+        self._add_special_token(eos_token, "end_token")
+        misc_special_tokens -= {eos_token}
+        for i, token in enumerate(misc_special_tokens):
+            self._add_special_token(token, f"special_token_{i:03d}")
+
+        # Hack:
+        # Llama models use <|end_of_text|> or <|eot_id|> as the stop token.
+        # This info can be read from config when loading a Hugging Face
+        # checkpoint, but no such config exists for Keras checkpoints.
+        # Setting both probable end tokens when no config is available
+        # makes text generation work in all cases, as it will stop on
+        # either end token. However, the packer will always use
+        # "<|end_of_text|>", which is the wrong eos_token for "instruct"
+        # variants of Llama3.
+        # TODO: load this correctly from a Keras tokenizer config.
+        if eos_token == "<|end_of_text|>":
+            self._add_special_token("<|eot_id|>", "end_token2")
+
         self.pad_token_id = 0
         super().__init__(
             vocabulary=vocabulary,
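
A standalone sketch of the registration logic above, runnable without keras-hub: bos/eos claim their named slots first and are subtracted from misc_special_tokens so no token is registered twice; the remainder get numbered slots. sorted() is added here only for a deterministic order; the real code iterates the set directly.

    bos_token = "<|begin_of_text|>"
    eos_token = "<|eot_id|>"  # an "instruct"-style eos
    misc = {"<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>"}

    misc -= {bos_token}  # drop bos if a caller also listed it as misc
    misc -= {eos_token}  # likewise eos, so it is not registered twice
    for i, token in enumerate(sorted(misc)):
        print(f"special_token_{i:03d} -> {token}")
    # special_token_000 -> <|end_header_id|>
    # special_token_001 -> <|start_header_id|>
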
--- keras_hub/src/tokenizers/byte_pair_tokenizer.py
+++ keras_hub/src/tokenizers/byte_pair_tokenizer.py
@@ -43,7 +43,11 @@ SPLIT_PATTERN_1 = (
 SPLIT_PATTERN_1 = SPLIT_PATTERN_1.replace(
     "{special_spaces}", SPECIAL_WHITESPACES
 )
-SPLIT_PATTERN_2 = rf"""[\s६{SPECIAL_WHITESPACES}]$"""
+
+# The pattern " \t\r\f\v" is the same as \s "all spaces" but without the \n.
+# Multiple \n\n\n in sequence must not be split for Llama3.
+# SPLIT_PATTERN_2 = rf"""[\s६{SPECIAL_WHITESPACES}]$"""
+SPLIT_PATTERN_2 = rf"""[ \t\r\f\v६{SPECIAL_WHITESPACES}]$"""
 
 
 def create_alts_for_unsplittable_tokens(unsplittable_tokens):
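
A quick behavioral check of the two character classes, using Python's re module rather than the RE2 engine the tokenizer actually runs on, and dropping ६ and SPECIAL_WHITESPACES for brevity:

    import re

    OLD = re.compile(r"[\s]$")         # \s includes \n
    NEW = re.compile(r"[ \t\r\f\v]$")  # same class minus \n

    print(bool(OLD.search("foo\n")))  # True:  a trailing newline used to match
    print(bool(NEW.search("foo\n")))  # False: runs of \n now stay unsplit
    print(bool(NEW.search("foo ")))   # True:  ordinary spaces still match
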
--- keras_hub/src/utils/transformers/convert_llama3.py
+++ keras_hub/src/utils/transformers/convert_llama3.py
@@ -107,10 +107,26 @@ def convert_tokenizer(cls, preset, **kwargs):
     vocab = tokenizer_config["model"]["vocab"]
     merges = tokenizer_config["model"]["merges"]
 
-    bot = tokenizer_config["added_tokens"][0]  # begin of text
-    eot = tokenizer_config["added_tokens"][1]  # end of text
-
-    vocab[bot["content"]] = bot["id"]
-    vocab[eot["content"]] = eot["id"]
+    # Load all special tokens with the exception of "reserved" ones.
+    special_tokens = set()
+    for token in tokenizer_config["added_tokens"]:
+        if not token["content"].startswith("<|reserved_special_token_"):
+            vocab[token["content"]] = token["id"]
+            special_tokens.add(token["content"])
+
+    # Load text start and stop tokens from the config.
+    # Llama3 uses the <|end_of_text|> end token for regular models
+    # but uses <|eot_id|> for instruction-tuned variants.
+    tokenizer_config2 = load_json(preset, "tokenizer_config.json")
+    bos_token = tokenizer_config2["bos_token"]
+    eos_token = tokenizer_config2["eos_token"]
+
+    kwargs.update(
+        {
+            "bos_token": bos_token,
+            "eos_token": eos_token,
+            "misc_special_tokens": special_tokens,
+        }
+    )
 
     return cls(vocabulary=vocab, merges=merges, **kwargs)
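
For reference, a sketch of the two Hugging Face tokenizer files this converter now reads (field layout per the HF tokenizers format; the ids follow the usual Llama3 layout but are illustrative, not read from this diff):

    # tokenizer.json: vocab/merges plus "added_tokens"; reserved slots skipped.
    tokenizer_json = {
        "model": {"vocab": {"a": 0}, "merges": []},
        "added_tokens": [
            {"id": 128000, "content": "<|begin_of_text|>"},
            {"id": 128001, "content": "<|end_of_text|>"},
            {"id": 128002, "content": "<|reserved_special_token_0|>"},  # skipped
            {"id": 128009, "content": "<|eot_id|>"},
        ],
    }

    # tokenizer_config.json: the authoritative bos/eos pair; "instruct"
    # checkpoints point eos_token at <|eot_id|> instead of <|end_of_text|>.
    tokenizer_config_json = {
        "bos_token": "<|begin_of_text|>",
        "eos_token": "<|eot_id|>",
    }
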
--- keras_hub/src/version_utils.py
+++ keras_hub/src/version_utils.py
@@ -1,7 +1,7 @@
 from keras_hub.src.api_export import keras_hub_export
 
 # Unique source of truth for the version number.
-__version__ = "0.16.1.dev202410070341"
+__version__ = "0.16.1.dev202410080341"
 
 
 @keras_hub_export("keras_hub.version")
--- keras_hub_nightly-0.16.1.dev202410070341.dist-info/METADATA
+++ keras_hub_nightly-0.16.1.dev202410080341.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: keras-hub-nightly
-Version: 0.16.1.dev202410070341
+Version: 0.16.1.dev202410080341
 Summary: Industry-strength Natural Language Processing extensions for Keras.
 Home-page: https://github.com/keras-team/keras-hub
 Author: Keras team
--- keras_hub_nightly-0.16.1.dev202410070341.dist-info/RECORD
+++ keras_hub_nightly-0.16.1.dev202410080341.dist-info/RECORD
@@ -9,7 +9,7 @@ keras_hub/api/tokenizers/__init__.py,sha256=_f-r_cyUM2fjBB7iO84ThOdqqsAxHNIewJ2E
 keras_hub/api/utils/__init__.py,sha256=Gp1E6gG-RtKQS3PBEQEOz9PQvXkXaJ0ySGMqZ7myN7A,215
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version_utils.py,sha256=Tt3QcaichNaTMoNu_eci34g4G9ytWSUqQbx-P4xXpyA,222
+keras_hub/src/version_utils.py,sha256=ZcW3wGP8G9ckkrN4UDSpLre640ME6s_nJGCdK-nY_JI,222
 keras_hub/src/bounding_box/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/bounding_box/converters.py,sha256=a5po8DBm87oz2EXfi-0uEZHCMlCJPIb4-MaZIdYx3Dg,17865
 keras_hub/src/bounding_box/formats.py,sha256=YmskOz2BOSat7NaE__J9VfpSNGPJJR0znSzA4lp8MMI,3868
@@ -50,7 +50,7 @@ keras_hub/src/metrics/rouge_l.py,sha256=JlZhMBV6wS_6zMd57pkTc6yxHkEJT9fVQMlPZKek
 keras_hub/src/metrics/rouge_n.py,sha256=JoFtmgjF4Ic263ny6bfD6vMHKreH9le3HnOOxemupRc,3620
 keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/models/backbone.py,sha256=2OZx6WAx2q9JK2yue5BoUUipIBjpOJRVNnMjXLVDLRk,11185
-keras_hub/src/models/causal_lm.py,sha256=p3C5R6hbe1BARHNXJZqtgwlp3bDqkv3gguO19PeJC2c,14791
+keras_hub/src/models/causal_lm.py,sha256=zGUamLuL2HlTgummUhfnA8Uoe4QMsGGLD4uJazxJe-Y,15079
 keras_hub/src/models/causal_lm_preprocessor.py,sha256=YY7VJZicdmnjDSWi9g4_pEpd5bdJK166GlWcapvokF0,6663
 keras_hub/src/models/feature_pyramid_backbone.py,sha256=clEW-TTQSVJ_5qFNdDF0iABkin1p_xlBUFjJrC7T0IA,2247
 keras_hub/src/models/image_classifier.py,sha256=yt6cjhPfqs8A_eWXBsXdXFzn-aRgH2rVHUq7Zu7CyK8,7804
@@ -197,7 +197,7 @@ keras_hub/src/models/llama3/llama3_backbone.py,sha256=nR5y51oI2QraL4Q9IxmQZrr0yS
 keras_hub/src/models/llama3/llama3_causal_lm.py,sha256=0Kcr0sB78wSNDpeo4AE-PeefJe1DxEIdGRNMzdjk3WM,1541
 keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py,sha256=twbXel9hsQgGxDAoQhEQuVm2udnEybI4fAQTJzXAuBs,3064
 keras_hub/src/models/llama3/llama3_presets.py,sha256=n-GIQg6tVf9JY9djBqsFZvWAAuDqXHORrRxFg-xcDFw,2003
-keras_hub/src/models/llama3/llama3_tokenizer.py,sha256=BcNHfsT19LUC0PkEEyN22C9zxPNVboQSK9EGMfhtpnk,789
+keras_hub/src/models/llama3/llama3_tokenizer.py,sha256=J-KxRc08vGs4olFw_4mtJs0W_dTeUyj_XxMycazBmxI,1934
 keras_hub/src/models/mistral/__init__.py,sha256=vjBlzcrIsFSwJKnfwfTNMKstIEKGFTE3kVcdAdfwlnE,263
 keras_hub/src/models/mistral/mistral_attention.py,sha256=HCkUIc2DVIlYC5hhwomENlqLOsKTvbCKF0lx0_OBAyA,7862
 keras_hub/src/models/mistral/mistral_backbone.py,sha256=x4BfyfWTCUXcjPSxdPSl8QITXgzUg1oJlAQt2acZfv4,7245
@@ -327,7 +327,7 @@ keras_hub/src/samplers/top_p_sampler.py,sha256=9r29WdqBlrW_2TBma6QqkRps2Uit4a6iZ
 keras_hub/src/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/tests/test_case.py,sha256=pgjT5CkkkX4BTNfaDD6i-YChO6Ig3But66Ls4RxEymw,25937
 keras_hub/src/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-keras_hub/src/tokenizers/byte_pair_tokenizer.py,sha256=Wocarha6ZuzrfiWHPiQUPLLRLrDITyc0hQzjRupw4xA,23849
+keras_hub/src/tokenizers/byte_pair_tokenizer.py,sha256=fGFp3WgPNYGTztpSGMl0kKFjn1bCeZB71lSJfT1eqEE,24052
 keras_hub/src/tokenizers/byte_tokenizer.py,sha256=vjgrTT8FdtZVAlr0mU13alzADcUhtMrzgOs4lYeHvAQ,10648
 keras_hub/src/tokenizers/sentence_piece_tokenizer.py,sha256=_PaVn4re3AwBkHylJWsvdvOCCYjOnFXLZmj-V34KehU,9562
 keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py,sha256=8X_IN-hPDiUETGrSX3wPzFnip73xTYcN6FhLNIwfy-Y,4834
@@ -354,12 +354,12 @@ keras_hub/src/utils/transformers/convert_bert.py,sha256=4gQqXCJzC9QWdLPDUAq741K8
 keras_hub/src/utils/transformers/convert_distilbert.py,sha256=SlfIRhSRk5c1ir2HGiDPiXa5XdOId_DbcnZO9lbwyZ8,6498
 keras_hub/src/utils/transformers/convert_gemma.py,sha256=ElCgwBpSN5Q7rV5PJawTsoytPzs5ZjuwoY60YAe8y_A,6533
 keras_hub/src/utils/transformers/convert_gpt2.py,sha256=HCeHN_-GiQJRxLCM9OCJJ1watPVpIBF8ujS8pGbBOWc,5703
-keras_hub/src/utils/transformers/convert_llama3.py,sha256=QqsGS2rkQ5EBJUzhq06tJNU07BI7k7wAlUNzUgFEYhs,4620
+keras_hub/src/utils/transformers/convert_llama3.py,sha256=zlg0yFscjytyOFymDwqnbuXkmYvb88qqYzAROKcpaPU,5250
 keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS7eANJUXIsNy1RxWXy20Gqw,4760
 keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYumf66hIid07k5NLqoeWAJgPnaLs,10649
 keras_hub/src/utils/transformers/preset_loader.py,sha256=GS44hZUuGQCtzsyn8z44ZpHdftd3DFemwV2hx2bQa-U,2738
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=rPK-Uw1CG0DX0d_UAD-r2cG9fw8GI8bvAlrcXfQ9g4c,3323
-keras_hub_nightly-0.16.1.dev202410070341.dist-info/METADATA,sha256=-gYUt9I22A6R7D8Tc4jXF5h5BLh-YBIzLj8WH3tzc8w,7458
-keras_hub_nightly-0.16.1.dev202410070341.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-keras_hub_nightly-0.16.1.dev202410070341.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
-keras_hub_nightly-0.16.1.dev202410070341.dist-info/RECORD,,
+keras_hub_nightly-0.16.1.dev202410080341.dist-info/METADATA,sha256=SrlKiCjbDmXdTPsxSP6_NNTb-RKCwlNldhrxmphg_5Y,7458
+keras_hub_nightly-0.16.1.dev202410080341.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+keras_hub_nightly-0.16.1.dev202410080341.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.16.1.dev202410080341.dist-info/RECORD,,