PyPI - keras-nlp - Versions diffs - 0.8.2.dev0__tar.gz → 0.9.0.dev0__tar.gz - Mend

keras-nlp 0.8.2.dev0__tar.gz → 0.9.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (267) hide show

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: keras-nlp
-Version: 0.8.2.dev0
+Version: 0.9.0.dev0
 Summary: Industry-strength Natural Language Processing extensions for Keras.
 Home-page: https://github.com/keras-team/keras-nlp
 Author: Keras team

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/__init__.py RENAMED Viewed

@@ -10,5 +10,6 @@ from keras_nlp import metrics
 from keras_nlp import models
 from keras_nlp import samplers
 from keras_nlp import tokenizers
+from keras_nlp.src.utils.preset_utils import upload_preset
 from keras_nlp.src.version_utils import version
 from keras_nlp.src.version_utils import __version__

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/models/__init__.py RENAMED Viewed

@@ -11,6 +11,7 @@ from keras_nlp.src.models.albert.albert_masked_lm import AlbertMaskedLM
 from keras_nlp.src.models.albert.albert_masked_lm_preprocessor import AlbertMaskedLMPreprocessor
 from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
 from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
+from keras_nlp.src.models.backbone import Backbone
 from keras_nlp.src.models.bart.bart_backbone import BartBackbone
 from keras_nlp.src.models.bart.bart_preprocessor import BartPreprocessor
 from keras_nlp.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
@@ -22,6 +23,13 @@ from keras_nlp.src.models.bert.bert_masked_lm import BertMaskedLM
 from keras_nlp.src.models.bert.bert_masked_lm_preprocessor import BertMaskedLMPreprocessor
 from keras_nlp.src.models.bert.bert_preprocessor import BertPreprocessor
 from keras_nlp.src.models.bert.bert_tokenizer import BertTokenizer
+from keras_nlp.src.models.bloom.bloom_backbone import BloomBackbone
+from keras_nlp.src.models.bloom.bloom_causal_lm import BloomCausalLM
+from keras_nlp.src.models.bloom.bloom_causal_lm_preprocessor import BloomCausalLMPreprocessor
+from keras_nlp.src.models.bloom.bloom_preprocessor import BloomPreprocessor
+from keras_nlp.src.models.bloom.bloom_tokenizer import BloomTokenizer
+from keras_nlp.src.models.causal_lm import CausalLM
+from keras_nlp.src.models.classifier import Classifier
 from keras_nlp.src.models.deberta_v3.deberta_v3_backbone import DebertaV3Backbone
 from keras_nlp.src.models.deberta_v3.deberta_v3_classifier import DebertaV3Classifier
 from keras_nlp.src.models.deberta_v3.deberta_v3_masked_lm import DebertaV3MaskedLM
@@ -34,12 +42,19 @@ from keras_nlp.src.models.distil_bert.distil_bert_masked_lm import DistilBertMas
 from keras_nlp.src.models.distil_bert.distil_bert_masked_lm_preprocessor import DistilBertMaskedLMPreprocessor
 from keras_nlp.src.models.distil_bert.distil_bert_preprocessor import DistilBertPreprocessor
 from keras_nlp.src.models.distil_bert.distil_bert_tokenizer import DistilBertTokenizer
+from keras_nlp.src.models.electra.electra_backbone import ElectraBackbone
+from keras_nlp.src.models.electra.electra_preprocessor import ElectraPreprocessor
+from keras_nlp.src.models.electra.electra_tokenizer import ElectraTokenizer
 from keras_nlp.src.models.f_net.f_net_backbone import FNetBackbone
 from keras_nlp.src.models.f_net.f_net_classifier import FNetClassifier
 from keras_nlp.src.models.f_net.f_net_masked_lm import FNetMaskedLM
 from keras_nlp.src.models.f_net.f_net_masked_lm_preprocessor import FNetMaskedLMPreprocessor
 from keras_nlp.src.models.f_net.f_net_preprocessor import FNetPreprocessor
 from keras_nlp.src.models.f_net.f_net_tokenizer import FNetTokenizer
+from keras_nlp.src.models.falcon.falcon_backbone import FalconBackbone
+from keras_nlp.src.models.falcon.falcon_causal_lm_preprocessor import FalconCausalLMPreprocessor
+from keras_nlp.src.models.falcon.falcon_preprocessor import FalconPreprocessor
+from keras_nlp.src.models.falcon.falcon_tokenizer import FalconTokenizer
 from keras_nlp.src.models.gemma.gemma_backbone import GemmaBackbone
 from keras_nlp.src.models.gemma.gemma_causal_lm import GemmaCausalLM
 from keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor import GemmaCausalLMPreprocessor
@@ -50,7 +65,12 @@ from keras_nlp.src.models.gpt2.gpt2_causal_lm import GPT2CausalLM
 from keras_nlp.src.models.gpt2.gpt2_causal_lm_preprocessor import GPT2CausalLMPreprocessor
 from keras_nlp.src.models.gpt2.gpt2_preprocessor import GPT2Preprocessor
 from keras_nlp.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer
+from keras_nlp.src.models.llama.llama_backbone import LlamaBackbone
+from keras_nlp.src.models.llama.llama_causal_lm import LlamaCausalLM
+from keras_nlp.src.models.llama.llama_causal_lm_preprocessor import LlamaCausalLMPreprocessor
+from keras_nlp.src.models.llama.llama_preprocessor import LlamaPreprocessor
 from keras_nlp.src.models.llama.llama_tokenizer import LlamaTokenizer
+from keras_nlp.src.models.masked_lm import MaskedLM
 from keras_nlp.src.models.mistral.mistral_backbone import MistralBackbone
 from keras_nlp.src.models.mistral.mistral_causal_lm import MistralCausalLM
 from keras_nlp.src.models.mistral.mistral_causal_lm_preprocessor import MistralCausalLMPreprocessor
@@ -61,15 +81,19 @@ from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM
 from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import OPTCausalLMPreprocessor
 from keras_nlp.src.models.opt.opt_preprocessor import OPTPreprocessor
 from keras_nlp.src.models.opt.opt_tokenizer import OPTTokenizer
+from keras_nlp.src.models.preprocessor import Preprocessor
 from keras_nlp.src.models.roberta.roberta_backbone import RobertaBackbone
 from keras_nlp.src.models.roberta.roberta_classifier import RobertaClassifier
 from keras_nlp.src.models.roberta.roberta_masked_lm import RobertaMaskedLM
 from keras_nlp.src.models.roberta.roberta_masked_lm_preprocessor import RobertaMaskedLMPreprocessor
 from keras_nlp.src.models.roberta.roberta_preprocessor import RobertaPreprocessor
 from keras_nlp.src.models.roberta.roberta_tokenizer import RobertaTokenizer
+from keras_nlp.src.models.seq_2_seq_lm import Seq2SeqLM
+from keras_nlp.src.models.task import Task
 from keras_nlp.src.models.xlm_roberta.xlm_roberta_backbone import XLMRobertaBackbone
 from keras_nlp.src.models.xlm_roberta.xlm_roberta_classifier import XLMRobertaClassifier
 from keras_nlp.src.models.xlm_roberta.xlm_roberta_masked_lm import XLMRobertaMaskedLM
 from keras_nlp.src.models.xlm_roberta.xlm_roberta_masked_lm_preprocessor import XLMRobertaMaskedLMPreprocessor
 from keras_nlp.src.models.xlm_roberta.xlm_roberta_preprocessor import XLMRobertaPreprocessor
 from keras_nlp.src.models.xlm_roberta.xlm_roberta_tokenizer import XLMRobertaTokenizer
+from keras_nlp.src.tokenizers.tokenizer import Tokenizer

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/__init__.py RENAMED Viewed

@@ -26,6 +26,7 @@ from keras_nlp.src import models
 from keras_nlp.src import samplers
 from keras_nlp.src import tokenizers
 from keras_nlp.src import utils
+from keras_nlp.src.utils.preset_utils import upload_preset
 from keras_nlp.src.version_utils import __version__
 from keras_nlp.src.version_utils import version

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/conftest.py RENAMED Viewed

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import pytest
 import tensorflow as tf
@@ -83,6 +85,10 @@ def pytest_configure(config):
         "markers",
         "keras_3_only: mark test as a keras 3 only test",
     )
+    config.addinivalue_line(
+        "markers",
+        "kaggle_key_required: mark test needing a kaggle key",
+    )
 def pytest_collection_modifyitems(config, items):
@@ -107,6 +113,16 @@ def pytest_collection_modifyitems(config, items):
         not backend_config.keras_3(),
         reason="tests only run on with multi-backend keras",
     )
+    found_kaggle_key = all(
+        [
+            os.environ.get("KAGGLE_USERNAME", None),
+            os.environ.get("KAGGLE_KEY", None),
+        ]
+    )
+    kaggle_key_required = pytest.mark.skipif(
+        not found_kaggle_key,
+        reason="tests only run with a kaggle api key",
+    )
     for item in items:
         if "large" in item.keywords:
             item.add_marker(skip_large)
@@ -116,6 +132,8 @@ def pytest_collection_modifyitems(config, items):
             item.add_marker(tf_only)
         if "keras_3_only" in item.keywords:
             item.add_marker(keras_3_only)
+        if "kaggle_key_required" in item.keywords:
+            item.add_marker(kaggle_key_required)
 # Disable traceback filtering for quicker debugging of tests failures.

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/alibi_bias.py RENAMED Viewed

@@ -35,12 +35,15 @@ class AlibiBias(keras.layers.Layer):
             each head. The heads' slopes are a geometric sequence that starts at
             `2**(-alibi_bias_max/num_heads)` and uses that same value as its
             ratio. Defaults to 8.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
     Call arguments:
         attention_scores: The result of multipying the query and the key of the
             multi-head attention layer of the transformer to add alibi bias to
             it. With shape `(batch_size, num_heads, query_length, key_length)`.
-    Examples:
+    Example:
     ```python
     query_length = 10
     key_length = 10
@@ -94,7 +97,9 @@ class AlibiBias(keras.layers.Layer):
         )
         slopes = ops.expand_dims(slopes, 1)
-        seq_range = ops.expand_dims(ops.arange(1 - key_length, 1), 0)
+        seq_range = ops.expand_dims(
+            ops.arange(1 - key_length, 1, dtype="int32"), 0
+        )
         seq_range = ops.cast(seq_range, dtype=self.compute_dtype)
         alibi_bias = ops.multiply(slopes, seq_range)

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/f_net_encoder.py RENAMED Viewed

@@ -47,10 +47,10 @@ class FNetEncoder(keras.layers.Layer):
         bias_initializer: "string" or `keras.initializers` initializer.
             The bias initializer for the dense layers.
             Defaults to `"zeros"`.
-        name: string. The name of the layer. Defaults to `None`.
-        **kwargs: other keyword arguments.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
-    Examples:
+    Example:
     ```python
     # Create a single FNet encoder layer.
@@ -79,10 +79,9 @@ class FNetEncoder(keras.layers.Layer):
         layer_norm_epsilon=1e-5,
         kernel_initializer="glorot_uniform",
         bias_initializer="zeros",
-        name=None,
         **kwargs
     ):
-        super().__init__(name=name, **kwargs)
+        super().__init__(**kwargs)
         self.intermediate_dim = intermediate_dim
         self.dropout = dropout
         self.activation = keras.activations.get(activation)

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/masked_lm_head.py RENAMED Viewed

@@ -59,8 +59,10 @@ class MaskedLMHead(keras.layers.Layer):
         bias_initializer: string or `keras.initializers` initializer.
             The bias initializer for the dense and multiheaded
             attention layers. Defaults to `"zeros"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
-    Examples:
+    Example:
     ```python
     batch_size = 16

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/position_embedding.py RENAMED Viewed

@@ -33,6 +33,8 @@ class PositionEmbedding(keras.layers.Layer):
         initializer: The initializer to use for the embedding weights. Defaults
             to `"glorot_uniform"`.
         seq_axis: The axis of the input tensor where we add the embeddings.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
     Call arguments:
         inputs: The tensor inputs to compute an embedding for, with shape
@@ -43,7 +45,7 @@ class PositionEmbedding(keras.layers.Layer):
             compute the position embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
-    Examples:
+    Example:
     Called directly on input.
     >>> layer = keras_nlp.layers.PositionEmbedding(sequence_length=10)

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/reversible_embedding.py RENAMED Viewed

@@ -52,6 +52,8 @@ class ReversibleEmbedding(keras.layers.Embedding):
         reverse_dtype: The dtype for the reverse projection computation.
             For stability, it is usually best to use full precision even when
             working with half or mixed precision training.
+        **kwargs: other keyword arguments passed to `keras.layers.Embedding`,
+            including `name`, `trainable`, `dtype` etc.
     Call arguments:
         inputs: The tensor inputs to the layer.
@@ -59,7 +61,7 @@ class ReversibleEmbedding(keras.layers.Embedding):
             from `output_dim` to `input_dim`, instead of a normal embedding
             call. Default to `False`.
-    Examples:
+    Example:
     ```python
     batch_size = 16
     vocab_size = 100
@@ -73,7 +75,7 @@ class ReversibleEmbedding(keras.layers.Embedding):
     # Embed tokens to shape `(batch_size, seq_length, hidden_dim)`.
     hidden_states = embedding(token_ids)
     # Project hidden states to shape `(batch_size, seq_length, vocab_size)`.
-    logits = embedding(hidden_state, reverse=True)
+    logits = embedding(hidden_states, reverse=True)
     ```
     References:

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/rotary_embedding.py RENAMED Viewed

@@ -38,6 +38,8 @@ class RotaryEmbedding(keras.layers.Layer):
         scaling_factor: float. The scaling factor used to scale frequency range.
         sequence_axis: int. Sequence axis in the input tensor.
         feature_axis: int. Feature axis in the input tensor.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
     Call arguments:
         inputs: The tensor inputs to apply the embedding to. This can have
@@ -85,30 +87,42 @@ class RotaryEmbedding(keras.layers.Layer):
         self.built = True
     def call(self, inputs, start_index=0):
+        inputs = ops.moveaxis(
+            inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
+        )
         cos_emb, sin_emb = self._compute_cos_sin_embedding(inputs, start_index)
-        return self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
+        output = self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
+        return ops.moveaxis(
+            output, (-1, 1), (self.feature_axis, self.sequence_axis)
+        )
     def _apply_rotary_pos_emb(self, tensor, cos_emb, sin_emb):
-        x1, x2 = ops.split(tensor, 2, axis=self.feature_axis)
-        half_rot_tensor = ops.concatenate((-x2, x1), axis=self.feature_axis)
+        x1, x2 = ops.split(tensor, 2, axis=-1)
+        # Avoid `ops.concatenate` for now, to avoid a obscure bug with XLA
+        # compilation on jax. We should be able to remove this once the
+        # following PR is in all jax releases we care about:
+        # https://github.com/openxla/xla/pull/7875
+        half_rot_tensor = ops.stack((-x2, x1), axis=-2)
+        half_rot_tensor = ops.reshape(half_rot_tensor, ops.shape(tensor))
         return (tensor * cos_emb) + (half_rot_tensor * sin_emb)
     def _compute_cos_sin_embedding(self, inputs, start_index=0):
-        def get_axis(axis):
-            return axis if axis > 0 else len(inputs.shape) + axis
+        start_index = ops.cast(start_index, dtype="float32")
-        feature_axis = get_axis(self.feature_axis)
-        sequence_axis = get_axis(self.sequence_axis)
+        feature_axis = len(inputs.shape) - 1
+        sequence_axis = 1
         rotary_dim = ops.shape(inputs)[feature_axis]
         inverse_freq = self._get_inverse_freq(rotary_dim)
-        seq_len = ops.shape(inputs)[self.sequence_axis]
-        tensor = ops.cast(ops.arange(seq_len), self.compute_dtype) + start_index
+        seq_len = ops.shape(inputs)[sequence_axis]
+        tensor = ops.arange(seq_len, dtype="float32") + start_index
-        tensor = ops.cast(tensor, dtype=inverse_freq.dtype)
         freq = ops.einsum("i,j->ij", tensor, inverse_freq)
-        embedding = ops.concatenate((freq, freq), axis=-1)
+        embedding = ops.stack((freq, freq), axis=-2)
+        embedding = ops.reshape(
+            embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
+        )
         # Reshape the embedding to be broadcastable with input shape.
         if feature_axis < sequence_axis:
@@ -117,17 +131,16 @@ class RotaryEmbedding(keras.layers.Layer):
             if axis != sequence_axis and axis != feature_axis:
                 embedding = ops.expand_dims(embedding, axis)
-        return ops.cos(embedding), ops.sin(embedding)
+        cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
+        sin_emb = ops.cast(ops.sin(embedding), self.compute_dtype)
+        return cos_emb, sin_emb
     def _get_inverse_freq(self, rotary_dim):
-        freq_range = ops.arange(0, rotary_dim, 2)
-        freq_range = ops.cast(freq_range, self.compute_dtype)
-        freq_range = freq_range / ops.cast(
-            self.scaling_factor, self.compute_dtype
-        )
+        freq_range = ops.arange(0, rotary_dim, 2, dtype="float32")
+        freq_range = freq_range / ops.cast(self.scaling_factor, "float32")
         inverse_freq = 1.0 / (
             self.max_wavelength
-            ** (freq_range / ops.cast(rotary_dim, self.compute_dtype))
+            ** (freq_range / ops.cast(rotary_dim, "float32"))
         )
         return inverse_freq

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/sine_position_encoding.py RENAMED Viewed

@@ -34,6 +34,8 @@ class SinePositionEncoding(keras.layers.Layer):
         max_wavelength: The maximum angular wavelength of the sine/cosine
             curves, as described in Attention is All You Need. Defaults to
             `10000`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
     Call arguments:
         inputs: The tensor inputs to compute an embedding for, with shape
@@ -42,7 +44,7 @@ class SinePositionEncoding(keras.layers.Layer):
             compute the encoding from. This is useful during cached decoding,
             where each position is predicted separately in a loop.
-    Examples:
+    Example:
     ```python
     # create a simple embedding layer with sinusoidal positional encoding
     seq_len = 100

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/token_and_position_embedding.py RENAMED Viewed

@@ -33,6 +33,9 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
         vocabulary_size: The size of the vocabulary.
         sequence_length: The maximum length of input sequence
         embedding_dim: The output dimension of the embedding layer
+        tie_weights: Boolean, whether or not the matrix for embedding and
+            the matrix for the `reverse` projection should share the same
+            weights.
         embeddings_initializer: The initializer to use for the Embedding
             Layers
         mask_zero: Boolean, whether or not the input value 0 is a special
@@ -43,8 +46,10 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
             If mask_zero` is set to True, as a consequence, index 0 cannot be
             used in the vocabulary
             (input_dim should equal size of vocabulary + 1).
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
-    Examples:
+    Example:
     ```python
     inputs = np.ones(shape=(1, 50), dtype="int32")
     embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_decoder.py RENAMED Viewed

@@ -34,12 +34,9 @@ class TransformerDecoder(keras.layers.Layer):
     paper [Attention is All You Need](https://arxiv.org/abs/1706.03762). Users
     can instantiate multiple instances of this class to stack up a decoder.
-    By default, this layer will apply a causal mask to the decoder attention layer.
-    This layer will correctly compute an attention mask from an implicit
-    Keras padding mask (for example, by passing `mask_zero=True` to a
-    `keras.layers.Embedding` layer). See the Masking and Padding
-    [guide](https://keras.io/guides/understanding_masking_and_padding/)
-    for more details.
+    By default, this layer will apply a causal mask to the decoder attention
+    layer. You can also pass padding or attention masks directly to the layer
+    during call, e.g. with `decoder_padding_mask` or `decoder_attention_mask`.
     This layer can be called with either one or two inputs. The number of inputs
     must be consistent across all calls. The options are as follows:
@@ -72,10 +69,10 @@ class TransformerDecoder(keras.layers.Layer):
             (similar to GPT-2). If set to False, outputs of attention layer and
             intermediate dense layer are normalized (similar to BERT).
             Defaults to `False`.
-        name: string. The name of the layer. Defaults to `None`.
-        **kwargs: other keyword arguments.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
-    Examples:
+    Example:
     ```python
     # Create a single transformer decoder layer.
     decoder = keras_nlp.layers.TransformerDecoder(

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_encoder.py RENAMED Viewed

@@ -58,10 +58,10 @@ class TransformerEncoder(keras.layers.Layer):
             (similar to GPT-2). If set to False, outputs of attention layer and
             intermediate dense layer are normalized (similar to BERT).
             Defaults to `False`.
-        name: string. The name of the layer. Defaults to `None`.
-        **kwargs: other keyword arguments.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `trainable`, `dtype` etc.
-    Examples:
+    Example:
     ```python
     # Create a single transformer encoder layer.

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/__init__.py RENAMED Viewed

@@ -20,6 +20,7 @@ from keras_nlp.src.models.albert.albert_masked_lm_preprocessor import (
 )
 from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
 from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
+from keras_nlp.src.models.backbone import Backbone
 from keras_nlp.src.models.bart.bart_backbone import BartBackbone
 from keras_nlp.src.models.bart.bart_preprocessor import BartPreprocessor
 from keras_nlp.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
@@ -36,7 +37,14 @@ from keras_nlp.src.models.bert.bert_masked_lm_preprocessor import (
 from keras_nlp.src.models.bert.bert_preprocessor import BertPreprocessor
 from keras_nlp.src.models.bert.bert_tokenizer import BertTokenizer
 from keras_nlp.src.models.bloom.bloom_backbone import BloomBackbone
+from keras_nlp.src.models.bloom.bloom_causal_lm import BloomCausalLM
+from keras_nlp.src.models.bloom.bloom_causal_lm_preprocessor import (
+    BloomCausalLMPreprocessor,
+)
+from keras_nlp.src.models.bloom.bloom_preprocessor import BloomPreprocessor
 from keras_nlp.src.models.bloom.bloom_tokenizer import BloomTokenizer
+from keras_nlp.src.models.causal_lm import CausalLM
+from keras_nlp.src.models.classifier import Classifier
 from keras_nlp.src.models.deberta_v3.deberta_v3_backbone import DebertaV3Backbone
 from keras_nlp.src.models.deberta_v3.deberta_v3_classifier import (
     DebertaV3Classifier,
@@ -66,6 +74,7 @@ from keras_nlp.src.models.distil_bert.distil_bert_tokenizer import (
     DistilBertTokenizer,
 )
 from keras_nlp.src.models.electra.electra_backbone import ElectraBackbone
+from keras_nlp.src.models.electra.electra_preprocessor import ElectraPreprocessor
 from keras_nlp.src.models.electra.electra_tokenizer import ElectraTokenizer
 from keras_nlp.src.models.f_net.f_net_backbone import FNetBackbone
 from keras_nlp.src.models.f_net.f_net_classifier import FNetClassifier
@@ -75,6 +84,8 @@ from keras_nlp.src.models.f_net.f_net_masked_lm_preprocessor import (
 )
 from keras_nlp.src.models.f_net.f_net_preprocessor import FNetPreprocessor
 from keras_nlp.src.models.f_net.f_net_tokenizer import FNetTokenizer
+from keras_nlp.src.models.falcon.falcon_backbone import FalconBackbone
+from keras_nlp.src.models.falcon.falcon_tokenizer import FalconTokenizer
 from keras_nlp.src.models.gemma.gemma_backbone import GemmaBackbone
 from keras_nlp.src.models.gemma.gemma_causal_lm import GemmaCausalLM
 from keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor import (
@@ -99,6 +110,13 @@ from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_preprocessor import (
 )
 from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
 from keras_nlp.src.models.llama.llama_backbone import LlamaBackbone
+from keras_nlp.src.models.llama.llama_causal_lm import LlamaCausalLM
+from keras_nlp.src.models.llama.llama_causal_lm_preprocessor import (
+    LlamaCausalLMPreprocessor,
+)
+from keras_nlp.src.models.llama.llama_preprocessor import LlamaPreprocessor
+from keras_nlp.src.models.llama.llama_tokenizer import LlamaTokenizer
+from keras_nlp.src.models.masked_lm import MaskedLM
 from keras_nlp.src.models.mistral.mistral_backbone import MistralBackbone
 from keras_nlp.src.models.mistral.mistral_causal_lm import MistralCausalLM
 from keras_nlp.src.models.mistral.mistral_causal_lm_preprocessor import (
@@ -113,6 +131,7 @@ from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import (
 )
 from keras_nlp.src.models.opt.opt_preprocessor import OPTPreprocessor
 from keras_nlp.src.models.opt.opt_tokenizer import OPTTokenizer
+from keras_nlp.src.models.preprocessor import Preprocessor
 from keras_nlp.src.models.roberta.roberta_backbone import RobertaBackbone
 from keras_nlp.src.models.roberta.roberta_classifier import RobertaClassifier
 from keras_nlp.src.models.roberta.roberta_masked_lm import RobertaMaskedLM
@@ -121,8 +140,10 @@ from keras_nlp.src.models.roberta.roberta_masked_lm_preprocessor import (
 )
 from keras_nlp.src.models.roberta.roberta_preprocessor import RobertaPreprocessor
 from keras_nlp.src.models.roberta.roberta_tokenizer import RobertaTokenizer
+from keras_nlp.src.models.seq_2_seq_lm import Seq2SeqLM
 from keras_nlp.src.models.t5.t5_backbone import T5Backbone
 from keras_nlp.src.models.t5.t5_tokenizer import T5Tokenizer
+from keras_nlp.src.models.task import Task
 from keras_nlp.src.models.whisper.whisper_audio_feature_extractor import (
     WhisperAudioFeatureExtractor,
 )
@@ -146,4 +167,5 @@ from keras_nlp.src.models.xlm_roberta.xlm_roberta_tokenizer import (
     XLMRobertaTokenizer,
 )
 from keras_nlp.src.models.xlnet.xlnet_backbone import XLNetBackbone
+from keras_nlp.src.tokenizers.tokenizer import Tokenizer

keras-nlp-0.9.0.dev0/keras_nlp/src/models/albert/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+# Copyright 2023 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from keras_nlp.src.models.albert.albert_backbone import AlbertBackbone
+from keras_nlp.src.models.albert.albert_presets import backbone_presets
+from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
+from keras_nlp.src.utils.preset_utils import register_presets
+register_presets(backbone_presets, (AlbertBackbone, AlbertTokenizer))

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_backbone.py RENAMED Viewed

@@ -12,17 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import copy
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.backend import keras
 from keras_nlp.src.layers.modeling.position_embedding import PositionEmbedding
 from keras_nlp.src.layers.modeling.reversible_embedding import ReversibleEmbedding
 from keras_nlp.src.layers.modeling.transformer_encoder import TransformerEncoder
-from keras_nlp.src.models.albert.albert_presets import backbone_presets
 from keras_nlp.src.models.backbone import Backbone
 from keras_nlp.src.utils.keras_utils import gelu_approximate
-from keras_nlp.src.utils.python_utils import classproperty
 def albert_kernel_initializer(stddev=0.02):
@@ -77,7 +73,7 @@ class AlbertBackbone(Backbone):
             such as softmax and layer normalization, will always be done at
             float32 precision regardless of dtype.
-    Examples:
+    Example:
     ```python
     input_data = {
         "token_ids": np.ones(shape=(1, 12), dtype="int32"),
@@ -230,6 +226,7 @@ class AlbertBackbone(Backbone):
                 "sequence_output": sequence_output,
                 "pooled_output": pooled_output,
             },
+            dtype=dtype,
             **kwargs,
         )
@@ -266,7 +263,3 @@ class AlbertBackbone(Backbone):
         )
         return config
-    @classproperty
-    def presets(cls):
-        return copy.deepcopy(backbone_presets)

{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_classifier.py RENAMED Viewed

@@ -12,20 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import copy
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.backend import keras
 from keras_nlp.src.models.albert.albert_backbone import AlbertBackbone
 from keras_nlp.src.models.albert.albert_backbone import albert_kernel_initializer
 from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
-from keras_nlp.src.models.albert.albert_presets import backbone_presets
-from keras_nlp.src.models.task import Task
-from keras_nlp.src.utils.python_utils import classproperty
+from keras_nlp.src.models.classifier import Classifier
 @keras_nlp_export("keras_nlp.models.AlbertClassifier")
-class AlbertClassifier(Task):
+class AlbertClassifier(Classifier):
     """An end-to-end ALBERT model for classification tasks
     This model attaches a classification head to a `keras_nlp.model.AlbertBackbone`
@@ -146,6 +142,9 @@ class AlbertClassifier(Task):
     ```
     """
+    backbone_cls = AlbertBackbone
+    preprocessor_cls = AlbertPreprocessor
     def __init__(
         self,
         backbone,
@@ -187,17 +186,6 @@ class AlbertClassifier(Task):
         self.activation = keras.activations.get(activation)
         self.dropout = dropout
-        # === Default compilation ===
-        logit_output = self.activation == keras.activations.linear
-        self.compile(
-            loss=keras.losses.SparseCategoricalCrossentropy(
-                from_logits=logit_output
-            ),
-            optimizer=keras.optimizers.Adam(5e-5),
-            metrics=[keras.metrics.SparseCategoricalAccuracy()],
-            jit_compile=True,
-        )
     def get_config(self):
         config = super().get_config()
         config.update(
@@ -210,15 +198,3 @@ class AlbertClassifier(Task):
         return config
-    @classproperty
-    def backbone_cls(cls):
-        return AlbertBackbone
-    @classproperty
-    def preprocessor_cls(cls):
-        return AlbertPreprocessor
-    @classproperty
-    def presets(cls):
-        return copy.deepcopy({**backbone_presets})

keras-nlp 0.8.2.dev0__tar.gz → 0.9.0.dev0__tar.gz

keras-nlp 0.8.2.dev0__tar.gz → 0.9.0.dev0__tar.gz