keras-hub-nightly 0.21.0.dev202505230409__py3-none-any.whl → 0.21.0.dev202505240409__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- keras_hub/src/models/audio_to_text.py +66 -0
- keras_hub/src/models/audio_to_text_preprocessor.py +80 -0
- keras_hub/src/models/moonshine/__init__.py +5 -0
- keras_hub/src/models/moonshine/moonshine_audio_to_text.py +2 -2
- keras_hub/src/models/moonshine/moonshine_audio_to_text_preprocessor.py +4 -2
- keras_hub/src/version.py +1 -1
- {keras_hub_nightly-0.21.0.dev202505230409.dist-info → keras_hub_nightly-0.21.0.dev202505240409.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.21.0.dev202505230409.dist-info → keras_hub_nightly-0.21.0.dev202505240409.dist-info}/RECORD +10 -8
- {keras_hub_nightly-0.21.0.dev202505230409.dist-info → keras_hub_nightly-0.21.0.dev202505240409.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.21.0.dev202505230409.dist-info → keras_hub_nightly-0.21.0.dev202505240409.dist-info}/top_level.txt +0 -0
keras_hub/src/models/audio_to_text.py
ADDED
@@ -0,0 +1,66 @@
+from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM
+
+
+class AudioToText(Seq2SeqLM):
+    """Base class for audio-to-text models.
+
+    `AudioToText` tasks wrap a `keras_hub.models.Backbone` (capable of
+    processing audio and text features) and a
+    `keras_hub.models.AudioToTextPreprocessor` to create a model for
+    audio-to-text tasks like speech recognition or audio transcription.
+
+    These models typically consist of an encoder that processes audio input
+    and a decoder that generates a textual representation.
+
+    `AudioToText` tasks provide a high-level `generate()` method for
+    auto-regressively generating text from audio input. An optional text
+    prompt can also be provided to the decoder to guide generation. The
+    sampling strategy for generation (e.g., greedy, top-k, top-p) can be
+    controlled via the `sampler` argument in the `compile()` method.
+
+    When calling `fit()`, inputs should consist of audio data and
+    corresponding target text transcriptions. The model is trained to
+    predict the target text token-by-token.
+
+    All `AudioToText` tasks include a `from_preset()` constructor which can
+    be used to load pre-trained configurations and weights for specific
+    audio-to-text models. This constructor can also be called on the base
+    `AudioToText` class, which will automatically select the correct
+    subclass based on the preset.
+
+    Examples:
+    ```python
+    # `AudioToText` is a base class. You will typically work with a
+    # specific implementation, such as
+    # `keras_hub.models.MoonshineAudioToText`.
+
+    # Initialize a model from a preset using the specific subclass.
+    audio_to_text = keras_hub.models.MoonshineAudioToText.from_preset(
+        "moonshine_base_en"
+    )
+
+    # Or initialize from the base class, which selects the subclass.
+    audio_to_text = keras_hub.models.AudioToText.from_preset(
+        "moonshine_base_en"
+    )
+
+    # Generate text from an audio input.
+    audio_input_tensor = keras.random.normal((1, 16000, 1))
+    generated_output = audio_to_text.generate(
+        {"audio": audio_input_tensor}
+    )
+
+    # Generate conditioned on `"The quick brown fox."` as a text prompt.
+    prompted_output = audio_to_text.generate(
+        {"audio": audio_input_tensor, "text": "The quick brown fox."}
+    )
+
+    # Use a different sampling strategy for generation.
+    audio_to_text.compile(sampler="greedy")
+    greedy_output = audio_to_text.generate(
+        {"audio": audio_input_tensor}
+    )
+    ```
+    """
+
+    # TODO: Fill in once audio to text task model requirements are clearer.
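The docstring above shows generation only; the `fit()` contract it describes can be sketched as follows. This is a hedged illustration rather than code from the package: the preset name comes from the docstring, while the batch size, audio length, and transcription strings are invented for demonstration.

```python
import keras
import keras_hub

# Load the task with its attached preprocessor (preset name as used in
# the docstring above).
audio_to_text = keras_hub.models.MoonshineAudioToText.from_preset(
    "moonshine_base_en"
)

# With a preprocessor attached, `fit()` takes raw audio plus target
# transcriptions; the preprocessor tokenizes the text and builds the
# shifted decoder targets. Shapes and strings are illustrative only.
features = {
    "audio": keras.random.normal((2, 16000, 1)),
    "text": ["first transcription", "second transcription"],
}
audio_to_text.fit(x=features, batch_size=2)
```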
keras_hub/src/models/audio_to_text_preprocessor.py
ADDED
@@ -0,0 +1,80 @@
+from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor
+
+
+class AudioToTextPreprocessor(Seq2SeqLMPreprocessor):
+    """Base class for audio-to-text preprocessing layers.
+
+    `AudioToTextPreprocessor` layers wrap an audio feature extractor (specific
+    to the subclass) and a `keras_hub.tokenizer.Tokenizer` to create a
+    preprocessing layer for audio-to-text tasks. It is intended to be
+    paired with a `keras_hub.models.AudioToText` task.
+
+    Subclasses are expected to handle the conversion of raw audio data into
+    numerical features suitable for an encoder, and raw text data into token IDs
+    for a decoder.
+
+    All `AudioToTextPreprocessor` layers take a dictionary as input,
+    typically with keys like `"audio"` (for audio data) and `"text"` (for
+    target transcriptions or decoder prompts).
+
+    This layer will always output a `(x, y, sample_weight)` tuple, where `x`
+    is a dictionary containing processed audio features for the encoder and
+    tokenized text inputs for the decoder. `y` contains the target token IDs
+    (decoder input tokens shifted by one position), and `sample_weight`
+    indicates padding in `y`. The exact keys and structure of features within
+    `x` will depend on the specific subclass and the paired `AudioToText` model.
+
+    An `AudioToTextPreprocessor` includes `generate_preprocess` and
+    `generate_postprocess` methods for use during inference with an
+    `AudioToText` model's `generate()` method.
+
+    All `AudioToTextPreprocessor` layers include a `from_preset()` constructor
+    which can be used to load a pre-trained configuration, including tokenizer
+    vocabularies and audio feature extraction settings. Calling `from_preset()`
+    on this base class can instantiate the correct subclass registered for the
+    given preset.
+
+    Examples:
+    ```python
+    preprocessor = keras_hub.models.AudioToTextPreprocessor.from_preset(
+        "moonshine_base_en",
+        decoder_sequence_length=10
+    )
+
+    # Process a single audio-text pair.
+    x = {
+        "audio": keras.random.normal((1, 16000, 1)),
+        "text": ["the quick brown fox"]
+    }
+    x, y, sample_weight = preprocessor(x)
+
+    # Process a batch of audio-text pairs.
+    batch = {
+        "audio": keras.random.normal((2, 16000, 1)),
+        "text": ["first sentence", "second sentence"]
+    }
+    x, y, sample_weight = preprocessor(batch)
+
+    # With a `tf.data.Dataset`, reusing the raw `batch` dict from above.
+    audio_tf = keras.ops.convert_to_tensor(batch["audio"])
+    text_tf = batch["text"]  # List of strings.
+    x = {"audio": audio_tf, "text": text_tf}
+    ds = tf.data.Dataset.from_tensor_slices(x)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ds = ds.batch(2)  # Batching after map.
+
+    # Generate preprocess and postprocess.
+    x = preprocessor.generate_preprocess({
+        "audio": keras.random.normal((1, 16000, 1)),
+        "text": ["optional prompt text"]
+    })
+    text = preprocessor.generate_postprocess({
+        "decoder_token_ids": keras.ops.array([[10, 20, 30, 2, 0]]),
+        "decoder_padding_mask": keras.ops.array([
+            [True, True, True, True, False]
+        ])
+    })
+    ```
+    """
+
+    # TODO: Fill in once audio to text task model requirements are clearer.
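The `(x, y, sample_weight)` contract described above is easiest to see on toy values. A minimal sketch of the shift relationship, with invented token IDs (1 and 2 standing in for start and end tokens, 0 for padding) rather than real Moonshine tokenizer output:

```python
import numpy as np

# Toy packed decoder inputs: [start, t1, t2, end, pad]. All IDs invented.
decoder_token_ids = np.array([[1, 10, 20, 2, 0]])
decoder_padding_mask = np.array([[True, True, True, True, False]])

# `y` is the decoder input shifted one position left: at each step the
# model is trained to predict the following token.
y = decoder_token_ids[:, 1:]  # [[10, 20, 2, 0]]

# `sample_weight` masks out positions in `y` that are padding.
sample_weight = decoder_padding_mask[:, 1:]  # [[True, True, True, False]]
```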
keras_hub/src/models/moonshine/__init__.py
ADDED
@@ -0,0 +1,5 @@
+from keras_hub.src.models.moonshine.moonshine_backbone import MoonshineBackbone
+from keras_hub.src.models.moonshine.moonshine_presets import backbone_presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, MoonshineBackbone)
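This import-time `register_presets` call is what makes the `"moonshine_base_en"` name used throughout the docstrings above resolvable. A hedged sketch of the effect; the preset name is taken from this diff, while the exact contents of the `presets` mapping depend on `moonshine_presets.py`, which this diff does not show:

```python
import keras_hub

# Registered presets appear on the class's `presets` mapping...
print(list(keras_hub.models.MoonshineBackbone.presets))

# ...and the generic factory can dispatch on the preset name alone.
backbone = keras_hub.models.Backbone.from_preset("moonshine_base_en")
```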
keras_hub/src/models/moonshine/moonshine_audio_to_text.py
CHANGED
@@ -1,6 +1,7 @@
 import keras
 
 from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.audio_to_text import AudioToText
 from keras_hub.src.models.moonshine.moonshine_audio_to_text_preprocessor import (  # noqa: E501
     MoonshineAudioToTextPreprocessor,
 )
@@ -9,12 +10,11 @@ from keras_hub.src.models.moonshine.moonshine_backbone import MoonshineBackbone
 from keras_hub.src.models.moonshine.moonshine_backbone import (
     compute_output_lengths,
 )
-from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM
 from keras_hub.src.utils.tensor_utils import any_equal
 
 
 @keras_hub_export("keras_hub.models.MoonshineAudioToText")
-class MoonshineAudioToText(Seq2SeqLM):
+class MoonshineAudioToText(AudioToText):
     """An end-to-end Moonshine model for audio-to-text tasks.
 
     A Seq2Seq LM designed for audio-to-text tasks, such as speech recognition.
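Reparenting `MoonshineAudioToText` from `Seq2SeqLM` to `AudioToText` is what backs the base-class constructor advertised in the new `AudioToText` docstring. A sketch of the resolution; whether `AudioToText` is exported under `keras_hub.models` in this nightly is an assumption carried over from that docstring:

```python
import keras_hub

# The base-class factory resolves the preset to the Moonshine subclass.
model = keras_hub.models.AudioToText.from_preset("moonshine_base_en")
assert isinstance(model, keras_hub.models.MoonshineAudioToText)

# `AudioToText` itself extends `Seq2SeqLM`, so existing Seq2Seq LM
# behavior (compile, generate, fit) is unchanged by this reparenting.
assert isinstance(model, keras_hub.models.Seq2SeqLM)
```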
keras_hub/src/models/moonshine/moonshine_audio_to_text_preprocessor.py
CHANGED
@@ -6,16 +6,18 @@ except ImportError:
     tf = None
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker
+from keras_hub.src.models.audio_to_text_preprocessor import (
+    AudioToTextPreprocessor,
+)
 from keras_hub.src.models.moonshine.moonshine_backbone import MoonshineBackbone
 from keras_hub.src.models.moonshine.moonshine_tokenizer import (
     MoonshineTokenizer,
 )
-from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor
 from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 
 @keras_hub_export("keras_hub.models.MoonshineAudioToTextPreprocessor")
-class MoonshineAudioToTextPreprocessor(Seq2SeqLMPreprocessor):
+class MoonshineAudioToTextPreprocessor(AudioToTextPreprocessor):
     """Moonshine Seq2Seq LM preprocessor for audio-to-text tasks.
 
     This preprocessor converts raw audio and text inputs into a format suitable
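The preprocessor gets the same treatment, so a customized preprocessor loaded through the base class can be paired with the task. A hedged sketch: the `decoder_sequence_length` override mirrors the `AudioToTextPreprocessor` docstring, and passing `preprocessor=` through `from_preset` follows the usual keras-hub task pattern rather than anything shown in this diff:

```python
import keras_hub

# Load the preprocessor via the base class with a shorter decoder length.
preprocessor = keras_hub.models.AudioToTextPreprocessor.from_preset(
    "moonshine_base_en",
    decoder_sequence_length=10,
)
assert isinstance(
    preprocessor, keras_hub.models.MoonshineAudioToTextPreprocessor
)

# Hand the customized preprocessor to the task.
model = keras_hub.models.MoonshineAudioToText.from_preset(
    "moonshine_base_en",
    preprocessor=preprocessor,
)
```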
keras_hub/src/version.py
CHANGED
-__version__ = "0.21.0.dev202505230409"
+__version__ = "0.21.0.dev202505240409"
{keras_hub_nightly-0.21.0.dev202505230409.dist-info → keras_hub_nightly-0.21.0.dev202505240409.dist-info}/RECORD
CHANGED
@@ -5,7 +5,7 @@ keras_hub/models/__init__.py,sha256=itSzodVUeuX6HQnmsSXY0Wv-5Htbu397410R-SFW_4I,
 keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version.py,sha256=
+keras_hub/src/version.py,sha256=AnU8tBqSqSoLY34F6O-fFt47PXgrGHUbqORa6_sXy6w,222
 keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -41,6 +41,8 @@ keras_hub/src/metrics/rouge_base.py,sha256=Pt2DUznhTTeR-fX1nQ_wSbPtmuTgxQTvrGpu8
 keras_hub/src/metrics/rouge_l.py,sha256=JlZhMBV6wS_6zMd57pkTc6yxHkEJT9fVQMlPZKekQzQ,2729
 keras_hub/src/metrics/rouge_n.py,sha256=JoFtmgjF4Ic263ny6bfD6vMHKreH9le3HnOOxemupRc,3620
 keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+keras_hub/src/models/audio_to_text.py,sha256=XoOjXtKBX6K1fz-zOXcdVo3FpjuxCMnJZh2LQcYXb_0,2726
+keras_hub/src/models/audio_to_text_preprocessor.py,sha256=GS-WWyJ6aSsPRxi_0bxvxA00h2mT2FEwSdAoQXAUYVI,3249
 keras_hub/src/models/backbone.py,sha256=KS2x3HFWKhEYhroUFT3uZgSkeW_48zPGqUNvxCDDIQQ,11534
 keras_hub/src/models/causal_lm.py,sha256=ReaF-i3SHsCkHh4c28jM72QjMQ8x7yiCwG39FRb-7KE,16786
 keras_hub/src/models/causal_lm_preprocessor.py,sha256=YY7VJZicdmnjDSWi9g4_pEpd5bdJK166GlWcapvokF0,6663
@@ -265,10 +267,10 @@ keras_hub/src/models/mobilenet/mobilenet_image_classifier_preprocessor.py,sha256
 keras_hub/src/models/mobilenet/mobilenet_image_converter.py,sha256=a3Ka0UYYK5wHSOjf2oMHSgofRazTAeUfttklVefq14w,360
 keras_hub/src/models/mobilenet/mobilenet_presets.py,sha256=--nhaM6LmaiCtQlZPDwoQTHW7ciU0igzS4f9ssdD9Lo,1903
 keras_hub/src/models/mobilenet/util.py,sha256=S7j4UacmVIJ3fU8cymyAoK49eHcpWIKTOyUQiEjcbzQ,721
-keras_hub/src/models/moonshine/__init__.py,sha256=
+keras_hub/src/models/moonshine/__init__.py,sha256=WK_9Cy1dp5KplNAaTsaJbd-2DGLsiHQsIL5ZnXuCbDQ,275
 keras_hub/src/models/moonshine/moonshine_audio_converter.py,sha256=FnvR7SP44uVOsA3g9azUhQjsVg809eJ5nqoJZQ-DAq0,11854
-keras_hub/src/models/moonshine/moonshine_audio_to_text.py,sha256=
-keras_hub/src/models/moonshine/moonshine_audio_to_text_preprocessor.py,sha256=
+keras_hub/src/models/moonshine/moonshine_audio_to_text.py,sha256=dXFtjaxL1jpcIAiiZY1-kcNL-S4RiRJiAC2uR_a3Fyc,15865
+keras_hub/src/models/moonshine/moonshine_audio_to_text_preprocessor.py,sha256=hTw941ww8cJrP5DRrxv2DtZUNLJ9A3cayFhnsG5Ef4g,10016
 keras_hub/src/models/moonshine/moonshine_backbone.py,sha256=XtRUBe_VusXsFRk7-t1JNXM0lxp2UBOJk9v7gfTNDhA,19623
 keras_hub/src/models/moonshine/moonshine_decoder.py,sha256=Exf5Gg1gsCBST53wxOgBetKkhjS8E8QIUIlUwHlOkIY,11816
 keras_hub/src/models/moonshine/moonshine_encoder.py,sha256=NjjMO_FEBlWFSv6Appv8a3V7XovW2afvxxjXwQRgV60,8148
@@ -499,7 +501,7 @@ keras_hub/src/utils/transformers/preset_loader.py,sha256=1nfS5xVsl-JROGXJXltTqV1
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
 keras_hub/tokenizers/__init__.py,sha256=uMjjm0mzUkRb0e4Ac_JK8aJ9cKGUi5UqmzWoWAFJprE,4164
 keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
-keras_hub_nightly-0.21.0.
-keras_hub_nightly-0.21.0.
-keras_hub_nightly-0.21.0.
-keras_hub_nightly-0.21.0.
+keras_hub_nightly-0.21.0.dev202505240409.dist-info/METADATA,sha256=BJHRD68RtZc8CA6kIFWZxphjYr6g2t62j1FvwLar_LU,7393
+keras_hub_nightly-0.21.0.dev202505240409.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+keras_hub_nightly-0.21.0.dev202505240409.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.21.0.dev202505240409.dist-info/RECORD,,
File without changes