npm - react-native-executorch - Versions diffs - 0.7.0-nightly-b4770df-20260205 → 0.7.0 - Mend

react-native-executorch 0.7.0-nightly-b4770df-20260205 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (296) hide show

package/src/types/ocr.ts CHANGED Viewed

@@ -1,14 +1,119 @@
 import { symbols } from '../constants/ocr/symbols';
+import { RnExecutorchError } from '../errors/errorUtils';
+import { ResourceSource } from './common';
+/**
+ * OCRDetection represents a single detected text instance in an image,
+ * including its bounding box, recognized text, and confidence score.
+ *
+ * @category Types
+ * @property {Point[]} bbox - An array of points defining the bounding box around the detected text.
+ * @property {string} text - The recognized text within the bounding box.
+ * @property {number} score - The confidence score of the OCR detection, ranging from 0 to 1.
+ */
 export interface OCRDetection {
-  bbox: OCRBbox[];
+  bbox: Point[];
   text: string;
   score: number;
 }
-export interface OCRBbox {
+/**
+ * Point represents a coordinate in 2D space.
+ *
+ * @category Types
+ * @property {number} x - The x-coordinate of the point.
+ * @property {number} y - The y-coordinate of the point.
+ */
+export interface Point {
   x: number;
   y: number;
 }
+/**
+ * Configuration properties for the `useOCR` hook.
+ *
+ * @category Types
+ */
+export interface OCRProps {
+  /**
+   * Object containing the necessary model sources and configuration for the OCR pipeline.
+   */
+  model: {
+    /**
+     * `ResourceSource` that specifies the location of the text detector model binary.
+     */
+    detectorSource: ResourceSource;
+    /**
+     * `ResourceSource` that specifies the location of the text recognizer model binary.
+     */
+    recognizerSource: ResourceSource;
+    /**
+     * The language configuration enum for the OCR model (e.g., English, Polish, etc.).
+     */
+    language: OCRLanguage;
+  };
+  /**
+   * Boolean that can prevent automatic model loading (and downloading the data if loaded for the first time) after running the hook.
+   * Defaults to `false`.
+   */
+  preventLoad?: boolean;
+}
+/**
+ * Configuration properties for the `useVerticalOCR` hook.
+ *
+ * @category Types
+ */
+export interface VerticalOCRProps extends OCRProps {
+  /**
+   * Boolean indicating whether to treat each character independently during recognition.
+   * Defaults to `false`.
+   */
+  independentCharacters?: boolean;
+}
+/**
+ * Return type for the `useOCR` hook.
+ * Manages the state and operations for Optical Character Recognition (OCR).
+ *
+ * @category Types
+ */
+export interface OCRType {
+  /**
+   * Contains the error object if the models failed to load, download, or encountered a runtime error during recognition.
+   */
+  error: RnExecutorchError | null;
+  /**
+   * Indicates whether both detector and recognizer models are loaded and ready to process images.
+   */
+  isReady: boolean;
+  /**
+   * Indicates whether the model is currently processing an image.
+   */
+  isGenerating: boolean;
+  /**
+   * Represents the total download progress of the model binaries as a value between 0 and 1.
+   */
+  downloadProgress: number;
+  /**
+   * Executes the OCR pipeline (detection and recognition) on the provided image.
+   * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed.
+   * @returns A Promise that resolves to the OCR results (typically containing the recognized text strings and their bounding boxes).
+   * @throws {RnExecutorchError} If the models are not loaded or are currently processing another image.
+   */
+  forward: (imageSource: string) => Promise<OCRDetection[]>;
+}
+/**
+ * Union of supported OCR language identifiers, derived from the keys of the available symbol sets.
+ *
+ * @category Types
+ */
 export type OCRLanguage = keyof typeof symbols;

package/src/types/stt.ts CHANGED Viewed

@@ -1,6 +1,115 @@
 import { ResourceSource } from './common';
+import { RnExecutorchError } from '../errors/errorUtils';
-// Languages supported by whisper (not whisper.en)
+/**
+ * Configuration properties for the Speech to Text hook.
+ *
+ * @category Types
+ */
+export interface SpeechToTextProps {
+  /**
+   * Configuration object containing model sources.
+   */
+  model: SpeechToTextModelConfig;
+  /**
+   * Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
+   */
+  preventLoad?: boolean;
+}
+/**
+ * React hook state and methods for managing a Speech to Text (STT) instance.
+ *
+ * @category Types
+ */
+export interface SpeechToTextType {
+  /**
+   * Contains the error object if the model failed to load.
+   */
+  error: null | RnExecutorchError;
+  /**
+   * Indicates whether the model has successfully loaded and is ready for inference.
+   */
+  isReady: boolean;
+  /**
+   * Indicates whether the model is currently processing an inference.
+   */
+  isGenerating: boolean;
+  /**
+   * Tracks the progress of the model download process.
+   */
+  downloadProgress: number;
+  /**
+   * Contains the part of the transcription that is finalized and will not change.
+   * Useful for displaying stable results during streaming.
+   */
+  committedTranscription: string;
+  /**
+   * Contains the part of the transcription that is still being processed and may change.
+   * Useful for displaying live, partial results during streaming.
+   */
+  nonCommittedTranscription: string;
+  /**
+   * Runs the encoding part of the model on the provided waveform.
+   * @param waveform - The input audio waveform array.
+   * @returns A promise resolving to the encoded data.
+   */
+  encode(waveform: Float32Array): Promise<Float32Array>;
+  /**
+   * Runs the decoder of the model.
+   * @param tokens - The input tokens for the decoder.
+   * @param encoderOutput - The output from the encoder.
+   * @returns A promise resolving to the decoder output as a `Float32Array`.
+   */
+  decode(
+    tokens: Int32Array,
+    encoderOutput: Float32Array
+  ): Promise<Float32Array>;
+  /**
+   * Starts a transcription process for a given input array, which should be a waveform at 16kHz.
+   * @param waveform - The input audio waveform.
+   * @param options - Decoding options, e.g. `{ language: 'es' }` for multilingual models.
+   * @returns A promise that resolves with the output transcription once the model has finished.
+   */
+  transcribe(
+    waveform: Float32Array,
+    options?: DecodingOptions | undefined
+  ): Promise<string>;
+  /**
+   * Starts a streaming transcription process.
+   * Use in combination with `streamInsert` to feed audio chunks and `streamStop` to end the stream.
+   * Updates `committedTranscription` and `nonCommittedTranscription` as transcription progresses.
+   * @param options - Decoding options including language.
+   * @returns The final transcription string.
+   */
+  stream(options?: DecodingOptions | undefined): Promise<string>;
+  /**
+   * Inserts a chunk of audio data (sampled at 16kHz) into the ongoing streaming transcription.
+   * @param waveform - The audio chunk to insert.
+   */
+  streamInsert(waveform: Float32Array): void;
+  /**
+   * Stops the ongoing streaming transcription process.
+   */
+  streamStop(): void;
+}
+/**
+ * Languages supported by whisper (not whisper.en)
+ *
+ * @category Types
+ */
 export type SpeechToTextLanguage =
   | 'af'
   | 'sq'
@@ -78,13 +187,39 @@ export type SpeechToTextLanguage =
   | 'cy'
   | 'yi';
+/**
+ * Options for decoding speech to text.
+ *
+ * @category Types
+ * @property {SpeechToTextLanguage} [language] - Optional language code to guide the transcription.
+ */
 export interface DecodingOptions {
   language?: SpeechToTextLanguage;
 }
+/**
+ * Configuration for Speech to Text model.
+ *
+ * @category Types
+ */
 export interface SpeechToTextModelConfig {
+  /**
+   * A boolean flag indicating whether the model supports multiple languages.
+   */
   isMultilingual: boolean;
+  /**
+   * A `ResourceSource` that specifies the location of a `.pte` file for the encoder.
+   */
   encoderSource: ResourceSource;
+  /**
+   * A `ResourceSource` that specifies the location of a `.pte` file for the decoder.
+   */
   decoderSource: ResourceSource;
+  /**
+   * A `ResourceSource` that specifies the location of the tokenizer for the model.
+   */
   tokenizerSource: ResourceSource;
 }

package/src/types/styleTransfer.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import { RnExecutorchError } from '../errors/errorUtils';
+import { ResourceSource } from './common';
+/**
+ * Configuration properties for the `useStyleTransfer` hook.
+ *
+ * @category Types
+ * @property {Object} model - Object containing the `modelSource` for the style transfer model.
+ * @property {ResourceSource} model.modelSource - `ResourceSource` that specifies the location of the style transfer model binary.
+ * @property {boolean} [preventLoad] - Boolean that can prevent automatic model loading (and downloading the data if loaded for the first time) after running the hook.
+ */
+export interface StyleTransferProps {
+  model: { modelSource: ResourceSource };
+  preventLoad?: boolean;
+}
+/**
+ * Return type for the `useStyleTransfer` hook.
+ * Manages the state and operations for applying artistic style transfer to images.
+ *
+ * @category Types
+ */
+export interface StyleTransferType {
+  /**
+   * Contains the error object if the model failed to load, download, or encountered a runtime error during style transfer.
+   */
+  error: RnExecutorchError | null;
+  /**
+   * Indicates whether the style transfer model is loaded and ready to process images.
+   */
+  isReady: boolean;
+  /**
+   * Indicates whether the model is currently processing an image.
+   */
+  isGenerating: boolean;
+  /**
+   * Represents the download progress of the model binary as a value between 0 and 1.
+   */
+  downloadProgress: number;
+  /**
+   * Executes the model's forward pass to apply the specific artistic style to the provided image.
+   * @param imageSource - A string representing the input image source (e.g., a file path, URI, or base64 string) to be stylized.
+   * @returns A Promise that resolves to a string containing the stylized image (typically as a base64 string or a file URI).
+   * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image.
+   */
+  forward: (imageSource: string) => Promise<string>;
+}

package/src/types/textEmbeddings.ts ADDED Viewed

@@ -0,0 +1,58 @@
+import { RnExecutorchError } from '../errors/errorUtils';
+import { ResourceSource } from '../types/common';
+/**
+ * Props for the useTextEmbeddings hook.
+ *
+ * @category Types
+ * @property {Object} model - An object containing the model and tokenizer sources.
+ * @property {boolean} [preventLoad] - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
+ */
+export interface TextEmbeddingsProps {
+  model: {
+    /**
+     * The source of the text embeddings model binary.
+     */
+    modelSource: ResourceSource;
+    /**
+     * The source of the tokenizer JSON file.
+     */
+    tokenizerSource: ResourceSource;
+  };
+  preventLoad?: boolean;
+}
+/**
+ * React hook state and methods for managing a Text Embeddings model instance.
+ *
+ * @category Types
+ */
+export interface TextEmbeddingsType {
+  /**
+   * Contains the error object if the model failed to load or an error occurred during inference.
+   */
+  error: null | RnExecutorchError;
+  /**
+   * Indicates whether the embeddings model has successfully loaded and is ready for inference.
+   */
+  isReady: boolean;
+  /**
+   * Indicates whether the model is currently generating embeddings.
+   */
+  isGenerating: boolean;
+  /**
+   * Tracks the progress of the model download process (value between 0 and 1).
+   */
+  downloadProgress: number;
+  /**
+   * Runs the text embeddings model on the provided input string.
+   * @param input - The text string to embed.
+   * @returns A promise resolving to a Float32Array containing the vector embeddings.
+   * @throws {RnExecutorchError} If the model is not loaded or is currently processing another request.
+   */
+  forward(input: string): Promise<Float32Array>;
+}

package/src/types/tokenizer.ts ADDED Viewed

@@ -0,0 +1,86 @@
+import { RnExecutorchError } from '../errors/errorUtils';
+import { ResourceSource } from './common';
+/**
+ * Parameters for initializing and configuring a Tokenizer instance.
+ *
+ * @category Types
+ */
+export interface TokenizerProps {
+  /**
+   * Object containing:
+   *
+   * `tokenizerSource` - A `ResourceSource` that specifies the location of the tokenizer.
+   */
+  tokenizer: { tokenizerSource: ResourceSource };
+  /**
+   * Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
+   */
+  preventLoad?: boolean;
+}
+/**
+ * React hook state and methods for managing a Tokenizer instance.
+ *
+ * @category Types
+ */
+export interface TokenizerType {
+  /**
+   * Contains the error object if the tokenizer failed to load or an error occurred during processing.
+   */
+  error: null | RnExecutorchError;
+  /**
+   * Indicates whether the tokenizer has successfully loaded and is ready for use.
+   */
+  isReady: boolean;
+  /**
+   * Indicates whether the tokenizer is currently processing data.
+   */
+  isGenerating: boolean;
+  /**
+   * Tracks the progress of the tokenizer download process (value between 0 and 1).
+   */
+  downloadProgress: number;
+  /**
+   * Converts an array of token IDs into a string.
+   * @param tokens - An array (`number[]`) of token IDs to decode.
+   * @param skipSpecialTokens - Whether special tokens should be skipped during decoding. Must be passed explicitly, but may be `undefined`.
+   * @returns A promise resolving to the decoded text string.
+   */
+  decode(
+    tokens: number[],
+    skipSpecialTokens: boolean | undefined
+  ): Promise<string>;
+  /**
+   * Converts a string into an array of token IDs.
+   * @param text - The input text string to tokenize.
+   * @returns A promise resolving to an array `number[]` containing the encoded token IDs.
+   */
+  encode(text: string): Promise<number[]>;
+  /**
+   * Returns the size of the tokenizer's vocabulary.
+   * @returns A promise resolving to the vocabulary size.
+   */
+  getVocabSize(): Promise<number>;
+  /**
+   * Returns the token associated with the given ID.
+   * @param id - The numeric token ID.
+   * @returns A promise resolving to the token string representation.
+   */
+  idToToken(id: number): Promise<string>;
+  /**
+   * Returns the ID associated with the given token.
+   * @param token - The token string.
+   * @returns A promise resolving to the token ID.
+   */
+  tokenToId(token: string): Promise<number>;
+}

package/src/types/tti.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { RnExecutorchError } from '../errors/errorUtils';
+import { ResourceSource } from '../types/common';
+/**
+ * Configuration properties for the `useTextToImage` hook.
+ *
+ * @category Types
+ */
+export interface TextToImageProps {
+  /**
+   * Object containing the required model sources for the diffusion pipeline.
+   */
+  model: {
+    /** Source for the text tokenizer binary/config. */
+    tokenizerSource: ResourceSource;
+    /** Source for the diffusion scheduler binary/config. */
+    schedulerSource: ResourceSource;
+    /** Source for the text encoder model binary. */
+    encoderSource: ResourceSource;
+    /** Source for the UNet (noise predictor) model binary. */
+    unetSource: ResourceSource;
+    /** Source for the VAE decoder model binary, used to decode the final image. */
+    decoderSource: ResourceSource;
+  };
+  /**
+   * Optional callback function that is triggered after each diffusion inference step.
+   * Useful for updating a progress bar during image generation.
+   * @param stepIdx - The index of the current inference step.
+   */
+  inferenceCallback?: (stepIdx: number) => void;
+  /**
+   * Boolean that can prevent automatic model loading (and downloading the data if loaded for the first time) after running the hook.
+   * Defaults to `false`.
+   */
+  preventLoad?: boolean;
+}
+/**
+ * Return type for the `useTextToImage` hook.
+ * Manages the state and operations for generating images from text prompts using a diffusion model pipeline.
+ *
+ * @category Types
+ */
+export interface TextToImageType {
+  /**
+   * Contains the error object if any of the pipeline models failed to load, download, or encountered a runtime error.
+   */
+  error: RnExecutorchError | null;
+  /**
+   * Indicates whether the entire diffusion pipeline is loaded into memory and ready for generation.
+   */
+  isReady: boolean;
+  /**
+   * Indicates whether the model is currently generating an image.
+   */
+  isGenerating: boolean;
+  /**
+   * Represents the total download progress of all the model binaries combined, as a value between 0 and 1.
+   */
+  downloadProgress: number;
+  /**
+   * Runs the diffusion pipeline to generate an image from the provided text prompt.
+   * @param input - The text prompt describing the desired image.
+   * @param [imageSize] - Optional. The target width and height of the generated image (e.g., 512 for 512x512). Defaults to the model's standard size if omitted.
+   * @param [numSteps] - Optional. The number of denoising steps for the diffusion process. More steps generally yield higher quality at the cost of generation time.
+   * @param [seed] - Optional. A random seed for reproducible generation. Should be a positive integer.
+   * @returns A Promise that resolves to a string representing the generated image (e.g., base64 string or file URI).
+   * @throws {RnExecutorchError} If the model is not loaded or is currently generating another image.
+   */
+  generate: (
+    input: string,
+    imageSize?: number,
+    numSteps?: number,
+    seed?: number
+  ) => Promise<string>;
+  /**
+   * Interrupts the currently active image generation process at the next available inference step.
+   */
+  interrupt: () => void;
+}

package/src/types/tts.ts CHANGED Viewed

@@ -1,6 +1,11 @@
 import { ResourceSource } from './common';
+import { RnExecutorchError } from '../errors/errorUtils';
-// List all the languages available in TTS models (as lang shorthands)
+/**
+ * Lists all the languages available in TTS models (as language shorthands).
+ *
+ * @category Types
+ */
 export type TextToSpeechLanguage =
   | 'en-us' // American English
   | 'en-gb'; // British English
@@ -10,6 +15,7 @@ export type TextToSpeechLanguage =
  *
  * So far in Kokoro, each voice is directly associated with a language.
  *
+ * @category Types
  * @property {TextToSpeechLanguage} lang - speaker's language
  * @property {ResourceSource} voiceSource - a source to a binary file with voice embedding
  * @property {KokoroVoiceExtras} [extra] - an optional extra sources or properties related to specific voice
@@ -20,7 +26,13 @@ export interface VoiceConfig {
   extra?: KokoroVoiceExtras; // ... add more possible types
 }
-// Kokoro-specific voice extra props
+/**
+ * Kokoro-specific voice extra props
+ *
+ * @category Types
+ * @property {ResourceSource} taggerSource - source to Kokoro's tagger model binary
+ * @property {ResourceSource} lexiconSource - source to Kokoro's lexicon binary
+ */
 export interface KokoroVoiceExtras {
   taggerSource: ResourceSource;
   lexiconSource: ResourceSource;
@@ -29,6 +41,11 @@ export interface KokoroVoiceExtras {
 /**
  * Kokoro model configuration.
  * Only the core Kokoro model sources, as phonemizer sources are included in voice configuration.
+ *
+ * @category Types
+ * @property {'kokoro'} type - model type identifier
+ * @property {ResourceSource} durationPredictorSource - source to Kokoro's duration predictor model binary
+ * @property {ResourceSource} synthesizerSource - source to Kokoro's synthesizer model binary
  */
 export interface KokoroConfig {
   type: 'kokoro';
@@ -39,6 +56,7 @@ export interface KokoroConfig {
 /**
  * General Text to Speech module configuration
  *
+ * @category Types
  * @property {KokoroConfig} model - a selected T2S model
  * @property {VoiceConfig} voice - a selected speaker's voice
  * @property {KokoroOptions} [options] - a completely optional model-specific configuration
@@ -48,9 +66,22 @@ export interface TextToSpeechConfig {
   voice: VoiceConfig;
 }
+/**
+ * Props for the useTextToSpeech hook.
+ *
+ * @category Types
+ * @extends TextToSpeechConfig
+ *
+ * @property {boolean} [preventLoad] - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
+ */
+export interface TextToSpeechProps extends TextToSpeechConfig {
+  preventLoad?: boolean;
+}
 /**
  * Text to Speech module input definition
  *
+ * @category Types
  * @property {string} text - a text to be spoken
  * @property {number} [speed] - optional speed argument - the higher it is, the faster the speech becomes
  */
@@ -59,6 +90,56 @@ export interface TextToSpeechInput {
   speed?: number;
 }
+/**
+ * Return type for the `useTextToSpeech` hook.
+ * Manages the state and operations for Text-to-Speech generation.
+ *
+ * @category Types
+ */
+export interface TextToSpeechType {
+  /**
+   * Contains the error object if the model failed to load or encountered an error during inference.
+   */
+  error: RnExecutorchError | null;
+  /**
+   * Indicates whether the Text-to-Speech model is loaded and ready to accept inputs.
+   */
+  isReady: boolean;
+  /**
+   * Indicates whether the model is currently generating audio.
+   */
+  isGenerating: boolean;
+  /**
+   * Represents the download progress of the model and voice assets as a value between 0 and 1.
+   */
+  downloadProgress: number;
+  /**
+   * Runs the model to convert the provided text into speech audio in a single pass.
+   * @param input - The `TextToSpeechInput` object containing the `text` to synthesize and optional `speed`.
+   * @returns A Promise that resolves with the generated audio data (typically a `Float32Array`).
+   * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
+   */
+  forward: (input: TextToSpeechInput) => Promise<Float32Array>;
+  /**
+   * Streams the generated audio data incrementally.
+   * This is optimal for real-time playback, allowing audio to start playing before the full text is synthesized.
+   * @param input - The `TextToSpeechStreamingInput` object containing `text`, optional `speed`, and lifecycle callbacks (`onBegin`, `onNext`, `onEnd`).
+   * @returns A Promise that resolves when the streaming process is complete.
+   * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
+   */
+  stream: (input: TextToSpeechStreamingInput) => Promise<void>;
+  /**
+   * Interrupts and stops the currently active audio generation stream.
+   */
+  streamStop: () => void;
+}
 /**
  * Text to Speech streaming input definition
  *
@@ -66,6 +147,8 @@ export interface TextToSpeechInput {
  * executed at given moments of the streaming.
  * Actions such as playing the audio should happen within the onNext callback.
  * Callbacks can be both synchronous or asynchronous.
+ *
+ * @category Types
  * @property {() => void | Promise<void>} [onBegin] - Called when streaming begins
  * @property {(audio: Float32Array) => void | Promise<void>} [onNext] - Called after each audio chunk gets calculated.
  * @property {() => void | Promise<void>} [onEnd] - Called when streaming ends