npm - react-native-litert-lm - Versions diffs - 0.2.2 → 0.3.0 - Mend

react-native-litert-lm 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

package/README.md +270 -186
package/android/build.gradle +1 -1
package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +93 -37
package/app.plugin.js +33 -0
package/cpp/HybridLiteRTLM.cpp +571 -451
package/cpp/HybridLiteRTLM.hpp +54 -23
package/cpp/IOSDownloadHelper.h +24 -0
package/cpp/cpp-adapter.cpp +2 -2
package/cpp/include/litert_lm_engine.h +502 -0
package/ios/IOSDownloadHelper.mm +129 -0
package/ios/LiteRTLMAutolinking.mm +30 -0
package/lib/hooks.d.ts +9 -4
package/lib/hooks.js +34 -20
package/lib/index.d.ts +1 -0
package/lib/index.js +2 -5
package/lib/memoryTracker.d.ts +1 -1
package/lib/memoryTracker.js +1 -1
package/lib/modelFactory.d.ts +11 -5
package/lib/modelFactory.js +9 -4
package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +11 -4
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +31 -37
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +19 -22
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +15 -18
package/package.json +12 -5
package/react-native-litert-lm.podspec +20 -7
package/scripts/build-ios-engine.sh +283 -0
package/scripts/download-ios-frameworks.sh +72 -0
package/scripts/postinstall.js +116 -0
package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
package/scripts/stubs/llguidance_stubs.c +101 -0
package/src/hooks.ts +62 -39
package/src/index.ts +4 -7
package/src/memoryTracker.ts +1 -1
package/src/modelFactory.ts +30 -5

package/cpp/HybridLiteRTLM.hpp CHANGED Viewed

@@ -14,11 +14,9 @@
 #include "../nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp"
-// LiteRT-LM headers (conditionally included when available via Prefab/CMake)
-#ifdef LITERT_LM_ENABLED
-#include "litert/lm/engine.h"
-#include "litert/lm/conversation.h"
-#include "litert/lm/types.h"
+// LiteRT-LM C API (iOS uses prebuilt framework with C ABI)
+#ifdef __APPLE__
+#include "include/litert_lm_engine.h"
 #endif
 // Memory usage headers
@@ -37,14 +35,15 @@
 #include <memory>
 #include <mutex>
 #include <functional>
+#include <atomic>
 namespace margelo::nitro::litertlm {
 /**
  * HybridLiteRTLM: React Native bindings for LiteRT-LM.
  *
- * Wraps LiteRT-LM's Engine and Conversation classes to provide
- * high-level LLM inference with GPU acceleration.
+ * On iOS, wraps the LiteRT-LM C API (litert_lm_engine.h) provided by a prebuilt framework.
+ * On Android, this class is unused — the Kotlin implementation is used instead.
  */
 class HybridLiteRTLM : public HybridLiteRTLMSpec {
 public:
@@ -61,24 +60,26 @@ public:
 public:
   // HybridLiteRTLMSpec interface implementation
-  void loadModel(const std::string& modelPath,
+  std::shared_ptr<Promise<void>> loadModel(const std::string& modelPath,
                  const std::optional<LLMConfig>& config) override;
-  std::string sendMessage(const std::string& message) override;
+  std::shared_ptr<Promise<std::string>> sendMessage(const std::string& message) override;
-  std::string sendMessageWithImage(const std::string& message,
+  std::shared_ptr<Promise<std::string>> sendMessageWithImage(const std::string& message,
                                    const std::string& imagePath) override;
-  std::future<std::string> downloadModel(const std::string& url,
+  std::shared_ptr<Promise<std::string>> downloadModel(const std::string& url,
                                          const std::string& fileName,
                                          const std::optional<std::function<void(double)>>& onProgress) override;
-  std::string sendMessageWithAudio(const std::string& message,
+  std::shared_ptr<Promise<void>> deleteModel(const std::string& fileName) override;
+  std::shared_ptr<Promise<std::string>> sendMessageWithAudio(const std::string& message,
                                    const std::string& audioPath) override;
   void sendMessageAsync(
     const std::string& message,
-    const std::function<void(std::string, bool)>& onToken
+    const std::function<void(const std::string&, bool)>& onToken
   ) override;
   std::vector<Message> getHistory() override;
@@ -94,10 +95,12 @@ public:
   void close() override;
 private:
-  // LiteRT-LM resources (conditionally available on Android with Prefab)
-#ifdef LITERT_LM_ENABLED
-  std::unique_ptr<litert::lm::Engine> engine_;
-  std::unique_ptr<litert::lm::Conversation> conversation_;
+  // LiteRT-LM C API resources (iOS only)
+#ifdef __APPLE__
+  LiteRtLmEngine* engine_ = nullptr;
+  LiteRtLmConversation* conversation_ = nullptr;
+  LiteRtLmConversationConfig* conv_config_ = nullptr;
+  LiteRtLmSessionConfig* session_config_ = nullptr;
 #endif
   // State
@@ -108,10 +111,11 @@ private:
   // Thread safety
   mutable std::mutex mutex_;
-  // Configuration - backends
+  // Configuration - backend
   Backend backend_ = Backend::GPU;
-  Backend visionBackend_ = Backend::GPU;  // Gemma 3n requires GPU for vision
-  Backend audioBackend_ = Backend::CPU;   // Audio typically CPU
+  // System prompt / instruction
+  std::string systemPrompt_;
   // Configuration - sampling parameters
   double temperature_ = 0.7;
@@ -126,11 +130,38 @@ private:
     }
   }
-  // Helper to format a message for the engine (apply chat template if needed)
-  std::string formatUserPrompt(const std::string& message) const;
   // Helper to create a new conversation from existing engine
   void createNewConversation();
+  // JSON helpers for building C API message payloads
+  static std::string escapeJson(const std::string& input);
+  static std::string buildTextMessageJson(const std::string& text);
+  static std::string buildImageMessageJson(const std::string& text, const std::string& imagePath);
+  static std::string buildAudioMessageJson(const std::string& text, const std::string& audioPath);
+  static std::string extractTextFromResponse(const std::string& jsonResponse);
+  // Internal implementations (called from Promise lambdas)
+  void loadModelInternal(const std::string& modelPath, const std::optional<LLMConfig>& config);
+  std::string sendMessageInternal(const std::string& message);
+  std::string sendMessageWithImageInternal(const std::string& message, const std::string& imagePath);
+  std::string sendMessageWithAudioInternal(const std::string& message, const std::string& audioPath);
+  // Streaming callback context (must be a plain struct for C function pointer)
+  struct StreamContext {
+    std::function<void(const std::string&, bool)> onToken;
+    std::string fullResponse;
+    std::vector<Message>* history;
+    std::mutex* historyMutex;
+    std::string userMessage;
+    GenerationStats* lastStats;
+    std::chrono::steady_clock::time_point startTime;
+    int tokenCount;
+  };
+  // Static C callback for streaming (no captures needed)
+  static void streamCallbackFn(void* callback_data, const char* chunk,
+                                bool is_final, const char* error_msg);
 };
 } // namespace margelo::nitro::litertlm

package/cpp/IOSDownloadHelper.h ADDED Viewed

@@ -0,0 +1,24 @@
+#pragma once
+#include <string>
+#include <functional>
+#include <optional>
+namespace litert_lm {
+/**
+ * Download a file from a URL to the app's Caches/litert_models directory.
+ * Uses NSURLSession for efficient, resumable downloads.
+ *
+ * @param url HTTPS URL to download from
+ * @param fileName Destination filename
+ * @param onProgress Optional progress callback (0.0 to 1.0)
+ * @return Absolute path to the downloaded file
+ * @throws std::runtime_error on download failure
+ */
+std::string downloadModelFile(
+    const std::string& url,
+    const std::string& fileName,
+    const std::optional<std::function<void(double)>>& onProgress);
+} // namespace litert_lm

package/cpp/cpp-adapter.cpp CHANGED Viewed

@@ -2,7 +2,7 @@
 /// cpp-adapter.cpp
 /// JNI Entry Point - Required by Nitrogen to register Kotlin HybridObjects
 ///
-/// Updated for react-native-nitro-modules v0.34+:
+/// Updated for react-native-nitro-modules v0.35+:
 /// Uses facebook::jni::initialize() directly with registerAllNatives().
 ///
@@ -12,7 +12,7 @@
 // JNI_OnLoad is called when the native library is loaded via System.loadLibrary()
 // This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects.
-// The new v0.34 API allows registering custom C++ native JNI classes/functions
+// The new v0.35 API allows registering custom C++ native JNI classes/functions
 // alongside Nitrogen's auto-generated registrations.
 JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
     return facebook::jni::initialize(vm, []() {

package/cpp/include/litert_lm_engine.h ADDED Viewed

@@ -0,0 +1,502 @@
+// Copyright 2025 The ODML Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_
+#define THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+// For Windows, __declspec( dllexport ) is required to export functions from a .dll.
+// https://learn.microsoft.com/en-us/cpp/cpp/using-dllimport-and-dllexport-in-cpp-classes?view=msvc-170
+//
+// _WIN32 is defined as 1 when the compilation target is 32-bit ARM, 64-bit ARM,
+// x86, x64, or ARM64EC. Otherwise, undefined.
+// https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros
+#if defined(_WIN32)
+#define LITERT_LM_C_API_EXPORT __declspec(dllexport)
+#else
+#define LITERT_LM_C_API_EXPORT
+#endif
+// Opaque pointer for the LiteRT LM Engine.
+typedef struct LiteRtLmEngine LiteRtLmEngine;
+// Opaque pointer for the LiteRT LM Session.
+typedef struct LiteRtLmSession LiteRtLmSession;
+// Opaque pointer for the LiteRT LM Responses.
+typedef struct LiteRtLmResponses LiteRtLmResponses;
+// Opaque pointer for the LiteRT LM Engine Settings.
+typedef struct LiteRtLmEngineSettings LiteRtLmEngineSettings;
+// Opaque pointer for the LiteRT LM Benchmark Info.
+typedef struct LiteRtLmBenchmarkInfo LiteRtLmBenchmarkInfo;
+// Opaque pointer for the LiteRT LM Conversation.
+typedef struct LiteRtLmConversation LiteRtLmConversation;
+// Opaque pointer for a JSON response.
+typedef struct LiteRtLmJsonResponse LiteRtLmJsonResponse;
+// Opaque pointer for LiteRT LM Session Config.
+typedef struct LiteRtLmSessionConfig LiteRtLmSessionConfig;
+// Opaque pointer for LiteRT LM Conversation Config.
+typedef struct LiteRtLmConversationConfig LiteRtLmConversationConfig;
+// Represents the type of sampler.
+typedef enum {
+  kTypeUnspecified = 0,
+  // Probabilistically pick among the top k tokens.
+  kTopK = 1,
+  // Probabilistically pick among the tokens whose cumulative probability is
+  // greater than or equal to p, after first performing top-k sampling.
+  kTopP = 2,
+  // Pick the token with maximum logit (i.e., argmax).
+  kGreedy = 3,
+} Type;
+// Parameters for the sampler.
+typedef struct {
+  Type type;
+  int32_t top_k;
+  float top_p;
+  float temperature;
+  int32_t seed;
+} LiteRtLmSamplerParams;
+// Creates a LiteRT LM Session Config.
+// The caller is responsible for destroying the config using
+// `litert_lm_session_config_delete`.
+// @return A pointer to the created config, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmSessionConfig* litert_lm_session_config_create();
+// Sets the maximum number of output tokens per decode step for this session.
+// @param config The config to modify.
+// @param max_output_tokens The maximum number of output tokens.
+LITERT_LM_C_API_EXPORT
+void litert_lm_session_config_set_max_output_tokens(
+    LiteRtLmSessionConfig* config, int max_output_tokens);
+// Sets the sampler parameters for this session config.
+// @param config The config to modify.
+// @param sampler_params The sampler parameters to use.
+LITERT_LM_C_API_EXPORT
+void litert_lm_session_config_set_sampler_params(
+    LiteRtLmSessionConfig* config, const LiteRtLmSamplerParams* sampler_params);
+// Destroys a LiteRT LM Session Config.
+// @param config The config to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_session_config_delete(LiteRtLmSessionConfig* config);
+// Creates a LiteRT LM Conversation Config.
+// The caller is responsible for destroying the config using
+// `litert_lm_conversation_config_delete`.
+// @param engine The engine to use.
+// @param session_config The session config to use. If NULL, default
+// session config will be used.
+// @param system_message_json The system message in JSON format.
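+// @param tools_json The tools description in JSON array format.
+// @param messages_json Messages in JSON format (presumably the initial
+// conversation messages; confirm against the LiteRT-LM implementation).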
+// @param enable_constrained_decoding Whether to enable constrained decoding.
+// @return A pointer to the created config, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmConversationConfig* litert_lm_conversation_config_create(
+    LiteRtLmEngine* engine, const LiteRtLmSessionConfig* session_config,
+    const char* system_message_json, const char* tools_json,
+    const char* messages_json, bool enable_constrained_decoding);
+// Destroys a LiteRT LM Conversation Config.
+// @param config The config to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_conversation_config_delete(LiteRtLmConversationConfig* config);
+// Sets the minimum log level for the LiteRT LM library.
+// Log levels are: 0=INFO, 1=WARNING, 2=ERROR, 3=FATAL.
+LITERT_LM_C_API_EXPORT
+void litert_lm_set_min_log_level(int level);
+// Represents the type of input data.
+typedef enum {
+  kInputText,
+  kInputImage,
+  kInputImageEnd,
+  kInputAudio,
+  kInputAudioEnd,
+} InputDataType;
+// Represents a single piece of input data.
+typedef struct {
+  InputDataType type;
+  // The data pointer. The interpretation depends on the `type`.
+  // For kInputText, it's a UTF-8 string.
+  // For kInputImage and kInputAudio, it's a pointer to the raw bytes.
+  const void* data;
+  // The size of the data in bytes.
+  size_t size;
+} InputData;
+// Creates LiteRT LM Engine Settings. The caller is responsible for destroying
+// the settings using `litert_lm_engine_settings_delete`.
+//
+// @param model_path The path to the model file.
+// @param backend_str The backend to use (e.g., "cpu", "gpu").
+// @param vision_backend_str The vision backend to use, or NULL if not set.
+// @param audio_backend_str The audio backend to use, or NULL if not set.
+// @return A pointer to the created settings, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmEngineSettings* litert_lm_engine_settings_create(
+    const char* model_path, const char* backend_str,
+    const char* vision_backend_str, const char* audio_backend_str);
+// Destroys LiteRT LM Engine Settings.
+//
+// @param settings The settings to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_delete(LiteRtLmEngineSettings* settings);
+// Sets the maximum number of tokens for the engine.
+//
+// @param settings The engine settings.
+// @param max_num_tokens The maximum number of tokens.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_set_max_num_tokens(
+    LiteRtLmEngineSettings* settings, int max_num_tokens);
+// Sets the cache directory for the engine.
+//
+// @param settings The engine settings.
+// @param cache_dir The cache directory.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_set_cache_dir(LiteRtLmEngineSettings* settings,
+                                             const char* cache_dir);
+// Sets the activation data type.
+//
+// @param settings The engine settings.
+// @param activation_data_type_int The activation data type. See
+// `ActivationDataType` in executor_settings_base.h for the possible values
+// (e.g., 0 for F32, 1 for F16, 2 for I16, 3 for I8).
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_set_activation_data_type(
+    LiteRtLmEngineSettings* settings, int activation_data_type_int);
+// Sets the prefill chunk size for the engine. Only applicable for CPU backend
+// with dynamic models.
+//
+// @param settings The engine settings.
+// @param prefill_chunk_size The prefill chunk size.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_set_prefill_chunk_size(
+    LiteRtLmEngineSettings* settings, int prefill_chunk_size);
+// Enables benchmarking for the engine.
+//
+// @param settings The engine settings.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_enable_benchmark(
+    LiteRtLmEngineSettings* settings);
+// Sets the number of prefill tokens for benchmarking.
+//
+// @param settings The engine settings.
+// @param num_prefill_tokens The number of prefill tokens.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_set_num_prefill_tokens(
+    LiteRtLmEngineSettings* settings, int num_prefill_tokens);
+// Sets the number of decode tokens for benchmarking.
+//
+// @param settings The engine settings.
+// @param num_decode_tokens The number of decode tokens.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_settings_set_num_decode_tokens(
+    LiteRtLmEngineSettings* settings, int num_decode_tokens);
+// Creates a LiteRT LM Engine from the given settings. The caller is responsible
+// for destroying the engine using `litert_lm_engine_delete`.
+//
+// @param settings The engine settings.
+// @return A pointer to the created engine, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmEngine* litert_lm_engine_create(const LiteRtLmEngineSettings* settings);
+// Destroys a LiteRT LM Engine.
+//
+// @param engine The engine to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_engine_delete(LiteRtLmEngine* engine);
+// Creates a LiteRT LM Session. The caller is responsible for destroying the
+// session using `litert_lm_session_delete`.
+//
+// @param engine The engine to create the session from.
+// @param config The session config of the session. If NULL, use the default
+// session config.
+// @return A pointer to the created session, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmSession* litert_lm_engine_create_session(LiteRtLmEngine* engine,
+                                                 LiteRtLmSessionConfig* config);
+// Destroys a LiteRT LM Session.
+//
+// @param session The session to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_session_delete(LiteRtLmSession* session);
+// Generates content from the input prompt.
+//
+// @param session The session to use for generation.
+// @param inputs An array of InputData structs representing the multimodal
+//   input.
+// @param num_inputs The number of InputData structs in the array.
+// @return A pointer to the responses, or NULL on failure. The caller is
+//   responsible for deleting the responses using `litert_lm_responses_delete`.
+LITERT_LM_C_API_EXPORT
+LiteRtLmResponses* litert_lm_session_generate_content(LiteRtLmSession* session,
+                                                      const InputData* inputs,
+                                                      size_t num_inputs);
+// Destroys a LiteRT LM Responses object.
+//
+// @param responses The responses to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_responses_delete(LiteRtLmResponses* responses);
+// Returns the number of response candidates.
+//
+// @param responses The responses object.
+// @return The number of candidates.
+LITERT_LM_C_API_EXPORT
+int litert_lm_responses_get_num_candidates(const LiteRtLmResponses* responses);
+// Returns the response text at a given index.
+//
+// @param responses The responses object.
+// @param index The index of the response.
+// @return The response text. The returned string is owned by the `responses`
+//   object and is valid only for its lifetime. Returns NULL if index is out of
+//   bounds.
+LITERT_LM_C_API_EXPORT
+const char* litert_lm_responses_get_response_text_at(
+    const LiteRtLmResponses* responses, int index);
+// Retrieves the benchmark information from the session. The caller is
+// responsible for destroying the benchmark info using
+// `litert_lm_benchmark_info_delete`.
+//
+// @param session The session to get the benchmark info from.
+// @return A pointer to the benchmark info, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmBenchmarkInfo* litert_lm_session_get_benchmark_info(
+    LiteRtLmSession* session);
+// Destroys a LiteRT LM Benchmark Info object.
+//
+// @param benchmark_info The benchmark info to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_benchmark_info_delete(LiteRtLmBenchmarkInfo* benchmark_info);
+// Returns the time to the first token in seconds.
+//
+// Note that the time to first token doesn't include the time for
+// initialization. It is the sum of the prefill time for the first turn and
+// the time spent for decoding the first token.
+//
+// @param benchmark_info The benchmark info object.
+// @return The time to the first token in seconds.
+LITERT_LM_C_API_EXPORT
+double litert_lm_benchmark_info_get_time_to_first_token(
+    const LiteRtLmBenchmarkInfo* benchmark_info);
+// Returns the total initialization time in seconds.
+//
+// @param benchmark_info The benchmark info object.
+// @return The total initialization time in seconds.
+LITERT_LM_C_API_EXPORT
+double litert_lm_benchmark_info_get_total_init_time_in_second(
+    const LiteRtLmBenchmarkInfo* benchmark_info);
+// Returns the number of prefill turns.
+//
+// @param benchmark_info The benchmark info object.
+// @return The number of prefill turns.
+LITERT_LM_C_API_EXPORT
+int litert_lm_benchmark_info_get_num_prefill_turns(
+    const LiteRtLmBenchmarkInfo* benchmark_info);
+// Returns the number of decode turns.
+//
+// @param benchmark_info The benchmark info object.
+// @return The number of decode turns.
+LITERT_LM_C_API_EXPORT
+int litert_lm_benchmark_info_get_num_decode_turns(
+    const LiteRtLmBenchmarkInfo* benchmark_info);
+// Returns the prefill token count at a given turn index.
+//
+// @param benchmark_info The benchmark info object.
+// @param index The index of the prefill turn.
+// @return The prefill token count.
+LITERT_LM_C_API_EXPORT
+int litert_lm_benchmark_info_get_prefill_token_count_at(
+    const LiteRtLmBenchmarkInfo* benchmark_info, int index);
+// Returns the decode token count at a given turn index.
+//
+// @param benchmark_info The benchmark info object.
+// @param index The index of the decode turn.
+// @return The decode token count.
+LITERT_LM_C_API_EXPORT
+int litert_lm_benchmark_info_get_decode_token_count_at(
+    const LiteRtLmBenchmarkInfo* benchmark_info, int index);
+// Returns the prefill tokens per second at a given turn index.
+//
+// @param benchmark_info The benchmark info object.
+// @param index The index of the prefill turn.
+// @return The prefill tokens per second.
+LITERT_LM_C_API_EXPORT
+double litert_lm_benchmark_info_get_prefill_tokens_per_sec_at(
+    const LiteRtLmBenchmarkInfo* benchmark_info, int index);
+// Returns the decode tokens per second at a given turn index.
+//
+// @param benchmark_info The benchmark info object.
+// @param index The index of the decode turn.
+// @return The decode tokens per second.
+LITERT_LM_C_API_EXPORT
+double litert_lm_benchmark_info_get_decode_tokens_per_sec_at(
+    const LiteRtLmBenchmarkInfo* benchmark_info, int index);
+// Callback for streaming responses.
+// `callback_data` is a pointer to user-defined data passed to the stream
+// function. `chunk` is the piece of text from the stream. It's only valid for
+// the duration of the call. `is_final` is true if this is the last chunk in the
+// stream. `error_msg` is a null-terminated string with an error message, or
+// NULL on success.
+typedef void (*LiteRtLmStreamCallback)(void* callback_data, const char* chunk,
+                                       bool is_final, const char* error_msg);
+// Generates content from the input prompt and streams the response via a
+// callback. This is a non-blocking call that will invoke the callback from a
+// background thread for each chunk.
+//
+// @param session The session to use for generation.
+// @param inputs An array of InputData structs representing the multimodal
+//   input.
+// @param num_inputs The number of InputData structs in the array.
+// @param callback The callback function to receive response chunks.
+// @param callback_data A pointer to user data that will be passed to the
+// callback.
+// @return 0 on success, non-zero on failure to start the stream.
+LITERT_LM_C_API_EXPORT
+int litert_lm_session_generate_content_stream(LiteRtLmSession* session,
+                                              const InputData* inputs,
+                                              size_t num_inputs,
+                                              LiteRtLmStreamCallback callback,
+                                              void* callback_data);
+// Creates a LiteRT LM Conversation. The caller is responsible for destroying
+// the conversation using `litert_lm_conversation_delete`.
+//
+// @param engine The engine to create the conversation from.
+// @param config The conversation config to use. If NULL, the default config
+//   will be used.
+// @return A pointer to the created conversation, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmConversation* litert_lm_conversation_create(
+    LiteRtLmEngine* engine, LiteRtLmConversationConfig* config);
+// Destroys a LiteRT LM Conversation.
+//
+// @param conversation The conversation to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_conversation_delete(LiteRtLmConversation* conversation);
+// Sends a message to the conversation and returns the response.
+// This is a blocking call.
+//
+// @param conversation The conversation to use.
+// @param message_json A JSON string representing the message to send.
+// @param extra_context A JSON string representing the extra context to use.
+// @return A pointer to the JSON response, or NULL on failure. The caller is
+//   responsible for deleting the response using
+//   `litert_lm_json_response_delete`.
+LITERT_LM_C_API_EXPORT
+LiteRtLmJsonResponse* litert_lm_conversation_send_message(
+    LiteRtLmConversation* conversation, const char* message_json,
+    const char* extra_context);
+// Destroys a LiteRT LM Json Response object.
+//
+// @param response The response to destroy.
+LITERT_LM_C_API_EXPORT
+void litert_lm_json_response_delete(LiteRtLmJsonResponse* response);
+// Returns the JSON response string from a response object.
+//
+// @param response The response object.
+// @return The response JSON string. The returned string is owned by the
+//   `response` object and is valid only for its lifetime. Returns NULL if
+//   response is NULL.
+LITERT_LM_C_API_EXPORT
+const char* litert_lm_json_response_get_string(
+    const LiteRtLmJsonResponse* response);
+// Sends a message to the conversation and streams the response via a
+// callback. This is a non-blocking call that will invoke the callback from a
+// background thread for each chunk.
+//
+// @param conversation The conversation to use.
+// @param message_json A JSON string representing the message to send.
+// @param extra_context A JSON string representing the extra context to use.
+// @param callback The callback function to receive response chunks.
+// @param callback_data A pointer to user data that will be passed to the
+// callback.
+// @return 0 on success, non-zero on failure to start the stream.
+LITERT_LM_C_API_EXPORT
+int litert_lm_conversation_send_message_stream(
+    LiteRtLmConversation* conversation, const char* message_json,
+    const char* extra_context, LiteRtLmStreamCallback callback,
+    void* callback_data);
+// Cancels the ongoing inference process, for asynchronous inference.
+//
+// @param conversation The conversation to cancel the inference for.
+LITERT_LM_C_API_EXPORT
+void litert_lm_conversation_cancel_process(LiteRtLmConversation* conversation);
+// Retrieves the benchmark information from the conversation. The caller is
+// responsible for destroying the benchmark info using
+// `litert_lm_benchmark_info_delete`.
+//
+// @param conversation The conversation to get the benchmark info from.
+// @return A pointer to the benchmark info, or NULL on failure.
+LITERT_LM_C_API_EXPORT
+LiteRtLmBenchmarkInfo* litert_lm_conversation_get_benchmark_info(
+    LiteRtLmConversation* conversation);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_