react-native-litert-lm 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +270 -186
  2. package/android/build.gradle +1 -1
  3. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +93 -37
  4. package/app.plugin.js +33 -0
  5. package/cpp/HybridLiteRTLM.cpp +571 -451
  6. package/cpp/HybridLiteRTLM.hpp +54 -23
  7. package/cpp/IOSDownloadHelper.h +24 -0
  8. package/cpp/cpp-adapter.cpp +2 -2
  9. package/cpp/include/litert_lm_engine.h +502 -0
  10. package/ios/IOSDownloadHelper.mm +129 -0
  11. package/ios/LiteRTLMAutolinking.mm +30 -0
  12. package/lib/hooks.d.ts +9 -4
  13. package/lib/hooks.js +34 -20
  14. package/lib/index.d.ts +1 -0
  15. package/lib/index.js +2 -5
  16. package/lib/memoryTracker.d.ts +1 -1
  17. package/lib/memoryTracker.js +1 -1
  18. package/lib/modelFactory.d.ts +11 -5
  19. package/lib/modelFactory.js +9 -4
  20. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +11 -4
  21. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +31 -37
  22. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +19 -22
  23. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +15 -18
  24. package/package.json +12 -5
  25. package/react-native-litert-lm.podspec +20 -7
  26. package/scripts/build-ios-engine.sh +283 -0
  27. package/scripts/download-ios-frameworks.sh +72 -0
  28. package/scripts/postinstall.js +116 -0
  29. package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
  30. package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
  31. package/scripts/stubs/llguidance_stubs.c +101 -0
  32. package/src/hooks.ts +62 -39
  33. package/src/index.ts +4 -7
  34. package/src/memoryTracker.ts +1 -1
  35. package/src/modelFactory.ts +30 -5
@@ -14,11 +14,9 @@
14
14
 
15
15
  #include "../nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp"
16
16
 
17
- // LiteRT-LM headers (conditionally included when available via Prefab/CMake)
18
- #ifdef LITERT_LM_ENABLED
19
- #include "litert/lm/engine.h"
20
- #include "litert/lm/conversation.h"
21
- #include "litert/lm/types.h"
17
+ // LiteRT-LM C API (iOS uses prebuilt framework with C ABI)
18
+ #ifdef __APPLE__
19
+ #include "include/litert_lm_engine.h"
22
20
  #endif
23
21
 
24
22
  // Memory usage headers
@@ -37,14 +35,15 @@
37
35
  #include <memory>
38
36
  #include <mutex>
39
37
  #include <functional>
38
+ #include <atomic>
40
39
 
41
40
  namespace margelo::nitro::litertlm {
42
41
 
43
42
  /**
44
43
  * HybridLiteRTLM: React Native bindings for LiteRT-LM.
45
44
  *
46
- * Wraps LiteRT-LM's Engine and Conversation classes to provide
47
- * high-level LLM inference with GPU acceleration.
45
+ * On iOS, wraps the LiteRT-LM C API (include/litert_lm_engine.h) from the prebuilt framework.
46
+ * On Android, this class is unused — the Kotlin implementation is used instead.
48
47
  */
49
48
  class HybridLiteRTLM : public HybridLiteRTLMSpec {
50
49
  public:
@@ -61,24 +60,26 @@ public:
61
60
  public:
62
61
  // HybridLiteRTLMSpec interface implementation
63
62
 
64
- void loadModel(const std::string& modelPath,
63
+ std::shared_ptr<Promise<void>> loadModel(const std::string& modelPath,
65
64
  const std::optional<LLMConfig>& config) override;
66
65
 
67
- std::string sendMessage(const std::string& message) override;
66
+ std::shared_ptr<Promise<std::string>> sendMessage(const std::string& message) override;
68
67
 
69
- std::string sendMessageWithImage(const std::string& message,
68
+ std::shared_ptr<Promise<std::string>> sendMessageWithImage(const std::string& message,
70
69
  const std::string& imagePath) override;
71
70
 
72
- std::future<std::string> downloadModel(const std::string& url,
71
+ std::shared_ptr<Promise<std::string>> downloadModel(const std::string& url,
73
72
  const std::string& fileName,
74
73
  const std::optional<std::function<void(double)>>& onProgress) override;
75
74
 
76
- std::string sendMessageWithAudio(const std::string& message,
75
+ std::shared_ptr<Promise<void>> deleteModel(const std::string& fileName) override;
76
+
77
+ std::shared_ptr<Promise<std::string>> sendMessageWithAudio(const std::string& message,
77
78
  const std::string& audioPath) override;
78
79
 
79
80
  void sendMessageAsync(
80
81
  const std::string& message,
81
- const std::function<void(std::string, bool)>& onToken
82
+ const std::function<void(const std::string&, bool)>& onToken
82
83
  ) override;
83
84
 
84
85
  std::vector<Message> getHistory() override;
@@ -94,10 +95,12 @@ public:
94
95
  void close() override;
95
96
 
96
97
  private:
97
- // LiteRT-LM resources (conditionally available on Android with Prefab)
98
- #ifdef LITERT_LM_ENABLED
99
- std::unique_ptr<litert::lm::Engine> engine_;
100
- std::unique_ptr<litert::lm::Conversation> conversation_;
98
+ // LiteRT-LM C API resources (iOS only)
99
+ #ifdef __APPLE__
100
+ LiteRtLmEngine* engine_ = nullptr;
101
+ LiteRtLmConversation* conversation_ = nullptr;
102
+ LiteRtLmConversationConfig* conv_config_ = nullptr;
103
+ LiteRtLmSessionConfig* session_config_ = nullptr;
101
104
  #endif
102
105
 
103
106
  // State
@@ -108,10 +111,11 @@ private:
108
111
  // Thread safety
109
112
  mutable std::mutex mutex_;
110
113
 
111
- // Configuration - backends
114
+ // Configuration - backend
112
115
  Backend backend_ = Backend::GPU;
113
- Backend visionBackend_ = Backend::GPU; // Gemma 3n requires GPU for vision
114
- Backend audioBackend_ = Backend::CPU; // Audio typically CPU
116
+
117
+ // System prompt / instruction
118
+ std::string systemPrompt_;
115
119
 
116
120
  // Configuration - sampling parameters
117
121
  double temperature_ = 0.7;
@@ -126,11 +130,38 @@ private:
126
130
  }
127
131
  }
128
132
 
129
- // Helper to format a message for the engine (apply chat template if needed)
130
- std::string formatUserPrompt(const std::string& message) const;
131
-
132
133
  // Helper to create a new conversation from existing engine
133
134
  void createNewConversation();
135
+
136
+ // JSON helpers for building C API message payloads
137
+ static std::string escapeJson(const std::string& input);
138
+ static std::string buildTextMessageJson(const std::string& text);
139
+ static std::string buildImageMessageJson(const std::string& text, const std::string& imagePath);
140
+ static std::string buildAudioMessageJson(const std::string& text, const std::string& audioPath);
141
+ static std::string extractTextFromResponse(const std::string& jsonResponse);
142
+
143
+ // Internal implementations (called from Promise lambdas)
144
+ void loadModelInternal(const std::string& modelPath, const std::optional<LLMConfig>& config);
145
+ std::string sendMessageInternal(const std::string& message);
146
+ std::string sendMessageWithImageInternal(const std::string& message, const std::string& imagePath);
147
+ std::string sendMessageWithAudioInternal(const std::string& message, const std::string& audioPath);
148
+
149
+ // Streaming callback context (must be a plain struct for C function pointer)
150
+ struct StreamContext {
151
+ std::function<void(const std::string&, bool)> onToken;
152
+ std::string fullResponse;
153
+ std::vector<Message>* history;
154
+ std::mutex* historyMutex;
155
+ std::string userMessage;
156
+ GenerationStats* lastStats;
157
+ std::chrono::steady_clock::time_point startTime;
158
+ int tokenCount;
159
+ };
160
+
161
+ // Static C callback for streaming (no captures needed)
162
+ static void streamCallbackFn(void* callback_data, const char* chunk,
163
+ bool is_final, const char* error_msg);
134
164
  };
135
165
 
136
166
  } // namespace margelo::nitro::litertlm
167
+
@@ -0,0 +1,24 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <functional>
5
+ #include <optional>
6
+
7
+ namespace litert_lm {
8
+
9
+ /**
10
+ * Download a file from a URL to the app's Caches/litert_models directory.
11
+ * Uses NSURLSession for efficient, resumable downloads.
12
+ *
13
+ * @param url HTTPS URL to download from
14
+ * @param fileName Destination filename
15
+ * @param onProgress Optional progress callback (0.0 to 1.0)
16
+ * @return Absolute path to the downloaded file
17
+ * @throws std::runtime_error on download failure
18
+ */
19
+ std::string downloadModelFile(
20
+ const std::string& url,
21
+ const std::string& fileName,
22
+ const std::optional<std::function<void(double)>>& onProgress);
23
+
24
+ } // namespace litert_lm
@@ -2,7 +2,7 @@
2
2
  /// cpp-adapter.cpp
3
3
  /// JNI Entry Point - Required by Nitrogen to register Kotlin HybridObjects
4
4
  ///
5
- /// Updated for react-native-nitro-modules v0.34+:
5
+ /// Updated for react-native-nitro-modules v0.35+:
6
6
  /// Uses facebook::jni::initialize() directly with registerAllNatives().
7
7
  ///
8
8
 
@@ -12,7 +12,7 @@
12
12
 
13
13
  // JNI_OnLoad is called when the native library is loaded via System.loadLibrary()
14
14
  // This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects.
15
- // The new v0.34 API allows registering custom C++ native JNI classes/functions
15
+ // The new v0.35 API allows registering custom C++ native JNI classes/functions
16
16
  // alongside Nitrogen's auto-generated registrations.
17
17
  JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
18
18
  return facebook::jni::initialize(vm, []() {
@@ -0,0 +1,502 @@
1
+ // Copyright 2025 The ODML Authors.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ #ifndef THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_
16
+ #define THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_
17
+
18
+ #include <stdbool.h>
19
+ #include <stddef.h>
20
+ #include <stdint.h>
21
+
22
+ #ifdef __cplusplus
23
+ extern "C" {
24
+ #endif
25
+
26
+ // For Windows, __declspec(dllexport) is required to export functions from a .dll.
27
+ // https://learn.microsoft.com/en-us/cpp/cpp/using-dllimport-and-dllexport-in-cpp-classes?view=msvc-170
28
+ //
29
+ // _WIN32 is defined as 1 when the compilation target is 32-bit ARM, 64-bit ARM,
30
+ // x86, x64, or ARM64EC. Otherwise, undefined.
31
+ // https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros
32
+ #if defined(_WIN32)
33
+ #define LITERT_LM_C_API_EXPORT __declspec(dllexport)
34
+ #else
35
+ #define LITERT_LM_C_API_EXPORT
36
+ #endif
37
+
38
+ // Opaque pointer for the LiteRT LM Engine.
39
+ typedef struct LiteRtLmEngine LiteRtLmEngine;
40
+
41
+ // Opaque pointer for the LiteRT LM Session.
42
+ typedef struct LiteRtLmSession LiteRtLmSession;
43
+
44
+ // Opaque pointer for the LiteRT LM Responses.
45
+ typedef struct LiteRtLmResponses LiteRtLmResponses;
46
+
47
+ // Opaque pointer for the LiteRT LM Engine Settings.
48
+ typedef struct LiteRtLmEngineSettings LiteRtLmEngineSettings;
49
+
50
+ // Opaque pointer for the LiteRT LM Benchmark Info.
51
+ typedef struct LiteRtLmBenchmarkInfo LiteRtLmBenchmarkInfo;
52
+
53
+ // Opaque pointer for the LiteRT LM Conversation.
54
+ typedef struct LiteRtLmConversation LiteRtLmConversation;
55
+
56
+ // Opaque pointer for a JSON response.
57
+ typedef struct LiteRtLmJsonResponse LiteRtLmJsonResponse;
58
+
59
+ // Opaque pointer for LiteRT LM Session Config.
60
+ typedef struct LiteRtLmSessionConfig LiteRtLmSessionConfig;
61
+
62
+ // Opaque pointer for LiteRT LM Conversation Config.
63
+ typedef struct LiteRtLmConversationConfig LiteRtLmConversationConfig;
64
+
65
+ // Represents the type of sampler.
66
+ typedef enum {
67
+ kTypeUnspecified = 0,
68
+ // Probabilistically pick among the top k tokens.
69
+ kTopK = 1,
70
+ // Probabilistically pick among the tokens whose cumulative probability is
71
+ // greater than or equal to p, after first performing top-k sampling.
72
+ kTopP = 2,
73
+ // Pick the token with maximum logit (i.e., argmax).
74
+ kGreedy = 3,
75
+ } Type;
76
+
77
+ // Parameters for the sampler.
78
+ typedef struct {
79
+ Type type;
80
+ int32_t top_k;
81
+ float top_p;
82
+ float temperature;
83
+ int32_t seed;
84
+ } LiteRtLmSamplerParams;
85
+
86
+ // Creates a LiteRT LM Session Config.
87
+ // The caller is responsible for destroying the config using
88
+ // `litert_lm_session_config_delete`.
89
+ // @return A pointer to the created config, or NULL on failure.
90
+ LITERT_LM_C_API_EXPORT
91
+ LiteRtLmSessionConfig* litert_lm_session_config_create();
92
+
93
+ // Sets the maximum number of output tokens per decode step for this session.
94
+ // @param config The config to modify.
95
+ // @param max_output_tokens The maximum number of output tokens.
96
+ LITERT_LM_C_API_EXPORT
97
+ void litert_lm_session_config_set_max_output_tokens(
98
+ LiteRtLmSessionConfig* config, int max_output_tokens);
99
+
100
+ // Sets the sampler parameters for this session config.
101
+ // @param config The config to modify.
102
+ // @param sampler_params The sampler parameters to use.
103
+ LITERT_LM_C_API_EXPORT
104
+ void litert_lm_session_config_set_sampler_params(
105
+ LiteRtLmSessionConfig* config, const LiteRtLmSamplerParams* sampler_params);
106
+
107
+ // Destroys a LiteRT LM Session Config.
108
+ // @param config The config to destroy.
109
+ LITERT_LM_C_API_EXPORT
110
+ void litert_lm_session_config_delete(LiteRtLmSessionConfig* config);
111
+
112
+ // Creates a LiteRT LM Conversation Config.
113
+ // The caller is responsible for destroying the config using
114
+ // `litert_lm_conversation_config_delete`.
115
+ // @param engine The engine to use.
116
+ // @param session_config The session config to use. If NULL, default
117
+ // session config will be used.
118
+ // @param system_message_json The system message in JSON format.
119
+ // @param tools_json The tools description in JSON array format.
120
+ // @param enable_constrained_decoding Whether to enable constrained decoding.
121
+ // @return A pointer to the created config, or NULL on failure.
122
+ LITERT_LM_C_API_EXPORT
123
+ LiteRtLmConversationConfig* litert_lm_conversation_config_create(
124
+ LiteRtLmEngine* engine, const LiteRtLmSessionConfig* session_config,
125
+ const char* system_message_json, const char* tools_json,
126
+ const char* messages_json, bool enable_constrained_decoding);
127
+
128
+ // Destroys a LiteRT LM Conversation Config.
129
+ // @param config The config to destroy.
130
+ LITERT_LM_C_API_EXPORT
131
+ void litert_lm_conversation_config_delete(LiteRtLmConversationConfig* config);
132
+
133
+ // Sets the minimum log level for the LiteRT LM library.
134
+ // Log levels are: 0=INFO, 1=WARNING, 2=ERROR, 3=FATAL.
135
+ LITERT_LM_C_API_EXPORT
136
+ void litert_lm_set_min_log_level(int level);
137
+
138
+ // Represents the type of input data.
139
+ typedef enum {
140
+ kInputText,
141
+ kInputImage,
142
+ kInputImageEnd,
143
+ kInputAudio,
144
+ kInputAudioEnd,
145
+ } InputDataType;
146
+
147
+ // Represents a single piece of input data.
148
+ typedef struct {
149
+ InputDataType type;
150
+ // The data pointer. The interpretation depends on the `type`.
151
+ // For kInputText, it's a UTF-8 string.
152
+ // For kInputImage and kInputAudio, it's a pointer to the raw bytes.
153
+ const void* data;
154
+ // The size of the data in bytes.
155
+ size_t size;
156
+ } InputData;
157
+
158
+ // Creates LiteRT LM Engine Settings. The caller is responsible for destroying
159
+ // the settings using `litert_lm_engine_settings_delete`.
160
+ //
161
+ // @param model_path The path to the model file.
162
+ // @param backend_str The backend to use (e.g., "cpu", "gpu").
163
+ // @param vision_backend_str The vision backend to use, or NULL if not set.
164
+ // @param audio_backend_str The audio backend to use, or NULL if not set.
165
+ // @return A pointer to the created settings, or NULL on failure.
166
+ LITERT_LM_C_API_EXPORT
167
+ LiteRtLmEngineSettings* litert_lm_engine_settings_create(
168
+ const char* model_path, const char* backend_str,
169
+ const char* vision_backend_str, const char* audio_backend_str);
170
+
171
+ // Destroys LiteRT LM Engine Settings.
172
+ //
173
+ // @param settings The settings to destroy.
174
+ LITERT_LM_C_API_EXPORT
175
+ void litert_lm_engine_settings_delete(LiteRtLmEngineSettings* settings);
176
+
177
+ // Sets the maximum number of tokens for the engine.
178
+ //
179
+ // @param settings The engine settings.
180
+ // @param max_num_tokens The maximum number of tokens.
181
+ LITERT_LM_C_API_EXPORT
182
+ void litert_lm_engine_settings_set_max_num_tokens(
183
+ LiteRtLmEngineSettings* settings, int max_num_tokens);
184
+
185
+ // Sets the cache directory for the engine.
186
+ //
187
+ // @param settings The engine settings.
188
+ // @param cache_dir The cache directory.
189
+ LITERT_LM_C_API_EXPORT
190
+ void litert_lm_engine_settings_set_cache_dir(LiteRtLmEngineSettings* settings,
191
+ const char* cache_dir);
192
+
193
+ // Sets the activation data type.
194
+ //
195
+ // @param settings The engine settings.
196
+ // @param activation_data_type_int The activation data type. See
197
+ // `ActivationDataType` in executor_settings_base.h for the possible values
198
+ // (e.g., 0 for F32, 1 for F16, 2 for I16, 3 for I8).
199
+ LITERT_LM_C_API_EXPORT
200
+ void litert_lm_engine_settings_set_activation_data_type(
201
+ LiteRtLmEngineSettings* settings, int activation_data_type_int);
202
+
203
+ // Sets the prefill chunk size for the engine. Only applicable for CPU backend
204
+ // with dynamic models.
205
+ //
206
+ // @param settings The engine settings.
207
+ // @param prefill_chunk_size The prefill chunk size.
208
+ LITERT_LM_C_API_EXPORT
209
+ void litert_lm_engine_settings_set_prefill_chunk_size(
210
+ LiteRtLmEngineSettings* settings, int prefill_chunk_size);
211
+
212
+ // Enables benchmarking for the engine.
213
+ //
214
+ // @param settings The engine settings.
215
+ LITERT_LM_C_API_EXPORT
216
+ void litert_lm_engine_settings_enable_benchmark(
217
+ LiteRtLmEngineSettings* settings);
218
+
219
+ // Sets the number of prefill tokens for benchmarking.
220
+ //
221
+ // @param settings The engine settings.
222
+ // @param num_prefill_tokens The number of prefill tokens.
223
+ LITERT_LM_C_API_EXPORT
224
+ void litert_lm_engine_settings_set_num_prefill_tokens(
225
+ LiteRtLmEngineSettings* settings, int num_prefill_tokens);
226
+
227
+ // Sets the number of decode tokens for benchmarking.
228
+ //
229
+ // @param settings The engine settings.
230
+ // @param num_decode_tokens The number of decode tokens.
231
+ LITERT_LM_C_API_EXPORT
232
+ void litert_lm_engine_settings_set_num_decode_tokens(
233
+ LiteRtLmEngineSettings* settings, int num_decode_tokens);
234
+
235
+ // Creates a LiteRT LM Engine from the given settings. The caller is responsible
236
+ // for destroying the engine using `litert_lm_engine_delete`.
237
+ //
238
+ // @param settings The engine settings.
239
+ // @return A pointer to the created engine, or NULL on failure.
240
+ LITERT_LM_C_API_EXPORT
241
+ LiteRtLmEngine* litert_lm_engine_create(const LiteRtLmEngineSettings* settings);
242
+
243
+ // Destroys a LiteRT LM Engine.
244
+ //
245
+ // @param engine The engine to destroy.
246
+ LITERT_LM_C_API_EXPORT
247
+ void litert_lm_engine_delete(LiteRtLmEngine* engine);
248
+
249
+ // Creates a LiteRT LM Session. The caller is responsible for destroying the
250
+ // session using `litert_lm_session_delete`.
251
+ //
252
+ // @param engine The engine to create the session from.
253
+ // @param config The session config of the session. If NULL, use the default
254
+ // session config.
255
+ // @return A pointer to the created session, or NULL on failure.
256
+ LITERT_LM_C_API_EXPORT
257
+ LiteRtLmSession* litert_lm_engine_create_session(LiteRtLmEngine* engine,
258
+ LiteRtLmSessionConfig* config);
259
+
260
+ // Destroys a LiteRT LM Session.
261
+ //
262
+ // @param session The session to destroy.
263
+ LITERT_LM_C_API_EXPORT
264
+ void litert_lm_session_delete(LiteRtLmSession* session);
265
+
266
+ // Generates content from the input prompt.
267
+ //
268
+ // @param session The session to use for generation.
269
+ // @param inputs An array of InputData structs representing the multimodal
270
+ // input.
271
+ // @param num_inputs The number of InputData structs in the array.
272
+ // @return A pointer to the responses, or NULL on failure. The caller is
273
+ // responsible for deleting the responses using `litert_lm_responses_delete`.
274
+ LITERT_LM_C_API_EXPORT
275
+ LiteRtLmResponses* litert_lm_session_generate_content(LiteRtLmSession* session,
276
+ const InputData* inputs,
277
+ size_t num_inputs);
278
+ // Destroys a LiteRT LM Responses object.
279
+ //
280
+ // @param responses The responses to destroy.
281
+ LITERT_LM_C_API_EXPORT
282
+ void litert_lm_responses_delete(LiteRtLmResponses* responses);
283
+
284
+ // Returns the number of response candidates.
285
+ //
286
+ // @param responses The responses object.
287
+ // @return The number of candidates.
288
+ LITERT_LM_C_API_EXPORT
289
+ int litert_lm_responses_get_num_candidates(const LiteRtLmResponses* responses);
290
+
291
+ // Returns the response text at a given index.
292
+ //
293
+ // @param responses The responses object.
294
+ // @param index The index of the response.
295
+ // @return The response text. The returned string is owned by the `responses`
296
+ // object and is valid only for its lifetime. Returns NULL if index is out of
297
+ // bounds.
298
+ LITERT_LM_C_API_EXPORT
299
+ const char* litert_lm_responses_get_response_text_at(
300
+ const LiteRtLmResponses* responses, int index);
301
+
302
+ // Retrieves the benchmark information from the session. The caller is
303
+ // responsible for destroying the benchmark info using
304
+ // `litert_lm_benchmark_info_delete`.
305
+ //
306
+ // @param session The session to get the benchmark info from.
307
+ // @return A pointer to the benchmark info, or NULL on failure.
308
+ LITERT_LM_C_API_EXPORT
309
+ LiteRtLmBenchmarkInfo* litert_lm_session_get_benchmark_info(
310
+ LiteRtLmSession* session);
311
+
312
+ // Destroys a LiteRT LM Benchmark Info object.
313
+ //
314
+ // @param benchmark_info The benchmark info to destroy.
315
+ LITERT_LM_C_API_EXPORT
316
+ void litert_lm_benchmark_info_delete(LiteRtLmBenchmarkInfo* benchmark_info);
317
+
318
+ // Returns the time to the first token in seconds.
319
+ //
320
+ // Note that the time to first token doesn't include the time for
321
+ // initialization. It is the sum of the prefill time for the first turn and
322
+ // the time spent for decoding the first token.
323
+ //
324
+ // @param benchmark_info The benchmark info object.
325
+ // @return The time to the first token in seconds.
326
+ LITERT_LM_C_API_EXPORT
327
+ double litert_lm_benchmark_info_get_time_to_first_token(
328
+ const LiteRtLmBenchmarkInfo* benchmark_info);
329
+
330
+ // Returns the total initialization time in seconds.
331
+ //
332
+ // @param benchmark_info The benchmark info object.
333
+ // @return The total initialization time in seconds.
334
+ LITERT_LM_C_API_EXPORT
335
+ double litert_lm_benchmark_info_get_total_init_time_in_second(
336
+ const LiteRtLmBenchmarkInfo* benchmark_info);
337
+
338
+ // Returns the number of prefill turns.
339
+ //
340
+ // @param benchmark_info The benchmark info object.
341
+ // @return The number of prefill turns.
342
+ LITERT_LM_C_API_EXPORT
343
+ int litert_lm_benchmark_info_get_num_prefill_turns(
344
+ const LiteRtLmBenchmarkInfo* benchmark_info);
345
+
346
+ // Returns the number of decode turns.
347
+ //
348
+ // @param benchmark_info The benchmark info object.
349
+ // @return The number of decode turns.
350
+ LITERT_LM_C_API_EXPORT
351
+ int litert_lm_benchmark_info_get_num_decode_turns(
352
+ const LiteRtLmBenchmarkInfo* benchmark_info);
353
+
354
+ // Returns the prefill token count at a given turn index.
355
+ //
356
+ // @param benchmark_info The benchmark info object.
357
+ // @param index The index of the prefill turn.
358
+ // @return The prefill token count.
359
+ LITERT_LM_C_API_EXPORT
360
+ int litert_lm_benchmark_info_get_prefill_token_count_at(
361
+ const LiteRtLmBenchmarkInfo* benchmark_info, int index);
362
+
363
+ // Returns the decode token count at a given turn index.
364
+ //
365
+ // @param benchmark_info The benchmark info object.
366
+ // @param index The index of the decode turn.
367
+ // @return The decode token count.
368
+ LITERT_LM_C_API_EXPORT
369
+ int litert_lm_benchmark_info_get_decode_token_count_at(
370
+ const LiteRtLmBenchmarkInfo* benchmark_info, int index);
371
+
372
+ // Returns the prefill tokens per second at a given turn index.
373
+ //
374
+ // @param benchmark_info The benchmark info object.
375
+ // @param index The index of the prefill turn.
376
+ // @return The prefill tokens per second.
377
+ LITERT_LM_C_API_EXPORT
378
+ double litert_lm_benchmark_info_get_prefill_tokens_per_sec_at(
379
+ const LiteRtLmBenchmarkInfo* benchmark_info, int index);
380
+
381
+ // Returns the decode tokens per second at a given turn index.
382
+ //
383
+ // @param benchmark_info The benchmark info object.
384
+ // @param index The index of the decode turn.
385
+ // @return The decode tokens per second.
386
+ LITERT_LM_C_API_EXPORT
387
+ double litert_lm_benchmark_info_get_decode_tokens_per_sec_at(
388
+ const LiteRtLmBenchmarkInfo* benchmark_info, int index);
389
+
390
+ // Callback for streaming responses.
391
+ // `callback_data` is a pointer to user-defined data passed to the stream
392
+ // function. `chunk` is the piece of text from the stream. It's only valid for
393
+ // the duration of the call. `is_final` is true if this is the last chunk in the
394
+ // stream. `error_msg` is a null-terminated string with an error message, or
395
+ // NULL on success.
396
+ typedef void (*LiteRtLmStreamCallback)(void* callback_data, const char* chunk,
397
+ bool is_final, const char* error_msg);
398
+
399
+ // Generates content from the input prompt and streams the response via a
400
+ // callback. This is a non-blocking call that will invoke the callback from a
401
+ // background thread for each chunk.
402
+ //
403
+ // @param session The session to use for generation.
404
+ // @param inputs An array of InputData structs representing the multimodal
405
+ // input.
406
+ // @param num_inputs The number of InputData structs in the array.
407
+ // @param callback The callback function to receive response chunks.
408
+ // @param callback_data A pointer to user data that will be passed to the
409
+ // callback.
410
+ // @return 0 on success, non-zero on failure to start the stream.
411
+ LITERT_LM_C_API_EXPORT
412
+ int litert_lm_session_generate_content_stream(LiteRtLmSession* session,
413
+ const InputData* inputs,
414
+ size_t num_inputs,
415
+ LiteRtLmStreamCallback callback,
416
+ void* callback_data);
417
+
418
+ // Creates a LiteRT LM Conversation. The caller is responsible for destroying
419
+ // the conversation using `litert_lm_conversation_delete`.
420
+ //
421
+ // @param engine The engine to create the conversation from.
422
+ // @param config The conversation config to use. If NULL, the default config
423
+ // will be used.
424
+ // @return A pointer to the created conversation, or NULL on failure.
425
+ LITERT_LM_C_API_EXPORT
426
+ LiteRtLmConversation* litert_lm_conversation_create(
427
+ LiteRtLmEngine* engine, LiteRtLmConversationConfig* config);
428
+
429
+ // Destroys a LiteRT LM Conversation.
430
+ //
431
+ // @param conversation The conversation to destroy.
432
+ LITERT_LM_C_API_EXPORT
433
+ void litert_lm_conversation_delete(LiteRtLmConversation* conversation);
434
+
435
+ // Sends a message to the conversation and returns the response.
436
+ // This is a blocking call.
437
+ //
438
+ // @param conversation The conversation to use.
439
+ // @param message_json A JSON string representing the message to send.
440
+ // @param extra_context A JSON string representing the extra context to use.
441
+ // @return A pointer to the JSON response, or NULL on failure. The caller is
442
+ // responsible for deleting the response using
443
+ // `litert_lm_json_response_delete`.
444
+ LITERT_LM_C_API_EXPORT
445
+ LiteRtLmJsonResponse* litert_lm_conversation_send_message(
446
+ LiteRtLmConversation* conversation, const char* message_json,
447
+ const char* extra_context);
448
+
449
+ // Destroys a LiteRT LM Json Response object.
450
+ //
451
+ // @param response The response to destroy.
452
+ LITERT_LM_C_API_EXPORT
453
+ void litert_lm_json_response_delete(LiteRtLmJsonResponse* response);
454
+
455
+ // Returns the JSON response string from a response object.
456
+ //
457
+ // @param response The response object.
458
+ // @return The response JSON string. The returned string is owned by the
459
+ // `response` object and is valid only for its lifetime. Returns NULL if
460
+ // response is NULL.
461
+ LITERT_LM_C_API_EXPORT
462
+ const char* litert_lm_json_response_get_string(
463
+ const LiteRtLmJsonResponse* response);
464
+
465
+ // Sends a message to the conversation and streams the response via a
466
+ // callback. This is a non-blocking call that will invoke the callback from a
467
+ // background thread for each chunk.
468
+ //
469
+ // @param conversation The conversation to use.
470
+ // @param message_json A JSON string representing the message to send.
471
+ // @param extra_context A JSON string representing the extra context to use.
472
+ // @param callback The callback function to receive response chunks.
473
+ // @param callback_data A pointer to user data that will be passed to the
474
+ // callback.
475
+ // @return 0 on success, non-zero on failure to start the stream.
476
+ LITERT_LM_C_API_EXPORT
477
+ int litert_lm_conversation_send_message_stream(
478
+ LiteRtLmConversation* conversation, const char* message_json,
479
+ const char* extra_context, LiteRtLmStreamCallback callback,
480
+ void* callback_data);
481
+
482
+ // Cancels the ongoing inference process, for asynchronous inference.
483
+ //
484
+ // @param conversation The conversation to cancel the inference for.
485
+ LITERT_LM_C_API_EXPORT
486
+ void litert_lm_conversation_cancel_process(LiteRtLmConversation* conversation);
487
+
488
+ // Retrieves the benchmark information from the conversation. The caller is
489
+ // responsible for destroying the benchmark info using
490
+ // `litert_lm_benchmark_info_delete`.
491
+ //
492
+ // @param conversation The conversation to get the benchmark info from.
493
+ // @return A pointer to the benchmark info, or NULL on failure.
494
+ LITERT_LM_C_API_EXPORT
495
+ LiteRtLmBenchmarkInfo* litert_lm_conversation_get_benchmark_info(
496
+ LiteRtLmConversation* conversation);
497
+
498
+ #ifdef __cplusplus
499
+ } // extern "C"
500
+ #endif
501
+
502
+ #endif // THIRD_PARTY_ODML_LITERT_LM_C_ENGINE_H_