@runanywhere/core 0.17.8 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/README.md +218 -2
  2. package/RunAnywhereCore.podspec +1 -0
  3. package/android/CMakeLists.txt +24 -2
  4. package/android/build.gradle +61 -9
  5. package/android/src/main/cpp/cpp-adapter.cpp +51 -3
  6. package/android/src/main/include/rac/backends/rac_vlm_llamacpp.h +216 -0
  7. package/android/src/main/include/rac/core/capabilities/rac_lifecycle.h +3 -1
  8. package/android/src/main/include/rac/core/rac_core.h +11 -0
  9. package/android/src/main/include/rac/core/rac_types.h +8 -6
  10. package/android/src/main/include/rac/features/diffusion/rac_diffusion.h +22 -0
  11. package/android/src/main/include/rac/features/diffusion/rac_diffusion_component.h +263 -0
  12. package/android/src/main/include/rac/features/diffusion/rac_diffusion_model_registry.h +358 -0
  13. package/android/src/main/include/rac/features/diffusion/rac_diffusion_service.h +187 -0
  14. package/android/src/main/include/rac/features/diffusion/rac_diffusion_tokenizer.h +167 -0
  15. package/android/src/main/include/rac/features/diffusion/rac_diffusion_types.h +454 -0
  16. package/android/src/main/include/rac/features/llm/rac_tool_calling.h +373 -0
  17. package/android/src/main/include/rac/features/platform/rac_diffusion_platform.h +305 -0
  18. package/android/src/main/include/rac/features/vad/rac_vad_energy.h +1 -1
  19. package/android/src/main/include/rac/features/vlm/rac_vlm.h +16 -0
  20. package/android/src/main/include/rac/features/vlm/rac_vlm_component.h +168 -0
  21. package/android/src/main/include/rac/features/vlm/rac_vlm_service.h +206 -0
  22. package/android/src/main/include/rac/features/vlm/rac_vlm_types.h +417 -0
  23. package/android/src/main/include/rac/infrastructure/model_management/rac_model_registry.h +15 -0
  24. package/android/src/main/include/rac/infrastructure/model_management/rac_model_types.h +3 -0
  25. package/android/src/main/include/rac/utils/rac_image_utils.h +215 -0
  26. package/android/src/main/java/com/margelo/nitro/runanywhere/PlatformAdapterBridge.kt +201 -1
  27. package/android/src/main/jniLibs/arm64-v8a/libc++_shared.so +0 -0
  28. package/android/src/main/jniLibs/arm64-v8a/libomp.so +0 -0
  29. package/android/src/main/jniLibs/arm64-v8a/librac_commons.so +0 -0
  30. package/android/src/main/jniLibs/arm64-v8a/librunanywhere_jni.so +0 -0
  31. package/android/src/main/jniLibs/x86_64/libc++_shared.so +0 -0
  32. package/android/src/main/jniLibs/x86_64/libomp.so +0 -0
  33. package/android/src/main/jniLibs/x86_64/librac_commons.so +0 -0
  34. package/android/src/main/jniLibs/x86_64/librunanywhere_jni.so +0 -0
  35. package/cpp/HybridRunAnywhereCore.cpp +263 -163
  36. package/cpp/HybridRunAnywhereCore.hpp +11 -0
  37. package/cpp/bridges/InitBridge.cpp +234 -3
  38. package/cpp/bridges/PlatformDownloadBridge.h +44 -0
  39. package/cpp/bridges/ToolCallingBridge.cpp +188 -0
  40. package/cpp/bridges/ToolCallingBridge.hpp +98 -0
  41. package/cpp/third_party/nlohmann/json.hpp +24765 -0
  42. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/RACommons.h +18 -4
  43. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_core.h +11 -0
  44. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion.h +22 -0
  45. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_component.h +263 -0
  46. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_model_registry.h +358 -0
  47. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_platform.h +305 -0
  48. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_service.h +187 -0
  49. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_tokenizer.h +167 -0
  50. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_types.h +454 -0
  51. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_endpoints.h +3 -17
  52. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_image_utils.h +215 -0
  53. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_lifecycle.h +3 -1
  54. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_model_assignment.h +4 -20
  55. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_model_registry.h +15 -0
  56. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_model_types.h +3 -0
  57. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_tool_calling.h +373 -0
  58. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_types.h +8 -6
  59. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vad_energy.h +1 -1
  60. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm.h +16 -0
  61. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_component.h +168 -0
  62. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_llamacpp.h +216 -0
  63. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_service.h +206 -0
  64. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_types.h +417 -0
  65. package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/RACommons +0 -0
  66. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/RACommons.h +18 -4
  67. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_core.h +11 -0
  68. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion.h +22 -0
  69. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_component.h +263 -0
  70. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_model_registry.h +358 -0
  71. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_platform.h +305 -0
  72. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_service.h +187 -0
  73. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_tokenizer.h +167 -0
  74. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_types.h +454 -0
  75. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_endpoints.h +3 -17
  76. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_image_utils.h +215 -0
  77. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_lifecycle.h +3 -1
  78. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_model_assignment.h +4 -20
  79. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_model_registry.h +15 -0
  80. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_model_types.h +3 -0
  81. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_tool_calling.h +373 -0
  82. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_types.h +8 -6
  83. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vad_energy.h +1 -1
  84. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm.h +16 -0
  85. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_component.h +168 -0
  86. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_llamacpp.h +216 -0
  87. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_service.h +206 -0
  88. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_types.h +417 -0
  89. package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/RACommons +0 -0
  90. package/ios/PlatformAdapterBridge.h +24 -1
  91. package/ios/PlatformAdapterBridge.m +243 -0
  92. package/nitrogen/generated/shared/c++/HybridRunAnywhereCoreSpec.cpp +4 -0
  93. package/nitrogen/generated/shared/c++/HybridRunAnywhereCoreSpec.hpp +4 -0
  94. package/package.json +8 -4
  95. package/src/Foundation/Security/SecureStorageService.ts +12 -6
  96. package/src/Public/Extensions/RunAnywhere+Models.ts +5 -3
  97. package/src/Public/Extensions/RunAnywhere+STT.ts +7 -2
  98. package/src/Public/Extensions/RunAnywhere+ToolCalling.ts +472 -0
  99. package/src/Public/Extensions/index.ts +16 -0
  100. package/src/Public/RunAnywhere.ts +18 -0
  101. package/src/index.ts +0 -1
  102. package/src/services/Network/index.ts +0 -1
  103. package/src/services/index.ts +0 -1
  104. package/src/specs/RunAnywhereCore.nitro.ts +72 -0
  105. package/src/types/ToolCallingTypes.ts +198 -0
  106. package/src/types/index.ts +13 -0
@@ -0,0 +1,417 @@
1
+ /**
2
+ * @file rac_vlm_types.h
3
+ * @brief RunAnywhere Commons - VLM Types and Data Structures
4
+ *
5
+ * Defines data structures for Vision Language Model (VLM) operations.
6
+ * Supports image input (file path, RGB pixels, base64), generation options,
7
+ * results, and streaming callbacks.
8
+ *
9
+ * For the service interface, see rac_vlm_service.h.
10
+ */
11
+
12
+ #ifndef RAC_VLM_TYPES_H
13
+ #define RAC_VLM_TYPES_H
14
+
15
+ #include "rac/core/rac_types.h"
16
+
17
+ #ifdef __cplusplus
18
+ extern "C" {
19
+ #endif
20
+
21
+ // =============================================================================
22
+ // CHAT TEMPLATE - Abstraction for VLM prompt formatting
23
+ // =============================================================================
24
+
25
+ /**
26
+ * @brief Known VLM model families, used to pick the chat template for prompt formatting
27
+ *
28
+ * Use RAC_VLM_MODEL_FAMILY_AUTO (default) to auto-detect from model metadata.
29
+ * Use RAC_VLM_MODEL_FAMILY_CUSTOM together with rac_vlm_options_t.custom_chat_template for new models.
30
+ *
31
+ * Verified templates (from official HuggingFace repos):
32
+ * - QWEN2_VL: <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
33
+ * <|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{prompt}<|im_end|>\n
34
+ * <|im_start|>assistant\n
35
+ * - SMOLVLM: <|im_start|>User: {image}{prompt} \nAssistant:
36
+ * - LLAVA: USER: <image>\n{prompt}\nASSISTANT:
37
+ */
38
+ typedef enum rac_vlm_model_family {
39
+ RAC_VLM_MODEL_FAMILY_AUTO = 0, /**< Auto-detect from model metadata (default) */
40
+ RAC_VLM_MODEL_FAMILY_QWEN2_VL = 1, /**< Qwen2-VL: chatml with <|vision_start|> markers */
41
+ RAC_VLM_MODEL_FAMILY_SMOLVLM = 2, /**< SmolVLM: <|im_start|>User: format */
42
+ RAC_VLM_MODEL_FAMILY_LLAVA = 3, /**< LLaVA/Vicuna: USER:/ASSISTANT: format */
43
+ RAC_VLM_MODEL_FAMILY_CUSTOM = 99, /**< Use the template supplied via rac_vlm_options_t.custom_chat_template */
44
+ } rac_vlm_model_family_t;
45
+
46
+ /**
47
+ * @brief Custom chat template for VLM prompt formatting
48
+ *
49
+ * A simple template string with placeholders:
50
+ * {system} - System prompt (optional, can be empty)
51
+ * {image} - Image marker/placeholder (filled from image_marker)
52
+ * {prompt} - User's text prompt
53
+ *
54
+ * Example template string:
55
+ * "<|im_start|>user\n{image}{prompt}<|im_end|>\n<|im_start|>assistant\n"
56
+ *
57
+ * The SDK replaces the placeholders at runtime. If {system} appears in the template
58
+ * but no system prompt is provided, default_system_prompt is used; if that is also NULL the placeholder presumably expands to nothing (TODO confirm).
59
+ */
60
+ typedef struct rac_vlm_chat_template {
61
+ /**
62
+ * Full template string with {system}, {image}, {prompt} placeholders.
63
+ * Example: "<|im_start|>user\n{image}{prompt}<|im_end|>\n<|im_start|>assistant\n"
64
+ */
65
+ const char* template_str;
66
+
67
+ /**
68
+ * Image marker text substituted for the {image} placeholder.
69
+ * Examples: "<image>", "<|vision_start|><|image_pad|><|vision_end|>"
70
+ * If NULL, uses the backend's default marker.
71
+ */
72
+ const char* image_marker;
73
+
74
+ /**
75
+ * System prompt used when {system} is in the template but the caller provided none.
76
+ * Can be NULL for no default.
77
+ */
78
+ const char* default_system_prompt;
79
+ } rac_vlm_chat_template_t;
80
+
81
+ /**
82
+ * @brief Look up the built-in chat template for a model family
83
+ *
84
+ * @param family Model family enum value (rac_vlm_model_family_t)
85
+ * @return Pointer to a static, const template (not to be freed or modified by the caller), or NULL if the family is not supported. NOTE(review): whether AUTO and CUSTOM return NULL is not stated here - confirm.
86
+ */
87
+ RAC_API const rac_vlm_chat_template_t* rac_vlm_get_builtin_template(rac_vlm_model_family_t family);
88
+
89
+ // =============================================================================
90
+ // IMAGE INPUT - Supports multiple input formats
91
+ // =============================================================================
92
+
93
+ /**
94
+ * @brief VLM image input formats; stored in rac_vlm_image_t.format to tell which payload field carries the image
95
+ */
96
+ typedef enum rac_vlm_image_format {
97
+ RAC_VLM_IMAGE_FORMAT_FILE_PATH = 0, /**< Path to image file (JPEG, PNG, etc.); see rac_vlm_image_t.file_path */
98
+ RAC_VLM_IMAGE_FORMAT_RGB_PIXELS = 1, /**< Raw RGB pixel buffer (RGBRGBRGB...); see rac_vlm_image_t.pixel_data */
99
+ RAC_VLM_IMAGE_FORMAT_BASE64 = 2, /**< Base64-encoded image data; see rac_vlm_image_t.base64_data */
100
+ } rac_vlm_image_format_t;
101
+
102
+ /**
103
+ * @brief VLM image input structure
104
+ *
105
+ * Represents one image input for VLM processing. The format field indicates which payload field is meaningful:
106
+ * - FILE_PATH: Path to an image file on disk (file_path)
107
+ * - RGB_PIXELS: Raw RGB pixel data with width/height (pixel_data, width, height)
108
+ * - BASE64: Base64-encoded image data (base64_data)
109
+ */
110
+ typedef struct rac_vlm_image {
111
+ /** Image format type; indicates which of file_path / pixel_data / base64_data carries the image */
112
+ rac_vlm_image_format_t format;
113
+
114
+ /** Path to image file (for FILE_PATH format) */
115
+ const char* file_path;
116
+
117
+ /** Raw RGB pixel data (for RGB_PIXELS format, interleaved layout: RGBRGBRGB...) */
118
+ const uint8_t* pixel_data;
119
+
120
+ /** Base64-encoded image data (for BASE64 format) */
121
+ const char* base64_data;
122
+
123
+ /** Image width in pixels (required for RGB_PIXELS, 0 otherwise) */
124
+ uint32_t width;
125
+
126
+ /** Image height in pixels (required for RGB_PIXELS, 0 otherwise) */
127
+ uint32_t height;
128
+
129
+ /** Size of pixel_data or base64_data in bytes (presumably width * height * 3 for RGB_PIXELS and unused for FILE_PATH - TODO confirm) */
130
+ size_t data_size;
131
+ } rac_vlm_image_t;
132
+
133
+ // =============================================================================
134
+ // OPTIONS - VLM Generation Options
135
+ // =============================================================================
136
+
137
+ /**
138
+ * @brief VLM generation options
139
+ *
140
+ * Controls text generation behavior for VLM inference.
141
+ * Combines standard LLM options with VLM-specific parameters; RAC_VLM_OPTIONS_DEFAULT provides the documented defaults.
142
+ */
143
+ typedef struct rac_vlm_options {
144
+ // ── Standard Generation Parameters ──
145
+ /** Maximum number of tokens to generate (default: 2048) */
146
+ int32_t max_tokens;
147
+
148
+ /** Temperature for sampling (0.0 - 2.0, default: 0.7) */
149
+ float temperature;
150
+
151
+ /** Top-p sampling parameter (default: 0.9) */
152
+ float top_p;
153
+
154
+ /** Stop sequences: array of C strings with num_stop_sequences entries (can be NULL). NOTE(review): the original wording "null-terminated array" is ambiguous - confirm whether a trailing NULL entry is also expected. */
155
+ const char* const* stop_sequences;
156
+
157
+ /** Number of entries in stop_sequences */
158
+ size_t num_stop_sequences;
159
+
160
+ /** Enable streaming mode (default: true) */
161
+ rac_bool_t streaming_enabled;
162
+
163
+ /** System prompt (can be NULL, uses template default if available) */
164
+ const char* system_prompt;
165
+
166
+ // ── VLM-Specific Parameters ──
167
+ /** Max image dimension for resize (0 = model default) */
168
+ int32_t max_image_size;
169
+
170
+ /** Number of CPU threads for vision encoder (0 = auto) */
171
+ int32_t n_threads;
172
+
173
+ /** Use GPU for vision encoding (RAC_TRUE in RAC_VLM_OPTIONS_DEFAULT) */
174
+ rac_bool_t use_gpu;
175
+
176
+ // ── Chat Template Configuration ──
177
+ /**
178
+ * Model family for automatic chat template selection.
179
+ * Set to RAC_VLM_MODEL_FAMILY_AUTO (default) to auto-detect from model metadata.
180
+ * Set to RAC_VLM_MODEL_FAMILY_CUSTOM and provide custom_chat_template for custom templates.
181
+ */
182
+ rac_vlm_model_family_t model_family;
183
+
184
+ /**
185
+ * Custom chat template (only used when model_family == RAC_VLM_MODEL_FAMILY_CUSTOM).
186
+ * If NULL and model_family is CUSTOM, falls back to a generic template. NOTE(review): rac_vlm_model_family_t has no GENERIC value - confirm which template is actually used.
187
+ */
188
+ const rac_vlm_chat_template_t* custom_chat_template;
189
+
190
+ /**
191
+ * Override image marker (can be NULL to use template default).
192
+ * Useful when the default marker doesn't match your model's expectations.
193
+ */
194
+ const char* image_marker_override;
195
+ } rac_vlm_options_t;
196
+
197
+ /**
198
+ * @brief Default VLM generation options, as a brace-enclosed designated initializer (e.g. rac_vlm_options_t opts = RAC_VLM_OPTIONS_DEFAULT;)
199
+ */
200
+ #define RAC_VLM_OPTIONS_DEFAULT \
201
+ { \
202
+ .max_tokens = 2048, .temperature = 0.7f, .top_p = 0.9f, .stop_sequences = RAC_NULL, \
203
+ .num_stop_sequences = 0, .streaming_enabled = RAC_TRUE, .system_prompt = RAC_NULL, \
204
+ .max_image_size = 0, .n_threads = 0, .use_gpu = RAC_TRUE, \
205
+ .model_family = RAC_VLM_MODEL_FAMILY_AUTO, .custom_chat_template = RAC_NULL, \
206
+ .image_marker_override = RAC_NULL \
207
+ }
208
+
209
+ // =============================================================================
210
+ // CONFIGURATION - VLM Component Configuration
211
+ // =============================================================================
212
+
213
+ /**
214
+ * @brief VLM component configuration
215
+ *
216
+ * Configuration for initializing a VLM component; RAC_VLM_CONFIG_DEFAULT holds the documented defaults.
217
+ */
218
+ typedef struct rac_vlm_config {
219
+ /** Model ID (optional - uses default if NULL) */
220
+ const char* model_id;
221
+
222
+ /** Preferred framework for generation (use RAC_FRAMEWORK_UNKNOWN for auto); stored as int32_t, presumably a rac_inference_framework_t value - TODO confirm */
223
+ int32_t preferred_framework;
224
+
225
+ /** Context length - max tokens the model can handle (default: 4096) */
226
+ int32_t context_length;
227
+
228
+ /** Temperature for sampling (0.0 - 2.0, default: 0.7) */
229
+ float temperature;
230
+
231
+ /** Maximum tokens to generate (default: 2048) */
232
+ int32_t max_tokens;
233
+
234
+ /** System prompt for generation (can be NULL) */
235
+ const char* system_prompt;
236
+
237
+ /** Enable streaming mode (default: true) */
238
+ rac_bool_t streaming_enabled;
239
+ } rac_vlm_config_t;
240
+
241
+ /**
242
+ * @brief Default VLM configuration (auto framework, 4096-token context, temperature 0.7, 2048 max tokens, streaming on)
243
+ */
244
+ static const rac_vlm_config_t RAC_VLM_CONFIG_DEFAULT = {.model_id = RAC_NULL,
245
+ .preferred_framework =
246
+ 99, // RAC_FRAMEWORK_UNKNOWN written as a literal; must stay equal to that rac_inference_framework_t value
247
+ .context_length = 4096,
248
+ .temperature = 0.7f,
249
+ .max_tokens = 2048,
250
+ .system_prompt = RAC_NULL,
251
+ .streaming_enabled = RAC_TRUE};
252
+
253
+ // =============================================================================
254
+ // RESULTS - VLM Generation Results
255
+ // =============================================================================
256
+
257
+ /**
258
+ * @brief VLM generation result
259
+ *
260
+ * Contains the generated text and detailed metrics for VLM inference. Release with rac_vlm_result_free().
261
+ */
262
+ typedef struct rac_vlm_result {
263
+ /** Generated text (owned, must be freed with rac_vlm_result_free) */
264
+ char* text;
265
+
266
+ /** Number of tokens in prompt (including text tokens). NOTE(review): unclear whether image_tokens are counted in this figure - confirm. */
267
+ int32_t prompt_tokens;
268
+
269
+ /** Number of vision/image tokens specifically */
270
+ int32_t image_tokens;
271
+
272
+ /** Number of tokens generated */
273
+ int32_t completion_tokens;
274
+
275
+ /** Total tokens (prompt + completion) */
276
+ int32_t total_tokens;
277
+
278
+ /** Time to first token in milliseconds */
279
+ int64_t time_to_first_token_ms;
280
+
281
+ /** Time spent encoding the image in milliseconds */
282
+ int64_t image_encode_time_ms;
283
+
284
+ /** Total generation time in milliseconds */
285
+ int64_t total_time_ms;
286
+
287
+ /** Tokens generated per second */
288
+ float tokens_per_second;
289
+ } rac_vlm_result_t;
290
+
291
+ // =============================================================================
292
+ // SERVICE INFO - VLM Service Information
293
+ // =============================================================================
294
+
295
+ /**
296
+ * @brief VLM service handle info
297
+ *
298
+ * Describes the state and capabilities of a VLM service instance.
299
+ */
300
+ typedef struct rac_vlm_info {
301
+ /** Whether the service is ready for generation */
302
+ rac_bool_t is_ready;
303
+
304
+ /** Current model identifier (can be NULL if not loaded) */
305
+ const char* current_model;
306
+
307
+ /** Context length in tokens (0 if unknown) */
308
+ int32_t context_length;
309
+
310
+ /** Whether streaming is supported */
311
+ rac_bool_t supports_streaming;
312
+
313
+ /** Whether multiple images per request are supported */
314
+ rac_bool_t supports_multiple_images;
315
+
316
+ /** Vision encoder type as a string ("clip", "siglip", "fastvithd", etc.) */
317
+ const char* vision_encoder_type;
318
+ } rac_vlm_info_t;
319
+
320
+ // =============================================================================
321
+ // CALLBACKS - Streaming Callbacks
322
+ // =============================================================================
323
+
324
+ /**
325
+ * @brief Simple VLM streaming callback
326
+ *
327
+ * Called for each generated token during streaming; the return value controls whether generation continues.
328
+ *
329
+ * @param token The generated token string
330
+ * @param user_data User-provided context
331
+ * @return RAC_TRUE to continue, RAC_FALSE to stop generation
332
+ */
333
+ typedef rac_bool_t (*rac_vlm_stream_callback_fn)(const char* token, void* user_data);
334
+
335
+ /**
336
+ * @brief Extended token event structure
337
+ *
338
+ * Provides detailed information about each token during streaming; passed to rac_vlm_token_event_callback_fn.
339
+ */
340
+ typedef struct rac_vlm_token_event {
341
+ /** The generated token text */
342
+ const char* token;
343
+
344
+ /** Token index in the sequence */
345
+ int32_t token_index;
346
+
347
+ /** Is this the final token? */
348
+ rac_bool_t is_final;
349
+
350
+ /** Tokens generated per second so far */
351
+ float tokens_per_second;
352
+ } rac_vlm_token_event_t;
353
+
354
+ /**
355
+ * @brief Extended streaming callback that receives a rac_vlm_token_event_t per token
356
+ *
357
+ * @param event Token event details (const; read-only for the callback)
358
+ * @param user_data User-provided context
359
+ * @return RAC_TRUE to continue, RAC_FALSE to stop generation
360
+ */
361
+ typedef rac_bool_t (*rac_vlm_token_event_callback_fn)(const rac_vlm_token_event_t* event,
362
+ void* user_data);
363
+
364
+ // =============================================================================
365
+ // COMPONENT CALLBACKS - For component-level streaming
366
+ // =============================================================================
367
+
368
+ /**
369
+ * @brief VLM component token callback (component-level streaming)
370
+ *
371
+ * @param token The generated token
372
+ * @param user_data User-provided context
373
+ * @return RAC_TRUE to continue, RAC_FALSE to stop
374
+ */
375
+ typedef rac_bool_t (*rac_vlm_component_token_callback_fn)(const char* token, void* user_data);
376
+
377
+ /**
378
+ * @brief VLM component completion callback
379
+ *
380
+ * Called when streaming is complete, with the final result.
381
+ *
382
+ * @param result Final generation result with metrics (const pointer; NOTE(review): lifetime beyond the callback is not specified here - confirm before retaining it)
383
+ * @param user_data User-provided context
384
+ */
385
+ typedef void (*rac_vlm_component_complete_callback_fn)(const rac_vlm_result_t* result,
386
+ void* user_data);
387
+
388
+ /**
389
+ * @brief VLM component error callback
390
+ *
391
+ * Called if streaming fails.
392
+ *
393
+ * @param error_code Error code (rac_result_t)
394
+ * @param error_message Human-readable error message
395
+ * @param user_data User-provided context
396
+ */
397
+ typedef void (*rac_vlm_component_error_callback_fn)(rac_result_t error_code,
398
+ const char* error_message, void* user_data);
399
+
400
+ // =============================================================================
401
+ // MEMORY MANAGEMENT
402
+ // =============================================================================
403
+
404
+ /**
405
+ * @brief Free VLM result resources
406
+ *
407
+ * Frees the text and any other owned resources in the result. NOTE(review): whether the rac_vlm_result_t struct itself is also released is not stated here - confirm.
408
+ *
409
+ * @param result Result to free (can be NULL)
410
+ */
411
+ RAC_API void rac_vlm_result_free(rac_vlm_result_t* result);
412
+
413
+ #ifdef __cplusplus
414
+ }
415
+ #endif
416
+
417
+ #endif /* RAC_VLM_TYPES_H */
@@ -86,6 +86,21 @@ RAC_API rac_result_t rac_model_registry_save(rac_model_registry_handle_t handle,
86
86
  RAC_API rac_result_t rac_model_registry_get(rac_model_registry_handle_t handle,
87
87
  const char* model_id, rac_model_info_t** out_model);
88
88
 
89
+ /**
90
+ * @brief Get model metadata by local path.
91
+ *
92
+ * Searches through all registered models and returns the one whose local_path matches.
93
+ * This is useful when loading models by path instead of by model_id (see rac_model_registry_get).
94
+ *
95
+ * @param handle Registry handle
96
+ * @param local_path Local path to search for
97
+ * @param out_model Output: Model info (owned by the caller, must be freed with rac_model_info_free)
98
+ * @return RAC_SUCCESS, RAC_ERROR_NOT_FOUND, or other error code
99
+ */
100
+ RAC_API rac_result_t rac_model_registry_get_by_path(rac_model_registry_handle_t handle,
101
+ const char* local_path,
102
+ rac_model_info_t** out_model);
103
+
89
104
  /**
90
105
  * @brief Load all stored models.
91
106
  *
@@ -163,6 +163,7 @@ typedef enum rac_model_format {
163
163
  RAC_MODEL_FORMAT_ORT = 1, /**< ONNX Runtime format */
164
164
  RAC_MODEL_FORMAT_GGUF = 2, /**< GGUF format (llama.cpp) */
165
165
  RAC_MODEL_FORMAT_BIN = 3, /**< Binary format */
166
+ RAC_MODEL_FORMAT_COREML = 4, /**< Core ML format (.mlmodelc, .mlpackage) */
166
167
  RAC_MODEL_FORMAT_UNKNOWN = 99 /**< Unknown format */
167
168
  } rac_model_format_t;
168
169
 
@@ -182,6 +183,8 @@ typedef enum rac_inference_framework {
182
183
  RAC_FRAMEWORK_FLUID_AUDIO = 4, /**< FluidAudio */
183
184
  RAC_FRAMEWORK_BUILTIN = 5, /**< Built-in (e.g., energy VAD) */
184
185
  RAC_FRAMEWORK_NONE = 6, /**< No framework needed */
186
+ RAC_FRAMEWORK_MLX = 7, /**< MLX C++ (Apple Silicon VLM) */
187
+ RAC_FRAMEWORK_COREML = 8, /**< Core ML (Apple Neural Engine) */
185
188
  RAC_FRAMEWORK_UNKNOWN = 99 /**< Unknown framework */
186
189
  } rac_inference_framework_t;
187
190
 
@@ -0,0 +1,215 @@
1
+ /**
2
+ * @file rac_image_utils.h
3
+ * @brief RunAnywhere Commons - Image Utilities
4
+ *
5
+ * Image loading and processing utilities for VLM backends.
6
+ * Supports loading from file paths, decoding base64, and resizing.
7
+ */
8
+
9
+ #ifndef RAC_IMAGE_UTILS_H
10
+ #define RAC_IMAGE_UTILS_H
11
+
12
+ #include "rac/core/rac_error.h"
13
+ #include "rac/core/rac_types.h"
14
+
15
+ #ifdef __cplusplus
16
+ extern "C" {
17
+ #endif
18
+
19
+ // =============================================================================
20
+ // IMAGE DATA STRUCTURES
21
+ // =============================================================================
22
+
23
+ /**
24
+ * @brief Loaded image data
25
+ *
26
+ * Contains 8-bit RGB pixel data after loading an image.
27
+ * Must be freed with rac_image_free().
28
+ */
29
+ typedef struct rac_image_data {
30
+ /** Raw RGB pixel data, interleaved (RGBRGBRGB...) */
31
+ uint8_t* pixels;
32
+
33
+ /** Image width in pixels */
34
+ int32_t width;
35
+
36
+ /** Image height in pixels */
37
+ int32_t height;
38
+
39
+ /** Number of channels (3 for RGB) */
40
+ int32_t channels;
41
+
42
+ /** Total size of pixels in bytes (width * height * channels) */
43
+ size_t size;
44
+ } rac_image_data_t;
45
+
46
+ /**
47
+ * @brief Normalized float image data
48
+ *
49
+ * Contains normalized float32 pixel data (values in [-1, 1] or [0, 1]).
50
+ * Used by vision encoders. Must be freed with rac_image_float_free().
51
+ */
52
+ typedef struct rac_image_float {
53
+ /** Normalized float pixel data */
54
+ float* pixels;
55
+
56
+ /** Image width in pixels */
57
+ int32_t width;
58
+
59
+ /** Image height in pixels */
60
+ int32_t height;
61
+
62
+ /** Number of channels (3 for RGB) */
63
+ int32_t channels;
64
+
65
+ /** Total number of floats in pixels (width * height * channels); an element count, not a byte size */
66
+ size_t count;
67
+ } rac_image_float_t;
68
+
69
+ // =============================================================================
70
+ // IMAGE LOADING
71
+ // =============================================================================
72
+
73
+ /**
74
+ * @brief Load an image from a file path
75
+ *
76
+ * Supports JPEG, PNG, BMP, GIF, and other common formats via stb_image.
77
+ * Output is always RGB (3 channels), regardless of the channels stored in the file.
78
+ *
79
+ * @param file_path Path to the image file
80
+ * @param out_image Output: Loaded image data, written into a caller-provided struct (must be freed with rac_image_free)
81
+ * @return RAC_SUCCESS or error code
82
+ */
83
+ RAC_API rac_result_t rac_image_load_file(const char* file_path, rac_image_data_t* out_image);
84
+
85
+ /**
86
+ * @brief Decode a base64-encoded image
87
+ *
88
+ * Decodes the base64 text and then loads the resulting image bytes.
89
+ * Supports the same formats as rac_image_load_file.
90
+ *
91
+ * @param base64_data Base64-encoded image data
92
+ * @param data_size Length of the base64 string (not the decoded byte count)
93
+ * @param out_image Output: Loaded image data (must be freed with rac_image_free)
94
+ * @return RAC_SUCCESS or error code
95
+ */
96
+ RAC_API rac_result_t rac_image_decode_base64(const char* base64_data, size_t data_size,
97
+ rac_image_data_t* out_image);
98
+
99
+ /**
100
+ * @brief Decode image from raw bytes
101
+ *
102
+ * Decodes an encoded image held in memory (e.g., from a network response).
103
+ *
104
+ * @param data Raw encoded image data (JPEG, PNG, etc.), not decoded pixels
105
+ * @param data_size Size of the data in bytes
106
+ * @param out_image Output: Loaded image data (must be freed with rac_image_free)
107
+ * @return RAC_SUCCESS or error code
108
+ */
109
+ RAC_API rac_result_t rac_image_decode_bytes(const uint8_t* data, size_t data_size,
110
+ rac_image_data_t* out_image);
111
+
112
+ // =============================================================================
113
+ // IMAGE PROCESSING
114
+ // =============================================================================
115
+
116
+ /**
117
+ * @brief Resize an image to exact dimensions
118
+ *
119
+ * Resizes the image to the specified dimensions using bilinear interpolation; aspect ratio is not preserved unless the caller chooses matching dimensions.
120
+ *
121
+ * @param image Input image (not modified)
122
+ * @param new_width Target width in pixels
123
+ * @param new_height Target height in pixels
124
+ * @param out_image Output: Resized image (must be freed with rac_image_free)
125
+ * @return RAC_SUCCESS or error code
126
+ */
127
+ RAC_API rac_result_t rac_image_resize(const rac_image_data_t* image, int32_t new_width,
128
+ int32_t new_height, rac_image_data_t* out_image);
129
+
130
+ /**
131
+ * @brief Resize an image maintaining aspect ratio
132
+ *
133
+ * Resizes the image so that the longest dimension equals max_size. NOTE(review): whether images already smaller than max_size are upscaled is not stated here - confirm.
134
+ * Aspect ratio is preserved.
135
+ *
136
+ * @param image Input image (not modified)
137
+ * @param max_size Maximum dimension (width or height) in pixels
138
+ * @param out_image Output: Resized image (must be freed with rac_image_free)
139
+ * @return RAC_SUCCESS or error code
140
+ */
141
+ RAC_API rac_result_t rac_image_resize_max(const rac_image_data_t* image, int32_t max_size,
142
+ rac_image_data_t* out_image);
143
+
144
+ /**
145
+ * @brief Normalize image to float values
146
+ *
147
+ * Converts uint8 pixels to float32 with optional per-channel mean/std normalization.
148
+ * Commonly used for vision encoders (CLIP, SigLIP, etc.).
149
+ *
150
+ * Formula: pixel_normalized = (pixel / 255.0 - mean) / std, so NULL mean and std yield values in [0, 1]
151
+ *
152
+ * @param image Input image (not modified)
153
+ * @param mean Per-channel mean values (array of 3 floats, or NULL for [0,0,0])
154
+ * @param std Per-channel std values (array of 3 floats, or NULL for [1,1,1])
155
+ * @param out_float Output: Normalized float image (must be freed with rac_image_float_free)
156
+ * @return RAC_SUCCESS or error code
157
+ */
158
+ RAC_API rac_result_t rac_image_normalize(const rac_image_data_t* image, const float* mean,
159
+ const float* std, rac_image_float_t* out_float);
160
+
161
+ /**
162
+ * @brief Convert a float image from HWC to CHW layout
163
+ *
164
+ * Converts from HWC (Height, Width, Channels) to CHW (Channels, Height, Width) format.
165
+ * Many neural networks expect CHW format.
166
+ *
167
+ * @param image Input float image in HWC format (not modified)
168
+ * @param out_chw Output: Float image in CHW format (must be freed with rac_image_float_free)
169
+ * @return RAC_SUCCESS or error code
170
+ */
171
+ RAC_API rac_result_t rac_image_to_chw(const rac_image_float_t* image, rac_image_float_t* out_chw);
172
+
173
+ // =============================================================================
174
+ // MEMORY MANAGEMENT
175
+ // =============================================================================
176
+
177
+ /**
178
+ * @brief Free image data
179
+ *
180
+ * Frees the pixel data allocated by image loading functions. NOTE(review): whether the rac_image_data_t struct itself is released or its fields are reset is not stated here - confirm.
181
+ *
182
+ * @param image Image to free (can be NULL)
183
+ */
184
+ RAC_API void rac_image_free(rac_image_data_t* image);
185
+
186
+ /**
187
+ * @brief Free float image data
188
+ *
189
+ * Frees the pixel data allocated by normalization functions; also the documented release call for rac_image_to_chw output.
190
+ *
191
+ * @param image Float image to free (can be NULL)
192
+ */
193
+ RAC_API void rac_image_float_free(rac_image_float_t* image);
194
+
195
+ // =============================================================================
196
+ // UTILITY FUNCTIONS
197
+ // =============================================================================
198
+
199
+ /**
200
+ * @brief Calculate resized dimensions maintaining aspect ratio (computes sizes only; takes no image data)
201
+ *
202
+ * @param width Original width in pixels
203
+ * @param height Original height in pixels
204
+ * @param max_size Maximum dimension (presumably the same meaning as in rac_image_resize_max - TODO confirm)
205
+ * @param out_width Output: New width
206
+ * @param out_height Output: New height
207
+ */
208
+ RAC_API void rac_image_calc_resize(int32_t width, int32_t height, int32_t max_size,
209
+ int32_t* out_width, int32_t* out_height);
210
+
211
+ #ifdef __cplusplus
212
+ }
213
+ #endif
214
+
215
+ #endif /* RAC_IMAGE_UTILS_H */