@runanywhere/core 0.17.7 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -2
- package/RunAnywhereCore.podspec +1 -0
- package/android/CMakeLists.txt +24 -2
- package/android/build.gradle +61 -9
- package/android/src/main/cpp/cpp-adapter.cpp +51 -3
- package/android/src/main/include/rac/backends/rac_vlm_llamacpp.h +216 -0
- package/android/src/main/include/rac/core/capabilities/rac_lifecycle.h +3 -1
- package/android/src/main/include/rac/core/rac_core.h +11 -0
- package/android/src/main/include/rac/core/rac_types.h +8 -6
- package/android/src/main/include/rac/features/diffusion/rac_diffusion.h +22 -0
- package/android/src/main/include/rac/features/diffusion/rac_diffusion_component.h +263 -0
- package/android/src/main/include/rac/features/diffusion/rac_diffusion_model_registry.h +358 -0
- package/android/src/main/include/rac/features/diffusion/rac_diffusion_service.h +187 -0
- package/android/src/main/include/rac/features/diffusion/rac_diffusion_tokenizer.h +167 -0
- package/android/src/main/include/rac/features/diffusion/rac_diffusion_types.h +454 -0
- package/android/src/main/include/rac/features/llm/rac_tool_calling.h +373 -0
- package/android/src/main/include/rac/features/platform/rac_diffusion_platform.h +305 -0
- package/android/src/main/include/rac/features/vad/rac_vad_energy.h +1 -1
- package/android/src/main/include/rac/features/vlm/rac_vlm.h +16 -0
- package/android/src/main/include/rac/features/vlm/rac_vlm_component.h +168 -0
- package/android/src/main/include/rac/features/vlm/rac_vlm_service.h +206 -0
- package/android/src/main/include/rac/features/vlm/rac_vlm_types.h +417 -0
- package/android/src/main/include/rac/infrastructure/model_management/rac_model_registry.h +15 -0
- package/android/src/main/include/rac/infrastructure/model_management/rac_model_types.h +3 -0
- package/android/src/main/include/rac/utils/rac_image_utils.h +215 -0
- package/android/src/main/java/com/margelo/nitro/runanywhere/PlatformAdapterBridge.kt +201 -1
- package/android/src/main/jniLibs/arm64-v8a/libc++_shared.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libomp.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librac_commons.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librunanywhere_jni.so +0 -0
- package/android/src/main/jniLibs/x86_64/libc++_shared.so +0 -0
- package/android/src/main/jniLibs/x86_64/libomp.so +0 -0
- package/android/src/main/jniLibs/x86_64/librac_commons.so +0 -0
- package/android/src/main/jniLibs/x86_64/librunanywhere_jni.so +0 -0
- package/cpp/HybridRunAnywhereCore.cpp +259 -160
- package/cpp/HybridRunAnywhereCore.hpp +11 -0
- package/cpp/bridges/InitBridge.cpp +234 -3
- package/cpp/bridges/PlatformDownloadBridge.h +44 -0
- package/cpp/bridges/ToolCallingBridge.cpp +188 -0
- package/cpp/bridges/ToolCallingBridge.hpp +98 -0
- package/cpp/third_party/nlohmann/json.hpp +24765 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/RACommons.h +18 -4
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_core.h +11 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion.h +22 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_component.h +263 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_model_registry.h +358 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_platform.h +305 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_service.h +187 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_tokenizer.h +167 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_diffusion_types.h +454 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_endpoints.h +3 -17
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_image_utils.h +215 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_lifecycle.h +3 -1
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_model_assignment.h +4 -20
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_model_registry.h +15 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_model_types.h +3 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_tool_calling.h +373 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_types.h +8 -6
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vad_energy.h +1 -1
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm.h +16 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_component.h +168 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_llamacpp.h +216 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_service.h +206 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/Headers/rac_vlm_types.h +417 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64/RACommons.framework/RACommons +0 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/RACommons.h +18 -4
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_core.h +11 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion.h +22 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_component.h +263 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_model_registry.h +358 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_platform.h +305 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_service.h +187 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_tokenizer.h +167 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_diffusion_types.h +454 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_endpoints.h +3 -17
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_image_utils.h +215 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_lifecycle.h +3 -1
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_model_assignment.h +4 -20
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_model_registry.h +15 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_model_types.h +3 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_tool_calling.h +373 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_types.h +8 -6
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vad_energy.h +1 -1
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm.h +16 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_component.h +168 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_llamacpp.h +216 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_service.h +206 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/Headers/rac_vlm_types.h +417 -0
- package/ios/Binaries/RACommons.xcframework/ios-arm64_x86_64-simulator/RACommons.framework/RACommons +0 -0
- package/ios/PlatformAdapterBridge.h +24 -1
- package/ios/PlatformAdapterBridge.m +243 -0
- package/nitrogen/generated/shared/c++/HybridRunAnywhereCoreSpec.cpp +4 -0
- package/nitrogen/generated/shared/c++/HybridRunAnywhereCoreSpec.hpp +4 -0
- package/package.json +8 -4
- package/src/Foundation/Security/SecureStorageService.ts +12 -6
- package/src/Public/Extensions/RunAnywhere+Models.ts +5 -3
- package/src/Public/Extensions/RunAnywhere+STT.ts +7 -2
- package/src/Public/Extensions/RunAnywhere+ToolCalling.ts +472 -0
- package/src/Public/Extensions/index.ts +16 -0
- package/src/Public/RunAnywhere.ts +18 -0
- package/src/index.ts +0 -1
- package/src/services/Network/index.ts +0 -1
- package/src/services/index.ts +0 -1
- package/src/specs/RunAnywhereCore.nitro.ts +72 -0
- package/src/types/ToolCallingTypes.ts +198 -0
- package/src/types/index.ts +13 -0
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file rac_vlm_types.h
|
|
3
|
+
* @brief RunAnywhere Commons - VLM Types and Data Structures
|
|
4
|
+
*
|
|
5
|
+
* Defines data structures for Vision Language Model (VLM) operations.
|
|
6
|
+
* Supports image input (file path, RGB pixels, base64), generation options,
|
|
7
|
+
* results, and streaming callbacks.
|
|
8
|
+
*
|
|
9
|
+
* For the service interface, see rac_vlm_service.h.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#ifndef RAC_VLM_TYPES_H
|
|
13
|
+
#define RAC_VLM_TYPES_H
|
|
14
|
+
|
|
15
|
+
#include "rac/core/rac_types.h"
|
|
16
|
+
|
|
17
|
+
#ifdef __cplusplus
|
|
18
|
+
extern "C" {
|
|
19
|
+
#endif
|
|
20
|
+
|
|
21
|
+
// =============================================================================
|
|
22
|
+
// CHAT TEMPLATE - Abstraction for VLM prompt formatting
|
|
23
|
+
// =============================================================================
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* @brief Known VLM model families for chat template selection
|
|
27
|
+
*
|
|
28
|
+
* Use RAC_VLM_MODEL_FAMILY_AUTO (default) to auto-detect from model metadata.
|
|
29
|
+
* Use RAC_VLM_MODEL_FAMILY_CUSTOM with a custom template string for new models.
|
|
30
|
+
*
|
|
31
|
+
* Verified templates (from official HuggingFace repos):
|
|
32
|
+
* - QWEN2_VL: <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
|
|
33
|
+
* <|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{prompt}<|im_end|>\n
|
|
34
|
+
* <|im_start|>assistant\n
|
|
35
|
+
* - SMOLVLM: <|im_start|>User: {image}{prompt} \nAssistant:
|
|
36
|
+
* - LLAVA: USER: <image>\n{prompt}\nASSISTANT:
|
|
37
|
+
*/
|
|
38
|
+
typedef enum rac_vlm_model_family {
|
|
39
|
+
RAC_VLM_MODEL_FAMILY_AUTO = 0, /**< Auto-detect from model metadata (default) */
|
|
40
|
+
RAC_VLM_MODEL_FAMILY_QWEN2_VL = 1, /**< Qwen2-VL: chatml with <|vision_start|> markers */
|
|
41
|
+
RAC_VLM_MODEL_FAMILY_SMOLVLM = 2, /**< SmolVLM: <|im_start|>User: format */
|
|
42
|
+
RAC_VLM_MODEL_FAMILY_LLAVA = 3, /**< LLaVA/Vicuna: USER:/ASSISTANT: format */
|
|
43
|
+
RAC_VLM_MODEL_FAMILY_CUSTOM = 99, /**< Use custom_chat_template string */
|
|
44
|
+
} rac_vlm_model_family_t;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* @brief Custom chat template for VLM prompt formatting
|
|
48
|
+
*
|
|
49
|
+
* A simple template string with placeholders:
|
|
50
|
+
* {system} - System prompt (optional, can be empty)
|
|
51
|
+
* {image} - Image marker/placeholder
|
|
52
|
+
* {prompt} - User's text prompt
|
|
53
|
+
*
|
|
54
|
+
* Example template string:
|
|
55
|
+
* "<|im_start|>user\n{image}{prompt}<|im_end|>\n<|im_start|>assistant\n"
|
|
56
|
+
*
|
|
57
|
+
* The SDK will replace placeholders at runtime. If {system} is in the template
|
|
58
|
+
* but no system prompt is provided, default_system_prompt is used if set; otherwise the placeholder is left empty.
|
|
59
|
+
*/
|
|
60
|
+
typedef struct rac_vlm_chat_template {
|
|
61
|
+
/**
|
|
62
|
+
* Full template string with {system}, {image}, {prompt} placeholders.
|
|
63
|
+
* Example: "<|im_start|>user\n{image}{prompt}<|im_end|>\n<|im_start|>assistant\n"
|
|
64
|
+
*/
|
|
65
|
+
const char* template_str;
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Image marker to insert at {image} placeholder.
|
|
69
|
+
* Examples: "<image>", "<|vision_start|><|image_pad|><|vision_end|>"
|
|
70
|
+
* If NULL, uses the backend's default marker.
|
|
71
|
+
*/
|
|
72
|
+
const char* image_marker;
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Default system prompt if {system} is in template but none provided.
|
|
76
|
+
* Can be NULL for no default.
|
|
77
|
+
*/
|
|
78
|
+
const char* default_system_prompt;
|
|
79
|
+
} rac_vlm_chat_template_t;
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* @brief Get built-in chat template for a model family
|
|
83
|
+
*
|
|
84
|
+
* @param family Model family enum value
|
|
85
|
+
* @return Pointer to static template, or NULL if family not supported
|
|
86
|
+
*/
|
|
87
|
+
RAC_API const rac_vlm_chat_template_t* rac_vlm_get_builtin_template(rac_vlm_model_family_t family);
|
|
88
|
+
|
|
89
|
+
// =============================================================================
|
|
90
|
+
// IMAGE INPUT - Supports multiple input formats
|
|
91
|
+
// =============================================================================
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* @brief VLM image input format enumeration
|
|
95
|
+
*/
|
|
96
|
+
typedef enum rac_vlm_image_format {
|
|
97
|
+
RAC_VLM_IMAGE_FORMAT_FILE_PATH = 0, /**< Path to image file (JPEG, PNG, etc.) */
|
|
98
|
+
RAC_VLM_IMAGE_FORMAT_RGB_PIXELS = 1, /**< Raw RGB pixel buffer (RGBRGBRGB...) */
|
|
99
|
+
RAC_VLM_IMAGE_FORMAT_BASE64 = 2, /**< Base64-encoded image data */
|
|
100
|
+
} rac_vlm_image_format_t;
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* @brief VLM image input structure
|
|
104
|
+
*
|
|
105
|
+
* Represents an image input for VLM processing. Supports three formats:
|
|
106
|
+
* - FILE_PATH: Path to an image file on disk
|
|
107
|
+
* - RGB_PIXELS: Raw RGB pixel data with width/height
|
|
108
|
+
* - BASE64: Base64-encoded image data
|
|
109
|
+
*/
|
|
110
|
+
typedef struct rac_vlm_image {
|
|
111
|
+
/** Image format type */
|
|
112
|
+
rac_vlm_image_format_t format;
|
|
113
|
+
|
|
114
|
+
/** Path to image file (for FILE_PATH format) */
|
|
115
|
+
const char* file_path;
|
|
116
|
+
|
|
117
|
+
/** Raw RGB pixel data (for RGB_PIXELS format, layout: RGBRGBRGB...) */
|
|
118
|
+
const uint8_t* pixel_data;
|
|
119
|
+
|
|
120
|
+
/** Base64-encoded image data (for BASE64 format) */
|
|
121
|
+
const char* base64_data;
|
|
122
|
+
|
|
123
|
+
/** Image width in pixels (required for RGB_PIXELS, 0 otherwise) */
|
|
124
|
+
uint32_t width;
|
|
125
|
+
|
|
126
|
+
/** Image height in pixels (required for RGB_PIXELS, 0 otherwise) */
|
|
127
|
+
uint32_t height;
|
|
128
|
+
|
|
129
|
+
/** Size of pixel_data or base64_data in bytes */
|
|
130
|
+
size_t data_size;
|
|
131
|
+
} rac_vlm_image_t;
|
|
132
|
+
|
|
133
|
+
// =============================================================================
|
|
134
|
+
// OPTIONS - VLM Generation Options
|
|
135
|
+
// =============================================================================
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* @brief VLM generation options
|
|
139
|
+
*
|
|
140
|
+
* Controls text generation behavior for VLM inference.
|
|
141
|
+
* Combines standard LLM options with VLM-specific parameters.
|
|
142
|
+
*/
|
|
143
|
+
typedef struct rac_vlm_options {
|
|
144
|
+
// ── Standard Generation Parameters ──
|
|
145
|
+
/** Maximum number of tokens to generate (default: 2048) */
|
|
146
|
+
int32_t max_tokens;
|
|
147
|
+
|
|
148
|
+
/** Temperature for sampling (0.0 - 2.0, default: 0.7) */
|
|
149
|
+
float temperature;
|
|
150
|
+
|
|
151
|
+
/** Top-p sampling parameter (default: 0.9) */
|
|
152
|
+
float top_p;
|
|
153
|
+
|
|
154
|
+
/** Stop sequences (null-terminated array, can be NULL; element count is given by num_stop_sequences) */
|
|
155
|
+
const char* const* stop_sequences;
|
|
156
|
+
|
|
157
|
+
/** Number of stop sequences */
|
|
158
|
+
size_t num_stop_sequences;
|
|
159
|
+
|
|
160
|
+
/** Enable streaming mode (default: true) */
|
|
161
|
+
rac_bool_t streaming_enabled;
|
|
162
|
+
|
|
163
|
+
/** System prompt (can be NULL, uses template default if available) */
|
|
164
|
+
const char* system_prompt;
|
|
165
|
+
|
|
166
|
+
// ── VLM-Specific Parameters ──
|
|
167
|
+
/** Max image dimension for resize (0 = model default) */
|
|
168
|
+
int32_t max_image_size;
|
|
169
|
+
|
|
170
|
+
/** Number of CPU threads for vision encoder (0 = auto) */
|
|
171
|
+
int32_t n_threads;
|
|
172
|
+
|
|
173
|
+
/** Use GPU for vision encoding (default: true) */
|
|
174
|
+
rac_bool_t use_gpu;
|
|
175
|
+
|
|
176
|
+
// ── Chat Template Configuration ──
|
|
177
|
+
/**
|
|
178
|
+
* Model family for automatic chat template selection.
|
|
179
|
+
* Set to RAC_VLM_MODEL_FAMILY_AUTO (default) to auto-detect from model metadata.
|
|
180
|
+
* Set to RAC_VLM_MODEL_FAMILY_CUSTOM and provide custom_chat_template for custom templates.
|
|
181
|
+
*/
|
|
182
|
+
rac_vlm_model_family_t model_family;
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Custom chat template (only used when model_family == RAC_VLM_MODEL_FAMILY_CUSTOM).
|
|
186
|
+
* If NULL and model_family is CUSTOM, falls back to a generic template (rac_vlm_model_family_t defines no GENERIC value).
|
|
187
|
+
*/
|
|
188
|
+
const rac_vlm_chat_template_t* custom_chat_template;
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Override image marker (can be NULL to use template default).
|
|
192
|
+
* Useful when the default marker doesn't match your model's expectations.
|
|
193
|
+
*/
|
|
194
|
+
const char* image_marker_override;
|
|
195
|
+
} rac_vlm_options_t;
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* @brief Default VLM generation options
|
|
199
|
+
*/
|
|
200
|
+
#define RAC_VLM_OPTIONS_DEFAULT \
|
|
201
|
+
{ \
|
|
202
|
+
.max_tokens = 2048, .temperature = 0.7f, .top_p = 0.9f, .stop_sequences = RAC_NULL, \
|
|
203
|
+
.num_stop_sequences = 0, .streaming_enabled = RAC_TRUE, .system_prompt = RAC_NULL, \
|
|
204
|
+
.max_image_size = 0, .n_threads = 0, .use_gpu = RAC_TRUE, \
|
|
205
|
+
.model_family = RAC_VLM_MODEL_FAMILY_AUTO, .custom_chat_template = RAC_NULL, \
|
|
206
|
+
.image_marker_override = RAC_NULL \
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// =============================================================================
|
|
210
|
+
// CONFIGURATION - VLM Component Configuration
|
|
211
|
+
// =============================================================================
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* @brief VLM component configuration
|
|
215
|
+
*
|
|
216
|
+
* Configuration for initializing a VLM component.
|
|
217
|
+
*/
|
|
218
|
+
typedef struct rac_vlm_config {
|
|
219
|
+
/** Model ID (optional - uses default if NULL) */
|
|
220
|
+
const char* model_id;
|
|
221
|
+
|
|
222
|
+
/** Preferred framework for generation (use RAC_FRAMEWORK_UNKNOWN for auto) */
|
|
223
|
+
int32_t preferred_framework;
|
|
224
|
+
|
|
225
|
+
/** Context length - max tokens the model can handle (default: 4096) */
|
|
226
|
+
int32_t context_length;
|
|
227
|
+
|
|
228
|
+
/** Temperature for sampling (0.0 - 2.0, default: 0.7) */
|
|
229
|
+
float temperature;
|
|
230
|
+
|
|
231
|
+
/** Maximum tokens to generate (default: 2048) */
|
|
232
|
+
int32_t max_tokens;
|
|
233
|
+
|
|
234
|
+
/** System prompt for generation (can be NULL) */
|
|
235
|
+
const char* system_prompt;
|
|
236
|
+
|
|
237
|
+
/** Enable streaming mode (default: true) */
|
|
238
|
+
rac_bool_t streaming_enabled;
|
|
239
|
+
} rac_vlm_config_t;
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* @brief Default VLM configuration
|
|
243
|
+
*/
|
|
244
|
+
static const rac_vlm_config_t RAC_VLM_CONFIG_DEFAULT = {.model_id = RAC_NULL,
|
|
245
|
+
.preferred_framework =
|
|
246
|
+
99, // RAC_FRAMEWORK_UNKNOWN
|
|
247
|
+
.context_length = 4096,
|
|
248
|
+
.temperature = 0.7f,
|
|
249
|
+
.max_tokens = 2048,
|
|
250
|
+
.system_prompt = RAC_NULL,
|
|
251
|
+
.streaming_enabled = RAC_TRUE};
|
|
252
|
+
|
|
253
|
+
// =============================================================================
|
|
254
|
+
// RESULTS - VLM Generation Results
|
|
255
|
+
// =============================================================================
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* @brief VLM generation result
|
|
259
|
+
*
|
|
260
|
+
* Contains the generated text and detailed metrics for VLM inference.
|
|
261
|
+
*/
|
|
262
|
+
typedef struct rac_vlm_result {
|
|
263
|
+
/** Generated text (owned, must be freed with rac_vlm_result_free) */
|
|
264
|
+
char* text;
|
|
265
|
+
|
|
266
|
+
/** Number of tokens in prompt (including text tokens; the vision-token count is reported in image_tokens) */
|
|
267
|
+
int32_t prompt_tokens;
|
|
268
|
+
|
|
269
|
+
/** Number of vision/image tokens specifically */
|
|
270
|
+
int32_t image_tokens;
|
|
271
|
+
|
|
272
|
+
/** Number of tokens generated */
|
|
273
|
+
int32_t completion_tokens;
|
|
274
|
+
|
|
275
|
+
/** Total tokens (prompt + completion) */
|
|
276
|
+
int32_t total_tokens;
|
|
277
|
+
|
|
278
|
+
/** Time to first token in milliseconds */
|
|
279
|
+
int64_t time_to_first_token_ms;
|
|
280
|
+
|
|
281
|
+
/** Time spent encoding the image in milliseconds */
|
|
282
|
+
int64_t image_encode_time_ms;
|
|
283
|
+
|
|
284
|
+
/** Total generation time in milliseconds */
|
|
285
|
+
int64_t total_time_ms;
|
|
286
|
+
|
|
287
|
+
/** Tokens generated per second */
|
|
288
|
+
float tokens_per_second;
|
|
289
|
+
} rac_vlm_result_t;
|
|
290
|
+
|
|
291
|
+
// =============================================================================
|
|
292
|
+
// SERVICE INFO - VLM Service Information
|
|
293
|
+
// =============================================================================
|
|
294
|
+
|
|
295
|
+
/**
|
|
296
|
+
* @brief VLM service handle info
|
|
297
|
+
*
|
|
298
|
+
* Provides information about a VLM service instance.
|
|
299
|
+
*/
|
|
300
|
+
typedef struct rac_vlm_info {
|
|
301
|
+
/** Whether the service is ready for generation */
|
|
302
|
+
rac_bool_t is_ready;
|
|
303
|
+
|
|
304
|
+
/** Current model identifier (can be NULL if not loaded) */
|
|
305
|
+
const char* current_model;
|
|
306
|
+
|
|
307
|
+
/** Context length (0 if unknown) */
|
|
308
|
+
int32_t context_length;
|
|
309
|
+
|
|
310
|
+
/** Whether streaming is supported */
|
|
311
|
+
rac_bool_t supports_streaming;
|
|
312
|
+
|
|
313
|
+
/** Whether multiple images per request are supported */
|
|
314
|
+
rac_bool_t supports_multiple_images;
|
|
315
|
+
|
|
316
|
+
/** Vision encoder type ("clip", "siglip", "fastvithd", etc.) */
|
|
317
|
+
const char* vision_encoder_type;
|
|
318
|
+
} rac_vlm_info_t;
|
|
319
|
+
|
|
320
|
+
// =============================================================================
|
|
321
|
+
// CALLBACKS - Streaming Callbacks
|
|
322
|
+
// =============================================================================
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* @brief Simple VLM streaming callback
|
|
326
|
+
*
|
|
327
|
+
* Called for each generated token during streaming.
|
|
328
|
+
*
|
|
329
|
+
* @param token The generated token string
|
|
330
|
+
* @param user_data User-provided context
|
|
331
|
+
* @return RAC_TRUE to continue, RAC_FALSE to stop generation
|
|
332
|
+
*/
|
|
333
|
+
typedef rac_bool_t (*rac_vlm_stream_callback_fn)(const char* token, void* user_data);
|
|
334
|
+
|
|
335
|
+
/**
|
|
336
|
+
* @brief Extended token event structure
|
|
337
|
+
*
|
|
338
|
+
* Provides detailed information about each token during streaming.
|
|
339
|
+
*/
|
|
340
|
+
typedef struct rac_vlm_token_event {
|
|
341
|
+
/** The generated token text */
|
|
342
|
+
const char* token;
|
|
343
|
+
|
|
344
|
+
/** Token index in the sequence */
|
|
345
|
+
int32_t token_index;
|
|
346
|
+
|
|
347
|
+
/** Is this the final token? */
|
|
348
|
+
rac_bool_t is_final;
|
|
349
|
+
|
|
350
|
+
/** Tokens generated per second so far */
|
|
351
|
+
float tokens_per_second;
|
|
352
|
+
} rac_vlm_token_event_t;
|
|
353
|
+
|
|
354
|
+
/**
|
|
355
|
+
* @brief Extended streaming callback with token event details
|
|
356
|
+
*
|
|
357
|
+
* @param event Token event details
|
|
358
|
+
* @param user_data User-provided context
|
|
359
|
+
* @return RAC_TRUE to continue, RAC_FALSE to stop generation
|
|
360
|
+
*/
|
|
361
|
+
typedef rac_bool_t (*rac_vlm_token_event_callback_fn)(const rac_vlm_token_event_t* event,
|
|
362
|
+
void* user_data);
|
|
363
|
+
|
|
364
|
+
// =============================================================================
|
|
365
|
+
// COMPONENT CALLBACKS - For component-level streaming
|
|
366
|
+
// =============================================================================
|
|
367
|
+
|
|
368
|
+
/**
|
|
369
|
+
* @brief VLM component token callback
|
|
370
|
+
*
|
|
371
|
+
* @param token The generated token
|
|
372
|
+
* @param user_data User-provided context
|
|
373
|
+
* @return RAC_TRUE to continue, RAC_FALSE to stop
|
|
374
|
+
*/
|
|
375
|
+
typedef rac_bool_t (*rac_vlm_component_token_callback_fn)(const char* token, void* user_data);
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* @brief VLM component completion callback
|
|
379
|
+
*
|
|
380
|
+
* Called when streaming is complete with final result.
|
|
381
|
+
*
|
|
382
|
+
* @param result Final generation result with metrics
|
|
383
|
+
* @param user_data User-provided context
|
|
384
|
+
*/
|
|
385
|
+
typedef void (*rac_vlm_component_complete_callback_fn)(const rac_vlm_result_t* result,
|
|
386
|
+
void* user_data);
|
|
387
|
+
|
|
388
|
+
/**
|
|
389
|
+
* @brief VLM component error callback
|
|
390
|
+
*
|
|
391
|
+
* Called if streaming fails.
|
|
392
|
+
*
|
|
393
|
+
* @param error_code Error code
|
|
394
|
+
* @param error_message Error message
|
|
395
|
+
* @param user_data User-provided context
|
|
396
|
+
*/
|
|
397
|
+
typedef void (*rac_vlm_component_error_callback_fn)(rac_result_t error_code,
|
|
398
|
+
const char* error_message, void* user_data);
|
|
399
|
+
|
|
400
|
+
// =============================================================================
|
|
401
|
+
// MEMORY MANAGEMENT
|
|
402
|
+
// =============================================================================
|
|
403
|
+
|
|
404
|
+
/**
|
|
405
|
+
* @brief Free VLM result resources
|
|
406
|
+
*
|
|
407
|
+
* Frees the text and any other owned resources in the result.
|
|
408
|
+
*
|
|
409
|
+
* @param result Result to free (can be NULL)
|
|
410
|
+
*/
|
|
411
|
+
RAC_API void rac_vlm_result_free(rac_vlm_result_t* result);
|
|
412
|
+
|
|
413
|
+
#ifdef __cplusplus
|
|
414
|
+
}
|
|
415
|
+
#endif
|
|
416
|
+
|
|
417
|
+
#endif /* RAC_VLM_TYPES_H */
|
|
@@ -86,6 +86,21 @@ RAC_API rac_result_t rac_model_registry_save(rac_model_registry_handle_t handle,
|
|
|
86
86
|
RAC_API rac_result_t rac_model_registry_get(rac_model_registry_handle_t handle,
|
|
87
87
|
const char* model_id, rac_model_info_t** out_model);
|
|
88
88
|
|
|
89
|
+
/**
|
|
90
|
+
* @brief Get model metadata by local path.
|
|
91
|
+
*
|
|
92
|
+
* Searches through all registered models and returns the one with matching local_path.
|
|
93
|
+
* This is useful when loading models by path instead of model_id.
|
|
94
|
+
*
|
|
95
|
+
* @param handle Registry handle
|
|
96
|
+
* @param local_path Local path to search for
|
|
97
|
+
* @param out_model Output: Model info (owned, must be freed with rac_model_info_free)
|
|
98
|
+
* @return RAC_SUCCESS, RAC_ERROR_NOT_FOUND, or other error code
|
|
99
|
+
*/
|
|
100
|
+
RAC_API rac_result_t rac_model_registry_get_by_path(rac_model_registry_handle_t handle,
|
|
101
|
+
const char* local_path,
|
|
102
|
+
rac_model_info_t** out_model);
|
|
103
|
+
|
|
89
104
|
/**
|
|
90
105
|
* @brief Load all stored models.
|
|
91
106
|
*
|
|
@@ -163,6 +163,7 @@ typedef enum rac_model_format {
|
|
|
163
163
|
RAC_MODEL_FORMAT_ORT = 1, /**< ONNX Runtime format */
|
|
164
164
|
RAC_MODEL_FORMAT_GGUF = 2, /**< GGUF format (llama.cpp) */
|
|
165
165
|
RAC_MODEL_FORMAT_BIN = 3, /**< Binary format */
|
|
166
|
+
RAC_MODEL_FORMAT_COREML = 4, /**< Core ML format (.mlmodelc, .mlpackage) */
|
|
166
167
|
RAC_MODEL_FORMAT_UNKNOWN = 99 /**< Unknown format */
|
|
167
168
|
} rac_model_format_t;
|
|
168
169
|
|
|
@@ -182,6 +183,8 @@ typedef enum rac_inference_framework {
|
|
|
182
183
|
RAC_FRAMEWORK_FLUID_AUDIO = 4, /**< FluidAudio */
|
|
183
184
|
RAC_FRAMEWORK_BUILTIN = 5, /**< Built-in (e.g., energy VAD) */
|
|
184
185
|
RAC_FRAMEWORK_NONE = 6, /**< No framework needed */
|
|
186
|
+
RAC_FRAMEWORK_MLX = 7, /**< MLX C++ (Apple Silicon VLM) */
|
|
187
|
+
RAC_FRAMEWORK_COREML = 8, /**< Core ML (Apple Neural Engine) */
|
|
185
188
|
RAC_FRAMEWORK_UNKNOWN = 99 /**< Unknown framework */
|
|
186
189
|
} rac_inference_framework_t;
|
|
187
190
|
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file rac_image_utils.h
|
|
3
|
+
* @brief RunAnywhere Commons - Image Utilities
|
|
4
|
+
*
|
|
5
|
+
* Image loading and processing utilities for VLM backends.
|
|
6
|
+
* Supports loading from file paths, decoding base64, and resizing.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#ifndef RAC_IMAGE_UTILS_H
|
|
10
|
+
#define RAC_IMAGE_UTILS_H
|
|
11
|
+
|
|
12
|
+
#include "rac/core/rac_error.h"
|
|
13
|
+
#include "rac/core/rac_types.h"
|
|
14
|
+
|
|
15
|
+
#ifdef __cplusplus
|
|
16
|
+
extern "C" {
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
// =============================================================================
|
|
20
|
+
// IMAGE DATA STRUCTURES
|
|
21
|
+
// =============================================================================
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* @brief Loaded image data
|
|
25
|
+
*
|
|
26
|
+
* Contains RGB pixel data after loading an image.
|
|
27
|
+
* Must be freed with rac_image_free().
|
|
28
|
+
*/
|
|
29
|
+
typedef struct rac_image_data {
|
|
30
|
+
/** Raw RGB pixel data (RGBRGBRGB...) */
|
|
31
|
+
uint8_t* pixels;
|
|
32
|
+
|
|
33
|
+
/** Image width in pixels */
|
|
34
|
+
int32_t width;
|
|
35
|
+
|
|
36
|
+
/** Image height in pixels */
|
|
37
|
+
int32_t height;
|
|
38
|
+
|
|
39
|
+
/** Number of channels (3 for RGB) */
|
|
40
|
+
int32_t channels;
|
|
41
|
+
|
|
42
|
+
/** Total size in bytes (width * height * channels) */
|
|
43
|
+
size_t size;
|
|
44
|
+
} rac_image_data_t;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* @brief Normalized float image data
|
|
48
|
+
*
|
|
49
|
+
* Contains normalized float32 pixel data (values in [-1, 1] or [0, 1]).
|
|
50
|
+
* Used by vision encoders.
|
|
51
|
+
*/
|
|
52
|
+
typedef struct rac_image_float {
|
|
53
|
+
/** Normalized float pixel data */
|
|
54
|
+
float* pixels;
|
|
55
|
+
|
|
56
|
+
/** Image width in pixels */
|
|
57
|
+
int32_t width;
|
|
58
|
+
|
|
59
|
+
/** Image height in pixels */
|
|
60
|
+
int32_t height;
|
|
61
|
+
|
|
62
|
+
/** Number of channels (3 for RGB) */
|
|
63
|
+
int32_t channels;
|
|
64
|
+
|
|
65
|
+
/** Total number of floats (width * height * channels) */
|
|
66
|
+
size_t count;
|
|
67
|
+
} rac_image_float_t;
|
|
68
|
+
|
|
69
|
+
// =============================================================================
|
|
70
|
+
// IMAGE LOADING
|
|
71
|
+
// =============================================================================
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* @brief Load an image from a file path
|
|
75
|
+
*
|
|
76
|
+
* Supports JPEG, PNG, BMP, GIF, and other common formats via stb_image.
|
|
77
|
+
* Output is always RGB (3 channels).
|
|
78
|
+
*
|
|
79
|
+
* @param file_path Path to the image file
|
|
80
|
+
* @param out_image Output: Loaded image data (must be freed with rac_image_free)
|
|
81
|
+
* @return RAC_SUCCESS or error code
|
|
82
|
+
*/
|
|
83
|
+
RAC_API rac_result_t rac_image_load_file(const char* file_path, rac_image_data_t* out_image);
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* @brief Decode a base64-encoded image
|
|
87
|
+
*
|
|
88
|
+
* Decodes base64 data and loads the image.
|
|
89
|
+
* Supports the same formats as rac_image_load_file.
|
|
90
|
+
*
|
|
91
|
+
* @param base64_data Base64-encoded image data
|
|
92
|
+
* @param data_size Length of the base64 string
|
|
93
|
+
* @param out_image Output: Loaded image data (must be freed with rac_image_free)
|
|
94
|
+
* @return RAC_SUCCESS or error code
|
|
95
|
+
*/
|
|
96
|
+
RAC_API rac_result_t rac_image_decode_base64(const char* base64_data, size_t data_size,
|
|
97
|
+
rac_image_data_t* out_image);
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* @brief Decode image from raw bytes
|
|
101
|
+
*
|
|
102
|
+
* Decodes an image from raw bytes (e.g., from network response).
|
|
103
|
+
*
|
|
104
|
+
* @param data Raw image data (JPEG, PNG, etc.)
|
|
105
|
+
* @param data_size Size of the data in bytes
|
|
106
|
+
* @param out_image Output: Loaded image data (must be freed with rac_image_free)
|
|
107
|
+
* @return RAC_SUCCESS or error code
|
|
108
|
+
*/
|
|
109
|
+
RAC_API rac_result_t rac_image_decode_bytes(const uint8_t* data, size_t data_size,
|
|
110
|
+
rac_image_data_t* out_image);
|
|
111
|
+
|
|
112
|
+
// =============================================================================
|
|
113
|
+
// IMAGE PROCESSING
|
|
114
|
+
// =============================================================================
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* @brief Resize an image
|
|
118
|
+
*
|
|
119
|
+
* Resizes the image to the specified dimensions using bilinear interpolation.
|
|
120
|
+
*
|
|
121
|
+
* @param image Input image
|
|
122
|
+
* @param new_width Target width
|
|
123
|
+
* @param new_height Target height
|
|
124
|
+
* @param out_image Output: Resized image (must be freed with rac_image_free)
|
|
125
|
+
* @return RAC_SUCCESS or error code
|
|
126
|
+
*/
|
|
127
|
+
RAC_API rac_result_t rac_image_resize(const rac_image_data_t* image, int32_t new_width,
|
|
128
|
+
int32_t new_height, rac_image_data_t* out_image);
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* @brief Resize an image maintaining aspect ratio
|
|
132
|
+
*
|
|
133
|
+
* Resizes the image so that the longest dimension equals max_size.
|
|
134
|
+
* Aspect ratio is preserved.
|
|
135
|
+
*
|
|
136
|
+
* @param image Input image
|
|
137
|
+
* @param max_size Maximum dimension (width or height)
|
|
138
|
+
* @param out_image Output: Resized image (must be freed with rac_image_free)
|
|
139
|
+
* @return RAC_SUCCESS or error code
|
|
140
|
+
*/
|
|
141
|
+
RAC_API rac_result_t rac_image_resize_max(const rac_image_data_t* image, int32_t max_size,
|
|
142
|
+
rac_image_data_t* out_image);
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* @brief Normalize image to float values
|
|
146
|
+
*
|
|
147
|
+
* Converts uint8 pixels to float32 with optional mean/std normalization.
|
|
148
|
+
* Commonly used for vision encoders (CLIP, SigLIP, etc.).
|
|
149
|
+
*
|
|
150
|
+
* Formula: pixel_normalized = (pixel / 255.0 - mean) / std
|
|
151
|
+
*
|
|
152
|
+
* @param image Input image
|
|
153
|
+
* @param mean Per-channel mean values (array of 3 floats, or NULL for [0,0,0])
|
|
154
|
+
* @param std Per-channel std values (array of 3 floats, or NULL for [1,1,1])
|
|
155
|
+
* @param out_float Output: Normalized float image (must be freed with rac_image_float_free)
|
|
156
|
+
* @return RAC_SUCCESS or error code
|
|
157
|
+
*/
|
|
158
|
+
RAC_API rac_result_t rac_image_normalize(const rac_image_data_t* image, const float* mean,
|
|
159
|
+
const float* std, rac_image_float_t* out_float);
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* @brief Convert RGB to CHW format
|
|
163
|
+
*
|
|
164
|
+
* Converts from HWC (Height, Width, Channels) to CHW format.
|
|
165
|
+
* Many neural networks expect CHW format.
|
|
166
|
+
*
|
|
167
|
+
* @param image Input float image in HWC format
|
|
168
|
+
* @param out_chw Output: Float image in CHW format (must be freed with rac_image_float_free)
|
|
169
|
+
* @return RAC_SUCCESS or error code
|
|
170
|
+
*/
|
|
171
|
+
RAC_API rac_result_t rac_image_to_chw(const rac_image_float_t* image, rac_image_float_t* out_chw);
|
|
172
|
+
|
|
173
|
+
// =============================================================================
|
|
174
|
+
// MEMORY MANAGEMENT
|
|
175
|
+
// =============================================================================
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* @brief Free image data
|
|
179
|
+
*
|
|
180
|
+
* Frees the pixel data allocated by image loading functions.
|
|
181
|
+
*
|
|
182
|
+
* @param image Image to free (can be NULL)
|
|
183
|
+
*/
|
|
184
|
+
RAC_API void rac_image_free(rac_image_data_t* image);
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* @brief Free float image data
|
|
188
|
+
*
|
|
189
|
+
* Frees the pixel data allocated by normalization functions.
|
|
190
|
+
*
|
|
191
|
+
* @param image Float image to free (can be NULL)
|
|
192
|
+
*/
|
|
193
|
+
RAC_API void rac_image_float_free(rac_image_float_t* image);
|
|
194
|
+
|
|
195
|
+
// =============================================================================
|
|
196
|
+
// UTILITY FUNCTIONS
|
|
197
|
+
// =============================================================================
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* @brief Calculate resized dimensions maintaining aspect ratio
|
|
201
|
+
*
|
|
202
|
+
* @param width Original width
|
|
203
|
+
* @param height Original height
|
|
204
|
+
* @param max_size Maximum dimension
|
|
205
|
+
* @param out_width Output: New width
|
|
206
|
+
* @param out_height Output: New height
|
|
207
|
+
*/
|
|
208
|
+
RAC_API void rac_image_calc_resize(int32_t width, int32_t height, int32_t max_size,
|
|
209
|
+
int32_t* out_width, int32_t* out_height);
|
|
210
|
+
|
|
211
|
+
#ifdef __cplusplus
|
|
212
|
+
}
|
|
213
|
+
#endif
|
|
214
|
+
|
|
215
|
+
#endif /* RAC_IMAGE_UTILS_H */
|