@novastera-oss/llamarn 0.1.3-beta.2 → 0.1.3-beta.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/cpp/PureCppImpl.cpp +64 -5
- package/package.json +1 -1
package/cpp/PureCppImpl.cpp
CHANGED
|
@@ -235,11 +235,70 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
|
|
|
235
235
|
}
|
|
236
236
|
|
|
237
237
|
// Initialize using common_init_from_params
|
|
238
|
-
common_init_result result
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
238
|
+
common_init_result result;
|
|
239
|
+
|
|
240
|
+
try {
|
|
241
|
+
result = common_init_from_params(params);
|
|
242
|
+
|
|
243
|
+
// Check if initialization was successful
|
|
244
|
+
if (!result.model || !result.context) {
|
|
245
|
+
throw std::runtime_error("Failed to initialize model and context");
|
|
246
|
+
}
|
|
247
|
+
} catch (const std::exception& e) {
|
|
248
|
+
std::string error_msg = e.what();
|
|
249
|
+
|
|
250
|
+
// Check for specific Vulkan errors that indicate Adreno GPU shader incompatibility
|
|
251
|
+
bool isVulkanShaderError = (
|
|
252
|
+
error_msg.find("createComputePipeline") != std::string::npos ||
|
|
253
|
+
error_msg.find("ErrorUnknown") != std::string::npos ||
|
|
254
|
+
error_msg.find("matmul_q4_k") != std::string::npos ||
|
|
255
|
+
error_msg.find("matmul_q5_k") != std::string::npos ||
|
|
256
|
+
error_msg.find("dequant_q4_K") != std::string::npos ||
|
|
257
|
+
error_msg.find("dequant_q5_K") != std::string::npos ||
|
|
258
|
+
error_msg.find("vulkan") != std::string::npos
|
|
259
|
+
);
|
|
260
|
+
|
|
261
|
+
// If we were trying to use GPU and got a Vulkan/shader error, retry with CPU-only
|
|
262
|
+
if (params.n_gpu_layers > 0 && isVulkanShaderError) {
|
|
263
|
+
fprintf(stderr, "Vulkan shader compilation failed (likely Adreno GPU incompatibility): %s\n", e.what());
|
|
264
|
+
fprintf(stderr, "This is a known issue with Q4_K/Q5_K shaders on Qualcomm Adreno GPUs\n");
|
|
265
|
+
fprintf(stderr, "Retrying with CPU-only mode...\n");
|
|
266
|
+
|
|
267
|
+
// Retry with CPU-only
|
|
268
|
+
params.n_gpu_layers = 0;
|
|
269
|
+
|
|
270
|
+
try {
|
|
271
|
+
result = common_init_from_params(params);
|
|
272
|
+
|
|
273
|
+
if (!result.model || !result.context) {
|
|
274
|
+
throw std::runtime_error("Failed to initialize model and context even with CPU-only mode");
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
fprintf(stderr, "Successfully recovered with CPU-only mode after Vulkan shader failure\n");
|
|
278
|
+
} catch (const std::exception& cpu_e) {
|
|
279
|
+
throw std::runtime_error(std::string("Model initialization failed: ") + cpu_e.what());
|
|
280
|
+
}
|
|
281
|
+
} else if (params.n_gpu_layers > 0) {
|
|
282
|
+
// Other GPU error, still try CPU fallback
|
|
283
|
+
fprintf(stderr, "GPU initialization failed (%s), retrying with CPU-only\n", e.what());
|
|
284
|
+
|
|
285
|
+
params.n_gpu_layers = 0;
|
|
286
|
+
|
|
287
|
+
try {
|
|
288
|
+
result = common_init_from_params(params);
|
|
289
|
+
|
|
290
|
+
if (!result.model || !result.context) {
|
|
291
|
+
throw std::runtime_error("Failed to initialize model and context even with CPU-only mode");
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
fprintf(stderr, "Successfully recovered with CPU-only mode after GPU failure\n");
|
|
295
|
+
} catch (const std::exception& cpu_e) {
|
|
296
|
+
throw std::runtime_error(std::string("Model initialization failed: ") + cpu_e.what());
|
|
297
|
+
}
|
|
298
|
+
} else {
|
|
299
|
+
// Was already CPU-only, re-throw the original error
|
|
300
|
+
throw std::runtime_error(std::string("Model initialization failed: ") + e.what());
|
|
301
|
+
}
|
|
243
302
|
}
|
|
244
303
|
|
|
245
304
|
// Create and initialize rn_llama_context
|