cui-llama.rn 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +4 -23
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +13 -7
  4. package/android/src/main/java/com/rnllama/LlamaContext.java +27 -20
  5. package/android/src/main/java/com/rnllama/RNLlama.java +5 -1
  6. package/android/src/main/jni.cpp +8 -5
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/cpp/README.md +1 -1
  16. package/cpp/common.cpp +0 -212
  17. package/cpp/common.h +3 -0
  18. package/cpp/rn-llama.cpp +822 -0
  19. package/cpp/rn-llama.h +123 -0
  20. package/ios/CMakeLists.txt +99 -0
  21. package/ios/RNLlama.h +5 -1
  22. package/ios/RNLlama.mm +2 -2
  23. package/ios/RNLlamaContext.h +8 -1
  24. package/ios/RNLlamaContext.mm +15 -11
  25. package/ios/rnllama.xcframework/Info.plist +74 -0
  26. package/jest/mock.js +3 -2
  27. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  28. package/lib/commonjs/index.js +4 -2
  29. package/lib/commonjs/index.js.map +1 -1
  30. package/lib/module/NativeRNLlama.js.map +1 -1
  31. package/lib/module/index.js +4 -2
  32. package/lib/module/index.js.map +1 -1
  33. package/lib/typescript/NativeRNLlama.d.ts +5 -1
  34. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  35. package/lib/typescript/index.d.ts.map +1 -1
  36. package/llama-rn.podspec +8 -2
  37. package/package.json +5 -2
  38. package/src/NativeRNLlama.ts +5 -1
  39. package/src/index.ts +9 -2
package/README.md CHANGED
@@ -36,6 +36,8 @@ npm install llama.rn
 
 Please re-run `npx pod-install` again.
 
+By default, `llama.rn` will use pre-built `rnllama.xcframework` for iOS. If you want to build from source, please set `RNLLAMA_BUILD_FROM_SOURCE` to `1` in your Podfile.
+
 #### Android
 
 Add proguard rule if it's enabled in project (android/app/proguard-rules.pro):
@@ -45,6 +47,8 @@ Add proguard rule if it's enabled in project (android/app/proguard-rules.pro):
 -keep class com.rnllama.** { *; }
 ```
 
+By default, `llama.rn` will use pre-built libraries for Android. If you want to build from source, please set `rnllamaBuildFromSource` to `true` in `android/gradle.properties`.
+
 ## Obtain the model
 
 You can search HuggingFace for available models (Keyword: [`GGUF`](https://huggingface.co/search/full-text?q=GGUF&type=model)).
@@ -137,29 +141,6 @@ Please visit the [Documentation](docs/API) for more details.
 
 You can also visit the [example](example) to see how to use it.
 
-Run the example:
-
-```bash
-yarn && yarn bootstrap
-
-# iOS
-yarn example ios
-# Use device
-yarn example ios --device "<device name>"
-# With release mode
-yarn example ios --mode Release
-
-# Android
-yarn example android
-# With release mode
-yarn example android --mode release
-```
-
-This example used [react-native-document-picker](https://github.com/rnmods/react-native-document-picker) for select model.
-
-- iOS: You can move the model to iOS Simulator, or iCloud for real device.
-- Android: Selected file will be copied or downloaded to cache directory so it may be slow.
-
 ## Grammar Sampling
 
 GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis.
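The README section above adds two build-from-source switches. For iOS, setting the flag from a consumer Podfile would look like the sketch below; it assumes the podspec (whose change is listed in this diff but not shown in this section) reads `RNLLAMA_BUILD_FROM_SOURCE` from the environment:

```ruby
# ios/Podfile: opt out of the pre-built rnllama.xcframework (assumed ENV-based mechanism)
ENV['RNLLAMA_BUILD_FROM_SOURCE'] = '1'
```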
package/android/build.gradle CHANGED
@@ -54,9 +54,18 @@ android {
       }
     }
   }
-  externalNativeBuild {
-    cmake {
-      path = file('src/main/CMakeLists.txt')
+  def rnllamaBuildFromSource = project.properties["rnllamaBuildFromSource"]
+  if (rnllamaBuildFromSource == "true") {
+    externalNativeBuild {
+      cmake {
+        path = file('src/main/CMakeLists.txt')
+      }
+    }
+    // Exclude jniLibs
+    sourceSets {
+      main {
+        jniLibs.srcDirs = []
+      }
     }
   }
   buildTypes {
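The Gradle change above makes the native CMake build opt-in: the bundled `jniLibs` prebuilts are used unless the property is set. Per the README note in this same release, enabling a source build is a single line:

```properties
# android/gradle.properties
rnllamaBuildFromSource=true
```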
package/android/src/main/CMakeLists.txt CHANGED
@@ -2,6 +2,12 @@ cmake_minimum_required(VERSION 3.10)
 
 project(llama.rn)
 
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+    set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+endif(CCACHE_FOUND)
+
 set(CMAKE_CXX_STANDARD 17)
 set(RNLLAMA_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)
 
@@ -45,7 +51,7 @@ set(
     ${RNLLAMA_LIB_DIR}/unicode.cpp
     ${RNLLAMA_LIB_DIR}/sgemm.cpp
     ${RNLLAMA_LIB_DIR}/common.cpp
-    ${RNLLAMA_LIB_DIR}/rn-llama.hpp
+    ${RNLLAMA_LIB_DIR}/rn-llama.cpp
     ${CMAKE_SOURCE_DIR}/jni-utils.h
     ${CMAKE_SOURCE_DIR}/jni.cpp
 )
@@ -86,13 +92,13 @@ build_library("rnllama" "")
 
 if (${ANDROID_ABI} STREQUAL "arm64-v8a")
     # ARM64 targets
-    build_library("rnllama_v8_4_fp16_dotprod_sve" "-march=armv8.4-a+fp16+dotprod+sve")
-    build_library("rnllama_v8_4_fp16_dotprod_i8mm_sve" "-march=armv8.4-a+fp16+dotprod+i8mm+sve")
-    build_library("rnllama_v8_4_fp16_dotprod_i8mm" "-march=armv8.4-a+fp16+dotprod+i8mm")
-    build_library("rnllama_v8_4_fp16_dotprod" "-march=armv8.4-a+fp16+dotprod")
-    build_library("rnllama_v8_2_fp16_dotprod" "-march=armv8.2-a+fp16+dotprod")
-    build_library("rnllama_v8_2_fp16" "-march=armv8.2-a+fp16")
+    # Removing fp16 for now as it leads to issues with some models like deepseek r1 distills
+    # https://github.com/mybigday/llama.rn/pull/110#issuecomment-2609918310
     build_library("rnllama_v8" "-march=armv8-a")
+    build_library("rnllama_v8_2" "-march=armv8.2-a")
+    build_library("rnllama_v8_2_dotprod" "-march=armv8.2-a+dotprod")
+    build_library("rnllama_v8_2_i8mm" "-march=armv8.2-a+i8mm")
+    build_library("rnllama_v8_2_dotprod_i8mm" "-march=armv8.2-a+dotprod+i8mm")
 
     # https://github.com/ggerganov/llama.cpp/blob/master/docs/android.md#cross-compile-using-android-ndk
     # llama.cpp will deal with the cpu features
package/android/src/main/java/com/rnllama/LlamaContext.java CHANGED
@@ -26,6 +26,8 @@ import java.io.FileInputStream;
 public class LlamaContext {
   public static final String NAME = "RNLlamaContext";
 
+  private static String loadedLibrary = "";
+
   private int id;
   private ReactApplicationContext reactContext;
   private long context;
@@ -160,6 +162,10 @@
     return modelDetails;
   }
 
+  public String getLoadedLibrary() {
+    return loadedLibrary;
+  }
+
   public String getFormattedChat(ReadableArray messages, String chatTemplate) {
     ReadableMap[] msgs = new ReadableMap[messages.size()];
     for (int i = 0; i < messages.size(); i++) {
@@ -401,36 +407,37 @@
 
     // TODO: Add runtime check for cpu features
     if (LlamaContext.isArm64V8a()) {
-      if (isAtLeastArmV84 && hasSve && hasI8mm && hasFp16 && hasDotProd) {
-        Log.d(NAME, "Loading librnllama_v8_4_fp16_dotprod_i8mm_sve.so");
-        System.loadLibrary("rnllama_v8_4_fp16_dotprod_i8mm_sve");
-      } else if (isAtLeastArmV84 && hasSve && hasFp16 && hasDotProd) {
-        Log.d(NAME, "Loading librnllama_v8_4_fp16_dotprod_sve.so");
-        System.loadLibrary("rnllama_v8_4_fp16_dotprod_sve");
-      } else if (isAtLeastArmV84 && hasI8mm && hasFp16 && hasDotProd) {
-        Log.d(NAME, "Loading librnllama_v8_4_fp16_dotprod_i8mm.so");
-        System.loadLibrary("rnllama_v8_4_fp16_dotprod_i8mm");
-      } else if (isAtLeastArmV84 && hasFp16 && hasDotProd) {
-        Log.d(NAME, "Loading librnllama_v8_4_fp16_dotprod.so");
-        System.loadLibrary("rnllama_v8_4_fp16_dotprod");
-      } else if (isAtLeastArmV82 && hasFp16 && hasDotProd) {
-        Log.d(NAME, "Loading librnllama_v8_2_fp16_dotprod.so");
-        System.loadLibrary("rnllama_v8_2_fp16_dotprod");
-      } else if (isAtLeastArmV82 && hasFp16) {
-        Log.d(NAME, "Loading librnllama_v8_2_fp16.so");
-        System.loadLibrary("rnllama_v8_2_fp16");
+      if (hasDotProd && hasI8mm) {
+        Log.d(NAME, "Loading librnllama_v8_2_dotprod_i8mm.so");
+        System.loadLibrary("rnllama_v8_2_dotprod_i8mm");
+        loadedLibrary = "rnllama_v8_2_dotprod_i8mm";
+      } else if (hasDotProd) {
+        Log.d(NAME, "Loading librnllama_v8_2_dotprod.so");
+        System.loadLibrary("rnllama_v8_2_dotprod");
+        loadedLibrary = "rnllama_v8_2_dotprod";
+      } else if (hasI8mm) {
+        Log.d(NAME, "Loading librnllama_v8_2_i8mm.so");
+        System.loadLibrary("rnllama_v8_2_i8mm");
+        loadedLibrary = "rnllama_v8_2_i8mm";
+      } else if (hasFp16) {
+        Log.d(NAME, "Loading librnllama_v8_2.so");
+        System.loadLibrary("rnllama_v8_2");
+        loadedLibrary = "rnllama_v8_2";
       } else {
-        Log.d(NAME, "Loading librnllama_v8.so");
+        Log.d(NAME, "Loading default librnllama_v8.so");
         System.loadLibrary("rnllama_v8");
+        loadedLibrary = "rnllama_v8";
       }
       // Log.d(NAME, "Loading librnllama_v8_7.so with runtime feature detection");
       // System.loadLibrary("rnllama_v8_7");
     } else if (LlamaContext.isX86_64()) {
       Log.d(NAME, "Loading librnllama_x86_64.so");
       System.loadLibrary("rnllama_x86_64");
+      loadedLibrary = "rnllama_x86_64";
     } else {
       Log.d(NAME, "Loading default librnllama.so");
       System.loadLibrary("rnllama");
+      loadedLibrary = "rnllama";
     }
   }
 
@@ -465,7 +472,7 @@
   public void emitModelProgressUpdate(int progress) {
     WritableMap event = Arguments.createMap();
     event.putInt("progress", progress);
-    eventEmitter.emit("@RNLlama_onModelProgress", event);
+    eventEmitter.emit("@RNLlama_onInitContextProgress", event);
   }
 
   protected static native WritableMap modelInfo(
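Two behavioral notes on the Java changes above: the `.so` selection ladder now keys only on `dotprod`/`i8mm` (the fp16 variants are gone, matching the CMake comment earlier), and the load-progress event was renamed to `@RNLlama_onInitContextProgress`. A minimal consumer-side sketch; the progress-callback signature of `initLlama` is assumed from the JS wrapper in `src/index.ts`, which this section does not show:

```typescript
import { initLlama } from 'cui-llama.rn'

// Load a model and observe init progress. The optional second argument is
// assumed to be the JS-side consumer of the renamed native event
// '@RNLlama_onInitContextProgress'.
const context = await initLlama(
  { model: '/path/to/model.gguf', n_ctx: 2048 },
  (progress: number) => console.log(`loading: ${progress}%`),
)
```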
package/android/src/main/java/com/rnllama/RNLlama.java CHANGED
@@ -35,7 +35,7 @@ public class RNLlama implements LifecycleEventListener {
 
   private HashMap<Integer, LlamaContext> contexts = new HashMap<>();
 
-  private int llamaContextLimit = 1;
+  private int llamaContextLimit = -1;
 
   public void setContextLimit(double limit, Promise promise) {
     llamaContextLimit = (int) limit;
@@ -83,6 +83,9 @@
         if (context != null) {
           throw new Exception("Context already exists");
         }
+        if (llamaContextLimit > -1 && contexts.size() >= llamaContextLimit) {
+          throw new Exception("Context limit reached");
+        }
         LlamaContext llamaContext = new LlamaContext(contextId, reactContext, params);
         if (llamaContext.getContext() == 0) {
           throw new Exception("Failed to initialize context");
@@ -92,6 +95,7 @@
         result.putBoolean("gpu", false);
         result.putString("reasonNoGPU", "Currently not supported");
         result.putMap("model", llamaContext.getModelDetails());
+        result.putString("androidLib", llamaContext.getLoadedLibrary());
         return result;
       } catch (Exception e) {
         exception = e;
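Context creation now rejects once the configured limit is hit (the new default of `-1` means unlimited until `setContextLimit` is called) and reports which Android library was loaded. A sketch of the JavaScript-side view; the `androidLib` field name comes from the Java code above, while its exposure on the returned context object is an assumption based on the `NativeRNLlama.ts` typing change in this release:

```typescript
import { initLlama, setContextLimit } from 'cui-llama.rn'

// With the default of -1, no limit applies until one is set.
await setContextLimit(2) // a third concurrent context now throws "Context limit reached"

const ctx = await initLlama({ model: '/path/to/model.gguf' })
// Assumed surfaced from the native init result per the typings change:
console.log((ctx as any).androidLib) // e.g. 'rnllama_v8_2_dotprod_i8mm'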
package/android/src/main/jni.cpp CHANGED
@@ -11,9 +11,8 @@
 #include <unordered_map>
 #include "llama.h"
 #include "llama-impl.h"
-#include "llama-context.h"
-#include "gguf.h"
-#include "rn-llama.hpp"
+#include "ggml.h"
+#include "rn-llama.h"
 #include "jni-utils.h"
 
 #define UNUSED(x) (void)(x)
@@ -421,6 +420,7 @@ Java_com_rnllama_LlamaContext_loadModelDetails(
     llama_model_desc(llama->model, desc, sizeof(desc));
     putString(env, result, "desc", desc);
     putDouble(env, result, "size", llama_model_size(llama->model));
+    putDouble(env, result, "nEmbd", llama_model_n_embd(llama->model));
     putDouble(env, result, "nParams", llama_model_n_params(llama->model));
     putBoolean(env, result, "isChatTemplateSupported", llama->validateModelChatTemplate());
     putMap(env, result, "metadata", meta);
@@ -621,9 +621,12 @@ Java_com_rnllama_LlamaContext_doCompletion(
     sparams.dry_allowed_length = dry_allowed_length;
     sparams.dry_penalty_last_n = dry_penalty_last_n;
 
+    const llama_model * model = llama_get_model(llama->ctx);
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
     sparams.logit_bias.clear();
     if (ignore_eos) {
-        sparams.logit_bias[llama_vocab_eos(llama_model_get_vocab(llama->model))].bias = -INFINITY;
+        sparams.logit_bias[llama_vocab_eos(vocab)].bias = -INFINITY;
     }
 
     // dry break seq
@@ -642,7 +645,7 @@
     sparams.dry_sequence_breakers = dry_sequence_breakers_vector;
 
     // logit bias
-    const int n_vocab = llama_vocab_n_tokens(llama_model_get_vocab(llama->model));
+    const int n_vocab = llama_vocab_n_tokens(vocab);
     jsize logit_bias_len = env->GetArrayLength(logit_bias);
 
     for (jsize i = 0; i < logit_bias_len; i++) {
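Besides switching from `rn-llama.hpp` to the split `rn-llama.h`/`rn-llama.cpp`, the JNI changes cache the `llama_vocab` pointer once instead of re-deriving it per use, and `loadModelDetails` now reports the embedding size. A hedged sketch of reading the new field from JavaScript, assuming the model-details map keeps its usual exposure as `model` on the context:

```typescript
import { initLlama } from 'cui-llama.rn'

const ctx = await initLlama({ model: '/path/to/model.gguf' })
// 'nEmbd' is the new entry emitted by loadModelDetails in jni.cpp.
console.log((ctx.model as any)?.nEmbd) // embedding dimension of the loaded model
```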
package/cpp/README.md CHANGED
@@ -1,4 +1,4 @@
 # Note
 
-- Only `rn-llama.hpp` is the specific file for this project, others are sync from [llama.cpp](https://github.com/ggerganov/llama.cpp).
+- Only `rn-llama.h` and `rn-llama.cpp` are the specific files for this folder, others are sync from [llama.cpp](https://github.com/ggerganov/llama.cpp).
 - We can update the native source by using the [bootstrap](../scripts/bootstrap.sh) script.
package/cpp/common.cpp CHANGED
@@ -1153,218 +1153,6 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
     return false;
 }
 
-static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) {
-    // Initialize libcurl
-    curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
-    curl_slist_ptr http_headers;
-    if (!curl) {
-        LOG_ERR("%s: error initializing libcurl\n", __func__);
-        return false;
-    }
-
-    bool force_download = false;
-
-    // Set the URL, allow to follow http redirection
-    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
-    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
-
-    // Check if hf-token or bearer-token was specified
-    if (!hf_token.empty()) {
-        std::string auth_header = "Authorization: Bearer " + hf_token;
-        http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
-        curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
-    }
-
-#if defined(_WIN32)
-    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
-    // operating system. Currently implemented under MS-Windows.
-    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
-#endif
-
-    // Check if the file already exists locally
-    auto file_exists = std::filesystem::exists(path);
-
-    // If the file exists, check its JSON metadata companion file.
-    std::string metadata_path = path + ".json";
-    nlohmann::json metadata;
-    std::string etag;
-    std::string last_modified;
-
-    if (file_exists) {
-        // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
-        std::ifstream metadata_in(metadata_path);
-        if (metadata_in.good()) {
-            try {
-                metadata_in >> metadata;
-                LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
-                if (metadata.contains("url") && metadata.at("url").is_string()) {
-                    auto previous_url = metadata.at("url").get<std::string>();
-                    if (previous_url != url) {
-                        LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
-                        return false;
-                    }
-                }
-                if (metadata.contains("etag") && metadata.at("etag").is_string()) {
-                    etag = metadata.at("etag");
-                }
-                if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
-                    last_modified = metadata.at("lastModified");
-                }
-            } catch (const nlohmann::json::exception & e) {
-                LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
-                return false;
-            }
-        }
-    } else {
-        LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
-    }
-
-    // Send a HEAD request to retrieve the etag and last-modified headers
-    struct common_load_model_from_url_headers {
-        std::string etag;
-        std::string last_modified;
-    };
-
-    common_load_model_from_url_headers headers;
-
-    {
-        typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
-        auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
-            common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
-
-            static std::regex header_regex("([^:]+): (.*)\r\n");
-            static std::regex etag_regex("ETag", std::regex_constants::icase);
-            static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
-
-            std::string header(buffer, n_items);
-            std::smatch match;
-            if (std::regex_match(header, match, header_regex)) {
-                const std::string & key = match[1];
-                const std::string & value = match[2];
-                if (std::regex_match(key, match, etag_regex)) {
-                    headers->etag = value;
-                } else if (std::regex_match(key, match, last_modified_regex)) {
-                    headers->last_modified = value;
-                }
-            }
-            return n_items;
-        };
-
-        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
-        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
-        curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
-        curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
-
-        bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
-        if (!was_perform_successful) {
-            return false;
-        }
-
-        long http_code = 0;
-        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
-        if (http_code != 200) {
-            // HEAD not supported, we don't know if the file has changed
-            // force trigger downloading
-            force_download = true;
-            LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
-        }
-    }
-
-    bool should_download = !file_exists || force_download;
-    if (!should_download) {
-        if (!etag.empty() && etag != headers.etag) {
-            LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
-            should_download = true;
-        } else if (!last_modified.empty() && last_modified != headers.last_modified) {
-            LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
-            should_download = true;
-        }
-    }
-    if (should_download) {
-        std::string path_temporary = path + ".downloadInProgress";
-        if (file_exists) {
-            LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
-            if (remove(path.c_str()) != 0) {
-                LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
-                return false;
-            }
-        }
-
-        // Set the output file
-
-        struct FILE_deleter {
-            void operator()(FILE * f) const {
-                fclose(f);
-            }
-        };
-
-        std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
-        if (!outfile) {
-            LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
-            return false;
-        }
-
-        typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
-        auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
-            return fwrite(data, size, nmemb, (FILE *)fd);
-        };
-        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
-        curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
-        curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
-
-        // display download progress
-        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
-
-        // helper function to hide password in URL
-        auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
-            std::size_t protocol_pos = url.find("://");
-            if (protocol_pos == std::string::npos) {
-                return url; // Malformed URL
-            }
-
-            std::size_t at_pos = url.find('@', protocol_pos + 3);
-            if (at_pos == std::string::npos) {
-                return url; // No password in URL
-            }
-
-            return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
-        };
-
-        // start the download
-        LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
-            llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
-        bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
-        if (!was_perform_successful) {
-            return false;
-        }
-
-        long http_code = 0;
-        curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
-        if (http_code < 200 || http_code >= 400) {
-            LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
-            return false;
-        }
-
-        // Causes file to be closed explicitly here before we rename it.
-        outfile.reset();
-
-        // Write the updated JSON metadata file.
-        metadata.update({
-            {"url", url},
-            {"etag", headers.etag},
-            {"lastModified", headers.last_modified}
-        });
-        std::ofstream(metadata_path) << metadata.dump(4);
-        LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
-
-        if (rename(path_temporary.c_str(), path.c_str()) != 0) {
-            LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
-            return false;
-        }
-    }
-
-    return true;
-}
 
 struct llama_model * common_load_model_from_url(
     const std::string & model_url,
package/cpp/common.h CHANGED
@@ -534,6 +534,9 @@ struct llama_model * common_load_model_from_hf(
     const std::string & local_path,
     const std::string & hf_token,
     const struct llama_model_params & params);
+std::pair<std::string, std::string> common_get_hf_file(
+    const std::string & hf_repo_with_tag,
+    const std::string & hf_token);
 
 std::pair<std::string, std::string> common_get_hf_file(
     const std::string & hf_repo_with_tag,