@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/tools/rpc/rpc-server.cpp

@@ -0,0 +1,322 @@
+#if defined(_MSC_VER)
+#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+#endif
+
+#include "ggml-rpc.h"
+#ifdef _WIN32
+#  define NOMINMAX
+#  define DIRECTORY_SEPARATOR '\\'
+#  include <locale>
+#  include <windows.h>
+#  include <fcntl.h>
+#  include <io.h>
+#else
+#  define DIRECTORY_SEPARATOR '/'
+#  include <unistd.h>
+#  include <sys/stat.h>
+#endif
+#include <codecvt>
+#include <string>
+#include <stdio.h>
+#include <vector>
+#include <filesystem>
+#include <algorithm>
+#include <thread>
+
+namespace fs = std::filesystem;
+
+// NOTE: this is copied from common.cpp to avoid linking with libcommon
+// returns true if successful, false otherwise
+static bool fs_create_directory_with_parents(const std::string & path) {
+#ifdef _WIN32
+    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
+    std::wstring wpath = converter.from_bytes(path);
+
+    // if the path already exists, check whether it's a directory
+    const DWORD attributes = GetFileAttributesW(wpath.c_str());
+    if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+        return true;
+    }
+
+    size_t pos_slash = 0;
+
+    // process path from front to back, procedurally creating directories
+    while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) {
+        const std::wstring subpath = wpath.substr(0, pos_slash);
+        const wchar_t * test = subpath.c_str();
+
+        const bool success = CreateDirectoryW(test, NULL);
+        if (!success) {
+            const DWORD error = GetLastError();
+
+            // if the path already exists, ensure that it's a directory
+            if (error == ERROR_ALREADY_EXISTS) {
+                const DWORD attributes = GetFileAttributesW(subpath.c_str());
+                if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+                    return false;
+                }
+            } else {
+                return false;
+            }
+        }
+
+        pos_slash += 1;
+    }
+
+    return true;
+#else
+    // if the path already exists, check whether it's a directory
+    struct stat info;
+    if (stat(path.c_str(), &info) == 0) {
+        return S_ISDIR(info.st_mode);
+    }
+
+    size_t pos_slash = 1; // skip leading slashes for directory creation
+
+    // process path from front to back, procedurally creating directories
+    while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
+        const std::string subpath = path.substr(0, pos_slash);
+        struct stat info;
+
+        // if the path already exists, ensure that it's a directory
+        if (stat(subpath.c_str(), &info) == 0) {
+            if (!S_ISDIR(info.st_mode)) {
+                return false;
+            }
+        } else {
+            // create parent directories
+            const int ret = mkdir(subpath.c_str(), 0755);
+            if (ret != 0) {
+                return false;
+            }
+        }
+
+        pos_slash += 1;
+    }
+
+    return true;
+#endif // _WIN32
+}
+
+// NOTE: this is copied from common.cpp to avoid linking with libcommon
+static std::string fs_get_cache_directory() {
+    std::string cache_directory = "";
+    auto ensure_trailing_slash = [](std::string p) {
+        // Make sure to add trailing slash
+        if (p.back() != DIRECTORY_SEPARATOR) {
+            p += DIRECTORY_SEPARATOR;
+        }
+        return p;
+    };
+    if (getenv("LLAMA_CACHE")) {
+        cache_directory = std::getenv("LLAMA_CACHE");
+    } else {
+#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
+        if (std::getenv("XDG_CACHE_HOME")) {
+            cache_directory = std::getenv("XDG_CACHE_HOME");
+        } else {
+            cache_directory = std::getenv("HOME") + std::string("/.cache/");
+        }
+#elif defined(__APPLE__)
+        cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
+#elif defined(_WIN32)
+        cache_directory = std::getenv("LOCALAPPDATA");
+#else
+#  error Unknown architecture
+#endif
+        cache_directory = ensure_trailing_slash(cache_directory);
+        cache_directory += "llama.cpp";
+    }
+    return ensure_trailing_slash(cache_directory);
+}
+
+struct rpc_server_params {
+    std::string host        = "127.0.0.1";
+    int         port        = 50052;
+    size_t      backend_mem = 0;
+    bool        use_cache   = false;
+    int         n_threads   = std::max(1U, std::thread::hardware_concurrency()/2);
+    std::string device;
+};
+
+static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
+    fprintf(stderr, "Usage: %s [options]\n\n", argv[0]);
+    fprintf(stderr, "options:\n");
+    fprintf(stderr, "  -h, --help             show this help message and exit\n");
+    fprintf(stderr, "  -t, --threads          number of threads for the CPU backend (default: %d)\n", params.n_threads);
+    fprintf(stderr, "  -d DEV, --device       device to use\n");
+    fprintf(stderr, "  -H HOST, --host HOST   host to bind to (default: %s)\n", params.host.c_str());
+    fprintf(stderr, "  -p PORT, --port PORT   port to bind to (default: %d)\n", params.port);
+    fprintf(stderr, "  -m MEM, --mem MEM      backend memory size (in MB)\n");
+    fprintf(stderr, "  -c, --cache            enable local file cache\n");
+    fprintf(stderr, "\n");
+}
+
+static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & params) {
+    std::string arg;
+    for (int i = 1; i < argc; i++) {
+        arg = argv[i];
+        if (arg == "-H" || arg == "--host") {
+            if (++i >= argc) {
+                return false;
+            }
+            params.host = argv[i];
+        } else if (arg == "-t" || arg == "--threads") {
+            if (++i >= argc) {
+                return false;
+            }
+            params.n_threads = std::stoi(argv[i]);
+            if (params.n_threads <= 0) {
+                fprintf(stderr, "error: invalid number of threads: %d\n", params.n_threads);
+                return false;
+            }
+        } else if (arg == "-d" || arg == "--device") {
+            if (++i >= argc) {
+                return false;
+            }
+            params.device = argv[i];
+            if (ggml_backend_dev_by_name(params.device.c_str()) == nullptr) {
+                fprintf(stderr, "error: unknown device: %s\n", params.device.c_str());
+                fprintf(stderr, "available devices:\n");
+                for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
+                    auto * dev = ggml_backend_dev_get(i);
+                    size_t free, total;
+                    ggml_backend_dev_memory(dev, &free, &total);
+                    printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
+                }
+                return false;
+            }
+        } else if (arg == "-p" || arg == "--port") {
+            if (++i >= argc) {
+                return false;
+            }
+            params.port = std::stoi(argv[i]);
+            if (params.port <= 0 || params.port > 65535) {
+                return false;
+            }
+        } else if (arg == "-c" || arg == "--cache") {
+            params.use_cache = true;
+        } else if (arg == "-m" || arg == "--mem") {
+            if (++i >= argc) {
+                return false;
+            }
+            params.backend_mem = std::stoul(argv[i]) * 1024 * 1024;
+        } else if (arg == "-h" || arg == "--help") {
+            print_usage(argc, argv, params);
+            exit(0);
+        } else {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            print_usage(argc, argv, params);
+            exit(0);
+        }
+    }
+    return true;
+}
+
+static ggml_backend_t create_backend(const rpc_server_params & params) {
+    ggml_backend_t backend = nullptr;
+
+    if (!params.device.empty()) {
+        ggml_backend_dev_t dev = ggml_backend_dev_by_name(params.device.c_str());
+        if (dev) {
+            backend = ggml_backend_dev_init(dev, nullptr);
+            if (!backend) {
+                fprintf(stderr, "Failed to create backend for device %s\n", params.device.c_str());
+                return nullptr;
+            }
+        }
+    }
+
+    // try to initialize a GPU backend first
+    if (!backend) {
+        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr);
+    }
+
+    // if there aren't GPU backends fallback to CPU backend
+    if (!backend) {
+        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
+    }
+
+    if (backend) {
+        fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));
+
+        // set the number of threads
+        ggml_backend_dev_t dev = ggml_backend_get_device(backend);
+        ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
+        if (reg) {
+            auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
+            if (ggml_backend_set_n_threads_fn) {
+                ggml_backend_set_n_threads_fn(backend, params.n_threads);
+            }
+        }
+    }
+
+    return backend;
+}
+
+static void get_backend_memory(ggml_backend_t backend, size_t * free_mem, size_t * total_mem) {
+    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
+    GGML_ASSERT(dev != nullptr);
+    ggml_backend_dev_memory(dev, free_mem, total_mem);
+}
+
+int main(int argc, char * argv[]) {
+    ggml_backend_load_all();
+
+    rpc_server_params params;
+    if (!rpc_server_params_parse(argc, argv, params)) {
+        fprintf(stderr, "Invalid parameters\n");
+        return 1;
+    }
+
+    if (params.host != "127.0.0.1") {
+        fprintf(stderr, "\n");
+        fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        fprintf(stderr, "WARNING: Host ('%s') is != '127.0.0.1'\n", params.host.c_str());
+        fprintf(stderr, "         Never expose the RPC server to an open network!\n");
+        fprintf(stderr, "         This is an experimental feature and is not secure!\n");
+        fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        fprintf(stderr, "\n");
+    }
+
+    ggml_backend_t backend = create_backend(params);
+    if (!backend) {
+        fprintf(stderr, "Failed to create backend\n");
+        return 1;
+    }
+    std::string endpoint = params.host + ":" + std::to_string(params.port);
+    size_t free_mem, total_mem;
+    if (params.backend_mem > 0) {
+        free_mem  = params.backend_mem;
+        total_mem = params.backend_mem;
+    } else {
+        get_backend_memory(backend, &free_mem, &total_mem);
+    }
+    const char * cache_dir = nullptr;
+    std::string cache_dir_str;
+    if (params.use_cache) {
+        cache_dir_str = fs_get_cache_directory() + "rpc/";
+        if (!fs_create_directory_with_parents(cache_dir_str)) {
+            fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir_str.c_str());
+            return 1;
+        }
+        cache_dir = cache_dir_str.c_str();
+    }
+
+    ggml_backend_reg_t reg = ggml_backend_reg_by_name("RPC");
+    if (!reg) {
+        fprintf(stderr, "Failed to find RPC backend\n");
+        return 1;
+    }
+
+    auto start_server_fn = (decltype(ggml_backend_rpc_start_server)*) ggml_backend_reg_get_proc_address(reg, "ggml_backend_rpc_start_server");
+    if (!start_server_fn) {
+        fprintf(stderr, "Failed to obtain RPC backend start server function\n");
+        return 1;
+    }
+
+    start_server_fn(backend, endpoint.c_str(), cache_dir, free_mem, total_mem);
+
+    ggml_backend_free(backend);
+    return 0;
+}
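
The new rpc-server exposes whatever backend it wraps over a socket (default 127.0.0.1:50052). For orientation only, here is a minimal client-side sketch, not part of this diff, showing how another process could attach to that endpoint. It assumes ggml was built with the RPC backend and that the registry exposes ggml_backend_rpc_add_device through a proc address, as the common code in this release does:

// hypothetical client sketch; mirrors the proc-address lookup pattern
// used by rpc-server.cpp above
#include "ggml-backend.h"

#include <cstdio>

int main() {
    ggml_backend_load_all();

    ggml_backend_reg_t reg = ggml_backend_reg_by_name("RPC");
    if (!reg) {
        fprintf(stderr, "RPC backend not available\n");
        return 1;
    }

    // assumption: the registry exposes this proc address, as it does for
    // RPC device registration elsewhere in this version of llama.cpp
    typedef ggml_backend_dev_t (*add_device_t)(const char * endpoint);
    auto add_device_fn = (add_device_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_rpc_add_device");
    if (!add_device_fn) {
        return 1;
    }

    // 50052 is the default port from rpc_server_params
    ggml_backend_dev_t dev = add_device_fn("127.0.0.1:50052");
    if (dev) {
        size_t free_mem = 0, total_mem = 0;
        ggml_backend_dev_memory(dev, &free_mem, &total_mem);
        fprintf(stderr, "attached: %zu MiB free / %zu MiB total\n",
                free_mem / 1024 / 1024, total_mem / 1024 / 1024);
    }
    return 0;
}
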
package/src/llama.cpp/tools/run/CMakeLists.txt

@@ -0,0 +1,16 @@
+set(TARGET llama-run)
+add_executable(${TARGET} run.cpp linenoise.cpp/linenoise.cpp)
+
+# TODO: avoid copying this code block from common/CMakeLists.txt
+set(LLAMA_RUN_EXTRA_LIBS "")
+if (LLAMA_CURL)
+    find_package(CURL REQUIRED)
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
+    include_directories(${CURL_INCLUDE_DIRS})
+    find_library(CURL_LIBRARY curl REQUIRED)
+    set(LLAMA_RUN_EXTRA_LIBS ${LLAMA_RUN_EXTRA_LIBS} ${CURL_LIBRARY})
+endif ()
+
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_RUN_EXTRA_LIBS})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
package/src/llama.cpp/{examples → tools}/run/run.cpp

@@ -38,24 +38,6 @@
 }
 #endif
 
-GGML_ATTRIBUTE_FORMAT(1, 2)
-static std::string fmt(const char * fmt, ...) {
-    va_list ap;
-    va_list ap2;
-    va_start(ap, fmt);
-    va_copy(ap2, ap);
-    const int size = vsnprintf(NULL, 0, fmt, ap);
-    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
-    std::string buf;
-    buf.resize(size);
-    const int size2 = vsnprintf(const_cast<char *>(buf.data()), buf.size() + 1, fmt, ap2);
-    GGML_ASSERT(size2 == size);
-    va_end(ap2);
-    va_end(ap);
-
-    return buf;
-}
-
 GGML_ATTRIBUTE_FORMAT(1, 2)
 static int printe(const char * fmt, ...) {
     va_list args;
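
The removed local fmt helper duplicated string_format from common.h (the same vsnprintf size-then-write pattern); the hunks below switch run.cpp's call sites to the shared helper. A minimal usage sketch, assuming common.h is on the include path as it is for all tools linked against libcommon:

#include "common.h"

#include <cstdio>

int main() {
    // string_format has printf semantics, like the removed fmt()
    std::string eta = string_format("%dm %02ds", 5, 7);
    printf("%s\n", eta.c_str());  // prints: 5m 07s
    return 0;
}
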
@@ -285,7 +267,7 @@ class Opt {
             "Commands:\n"
             "  model\n"
             "    Model is a string with an optional prefix of \n"
-            "    huggingface:// (hf://), ollama://, https:// or file://.\n"
+            "    huggingface:// (hf://), modelscope:// (ms://), ollama://, https:// or file://.\n"
             "    If no protocol is specified and a file exists in the specified\n"
             "    path, file:// is assumed, otherwise if a file does not exist in\n"
             "    the specified path, ollama:// is assumed. Models that are being\n"
@@ -300,6 +282,9 @@ class Opt {
             "  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
             "  llama-run "
             "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
+            "  llama-run ms://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
+            "  llama-run "
+            "modelscope://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
             "  llama-run https://example.com/some-file1.gguf\n"
             "  llama-run some-file2.gguf\n"
             "  llama-run file://some-file3.gguf\n"
@@ -525,11 +510,11 @@ class HttpClient {
         int secs = static_cast<int>(seconds) % 60;
 
         if (hrs > 0) {
-            return fmt("%dh %02dm %02ds", hrs, mins, secs);
+            return string_format("%dh %02dm %02ds", hrs, mins, secs);
         } else if (mins > 0) {
-            return fmt("%dm %02ds", mins, secs);
+            return string_format("%dm %02ds", mins, secs);
         } else {
-            return fmt("%ds", secs);
+            return string_format("%ds", secs);
         }
     }
 
@@ -544,7 +529,7 @@ class HttpClient {
             }
         }
 
-        return fmt("%.2f %s", dbl_size, suffix[i]);
+        return string_format("%.2f %s", dbl_size, suffix[i]);
     }
 
     static int update_progress(void * ptr, curl_off_t total_to_download, curl_off_t now_downloaded, curl_off_t,
@@ -578,7 +563,9 @@ class HttpClient {
         return (now_downloaded_plus_file_size * 100) / total_to_download;
     }
 
-    static std::string generate_progress_prefix(curl_off_t percentage) { return fmt("%3ld%% |", static_cast<long int>(percentage)); }
+    static std::string generate_progress_prefix(curl_off_t percentage) {
+        return string_format("%3ld%% |", static_cast<long int>(percentage));
+    }
 
     static double calculate_speed(curl_off_t now_downloaded, const std::chrono::steady_clock::time_point & start_time) {
         const auto now = std::chrono::steady_clock::now();
@@ -589,9 +576,9 @@ class HttpClient {
     static std::string generate_progress_suffix(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download,
                                                 double speed, double estimated_time) {
         const int width = 10;
-        return fmt("%*s/%*s%*s/s%*s", width, human_readable_size(now_downloaded_plus_file_size).c_str(), width,
-                   human_readable_size(total_to_download).c_str(), width, human_readable_size(speed).c_str(), width,
-                   human_readable_time(estimated_time).c_str());
+        return string_format("%*s/%*s%*s/s%*s", width, human_readable_size(now_downloaded_plus_file_size).c_str(),
+                             width, human_readable_size(total_to_download).c_str(), width,
+                             human_readable_size(speed).c_str(), width, human_readable_time(estimated_time).c_str());
     }
 
     static int calculate_progress_bar_width(const std::string & progress_prefix, const std::string & progress_suffix) {
@@ -705,7 +692,7 @@ class LlamaData {
         return 0;
     }
 
-    int huggingface_dl(std::string & model, const std::string & bn) {
+    int dl_from_endpoint(std::string & model_endpoint, std::string & model, const std::string & bn) {
         // Find the second occurrence of '/' after protocol string
         size_t pos = model.find('/');
         pos = model.find('/', pos + 1);
@@ -714,7 +701,7 @@ class LlamaData {
         std::string url;
 
         if (pos == std::string::npos) {
-            auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/");
+            auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
             hfr = model_name;
 
             nlohmann::json manifest;
@@ -729,11 +716,21 @@ class LlamaData {
             hff = model.substr(pos + 1);
         }
 
-        url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
+        url = model_endpoint + hfr + "/resolve/main/" + hff;
 
         return download(url, bn, true, headers);
     }
 
+    int modelscope_dl(std::string & model, const std::string & bn) {
+        std::string model_endpoint = "https://modelscope.cn/models/";
+        return dl_from_endpoint(model_endpoint, model, bn);
+    }
+
+    int huggingface_dl(std::string & model, const std::string & bn) {
+        std::string model_endpoint = get_model_endpoint();
+        return dl_from_endpoint(model_endpoint, model, bn);
+    }
+
     int ollama_dl(std::string & model, const std::string & bn) {
         const std::vector<std::string> headers = { "Accept: application/vnd.docker.distribution.manifest.v2+json" };
         if (model.find('/') == std::string::npos) {
@@ -851,6 +848,9 @@ class LlamaData {
             rm_until_substring(model_, "hf.co/");
             rm_until_substring(model_, "://");
             ret = huggingface_dl(model_, bn);
+        } else if (string_starts_with(model_, "ms://") || string_starts_with(model_, "modelscope://")) {
+            rm_until_substring(model_, "://");
+            ret = modelscope_dl(model_, bn);
         } else if ((string_starts_with(model_, "https://") || string_starts_with(model_, "http://")) &&
                    !string_starts_with(model_, "https://ollama.com/library/")) {
             ret = download(model_, bn, true);
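
Taken together, these hunks make a ms:// or modelscope:// model reference resolve against https://modelscope.cn/models/ using the same path logic as Hugging Face downloads. A standalone sketch of that mapping (the helpers here are local re-implementations for illustration, approximating run.cpp's rm_until_substring and dl_from_endpoint URL construction):

#include <iostream>
#include <string>

// strip everything up to and including the first occurrence of `sub`,
// approximating run.cpp's rm_until_substring
static void strip_through(std::string & s, const std::string & sub) {
    const size_t pos = s.find(sub);
    if (pos != std::string::npos) {
        s = s.substr(pos + sub.size());
    }
}

int main() {
    std::string model = "ms://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf";
    strip_through(model, "://");  // -> "QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf"

    // split into <org>/<repo> and <file>, mirroring dl_from_endpoint's
    // search for the second '/'
    size_t pos = model.find('/');
    pos = model.find('/', pos + 1);
    const std::string repo = model.substr(0, pos);
    const std::string file = model.substr(pos + 1);

    const std::string url = "https://modelscope.cn/models/" + repo + "/resolve/main/" + file;
    std::cout << url << "\n";
    // prints: https://modelscope.cn/models/QuantFactory/SmolLM-135M-GGUF/resolve/main/SmolLM-135M.Q2_K.gguf
    return 0;
}
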
package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt

@@ -34,8 +34,9 @@ endforeach()
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
 
+target_include_directories(${TARGET} PRIVATE ../llava)
 target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
-target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
 
 if (LLAMA_SERVER_SSL)
     find_package(OpenSSL REQUIRED)