@fugood/llama.node 0.3.16 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +5 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +8 -0
- package/src/LlamaCompletionWorker.h +1 -0
- package/src/LlamaContext.cpp +3 -2
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +124 -0
- package/src/llama.cpp/.github/workflows/build.yml +70 -27
- package/src/llama.cpp/.github/workflows/docker.yml +6 -6
- package/src/llama.cpp/.github/workflows/server.yml +7 -11
- package/src/llama.cpp/CMakeLists.txt +23 -1
- package/src/llama.cpp/common/CMakeLists.txt +6 -3
- package/src/llama.cpp/common/arg.cpp +809 -105
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +1 -1
- package/src/llama.cpp/common/common.cpp +31 -521
- package/src/llama.cpp/common/common.h +17 -36
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +30 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +15 -7
- package/src/llama.cpp/common/minja/minja.hpp +119 -93
- package/src/llama.cpp/common/sampling.cpp +3 -0
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +0 -9
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +7 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +210 -8
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +39 -24
- package/src/llama.cpp/examples/llava/clip-impl.h +345 -0
- package/src/llama.cpp/examples/llava/clip.cpp +2152 -1803
- package/src/llama.cpp/examples/llava/clip.h +39 -22
- package/src/llama.cpp/examples/llava/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/examples/llava/llava.cpp +64 -52
- package/src/llama.cpp/examples/llava/mtmd-cli.cpp +344 -0
- package/src/llama.cpp/examples/llava/mtmd.cpp +708 -0
- package/src/llama.cpp/examples/llava/mtmd.h +168 -0
- package/src/llama.cpp/examples/llava/{qwen2vl-cli.cpp → qwen2vl-test.cpp} +83 -31
- package/src/llama.cpp/examples/main/main.cpp +16 -5
- package/src/llama.cpp/examples/parallel/parallel.cpp +3 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +17 -3
- package/src/llama.cpp/examples/quantize/quantize.cpp +115 -2
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +4 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +163 -8
- package/src/llama.cpp/examples/run/CMakeLists.txt +12 -1
- package/src/llama.cpp/examples/run/run.cpp +14 -28
- package/src/llama.cpp/examples/server/httplib.h +313 -247
- package/src/llama.cpp/examples/server/server.cpp +238 -139
- package/src/llama.cpp/examples/server/utils.hpp +51 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +6 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +66 -99
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +48 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -192
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +754 -404
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1003 -13519
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +2 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +3 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +533 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8809 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +258 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +70 -3
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -260
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +293 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +96 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +2 -292
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +967 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +204 -280
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +646 -114
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +17 -8
- package/src/llama.cpp/ggml/src/ggml.c +141 -245
- package/src/llama.cpp/ggml/src/gguf.cpp +1 -0
- package/src/llama.cpp/include/llama.h +30 -11
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +2 -0
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/src/CMakeLists.txt +3 -2
- package/src/llama.cpp/src/llama-adapter.cpp +37 -1
- package/src/llama.cpp/src/llama-arch.cpp +160 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-chat.cpp +82 -17
- package/src/llama.cpp/src/llama-chat.h +6 -2
- package/src/llama.cpp/src/llama-context.cpp +108 -92
- package/src/llama.cpp/src/llama-context.h +1 -2
- package/src/llama.cpp/src/llama-graph.cpp +189 -119
- package/src/llama.cpp/src/llama-graph.h +26 -6
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +70 -123
- package/src/llama.cpp/src/llama-kv-cache.h +41 -115
- package/src/llama.cpp/src/llama-memory.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +10 -5
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model.cpp +1760 -534
- package/src/llama.cpp/src/llama-model.h +13 -1
- package/src/llama.cpp/src/llama-quant.cpp +29 -8
- package/src/llama.cpp/src/llama-sampling.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.cpp +44 -6
- package/src/llama.cpp/src/llama.cpp +1 -1
- package/src/llama.cpp/tests/CMakeLists.txt +43 -30
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +82 -43
- package/src/llama.cpp/tests/test-chat-template.cpp +34 -13
- package/src/llama.cpp/tests/test-chat.cpp +12 -2
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
|
@@ -31,9 +31,16 @@
|
|
|
31
31
|
#include <memory>
|
|
32
32
|
#include <string>
|
|
33
33
|
#include <vector>
|
|
34
|
+
#include <atomic>
|
|
35
|
+
#include <condition_variable>
|
|
36
|
+
#include <mutex>
|
|
37
|
+
#include <thread>
|
|
38
|
+
#include <unistd.h>
|
|
39
|
+
#include <functional>
|
|
34
40
|
|
|
35
41
|
#include "../include/ggml-cann.h"
|
|
36
42
|
#include "../include/ggml.h"
|
|
43
|
+
#include "../ggml-impl.h"
|
|
37
44
|
|
|
38
45
|
#define MATRIX_ROW_PADDING 512
|
|
39
46
|
#define GGML_CANN_MAX_STREAMS 8
|
|
@@ -205,6 +212,127 @@ struct ggml_cann_pool_alloc {
|
|
|
205
212
|
ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete;
|
|
206
213
|
};
|
|
207
214
|
|
|
215
|
+
/**
|
|
216
|
+
* @brief Function pointer type for ACLNN operator calls.
|
|
217
|
+
*/
|
|
218
|
+
using aclnn_func_t = aclnnStatus (*)(void*, uint64_t, aclOpExecutor*, aclrtStream);
|
|
219
|
+
|
|
220
|
+
/**
|
|
221
|
+
* @brief Base class for all CANN tasks to be submitted to the task queue.
|
|
222
|
+
*
|
|
223
|
+
* Users should override the run_task() method with actual task logic.
|
|
224
|
+
*/
|
|
225
|
+
class cann_task {
|
|
226
|
+
public:
|
|
227
|
+
virtual void run_task() {}
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
/**
|
|
231
|
+
* @brief A lock-free ring-buffer based task queue for asynchronously executing cann_task instances.
|
|
232
|
+
*/
|
|
233
|
+
class cann_task_queue {
|
|
234
|
+
public:
|
|
235
|
+
/**
|
|
236
|
+
* @brief Constructs a task queue with a fixed power-of-two capacity for a specific device.
|
|
237
|
+
*
|
|
238
|
+
* @param capacity Queue capacity. Must be a power of 2.
|
|
239
|
+
* @param device Target device ID (used for context setting).
|
|
240
|
+
*/
|
|
241
|
+
explicit cann_task_queue(size_t capacity, int32_t device)
|
|
242
|
+
: buffer_(capacity), capacity_(capacity), head_(0), tail_(0),
|
|
243
|
+
running_(false), device_(device) {
|
|
244
|
+
GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2");
|
|
245
|
+
mask_ = capacity_ - 1;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* @brief Attempts to enqueue a task into the queue.
|
|
250
|
+
*
|
|
251
|
+
* @param item Unique pointer to the task.
|
|
252
|
+
* @return true if the task was successfully enqueued, false if the queue was full.
|
|
253
|
+
*/
|
|
254
|
+
bool enqueue(std::unique_ptr<cann_task>&& item) {
|
|
255
|
+
size_t next_tail = (tail_ + 1) & mask_;
|
|
256
|
+
|
|
257
|
+
if (next_tail == head_) {
|
|
258
|
+
return false;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
buffer_[tail_] = std::move(item);
|
|
262
|
+
std::atomic_thread_fence(std::memory_order_release);
|
|
263
|
+
tail_ = next_tail;
|
|
264
|
+
|
|
265
|
+
return true;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* @brief Submits a task to the queue, and starts the worker thread if not already running.
|
|
270
|
+
*
|
|
271
|
+
* @param task Task to be submitted.
|
|
272
|
+
*/
|
|
273
|
+
void submit_task(std::unique_ptr<cann_task>&& task) {
|
|
274
|
+
while(!enqueue(std::move(task))) {
|
|
275
|
+
std::this_thread::yield();
|
|
276
|
+
continue;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
if (!running_) {
|
|
280
|
+
running_ = true;
|
|
281
|
+
thread_ = std::thread(&cann_task_queue::execute, this);
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
/**
|
|
287
|
+
* @brief Waits until the queue is completely empty and no tasks are being processed.
|
|
288
|
+
*/
|
|
289
|
+
void wait() {
|
|
290
|
+
while (running_ && head_ != tail_) {
|
|
291
|
+
std::this_thread::yield();
|
|
292
|
+
continue;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
/**
|
|
297
|
+
* @brief Stops the task queue and joins the worker thread.
|
|
298
|
+
*/
|
|
299
|
+
void stop() {
|
|
300
|
+
running_ = false;
|
|
301
|
+
if (thread_.joinable()) {
|
|
302
|
+
thread_.join();
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
private:
|
|
307
|
+
/**
|
|
308
|
+
* @brief Worker thread function that continuously dequeues and executes tasks.
|
|
309
|
+
*/
|
|
310
|
+
void execute() {
|
|
311
|
+
ggml_cann_set_device(device_);
|
|
312
|
+
|
|
313
|
+
while (running_) {
|
|
314
|
+
if(head_ == tail_) {
|
|
315
|
+
std::this_thread::yield();
|
|
316
|
+
continue;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
std::atomic_thread_fence(std::memory_order_acquire);
|
|
320
|
+
buffer_[head_]->run_task();
|
|
321
|
+
buffer_[head_].reset();
|
|
322
|
+
head_ = (head_ + 1) & mask_;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
std::vector<std::unique_ptr<cann_task>> buffer_;
|
|
327
|
+
const size_t capacity_;
|
|
328
|
+
size_t mask_;
|
|
329
|
+
size_t head_;
|
|
330
|
+
size_t tail_;
|
|
331
|
+
bool running_;
|
|
332
|
+
std::thread thread_;
|
|
333
|
+
int32_t device_;
|
|
334
|
+
};
|
|
335
|
+
|
|
208
336
|
/**
|
|
209
337
|
* @brief Context for managing CANN backend operations.
|
|
210
338
|
*/
|
|
@@ -213,6 +341,8 @@ struct ggml_backend_cann_context {
|
|
|
213
341
|
std::string name; /**< Name of the device. */
|
|
214
342
|
std::string description; /**< Description of the device. */
|
|
215
343
|
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
|
|
344
|
+
cann_task_queue task_queue;
|
|
345
|
+
bool async_mode;
|
|
216
346
|
|
|
217
347
|
aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
|
|
218
348
|
|
|
@@ -221,9 +351,12 @@ struct ggml_backend_cann_context {
|
|
|
221
351
|
* @param device Device ID.
|
|
222
352
|
*/
|
|
223
353
|
explicit ggml_backend_cann_context(int device)
|
|
224
|
-
: device(device), name("CANN" + std::to_string(device)) {
|
|
354
|
+
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
|
|
225
355
|
ggml_cann_set_device(device);
|
|
226
356
|
description = aclrtGetSocName();
|
|
357
|
+
async_mode = (getenv("GGML_CANN_ASYNC_MODE") != nullptr);
|
|
358
|
+
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
|
|
359
|
+
device, async_mode ? "ON" : "OFF");
|
|
227
360
|
}
|
|
228
361
|
|
|
229
362
|
/**
|
|
@@ -231,6 +364,7 @@ struct ggml_backend_cann_context {
|
|
|
231
364
|
*/
|
|
232
365
|
~ggml_backend_cann_context() {
|
|
233
366
|
ggml_cann_set_device(device);
|
|
367
|
+
task_queue.stop();
|
|
234
368
|
if (copy_event != nullptr) {
|
|
235
369
|
ACL_CHECK(aclrtDestroyEvent(copy_event));
|
|
236
370
|
}
|