bigdl-core-npu 2.5.0__cp310-cp310-win_amd64.whl → 2.6.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/common.lib +0 -0
  3. bigdl-core-npu/ggml.dll +0 -0
  4. bigdl-core-npu/ggml.lib +0 -0
  5. bigdl-core-npu/include/llamacpp/arg.h +77 -0
  6. bigdl-core-npu/include/llamacpp/common.h +563 -0
  7. bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
  8. bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
  9. bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
  10. bigdl-core-npu/include/llamacpp/llama.h +1234 -0
  11. bigdl-core-npu/include/llamacpp/log.h +92 -0
  12. bigdl-core-npu/include/npu/npu_common.h +119 -0
  13. bigdl-core-npu/include/npu/npu_llm.h +77 -0
  14. bigdl-core-npu/llama-cli-npu.exe +0 -0
  15. bigdl-core-npu/llama.dll +0 -0
  16. bigdl-core-npu/llama.lib +0 -0
  17. bigdl-core-npu/llm-cli.exe +0 -0
  18. bigdl-core-npu/npu_llm.dll +0 -0
  19. bigdl-core-npu/npu_llm.lib +0 -0
  20. bigdl-core-npu/zlib1.dll +0 -0
  21. bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
  22. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
  23. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
  24. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
  25. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
  26. intel_npu_acceleration_library/_version.py +1 -1
  27. intel_npu_acceleration_library/backend/base.py +39 -4
  28. intel_npu_acceleration_library/backend/bindings.py +109 -5
  29. intel_npu_acceleration_library/backend/factory.py +264 -47
  30. intel_npu_acceleration_library/backend/ops.py +2 -1
  31. intel_npu_acceleration_library/backend/qlinear.py +8 -4
  32. intel_npu_acceleration_library/backend/runtime.py +7 -2
  33. intel_npu_acceleration_library/backend/tensor.py +73 -3
  34. intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
  35. intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
  36. intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
  37. intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
  38. intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
  39. intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
  40. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
  41. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
  42. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
  43. intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
  44. intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
  45. intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
  46. intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
  47. intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
  48. intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
  49. intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
  50. intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
  51. intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
  52. intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
  53. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
  54. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
  55. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
  56. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
  57. intel_npu_acceleration_library/device.py +2 -2
  58. intel_npu_acceleration_library/dtypes.py +34 -1
  59. intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
  60. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
  61. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  62. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
  82. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
  83. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
  84. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  85. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  86. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  87. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  88. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  89. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
  90. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
  91. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
  92. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
  93. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
  94. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  95. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  96. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  97. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  98. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  99. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
  100. intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
  101. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
  102. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
  103. intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
  105. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
  107. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
  108. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
  110. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
  111. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
  114. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
  115. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
  116. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
  117. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
  118. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  119. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
  120. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
  121. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
  122. intel_npu_acceleration_library/external/openvino/utils.py +17 -0
  123. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  124. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  125. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  126. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  127. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  128. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  129. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  130. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  131. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  132. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  133. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  134. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  135. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  136. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  137. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  138. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  139. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  140. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  141. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  142. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  143. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  144. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  145. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  146. intel_npu_acceleration_library/nn/module.py +17 -17
@@ -0,0 +1,76 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+
5
+ #ifdef __cplusplus
6
+ extern "C" {
7
+ #endif
8
+
9
+ typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
10
+ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
11
+ typedef struct ggml_backend * ggml_backend_t;
12
+
13
+ // Tensor allocator
14
+ struct ggml_tallocr {
15
+ ggml_backend_buffer_t buffer;
16
+ void * base;
17
+ size_t alignment;
18
+ size_t offset;
19
+ };
20
+
21
+ GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
22
+ GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
23
+
24
+ // Graph allocator
25
+ /*
26
+ Example usage:
27
+ ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
28
+
29
+ // optional: create a worst-case graph and reserve the buffers to avoid reallocations
30
+ ggml_gallocr_reserve(galloc, build_graph(max_batch));
31
+
32
+ // allocate the graph
33
+ struct ggml_cgraph * graph = build_graph(batch);
34
+ ggml_gallocr_alloc_graph(galloc, graph);
35
+
36
+ printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
37
+
38
+ // evaluate the graph
39
+ ggml_backend_graph_compute(backend, graph);
40
+ */
41
+
42
+ // special tensor flags for use with the graph allocator:
43
+ // ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
44
+ // ggml_set_output(): output tensors are never freed and never overwritten
45
+
46
+ typedef struct ggml_gallocr * ggml_gallocr_t;
47
+
48
+ GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
49
+ GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
50
+ GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
51
+
52
+ // pre-allocate buffers from a measure graph - does not allocate or modify the graph
53
+ // call with a worst-case graph to avoid buffer reallocations
54
+ // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
55
+ // returns false if the buffer allocation failed
56
+ GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
57
+ GGML_API bool ggml_gallocr_reserve_n(
58
+ ggml_gallocr_t galloc,
59
+ struct ggml_cgraph * graph,
60
+ const int * node_buffer_ids,
61
+ const int * leaf_buffer_ids);
62
+
63
+ // automatic reallocation if the topology changes when using a single buffer
64
+ // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
65
+ GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
66
+
67
+ GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
68
+
69
+ // Utils
70
+ // Create a buffer and allocate all the tensors in a ggml_context
71
+ GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
72
+ GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
73
+
74
+ #ifdef __cplusplus
75
+ }
76
+ #endif
@@ -0,0 +1,241 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-alloc.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
11
+ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
12
+ typedef struct ggml_backend_event * ggml_backend_event_t;
13
+ typedef struct ggml_backend * ggml_backend_t;
14
+ typedef void * ggml_backend_graph_plan_t;
15
+
16
+ //
17
+ // Backend buffer
18
+ //
19
+
20
+ // buffer type
21
+ GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
22
+ GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
23
+ GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
24
+ GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
25
+ GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
26
+ GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
27
+
28
+ // buffer
29
+ enum ggml_backend_buffer_usage {
30
+ GGML_BACKEND_BUFFER_USAGE_ANY = 0,
31
+ GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
32
+ GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
33
+ };
34
+
35
+ GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
36
+ GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
37
+ GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
38
+ GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
39
+ GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
40
+ GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
41
+ GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
42
+ GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
43
+ GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
44
+ GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
45
+ GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
46
+ GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
47
+ GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
48
+ GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
49
+
50
+ //
51
+ // Backend
52
+ //
53
+
54
+ GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
55
+ GGML_API const char * ggml_backend_name(ggml_backend_t backend);
56
+ GGML_API void ggml_backend_free(ggml_backend_t backend);
57
+
58
+ GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
59
+ GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
60
+ GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
61
+ GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
62
+
63
+ GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
64
+ GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
65
+
66
+ // "offset" refers to the offset of the tensor data for setting/getting data
67
+ GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
68
+ GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
69
+ GGML_API GGML_CALL void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
70
+
71
+ GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
72
+
73
+ GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
74
+ GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
75
+
76
+ GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
77
+ GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
78
+ GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
79
+ GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
80
+ GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
81
+ GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
82
+
83
+ // tensor copy between different backends
84
+ GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
85
+
86
+ // asynchronous copy
87
+ // the copy is performed after all the currently queued operations in backend_src
88
+ // backend_dst will wait for the copy to complete before performing other operations
89
+ // automatic fallback to sync copy if async is not supported
90
+ GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
91
+
92
+ // events
93
+ GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
94
+ GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
95
+ GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
96
+ GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
97
+ GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
98
+
99
+ //
100
+ // CPU backend
101
+ //
102
+
103
+ GGML_API ggml_backend_t ggml_backend_cpu_init(void);
104
+
105
+ GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
106
+ GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
107
+ GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
108
+ GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
109
+
110
+ // Create a backend buffer from an existing pointer
111
+ GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
112
+
113
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
114
+
115
+ #ifdef GGML_USE_CPU_HBM
116
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
117
+ #endif
118
+
119
+ //
120
+ // Backend registry
121
+ //
122
+
123
+ // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
124
+
125
+ GGML_API size_t ggml_backend_reg_get_count(void);
126
+ GGML_API size_t ggml_backend_reg_find_by_name(const char * name); // returns index of backend with name, or SIZE_MAX if not found
127
+ GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
128
+ GGML_API const char * ggml_backend_reg_get_name(size_t i);
129
+ GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
130
+ GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
131
+ GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
132
+
133
+ //
134
+ // Backend scheduler
135
+ //
136
+
137
+ // The backend scheduler allows for multiple backends to be used together
138
+ // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
139
+ // The backends are selected based on:
140
+ // - the backend that supports the operation
141
+ // - the location of the pre-allocated tensors (e.g. the weights)
142
+ /*
143
+ Example usage:
144
+
145
+ // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
146
+ // preferably to run on the same backend as the buffer
147
+ ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
148
+
149
+ sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
150
+
151
+ // initialize buffers from a max size graph (optional)
152
+ reserve_graph = build_graph(sched, max_batch_size);
153
+
154
+ // manually assign nodes to a backend (optional, should not be needed in most cases)
155
+ struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
156
+ ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
157
+
158
+ ggml_backend_sched_reserve(sched, reserve_graph);
159
+
160
+ // compute
161
+ graph = build_graph(sched);
162
+ ggml_backend_sched_graph_compute(sched, graph);
163
+
164
+ // if there are graph inputs:
165
+ ggml_backend_sched_reset(sched);
166
+ ggml_backend_sched_alloc_graph(sched, graph);
167
+ ggml_backend_tensor_set(input_tensor, ...);
168
+ ggml_backend_sched_graph_compute(sched, graph);
169
+ }
170
+ */
171
+
172
+ struct ggml_backend_sched;
173
+ typedef struct ggml_backend_sched * ggml_backend_sched_t;
174
+
175
+ // when ask == true, the scheduler wants to know if the user wants to observe this node
176
+ // this allows the scheduler to batch nodes together in order to evaluate them in a single call
177
+ //
178
+ // when ask == false, the scheduler is passing the node tensor to the user for observation
179
+ // if the user returns false, the scheduler will cancel the graph compute
180
+ //
181
+ typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
182
+
183
+ // Initialize a backend scheduler
184
+ GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
185
+ GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
186
+
187
+ // Initialize backend buffers from a measure graph
188
+ GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
189
+
190
+ GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
191
+ GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
192
+
193
+ // Get the number of splits of the last graph
194
+ GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
195
+ GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
196
+
197
+ GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
198
+
199
+ GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
200
+ GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
201
+
202
+ // Allocate and compute graph on the backend scheduler
203
+ GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
204
+ GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
205
+ GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
206
+ GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
207
+
208
+ // Reset all assignments and allocators - must be called before changing the node backends
209
+ GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
210
+
211
+ // Set a callback to be called for each resulting node during graph compute
212
+ GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
213
+
214
+ //
215
+ // Utils
216
+ //
217
+
218
+ struct ggml_backend_graph_copy {
219
+ ggml_backend_buffer_t buffer;
220
+ struct ggml_context * ctx_allocated;
221
+ struct ggml_context * ctx_unallocated;
222
+ struct ggml_cgraph * graph;
223
+ };
224
+
225
+ // Copy a graph to a different backend
226
+ GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
227
+ GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
228
+
229
+ typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
230
+
231
+ // Compare the output of two backends
232
+ GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
233
+
234
+ // Tensor initialization
235
+ GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
236
+ GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
237
+
238
+
239
+ #ifdef __cplusplus
240
+ }
241
+ #endif