bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/common.lib +0 -0
  3. bigdl-core-npu/ggml.dll +0 -0
  4. bigdl-core-npu/ggml.lib +0 -0
  5. bigdl-core-npu/include/llamacpp/arg.h +77 -0
  6. bigdl-core-npu/include/llamacpp/common.h +563 -0
  7. bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
  8. bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
  9. bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
  10. bigdl-core-npu/include/llamacpp/llama.h +1234 -0
  11. bigdl-core-npu/include/llamacpp/log.h +92 -0
  12. bigdl-core-npu/include/npu/npu_common.h +119 -0
  13. bigdl-core-npu/include/npu/npu_llm.h +77 -0
  14. bigdl-core-npu/llama-cli-npu.exe +0 -0
  15. bigdl-core-npu/llama.dll +0 -0
  16. bigdl-core-npu/llama.lib +0 -0
  17. bigdl-core-npu/llm-cli.exe +0 -0
  18. bigdl-core-npu/npu_llm.dll +0 -0
  19. bigdl-core-npu/npu_llm.lib +0 -0
  20. bigdl-core-npu/zlib1.dll +0 -0
  21. bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
  22. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
  23. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
  24. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
  25. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
  26. intel_npu_acceleration_library/_version.py +1 -1
  27. intel_npu_acceleration_library/backend/base.py +39 -4
  28. intel_npu_acceleration_library/backend/bindings.py +109 -5
  29. intel_npu_acceleration_library/backend/factory.py +264 -47
  30. intel_npu_acceleration_library/backend/ops.py +2 -1
  31. intel_npu_acceleration_library/backend/qlinear.py +8 -4
  32. intel_npu_acceleration_library/backend/runtime.py +7 -2
  33. intel_npu_acceleration_library/backend/tensor.py +73 -3
  34. intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
  35. intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
  36. intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
  37. intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
  38. intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
  39. intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
  40. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
  41. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
  42. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
  43. intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
  44. intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
  45. intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
  46. intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
  47. intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
  48. intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
  49. intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
  50. intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
  51. intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
  52. intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
  53. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
  54. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
  55. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
  56. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
  57. intel_npu_acceleration_library/device.py +2 -2
  58. intel_npu_acceleration_library/dtypes.py +34 -1
  59. intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
  60. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
  61. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  62. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
  82. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
  83. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
  84. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  85. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  86. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  87. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  88. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  89. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
  90. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
  91. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
  92. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
  93. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
  94. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  95. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  96. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  97. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  98. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  99. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
  100. intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
  101. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
  102. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
  103. intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
  105. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
  107. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
  108. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
  110. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
  111. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
  114. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
  115. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
  116. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
  117. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
  118. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  119. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
  120. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
  121. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
  122. intel_npu_acceleration_library/external/openvino/utils.py +17 -0
  123. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  124. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  125. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  126. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  127. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  128. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  129. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  130. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  131. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  132. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  133. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  134. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  135. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  136. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  137. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  138. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  139. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  140. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  141. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  142. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  143. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  144. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  145. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  146. intel_npu_acceleration_library/nn/module.py +17 -17
@@ -0,0 +1,92 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h" // for ggml_log_level
4
+
5
+ #ifndef __GNUC__
6
+ # define LOG_ATTRIBUTE_FORMAT(...)
7
+ #elif defined(__MINGW32__)
8
+ # define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
9
+ #else
10
+ # define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
11
+ #endif
12
+
13
+ #define LOG_DEFAULT_DEBUG 1
14
+ #define LOG_DEFAULT_LLAMA 0
15
+
16
+ // needed by the LOG_TMPL macro to skip evaluating log arguments when a message's verbosity is above this threshold
17
+ // set via gpt_log_set_verbosity_thold()
18
+ extern int gpt_log_verbosity_thold;
19
+
20
+ void gpt_log_set_verbosity_thold(int verbosity); // not thread-safe
21
+
22
+ // the gpt_log uses an internal worker thread to print/write log messages
23
+ // when the worker thread is paused, incoming log messages are discarded
24
+ struct gpt_log;
25
+
26
+ struct gpt_log * gpt_log_init();
27
+ struct gpt_log * gpt_log_main(); // singleton, automatically destroys itself on exit
28
+ void gpt_log_pause (struct gpt_log * log); // pause the worker thread, not thread-safe
29
+ void gpt_log_resume(struct gpt_log * log); // resume the worker thread, not thread-safe
30
+ void gpt_log_free (struct gpt_log * log);
31
+
32
+ LOG_ATTRIBUTE_FORMAT(3, 4) // printf-style format checking on GNU-compatible compilers: fmt is argument 3, the variadic values start at argument 4; expands to nothing elsewhere
33
+ void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...);
34
+
35
+ // defaults: file = NULL, colors = false, prefix = false, timestamps = false
36
+ //
37
+ // regular log output:
38
+ //
39
+ // ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
40
+ // llm_load_tensors: ggml ctx size = 0.27 MiB
41
+ // llm_load_tensors: offloading 32 repeating layers to GPU
42
+ // llm_load_tensors: offloading non-repeating layers to GPU
43
+ //
44
+ // with prefix = true, timestamps = true, the log output will look like this:
45
+ //
46
+ // 0.00.035.060 D ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
47
+ // 0.00.035.064 I llm_load_tensors: ggml ctx size = 0.27 MiB
48
+ // 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
49
+ // 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
50
+ //
51
+ // I - info (stdout, V = 0)
52
+ // W - warning (stderr, V = 0)
53
+ // E - error (stderr, V = 0)
54
+ // D - debug (stderr, V = LOG_DEFAULT_DEBUG)
55
+ //
56
+
57
+ void gpt_log_set_file (struct gpt_log * log, const char * file); // not thread-safe
58
+ void gpt_log_set_colors (struct gpt_log * log, bool colors); // not thread-safe
59
+ void gpt_log_set_prefix (struct gpt_log * log, bool prefix); // whether to output prefix to each log
60
+ void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps); // whether to output timestamps in the prefix
61
+
62
+ // helper macros for logging
63
+ // use these to avoid computing log arguments if the verbosity of the log is higher than the threshold
64
+ //
65
+ // for example:
66
+ //
67
+ // LOG_DBG("this is a debug message: %d\n", expensive_function());
68
+ //
69
+ // this will avoid calling expensive_function() if LOG_DEFAULT_DEBUG > gpt_log_verbosity_thold
70
+ //
71
+
72
+ #define LOG_TMPL(level, verbosity, ...) \
73
+ do { \
74
+ if ((verbosity) <= gpt_log_verbosity_thold) { \
75
+ gpt_log_add(gpt_log_main(), (level), __VA_ARGS__); \
76
+ } \
77
+ } while (0)
78
+
79
+ #define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, 0, __VA_ARGS__)
80
+ #define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
81
+
82
+ #define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, 0, __VA_ARGS__)
83
+ #define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__)
84
+ #define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
85
+ #define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
86
+ #define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, 0, __VA_ARGS__)
87
+
88
+ #define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
89
+ #define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
90
+ #define LOG_ERRV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, verbosity, __VA_ARGS__)
91
+ #define LOG_DBGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, verbosity, __VA_ARGS__)
92
+ #define LOG_CNTV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_CONT, verbosity, __VA_ARGS__)
@@ -0,0 +1,119 @@
1
+ //
2
+ // Copyright 2016 The BigDL Authors.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ //
16
+
17
+ #pragma once
18
+
19
+ #include <string>
20
+ #include <vector>
21
+ #include <sstream>
22
+ #include <chrono>
23
+
24
+ #ifdef __linux__
25
+ #define EXPORT_API extern "C"
26
+ #else
27
+ #define EXPORT_API extern "C" __declspec(dllexport)
28
+ #endif
29
+
30
+
31
+ struct common_params { // parameter bundle; every field carries an in-class default. NOTE(review): int32_t is used but this header does not include <cstdint> itself
32
+ int32_t n_predict = -1; // number of new tokens to predict (defaults to -1)
33
+ char* model = ""; // model path. NOTE(review): binds a string literal to a non-const char*, which ISO C++11 forbids; const char* would be the safe type // NOLINT
34
+ std::string prompt = ""; // prompt text, empty by default // NOLINT
35
+ std::string prompt_file = ""; // name of the external prompt file, empty by default // NOLINT
36
+
37
+ std::string cache_type_k = "f16"; // KV cache data type for K (defaults to "f16")
38
+ std::string cache_type_v = "f16"; // KV cache data type for V (defaults to "f16")
39
+ };
40
+
41
+ struct npu_model_params {
42
+ int32_t kv_len;
43
+ int32_t max_prompt_len;
44
+ int32_t num_head;
45
+ int32_t head_dim;
46
+ int32_t num_layers;
47
+ int32_t vocab_size;
48
+ int32_t hidden_size;
49
+ int32_t intermediate_size;
50
+ int32_t group_size;
51
+ int32_t fused_layers_num;
52
+ int32_t fused_layers;
53
+ int32_t weight_num;
54
+ int32_t weight_idx;
55
+ int32_t n_splits_linear;
56
+ int32_t n_splits_down_proj;
57
+ int32_t max_position_embeddings;
58
+ bool embedding_post;
59
+ std::string model_dir;
60
+ std::string model_weight_dir;
61
+ std::string model_name;
62
+ std::string prefill_layer_blob_name;
63
+ std::string lmhead_blob_name;
64
+ std::string embedding_post_prefill_blob_name;
65
+ std::string embedding_post_blob_name;
66
+ std::string prefill_layer_ir_name;
67
+ std::string lmhead_ir_name;
68
+ std::string embedding_post_prefill_ir_name;
69
+ std::string embedding_post_ir_name;
70
+ std::string config;
71
+ std::string low_bit;
72
+ std::string lm_head_low_bit;
73
+ bool const_parameter;
74
+ std::string model_type;
75
+ bool transpose_value_cache;
76
+ bool qkv_bias;
77
+ bool use_prefill_sdp;
78
+ bool cos_sin_input;
79
+ bool use_level_zero;
80
+ };
81
+
82
+ struct tokenizer_params {
83
+ std::string tokenizer_file;
84
+ int32_t bos_token_id;
85
+ std::vector<int32_t> eos_token_id;
86
+ };
87
+
88
+ struct npu_generation_params {
89
+ // may add more later when dealing with more cases
90
+ float repetition_penalty;
91
+ int32_t max_new_token;
92
+ };
93
+
94
+ struct llm_perf_data {
95
+ std::chrono::time_point<std::chrono::high_resolution_clock> t_start;
96
+ double t_load_ms;
97
+ double t_p_eval_ms;
98
+ double t_eval_ms;
99
+ uint32_t n_p_eval;
100
+ uint32_t n_eval;
101
+ };
102
+
103
+ #ifndef BASE64_H // extra include guard around the base64 declarations (the header also uses #pragma once)
104
+ #define BASE64_H
105
+
106
+ namespace base64 { // helpers to encode and decode; only declared here, the definitions live outside this header
107
+ std::string encode(const std::string &data);
108
+ std::string decode(const std::string &data);
109
+ } // namespace base64
110
+
111
+ #endif // BASE64_H
112
+
113
+ #ifdef __cplusplus
114
+ extern "C" {
115
+ #endif
116
+
117
+ #ifdef __cplusplus
118
+ }
119
+ #endif
@@ -0,0 +1,77 @@
1
+ //
2
+ // Copyright 2016 The BigDL Authors.
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+ //
16
+
17
+ #pragma once
18
+
19
+ #include <string>
20
+ #include <vector>
21
+ #include <stddef.h>
22
+ #include <stdint.h>
23
+ #include <stdio.h>
24
+ #include <stdbool.h>
25
+ #include <memory>
26
+ #include <vector>
27
+
28
+ #include "npu_common.h"
29
+
30
+ using namespace std;
31
+
32
+ #ifdef __linux__
33
+ #define EXPORT_API extern "C"
34
+ #else
35
+ #define EXPORT_API extern "C" __declspec(dllexport)
36
+ #endif
37
+
38
+
39
+ class NPUModel;
40
+
41
+ #ifdef __cplusplus // NOTE(review): this header includes <string>/<vector>/<memory>, so it cannot be compiled as C and the guard is redundant in practice
42
+ extern "C" {
43
+ #endif
44
+ EXPORT_API void load_tokenizer(tokenizer_params &tok_params, std::string model_str); // C linkage, but takes a C++ reference and a std::string by value, so only C++ callers can use it
45
+
46
+ EXPORT_API vector<int32_t> llm_tokenize(std::string prompt, bool add_special); // unqualified `vector` resolves through the header-level `using namespace std;`
47
+
48
+ EXPORT_API std::string llm_decode(vector<int32_t> tokens); // returns a C++ object from a C-linkage function — NOTE(review): MSVC flags this pattern (C4190)
49
+
50
+ EXPORT_API void* load_model_from_file(const char* model_path); // returns an opaque pointer; presumably the void_model handle taken by the run_* functions — TODO confirm
51
+
52
+ EXPORT_API void load_config_from_file(npu_model_params &model_params, const char* model_path); // model_params is passed by non-const reference (output parameter style)
53
+
54
+ EXPORT_API void load_generation_config_from_file(npu_generation_params &generation_params, const char* model_path); // generation_params is passed by non-const reference (output parameter style)
55
+
56
+ EXPORT_API std::string add_chat_template(npu_model_params model_params, std::string input_prompt); // takes the whole npu_model_params struct by value (copied on every call)
57
+
58
+ EXPORT_API float* run_prefill(void* void_model, void* embd_inp_ptr, int32_t embd_inp_size, float repetition_penalty, bool skip_embd=false); // skip_embd defaults to false (C++ default argument)
59
+
60
+ EXPORT_API float* run_decode(void* void_model, int32_t input_token, float repetition_penalty);
61
+
62
+ EXPORT_API void run_prefill_with_logits(void* void_model, void* embd_inp_ptr, int32_t embd_inp_size, float* logits, int32_t vocab_size, bool skip_embd=false); // skip_embd defaults to false (C++ default argument)
63
+
64
+ EXPORT_API void run_decode_with_logits(void* void_model, int32_t input_token, float* logits, int32_t vocab_size);
65
+
66
+ EXPORT_API float* process_logits(float* logits, int32_t vocab_size, int32_t* p_updated_input_ids, int32_t updated_input_id_size, float repetition_penalty);
67
+
68
+ EXPORT_API int32_t llm_sample_token(float* logits, bool greedy_search, int32_t vocab_size);
69
+
70
+ EXPORT_API void reset(void* void_model);
71
+
72
+ EXPORT_API void llm_perf_print(void * void_model);
73
+
74
+ EXPORT_API void prepare_ir(const char* model_path);
75
+ #ifdef __cplusplus
76
+ } // extern "C"
77
+ #endif
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,29 @@
1
+ @echo off
2
+ for /f "delims=" %%i in ('python -c "import importlib; print(importlib.import_module('bigdl-core-npu').__file__)"') do set "cpp_file=%%i"
3
+ if not defined cpp_file (echo ERROR: could not locate the installed bigdl-core-npu package 1>&2 & exit /b 1)
4
+ for %%a in ("%cpp_file%") do set "cpp_dir=%%~dpa"
5
+ set "cpp_dir=%cpp_dir:~0,-1%"
6
+ set "lib_dir=%cpp_dir:bigdl-core-npu=intel_npu_acceleration_library%\lib\Release"
7
+ set "destination_folder=%cd%"
8
+ rem Link the runtime libraries into the starting folder. Abort when lib\Release is missing, otherwise the loop would enumerate and delete files in the current directory.
9
+ pushd "%lib_dir%" || (echo ERROR: cannot enter "%lib_dir%" 1>&2 & exit /b 1)
10
+ for %%f in (*) do (
11
+ if exist "%destination_folder%\%%~nxf" (
12
+ del /f "%destination_folder%\%%~nxf"
13
+ )
14
+ mklink "%destination_folder%\%%~nxf" "%%~ff"
15
+ )
16
+ popd
17
+ rem Link the package files next. llama-cli-npu.exe is skipped here because a real copy is made at the end.
18
+ pushd "%cpp_dir%" || (echo ERROR: cannot enter "%cpp_dir%" 1>&2 & exit /b 1)
19
+ for %%f in (*) do (
20
+ if not "%%f"=="llama-cli-npu.exe" (
21
+ if exist "%destination_folder%\%%~nxf" (
22
+ del /f "%destination_folder%\%%~nxf"
23
+ )
24
+ mklink "%destination_folder%\%%~nxf" "%%~ff"
25
+ )
26
+ )
27
+ popd
28
+ rem Place a real copy of the CLI executable in the current directory.
29
+ copy "%cpp_dir%\llama-cli-npu.exe" .
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: bigdl-core-npu
3
- Version: 2.5.0
3
+ Version: 2.6.0
4
4
  Summary: Intel® NPU Acceleration Library
5
5
  Home-page: https://github.com/intel/intel-npu-acceleration-library
6
6
  Author: Alessandro Palla
@@ -32,4 +32,13 @@ Requires-Dist: numpy
32
32
  Requires-Dist: torch
33
33
  Requires-Dist: transformers>=4.39.3
34
34
  Requires-Dist: neural-compressor
35
-
35
+ Dynamic: author
36
+ Dynamic: author-email
37
+ Dynamic: classifier
38
+ Dynamic: description-content-type
39
+ Dynamic: home-page
40
+ Dynamic: keywords
41
+ Dynamic: license
42
+ Dynamic: requires-dist
43
+ Dynamic: requires-python
44
+ Dynamic: summary