bigdl-core-npu 2.6.0b20250206__cp311-cp311-win_amd64.whl → 2.6.0b20250208__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bigdl-core-npu/common.lib CHANGED
Binary file
bigdl-core-npu/ggml.dll CHANGED
Binary file
bigdl-core-npu/ggml.lib CHANGED
Binary file
@@ -70,7 +70,7 @@ struct npu_model_params {
70
70
  std::string config;
71
71
  std::string low_bit;
72
72
  std::string lm_head_low_bit;
73
- bool layernorm_const;
73
+ bool const_parameter;
74
74
  std::string model_type;
75
75
  bool transpose_value_cache;
76
76
  bool qkv_bias;
@@ -344,7 +344,7 @@ struct gpt_params {
344
344
  bool batched_bench_output_jsonl = false;
345
345
 
346
346
  // npu convert
347
- std::string low_bit = "sym_int4";
347
+ std::string low_bit = "Q4_0";
348
348
  int32_t quantization_group_size = 0;
349
349
  int32_t max_context_len = 1024;
350
350
  int32_t max_prompt_len = 512;
@@ -2661,9 +2661,19 @@ extern "C" {
2661
2661
 
2662
2662
  GGML_API void ggml_dequantize_to_fp32(struct ggml_tensor * tensor, float *y, size_t n);
2663
2663
 
2664
- GGML_API void ggml_requantize_to_npu(struct ggml_tensor * tensor, float* y, const char* layer, const int weight_idx, enum gguf_npu_qtype npu_type, const char* dir_path);
2664
+ GGML_API void ggml_requantize_to_npu(struct ggml_tensor * tensor, float* y, const char* layer, const int weight_idx, enum gguf_npu_qtype npu_type, const char* dir_path, int n_splits);
2665
2665
 
2666
2666
  GGML_API void ggml_convert_to_fp16(struct ggml_tensor * tensor, const char* layer, const int weight_idx, const char* dir_path);
2667
+
2668
+ GGML_API size_t ggml_quantize_chunk_with_splits(
2669
+ enum ggml_type type,
2670
+ const float * src,
2671
+ void * dst,
2672
+ int64_t start,
2673
+ int64_t nrows,
2674
+ int64_t n_per_row,
2675
+ const float * imatrix,
2676
+ int n_splits);
2667
2677
  #ifdef __cplusplus
2668
2678
  }
2669
2679
  #endif
@@ -8,6 +8,7 @@
8
8
  #include <stdint.h>
9
9
  #include <stdio.h>
10
10
  #include <stdbool.h>
11
+ #include <string>
11
12
 
12
13
  #ifdef LLAMA_SHARED
13
14
  # if defined(_WIN32) && !defined(__MINGW32__)
@@ -310,6 +311,8 @@ extern "C" {
310
311
  bool use_mmap; // use mmap if possible
311
312
  bool use_mlock; // force system to keep model in RAM
312
313
  bool check_tensors; // validate model tensor data
314
+ std::string npu_model;
315
+ std::string qtype;
313
316
  };
314
317
 
315
318
  // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
@@ -1217,6 +1220,8 @@ extern "C" {
1217
1220
 
1218
1221
  LLAMA_API void convert_gguf_to_npu_weight(llama_model* model, const char* weight_path, enum gguf_npu_qtype npu_type);
1219
1222
 
1223
+ LLAMA_API std::string get_npu_model_dir(llama_model * model, std::string qtype);
1224
+
1220
1225
  #ifdef __cplusplus
1221
1226
  }
1222
1227
  #endif
Binary file
bigdl-core-npu/llama.dll CHANGED
Binary file
bigdl-core-npu/llama.lib CHANGED
Binary file
Binary file
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: bigdl-core-npu
3
- Version: 2.6.0b20250206
3
+ Version: 2.6.0b20250208
4
4
  Summary: Intel® NPU Acceleration Library
5
5
  Home-page: https://github.com/intel/intel-npu-acceleration-library
6
6
  Author: Alessandro Palla
@@ -1,23 +1,24 @@
1
1
  bigdl-core-npu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- bigdl-core-npu/common.lib,sha256=WeP3Luv_NeckX076g4F2Yu-94kY923dN8_SZ5gMxm7A,6647656
3
- bigdl-core-npu/ggml.dll,sha256=fasouXiGR4_52JcJ55A4-LCW0rzC2l47a00x4gqESRM,704000
4
- bigdl-core-npu/ggml.lib,sha256=U29OuxT-I6Ul4MQA7IaQqq7FN6d8C45Ee8SjjzjEt10,139602
5
- bigdl-core-npu/llama-cli-npu.exe,sha256=prFMRx0v3iLbpf7ZjoXUx-p43Psw5RKlsogd7RNlpkc,43520
6
- bigdl-core-npu/llama.dll,sha256=ITiOtxH1Uov3zE2BtS-TSb7MFlVtAnNi6nVfLe7nrco,1574400
7
- bigdl-core-npu/llama.lib,sha256=Z2dQy5y6QJ1P2DubIjfo4cB-T6a2ZfAMEC7MDShrX8Q,1494020
8
- bigdl-core-npu/npu_llm.dll,sha256=Dkl94IB73CLBCl9sYqKynFPRLsz7ySIttYdxiViVpmY,3956736
2
+ bigdl-core-npu/common.lib,sha256=2EWBOeMbwQA5G-ncR3V8Q_fnKiEQSBB4Kq9mhMyett0,6645900
3
+ bigdl-core-npu/ggml.dll,sha256=IklFfsCaC9zARbPclfXAr1kvw51cf8HfyuRqUOEzpSc,705024
4
+ bigdl-core-npu/ggml.lib,sha256=YiA6rQgJZwvwdM0bXqMHNgAigoEPozcLs7jP3dYz1nk,139880
5
+ bigdl-core-npu/llama-cli-npu.exe,sha256=_o2PZIbodHqp7zCzJ9WQUYdgXNiKdjldyt-NqUML02o,556032
6
+ bigdl-core-npu/llama.dll,sha256=sijBcM3BFDq6j_kF5BKXK6APzVvyZ3G8DKSklE9-hhI,1591808
7
+ bigdl-core-npu/llama.lib,sha256=G-H-iFwvni5O4_Lwjx2GteAE7B9kTC9pMVbM23dndC0,1515984
8
+ bigdl-core-npu/llm-cli.exe,sha256=vk6HxJd1wm0c4qWUIf2XZrv8z6BY73HJuf5fBw7y1TM,44544
9
+ bigdl-core-npu/npu_llm.dll,sha256=HCswGalVLJOU7KKNHpRpms0i5m-3z6Jar-9hUBHoehs,3962368
9
10
  bigdl-core-npu/npu_llm.lib,sha256=nNYF-btjrT9Pzcd31kQV5BsLBSNdA_DSeyK5RnmOc9s,44524
10
- bigdl-core-npu/include/common.h,sha256=xsTOeUtJPvwuv5F8Wn9Bk67GnvIk3RlMI3DN_W5UBEY,3206
11
+ bigdl-core-npu/include/common.h,sha256=p5PTHTHiJlOA9lXXWH12eHf5JTvGi-mmf9GgFY7kv7E,3206
11
12
  bigdl-core-npu/include/npu_llm.h,sha256=siEFqkKnZlzR9cvq7qXiAy5Z65-X14QyWEw__wsLLpA,2605
12
13
  bigdl-core-npu/include/llamacpp/arg.h,sha256=XOLwSDqJsCJJgGCkOGSWcA9yGZ0eiiY7GffHdAxFYYY,3110
13
- bigdl-core-npu/include/llamacpp/common.h,sha256=SaGJ1cAmJWd5D-T_03udb6ADTcJ8VX-bMy0GYGDMpI4,25841
14
+ bigdl-core-npu/include/llamacpp/common.h,sha256=pZ1DuXPcaIFui7juV0kB4-c5CW2I9BRI3Tlz8OT_bsk,25837
14
15
  bigdl-core-npu/include/llamacpp/ggml-alloc.h,sha256=kiWITcUF9Q7kvGSBeCfouV59YyGDNy56VE_0fXXnWHE,3088
15
16
  bigdl-core-npu/include/llamacpp/ggml-backend.h,sha256=cUJR-AC7hUbIxukKixR9vUSV38gmp-9eoViaBG8lyio,14347
16
- bigdl-core-npu/include/llamacpp/ggml.h,sha256=_K-zhVjT6uHa5lXMN3k3AwSrwb9W2q1A7v5HiGQc8iQ,105629
17
- bigdl-core-npu/include/llamacpp/llama.h,sha256=gMTNuRrEY7z0nkCewKLMZtf5urcSzkQm_HxEs3DJXHI,60967
17
+ bigdl-core-npu/include/llamacpp/ggml.h,sha256=liFnnzpemuVWZ3lg7oTCoRIPCo2taBdA8cwMY6OV9FM,106000
18
+ bigdl-core-npu/include/llamacpp/llama.h,sha256=OCMREKob7-NDw3ia9INyJ13bVKUT00gSp9JIayY5FnM,61134
18
19
  bigdl-core-npu/include/llamacpp/log.h,sha256=D6UiundA26yZOh3ci828u5U4ZaBxWb9CwSNu_3dt6CA,4295
19
20
  intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
20
- intel_npu_acceleration_library/_version.py,sha256=l2BonFE20zLZI17uMf5zSIVamDTrunPYZXc0hR7UQns,112
21
+ intel_npu_acceleration_library/_version.py,sha256=4pFkd6f20XRFeIEE7uRcKsrNSMU_fJedu5xEWYA3v6k,112
21
22
  intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
22
23
  intel_npu_acceleration_library/device.py,sha256=9bn8eVXJa5cXIqgfLsQAdkMVtVUQABb8z0-mQik5jRg,7424
23
24
  intel_npu_acceleration_library/dtypes.py,sha256=gdd06Wsc9zIZFHlauUEx4xcK9WGTn1Mu6GkuYDJeA-E,4683
@@ -234,7 +235,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
234
235
  intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
235
236
  intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
236
237
  intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
237
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=tXXz-JLzbryT0FJI3uf7y3Ru-ClGrGfzQLJLK7mcGeU,341504
238
+ intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=pya_g73mUmmLRM7pUeVQ1A0nCTi22TWae16hBHYe4_s,341504
238
239
  intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=m7M119p3JBq2YYJJ2zzCaBDz6XivKK3nNykb8L1cvDU,13244768
239
240
  intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=2v_I9P3Qo0St1bQZMEZscnFOUVvgZQQ0HvQlG3HtTd0,203104
240
241
  intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=e3Aj9CDRHN30dBEdPSk7OCWe52tWfhI4xeXgyFjuDHg,475488
@@ -264,7 +265,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
264
265
  intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
265
266
  intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
266
267
  intel_npu_acceleration_library/nn/module.py,sha256=EYxoTq6I_YgBDgTF76GPDxHrT8SupOTDGMzQaomBeq8,12667
267
- bigdl_core_npu-2.6.0b20250206.dist-info/METADATA,sha256=AvzIyOhFVctFq9nq_McYX_Fuib3-aqtLQeQcE8xmxZY,1762
268
- bigdl_core_npu-2.6.0b20250206.dist-info/WHEEL,sha256=yNnHoQL2GZYIUXm9YvoaBpFjGlUoK9qq9oqYeudrWlE,101
269
- bigdl_core_npu-2.6.0b20250206.dist-info/top_level.txt,sha256=iMQZlTsFPJjlD-Y0MqZEP_9ifI0LlbNCJIOTaMoGMjk,46
270
- bigdl_core_npu-2.6.0b20250206.dist-info/RECORD,,
268
+ bigdl_core_npu-2.6.0b20250208.dist-info/METADATA,sha256=0eKBbcpupCbACu_wuUmjO8kgF0Uj0-C39YnxzYKopAI,1762
269
+ bigdl_core_npu-2.6.0b20250208.dist-info/WHEEL,sha256=yNnHoQL2GZYIUXm9YvoaBpFjGlUoK9qq9oqYeudrWlE,101
270
+ bigdl_core_npu-2.6.0b20250208.dist-info/top_level.txt,sha256=iMQZlTsFPJjlD-Y0MqZEP_9ifI0LlbNCJIOTaMoGMjk,46
271
+ bigdl_core_npu-2.6.0b20250208.dist-info/RECORD,,
@@ -3,4 +3,4 @@
3
3
  # SPDX-License-Identifier: Apache 2.0
4
4
  #
5
5
 
6
- __version__ = "2.6.0b20250206"
6
+ __version__ = "2.6.0b20250208"