ipex-llm 2.2.0b20250104__py3-none-win_amd64.whl → 2.2.0b20250106__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/npu_model.py +80 -50
  31. ipex_llm/transformers/npu_models/convert_mp.py +1 -1
  32. ipex_llm/transformers/npu_models/linear.py +15 -3
  33. ipex_llm/transformers/npu_models/lm_head.py +1 -90
  34. ipex_llm/transformers/npu_models/lm_head_linear.py +106 -0
  35. {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/METADATA +19 -19
  36. {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/RECORD +42 -41
  37. {ipex_llm-2.2.0b20250104.data → ipex_llm-2.2.0b20250106.data}/scripts/ipex-llm-init.bat +0 -0
  38. {ipex_llm-2.2.0b20250104.data → ipex_llm-2.2.0b20250106.data}/scripts/llm-chat.ps1 +0 -0
  39. {ipex_llm-2.2.0b20250104.data → ipex_llm-2.2.0b20250106.data}/scripts/llm-cli.ps1 +0 -0
  40. {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/WHEEL +0 -0
  41. {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/entry_points.txt +0 -0
  42. {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/top_level.txt +0 -0
Binary file
ipex_llm/libs/bloom.dll CHANGED
Binary file
Binary file
ipex_llm/libs/gptneox.dll CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
ipex_llm/libs/llama.dll CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -27,7 +27,7 @@ from transformers.configuration_utils import PretrainedConfig
27
27
 
28
28
  from ipex_llm.utils.common.log4Error import invalidInputError
29
29
  from ipex_llm.transformers.utils import logger, load_imatrix_data
30
- from ipex_llm.transformers.npu_models.convert import optimize_llm, optimize_llm_post
30
+ from ipex_llm.transformers.npu_models.convert import optimize_llm
31
31
 
32
32
 
33
33
  def patch_flash_attn_import(filename: str) -> List[str]:
@@ -207,8 +207,6 @@ class _BaseAutoModelClass:
207
207
  model = model.eval()
208
208
  logger.info(f"Finish to convert model")
209
209
  else:
210
- from intel_npu_acceleration_library.compiler import create_npu_kernels
211
-
212
210
  if optimize_model:
213
211
  invalidInputError(
214
212
  max_prompt_len < max_context_len,
@@ -232,11 +230,14 @@ class _BaseAutoModelClass:
232
230
  "convert_model": convert_model,
233
231
  "save_directory": save_directory,
234
232
  "fuse_layers": fuse_layers,
235
- "imatrix_data": imatrix_data
233
+ "imatrix_data": imatrix_data,
234
+ "skip_npu_logic": mock_device == "dummy",
236
235
  }
236
+ # Dummy will skip npu related logic and save the quantized model
237
+ if mock_device == "dummy":
238
+ model.save_low_bit = types.MethodType(save_low_bit, model)
237
239
  model = cls.optimize_npu_model(*args, **optimize_kwargs)
238
240
  else:
239
- from ipex_llm.transformers.npu_models.convert import optimize_llm
240
241
  optimize_llm(model)
241
242
  with torch.no_grad():
242
243
  cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
@@ -258,7 +259,6 @@ class _BaseAutoModelClass:
258
259
  def optimize_npu_model(cls, *args, **kwargs):
259
260
 
260
261
  from ipex_llm.transformers.npu_models.convert_mp import optimize_llm_pre, optimize_llm
261
- from intel_npu_acceleration_library.compiler import create_npu_kernels
262
262
 
263
263
  model = kwargs.pop("model")
264
264
  qtype = kwargs.pop("qtype", "sym_int4_rtn")
@@ -275,6 +275,7 @@ class _BaseAutoModelClass:
275
275
  save_directory = kwargs.pop('save_directory', None)
276
276
  fuse_layers = kwargs.pop('fuse_layers', None)
277
277
  imatrix_data = kwargs.pop('imatrix_data', None)
278
+ skip_npu_logic = kwargs.pop("skip_npu_logic", False)
278
279
  invalidInputError(save_directory is not None,
279
280
  "Please provide the path to save converted model "
280
281
  "through `save_directory`.")
@@ -294,51 +295,58 @@ class _BaseAutoModelClass:
294
295
  cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
295
296
  quantization_group_size, imatrix_data,
296
297
  *args, **kwargs)
297
- create_npu_kernels(llm)
298
+ if not skip_npu_logic:
299
+ from intel_npu_acceleration_library.compiler import create_npu_kernels
300
+ create_npu_kernels(llm)
298
301
  model = model.eval()
299
302
  logger.info(f"Finish to convert model")
300
303
  model.config.update({"bigdl_transformers_low_bit": qtype})
301
- model.share_memory()
302
304
 
303
- if not pipeline:
304
- if model.config.model_type in ["qwen2", "llama", "minicpm"]:
305
- from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
306
- optimize_llm_single_process(
307
- llm,
308
- kv_len=max_context_len,
309
- max_prompt_len=max_prompt_len,
310
- transpose_value_cache=transpose_value_cache,
311
- group_size=quantization_group_size,
312
- qtype=qtype,
313
- save_directory=save_directory,
314
- fuse_layers=fuse_layers,
315
- has_llm=hasattr(model, "llm")
316
- )
317
- else:
318
- optimize_llm(
319
- llm,
320
- max_context_len=max_context_len,
321
- max_prompt_len=max_prompt_len,
322
- inter_pp=inter_pp,
323
- intra_pp=intra_pp,
324
- transpose_value_cache=transpose_value_cache,
325
- group_size=quantization_group_size
326
- )
305
+ if skip_npu_logic:
306
+ model.save_low_bit(model_dir=save_directory)
327
307
  else:
328
- from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
329
- import convert_llm
330
- convert_llm(llm,
308
+ model.share_memory()
309
+
310
+ if not pipeline:
311
+ if model.config.model_type in ["qwen2", "llama", "minicpm"]:
312
+ from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
313
+ optimize_llm_single_process(
314
+ llm,
331
315
  kv_len=max_context_len,
332
316
  max_prompt_len=max_prompt_len,
333
317
  transpose_value_cache=transpose_value_cache,
334
318
  group_size=quantization_group_size,
335
319
  qtype=qtype,
336
- convert_model=convert_model,
337
320
  save_directory=save_directory,
338
- fuse_layers=fuse_layers)
339
- model.save_low_bit = types.MethodType(save_low_bit, model)
340
- model.save_low_bit(save_directory)
341
- logger.info(f"Converted model has already saved to {save_directory}.")
321
+ fuse_layers=fuse_layers,
322
+ has_llm=hasattr(model, "llm")
323
+ )
324
+ else:
325
+ optimize_llm(
326
+ llm,
327
+ max_context_len=max_context_len,
328
+ max_prompt_len=max_prompt_len,
329
+ inter_pp=inter_pp,
330
+ intra_pp=intra_pp,
331
+ transpose_value_cache=transpose_value_cache,
332
+ group_size=quantization_group_size
333
+ )
334
+ else:
335
+ from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
336
+ import convert_llm
337
+ convert_llm(llm,
338
+ kv_len=max_context_len,
339
+ max_prompt_len=max_prompt_len,
340
+ transpose_value_cache=transpose_value_cache,
341
+ group_size=quantization_group_size,
342
+ qtype=qtype,
343
+ convert_model=convert_model,
344
+ save_directory=save_directory,
345
+ fuse_layers=fuse_layers)
346
+ model.save_low_bit = types.MethodType(save_low_bit, model)
347
+ model.save_low_bit(save_directory)
348
+ logger.info(f"Converted model has already saved to {save_directory}.")
349
+
342
350
  return model
343
351
 
344
352
  @classmethod
@@ -379,6 +387,7 @@ class _BaseAutoModelClass:
379
387
  intra_pp = kwargs.pop("intra_pp", None)
380
388
  transpose_value_cache = kwargs.pop("transpose_value_cache", True)
381
389
  modules_to_not_convert = kwargs.pop("modules_to_not_convert", [])
390
+ save_directory = kwargs.pop('save_directory', None)
382
391
 
383
392
  from transformers.models.auto.configuration_auto import AutoConfig
384
393
  from transformers.modeling_utils import no_init_weights, get_state_dict_dtype
@@ -650,16 +659,37 @@ class _BaseAutoModelClass:
650
659
  param.requires_grad_(False)
651
660
 
652
661
  if optimize_model and not pipeline:
653
- from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
654
- optimize_llm(
655
- llm,
656
- max_context_len=max_context_len,
657
- max_prompt_len=max_prompt_len,
658
- inter_pp=inter_pp,
659
- intra_pp=intra_pp,
660
- transpose_value_cache=transpose_value_cache,
661
- group_size=quantization_group_size
662
- )
662
+ if model.config.model_type in ["qwen2", "llama", "minicpm"]:
663
+ from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
664
+ if save_directory is None:
665
+ invalidInputError(False,
666
+ "Please specify the save_directory, the path of folder " +
667
+ "to save the compiled NPU model. If path not exists, " +
668
+ "the compiled NPU model will be saved there. " +
669
+ "Else, program will exit.")
670
+
671
+ optimize_llm_single_process(
672
+ llm,
673
+ kv_len=max_context_len,
674
+ max_prompt_len=max_prompt_len,
675
+ transpose_value_cache=transpose_value_cache,
676
+ group_size=quantization_group_size,
677
+ qtype=qtype,
678
+ save_directory=save_directory,
679
+ fuse_layers=None,
680
+ has_llm=hasattr(model, "llm")
681
+ )
682
+ else:
683
+ from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
684
+ optimize_llm(
685
+ llm,
686
+ max_context_len=max_context_len,
687
+ max_prompt_len=max_prompt_len,
688
+ inter_pp=inter_pp,
689
+ intra_pp=intra_pp,
690
+ transpose_value_cache=transpose_value_cache,
691
+ group_size=quantization_group_size
692
+ )
663
693
  elif optimize_model and pipeline:
664
694
  from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
665
695
  import convert_llm
@@ -18,7 +18,7 @@ import torch
18
18
  import importlib
19
19
  import numpy as np
20
20
  from ipex_llm.transformers.low_bit_linear import LowBitLinear, FP4Params
21
- from ipex_llm.transformers.npu_models.lm_head import LMHeadLinear, SlicedLMHead
21
+ from ipex_llm.transformers.npu_models.lm_head import SlicedLMHead
22
22
  from ipex_llm.utils.common.log4Error import invalidInputError
23
23
 
24
24
 
@@ -21,16 +21,25 @@
21
21
  # SPDX-License-Identifier: Apache 2.0
22
22
  #
23
23
 
24
- from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
25
- from intel_npu_acceleration_library.dtypes import NPUDtype
24
+
26
25
  import os
27
26
  import torch
28
27
  from torch.nn import Parameter
29
28
  import uuid
30
29
  import math
31
- from intel_npu_acceleration_library.backend import run_matmul
32
30
  from typing import Optional, Union
33
31
  from ipex_llm.utils.common import invalidInputError
32
+ import importlib
33
+
34
+
35
+ def is_acclib_available():
36
+ return importlib.util.find_spec("intel_npu_acceleration_library") is not None
37
+
38
+
39
+ if is_acclib_available():
40
+ from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
41
+ from intel_npu_acceleration_library.dtypes import NPUDtype
42
+ from intel_npu_acceleration_library.backend import run_matmul
34
43
 
35
44
 
36
45
  class Linear(torch.nn.Module):
@@ -63,6 +72,7 @@ class Linear(torch.nn.Module):
63
72
  if self.training:
64
73
  out = self._mm(x, self.weight, None)
65
74
  else:
75
+ from intel_npu_acceleration_library.backend import run_matmul
66
76
  out = run_matmul(x, self.weight, None, self.op_id)
67
77
 
68
78
  if self.bias is None:
@@ -105,6 +115,8 @@ class Linear(torch.nn.Module):
105
115
  Returns:
106
116
  Union[Linear, QuantizedLinear]: A NPU linear layer
107
117
  """
118
+ from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
119
+ from intel_npu_acceleration_library.dtypes import NPUDtype
108
120
  if dtype.is_floating_point:
109
121
  if bias is None:
110
122
  return Linear(weight.to(dtype), None)
@@ -16,96 +16,6 @@
16
16
  import torch
17
17
  from torch import nn
18
18
  import numpy as np
19
- from filelock import FileLock
20
- from intel_npu_acceleration_library.backend import NNFactory
21
- from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
22
-
23
-
24
- class LMHeadLinear(NNFactory):
25
- """Quantized Linear class for sliced lm_head, computing a matrix matrix multiplication
26
- with weights prefetching."""
27
-
28
- def __init__(
29
- self,
30
- inC: int,
31
- outC: int,
32
- batch: int,
33
- split_num: int = 2,
34
- profile: bool = False,
35
- device: str = "NPU",
36
- dtype: np.dtype = np.int8,
37
- use_split: bool = False,
38
- group_size: int = 0,
39
- asym: bool = False,
40
- ):
41
- """Initialize the LMHeadLinear class.
42
-
43
- Args:
44
- inC (int): input channels
45
- outC (int): output channels
46
- batch (int): batch
47
- split_num (int): split in_features of lm_head to how many parts
48
- profile (bool): Enable/Disable profiling. Defaults to False.
49
- device (str): Target device, default to "NPU".
50
- dtype (np.dtype): weights datatype. Defaults to np.int8.
51
-
52
- """
53
- super().__init__(profile, device)
54
- self.inC, self.outC = inC, outC
55
- self.batch = batch
56
-
57
- self.split_num = split_num
58
- if use_split:
59
- input = self.parameter((1, self.batch, self.inC))
60
- res = self.dq_split_linear(input, self.split_num, self.outC, self.inC, wt_dtype=dtype,
61
- scale_factor=(group_size == 0), asym=asym)
62
- else:
63
- input = self.parameter((self.batch, self.inC))
64
- split_size = self.inC // split_num // 2 * 2
65
-
66
- for i in range(self.split_num):
67
- start_idx = i * split_size
68
- end_idx = (i + 1) * split_size if i < self.split_num - 1 else self.inC
69
- input_slice = self.slice(input, begin=[0, start_idx],
70
- end=[self.batch, end_idx])
71
- linear_slice = self.linear(input_slice, outC, split_size, bias=False,
72
- wt_dtype=dtype, asym=asym)
73
- if i == 0:
74
- res = linear_slice
75
- else:
76
- res += linear_slice
77
-
78
- print("start compiling lm_head")
79
- self.compile()
80
- print("end compiling lm_head")
81
-
82
- def set_weights(self, op_id, weights):
83
- self.set_weights_async(op_id, weights)
84
- with FileLock(f"lmhead_run.lock"):
85
- backend_lib.run(self._mm)
86
-
87
- def set_weights_async(self, op_id, weights):
88
- self.setWeights(1, op_id, *weights)
89
-
90
- def run(
91
- self, X: np.ndarray
92
- ) -> np.ndarray:
93
- """Run the layer: $X * (W * S)^T$ .
94
-
95
- Args:
96
- X (np.ndarray): activation
97
-
98
- Raises:
99
- RuntimeError: Input, weights or scale shape mismatch
100
-
101
- Returns:
102
- np.ndarray: result
103
- """
104
- self.set_input_tensor(X, 0)
105
- self.elapsed = backend_lib.run(self._mm)
106
- if len(self.out) == 1:
107
- return self.out[0]
108
- return self.out
109
19
 
110
20
 
111
21
  class SlicedLMHead(nn.Module):
@@ -160,6 +70,7 @@ class SlicedLMHead(nn.Module):
160
70
  return self.lm_heads[0].weight.dtype
161
71
 
162
72
  def get_fused_lm_head(self):
73
+ from ipex_llm.transformers.npu_models.lm_head_linear import LMHeadLinear
163
74
  np_dtype = np.uint8 if self.get_weight_dtype() == torch.uint8 else np.int8
164
75
  self.fused_lm_head = LMHeadLinear(self.inC, self.outC, 1, self.split_num,
165
76
  False, "NPU", dtype=np_dtype, use_split=self.use_split,
@@ -0,0 +1,106 @@
1
+ #
2
+ # Copyright 2016 The BigDL Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import numpy as np
17
+ from filelock import FileLock
18
+ from intel_npu_acceleration_library.backend import NNFactory
19
+ from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
20
+
21
+
22
+ class LMHeadLinear(NNFactory):
23
+ """Quantized Linear class for sliced lm_head, computing a matrix matrix multiplication
24
+ with weights prefetching."""
25
+
26
+ def __init__(
27
+ self,
28
+ inC: int,
29
+ outC: int,
30
+ batch: int,
31
+ split_num: int = 2,
32
+ profile: bool = False,
33
+ device: str = "NPU",
34
+ dtype: np.dtype = np.int8,
35
+ use_split: bool = False,
36
+ group_size: int = 0,
37
+ asym: bool = False,
38
+ ):
39
+ """Initialize the LMHeadLinear class.
40
+
41
+ Args:
42
+ inC (int): input channels
43
+ outC (int): output channels
44
+ batch (int): batch
45
+ split_num (int): split in_features of lm_head to how many parts
46
+ profile (bool): Enable/Disable profiling. Defaults to False.
47
+ device (str): Target device, default to "NPU".
48
+ dtype (np.dtype): weights datatype. Defaults to np.int8.
49
+
50
+ """
51
+ super().__init__(profile, device)
52
+ self.inC, self.outC = inC, outC
53
+ self.batch = batch
54
+
55
+ self.split_num = split_num
56
+ if use_split:
57
+ input = self.parameter((1, self.batch, self.inC))
58
+ res = self.dq_split_linear(input, self.split_num, self.outC, self.inC, wt_dtype=dtype,
59
+ scale_factor=(group_size == 0), asym=asym)
60
+ else:
61
+ input = self.parameter((self.batch, self.inC))
62
+ split_size = self.inC // split_num // 2 * 2
63
+
64
+ for i in range(self.split_num):
65
+ start_idx = i * split_size
66
+ end_idx = (i + 1) * split_size if i < self.split_num - 1 else self.inC
67
+ input_slice = self.slice(input, begin=[0, start_idx],
68
+ end=[self.batch, end_idx])
69
+ linear_slice = self.linear(input_slice, outC, split_size, bias=False,
70
+ wt_dtype=dtype, asym=asym)
71
+ if i == 0:
72
+ res = linear_slice
73
+ else:
74
+ res += linear_slice
75
+
76
+ print("start compiling lm_head")
77
+ self.compile()
78
+ print("end compiling lm_head")
79
+
80
+ def set_weights(self, op_id, weights):
81
+ self.set_weights_async(op_id, weights)
82
+ with FileLock(f"lmhead_run.lock"):
83
+ backend_lib.run(self._mm)
84
+
85
+ def set_weights_async(self, op_id, weights):
86
+ self.setWeights(1, op_id, *weights)
87
+
88
+ def run(
89
+ self, X: np.ndarray
90
+ ) -> np.ndarray:
91
+ """Run the layer: $X * (W * S)^T$ .
92
+
93
+ Args:
94
+ X (np.ndarray): activation
95
+
96
+ Raises:
97
+ RuntimeError: Input, weights or scale shape mismatch
98
+
99
+ Returns:
100
+ np.ndarray: result
101
+ """
102
+ self.set_input_tensor(X, 0)
103
+ self.elapsed = backend_lib.run(self._mm)
104
+ if len(self.out) == 1:
105
+ return self.out[0]
106
+ return self.out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipex-llm
3
- Version: 2.2.0b20250104
3
+ Version: 2.2.0b20250106
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Home-page: https://github.com/intel-analytics/ipex-llm
6
6
  Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
27
27
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
28
28
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
29
29
  Provides-Extra: cpp
30
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250104 ; extra == 'cpp'
30
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp'
31
31
  Requires-Dist: setuptools ; extra == 'cpp'
32
32
  Provides-Extra: cpp-arl
33
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250104 ; extra == 'cpp-arl'
33
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp-arl'
34
34
  Requires-Dist: setuptools ; extra == 'cpp-arl'
35
35
  Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
36
36
  Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -65,7 +65,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
65
65
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
66
66
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
67
67
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
68
- Requires-Dist: bigdl-core-npu ==2.6.0b20250104 ; (platform_system == "Windows") and extra == 'npu'
68
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250106 ; (platform_system == "Windows") and extra == 'npu'
69
69
  Provides-Extra: serving
70
70
  Requires-Dist: py-cpuinfo ; extra == 'serving'
71
71
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -85,9 +85,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
85
85
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
86
86
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
87
87
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
88
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250104 ; extra == 'xpu'
89
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250104 ; extra == 'xpu'
90
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250104 ; extra == 'xpu'
88
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu'
89
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu'
90
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu'
91
91
  Provides-Extra: xpu-2-1
92
92
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
93
93
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -102,9 +102,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
102
102
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
103
103
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
104
104
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
105
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250104 ; extra == 'xpu-2-1'
106
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250104 ; extra == 'xpu-2-1'
107
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250104 ; extra == 'xpu-2-1'
105
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
106
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
107
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
108
108
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
109
109
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
110
110
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -119,9 +119,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
119
119
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
120
120
  Requires-Dist: tabulate ; extra == 'xpu-arc'
121
121
  Requires-Dist: setuptools ; extra == 'xpu-arc'
122
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250104 ; extra == 'xpu-arc'
123
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250104 ; extra == 'xpu-arc'
124
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250104 ; extra == 'xpu-arc'
122
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
123
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
124
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
125
125
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
126
126
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
127
127
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -141,9 +141,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
141
141
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
142
142
  Requires-Dist: tabulate ; extra == 'xpu-arl'
143
143
  Requires-Dist: setuptools ; extra == 'xpu-arl'
144
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250104 ; extra == 'xpu-arl'
145
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250104 ; extra == 'xpu-arl'
146
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250104 ; extra == 'xpu-arl'
144
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
145
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
146
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
147
147
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
148
148
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
149
149
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -163,9 +163,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
163
163
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
164
164
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
165
165
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
166
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250104 ; extra == 'xpu-lnl'
167
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250104 ; extra == 'xpu-lnl'
168
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250104 ; extra == 'xpu-lnl'
166
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
167
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
168
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
169
169
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
170
170
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
171
171
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
41
41
  ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
42
42
  ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
43
43
  ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- ipex_llm/libs/bloom-api.dll,sha256=M1Roz77en2poEMA3ahJIi10TyALMZRTMyRVzJW2Q1TE,36352
45
- ipex_llm/libs/bloom.dll,sha256=Yoc8Xc2IldVuARmtfcrT3c09UfKzWm5cfxoKngffSrg,506880
46
- ipex_llm/libs/gptneox-api.dll,sha256=LRHD9zUpaxapA_WiQ9JPY3IFVzUlgOMa32rdZqo6hWc,24576
47
- ipex_llm/libs/gptneox.dll,sha256=wMQMHqzyJTmVOcFRxfcNo_HHnJt4tYJ-hKWYTWZmcdk,567296
48
- ipex_llm/libs/libbloom_avx.dll,sha256=R-4XPfSEyIsURGPm4X_7ESr3ZH7C3r1oy_0bl9czjNY,535040
49
- ipex_llm/libs/libbloom_vnni.dll,sha256=_jsNrGVGNqWebMBWN-Zi2gYHAmDWsuMdsP9o_ZBX9hc,506880
50
- ipex_llm/libs/libgptneox_avx.dll,sha256=IblWKTU4YPlSVUlJvLl7b3AALMAoaKjImT_6yHh981s,595456
51
- ipex_llm/libs/libgptneox_vnni.dll,sha256=hzaDRb04sla_ILiV0uloACj5XIchURaU92ygFmV9KK0,567808
52
- ipex_llm/libs/libllama_avx.dll,sha256=268FWvvpz4fore4HzL6hYvFi2UMltrZcvSKrA-E_fRg,589824
53
- ipex_llm/libs/libllama_vnni.dll,sha256=5tReXBo0-okcXVagRqf8VtcddYyi57fKHxY86ipPYiE,561664
54
- ipex_llm/libs/libstarcoder_avx.dll,sha256=0pqICdK42UnBqA4zU9Stlq_V0bFwUf-qse5VQSw-86M,626688
55
- ipex_llm/libs/libstarcoder_vnni.dll,sha256=8sGu77v-RosphQYh0exh6-tG6v0yqepzGZJ5Xk1Ns3Q,598528
56
- ipex_llm/libs/llama-api.dll,sha256=E7GWEyfoiIeeXpQnBI4DyFTaJbRkehOkz_uQbWfK14c,25600
57
- ipex_llm/libs/llama.dll,sha256=Lb6Pv4kUVh3HcAMtwQXqsUcpC7NHwShR12MWXfrHg3o,561152
58
- ipex_llm/libs/main-bloom.exe,sha256=QY2c1jYzBiLtcZ4pvhENUF8H8QwLY0kOdflswlW8sYA,103424
59
- ipex_llm/libs/main-gptneox.exe,sha256=ELitg5JpzDBrHXw7Kig3s6cCz1MQbiD9UU9AdwQCiRU,98816
60
- ipex_llm/libs/main-llama.exe,sha256=IFlfc4lDtFqGB6tsCtgt_j_UFSpsxrIxhiIgdT-FcXk,99840
61
- ipex_llm/libs/main-starcoder.exe,sha256=TR-D37rraHagIFP81C5h1v0yMMgWwhycBY8NaRoQPv8,157696
62
- ipex_llm/libs/pipeline.dll,sha256=yUkVtNjdVc7i40FVkPNlZ5DU7IkaKc9ILpQCNE67sq4,72704
63
- ipex_llm/libs/quantize-bloom.exe,sha256=XO2-elKOn-3E6bO-XNrPOaeuTTitczDIU3gZ5t2Jxm8,126464
64
- ipex_llm/libs/quantize-bloom_vnni.exe,sha256=0DbmArEDuePUFsZ-IkXGoxbHgWV0rpV2XCfUqPiv0P4,127488
65
- ipex_llm/libs/quantize-gptneox.exe,sha256=02duioQn9KbPqZdeSDXgz_vizh_q56f8yNw72_tLdlM,104448
66
- ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=C6hrL5paI3U8VScPqj_-6ak5GH-fyGlkVbzown75Fhk,104960
67
- ipex_llm/libs/quantize-llama.exe,sha256=8_oMjm6hQO89DEoUkSdqV6puS81c0wUrayOi9JyGsxI,109568
68
- ipex_llm/libs/quantize-llama_vnni.exe,sha256=LPWsZIFIn3CGgg2ExHSit6QjJ8is1hWMIegOxYYrCyo,110592
69
- ipex_llm/libs/quantize-starcoder.exe,sha256=ysSUPYfW7V2hnQEj0g2SW6yk9kXMRYyknQ5qliQ8C68,127488
70
- ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=jZlDuI33PNSrNOl84DNUYkNUnGYPv5Uj6Kxz7OIMdqE,128512
71
- ipex_llm/libs/starcoder-api.dll,sha256=hzTDC-HWtD1mdbkktfcEa11gwXVG2WIZayPvBK0ZzH4,21504
72
- ipex_llm/libs/starcoder.dll,sha256=koZkw01HYSZ7xYqhBl57zvOKAe83XjNUZXP3INhVweI,598016
44
+ ipex_llm/libs/bloom-api.dll,sha256=oXbHMfWCF-cSjpdBIxpiOoJr36WPJC62FHhdSKzEivg,36352
45
+ ipex_llm/libs/bloom.dll,sha256=vsa_giqtsGQWXMVSOt1r-wsEWiRBRxT2skq0HwJqVAA,506880
46
+ ipex_llm/libs/gptneox-api.dll,sha256=FtjD0SyIKcyW8Q1cKp7rfiGkS8xyncjrINwH1LHZVcU,24576
47
+ ipex_llm/libs/gptneox.dll,sha256=qriFw1GXbkagSPydHmDC5YDmqrahnLiWFKBWZ2KvyCM,567296
48
+ ipex_llm/libs/libbloom_avx.dll,sha256=9KjLxHUKLesAykdLy-swkRz_Q86qTPzrJC0Y8w6cWV4,535040
49
+ ipex_llm/libs/libbloom_vnni.dll,sha256=2qDbiFOll48C5fMdWG5Mc9BkPxoeYM-W39wG8-joiyk,506880
50
+ ipex_llm/libs/libgptneox_avx.dll,sha256=uoLc9zGCCuUcIShyDWwb_MbVKhqlMh13laB_V50U_4k,595456
51
+ ipex_llm/libs/libgptneox_vnni.dll,sha256=swge9FsvtJIQ_VitBVNnh1BAf-8a-gCRWOBJeAaOumw,567808
52
+ ipex_llm/libs/libllama_avx.dll,sha256=GNl29Z8r50KAUzH2dOg5FlfM1Gs9Ab4ZNov7Pu4PRjE,589824
53
+ ipex_llm/libs/libllama_vnni.dll,sha256=uYiPjJ9OS5ZdxMrZ5wepafpiF25FjdpTRTeEXgNbdGc,561664
54
+ ipex_llm/libs/libstarcoder_avx.dll,sha256=Lbbm2O3e9niHmfppGLo1I1lnzdmQXXWyDYsoxPrdrps,626688
55
+ ipex_llm/libs/libstarcoder_vnni.dll,sha256=Sd_nS4gVyJrPV-eqPzerhx3oxjjI3TMXYtB_hZHt0Y0,598528
56
+ ipex_llm/libs/llama-api.dll,sha256=-tkpqdidqyCR0r3RyKk--KSIUtTz2r9YaMNaOGi_Y44,25600
57
+ ipex_llm/libs/llama.dll,sha256=n3ZdQ4_01ROY49M0sPDuSmBaAFCEGrj4ArwCJ_W40Rw,561152
58
+ ipex_llm/libs/main-bloom.exe,sha256=tLu3JxGrrG0vVIkt4DubOcfXXSc9om-8tDItbNfa0Yw,103424
59
+ ipex_llm/libs/main-gptneox.exe,sha256=ZuaLuNft2_30VWHlORQwx-zspXt7ozdUw41sUfJdip4,98816
60
+ ipex_llm/libs/main-llama.exe,sha256=MBta6VTF7VAElzaT0Gk_Hi7-zOyJG3Ji0m1o25_Dhjo,99840
61
+ ipex_llm/libs/main-starcoder.exe,sha256=WQhAYmXLoST8_zeL9xmmeWzI2ezdLXT1A2QZqXwPJoM,157696
62
+ ipex_llm/libs/pipeline.dll,sha256=vKhyNhxs7FTybiTXv9gnZNF6SXJ-HcsM5qAgDba-iYI,72704
63
+ ipex_llm/libs/quantize-bloom.exe,sha256=FpagD6uubPkfHSrN4Ejmq5E_EpK9IBqn8CcmXHAcUhY,126464
64
+ ipex_llm/libs/quantize-bloom_vnni.exe,sha256=lEzEI0LWW0-T4di29PAbalSmPAcN3qNrxsTYrAR9nnE,127488
65
+ ipex_llm/libs/quantize-gptneox.exe,sha256=5rtTUGcGhiAhx0642jnDjVOdoaK2evGMaKo-P99rqpQ,104448
66
+ ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=mcoc32UP7v2MFNO8b_fZLYk9YZ6FwNvFpzZQi4q7n7o,104960
67
+ ipex_llm/libs/quantize-llama.exe,sha256=AQLloOUZCtUTXUTbCH9JW7F-0h7I5JmKG3zrksLDx3U,109568
68
+ ipex_llm/libs/quantize-llama_vnni.exe,sha256=5KfWhklZ24b228xkRVEAbeY3UeG5-vHaZl5dIo4GXns,110592
69
+ ipex_llm/libs/quantize-starcoder.exe,sha256=X4PZ68IA8BK0ylRRt6WvVto8jmbxrJvT80R5xBUBFgM,127488
70
+ ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=jSCIfgmCXt_7ScRIMW6IAagTcoIyEO8SUK0h8ZbBR_I,128512
71
+ ipex_llm/libs/starcoder-api.dll,sha256=AiG72eS3F3mjj0fqrfcRJsz4g5yGVIiEqrZHqKQR5jE,21504
72
+ ipex_llm/libs/starcoder.dll,sha256=U_ZoyR-d-vSVn8IYSdbsjkChd2w0Aq6HvlkZ5r_snQo,598016
73
73
  ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
74
74
  ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
75
75
  ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -97,7 +97,7 @@ ipex_llm/transformers/lookup.py,sha256=c4ETIha6ZLbWvhcclSKRDdi5Ipuet4mfUnOkBa0E8
97
97
  ipex_llm/transformers/low_bit_linear.py,sha256=dyyYyCqw0GK8hzaUGanrg-uIhU1HTLEEbvbxXMlm-80,41668
98
98
  ipex_llm/transformers/model.py,sha256=KcRjkauGg48BYrUBoUZaVMpg7Piuz5JrfIpVZd3EIjs,41105
99
99
  ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
100
- ipex_llm/transformers/npu_model.py,sha256=a1mkyc6EqD7AJhqbYzokGhFubNpt5trIMuZT_dQKlTk,37861
100
+ ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
101
101
  ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
102
102
  ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
103
103
  ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
@@ -187,13 +187,14 @@ ipex_llm/transformers/npu_models/chatglm.py,sha256=YzpGLZ7ORt6qkwW9mCwZ_xhOAI8uH
187
187
  ipex_llm/transformers/npu_models/chatglm4.py,sha256=J4523DzhIzZxIvlf1V9qU4auzEGKvC80YqyxuCJygjw,9795
188
188
  ipex_llm/transformers/npu_models/common.py,sha256=tTUJL7IxVrJSnXle6nla35wTUrBf2sOEt7Ya1qyMezY,4853
189
189
  ipex_llm/transformers/npu_models/convert.py,sha256=FILSGnoltcR9FMrCkw0eOKh6p3sbBI5i0Ms8AsJc04E,25342
190
- ipex_llm/transformers/npu_models/convert_mp.py,sha256=t7160V4MmYpnex2NfuLTcqoc1meGEXdYi4AAPotfbzk,24518
190
+ ipex_llm/transformers/npu_models/convert_mp.py,sha256=ADMTnY3utRmCA9kGOCoiJ3NTI4via3TiX6i8duJ2TIE,24504
191
191
  ipex_llm/transformers/npu_models/glm_edge.py,sha256=VsJex-6530h4ZQk35TxRe1MnttAHT41omE8LV47LgBE,6723
192
192
  ipex_llm/transformers/npu_models/kv.py,sha256=2OSFO9Z6e4nGdVxXEM-Bq2qa_npYYbGmQt3lcCZxTlU,9201
193
- ipex_llm/transformers/npu_models/linear.py,sha256=G7W3tFXLG4FDzz-vc90_-YlEl1GxdoNz4XliqdlUb2U,10878
193
+ ipex_llm/transformers/npu_models/linear.py,sha256=RQxL42laJTm5hz11SNl0KlJX9xM6C_0OiN6soLShDM0,11284
194
194
  ipex_llm/transformers/npu_models/llama.py,sha256=WpRcw7sLnbZeR4XoM-a6XQ-BNYeQaHBEOX1r_O5C9uo,9857
195
195
  ipex_llm/transformers/npu_models/llama_mp.py,sha256=6fyWzbFozKPOfSPDBk2x_Rsejj2P0HOR-jn8SNUWy3s,49349
196
- ipex_llm/transformers/npu_models/lm_head.py,sha256=ZwH23jf21pNILTriAeF46TdymoVP3_OgQArsQX6lZzI,7823
196
+ ipex_llm/transformers/npu_models/lm_head.py,sha256=-yS0sM8905sQ2S9pwCZ6pX1vZa15aqOPGdoYEyhGbuQ,4825
197
+ ipex_llm/transformers/npu_models/lm_head_linear.py,sha256=7iPV0UqPqEYTP4-cUtJXxgjOzAObzEY9Utuu3nTIRqI,3683
197
198
  ipex_llm/transformers/npu_models/minicpm.py,sha256=PP3nrCoHMcQ3kHrnQ0gYglCKvL0Dh5MAQX3_Yq8_Ygc,10411
198
199
  ipex_llm/transformers/npu_models/minicpm_mp.py,sha256=0iCRWN9UIUQp5tSKyu-orpGCOxEjQrJ9b8ePnG0-ZV4,41921
199
200
  ipex_llm/transformers/npu_models/minicpmv_mp.py,sha256=m11WT6s_H5wkFtlz7aHMOL9b_CoL_G5MhoL5te4la_Q,20147
@@ -246,11 +247,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
246
247
  ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
247
248
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
248
249
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
249
- ipex_llm-2.2.0b20250104.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
250
- ipex_llm-2.2.0b20250104.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
251
- ipex_llm-2.2.0b20250104.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
252
- ipex_llm-2.2.0b20250104.dist-info/METADATA,sha256=b92eOZaiow_iGnnKdId6c_XNboJByYBAV-LWYRXTfuY,11374
253
- ipex_llm-2.2.0b20250104.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
254
- ipex_llm-2.2.0b20250104.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
255
- ipex_llm-2.2.0b20250104.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
256
- ipex_llm-2.2.0b20250104.dist-info/RECORD,,
250
+ ipex_llm-2.2.0b20250106.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
251
+ ipex_llm-2.2.0b20250106.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
252
+ ipex_llm-2.2.0b20250106.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
253
+ ipex_llm-2.2.0b20250106.dist-info/METADATA,sha256=RVDr0pwoPE6J0yPUZ9k7t6_jQn01wTwAXkU5ViqE-c8,11374
254
+ ipex_llm-2.2.0b20250106.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
255
+ ipex_llm-2.2.0b20250106.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
256
+ ipex_llm-2.2.0b20250106.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
257
+ ipex_llm-2.2.0b20250106.dist-info/RECORD,,