ipex-llm 2.2.0b20250105__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250106__py3-none-manylinux2010_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,7 +27,7 @@ from transformers.configuration_utils import PretrainedConfig
 
 from ipex_llm.utils.common.log4Error import invalidInputError
 from ipex_llm.transformers.utils import logger, load_imatrix_data
-from ipex_llm.transformers.npu_models.convert import optimize_llm, optimize_llm_post
+from ipex_llm.transformers.npu_models.convert import optimize_llm
 
 
 def patch_flash_attn_import(filename: str) -> List[str]:
@@ -207,8 +207,6 @@ class _BaseAutoModelClass:
             model = model.eval()
             logger.info(f"Finish to convert model")
         else:
-            from intel_npu_acceleration_library.compiler import create_npu_kernels
-
             if optimize_model:
                 invalidInputError(
                     max_prompt_len < max_context_len,
@@ -232,11 +230,14 @@ class _BaseAutoModelClass:
                     "convert_model": convert_model,
                     "save_directory": save_directory,
                     "fuse_layers": fuse_layers,
-                    "imatrix_data": imatrix_data
+                    "imatrix_data": imatrix_data,
+                    "skip_npu_logic": mock_device == "dummy",
                 }
+                # Dummy will skip npu related logic and save the quantized model
+                if mock_device == "dummy":
+                    model.save_low_bit = types.MethodType(save_low_bit, model)
                 model = cls.optimize_npu_model(*args, **optimize_kwargs)
             else:
-                from ipex_llm.transformers.npu_models.convert import optimize_llm
                 optimize_llm(model)
                 with torch.no_grad():
                     cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
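The hunk above binds `save_low_bit` to the model instance with `types.MethodType` before `optimize_npu_model` runs, so the new "dummy" device path can persist the quantized weights later. A minimal, standalone sketch of that binding pattern (the `Model` class and function body below are illustrative, not the package's code):

    import types

    # Attach a standalone function to a single instance as a bound method,
    # without modifying the class itself.
    def save_low_bit(self, model_dir):
        print(f"saving {type(self).__name__} to {model_dir}")

    class Model:
        pass

    m = Model()
    m.save_low_bit = types.MethodType(save_low_bit, m)
    m.save_low_bit("./out")  # -> saving Model to ./out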
@@ -258,7 +259,6 @@ class _BaseAutoModelClass:
     def optimize_npu_model(cls, *args, **kwargs):
 
         from ipex_llm.transformers.npu_models.convert_mp import optimize_llm_pre, optimize_llm
-        from intel_npu_acceleration_library.compiler import create_npu_kernels
 
         model = kwargs.pop("model")
         qtype = kwargs.pop("qtype", "sym_int4_rtn")
@@ -275,6 +275,7 @@ class _BaseAutoModelClass:
         save_directory = kwargs.pop('save_directory', None)
         fuse_layers = kwargs.pop('fuse_layers', None)
         imatrix_data = kwargs.pop('imatrix_data', None)
+        skip_npu_logic = kwargs.pop("skip_npu_logic", False)
         invalidInputError(save_directory is not None,
                           "Please provide the path to save converted model "
                           "through `save_directory`.")
@@ -294,51 +295,58 @@ class _BaseAutoModelClass:
             cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
                              quantization_group_size, imatrix_data,
                              *args, **kwargs)
-            create_npu_kernels(llm)
+            if not skip_npu_logic:
+                from intel_npu_acceleration_library.compiler import create_npu_kernels
+                create_npu_kernels(llm)
         model = model.eval()
         logger.info(f"Finish to convert model")
         model.config.update({"bigdl_transformers_low_bit": qtype})
-        model.share_memory()
 
-        if not pipeline:
-            if model.config.model_type in ["qwen2", "llama", "minicpm"]:
-                from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
-                optimize_llm_single_process(
-                    llm,
-                    kv_len=max_context_len,
-                    max_prompt_len=max_prompt_len,
-                    transpose_value_cache=transpose_value_cache,
-                    group_size=quantization_group_size,
-                    qtype=qtype,
-                    save_directory=save_directory,
-                    fuse_layers=fuse_layers,
-                    has_llm=hasattr(model, "llm")
-                )
-            else:
-                optimize_llm(
-                    llm,
-                    max_context_len=max_context_len,
-                    max_prompt_len=max_prompt_len,
-                    inter_pp=inter_pp,
-                    intra_pp=intra_pp,
-                    transpose_value_cache=transpose_value_cache,
-                    group_size=quantization_group_size
-                )
+        if skip_npu_logic:
+            model.save_low_bit(model_dir=save_directory)
         else:
-            from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
-                import convert_llm
-            convert_llm(llm,
+            model.share_memory()
+
+            if not pipeline:
+                if model.config.model_type in ["qwen2", "llama", "minicpm"]:
+                    from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
+                    optimize_llm_single_process(
+                        llm,
                         kv_len=max_context_len,
                         max_prompt_len=max_prompt_len,
                         transpose_value_cache=transpose_value_cache,
                         group_size=quantization_group_size,
                         qtype=qtype,
-                        convert_model=convert_model,
                         save_directory=save_directory,
-                        fuse_layers=fuse_layers)
-        model.save_low_bit = types.MethodType(save_low_bit, model)
-        model.save_low_bit(save_directory)
-        logger.info(f"Converted model has already saved to {save_directory}.")
+                        fuse_layers=fuse_layers,
+                        has_llm=hasattr(model, "llm")
+                    )
+                else:
+                    optimize_llm(
+                        llm,
+                        max_context_len=max_context_len,
+                        max_prompt_len=max_prompt_len,
+                        inter_pp=inter_pp,
+                        intra_pp=intra_pp,
+                        transpose_value_cache=transpose_value_cache,
+                        group_size=quantization_group_size
+                    )
+            else:
+                from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
+                    import convert_llm
+                convert_llm(llm,
+                            kv_len=max_context_len,
+                            max_prompt_len=max_prompt_len,
+                            transpose_value_cache=transpose_value_cache,
+                            group_size=quantization_group_size,
+                            qtype=qtype,
+                            convert_model=convert_model,
+                            save_directory=save_directory,
+                            fuse_layers=fuse_layers)
+            model.save_low_bit = types.MethodType(save_low_bit, model)
+            model.save_low_bit(save_directory)
+            logger.info(f"Converted model has already saved to {save_directory}.")
+
         return model
 
     @classmethod
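The net effect of this hunk: when `skip_npu_logic` is true (the dummy-device case), `optimize_npu_model` skips NPU kernel compilation and graph optimization entirely and only saves the quantized model to `save_directory`. A self-contained control-flow sketch, with stand-in helpers replacing the real ipex-llm internals:

    # Stand-ins for cls.load_convert, create_npu_kernels, and the
    # optimize_llm / convert_llm paths in the real code.
    def quantize_on_cpu(model):
        print("quantizing weights on CPU")

    def compile_npu_kernels(model):
        print("compiling NPU kernels")

    def run_npu_optimizations(model):
        print("running NPU graph optimizations")

    def optimize_npu_model(model, skip_npu_logic, save_directory):
        quantize_on_cpu(model)
        if not skip_npu_logic:
            compile_npu_kernels(model)           # import deferred in the real code
        if skip_npu_logic:
            model["saved_to"] = save_directory   # stands in for save_low_bit()
        else:
            run_npu_optimizations(model)
        return model

    optimize_npu_model({}, skip_npu_logic=True, save_directory="./out")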
@@ -379,6 +387,7 @@ class _BaseAutoModelClass:
         intra_pp = kwargs.pop("intra_pp", None)
         transpose_value_cache = kwargs.pop("transpose_value_cache", True)
         modules_to_not_convert = kwargs.pop("modules_to_not_convert", [])
+        save_directory = kwargs.pop('save_directory', None)
 
         from transformers.models.auto.configuration_auto import AutoConfig
         from transformers.modeling_utils import no_init_weights, get_state_dict_dtype
@@ -650,16 +659,37 @@ class _BaseAutoModelClass:
             param.requires_grad_(False)
 
         if optimize_model and not pipeline:
-            from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
-            optimize_llm(
-                llm,
-                max_context_len=max_context_len,
-                max_prompt_len=max_prompt_len,
-                inter_pp=inter_pp,
-                intra_pp=intra_pp,
-                transpose_value_cache=transpose_value_cache,
-                group_size=quantization_group_size
-            )
+            if model.config.model_type in ["qwen2", "llama", "minicpm"]:
+                from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
+                if save_directory is None:
+                    invalidInputError(False,
+                                      "Please specify the save_directory, the path of folder " +
+                                      "to save the compiled NPU model. If path not exists, " +
+                                      "the compiled NPU model will be saved there. " +
+                                      "Else, program will exit.")
+
+                optimize_llm_single_process(
+                    llm,
+                    kv_len=max_context_len,
+                    max_prompt_len=max_prompt_len,
+                    transpose_value_cache=transpose_value_cache,
+                    group_size=quantization_group_size,
+                    qtype=qtype,
+                    save_directory=save_directory,
+                    fuse_layers=None,
+                    has_llm=hasattr(model, "llm")
+                )
+            else:
+                from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
+                optimize_llm(
+                    llm,
+                    max_context_len=max_context_len,
+                    max_prompt_len=max_prompt_len,
+                    inter_pp=inter_pp,
+                    intra_pp=intra_pp,
+                    transpose_value_cache=transpose_value_cache,
+                    group_size=quantization_group_size
+                )
         elif optimize_model and pipeline:
             from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
                 import convert_llm
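This is the last `npu_model.py` hunk; judging by the RECORD changes at the end of the diff, the hunks that follow touch `convert_mp.py`, `linear.py`, `lm_head.py`, and the new `lm_head_linear.py`. With this change, `load_low_bit` accepts a `save_directory` keyword and, for the single-process model types (`qwen2`, `llama`, `minicpm`), refuses to run without it, using it as the folder for the compiled NPU model. A hedged usage sketch; the wrapper class and exact keyword set are inferred from this diff rather than documented API:

    from ipex_llm.transformers.npu_model import AutoModelForCausalLM

    # Reload a previously saved low-bit checkpoint; the compiled NPU model
    # is written to (or reused from) save_directory.
    model = AutoModelForCausalLM.load_low_bit(
        "./llama2-sym-int4",                     # folder written by save_low_bit()
        optimize_model=True,
        max_context_len=1024,
        max_prompt_len=512,
        save_directory="./llama2-npu-compiled",  # now required for llama-family models
    )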
@@ -18,7 +18,7 @@ import torch
 import importlib
 import numpy as np
 from ipex_llm.transformers.low_bit_linear import LowBitLinear, FP4Params
-from ipex_llm.transformers.npu_models.lm_head import LMHeadLinear, SlicedLMHead
+from ipex_llm.transformers.npu_models.lm_head import SlicedLMHead
 from ipex_llm.utils.common.log4Error import invalidInputError
 
 
@@ -21,16 +21,25 @@
 # SPDX-License-Identifier: Apache 2.0
 #
 
-from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
-from intel_npu_acceleration_library.dtypes import NPUDtype
+
 import os
 import torch
 from torch.nn import Parameter
 import uuid
 import math
-from intel_npu_acceleration_library.backend import run_matmul
 from typing import Optional, Union
 from ipex_llm.utils.common import invalidInputError
+import importlib
+
+
+def is_acclib_available():
+    return importlib.util.find_spec("intel_npu_acceleration_library") is not None
+
+
+if is_acclib_available():
+    from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
+    from intel_npu_acceleration_library.dtypes import NPUDtype
+    from intel_npu_acceleration_library.backend import run_matmul
 
 
 class Linear(torch.nn.Module):
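The rewritten header makes `intel_npu_acceleration_library` an optional dependency of `linear.py`: the module can now be imported on machines without the NPU stack, and the acceleration-library symbols are bound only when `importlib.util.find_spec` can locate the package. A minimal standalone sketch of the same guard pattern (the fallback binding is illustrative):

    import importlib.util

    def is_available(module_name: str) -> bool:
        # find_spec probes the import machinery without importing the module,
        # so this is safe even when the package is absent.
        return importlib.util.find_spec(module_name) is not None

    if is_available("intel_npu_acceleration_library"):
        from intel_npu_acceleration_library.backend import run_matmul
    else:
        run_matmul = None  # NPU-only code paths must not be reached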
@@ -63,6 +72,7 @@ class Linear(torch.nn.Module):
         if self.training:
             out = self._mm(x, self.weight, None)
         else:
+            from intel_npu_acceleration_library.backend import run_matmul
             out = run_matmul(x, self.weight, None, self.op_id)
 
         if self.bias is None:
@@ -105,6 +115,8 @@ class Linear(torch.nn.Module):
         Returns:
             Union[Linear, QuantizedLinear]: A NPU linear layer
         """
+        from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
+        from intel_npu_acceleration_library.dtypes import NPUDtype
         if dtype.is_floating_point:
             if bias is None:
                 return Linear(weight.to(dtype), None)
@@ -16,96 +16,6 @@
 import torch
 from torch import nn
 import numpy as np
-from filelock import FileLock
-from intel_npu_acceleration_library.backend import NNFactory
-from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
-
-
-class LMHeadLinear(NNFactory):
-    """Quantized Linear class for sliced lm_head, computing a matrix matrix multiplication
-    with weights prefetching."""
-
-    def __init__(
-        self,
-        inC: int,
-        outC: int,
-        batch: int,
-        split_num: int = 2,
-        profile: bool = False,
-        device: str = "NPU",
-        dtype: np.dtype = np.int8,
-        use_split: bool = False,
-        group_size: int = 0,
-        asym: bool = False,
-    ):
-        """Initialize the LMHeadLinear class.
-
-        Args:
-            inC (int): input channels
-            outC (int): output channels
-            batch (int): batch
-            split_num (int): split in_features of lm_head to how many parts
-            profile (bool): Enable/Disable profiling. Defaults to False.
-            device (str): Target device, default to "NPU".
-            dtype (np.dtype): weights datatype. Defaults to np.int8.
-
-        """
-        super().__init__(profile, device)
-        self.inC, self.outC = inC, outC
-        self.batch = batch
-
-        self.split_num = split_num
-        if use_split:
-            input = self.parameter((1, self.batch, self.inC))
-            res = self.dq_split_linear(input, self.split_num, self.outC, self.inC, wt_dtype=dtype,
-                                       scale_factor=(group_size == 0), asym=asym)
-        else:
-            input = self.parameter((self.batch, self.inC))
-            split_size = self.inC // split_num // 2 * 2
-
-            for i in range(self.split_num):
-                start_idx = i * split_size
-                end_idx = (i + 1) * split_size if i < self.split_num - 1 else self.inC
-                input_slice = self.slice(input, begin=[0, start_idx],
-                                         end=[self.batch, end_idx])
-                linear_slice = self.linear(input_slice, outC, split_size, bias=False,
-                                           wt_dtype=dtype, asym=asym)
-                if i == 0:
-                    res = linear_slice
-                else:
-                    res += linear_slice
-
-        print("start compiling lm_head")
-        self.compile()
-        print("end compiling lm_head")
-
-    def set_weights(self, op_id, weights):
-        self.set_weights_async(op_id, weights)
-        with FileLock(f"lmhead_run.lock"):
-            backend_lib.run(self._mm)
-
-    def set_weights_async(self, op_id, weights):
-        self.setWeights(1, op_id, *weights)
-
-    def run(
-        self, X: np.ndarray
-    ) -> np.ndarray:
-        """Run the layer: $X * (W * S)^T$ .
-
-        Args:
-            X (np.ndarray): activation
-
-        Raises:
-            RuntimeError: Input, weights or scale shape mismatch
-
-        Returns:
-            np.ndarray: result
-        """
-        self.set_input_tensor(X, 0)
-        self.elapsed = backend_lib.run(self._mm)
-        if len(self.out) == 1:
-            return self.out[0]
-        return self.out
 
 
 class SlicedLMHead(nn.Module):
@@ -160,6 +70,7 @@ class SlicedLMHead(nn.Module):
         return self.lm_heads[0].weight.dtype
 
     def get_fused_lm_head(self):
+        from ipex_llm.transformers.npu_models.lm_head_linear import LMHeadLinear
         np_dtype = np.uint8 if self.get_weight_dtype() == torch.uint8 else np.int8
         self.fused_lm_head = LMHeadLinear(self.inC, self.outC, 1, self.split_num,
                                           False, "NPU", dtype=np_dtype, use_split=self.use_split,
@@ -0,0 +1,106 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from filelock import FileLock
+from intel_npu_acceleration_library.backend import NNFactory
+from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
+
+
+class LMHeadLinear(NNFactory):
+    """Quantized Linear class for sliced lm_head, computing a matrix matrix multiplication
+    with weights prefetching."""
+
+    def __init__(
+        self,
+        inC: int,
+        outC: int,
+        batch: int,
+        split_num: int = 2,
+        profile: bool = False,
+        device: str = "NPU",
+        dtype: np.dtype = np.int8,
+        use_split: bool = False,
+        group_size: int = 0,
+        asym: bool = False,
+    ):
+        """Initialize the LMHeadLinear class.
+
+        Args:
+            inC (int): input channels
+            outC (int): output channels
+            batch (int): batch
+            split_num (int): split in_features of lm_head to how many parts
+            profile (bool): Enable/Disable profiling. Defaults to False.
+            device (str): Target device, default to "NPU".
+            dtype (np.dtype): weights datatype. Defaults to np.int8.
+
+        """
+        super().__init__(profile, device)
+        self.inC, self.outC = inC, outC
+        self.batch = batch
+
+        self.split_num = split_num
+        if use_split:
+            input = self.parameter((1, self.batch, self.inC))
+            res = self.dq_split_linear(input, self.split_num, self.outC, self.inC, wt_dtype=dtype,
+                                       scale_factor=(group_size == 0), asym=asym)
+        else:
+            input = self.parameter((self.batch, self.inC))
+            split_size = self.inC // split_num // 2 * 2
+
+            for i in range(self.split_num):
+                start_idx = i * split_size
+                end_idx = (i + 1) * split_size if i < self.split_num - 1 else self.inC
+                input_slice = self.slice(input, begin=[0, start_idx],
+                                         end=[self.batch, end_idx])
+                linear_slice = self.linear(input_slice, outC, split_size, bias=False,
+                                           wt_dtype=dtype, asym=asym)
+                if i == 0:
+                    res = linear_slice
+                else:
+                    res += linear_slice
+
+        print("start compiling lm_head")
+        self.compile()
+        print("end compiling lm_head")
+
+    def set_weights(self, op_id, weights):
+        self.set_weights_async(op_id, weights)
+        with FileLock(f"lmhead_run.lock"):
+            backend_lib.run(self._mm)
+
+    def set_weights_async(self, op_id, weights):
+        self.setWeights(1, op_id, *weights)
+
+    def run(
+        self, X: np.ndarray
+    ) -> np.ndarray:
+        """Run the layer: $X * (W * S)^T$ .
+
+        Args:
+            X (np.ndarray): activation
+
+        Raises:
+            RuntimeError: Input, weights or scale shape mismatch
+
+        Returns:
+            np.ndarray: result
+        """
+        self.set_input_tensor(X, 0)
+        self.elapsed = backend_lib.run(self._mm)
+        if len(self.out) == 1:
+            return self.out[0]
+        return self.out
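One detail worth noting in the relocated module: `set_weights` wraps the NPU run in a `filelock.FileLock` on `lmhead_run.lock`, serializing weight-loading runs across processes that share the device. A standalone sketch of that cross-process mutex (the lock file name here is arbitrary):

    from filelock import FileLock

    # All processes that open the same lock file contend on one OS-level lock;
    # the critical section runs in exactly one process at a time.
    with FileLock("lmhead_run.lock"):
        pass  # e.g. submit work to the NPU here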
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250105
+Version: 2.2.0b20250106
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250105 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Provides-Extra: cpp-arl
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250105 ; extra == 'cpp-arl'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp-arl'
 Requires-Dist: setuptools ; extra == 'cpp-arl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -65,7 +65,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250105 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250106 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -85,9 +85,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250105 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250105 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250105 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -102,9 +102,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250105 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250105 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250105 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -119,9 +119,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250105 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250105 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250105 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -141,9 +141,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250105 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250105 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250105 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -163,9 +163,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250105 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250105 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250105 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -104,7 +104,7 @@ ipex_llm/transformers/lookup.py,sha256=c4ETIha6ZLbWvhcclSKRDdi5Ipuet4mfUnOkBa0E8
 ipex_llm/transformers/low_bit_linear.py,sha256=dyyYyCqw0GK8hzaUGanrg-uIhU1HTLEEbvbxXMlm-80,41668
 ipex_llm/transformers/model.py,sha256=KcRjkauGg48BYrUBoUZaVMpg7Piuz5JrfIpVZd3EIjs,41105
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
-ipex_llm/transformers/npu_model.py,sha256=a1mkyc6EqD7AJhqbYzokGhFubNpt5trIMuZT_dQKlTk,37861
+ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
 ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
 ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
 ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
@@ -194,13 +194,14 @@ ipex_llm/transformers/npu_models/chatglm.py,sha256=YzpGLZ7ORt6qkwW9mCwZ_xhOAI8uH
 ipex_llm/transformers/npu_models/chatglm4.py,sha256=J4523DzhIzZxIvlf1V9qU4auzEGKvC80YqyxuCJygjw,9795
 ipex_llm/transformers/npu_models/common.py,sha256=tTUJL7IxVrJSnXle6nla35wTUrBf2sOEt7Ya1qyMezY,4853
 ipex_llm/transformers/npu_models/convert.py,sha256=FILSGnoltcR9FMrCkw0eOKh6p3sbBI5i0Ms8AsJc04E,25342
-ipex_llm/transformers/npu_models/convert_mp.py,sha256=t7160V4MmYpnex2NfuLTcqoc1meGEXdYi4AAPotfbzk,24518
+ipex_llm/transformers/npu_models/convert_mp.py,sha256=ADMTnY3utRmCA9kGOCoiJ3NTI4via3TiX6i8duJ2TIE,24504
 ipex_llm/transformers/npu_models/glm_edge.py,sha256=VsJex-6530h4ZQk35TxRe1MnttAHT41omE8LV47LgBE,6723
 ipex_llm/transformers/npu_models/kv.py,sha256=2OSFO9Z6e4nGdVxXEM-Bq2qa_npYYbGmQt3lcCZxTlU,9201
-ipex_llm/transformers/npu_models/linear.py,sha256=G7W3tFXLG4FDzz-vc90_-YlEl1GxdoNz4XliqdlUb2U,10878
+ipex_llm/transformers/npu_models/linear.py,sha256=RQxL42laJTm5hz11SNl0KlJX9xM6C_0OiN6soLShDM0,11284
 ipex_llm/transformers/npu_models/llama.py,sha256=WpRcw7sLnbZeR4XoM-a6XQ-BNYeQaHBEOX1r_O5C9uo,9857
 ipex_llm/transformers/npu_models/llama_mp.py,sha256=6fyWzbFozKPOfSPDBk2x_Rsejj2P0HOR-jn8SNUWy3s,49349
-ipex_llm/transformers/npu_models/lm_head.py,sha256=ZwH23jf21pNILTriAeF46TdymoVP3_OgQArsQX6lZzI,7823
+ipex_llm/transformers/npu_models/lm_head.py,sha256=-yS0sM8905sQ2S9pwCZ6pX1vZa15aqOPGdoYEyhGbuQ,4825
+ipex_llm/transformers/npu_models/lm_head_linear.py,sha256=7iPV0UqPqEYTP4-cUtJXxgjOzAObzEY9Utuu3nTIRqI,3683
 ipex_llm/transformers/npu_models/minicpm.py,sha256=PP3nrCoHMcQ3kHrnQ0gYglCKvL0Dh5MAQX3_Yq8_Ygc,10411
 ipex_llm/transformers/npu_models/minicpm_mp.py,sha256=0iCRWN9UIUQp5tSKyu-orpGCOxEjQrJ9b8ePnG0-ZV4,41921
 ipex_llm/transformers/npu_models/minicpmv_mp.py,sha256=m11WT6s_H5wkFtlz7aHMOL9b_CoL_G5MhoL5te4la_Q,20147
@@ -253,11 +254,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.0b20250105.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
-ipex_llm-2.2.0b20250105.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
-ipex_llm-2.2.0b20250105.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
-ipex_llm-2.2.0b20250105.dist-info/METADATA,sha256=V2tvYG6plKx9JKN-Jhm0eImBpQzazdndXQ2s2OOvwAE,11374
-ipex_llm-2.2.0b20250105.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
-ipex_llm-2.2.0b20250105.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.2.0b20250105.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.2.0b20250105.dist-info/RECORD,,
+ipex_llm-2.2.0b20250106.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
+ipex_llm-2.2.0b20250106.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
+ipex_llm-2.2.0b20250106.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
+ipex_llm-2.2.0b20250106.dist-info/METADATA,sha256=RVDr0pwoPE6J0yPUZ9k7t6_jQn01wTwAXkU5ViqE-c8,11374
+ipex_llm-2.2.0b20250106.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
+ipex_llm-2.2.0b20250106.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250106.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250106.dist-info/RECORD,,