ipex-llm 2.2.0b20250104__py3-none-win_amd64.whl → 2.2.0b20250106__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/npu_model.py +80 -50
- ipex_llm/transformers/npu_models/convert_mp.py +1 -1
- ipex_llm/transformers/npu_models/linear.py +15 -3
- ipex_llm/transformers/npu_models/lm_head.py +1 -90
- ipex_llm/transformers/npu_models/lm_head_linear.py +106 -0
- {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/METADATA +19 -19
- {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/RECORD +42 -41
- {ipex_llm-2.2.0b20250104.data → ipex_llm-2.2.0b20250106.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.2.0b20250104.data → ipex_llm-2.2.0b20250106.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.2.0b20250104.data → ipex_llm-2.2.0b20250106.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250104.dist-info → ipex_llm-2.2.0b20250106.dist-info}/top_level.txt +0 -0
ipex_llm/libs/bloom-api.dll
CHANGED
Binary file
|
ipex_llm/libs/bloom.dll
CHANGED
Binary file
|
ipex_llm/libs/gptneox-api.dll
CHANGED
Binary file
|
ipex_llm/libs/gptneox.dll
CHANGED
Binary file
|
ipex_llm/libs/libbloom_avx.dll
CHANGED
Binary file
|
ipex_llm/libs/libbloom_vnni.dll
CHANGED
Binary file
|
ipex_llm/libs/libgptneox_avx.dll
CHANGED
Binary file
|
ipex_llm/libs/libgptneox_vnni.dll
CHANGED
Binary file
|
ipex_llm/libs/libllama_avx.dll
CHANGED
Binary file
|
ipex_llm/libs/libllama_vnni.dll
CHANGED
Binary file
|
ipex_llm/libs/libstarcoder_avx.dll
CHANGED
Binary file
|
ipex_llm/libs/libstarcoder_vnni.dll
CHANGED
Binary file
|
ipex_llm/libs/llama-api.dll
CHANGED
Binary file
|
ipex_llm/libs/llama.dll
CHANGED
Binary file
|
ipex_llm/libs/main-bloom.exe
CHANGED
Binary file
|
ipex_llm/libs/main-gptneox.exe
CHANGED
Binary file
|
ipex_llm/libs/main-llama.exe
CHANGED
Binary file
|
ipex_llm/libs/main-starcoder.exe
CHANGED
Binary file
|
ipex_llm/libs/pipeline.dll
CHANGED
Binary file
|
ipex_llm/libs/quantize-bloom.exe
CHANGED
Binary file
|
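ipex_llm/libs/quantize-bloom_vnni.exe
CHANGED
Binary file
|
ipex_llm/libs/quantize-gptneox.exe
CHANGED
Binary file
|
ipex_llm/libs/quantize-gptneox_vnni.exe
CHANGED
Binary file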
|
ipex_llm/libs/quantize-llama.exe
CHANGED
Binary file
|
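ipex_llm/libs/quantize-llama_vnni.exe
CHANGED
Binary file
|
ipex_llm/libs/quantize-starcoder.exe
CHANGED
Binary file
|
ipex_llm/libs/quantize-starcoder_vnni.exe
CHANGED
Binary file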
|
ipex_llm/libs/starcoder-api.dll
CHANGED
Binary file
|
ipex_llm/libs/starcoder.dll
CHANGED
Binary file
|
ipex_llm/transformers/npu_model.py
CHANGED
@@ -27,7 +27,7 @@ from transformers.configuration_utils import PretrainedConfig
|
|
27
27
|
|
28
28
|
from ipex_llm.utils.common.log4Error import invalidInputError
|
29
29
|
from ipex_llm.transformers.utils import logger, load_imatrix_data
|
30
|
-
from ipex_llm.transformers.npu_models.convert import optimize_llm
|
30
|
+
from ipex_llm.transformers.npu_models.convert import optimize_llm
|
31
31
|
|
32
32
|
|
33
33
|
def patch_flash_attn_import(filename: str) -> List[str]:
|
@@ -207,8 +207,6 @@ class _BaseAutoModelClass:
|
|
207
207
|
model = model.eval()
|
208
208
|
logger.info(f"Finish to convert model")
|
209
209
|
else:
|
210
|
-
from intel_npu_acceleration_library.compiler import create_npu_kernels
|
211
|
-
|
212
210
|
if optimize_model:
|
213
211
|
invalidInputError(
|
214
212
|
max_prompt_len < max_context_len,
|
@@ -232,11 +230,14 @@ class _BaseAutoModelClass:
|
|
232
230
|
"convert_model": convert_model,
|
233
231
|
"save_directory": save_directory,
|
234
232
|
"fuse_layers": fuse_layers,
|
235
|
-
"imatrix_data": imatrix_data
|
233
|
+
"imatrix_data": imatrix_data,
|
234
|
+
"skip_npu_logic": mock_device == "dummy",
|
236
235
|
}
|
236
|
+
# Dummy will skip npu related logic and save the quantized model
|
237
|
+
if mock_device == "dummy":
|
238
|
+
model.save_low_bit = types.MethodType(save_low_bit, model)
|
237
239
|
model = cls.optimize_npu_model(*args, **optimize_kwargs)
|
238
240
|
else:
|
239
|
-
from ipex_llm.transformers.npu_models.convert import optimize_llm
|
240
241
|
optimize_llm(model)
|
241
242
|
with torch.no_grad():
|
242
243
|
cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
|
@@ -258,7 +259,6 @@ class _BaseAutoModelClass:
|
|
258
259
|
def optimize_npu_model(cls, *args, **kwargs):
|
259
260
|
|
260
261
|
from ipex_llm.transformers.npu_models.convert_mp import optimize_llm_pre, optimize_llm
|
261
|
-
from intel_npu_acceleration_library.compiler import create_npu_kernels
|
262
262
|
|
263
263
|
model = kwargs.pop("model")
|
264
264
|
qtype = kwargs.pop("qtype", "sym_int4_rtn")
|
@@ -275,6 +275,7 @@ class _BaseAutoModelClass:
|
|
275
275
|
save_directory = kwargs.pop('save_directory', None)
|
276
276
|
fuse_layers = kwargs.pop('fuse_layers', None)
|
277
277
|
imatrix_data = kwargs.pop('imatrix_data', None)
|
278
|
+
skip_npu_logic = kwargs.pop("skip_npu_logic", False)
|
278
279
|
invalidInputError(save_directory is not None,
|
279
280
|
"Please provide the path to save converted model "
|
280
281
|
"through `save_directory`.")
|
@@ -294,51 +295,58 @@ class _BaseAutoModelClass:
|
|
294
295
|
cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
|
295
296
|
quantization_group_size, imatrix_data,
|
296
297
|
*args, **kwargs)
|
297
|
-
|
298
|
+
if not skip_npu_logic:
|
299
|
+
from intel_npu_acceleration_library.compiler import create_npu_kernels
|
300
|
+
create_npu_kernels(llm)
|
298
301
|
model = model.eval()
|
299
302
|
logger.info(f"Finish to convert model")
|
300
303
|
model.config.update({"bigdl_transformers_low_bit": qtype})
|
301
|
-
model.share_memory()
|
302
304
|
|
303
|
-
if
|
304
|
-
|
305
|
-
from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
|
306
|
-
optimize_llm_single_process(
|
307
|
-
llm,
|
308
|
-
kv_len=max_context_len,
|
309
|
-
max_prompt_len=max_prompt_len,
|
310
|
-
transpose_value_cache=transpose_value_cache,
|
311
|
-
group_size=quantization_group_size,
|
312
|
-
qtype=qtype,
|
313
|
-
save_directory=save_directory,
|
314
|
-
fuse_layers=fuse_layers,
|
315
|
-
has_llm=hasattr(model, "llm")
|
316
|
-
)
|
317
|
-
else:
|
318
|
-
optimize_llm(
|
319
|
-
llm,
|
320
|
-
max_context_len=max_context_len,
|
321
|
-
max_prompt_len=max_prompt_len,
|
322
|
-
inter_pp=inter_pp,
|
323
|
-
intra_pp=intra_pp,
|
324
|
-
transpose_value_cache=transpose_value_cache,
|
325
|
-
group_size=quantization_group_size
|
326
|
-
)
|
305
|
+
if skip_npu_logic:
|
306
|
+
model.save_low_bit(model_dir=save_directory)
|
327
307
|
else:
|
328
|
-
|
329
|
-
|
330
|
-
|
308
|
+
model.share_memory()
|
309
|
+
|
310
|
+
if not pipeline:
|
311
|
+
if model.config.model_type in ["qwen2", "llama", "minicpm"]:
|
312
|
+
from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
|
313
|
+
optimize_llm_single_process(
|
314
|
+
llm,
|
331
315
|
kv_len=max_context_len,
|
332
316
|
max_prompt_len=max_prompt_len,
|
333
317
|
transpose_value_cache=transpose_value_cache,
|
334
318
|
group_size=quantization_group_size,
|
335
319
|
qtype=qtype,
|
336
|
-
convert_model=convert_model,
|
337
320
|
save_directory=save_directory,
|
338
|
-
fuse_layers=fuse_layers
|
339
|
-
|
340
|
-
|
341
|
-
|
321
|
+
fuse_layers=fuse_layers,
|
322
|
+
has_llm=hasattr(model, "llm")
|
323
|
+
)
|
324
|
+
else:
|
325
|
+
optimize_llm(
|
326
|
+
llm,
|
327
|
+
max_context_len=max_context_len,
|
328
|
+
max_prompt_len=max_prompt_len,
|
329
|
+
inter_pp=inter_pp,
|
330
|
+
intra_pp=intra_pp,
|
331
|
+
transpose_value_cache=transpose_value_cache,
|
332
|
+
group_size=quantization_group_size
|
333
|
+
)
|
334
|
+
else:
|
335
|
+
from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
|
336
|
+
import convert_llm
|
337
|
+
convert_llm(llm,
|
338
|
+
kv_len=max_context_len,
|
339
|
+
max_prompt_len=max_prompt_len,
|
340
|
+
transpose_value_cache=transpose_value_cache,
|
341
|
+
group_size=quantization_group_size,
|
342
|
+
qtype=qtype,
|
343
|
+
convert_model=convert_model,
|
344
|
+
save_directory=save_directory,
|
345
|
+
fuse_layers=fuse_layers)
|
346
|
+
model.save_low_bit = types.MethodType(save_low_bit, model)
|
347
|
+
model.save_low_bit(save_directory)
|
348
|
+
logger.info(f"Converted model has already saved to {save_directory}.")
|
349
|
+
|
342
350
|
return model
|
343
351
|
|
344
352
|
@classmethod
|
@@ -379,6 +387,7 @@ class _BaseAutoModelClass:
|
|
379
387
|
intra_pp = kwargs.pop("intra_pp", None)
|
380
388
|
transpose_value_cache = kwargs.pop("transpose_value_cache", True)
|
381
389
|
modules_to_not_convert = kwargs.pop("modules_to_not_convert", [])
|
390
|
+
save_directory = kwargs.pop('save_directory', None)
|
382
391
|
|
383
392
|
from transformers.models.auto.configuration_auto import AutoConfig
|
384
393
|
from transformers.modeling_utils import no_init_weights, get_state_dict_dtype
|
@@ -650,16 +659,37 @@ class _BaseAutoModelClass:
|
|
650
659
|
param.requires_grad_(False)
|
651
660
|
|
652
661
|
if optimize_model and not pipeline:
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
662
|
+
if model.config.model_type in ["qwen2", "llama", "minicpm"]:
|
663
|
+
from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
|
664
|
+
if save_directory is None:
|
665
|
+
invalidInputError(False,
|
666
|
+
"Please specify the save_directory, the path of folder " +
|
667
|
+
"to save the compiled NPU model. If path not exists, " +
|
668
|
+
"the compiled NPU model will be saved there. " +
|
669
|
+
"Else, program will exit.")
|
670
|
+
|
671
|
+
optimize_llm_single_process(
|
672
|
+
llm,
|
673
|
+
kv_len=max_context_len,
|
674
|
+
max_prompt_len=max_prompt_len,
|
675
|
+
transpose_value_cache=transpose_value_cache,
|
676
|
+
group_size=quantization_group_size,
|
677
|
+
qtype=qtype,
|
678
|
+
save_directory=save_directory,
|
679
|
+
fuse_layers=None,
|
680
|
+
has_llm=hasattr(model, "llm")
|
681
|
+
)
|
682
|
+
else:
|
683
|
+
from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
|
684
|
+
optimize_llm(
|
685
|
+
llm,
|
686
|
+
max_context_len=max_context_len,
|
687
|
+
max_prompt_len=max_prompt_len,
|
688
|
+
inter_pp=inter_pp,
|
689
|
+
intra_pp=intra_pp,
|
690
|
+
transpose_value_cache=transpose_value_cache,
|
691
|
+
group_size=quantization_group_size
|
692
|
+
)
|
663
693
|
elif optimize_model and pipeline:
|
664
694
|
from ipex_llm.transformers.npu_pipeline_model.convert_pipeline \
|
665
695
|
import convert_llm
|
ipex_llm/transformers/npu_models/convert_mp.py
CHANGED
@@ -18,7 +18,7 @@ import torch
|
|
18
18
|
import importlib
|
19
19
|
import numpy as np
|
20
20
|
from ipex_llm.transformers.low_bit_linear import LowBitLinear, FP4Params
|
21
|
-
from ipex_llm.transformers.npu_models.lm_head import
|
21
|
+
from ipex_llm.transformers.npu_models.lm_head import SlicedLMHead
|
22
22
|
from ipex_llm.utils.common.log4Error import invalidInputError
|
23
23
|
|
24
24
|
|
ipex_llm/transformers/npu_models/linear.py
CHANGED
@@ -21,16 +21,25 @@
|
|
21
21
|
# SPDX-License-Identifier: Apache 2.0
|
22
22
|
#
|
23
23
|
|
24
|
-
|
25
|
-
from intel_npu_acceleration_library.dtypes import NPUDtype
|
24
|
+
|
26
25
|
import os
|
27
26
|
import torch
|
28
27
|
from torch.nn import Parameter
|
29
28
|
import uuid
|
30
29
|
import math
|
31
|
-
from intel_npu_acceleration_library.backend import run_matmul
|
32
30
|
from typing import Optional, Union
|
33
31
|
from ipex_llm.utils.common import invalidInputError
|
32
|
+
import importlib
|
33
|
+
|
34
|
+
|
35
|
+
def is_acclib_available():
|
36
|
+
return importlib.util.find_spec("intel_npu_acceleration_library") is not None
|
37
|
+
|
38
|
+
|
39
|
+
if is_acclib_available():
|
40
|
+
from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
|
41
|
+
from intel_npu_acceleration_library.dtypes import NPUDtype
|
42
|
+
from intel_npu_acceleration_library.backend import run_matmul
|
34
43
|
|
35
44
|
|
36
45
|
class Linear(torch.nn.Module):
|
@@ -63,6 +72,7 @@ class Linear(torch.nn.Module):
|
|
63
72
|
if self.training:
|
64
73
|
out = self._mm(x, self.weight, None)
|
65
74
|
else:
|
75
|
+
from intel_npu_acceleration_library.backend import run_matmul
|
66
76
|
out = run_matmul(x, self.weight, None, self.op_id)
|
67
77
|
|
68
78
|
if self.bias is None:
|
@@ -105,6 +115,8 @@ class Linear(torch.nn.Module):
|
|
105
115
|
Returns:
|
106
116
|
Union[Linear, QuantizedLinear]: A NPU linear layer
|
107
117
|
"""
|
118
|
+
from intel_npu_acceleration_library.quantization import quantize_tensor, compress_to_i4
|
119
|
+
from intel_npu_acceleration_library.dtypes import NPUDtype
|
108
120
|
if dtype.is_floating_point:
|
109
121
|
if bias is None:
|
110
122
|
return Linear(weight.to(dtype), None)
|
ipex_llm/transformers/npu_models/lm_head.py
CHANGED
@@ -16,96 +16,6 @@
|
|
16
16
|
import torch
|
17
17
|
from torch import nn
|
18
18
|
import numpy as np
|
19
|
-
from filelock import FileLock
|
20
|
-
from intel_npu_acceleration_library.backend import NNFactory
|
21
|
-
from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
|
22
|
-
|
23
|
-
|
24
|
-
class LMHeadLinear(NNFactory):
|
25
|
-
"""Quantized Linear class for sliced lm_head, computing a matrix matrix multiplication
|
26
|
-
with weights prefetching."""
|
27
|
-
|
28
|
-
def __init__(
|
29
|
-
self,
|
30
|
-
inC: int,
|
31
|
-
outC: int,
|
32
|
-
batch: int,
|
33
|
-
split_num: int = 2,
|
34
|
-
profile: bool = False,
|
35
|
-
device: str = "NPU",
|
36
|
-
dtype: np.dtype = np.int8,
|
37
|
-
use_split: bool = False,
|
38
|
-
group_size: int = 0,
|
39
|
-
asym: bool = False,
|
40
|
-
):
|
41
|
-
"""Initialize the LMHeadLinear class.
|
42
|
-
|
43
|
-
Args:
|
44
|
-
inC (int): input channels
|
45
|
-
outC (int): output channels
|
46
|
-
batch (int): batch
|
47
|
-
split_num (int): split in_features of lm_head to how many parts
|
48
|
-
profile (bool): Enable/Disable profiling. Defaults to False.
|
49
|
-
device (str): Target device, default to "NPU".
|
50
|
-
dtype (np.dtype): weights datatype. Defaults to np.int8.
|
51
|
-
|
52
|
-
"""
|
53
|
-
super().__init__(profile, device)
|
54
|
-
self.inC, self.outC = inC, outC
|
55
|
-
self.batch = batch
|
56
|
-
|
57
|
-
self.split_num = split_num
|
58
|
-
if use_split:
|
59
|
-
input = self.parameter((1, self.batch, self.inC))
|
60
|
-
res = self.dq_split_linear(input, self.split_num, self.outC, self.inC, wt_dtype=dtype,
|
61
|
-
scale_factor=(group_size == 0), asym=asym)
|
62
|
-
else:
|
63
|
-
input = self.parameter((self.batch, self.inC))
|
64
|
-
split_size = self.inC // split_num // 2 * 2
|
65
|
-
|
66
|
-
for i in range(self.split_num):
|
67
|
-
start_idx = i * split_size
|
68
|
-
end_idx = (i + 1) * split_size if i < self.split_num - 1 else self.inC
|
69
|
-
input_slice = self.slice(input, begin=[0, start_idx],
|
70
|
-
end=[self.batch, end_idx])
|
71
|
-
linear_slice = self.linear(input_slice, outC, split_size, bias=False,
|
72
|
-
wt_dtype=dtype, asym=asym)
|
73
|
-
if i == 0:
|
74
|
-
res = linear_slice
|
75
|
-
else:
|
76
|
-
res += linear_slice
|
77
|
-
|
78
|
-
print("start compiling lm_head")
|
79
|
-
self.compile()
|
80
|
-
print("end compiling lm_head")
|
81
|
-
|
82
|
-
def set_weights(self, op_id, weights):
|
83
|
-
self.set_weights_async(op_id, weights)
|
84
|
-
with FileLock(f"lmhead_run.lock"):
|
85
|
-
backend_lib.run(self._mm)
|
86
|
-
|
87
|
-
def set_weights_async(self, op_id, weights):
|
88
|
-
self.setWeights(1, op_id, *weights)
|
89
|
-
|
90
|
-
def run(
|
91
|
-
self, X: np.ndarray
|
92
|
-
) -> np.ndarray:
|
93
|
-
"""Run the layer: $X * (W * S)^T$ .
|
94
|
-
|
95
|
-
Args:
|
96
|
-
X (np.ndarray): activation
|
97
|
-
|
98
|
-
Raises:
|
99
|
-
RuntimeError: Input, weights or scale shape mismatch
|
100
|
-
|
101
|
-
Returns:
|
102
|
-
np.ndarray: result
|
103
|
-
"""
|
104
|
-
self.set_input_tensor(X, 0)
|
105
|
-
self.elapsed = backend_lib.run(self._mm)
|
106
|
-
if len(self.out) == 1:
|
107
|
-
return self.out[0]
|
108
|
-
return self.out
|
109
19
|
|
110
20
|
|
111
21
|
class SlicedLMHead(nn.Module):
|
@@ -160,6 +70,7 @@ class SlicedLMHead(nn.Module):
|
|
160
70
|
return self.lm_heads[0].weight.dtype
|
161
71
|
|
162
72
|
def get_fused_lm_head(self):
|
73
|
+
from ipex_llm.transformers.npu_models.lm_head_linear import LMHeadLinear
|
163
74
|
np_dtype = np.uint8 if self.get_weight_dtype() == torch.uint8 else np.int8
|
164
75
|
self.fused_lm_head = LMHeadLinear(self.inC, self.outC, 1, self.split_num,
|
165
76
|
False, "NPU", dtype=np_dtype, use_split=self.use_split,
|
ipex_llm/transformers/npu_models/lm_head_linear.py
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2016 The BigDL Authors.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
import numpy as np
|
17
|
+
from filelock import FileLock
|
18
|
+
from intel_npu_acceleration_library.backend import NNFactory
|
19
|
+
from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
|
20
|
+
|
21
|
+
|
22
|
+
class LMHeadLinear(NNFactory):
|
23
|
+
"""Quantized Linear class for sliced lm_head, computing a matrix matrix multiplication
|
24
|
+
with weights prefetching."""
|
25
|
+
|
26
|
+
def __init__(
|
27
|
+
self,
|
28
|
+
inC: int,
|
29
|
+
outC: int,
|
30
|
+
batch: int,
|
31
|
+
split_num: int = 2,
|
32
|
+
profile: bool = False,
|
33
|
+
device: str = "NPU",
|
34
|
+
dtype: np.dtype = np.int8,
|
35
|
+
use_split: bool = False,
|
36
|
+
group_size: int = 0,
|
37
|
+
asym: bool = False,
|
38
|
+
):
|
39
|
+
"""Initialize the LMHeadLinear class.
|
40
|
+
|
41
|
+
Args:
|
42
|
+
inC (int): input channels
|
43
|
+
outC (int): output channels
|
44
|
+
batch (int): batch
|
45
|
+
split_num (int): split in_features of lm_head to how many parts
|
46
|
+
profile (bool): Enable/Disable profiling. Defaults to False.
|
47
|
+
device (str): Target device, default to "NPU".
|
48
|
+
dtype (np.dtype): weights datatype. Defaults to np.int8.
|
49
|
+
|
50
|
+
"""
|
51
|
+
super().__init__(profile, device)
|
52
|
+
self.inC, self.outC = inC, outC
|
53
|
+
self.batch = batch
|
54
|
+
|
55
|
+
self.split_num = split_num
|
56
|
+
if use_split:
|
57
|
+
input = self.parameter((1, self.batch, self.inC))
|
58
|
+
res = self.dq_split_linear(input, self.split_num, self.outC, self.inC, wt_dtype=dtype,
|
59
|
+
scale_factor=(group_size == 0), asym=asym)
|
60
|
+
else:
|
61
|
+
input = self.parameter((self.batch, self.inC))
|
62
|
+
split_size = self.inC // split_num // 2 * 2
|
63
|
+
|
64
|
+
for i in range(self.split_num):
|
65
|
+
start_idx = i * split_size
|
66
|
+
end_idx = (i + 1) * split_size if i < self.split_num - 1 else self.inC
|
67
|
+
input_slice = self.slice(input, begin=[0, start_idx],
|
68
|
+
end=[self.batch, end_idx])
|
69
|
+
linear_slice = self.linear(input_slice, outC, split_size, bias=False,
|
70
|
+
wt_dtype=dtype, asym=asym)
|
71
|
+
if i == 0:
|
72
|
+
res = linear_slice
|
73
|
+
else:
|
74
|
+
res += linear_slice
|
75
|
+
|
76
|
+
print("start compiling lm_head")
|
77
|
+
self.compile()
|
78
|
+
print("end compiling lm_head")
|
79
|
+
|
80
|
+
def set_weights(self, op_id, weights):
|
81
|
+
self.set_weights_async(op_id, weights)
|
82
|
+
with FileLock(f"lmhead_run.lock"):
|
83
|
+
backend_lib.run(self._mm)
|
84
|
+
|
85
|
+
def set_weights_async(self, op_id, weights):
|
86
|
+
self.setWeights(1, op_id, *weights)
|
87
|
+
|
88
|
+
def run(
|
89
|
+
self, X: np.ndarray
|
90
|
+
) -> np.ndarray:
|
91
|
+
"""Run the layer: $X * (W * S)^T$ .
|
92
|
+
|
93
|
+
Args:
|
94
|
+
X (np.ndarray): activation
|
95
|
+
|
96
|
+
Raises:
|
97
|
+
RuntimeError: Input, weights or scale shape mismatch
|
98
|
+
|
99
|
+
Returns:
|
100
|
+
np.ndarray: result
|
101
|
+
"""
|
102
|
+
self.set_input_tensor(X, 0)
|
103
|
+
self.elapsed = backend_lib.run(self._mm)
|
104
|
+
if len(self.out) == 1:
|
105
|
+
return self.out[0]
|
106
|
+
return self.out
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ipex-llm
|
3
|
-
Version: 2.2.0b20250104
|
3
|
+
Version: 2.2.0b20250106
|
4
4
|
Summary: Large Language Model Develop Toolkit
|
5
5
|
Home-page: https://github.com/intel-analytics/ipex-llm
|
6
6
|
Author: BigDL Authors
|
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
|
|
27
27
|
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
|
28
28
|
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
|
29
29
|
Provides-Extra: cpp
|
30
|
-
Requires-Dist: bigdl-core-cpp ==2.6.
|
30
|
+
Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp'
|
31
31
|
Requires-Dist: setuptools ; extra == 'cpp'
|
32
32
|
Provides-Extra: cpp-arl
|
33
|
-
Requires-Dist: bigdl-core-cpp ==2.6.
|
33
|
+
Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp-arl'
|
34
34
|
Requires-Dist: setuptools ; extra == 'cpp-arl'
|
35
35
|
Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
|
36
36
|
Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
|
@@ -65,7 +65,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
|
|
65
65
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
|
66
66
|
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
|
67
67
|
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
|
68
|
-
Requires-Dist: bigdl-core-npu ==2.6.
|
68
|
+
Requires-Dist: bigdl-core-npu ==2.6.0b20250106 ; (platform_system == "Windows") and extra == 'npu'
|
69
69
|
Provides-Extra: serving
|
70
70
|
Requires-Dist: py-cpuinfo ; extra == 'serving'
|
71
71
|
Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
|
@@ -85,9 +85,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
|
|
85
85
|
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
|
86
86
|
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
|
87
87
|
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
|
88
|
-
Requires-Dist: bigdl-core-xe-21 ==2.6.
|
89
|
-
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.
|
90
|
-
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.
|
88
|
+
Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu'
|
89
|
+
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu'
|
90
|
+
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu'
|
91
91
|
Provides-Extra: xpu-2-1
|
92
92
|
Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
|
93
93
|
Requires-Dist: protobuf ; extra == 'xpu-2-1'
|
@@ -102,9 +102,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
|
|
102
102
|
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
|
103
103
|
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
|
104
104
|
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
|
105
|
-
Requires-Dist: bigdl-core-xe-21 ==2.6.
|
106
|
-
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.
|
107
|
-
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.
|
105
|
+
Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
|
106
|
+
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
|
107
|
+
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
|
108
108
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
|
109
109
|
Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
|
110
110
|
Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
|
@@ -119,9 +119,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
|
|
119
119
|
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
|
120
120
|
Requires-Dist: tabulate ; extra == 'xpu-arc'
|
121
121
|
Requires-Dist: setuptools ; extra == 'xpu-arc'
|
122
|
-
Requires-Dist: bigdl-core-xe-23 ==2.6.
|
123
|
-
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
|
124
|
-
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
|
122
|
+
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
|
123
|
+
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
|
124
|
+
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
|
125
125
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
|
126
126
|
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
|
127
127
|
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
|
@@ -141,9 +141,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
|
|
141
141
|
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
|
142
142
|
Requires-Dist: tabulate ; extra == 'xpu-arl'
|
143
143
|
Requires-Dist: setuptools ; extra == 'xpu-arl'
|
144
|
-
Requires-Dist: bigdl-core-xe-23 ==2.6.
|
145
|
-
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
|
146
|
-
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
|
144
|
+
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
|
145
|
+
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
|
146
|
+
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
|
147
147
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
|
148
148
|
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
|
149
149
|
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
|
@@ -163,9 +163,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
|
|
163
163
|
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
|
164
164
|
Requires-Dist: tabulate ; extra == 'xpu-lnl'
|
165
165
|
Requires-Dist: setuptools ; extra == 'xpu-lnl'
|
166
|
-
Requires-Dist: bigdl-core-xe-23 ==2.6.
|
167
|
-
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
|
168
|
-
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
|
166
|
+
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
|
167
|
+
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
|
168
|
+
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
|
169
169
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
|
170
170
|
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
|
171
171
|
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
|
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
|
|
41
41
|
ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
|
42
42
|
ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
|
43
43
|
ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
|
-
ipex_llm/libs/bloom-api.dll,sha256=
|
45
|
-
ipex_llm/libs/bloom.dll,sha256=
|
46
|
-
ipex_llm/libs/gptneox-api.dll,sha256=
|
47
|
-
ipex_llm/libs/gptneox.dll,sha256=
|
48
|
-
ipex_llm/libs/libbloom_avx.dll,sha256=
|
49
|
-
ipex_llm/libs/libbloom_vnni.dll,sha256=
|
50
|
-
ipex_llm/libs/libgptneox_avx.dll,sha256=
|
51
|
-
ipex_llm/libs/libgptneox_vnni.dll,sha256=
|
52
|
-
ipex_llm/libs/libllama_avx.dll,sha256=
|
53
|
-
ipex_llm/libs/libllama_vnni.dll,sha256=
|
54
|
-
ipex_llm/libs/libstarcoder_avx.dll,sha256=
|
55
|
-
ipex_llm/libs/libstarcoder_vnni.dll,sha256=
|
56
|
-
ipex_llm/libs/llama-api.dll,sha256
|
57
|
-
ipex_llm/libs/llama.dll,sha256=
|
58
|
-
ipex_llm/libs/main-bloom.exe,sha256=
|
59
|
-
ipex_llm/libs/main-gptneox.exe,sha256=
|
60
|
-
ipex_llm/libs/main-llama.exe,sha256=
|
61
|
-
ipex_llm/libs/main-starcoder.exe,sha256=
|
62
|
-
ipex_llm/libs/pipeline.dll,sha256=
|
63
|
-
ipex_llm/libs/quantize-bloom.exe,sha256=
|
64
|
-
ipex_llm/libs/quantize-bloom_vnni.exe,sha256=
|
65
|
-
ipex_llm/libs/quantize-gptneox.exe,sha256=
|
66
|
-
ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=
|
67
|
-
ipex_llm/libs/quantize-llama.exe,sha256=
|
68
|
-
ipex_llm/libs/quantize-llama_vnni.exe,sha256=
|
69
|
-
ipex_llm/libs/quantize-starcoder.exe,sha256=
|
70
|
-
ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=
|
71
|
-
ipex_llm/libs/starcoder-api.dll,sha256=
|
72
|
-
ipex_llm/libs/starcoder.dll,sha256=
|
44
|
+
ipex_llm/libs/bloom-api.dll,sha256=oXbHMfWCF-cSjpdBIxpiOoJr36WPJC62FHhdSKzEivg,36352
|
45
|
+
ipex_llm/libs/bloom.dll,sha256=vsa_giqtsGQWXMVSOt1r-wsEWiRBRxT2skq0HwJqVAA,506880
|
46
|
+
ipex_llm/libs/gptneox-api.dll,sha256=FtjD0SyIKcyW8Q1cKp7rfiGkS8xyncjrINwH1LHZVcU,24576
|
47
|
+
ipex_llm/libs/gptneox.dll,sha256=qriFw1GXbkagSPydHmDC5YDmqrahnLiWFKBWZ2KvyCM,567296
|
48
|
+
ipex_llm/libs/libbloom_avx.dll,sha256=9KjLxHUKLesAykdLy-swkRz_Q86qTPzrJC0Y8w6cWV4,535040
|
49
|
+
ipex_llm/libs/libbloom_vnni.dll,sha256=2qDbiFOll48C5fMdWG5Mc9BkPxoeYM-W39wG8-joiyk,506880
|
50
|
+
ipex_llm/libs/libgptneox_avx.dll,sha256=uoLc9zGCCuUcIShyDWwb_MbVKhqlMh13laB_V50U_4k,595456
|
51
|
+
ipex_llm/libs/libgptneox_vnni.dll,sha256=swge9FsvtJIQ_VitBVNnh1BAf-8a-gCRWOBJeAaOumw,567808
|
52
|
+
ipex_llm/libs/libllama_avx.dll,sha256=GNl29Z8r50KAUzH2dOg5FlfM1Gs9Ab4ZNov7Pu4PRjE,589824
|
53
|
+
ipex_llm/libs/libllama_vnni.dll,sha256=uYiPjJ9OS5ZdxMrZ5wepafpiF25FjdpTRTeEXgNbdGc,561664
|
54
|
+
ipex_llm/libs/libstarcoder_avx.dll,sha256=Lbbm2O3e9niHmfppGLo1I1lnzdmQXXWyDYsoxPrdrps,626688
|
55
|
+
ipex_llm/libs/libstarcoder_vnni.dll,sha256=Sd_nS4gVyJrPV-eqPzerhx3oxjjI3TMXYtB_hZHt0Y0,598528
|
56
|
+
ipex_llm/libs/llama-api.dll,sha256=-tkpqdidqyCR0r3RyKk--KSIUtTz2r9YaMNaOGi_Y44,25600
|
57
|
+
ipex_llm/libs/llama.dll,sha256=n3ZdQ4_01ROY49M0sPDuSmBaAFCEGrj4ArwCJ_W40Rw,561152
|
58
|
+
ipex_llm/libs/main-bloom.exe,sha256=tLu3JxGrrG0vVIkt4DubOcfXXSc9om-8tDItbNfa0Yw,103424
|
59
|
+
ipex_llm/libs/main-gptneox.exe,sha256=ZuaLuNft2_30VWHlORQwx-zspXt7ozdUw41sUfJdip4,98816
|
60
|
+
ipex_llm/libs/main-llama.exe,sha256=MBta6VTF7VAElzaT0Gk_Hi7-zOyJG3Ji0m1o25_Dhjo,99840
|
61
|
+
ipex_llm/libs/main-starcoder.exe,sha256=WQhAYmXLoST8_zeL9xmmeWzI2ezdLXT1A2QZqXwPJoM,157696
|
62
|
+
ipex_llm/libs/pipeline.dll,sha256=vKhyNhxs7FTybiTXv9gnZNF6SXJ-HcsM5qAgDba-iYI,72704
|
63
|
+
ipex_llm/libs/quantize-bloom.exe,sha256=FpagD6uubPkfHSrN4Ejmq5E_EpK9IBqn8CcmXHAcUhY,126464
|
64
|
+
ipex_llm/libs/quantize-bloom_vnni.exe,sha256=lEzEI0LWW0-T4di29PAbalSmPAcN3qNrxsTYrAR9nnE,127488
|
65
|
+
ipex_llm/libs/quantize-gptneox.exe,sha256=5rtTUGcGhiAhx0642jnDjVOdoaK2evGMaKo-P99rqpQ,104448
|
66
|
+
ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=mcoc32UP7v2MFNO8b_fZLYk9YZ6FwNvFpzZQi4q7n7o,104960
|
67
|
+
ipex_llm/libs/quantize-llama.exe,sha256=AQLloOUZCtUTXUTbCH9JW7F-0h7I5JmKG3zrksLDx3U,109568
|
68
|
+
ipex_llm/libs/quantize-llama_vnni.exe,sha256=5KfWhklZ24b228xkRVEAbeY3UeG5-vHaZl5dIo4GXns,110592
|
69
|
+
ipex_llm/libs/quantize-starcoder.exe,sha256=X4PZ68IA8BK0ylRRt6WvVto8jmbxrJvT80R5xBUBFgM,127488
|
70
|
+
ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=jSCIfgmCXt_7ScRIMW6IAagTcoIyEO8SUK0h8ZbBR_I,128512
|
71
|
+
ipex_llm/libs/starcoder-api.dll,sha256=AiG72eS3F3mjj0fqrfcRJsz4g5yGVIiEqrZHqKQR5jE,21504
|
72
|
+
ipex_llm/libs/starcoder.dll,sha256=U_ZoyR-d-vSVn8IYSdbsjkChd2w0Aq6HvlkZ5r_snQo,598016
|
73
73
|
ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
|
74
74
|
ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
|
75
75
|
ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
|
@@ -97,7 +97,7 @@ ipex_llm/transformers/lookup.py,sha256=c4ETIha6ZLbWvhcclSKRDdi5Ipuet4mfUnOkBa0E8
|
|
97
97
|
ipex_llm/transformers/low_bit_linear.py,sha256=dyyYyCqw0GK8hzaUGanrg-uIhU1HTLEEbvbxXMlm-80,41668
|
98
98
|
ipex_llm/transformers/model.py,sha256=KcRjkauGg48BYrUBoUZaVMpg7Piuz5JrfIpVZd3EIjs,41105
|
99
99
|
ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
|
100
|
-
ipex_llm/transformers/npu_model.py,sha256=
|
100
|
+
ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
|
101
101
|
ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
|
102
102
|
ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
|
103
103
|
ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
|
@@ -187,13 +187,14 @@ ipex_llm/transformers/npu_models/chatglm.py,sha256=YzpGLZ7ORt6qkwW9mCwZ_xhOAI8uH
|
|
187
187
|
ipex_llm/transformers/npu_models/chatglm4.py,sha256=J4523DzhIzZxIvlf1V9qU4auzEGKvC80YqyxuCJygjw,9795
|
188
188
|
ipex_llm/transformers/npu_models/common.py,sha256=tTUJL7IxVrJSnXle6nla35wTUrBf2sOEt7Ya1qyMezY,4853
|
189
189
|
ipex_llm/transformers/npu_models/convert.py,sha256=FILSGnoltcR9FMrCkw0eOKh6p3sbBI5i0Ms8AsJc04E,25342
|
190
|
-
ipex_llm/transformers/npu_models/convert_mp.py,sha256=
|
190
|
+
ipex_llm/transformers/npu_models/convert_mp.py,sha256=ADMTnY3utRmCA9kGOCoiJ3NTI4via3TiX6i8duJ2TIE,24504
|
191
191
|
ipex_llm/transformers/npu_models/glm_edge.py,sha256=VsJex-6530h4ZQk35TxRe1MnttAHT41omE8LV47LgBE,6723
|
192
192
|
ipex_llm/transformers/npu_models/kv.py,sha256=2OSFO9Z6e4nGdVxXEM-Bq2qa_npYYbGmQt3lcCZxTlU,9201
|
193
|
-
ipex_llm/transformers/npu_models/linear.py,sha256=
|
193
|
+
ipex_llm/transformers/npu_models/linear.py,sha256=RQxL42laJTm5hz11SNl0KlJX9xM6C_0OiN6soLShDM0,11284
|
194
194
|
ipex_llm/transformers/npu_models/llama.py,sha256=WpRcw7sLnbZeR4XoM-a6XQ-BNYeQaHBEOX1r_O5C9uo,9857
|
195
195
|
ipex_llm/transformers/npu_models/llama_mp.py,sha256=6fyWzbFozKPOfSPDBk2x_Rsejj2P0HOR-jn8SNUWy3s,49349
|
196
|
-
ipex_llm/transformers/npu_models/lm_head.py,sha256
|
196
|
+
ipex_llm/transformers/npu_models/lm_head.py,sha256=-yS0sM8905sQ2S9pwCZ6pX1vZa15aqOPGdoYEyhGbuQ,4825
|
197
|
+
ipex_llm/transformers/npu_models/lm_head_linear.py,sha256=7iPV0UqPqEYTP4-cUtJXxgjOzAObzEY9Utuu3nTIRqI,3683
|
197
198
|
ipex_llm/transformers/npu_models/minicpm.py,sha256=PP3nrCoHMcQ3kHrnQ0gYglCKvL0Dh5MAQX3_Yq8_Ygc,10411
|
198
199
|
ipex_llm/transformers/npu_models/minicpm_mp.py,sha256=0iCRWN9UIUQp5tSKyu-orpGCOxEjQrJ9b8ePnG0-ZV4,41921
|
199
200
|
ipex_llm/transformers/npu_models/minicpmv_mp.py,sha256=m11WT6s_H5wkFtlz7aHMOL9b_CoL_G5MhoL5te4la_Q,20147
|
@@ -246,11 +247,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
|
|
246
247
|
ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
|
247
248
|
ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
|
248
249
|
ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
|
249
|
-
ipex_llm-2.2.
|
250
|
-
ipex_llm-2.2.
|
251
|
-
ipex_llm-2.2.
|
252
|
-
ipex_llm-2.2.
|
253
|
-
ipex_llm-2.2.
|
254
|
-
ipex_llm-2.2.
|
255
|
-
ipex_llm-2.2.
|
256
|
-
ipex_llm-2.2.
|
250
|
+
ipex_llm-2.2.0b20250106.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
|
251
|
+
ipex_llm-2.2.0b20250106.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
|
252
|
+
ipex_llm-2.2.0b20250106.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
|
253
|
+
ipex_llm-2.2.0b20250106.dist-info/METADATA,sha256=RVDr0pwoPE6J0yPUZ9k7t6_jQn01wTwAXkU5ViqE-c8,11374
|
254
|
+
ipex_llm-2.2.0b20250106.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
|
255
|
+
ipex_llm-2.2.0b20250106.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
|
256
|
+
ipex_llm-2.2.0b20250106.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
|
257
|
+
ipex_llm-2.2.0b20250106.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|