ipex-llm 2.3.0b20250603__py3-none-win_amd64.whl → 2.3.0b20250604__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/ggml/quantize.py +3 -0
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/low_bit_linear.py +81 -42
- {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250604.dist-info}/METADATA +11 -11
- {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250604.dist-info}/RECORD +39 -39
- {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250604.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250604.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250604.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250604.dist-info}/WHEEL +0 -0
- {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250604.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250604.dist-info}/top_level.txt +0 -0
ipex_llm/ggml/quantize.py
CHANGED
ipex_llm/libs/bloom-api.dll
CHANGED
Binary file
|
ipex_llm/libs/bloom.dll
CHANGED
Binary file
|
ipex_llm/libs/gptneox-api.dll
CHANGED
Binary file
|
ipex_llm/libs/gptneox.dll
CHANGED
Binary file
|
ipex_llm/libs/libbloom_avx.dll
CHANGED
Binary file
|
ipex_llm/libs/libbloom_vnni.dll
CHANGED
Binary file
|
ipex_llm/libs/libgptneox_avx.dll
CHANGED
Binary file
|
Binary file
|
ipex_llm/libs/libllama_avx.dll
CHANGED
Binary file
|
ipex_llm/libs/libllama_vnni.dll
CHANGED
Binary file
|
Binary file
|
Binary file
|
ipex_llm/libs/llama-api.dll
CHANGED
Binary file
|
ipex_llm/libs/llama.dll
CHANGED
Binary file
|
ipex_llm/libs/main-bloom.exe
CHANGED
Binary file
|
ipex_llm/libs/main-gptneox.exe
CHANGED
Binary file
|
ipex_llm/libs/main-llama.exe
CHANGED
Binary file
|
ipex_llm/libs/main-starcoder.exe
CHANGED
Binary file
|
ipex_llm/libs/pipeline.dll
CHANGED
Binary file
|
ipex_llm/libs/quantize-bloom.exe
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
ipex_llm/libs/quantize-llama.exe
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
ipex_llm/libs/starcoder-api.dll
CHANGED
Binary file
|
ipex_llm/libs/starcoder.dll
CHANGED
Binary file
|
@@ -86,6 +86,8 @@ SYM_INT4_RTN = ggml_tensor_qtype["sym_int4_rtn"]
|
|
86
86
|
SYM_INT8_RTN = ggml_tensor_qtype["sym_int8_rtn"]
|
87
87
|
ASYM_INT4_RTN = ggml_tensor_qtype["asym_int4_rtn"]
|
88
88
|
WOQ_INT4 = ggml_tensor_qtype["woq_int4"]
|
89
|
+
TORCH_FP8E5 = ggml_tensor_qtype["torch_fp8_e5m2"]
|
90
|
+
TORCH_FP8E4 = ggml_tensor_qtype["torch_fp8_e4m3"]
|
89
91
|
RTN_DTYPE = {
|
90
92
|
SYM_INT4_RTN: torch.uint8,
|
91
93
|
ASYM_INT4_RTN: torch.uint8,
|
@@ -106,39 +108,44 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
|
|
106
108
|
imatrix: torch.Tensor=None,
|
107
109
|
in_features: int=None,
|
108
110
|
enable_scale_search: bool=False):
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
"Input tensor must be float32")
|
114
|
-
src = tensor.data.data_ptr()
|
115
|
-
src = ctypes.cast(src, ctypes.POINTER(ctypes.c_float))
|
116
|
-
n = tensor.numel() # all elements
|
117
|
-
k = tensor.shape[-1]
|
118
|
-
invalidInputError(k % QK == 0,
|
119
|
-
f"Last dim of input tensor must be multiple of {QK}")
|
120
|
-
|
121
|
-
dst_size = (n // QK) * block_size_in_bytes
|
122
|
-
if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
|
123
|
-
dst_tensor = torch.empty(dst_size, dtype=RTN_DTYPE[qtype],
|
124
|
-
device=device)
|
125
|
-
dst_tensor = dst_tensor.reshape(tensor.shape[0], tensor.shape[-1] // QK)
|
126
|
-
if qtype == ASYM_INT4_RTN:
|
127
|
-
scale = torch.empty((n // k) * 2, dtype=torch.float32,
|
128
|
-
device=device)
|
129
|
-
else:
|
130
|
-
scale = torch.empty(n // k, dtype=torch.float32,
|
131
|
-
device=device)
|
132
|
-
elif qtype == NF4:
|
133
|
-
# Deepspeed zero3 requires unified dtype,
|
134
|
-
# thus here uses bfloat16 consistent to other layers
|
135
|
-
# dst_size above is computed based on uint8, and for bfloat16,
|
136
|
-
# buffer size should be half
|
137
|
-
dst_tensor = torch.empty(dst_size // 2, dtype=torch.bfloat16,
|
138
|
-
device=device)
|
111
|
+
if qtype in [TORCH_FP8E5, TORCH_FP8E4]:
|
112
|
+
fp8_dtype = torch.float8_e5m2 if qtype == TORCH_FP8E5 else torch.float8_e4m3fn
|
113
|
+
dst_tensor = torch.empty(tensor.shape, device=device, dtype=fp8_dtype)
|
114
|
+
scale = torch.zeros(1, device=device, dtype=torch.float32)
|
139
115
|
else:
|
140
|
-
|
141
|
-
|
116
|
+
QK = ggml.ggml_qk_size(qtype)
|
117
|
+
block_size_in_bytes = ggml.ggml_type_size(qtype)
|
118
|
+
|
119
|
+
invalidInputError(tensor.dtype == torch.float,
|
120
|
+
"Input tensor must be float32")
|
121
|
+
src = tensor.data.data_ptr()
|
122
|
+
src = ctypes.cast(src, ctypes.POINTER(ctypes.c_float))
|
123
|
+
n = tensor.numel() # all elements
|
124
|
+
k = tensor.shape[-1]
|
125
|
+
invalidInputError(k % QK == 0,
|
126
|
+
f"Last dim of input tensor must be multiple of {QK}")
|
127
|
+
|
128
|
+
dst_size = (n // QK) * block_size_in_bytes
|
129
|
+
if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
|
130
|
+
dst_tensor = torch.empty(dst_size, dtype=RTN_DTYPE[qtype],
|
131
|
+
device=device)
|
132
|
+
dst_tensor = dst_tensor.reshape(tensor.shape[0], tensor.shape[-1] // QK)
|
133
|
+
if qtype == ASYM_INT4_RTN:
|
134
|
+
scale = torch.empty((n // k) * 2, dtype=torch.float32,
|
135
|
+
device=device)
|
136
|
+
else:
|
137
|
+
scale = torch.empty(n // k, dtype=torch.float32,
|
138
|
+
device=device)
|
139
|
+
elif qtype == NF4:
|
140
|
+
# Deepspeed zero3 requires unified dtype,
|
141
|
+
# thus here uses bfloat16 consistent to other layers
|
142
|
+
# dst_size above is computed based on uint8, and for bfloat16,
|
143
|
+
# buffer size should be half
|
144
|
+
dst_tensor = torch.empty(dst_size // 2, dtype=torch.bfloat16,
|
145
|
+
device=device)
|
146
|
+
else:
|
147
|
+
dst_tensor = torch.empty(dst_size, dtype=torch.uint8,
|
148
|
+
device=device)
|
142
149
|
|
143
150
|
if not convert_shape_only and device != 'meta':
|
144
151
|
dst = ctypes.c_void_p(dst_tensor.data.data_ptr())
|
@@ -158,6 +165,17 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
|
|
158
165
|
enable_scale_search,
|
159
166
|
imatrix)
|
160
167
|
return dst_tensor, scale.type(torch.float16)
|
168
|
+
elif qtype in [TORCH_FP8E5, TORCH_FP8E4]:
|
169
|
+
import xe_linear
|
170
|
+
tensor_device = tensor.device
|
171
|
+
tensor_xpu = tensor.to("xpu")
|
172
|
+
dst_tensor = dst_tensor.to("xpu")
|
173
|
+
scale = scale.to("xpu")
|
174
|
+
|
175
|
+
xe_linear.dynamic_scaled_fp8_quant(dst_tensor, tensor_xpu, scale)
|
176
|
+
|
177
|
+
# scale = scale.to(tensor_device)
|
178
|
+
dst_tensor = dst_tensor.to(tensor_device)
|
161
179
|
else:
|
162
180
|
ggml.ggml_quantize_tensor(src, dst, qtype, n, k, hist, enable_scale_search)
|
163
181
|
else:
|
@@ -171,6 +189,8 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
|
|
171
189
|
hist, imatrix)
|
172
190
|
if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
|
173
191
|
return dst_tensor, scale.type(torch.float16)
|
192
|
+
elif qtype in [TORCH_FP8E5, TORCH_FP8E4]:
|
193
|
+
return dst_tensor, scale
|
174
194
|
else:
|
175
195
|
return dst_tensor
|
176
196
|
|
@@ -179,7 +199,7 @@ def ggml_q_format_convet_cpu2xpu(tensor: torch.Tensor, num_elem: int, qtype: int
|
|
179
199
|
if qtype == NF4:
|
180
200
|
invalidInputError(tensor.dtype == torch.bfloat16,
|
181
201
|
"NF4 Input tensor must be bfloat16")
|
182
|
-
|
202
|
+
elif qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
|
183
203
|
invalidInputError(tensor.dtype == torch.uint8,
|
184
204
|
"Input tensor except NF4 must be uint8")
|
185
205
|
|
@@ -208,7 +228,7 @@ def ggml_q_format_convet_xpu2cpu(tensor: torch.Tensor, num_elem: int, qtype: int
|
|
208
228
|
if qtype == NF4:
|
209
229
|
invalidInputError(tensor.dtype == torch.bfloat16,
|
210
230
|
"NF4 Input tensor must be bfloat16")
|
211
|
-
|
231
|
+
elif qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
|
212
232
|
invalidInputError(tensor.dtype == torch.uint8,
|
213
233
|
"Input tensor must be uint8")
|
214
234
|
|
@@ -319,7 +339,8 @@ class FP4Params(torch.nn.Parameter):
|
|
319
339
|
qtype=None,
|
320
340
|
imatrix=None,
|
321
341
|
in_features=None,
|
322
|
-
enable_scale_search=False
|
342
|
+
enable_scale_search=False,
|
343
|
+
torch_fp8_scale=None):
|
323
344
|
if data is None:
|
324
345
|
data = torch.empty(0)
|
325
346
|
|
@@ -332,6 +353,7 @@ class FP4Params(torch.nn.Parameter):
|
|
332
353
|
self.imatrix = imatrix
|
333
354
|
self.in_features = in_features
|
334
355
|
self.enable_scale_search = enable_scale_search
|
356
|
+
self.torch_fp8_scale = torch_fp8_scale
|
335
357
|
return self
|
336
358
|
|
337
359
|
def ggml_mse(self, w, ggml_qtype, device):
|
@@ -391,7 +413,11 @@ class FP4Params(torch.nn.Parameter):
|
|
391
413
|
imatrix=self.imatrix,
|
392
414
|
in_features=self.in_features,
|
393
415
|
enable_scale_search=self.enable_scale_search)
|
394
|
-
self.data = w_quantized
|
416
|
+
if self.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
|
417
|
+
self.data = w_quantized[0]
|
418
|
+
self.torch_fp8_scale = w_quantized[1]
|
419
|
+
else:
|
420
|
+
self.data = w_quantized
|
395
421
|
self.quantized = True
|
396
422
|
self._shape = w.shape
|
397
423
|
return self
|
@@ -414,6 +440,8 @@ class FP4Params(torch.nn.Parameter):
|
|
414
440
|
|
415
441
|
def to(self, *args, **kwargs):
|
416
442
|
device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
|
443
|
+
if self.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
|
444
|
+
dtype = None
|
417
445
|
if (device is not None and device.type == "cpu" and self.data.device.type == "cpu"):
|
418
446
|
return self.quantize(device.type)
|
419
447
|
elif device is not None and device.type == "meta" and self.data.device.type == "meta":
|
@@ -424,6 +452,7 @@ class FP4Params(torch.nn.Parameter):
|
|
424
452
|
self.data = ggml_q_format_convet_cpu2xpu(self.data,
|
425
453
|
reduce(mul, self._shape, 1),
|
426
454
|
self.qtype)
|
455
|
+
fp8_scale = None if self.torch_fp8_scale is None else self.torch_fp8_scale.to(device)
|
427
456
|
new_param = FP4Params(super().to(device=device,
|
428
457
|
dtype=dtype,
|
429
458
|
non_blocking=non_blocking),
|
@@ -431,9 +460,11 @@ class FP4Params(torch.nn.Parameter):
|
|
431
460
|
quantized=self.quantized,
|
432
461
|
_shape=self._shape,
|
433
462
|
qtype=self.qtype,
|
434
|
-
enable_scale_search=self.enable_scale_search
|
463
|
+
enable_scale_search=self.enable_scale_search,
|
464
|
+
torch_fp8_scale=fp8_scale)
|
435
465
|
return new_param
|
436
466
|
elif (device is not None and device.type == "cpu" and self.data.device.type == "xpu"):
|
467
|
+
fp8_scale = None if self.torch_fp8_scale is None else self.torch_fp8_scale.to(device)
|
437
468
|
new_param = FP4Params(super().to(device=device,
|
438
469
|
dtype=dtype,
|
439
470
|
non_blocking=non_blocking),
|
@@ -441,7 +472,8 @@ class FP4Params(torch.nn.Parameter):
|
|
441
472
|
quantized=self.quantized,
|
442
473
|
_shape=self._shape,
|
443
474
|
qtype=self.qtype,
|
444
|
-
enable_scale_search=self.enable_scale_search
|
475
|
+
enable_scale_search=self.enable_scale_search,
|
476
|
+
torch_fp8_scale=fp8_scale)
|
445
477
|
ggml_xpu = new_param.data
|
446
478
|
new_param.data = ggml_q_format_convet_xpu2cpu(ggml_xpu,
|
447
479
|
reduce(mul, new_param._shape, 1),
|
@@ -614,6 +646,7 @@ class LowBitLinear(nn.Linear):
|
|
614
646
|
# Due to inconsistent training status in some models like Baichuan-7b-Chat,
|
615
647
|
# we should check both self.training and torch.is_inference_mode_enabled().
|
616
648
|
is_training = self.training and not torch.is_inference_mode_enabled()
|
649
|
+
|
617
650
|
if is_training:
|
618
651
|
# below logic is only for training
|
619
652
|
autocast_dtype = get_autocast_dtype(x.device.type)
|
@@ -643,6 +676,8 @@ class LowBitLinear(nn.Linear):
|
|
643
676
|
|
644
677
|
if self.weight.device.type == "xpu":
|
645
678
|
if is_training and x_2d.requires_grad:
|
679
|
+
invalidInputError(self.weight.qtype not in [TORCH_FP8E5, TORCH_FP8E4],
|
680
|
+
"TORCH_FP8 training is not supported.")
|
646
681
|
result = MatMulLowBit.apply(x_2d, self.weight, self.out_len)
|
647
682
|
else:
|
648
683
|
do_empty_cache = self.low_memory_mode and x_2d.shape[0] >= 1024
|
@@ -654,7 +689,11 @@ class LowBitLinear(nn.Linear):
|
|
654
689
|
else:
|
655
690
|
w = self.weight.data
|
656
691
|
|
657
|
-
if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
|
692
|
+
if self.weight.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
|
693
|
+
import xe_linear
|
694
|
+
result = xe_linear.run_linear_fp8(x_2d, w, self.bias,
|
695
|
+
self.weight.torch_fp8_scale)
|
696
|
+
elif use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
|
658
697
|
(x_2d.dtype == torch.half or self.conver_to_half):
|
659
698
|
import xe_batch
|
660
699
|
result = xe_batch.batch_forward(x_2d, w, self.qtype)
|
@@ -682,13 +721,13 @@ class LowBitLinear(nn.Linear):
|
|
682
721
|
else:
|
683
722
|
invalidInputError(False, "mp_group is not None, but no supported backend found")
|
684
723
|
|
685
|
-
if self.bias is not None:
|
724
|
+
if self.bias is not None and self.weight.qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
|
686
725
|
result += self.bias
|
687
726
|
else:
|
688
727
|
# CPU logic
|
689
728
|
# todo may need to set a different number on different platforms
|
690
|
-
invalidInputError(self.qtype
|
691
|
-
|
729
|
+
invalidInputError(self.qtype not in [NF3, NF4, FP8E4, FP4, FP8E5,
|
730
|
+
TORCH_FP8E5, TORCH_FP8E4],
|
692
731
|
"NF3, NF4, FP4 and FP8 quantization are currently not"
|
693
732
|
" supported on CPU")
|
694
733
|
if self.training and x.requires_grad:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ipex-llm
|
3
|
-
Version: 2.3.0b20250603
|
3
|
+
Version: 2.3.0b20250604
|
4
4
|
Summary: Large Language Model Develop Toolkit
|
5
5
|
Home-page: https://github.com/intel-analytics/ipex-llm
|
6
6
|
Author: BigDL Authors
|
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
|
|
27
27
|
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
|
28
28
|
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
|
29
29
|
Provides-Extra: cpp
|
30
|
-
Requires-Dist: bigdl-core-cpp ==2.7.0b20250603 ; extra == 'cpp'
|
30
|
+
Requires-Dist: bigdl-core-cpp ==2.7.0b20250604 ; extra == 'cpp'
|
31
31
|
Requires-Dist: setuptools ; extra == 'cpp'
|
32
32
|
Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
|
33
33
|
Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
|
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
|
|
60
60
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
|
61
61
|
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
|
62
62
|
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
|
63
|
-
Requires-Dist: bigdl-core-npu ==2.7.0b20250603 ; (platform_system == "Windows") and extra == 'npu'
|
63
|
+
Requires-Dist: bigdl-core-npu ==2.7.0b20250604 ; (platform_system == "Windows") and extra == 'npu'
|
64
64
|
Provides-Extra: serving
|
65
65
|
Requires-Dist: py-cpuinfo ; extra == 'serving'
|
66
66
|
Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
|
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
|
|
80
80
|
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
|
81
81
|
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
|
82
82
|
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
|
83
|
-
Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250603 ; extra == 'xpu'
|
84
|
-
Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250603 ; extra == 'xpu'
|
85
|
-
Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250603 ; extra == 'xpu'
|
83
|
+
Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250604 ; extra == 'xpu'
|
84
|
+
Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250604 ; extra == 'xpu'
|
85
|
+
Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250604 ; extra == 'xpu'
|
86
86
|
Provides-Extra: xpu-2-1
|
87
87
|
Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
|
88
88
|
Requires-Dist: protobuf ; extra == 'xpu-2-1'
|
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
|
|
97
97
|
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
|
98
98
|
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
|
99
99
|
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
|
100
|
-
Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
|
101
|
-
Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
|
102
|
-
Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
|
100
|
+
Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250604 ; extra == 'xpu-2-1'
|
101
|
+
Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250604 ; extra == 'xpu-2-1'
|
102
|
+
Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250604 ; extra == 'xpu-2-1'
|
103
103
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
|
104
104
|
Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
|
105
105
|
Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
|
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
|
|
117
117
|
Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
|
118
118
|
Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
|
119
119
|
Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
|
120
|
-
Requires-Dist: bigdl-core-xe-all ==2.7.0b20250603 ; extra == 'xpu-2-6'
|
120
|
+
Requires-Dist: bigdl-core-xe-all ==2.7.0b20250604 ; extra == 'xpu-2-6'
|
121
121
|
Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
|
122
122
|
Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
|
123
123
|
Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
|
@@ -132,7 +132,7 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-2-6-arl'
|
|
132
132
|
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-2-6-arl'
|
133
133
|
Requires-Dist: tabulate ; extra == 'xpu-2-6-arl'
|
134
134
|
Requires-Dist: setuptools ; extra == 'xpu-2-6-arl'
|
135
|
-
Requires-Dist: bigdl-core-xe-all ==2.7.0b20250603 ; extra == 'xpu-2-6-arl'
|
135
|
+
Requires-Dist: bigdl-core-xe-all ==2.7.0b20250604 ; extra == 'xpu-2-6-arl'
|
136
136
|
Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6-arl'
|
137
137
|
Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6-arl'
|
138
138
|
Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6-arl'
|
@@ -9,7 +9,7 @@ ipex_llm/cli/prompts/chat-with-llm.txt,sha256=PpSyd4FQQd-T7ptfXL9jZp7dgstevu1fsx
|
|
9
9
|
ipex_llm/ggml/__init__.py,sha256=FzapYBUiTdZf0LzlN9hfJI-HE1OTi_2dzaYELJ9Mw8s,1272
|
10
10
|
ipex_llm/ggml/convert.py,sha256=xfWH1E_hivbsxVo8h00STjH1Rlu9-dZQkCLLeIs1TWA,5286
|
11
11
|
ipex_llm/ggml/convert_model.py,sha256=t-tGK9w8ZRi9dlDLTutput3ZBKj3ji94WUJi2KG8hkA,5955
|
12
|
-
ipex_llm/ggml/quantize.py,sha256=
|
12
|
+
ipex_llm/ggml/quantize.py,sha256=3RQvkCvYz6rTRXAaXMK854JA6g1d8uq5JXZ7OZcj1eg,6490
|
13
13
|
ipex_llm/ggml/model/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
|
14
14
|
ipex_llm/ggml/model/bloom/__init__.py,sha256=291QHI19FMw7Z1oaKBAf2YJ0M51iYqWC4IT1ejI-OGg,900
|
15
15
|
ipex_llm/ggml/model/bloom/bloom.py,sha256=fUxgZd_Uc4RXaMC_naYdjekwNprM1TpURmQ8VbocShc,17975
|
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
|
|
41
41
|
ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
|
42
42
|
ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
|
43
43
|
ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
|
-
ipex_llm/libs/bloom-api.dll,sha256=
|
45
|
-
ipex_llm/libs/bloom.dll,sha256=
|
46
|
-
ipex_llm/libs/gptneox-api.dll,sha256=
|
47
|
-
ipex_llm/libs/gptneox.dll,sha256=
|
48
|
-
ipex_llm/libs/libbloom_avx.dll,sha256=
|
49
|
-
ipex_llm/libs/libbloom_vnni.dll,sha256=
|
50
|
-
ipex_llm/libs/libgptneox_avx.dll,sha256=
|
51
|
-
ipex_llm/libs/libgptneox_vnni.dll,sha256=
|
52
|
-
ipex_llm/libs/libllama_avx.dll,sha256=
|
53
|
-
ipex_llm/libs/libllama_vnni.dll,sha256=
|
54
|
-
ipex_llm/libs/libstarcoder_avx.dll,sha256=
|
55
|
-
ipex_llm/libs/libstarcoder_vnni.dll,sha256=
|
56
|
-
ipex_llm/libs/llama-api.dll,sha256=
|
57
|
-
ipex_llm/libs/llama.dll,sha256=
|
58
|
-
ipex_llm/libs/main-bloom.exe,sha256=
|
59
|
-
ipex_llm/libs/main-gptneox.exe,sha256=
|
60
|
-
ipex_llm/libs/main-llama.exe,sha256=
|
61
|
-
ipex_llm/libs/main-starcoder.exe,sha256=
|
62
|
-
ipex_llm/libs/pipeline.dll,sha256=
|
63
|
-
ipex_llm/libs/quantize-bloom.exe,sha256=
|
64
|
-
ipex_llm/libs/quantize-bloom_vnni.exe,sha256=
|
65
|
-
ipex_llm/libs/quantize-gptneox.exe,sha256=
|
66
|
-
ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=
|
67
|
-
ipex_llm/libs/quantize-llama.exe,sha256=
|
68
|
-
ipex_llm/libs/quantize-llama_vnni.exe,sha256=
|
69
|
-
ipex_llm/libs/quantize-starcoder.exe,sha256=
|
70
|
-
ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=
|
71
|
-
ipex_llm/libs/starcoder-api.dll,sha256=
|
72
|
-
ipex_llm/libs/starcoder.dll,sha256=
|
44
|
+
ipex_llm/libs/bloom-api.dll,sha256=iVAUoeGnV_rF3-X-j-QGGvnWqafT07gZvwPYXI3tCgc,36352
|
45
|
+
ipex_llm/libs/bloom.dll,sha256=vbPiKtGYzofzMwNPLM1kEvp2xV5AzmGzK9S82zt-p_4,507904
|
46
|
+
ipex_llm/libs/gptneox-api.dll,sha256=rpWEftSzELHl6TPKo0DamFojlHV1M9fOzX0uvnJG4OA,24576
|
47
|
+
ipex_llm/libs/gptneox.dll,sha256=3nXMFGPJZXKSES76iQdsvYZekClS8k0H_BGMV1h7Pz8,568320
|
48
|
+
ipex_llm/libs/libbloom_avx.dll,sha256=HkhwLiNIP5TEuT1yLyX5Y1v57ys0bXQEH2RAl6aC4mA,536576
|
49
|
+
ipex_llm/libs/libbloom_vnni.dll,sha256=YCQR1-6NJ-amAiAl7VzP8DXM-IAO6q-IpGXpeAe-ZDI,508416
|
50
|
+
ipex_llm/libs/libgptneox_avx.dll,sha256=TOMLirth6zzBmwP8fIResFDditfSYinh_nVopN3ibNg,596992
|
51
|
+
ipex_llm/libs/libgptneox_vnni.dll,sha256=rP2yd6QDwIg5zCUysBTBjule502oQa9TgcoKpAHijhQ,568832
|
52
|
+
ipex_llm/libs/libllama_avx.dll,sha256=4F2DV-YP-2S41_bUh5BiOuZzNzGhLQ4kHvxh4twmYdE,591360
|
53
|
+
ipex_llm/libs/libllama_vnni.dll,sha256=yiA7vMw04Xw4UtJdmrEN8E0XT51-Drln0-KWI8D2BFQ,563200
|
54
|
+
ipex_llm/libs/libstarcoder_avx.dll,sha256=BFnibBHCD__0eVi1lpC-8h1MTyPwW-otxHMn7SGTZz8,627712
|
55
|
+
ipex_llm/libs/libstarcoder_vnni.dll,sha256=u73NhyE5wwyUdA1vwmx_FE95imLEHk__f5nv4AqwwAM,599552
|
56
|
+
ipex_llm/libs/llama-api.dll,sha256=jSAsxabG0ER6IRnw2V9ada-SVZUGLvM9qwpQyvmMjt8,25600
|
57
|
+
ipex_llm/libs/llama.dll,sha256=3Sr7SEiUda9-teaaewL0docXM2YgMGmDz5-cVIHFXS4,562688
|
58
|
+
ipex_llm/libs/main-bloom.exe,sha256=weAS6Dk_IgKtDwjN1nPfG3USxW78l3fo3HM6v8C1l7w,103424
|
59
|
+
ipex_llm/libs/main-gptneox.exe,sha256=3wO6Ly8VxHt4_YaBFPbtcAY6LoTzs1Go10jCfbV4Psc,98816
|
60
|
+
ipex_llm/libs/main-llama.exe,sha256=ui3i416Tip6JuQqDqOGyqZ63gTsHSY-ANQNQd4OnSZY,99840
|
61
|
+
ipex_llm/libs/main-starcoder.exe,sha256=gTS8pi9u8R6eooZcAXl5NBPiNoAfsK0N2Wvy-_2Htb8,157696
|
62
|
+
ipex_llm/libs/pipeline.dll,sha256=N4gt8uAhjSIcJ6TXth2bRBOmFt9zw69RoEfwHj4uC4g,73216
|
63
|
+
ipex_llm/libs/quantize-bloom.exe,sha256=9WDSjP-XPo1KY2mwfc-nabweyqYmiRqFlFV0qDf-u3s,126464
|
64
|
+
ipex_llm/libs/quantize-bloom_vnni.exe,sha256=SIxH7u_OLOl8WGOcZnVQuU0grKk9B03z9QcUeZwIsXA,128000
|
65
|
+
ipex_llm/libs/quantize-gptneox.exe,sha256=ZADeUDJGungBqH4aZHFeJdQCw_5EI_U4K7eceL54D68,104448
|
66
|
+
ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=HH7HXiEZH5snhShSBMVYuslFSf_TEbyYF9QlTDBqxGM,104960
|
67
|
+
ipex_llm/libs/quantize-llama.exe,sha256=YIWlACmLjiTojIRyfngP_xXVdrqSnWy45C9aIu3IP-M,110080
|
68
|
+
ipex_llm/libs/quantize-llama_vnni.exe,sha256=wsVIJkeuEMvYMQw3Ea0G8dnRQuoaNRN5ZgQtq-UMGbU,110592
|
69
|
+
ipex_llm/libs/quantize-starcoder.exe,sha256=wPr0Zqg92-98mMYf9ldflgItY6_CkrckhLDn7jMwEvQ,127488
|
70
|
+
ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=vmbkQpOp9SYc_3FrvXPmr8bBo6eCumQqOIXnjJBc7Tg,128512
|
71
|
+
ipex_llm/libs/starcoder-api.dll,sha256=xYBjwTBWFqoTYE3bFxSINh76RT55VQ3ItSEtctn2bDI,21504
|
72
|
+
ipex_llm/libs/starcoder.dll,sha256=B2qwDw0L_cK2PM-xpXDMltIhkDK2MpXK9-3uB2Yh--8,599040
|
73
73
|
ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
|
74
74
|
ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
|
75
75
|
ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
|
@@ -94,7 +94,7 @@ ipex_llm/transformers/kv.py,sha256=src_HcVDKFwQ1V8hdTrFQw5RIwUewM9VOR47GVTPJG4,2
|
|
94
94
|
ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
|
95
95
|
ipex_llm/transformers/loader.py,sha256=c9qfJSC6-in-mkd-iKb1igk3nHWUYS3QtyH2cOazmKc,6825
|
96
96
|
ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
|
97
|
-
ipex_llm/transformers/low_bit_linear.py,sha256=
|
97
|
+
ipex_llm/transformers/low_bit_linear.py,sha256=03TMG4GZsgRPvchQC2h7eMU9IQ9XCyVcdh3Pvi7_Rew,41550
|
98
98
|
ipex_llm/transformers/model.py,sha256=tWTzKsCz8A1P5gYEeG9KZgpxQgbP9hQ-TWAdkebA6Jg,40886
|
99
99
|
ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
|
100
100
|
ipex_llm/transformers/npu_model.py,sha256=X8mdY6N9TYlxG41wmFloX44ZUjyitFzdKbhzO7TToFY,40309
|
@@ -258,11 +258,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=sOvwLx_Zj0jiRCGj9W3DgGTfcSU3hABYhgIQ
|
|
258
258
|
ipex_llm/vllm/xpu/engine/engine.py,sha256=XAprw7VifjfnR915TZOaKcxe3QCFsVBgxzS8qOdn1yg,14462
|
259
259
|
ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=VlmS56hBHBZTIZ5Jhvb4TZN-h28O7uMn33hX8NiJXKk,45719
|
260
260
|
ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
|
261
|
-
ipex_llm-2.3.
|
262
|
-
ipex_llm-2.3.
|
263
|
-
ipex_llm-2.3.
|
264
|
-
ipex_llm-2.3.
|
265
|
-
ipex_llm-2.3.
|
266
|
-
ipex_llm-2.3.
|
267
|
-
ipex_llm-2.3.
|
268
|
-
ipex_llm-2.3.
|
261
|
+
ipex_llm-2.3.0b20250604.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
|
262
|
+
ipex_llm-2.3.0b20250604.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
|
263
|
+
ipex_llm-2.3.0b20250604.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
|
264
|
+
ipex_llm-2.3.0b20250604.dist-info/METADATA,sha256=E9hbz-rGmyrCUOmWC7ZcofUNbDTTJRerkDK_aCPMLek,8865
|
265
|
+
ipex_llm-2.3.0b20250604.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
|
266
|
+
ipex_llm-2.3.0b20250604.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
|
267
|
+
ipex_llm-2.3.0b20250604.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
|
268
|
+
ipex_llm-2.3.0b20250604.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|