ipex-llm 2.2.0b20250120__py3-none-win_amd64.whl → 2.2.0b20250122__py3-none-win_amd64.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (50)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/convert.py +0 -1
  31. ipex_llm/transformers/low_bit_linear.py +8 -5
  32. ipex_llm/transformers/model.py +1 -3
  33. ipex_llm/transformers/patches.py +0 -11
  34. ipex_llm/transformers/utils.py +16 -10
  35. ipex_llm/vllm/cpu/engine/__init__.py +2 -1
  36. ipex_llm/vllm/cpu/engine/engine.py +159 -75
  37. ipex_llm/vllm/cpu/entrypoints/api_server.py +787 -0
  38. ipex_llm/vllm/cpu/entrypoints/openai/api_server.py +680 -95
  39. ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py +277 -0
  40. ipex_llm/vllm/cpu/ipex_llm_v1_wrapper.py +23 -0
  41. ipex_llm/vllm/cpu/ipex_llm_wrapper.py +24 -0
  42. ipex_llm/vllm/cpu/model_convert.py +126 -233
  43. {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/METADATA +20 -20
  44. {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/RECORD +50 -46
  45. {ipex_llm-2.2.0b20250120.data → ipex_llm-2.2.0b20250122.data}/scripts/ipex-llm-init.bat +0 -0
  46. {ipex_llm-2.2.0b20250120.data → ipex_llm-2.2.0b20250122.data}/scripts/llm-chat.ps1 +0 -0
  47. {ipex_llm-2.2.0b20250120.data → ipex_llm-2.2.0b20250122.data}/scripts/llm-cli.ps1 +0 -0
  48. {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/WHEEL +0 -0
  49. {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/entry_points.txt +0 -0
  50. {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/top_level.txt +0 -0
@@ -14,259 +14,152 @@
14
14
  # limitations under the License.
15
15
  #
16
16
  import torch
17
+ from typing import Optional, Union
18
+ from vllm.distributed import tensor_model_parallel_gather, tensor_model_parallel_all_gather
17
19
  from vllm.logger import init_logger
18
- from vllm.model_executor.model_loader import get_model
19
- from vllm.model_executor.model_loader.utils import get_model_architecture
20
- from vllm.model_executor.models.llama import LlamaMLP, LlamaAttention
21
- from vllm.model_executor.models.qwen2 import Qwen2MLP, Qwen2Attention
22
- from vllm.model_executor.models.qwen import QWenMLP, QWenAttention
20
+ from vllm.model_executor.models.llama import LlamaMLP, LlamaAttention, LlamaForCausalLM
21
+ from vllm.model_executor.models.qwen2 import Qwen2MLP, Qwen2Attention, Qwen2ForCausalLM
22
+ from vllm.model_executor.models.qwen import QWenMLP, QWenAttention, QWenLMHeadModel
23
23
  from vllm.model_executor.models.baichuan import BaiChuanMLP, BaiChuanAttention
24
- from vllm.model_executor.models.chatglm import GLMMLP, GLMAttention
25
- from vllm.attention import Attention, AttentionMetadata
26
- from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager
24
+ from vllm.model_executor.models.baichuan import BaiChuanBaseForCausalLM
25
+ from vllm.model_executor.models.chatglm import GLMMLP, GLMAttention, ChatGLMForCausalLM
26
+ from vllm.model_executor.model_loader import get_model
27
+ from vllm.model_executor.layers.vocab_parallel_embedding import (
28
+ VocabParallelEmbedding)
29
+ from vllm.attention import AttentionMetadata
27
30
  from vllm.config import DeviceConfig
28
- from vllm.logger import init_logger
29
-
30
- from vllm._C import ops
31
- from ipex_llm.utils.common import invalidInputError
32
- from typing import List, Optional, Tuple, Union
33
-
34
- logger = init_logger(__name__)
35
-
36
-
37
- def _MLP_forward(self, x):
38
- gate_up = self.gate_up_proj(x)
39
- x = self.act_fn(gate_up)
40
- x = self.down_proj(x)
41
- return x
42
-
43
-
44
- def _Attention_forward(
45
- self,
46
- positions: torch.Tensor,
47
- hidden_states: torch.Tensor,
48
- kv_cache: torch.Tensor,
49
- attn_metadata: AttentionMetadata,
50
- ) -> torch.Tensor:
51
- qkv = self.qkv_proj(hidden_states).to(dtype=kv_cache.dtype)
52
- q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
53
- q, k = self.rotary_emb(positions, q, k)
54
- attn_output = self.attn(q, k, v, kv_cache, attn_metadata, self.kv_scale)
55
- output = self.o_proj(attn_output)
56
- return output
57
-
58
-
59
- def _QWen_Attention_forward(
60
- self,
61
- positions: torch.Tensor,
62
- hidden_states: torch.Tensor,
63
- kv_cache: Tuple[torch.Tensor, torch.Tensor],
64
- attn_metadata: AttentionMetadata,
65
- ) -> torch.Tensor:
66
- qkv = self.c_attn(hidden_states).to(dtype=kv_cache.dtype)
67
- q, k, v = qkv.chunk(chunks=3, dim=-1)
68
- q, k = self.rotary_emb(positions, q, k)
69
- attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
70
- output = self.c_proj(attn_output)
71
- return output
72
-
73
-
74
- def _QWen_MLP_forward(self, x):
75
- gate_up = self.gate_up_proj(x)
76
- x = self.act_fn(gate_up)
77
- x = self.c_proj(x)
78
- return x
79
-
80
-
81
- def _Qwen2_Attention_forward(
82
- self,
83
- positions: torch.Tensor,
84
- hidden_states: torch.Tensor,
85
- kv_cache: torch.Tensor,
86
- attn_metadata: AttentionMetadata,
87
- ) -> torch.Tensor:
88
- qkv = self.qkv_proj(hidden_states).to(dtype=kv_cache.dtype)
89
- q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
90
- q, k = self.rotary_emb(positions, q, k)
91
- attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
92
- output = self.o_proj(attn_output)
93
- return output
94
-
95
-
96
- def _ChatGLM_MLP_forward(self, hidden_states):
97
- # [s, b, 4hp]
98
- intermediate_parallel = self.dense_h_to_4h(hidden_states)
99
- intermediate_parallel = self.activation_func(intermediate_parallel)
100
- # [s, b, h]
101
- output = self.dense_4h_to_h(intermediate_parallel)
102
- return output
103
-
104
-
105
- def _Baichuan_Attention_forward(
106
- self,
107
- positions: torch.Tensor,
108
- hidden_states: torch.Tensor,
109
- kv_cache: Tuple[torch.Tensor, torch.Tensor],
110
- attn_metadata: AttentionMetadata,
111
- ) -> torch.Tensor:
112
- qkv = self.W_pack(hidden_states).to(dtype=kv_cache.dtype)
113
- q, k, v = qkv.chunk(chunks=3, dim=-1)
114
- if self.postion_embedding != "ALIBI":
115
- q, k = self.rotary_emb(positions, q, k)
116
- attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
117
- output = self.o_proj(attn_output)
118
- return output
31
+ from typing import Tuple
32
+ from ipex_llm.transformers.low_bit_linear import LowBitLinear
119
33
 
120
34
 
121
- def _ChatGLM_Attention_forward(
35
+ def _sample_get_logits(
122
36
  self,
123
37
  hidden_states: torch.Tensor,
124
- position_ids: torch.Tensor,
125
- kv_cache: Tuple[torch.Tensor, torch.Tensor],
126
- attn_metadata: AttentionMetadata,
38
+ lm_head: Union[VocabParallelEmbedding, LowBitLinear],
39
+ embedding_bias: Optional[torch.Tensor],
127
40
  ) -> torch.Tensor:
128
- qkv = self.query_key_value(hidden_states).to(dtype=kv_cache.dtype)
129
- q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
130
- q, k = self.rotary_emb(position_ids, q, k)
131
- context_layer = self.attn(
132
- q,
133
- k,
134
- v,
135
- kv_cache,
136
- attn_metadata,
137
- )
138
- attn_output = self.dense(context_layer)
139
- return attn_output
140
-
141
- _REPLACED_MLP_LAYERS = {
142
- LlamaMLP: _MLP_forward,
143
- Qwen2MLP: _MLP_forward,
144
- BaiChuanMLP: _MLP_forward,
145
- # QWenMLP: _QWen_MLP_forward,
146
- GLMMLP: _ChatGLM_MLP_forward
147
- }
148
-
149
- _REPLACED_ATTENTION_LAYERS = {
150
- LlamaAttention: _Attention_forward,
151
- Qwen2Attention: _Qwen2_Attention_forward,
152
- # QWenAttention: _QWen_Attention_forward,
153
- BaiChuanAttention: _Baichuan_Attention_forward,
154
- GLMAttention: _ChatGLM_Attention_forward
155
- }
156
-
157
- _IPEX_LLM_SUPPORTED_MODELS = [
158
- "LlamaForCausalLM",
159
- "BaichuanForCausalLM",
160
- "ChatGLMForCausalLM",
161
- "Qwen2ForCausalLM",
162
- ]
163
-
164
-
165
- def _model_mlp_convert():
166
- for module, replaced_func in _REPLACED_MLP_LAYERS.items():
167
- setattr(module, "forward", replaced_func)
41
+ # HINT: we do not support other types of quantization for now
42
+ # TODO: we may encounter tie-word-embedding problems
43
+ if isinstance(lm_head, VocabParallelEmbedding):
44
+ logits = lm_head.linear_method.apply(lm_head,
45
+ hidden_states,
46
+ bias=embedding_bias)
47
+ else:
48
+ logits = lm_head(hidden_states)
49
+ if embedding_bias is not None:
50
+ logits += embedding_bias
51
+ if self.use_gather:
52
+ logits = tensor_model_parallel_gather(logits)
53
+ else:
54
+ logits = tensor_model_parallel_all_gather(logits)
55
+ if logits is not None:
56
+ logits = logits[:, : self.org_vocab_size]
57
+ return logits
168
58
 
169
59
 
170
- def _model_attention_convert():
171
- for module, replaced_func in _REPLACED_ATTENTION_LAYERS.items():
172
- setattr(module, "forward", replaced_func)
60
+ def _model_sample_convert():
61
+ from vllm.model_executor.layers.logits_processor import LogitsProcessor
62
+ setattr(LogitsProcessor, "_get_logits", _sample_get_logits)
173
63
 
174
64
 
175
65
  def _ipex_llm_convert(load_in_low_bit):
176
- if load_in_low_bit is None:
177
- return
178
66
  from vllm.worker.cpu_model_runner import CPUModelRunner
179
- import vllm.model_executor.model_loader as model_loader
67
+ from ipex_llm.vllm.cpu.ipex_llm_wrapper import get_ipex_llm_wrapper
68
+ from ipex_llm.vllm.cpu.ipex_llm_v1_wrapper import get_ipex_llm_v1_wrapper
69
+ import vllm.executor.ray_utils as ray_utils_v0
70
+ import vllm.v1.executor.ray_utils as ray_utils_v1
180
71
  setattr(CPUModelRunner, "load_model", get_load_function(load_in_low_bit))
181
-
182
- from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding
183
- setattr(RotaryEmbedding, "forward", _ipex_llm_rotary_embedding_forward)
184
- from vllm.model_executor.layers.layernorm import RMSNorm
185
- setattr(RMSNorm, "forward", _ipex_llm_rmsnorm_forward)
186
-
187
-
188
- def _ipex_llm_rotary_embedding_forward(
189
- self,
190
- positions: torch.Tensor,
191
- query: torch.Tensor,
192
- key: torch.Tensor,
193
- offsets: Optional[torch.Tensor] = None,
194
- ) -> Tuple[torch.Tensor, torch.Tensor]:
195
- self.cos_sin_cache = self.cos_sin_cache.to(positions.device, dtype=query.dtype)
196
-
197
- # ops.rotary_embedding()/batched_rotary_embedding()
198
- # are in-place operations that update the query and key tensors.
199
- if offsets is not None:
200
- ops.batched_rotary_embedding(positions, query, key, self.head_size,
201
- self.cos_sin_cache,
202
- self.is_neox_style, self.rotary_dim,
203
- offsets)
204
- else:
205
- ops.rotary_embedding(positions, query, key, self.head_size,
206
- self.cos_sin_cache, self.is_neox_style)
207
- return query, key
208
-
209
-
210
- def _ipex_llm_rmsnorm_forward(
211
- self,
212
- x: torch.Tensor,
213
- residual: Optional[torch.Tensor] = None,
214
- ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
215
- x = x.to(dtype=self.weight.data.dtype)
216
- if residual is not None:
217
- residual = residual.to(dtype=self.weight.data.dtype)
218
- ops.fused_add_rms_norm(
219
- x,
220
- residual,
221
- self.weight.data,
222
- self.variance_epsilon,
223
- )
224
- return x, residual
225
- out = torch.empty_like(x)
226
- ops.rms_norm(
227
- out,
228
- x,
229
- self.weight.data,
230
- self.variance_epsilon,
231
- )
232
- return out
72
+ setattr(ray_utils_v0, "RayWorkerWrapper", get_ipex_llm_wrapper(load_in_low_bit))
73
+ setattr(ray_utils_v1, "RayWorkerWrapper", get_ipex_llm_v1_wrapper(load_in_low_bit))
233
74
 
234
75
 
235
76
  def get_load_function(low_bit):
236
77
  def _ipex_llm_load_model(self) -> None:
237
- model_class = get_model_architecture(self.model_config)[1]
238
- cur_model_list = ", ".join(_IPEX_LLM_SUPPORTED_MODELS)
239
- if low_bit != "bf16":
240
- invalidInputError(model_class in _IPEX_LLM_SUPPORTED_MODELS,
241
- f"Currently IPEX-LLM vLLM convert only support {cur_model_list}.")
242
- else:
243
- if model_class not in _IPEX_LLM_SUPPORTED_MODELS:
244
- logger.warning(
245
- f"Currently IPEX-LLM vLLM convert only support {cur_model_list}."
246
- )
247
- self.model = get_model(
248
- model_config=self.model_config,
249
- load_config=self.load_config,
250
- device_config=self.device_config,
251
- vision_language_config=self.vision_language_config,
252
- lora_config=self.lora_config,
253
- parallel_config=self.parallel_config,
254
- scheduler_config=self.scheduler_config)
255
- return
256
-
257
- # _model_mlp_convert()
258
- # _model_attention_convert()
259
-
78
+ _model_sample_convert()
79
+
80
+ # from vllm.utils import measure_device_memory
81
+ # from vllm.utils import DeviceMemoryProfiler
82
+ # with DeviceMemoryProfiler() as m:
83
+ from dataclasses import replace
84
+ new_device_config = DeviceConfig("cpu")
85
+ new_vllm_config = replace(self.vllm_config, device_config=new_device_config)
260
86
  self.model = get_model(
261
- model_config=self.model_config,
262
- load_config=self.load_config,
263
- device_config=self.device_config,
264
- vision_language_config=self.vision_language_config,
265
- lora_config=self.lora_config,
266
- parallel_config=self.parallel_config,
267
- scheduler_config=self.scheduler_config)
268
-
87
+ vllm_config=new_vllm_config
88
+ )
89
+ if "qwen" in self.vllm_config.model_config.model.lower() or \
90
+ "baichuan" in self.vllm_config.model_config.model.lower() or \
91
+ "codegeex4-all" in self.vllm_config.model_config.model.lower() or \
92
+ "chatglm" in self.vllm_config.model_config.model.lower():
93
+ self.model.apply(padding_mlp)
269
94
  from ipex_llm import optimize_model
270
- optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype)
95
+ import os
96
+ not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
97
+ if not_convert_last_mlp is not None:
98
+ # only use to avoid nan value in last mlp forward running glm4-9b-chat
99
+ modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
100
+ else:
101
+ modules = None
102
+ if "minicpm" in self.vllm_config.model_config.model.lower():
103
+ modules = ["vpm", "resampler"]
104
+ # only for minicpm_2_6
105
+ if "minicpm-v" in self.vllm_config.model_config.model.lower():
106
+ from ipex_llm.transformers.models.minicpmv import merge_qkv
107
+ self.model.vpm.apply(merge_qkv)
108
+ if "internvl2" in self.vllm_config.model_config.model.lower():
109
+ modules = ["vision_model", "mlp1"]
110
+
111
+ # print(self.vllm_config.model_config.dtype)
112
+ # print("---------------------------------------")
113
+ optimize_model(self.model, low_bit=low_bit, torch_dtype=self.vllm_config.model_config.dtype,
114
+ modules_to_not_convert=modules)
115
+ self.model = self.model.to(device=self.vllm_config.device_config.device,
116
+ dtype=self.vllm_config.model_config.dtype)
117
+ # print(self.model)
118
+ # self.model_memory_usage = m.consumed_memory
119
+ # logger = init_logger(__name__)
120
+ # logger.info("Loading model weights took %.4f GB",
121
+ # self.model_memory_usage / float(2**30))
271
122
 
272
123
  return _ipex_llm_load_model
124
+
125
+
126
+ def padding_mlp(module: torch.nn.Module):
127
+ mlp_gate_up_name = None
128
+ mlp_down_name = None
129
+ if isinstance(module, Qwen2MLP):
130
+ mlp_gate_up_name = "gate_up_proj"
131
+ mlp_down_name = "down_proj"
132
+ elif isinstance(module, GLMMLP):
133
+ mlp_gate_up_name = "dense_h_to_4h"
134
+ mlp_down_name = "dense_4h_to_h"
135
+ elif isinstance(module, BaiChuanMLP):
136
+ mlp_gate_up_name = "gate_up_proj"
137
+ mlp_down_name = "down_proj"
138
+ else:
139
+ return
140
+ hidden_size = getattr(module, mlp_down_name).output_size
141
+ # divide by rank
142
+ intermediate_size = getattr(module, mlp_down_name).input_size_per_partition
143
+ padding_size = 256
144
+ padding_intermediate_size = \
145
+ (intermediate_size + padding_size - 1) // padding_size * padding_size
146
+ if intermediate_size % padding_size == 0:
147
+ return
148
+ gate_up_weight = getattr(module, mlp_gate_up_name).weight.data
149
+ new_gate_up_weight = torch.zeros([padding_intermediate_size * 2, hidden_size],
150
+ dtype=gate_up_weight.dtype, device=gate_up_weight.device)
151
+ # merge_gate_up_weight
152
+ new_gate_up_weight[:intermediate_size, :] = gate_up_weight[:intermediate_size, :]
153
+ new_gate_up_weight[padding_intermediate_size:padding_intermediate_size+intermediate_size, :] = gate_up_weight[intermediate_size:, :] # noqa
154
+ getattr(module, mlp_gate_up_name).output_size_per_partition = padding_intermediate_size * 2
155
+ getattr(module, mlp_gate_up_name).output_size = padding_intermediate_size * 2
156
+ getattr(module, mlp_gate_up_name).weight = \
157
+ torch.nn.Parameter(new_gate_up_weight, requires_grad=False)
158
+
159
+ down_weight = getattr(module, mlp_down_name).weight.data
160
+ new_down_weight = torch.zeros([hidden_size, padding_intermediate_size],
161
+ dtype=down_weight.dtype, device=down_weight.device)
162
+ new_down_weight[:, :intermediate_size] = down_weight
163
+ getattr(module, mlp_down_name).input_size_per_partition = padding_intermediate_size
164
+ getattr(module, mlp_down_name).input_size = padding_intermediate_size
165
+ getattr(module, mlp_down_name).weight = torch.nn.Parameter(new_down_weight, requires_grad=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipex-llm
3
- Version: 2.2.0b20250120
3
+ Version: 2.2.0b20250122
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Home-page: https://github.com/intel-analytics/ipex-llm
6
6
  Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
27
27
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
28
28
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
29
29
  Provides-Extra: cpp
30
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250120 ; extra == 'cpp'
30
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250122 ; extra == 'cpp'
31
31
  Requires-Dist: setuptools ; extra == 'cpp'
32
32
  Provides-Extra: cpp-arl
33
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250120 ; extra == 'cpp-arl'
33
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250122 ; extra == 'cpp-arl'
34
34
  Requires-Dist: setuptools ; extra == 'cpp-arl'
35
35
  Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
36
36
  Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
67
67
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
68
68
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
69
69
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
70
- Requires-Dist: bigdl-core-npu ==2.6.0b20250120 ; (platform_system == "Windows") and extra == 'npu'
70
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250122 ; (platform_system == "Windows") and extra == 'npu'
71
71
  Provides-Extra: serving
72
72
  Requires-Dist: py-cpuinfo ; extra == 'serving'
73
73
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
87
87
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
88
88
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
89
89
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
90
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250120 ; extra == 'xpu'
91
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250120 ; extra == 'xpu'
92
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250120 ; extra == 'xpu'
90
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250122 ; extra == 'xpu'
91
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250122 ; extra == 'xpu'
92
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250122 ; extra == 'xpu'
93
93
  Provides-Extra: xpu-2-1
94
94
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
95
95
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
104
104
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
105
105
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
106
106
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
107
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250120 ; extra == 'xpu-2-1'
108
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250120 ; extra == 'xpu-2-1'
109
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250120 ; extra == 'xpu-2-1'
107
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250122 ; extra == 'xpu-2-1'
108
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250122 ; extra == 'xpu-2-1'
109
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250122 ; extra == 'xpu-2-1'
110
110
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
111
111
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
112
112
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
124
124
  Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
125
125
  Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
126
126
  Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
127
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250120 ; extra == 'xpu-2-6'
127
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250122 ; extra == 'xpu-2-6'
128
128
  Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
129
129
  Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
130
130
  Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -140,9 +140,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
140
140
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
141
141
  Requires-Dist: tabulate ; extra == 'xpu-arc'
142
142
  Requires-Dist: setuptools ; extra == 'xpu-arc'
143
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250120 ; extra == 'xpu-arc'
144
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250120 ; extra == 'xpu-arc'
145
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250120 ; extra == 'xpu-arc'
143
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250122 ; extra == 'xpu-arc'
144
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250122 ; extra == 'xpu-arc'
145
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250122 ; extra == 'xpu-arc'
146
146
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
147
147
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
148
148
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -163,9 +163,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
163
163
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
164
164
  Requires-Dist: tabulate ; extra == 'xpu-arl'
165
165
  Requires-Dist: setuptools ; extra == 'xpu-arl'
166
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250120 ; extra == 'xpu-arl'
167
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250120 ; extra == 'xpu-arl'
168
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250120 ; extra == 'xpu-arl'
166
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250122 ; extra == 'xpu-arl'
167
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250122 ; extra == 'xpu-arl'
168
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250122 ; extra == 'xpu-arl'
169
169
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
170
170
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
171
171
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -186,9 +186,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
186
186
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
187
187
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
188
188
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
189
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250120 ; extra == 'xpu-lnl'
190
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250120 ; extra == 'xpu-lnl'
191
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250120 ; extra == 'xpu-lnl'
189
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250122 ; extra == 'xpu-lnl'
190
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250122 ; extra == 'xpu-lnl'
191
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250122 ; extra == 'xpu-lnl'
192
192
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
193
193
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
194
194
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
41
41
  ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
42
42
  ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
43
43
  ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- ipex_llm/libs/bloom-api.dll,sha256=ElOM48DXunT7S4guvSaQH8xQ_JL7JaXnbvoUF5BfOtw,36352
45
- ipex_llm/libs/bloom.dll,sha256=TtYxuRMUc7ZsN-cP1AlKJBNclQ0wZduO1kUeSgcOAqo,507904
46
- ipex_llm/libs/gptneox-api.dll,sha256=A3LEWtq5-jYIZdKBVoktVoIgxnuCyFQCdckhfbypb2k,24576
47
- ipex_llm/libs/gptneox.dll,sha256=qqqL9d-FIzS13rix1cLivsymaNv29WMu7xTUHLZttRg,568320
48
- ipex_llm/libs/libbloom_avx.dll,sha256=fuBy9eI2euJLyHcMa5J8EOXa2_YwY3oko6DTEOD0zBE,536576
49
- ipex_llm/libs/libbloom_vnni.dll,sha256=kKbqwJVSOg8Hitn4GBYZSpEogY0f9ytXRi63McBrQuw,508416
50
- ipex_llm/libs/libgptneox_avx.dll,sha256=DVEf_qTfo9H8qc75SioP-UsgLbcB5FM52oq1EY3T6tg,596992
51
- ipex_llm/libs/libgptneox_vnni.dll,sha256=w1fZIx8HpodE1kCoTKwG_FOtJ4_iwSTvP81ER7g5Coo,568832
52
- ipex_llm/libs/libllama_avx.dll,sha256=lDJZiFY6oeGE4NsOJnH9j0JoicC2NFS3sJ1_B8I0-Qk,591360
53
- ipex_llm/libs/libllama_vnni.dll,sha256=Q0NqK_4ox6L5CtJw-_NDL3Bcq8d2wJxUgJF5pOjAx1w,563200
54
- ipex_llm/libs/libstarcoder_avx.dll,sha256=xtEKhbGxU3ve8CR6nCrA0X6eZE7kHfXt-ROTc3u93DQ,627712
55
- ipex_llm/libs/libstarcoder_vnni.dll,sha256=e4NEMrTTYVHkH3AmafYyFjD7buJvCzeBRpmni6v_J5Q,599552
56
- ipex_llm/libs/llama-api.dll,sha256=aqbrwZGMjY4u48DELpr4Z1upxVuUqoGErtYkPyff8_Y,25600
57
- ipex_llm/libs/llama.dll,sha256=5jehsfn5c_y6tYe6569oSWWMYGANu51d_44Z7hzS4Lw,562688
58
- ipex_llm/libs/main-bloom.exe,sha256=cGjimmFdrQPWSVf1d_wWcY1grIdX4atvRAA7Dt0ug_4,103424
59
- ipex_llm/libs/main-gptneox.exe,sha256=vdL1ZEsEPyOMfqJWxgnP_X92Wnc-zZY-tqcJ8_vRcmE,98816
60
- ipex_llm/libs/main-llama.exe,sha256=mPutujcN_vxxfQzohMZgwl5y2455A4zoSdXT7Pf1JsU,99840
61
- ipex_llm/libs/main-starcoder.exe,sha256=lY3E8TCuD73ONf4QEj-r-hZdCPX79jJq6WXqjkY7hV0,157696
62
- ipex_llm/libs/pipeline.dll,sha256=LD7iwczX5rvo9FjTwcX66i1TwqFZkid3d3MUtIvfBFg,72704
63
- ipex_llm/libs/quantize-bloom.exe,sha256=d08GthA3axJ51Dt3aSLu5B_dMQAb2Y5_F8ZmEtK_XkA,126464
64
- ipex_llm/libs/quantize-bloom_vnni.exe,sha256=sk83Oyef-RwWIMe4FW7kI_Zw5RDjlYvbitsL0LuZU7w,128000
65
- ipex_llm/libs/quantize-gptneox.exe,sha256=UwUFOyUoN6OCEUjN2lHOpQNyxlnwtMj8k4YfnnuckuM,104448
66
- ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=DoU6RNdMQBdr0FuYWy6dN0cmlt5emgt-VwiSK3KVkRI,104960
67
- ipex_llm/libs/quantize-llama.exe,sha256=iNS_ihom3QFqNHJLL_GR02-krdihtgIy9TKM6CiO4cY,110080
68
- ipex_llm/libs/quantize-llama_vnni.exe,sha256=7Uc-1v2Vrn3A_WwHSOPxKMJqbiH2y5JwZmHslsDxgko,110592
69
- ipex_llm/libs/quantize-starcoder.exe,sha256=pJ25creC-Ry6RDZU1K-OKEvNj0GwSFHLOM8CpNiSi-o,127488
70
- ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=HjF-4S6_0R_yuGiu42RVyrt1-rytl5GjRp30ZvFumMs,128512
71
- ipex_llm/libs/starcoder-api.dll,sha256=bqmEpK1r-zNyNWlz0Ezc151jRt3Ad9QsRYqDrse0xko,21504
72
- ipex_llm/libs/starcoder.dll,sha256=_VzX6itysqS0taFtSn5X1ZmeCIj9aO1lozr-jatxYhI,599040
44
+ ipex_llm/libs/bloom-api.dll,sha256=quxNPcqDy7sOIEfeu_fPdJmVrkWuqGWTsO8bmQWW030,36352
45
+ ipex_llm/libs/bloom.dll,sha256=CAX0tJlqYwMnmlYwXjQ2M7WeUmvWsRid51pMItEc0l4,507904
46
+ ipex_llm/libs/gptneox-api.dll,sha256=ZRB3wAjxWce7RivyvgqGuwg_Fn4uJc1uAmBszhg63EI,24576
47
+ ipex_llm/libs/gptneox.dll,sha256=fNbCKX4W844dEnTEdoMPVtQb3A8IGi4MOg44EAV4cYI,568320
48
+ ipex_llm/libs/libbloom_avx.dll,sha256=UZ94a_6kmXZLAdbUyq0uo3KHMj3UoLntdKAlXDL73zU,536576
49
+ ipex_llm/libs/libbloom_vnni.dll,sha256=b1LdhjaBZfghPNjLdtiSMdNQqgj19DgOaqszo5dXZ-4,508416
50
+ ipex_llm/libs/libgptneox_avx.dll,sha256=RlX5bX5OV_oCuelT1mLdU5oZy1V8jtG_ewUjjrDGrgE,596992
51
+ ipex_llm/libs/libgptneox_vnni.dll,sha256=NLIdy93bX-FWbP-3gYxawMKMyDcem7zuC-ytzx7z4aI,568832
52
+ ipex_llm/libs/libllama_avx.dll,sha256=BwTPw65FSFWexwbAEyk8-LTGdsiTHIykdXMu5vrBENY,591360
53
+ ipex_llm/libs/libllama_vnni.dll,sha256=T8-4VsUbB7NJYjdbyw_ZDGYp71GYfKPllOKNX4L81I4,563200
54
+ ipex_llm/libs/libstarcoder_avx.dll,sha256=pEX0b5rCiVw-UBqOz8PSa04TXY6md94h1j8bCdj9sn8,627712
55
+ ipex_llm/libs/libstarcoder_vnni.dll,sha256=U_JlHGgtYdcPXdkiacm4dWBqouke4Mf8lKAHXW7UcFE,599552
56
+ ipex_llm/libs/llama-api.dll,sha256=zUqOq0weiUTPsbaUSiW6yuKzkp2fu1lYDUShzkZDKeg,25600
57
+ ipex_llm/libs/llama.dll,sha256=nWO8ekh8KduP0M5jV6ABmdSyNN_DCF5zOwMOiCDB_I0,562688
58
+ ipex_llm/libs/main-bloom.exe,sha256=Fn8MRfqGuw1zASWW0SEgbjBIMl-D1jLPTt9VQ6mc8Co,103424
59
+ ipex_llm/libs/main-gptneox.exe,sha256=7_vPfyJ6xbP3xBvf3jwpk7EqCs0jNTTMqG5xeOaDoSE,98816
60
+ ipex_llm/libs/main-llama.exe,sha256=RAkzWejQI2b_0RmYVum0QTxRvLq74V13SRMOqE2wetU,99840
61
+ ipex_llm/libs/main-starcoder.exe,sha256=FvpnlhBOLlkrw9bgpU9eDRnzk0AHWQluwq4u-Y6hwUw,157696
62
+ ipex_llm/libs/pipeline.dll,sha256=Y6JosovQjHe8fnBJiAPrf6Gd-5uR9larFZFJMHVP0vk,72704
63
+ ipex_llm/libs/quantize-bloom.exe,sha256=PXpJ-8-UgthckwMV-15Cj4pf3_DRh_PIkyVHPNJ40F4,126464
64
+ ipex_llm/libs/quantize-bloom_vnni.exe,sha256=5x9Oxz2cURavrKui0YCJ6CQTNodu0_h14o2TjpyDKOs,128000
65
+ ipex_llm/libs/quantize-gptneox.exe,sha256=9TawQ7WBCnJkDiTwq-RyukNofB_TxXAQSXu8JGiU-9A,104448
66
+ ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=WOtH_wUcm_0p6tlJ5ugilThl8zB9C9dpYtdYpxMxoag,104960
67
+ ipex_llm/libs/quantize-llama.exe,sha256=ztv6PtFIbOHgBaDSfmP10PrEAoLMHYqsGHLDeN0GwXg,110080
68
+ ipex_llm/libs/quantize-llama_vnni.exe,sha256=OERE5kHKLpHxyUa8aD8tNk_covf0x26ZsWbq9ubHbdE,110592
69
+ ipex_llm/libs/quantize-starcoder.exe,sha256=SK7-9t9Q_qRTSKlChEDTFcvQmdkEDIvuwSAl9IofnRA,127488
70
+ ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=A6q48FyFaZRiRSSgDdTO7Ep1vdi4vvsZHgoZVYTkyYE,128512
71
+ ipex_llm/libs/starcoder-api.dll,sha256=0TAn1ce4BRkQz9oIZ8nJGb3pW1GM_YNT-pE1Hd9njes,21504
72
+ ipex_llm/libs/starcoder.dll,sha256=PRvZN0qYiX8C1adihh-ZU2VgwCvMYIX04yzJgnXp88s,599040
73
73
  ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
74
74
  ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
75
75
  ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,25 +87,25 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
87
87
  ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
88
88
  ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
89
89
  ipex_llm/transformers/__init__.py,sha256=pJHs6GZXHIObVE4BUCuej-6BKBZZg9pYWKPrkhWSfB4,1192
90
- ipex_llm/transformers/convert.py,sha256=RVW8PIKpLrdMbvqKuUtWswyECTvDP7IM-o6I-Ez3TZ4,99554
90
+ ipex_llm/transformers/convert.py,sha256=Ss4q1bsMgGMU2hauZJxUsEA2noPByR-fALrJGPN1sEk,99520
91
91
  ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
92
92
  ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
93
93
  ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
94
94
  ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
95
95
  ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
96
96
  ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
97
- ipex_llm/transformers/low_bit_linear.py,sha256=2FhbDODYaw0D2RXfYCx3B9NyDep2D50tzQm6pYDxpzQ,40964
98
- ipex_llm/transformers/model.py,sha256=64eRLtbFftku9kcN5xSNerbZ3jAFUjCbNlM8Ly54ygE,41079
97
+ ipex_llm/transformers/low_bit_linear.py,sha256=mFJRKU60ZVHm-V7gDsJYIz-ryntZi15XhS0eqSUPag4,41136
98
+ ipex_llm/transformers/model.py,sha256=cQJNlAkdfoWmVbWd-TS2hf-Do41mMO9orPvG3FO4Nns,40855
99
99
  ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
100
100
  ipex_llm/transformers/npu_model.py,sha256=X8ZtvZJpzz64XrSPhUYXXZmdJcbZ9X6G3Vlzw-zgN1Q,39749
101
- ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
101
+ ipex_llm/transformers/patches.py,sha256=G9KcXxo42H1HJEDaroq4JbBN5P0P0lty7U7kk7-g4tw,991
102
102
  ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
103
103
  ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
104
104
  ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
105
105
  ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
106
106
  ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
107
107
  ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
108
- ipex_llm/transformers/utils.py,sha256=9IRSqfDokf8QFW9T47R--i3RL1E-_O31HO7IJf7H6pg,16748
108
+ ipex_llm/transformers/utils.py,sha256=JBekwpPD-CyMxt1OzvVsp7tu26pSA4v2mjuaUbqrAgI,16995
109
109
  ipex_llm/transformers/xpu_customize_fwd.py,sha256=wFpIhs5F6tkNs8gBOrLxWdhLzO3EDHovVkERPIAoAvg,7611
110
110
  ipex_llm/transformers/xpu_ops.py,sha256=vw4cUwvqUqDr45d-WMIkCpM2oiHfjN-VjF0bjMSF4kY,4830
111
111
  ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
@@ -233,10 +233,14 @@ ipex_llm/utils/common/lazyimport.py,sha256=AOxkmsRnqpr9zEGA5_0baqrWGhdWBmIgyKO8c
233
233
  ipex_llm/utils/common/log4Error.py,sha256=8UgIpEJYQasQO4gMOWO22nsOgr14w1emAJy4ts1VOb0,1763
234
234
  ipex_llm/vllm/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
235
235
  ipex_llm/vllm/cpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
236
- ipex_llm/vllm/cpu/model_convert.py,sha256=BoIQqIJpm-L0ZygnlBMiC3pHOwU3VLBNtf5JP0cAUyM,9217
237
- ipex_llm/vllm/cpu/engine/__init__.py,sha256=mzPVAyZdbvfzBQi-wxZh1sbme_NElPMmtrJ9C2zh8Us,747
238
- ipex_llm/vllm/cpu/engine/engine.py,sha256=PpFWHL5tFg49DNRtjyj0lGNzFdk_XjJpKvdWEDz6VAE,7106
239
- ipex_llm/vllm/cpu/entrypoints/openai/api_server.py,sha256=N1DUxhLVE9hgIMe99jhVGPdCfHAx6dfRqhLZm-D6cNk,6917
236
+ ipex_llm/vllm/cpu/ipex_llm_v1_wrapper.py,sha256=_hCFJKXW6_Pekss8brhIIFukErOILN8BNlUtP9OFNEo,825
237
+ ipex_llm/vllm/cpu/ipex_llm_wrapper.py,sha256=wENN6x7qZPXiUKkkFnh7E2uqeePsBxy0xsXjhPDQ88g,892
238
+ ipex_llm/vllm/cpu/model_convert.py,sha256=FJT1rxSZjpmsFYBeu7n_s91rdQwi-FoL95lYbGqXhh8,7853
239
+ ipex_llm/vllm/cpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZjWqm5YiA,783
240
+ ipex_llm/vllm/cpu/engine/engine.py,sha256=MzVIncDlokHhWC8nj05dio0KXh_I0qob_mIlwt6v31g,9906
241
+ ipex_llm/vllm/cpu/entrypoints/api_server.py,sha256=RDe3GrGMw47c7dVZL2IMmTHtGegnzhms7I1yOnBbwjE,30156
242
+ ipex_llm/vllm/cpu/entrypoints/openai/api_server.py,sha256=l0tHnCaD96WD2k3zLTrYtLrcigf1eqYvszs79fYgoK8,29812
243
+ ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
240
244
  ipex_llm/vllm/xpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
241
245
  ipex_llm/vllm/xpu/ipex_llm_wrapper.py,sha256=_CbhvBuf_KPnmLfngYKtJl5gPAHVsG2mWth3wSeaH3M,892
242
246
  ipex_llm/vllm/xpu/model_convert.py,sha256=tDgaOliyBJdpcCctFNlMd9RK8fA7usLQi2RadgOj--A,7599
@@ -244,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
244
248
  ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
245
249
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
246
250
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
247
- ipex_llm-2.2.0b20250120.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
248
- ipex_llm-2.2.0b20250120.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
249
- ipex_llm-2.2.0b20250120.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
250
- ipex_llm-2.2.0b20250120.dist-info/METADATA,sha256=q7KGrzwCH-U7MiiWVeH6NibIoEag5WgHfMnbhWjdSio,12879
251
- ipex_llm-2.2.0b20250120.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
252
- ipex_llm-2.2.0b20250120.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
253
- ipex_llm-2.2.0b20250120.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
254
- ipex_llm-2.2.0b20250120.dist-info/RECORD,,
251
+ ipex_llm-2.2.0b20250122.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
252
+ ipex_llm-2.2.0b20250122.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
253
+ ipex_llm-2.2.0b20250122.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
254
+ ipex_llm-2.2.0b20250122.dist-info/METADATA,sha256=9Z4u6y119-W33jLX1WPjrNGbR3RtgbUfzeXDJNbK2W8,12879
255
+ ipex_llm-2.2.0b20250122.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
256
+ ipex_llm-2.2.0b20250122.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
257
+ ipex_llm-2.2.0b20250122.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
258
+ ipex_llm-2.2.0b20250122.dist-info/RECORD,,