ipex-llm 2.2.0b20250120__py3-none-win_amd64.whl → 2.2.0b20250122__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/convert.py +0 -1
- ipex_llm/transformers/low_bit_linear.py +8 -5
- ipex_llm/transformers/model.py +1 -3
- ipex_llm/transformers/patches.py +0 -11
- ipex_llm/transformers/utils.py +16 -10
- ipex_llm/vllm/cpu/engine/__init__.py +2 -1
- ipex_llm/vllm/cpu/engine/engine.py +159 -75
- ipex_llm/vllm/cpu/entrypoints/api_server.py +787 -0
- ipex_llm/vllm/cpu/entrypoints/openai/api_server.py +680 -95
- ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py +277 -0
- ipex_llm/vllm/cpu/ipex_llm_v1_wrapper.py +23 -0
- ipex_llm/vllm/cpu/ipex_llm_wrapper.py +24 -0
- ipex_llm/vllm/cpu/model_convert.py +126 -233
- {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/METADATA +20 -20
- {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/RECORD +50 -46
- {ipex_llm-2.2.0b20250120.data → ipex_llm-2.2.0b20250122.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.2.0b20250120.data → ipex_llm-2.2.0b20250122.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.2.0b20250120.data → ipex_llm-2.2.0b20250122.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/top_level.txt +0 -0
ipex_llm/vllm/cpu/model_convert.py
@@ -14,161 +14,51 @@
 # limitations under the License.
 #
 import torch
+from typing import Optional, Union
+from vllm.distributed import tensor_model_parallel_gather, tensor_model_parallel_all_gather
 from vllm.logger import init_logger
-from vllm.model_executor.
-from vllm.model_executor.
-from vllm.model_executor.models.
-from vllm.model_executor.models.qwen2 import Qwen2MLP, Qwen2Attention
-from vllm.model_executor.models.qwen import QWenMLP, QWenAttention
+from vllm.model_executor.models.llama import LlamaMLP, LlamaAttention, LlamaForCausalLM
+from vllm.model_executor.models.qwen2 import Qwen2MLP, Qwen2Attention, Qwen2ForCausalLM
+from vllm.model_executor.models.qwen import QWenMLP, QWenAttention, QWenLMHeadModel
 from vllm.model_executor.models.baichuan import BaiChuanMLP, BaiChuanAttention
-from vllm.model_executor.models.
-from vllm.
-from vllm.
+from vllm.model_executor.models.baichuan import BaiChuanBaseForCausalLM
+from vllm.model_executor.models.chatglm import GLMMLP, GLMAttention, ChatGLMForCausalLM
+from vllm.model_executor.model_loader import get_model
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
+from vllm.attention import AttentionMetadata
 from vllm.config import DeviceConfig
-from
-
-from vllm._C import ops
-from ipex_llm.utils.common import invalidInputError
-from typing import List, Optional, Tuple, Union
-
-logger = init_logger(__name__)
-
-
-def _MLP_forward(self, x):
-    gate_up = self.gate_up_proj(x)
-    x = self.act_fn(gate_up)
-    x = self.down_proj(x)
-    return x
-
-
-def _Attention_forward(
-    self,
-    positions: torch.Tensor,
-    hidden_states: torch.Tensor,
-    kv_cache: torch.Tensor,
-    attn_metadata: AttentionMetadata,
-) -> torch.Tensor:
-    qkv = self.qkv_proj(hidden_states).to(dtype=kv_cache.dtype)
-    q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
-    q, k = self.rotary_emb(positions, q, k)
-    attn_output = self.attn(q, k, v, kv_cache, attn_metadata, self.kv_scale)
-    output = self.o_proj(attn_output)
-    return output
-
-
-def _QWen_Attention_forward(
-    self,
-    positions: torch.Tensor,
-    hidden_states: torch.Tensor,
-    kv_cache: Tuple[torch.Tensor, torch.Tensor],
-    attn_metadata: AttentionMetadata,
-) -> torch.Tensor:
-    qkv = self.c_attn(hidden_states).to(dtype=kv_cache.dtype)
-    q, k, v = qkv.chunk(chunks=3, dim=-1)
-    q, k = self.rotary_emb(positions, q, k)
-    attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
-    output = self.c_proj(attn_output)
-    return output
-
-
-def _QWen_MLP_forward(self, x):
-    gate_up = self.gate_up_proj(x)
-    x = self.act_fn(gate_up)
-    x = self.c_proj(x)
-    return x
-
-
-def _Qwen2_Attention_forward(
-    self,
-    positions: torch.Tensor,
-    hidden_states: torch.Tensor,
-    kv_cache: torch.Tensor,
-    attn_metadata: AttentionMetadata,
-) -> torch.Tensor:
-    qkv = self.qkv_proj(hidden_states).to(dtype=kv_cache.dtype)
-    q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
-    q, k = self.rotary_emb(positions, q, k)
-    attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
-    output = self.o_proj(attn_output)
-    return output
-
-
-def _ChatGLM_MLP_forward(self, hidden_states):
-    # [s, b, 4hp]
-    intermediate_parallel = self.dense_h_to_4h(hidden_states)
-    intermediate_parallel = self.activation_func(intermediate_parallel)
-    # [s, b, h]
-    output = self.dense_4h_to_h(intermediate_parallel)
-    return output
-
-
-def _Baichuan_Attention_forward(
-    self,
-    positions: torch.Tensor,
-    hidden_states: torch.Tensor,
-    kv_cache: Tuple[torch.Tensor, torch.Tensor],
-    attn_metadata: AttentionMetadata,
-) -> torch.Tensor:
-    qkv = self.W_pack(hidden_states).to(dtype=kv_cache.dtype)
-    q, k, v = qkv.chunk(chunks=3, dim=-1)
-    if self.postion_embedding != "ALIBI":
-        q, k = self.rotary_emb(positions, q, k)
-    attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
-    output = self.o_proj(attn_output)
-    return output
+from typing import Tuple
+from ipex_llm.transformers.low_bit_linear import LowBitLinear
 
 
-def
+def _sample_get_logits(
     self,
     hidden_states: torch.Tensor,
-
-
-    attn_metadata: AttentionMetadata,
+    lm_head: Union[VocabParallelEmbedding, LowBitLinear],
+    embedding_bias: Optional[torch.Tensor],
 ) -> torch.Tensor:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # QWenMLP: _QWen_MLP_forward,
-    GLMMLP: _ChatGLM_MLP_forward
-}
-
-_REPLACED_ATTENTION_LAYERS = {
-    LlamaAttention: _Attention_forward,
-    Qwen2Attention: _Qwen2_Attention_forward,
-    # QWenAttention: _QWen_Attention_forward,
-    BaiChuanAttention: _Baichuan_Attention_forward,
-    GLMAttention: _ChatGLM_Attention_forward
-}
-
-_IPEX_LLM_SUPPORTED_MODELS = [
-    "LlamaForCausalLM",
-    "BaichuanForCausalLM",
-    "ChatGLMForCausalLM",
-    "Qwen2ForCausalLM",
-]
-
-
-def _model_mlp_convert():
-    for module, replaced_func in _REPLACED_MLP_LAYERS.items():
-        setattr(module, "forward", replaced_func)
+    # HINT: we do not support other types of quantization for now
+    # TODO: we may encounter tie-word-embedding problems
+    if isinstance(lm_head, VocabParallelEmbedding):
+        logits = lm_head.linear_method.apply(lm_head,
+                                             hidden_states,
+                                             bias=embedding_bias)
+    else:
+        logits = lm_head(hidden_states)
+        if embedding_bias is not None:
+            logits += embedding_bias
+    if self.use_gather:
+        logits = tensor_model_parallel_gather(logits)
+    else:
+        logits = tensor_model_parallel_all_gather(logits)
+    if logits is not None:
+        logits = logits[:, : self.org_vocab_size]
+    return logits
 
 
-def
-
-
+def _model_sample_convert():
+    from vllm.model_executor.layers.logits_processor import LogitsProcessor
+    setattr(LogitsProcessor, "_get_logits", _sample_get_logits)
 
 
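The hunk above deletes the per-model forward replacements (`_REPLACED_MLP_LAYERS`, `_REPLACED_ATTENTION_LAYERS`, and the `_IPEX_LLM_SUPPORTED_MODELS` allow-list) and leaves a single patch point: `LogitsProcessor._get_logits` is swapped for `_sample_get_logits`, which calls a `LowBitLinear` lm_head directly instead of routing through vLLM's quantized linear method. A stripped-down, runnable illustration of that `setattr` patching pattern; the class and head here are stand-ins, not vLLM's:

class LogitsProcessorStub:
    """Stand-in for vLLM's LogitsProcessor (illustrative only)."""
    def __init__(self, lm_head):
        self.lm_head = lm_head

    def _get_logits(self, hidden_states):
        # original path: delegate to the head's quantized linear method
        return self.lm_head.apply(hidden_states)

def _patched_get_logits(self, hidden_states):
    # replacement path: a low-bit head is an ordinary callable module
    return self.lm_head(hidden_states)

# Patching the class rebinds the method for every existing and future instance.
setattr(LogitsProcessorStub, "_get_logits", _patched_get_logits)

proc = LogitsProcessorStub(lm_head=lambda h: [2 * x for x in h])
print(proc._get_logits([1, 2, 3]))  # -> [2, 4, 6], served by the patch
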
@@ -175,98 +65,59 @@
 def _ipex_llm_convert(load_in_low_bit):
-    if load_in_low_bit is None:
-        return
     from vllm.worker.cpu_model_runner import CPUModelRunner
-
+    from ipex_llm.vllm.cpu.ipex_llm_wrapper import get_ipex_llm_wrapper
+    from ipex_llm.vllm.cpu.ipex_llm_v1_wrapper import get_ipex_llm_v1_wrapper
+    import vllm.executor.ray_utils as ray_utils_v0
+    import vllm.v1.executor.ray_utils as ray_utils_v1
     setattr(CPUModelRunner, "load_model", get_load_function(load_in_low_bit))
-
-
-    setattr(RotaryEmbedding, "forward", _ipex_llm_rotary_embedding_forward)
-    from vllm.model_executor.layers.layernorm import RMSNorm
-    setattr(RMSNorm, "forward", _ipex_llm_rmsnorm_forward)
-
-
-def _ipex_llm_rotary_embedding_forward(
-    self,
-    positions: torch.Tensor,
-    query: torch.Tensor,
-    key: torch.Tensor,
-    offsets: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    self.cos_sin_cache = self.cos_sin_cache.to(positions.device, dtype=query.dtype)
-
-    # ops.rotary_embedding()/batched_rotary_embedding()
-    # are in-place operations that update the query and key tensors.
-    if offsets is not None:
-        ops.batched_rotary_embedding(positions, query, key, self.head_size,
-                                     self.cos_sin_cache,
-                                     self.is_neox_style, self.rotary_dim,
-                                     offsets)
-    else:
-        ops.rotary_embedding(positions, query, key, self.head_size,
-                             self.cos_sin_cache, self.is_neox_style)
-    return query, key
-
-
-def _ipex_llm_rmsnorm_forward(
-    self,
-    x: torch.Tensor,
-    residual: Optional[torch.Tensor] = None,
-) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
-    x = x.to(dtype=self.weight.data.dtype)
-    if residual is not None:
-        residual = residual.to(dtype=self.weight.data.dtype)
-        ops.fused_add_rms_norm(
-            x,
-            residual,
-            self.weight.data,
-            self.variance_epsilon,
-        )
-        return x, residual
-    out = torch.empty_like(x)
-    ops.rms_norm(
-        out,
-        x,
-        self.weight.data,
-        self.variance_epsilon,
-    )
-    return out
+    setattr(ray_utils_v0, "RayWorkerWrapper", get_ipex_llm_wrapper(load_in_low_bit))
+    setattr(ray_utils_v1, "RayWorkerWrapper", get_ipex_llm_v1_wrapper(load_in_low_bit))
 
 
 def get_load_function(low_bit):
     def _ipex_llm_load_model(self) -> None:
-
-
-
-
-
-
-
-
-        f"Currently IPEX-LLM vLLM convert only support {cur_model_list}."
-        )
-        self.model = get_model(
-            model_config=self.model_config,
-            load_config=self.load_config,
-            device_config=self.device_config,
-            vision_language_config=self.vision_language_config,
-            lora_config=self.lora_config,
-            parallel_config=self.parallel_config,
-            scheduler_config=self.scheduler_config)
-        return
-
-        # _model_mlp_convert()
-        # _model_attention_convert()
-
+        _model_sample_convert()
+
+        # from vllm.utils import measure_device_memory
+        # from vllm.utils import DeviceMemoryProfiler
+        # with DeviceMemoryProfiler() as m:
+        from dataclasses import replace
+        new_device_config = DeviceConfig("cpu")
+        new_vllm_config = replace(self.vllm_config, device_config=new_device_config)
         self.model = get_model(
-
-
-
-
-
-
-
-
+            vllm_config=new_vllm_config
+        )
+        if "qwen" in self.vllm_config.model_config.model.lower() or \
+                "baichuan" in self.vllm_config.model_config.model.lower() or \
+                "codegeex4-all" in self.vllm_config.model_config.model.lower() or \
+                "chatglm" in self.vllm_config.model_config.model.lower():
+            self.model.apply(padding_mlp)
         from ipex_llm import optimize_model
-
+        import os
+        not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
+        if not_convert_last_mlp is not None:
+            # only use to avoid nan value in last mlp forward running glm4-9b-chat
+            modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
+        else:
+            modules = None
+        if "minicpm" in self.vllm_config.model_config.model.lower():
+            modules = ["vpm", "resampler"]
+        # only for minicpm_2_6
+        if "minicpm-v" in self.vllm_config.model_config.model.lower():
+            from ipex_llm.transformers.models.minicpmv import merge_qkv
+            self.model.vpm.apply(merge_qkv)
+        if "internvl2" in self.vllm_config.model_config.model.lower():
+            modules = ["vision_model", "mlp1"]
+
+        # print(self.vllm_config.model_config.dtype)
+        # print("---------------------------------------")
+        optimize_model(self.model, low_bit=low_bit, torch_dtype=self.vllm_config.model_config.dtype,
+                       modules_to_not_convert=modules)
+        self.model = self.model.to(device=self.vllm_config.device_config.device,
+                                   dtype=self.vllm_config.model_config.dtype)
+        # print(self.model)
+        # self.model_memory_usage = m.consumed_memory
+        # logger = init_logger(__name__)
+        # logger.info("Loading model weights took %.4f GB",
+        #             self.model_memory_usage / float(2**30))
 
     return _ipex_llm_load_model
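`_ipex_llm_load_model` above lets vLLM build the model (with the device forced to CPU through `dataclasses.replace` on `vllm_config`) and then converts it in place with ipex-llm's `optimize_model`, excluding fragile submodules via `modules_to_not_convert`. The same entry point can be exercised standalone; a minimal sketch assuming an ordinary Hugging Face checkpoint (the model id and low-bit format are illustrative):

import torch
from transformers import AutoModelForCausalLM
from ipex_llm import optimize_model

# Load full-precision weights first, then convert eligible linear layers in place.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-7B-Instruct",
                                             torch_dtype=torch.float16)
# low_bit and the exclusion list mirror the knobs the loader above passes through.
model = optimize_model(model, low_bit="sym_int4", modules_to_not_convert=None)
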
@@ -272,0 +124,42 @@
+
+
+def padding_mlp(module: torch.nn.Module):
+    mlp_gate_up_name = None
+    mlp_down_name = None
+    if isinstance(module, Qwen2MLP):
+        mlp_gate_up_name = "gate_up_proj"
+        mlp_down_name = "down_proj"
+    elif isinstance(module, GLMMLP):
+        mlp_gate_up_name = "dense_h_to_4h"
+        mlp_down_name = "dense_4h_to_h"
+    elif isinstance(module, BaiChuanMLP):
+        mlp_gate_up_name = "gate_up_proj"
+        mlp_down_name = "down_proj"
+    else:
+        return
+    hidden_size = getattr(module, mlp_down_name).output_size
+    # devide by rank
+    intermediate_size = getattr(module, mlp_down_name).input_size_per_partition
+    padding_size = 256
+    padding_intermediate_size = \
+        (intermediate_size + padding_size - 1) // padding_size * padding_size
+    if intermediate_size % padding_size == 0:
+        return
+    gate_up_weight = getattr(module, mlp_gate_up_name).weight.data
+    new_gate_up_weight = torch.zeros([padding_intermediate_size * 2, hidden_size],
+                                     dtype=gate_up_weight.dtype, device=gate_up_weight.device)
+    # merge_gate_up_weight
+    new_gate_up_weight[:intermediate_size, :] = gate_up_weight[:intermediate_size, :]
+    new_gate_up_weight[padding_intermediate_size:padding_intermediate_size+intermediate_size, :] = gate_up_weight[intermediate_size:, :]  # noqa
+    getattr(module, mlp_gate_up_name).output_size_per_partition = padding_intermediate_size * 2
+    getattr(module, mlp_gate_up_name).output_size = padding_intermediate_size * 2
+    getattr(module, mlp_gate_up_name).weight = \
+        torch.nn.Parameter(new_gate_up_weight, requires_grad=False)
+
+    down_weight = getattr(module, mlp_down_name).weight.data
+    new_down_weight = torch.zeros([hidden_size, padding_intermediate_size],
+                                  dtype=down_weight.dtype, device=down_weight.device)
+    new_down_weight[:, :intermediate_size] = down_weight
+    getattr(module, mlp_down_name).input_size_per_partition = padding_intermediate_size
+    getattr(module, mlp_down_name).input_size = padding_intermediate_size
+    getattr(module, mlp_down_name).weight = torch.nn.Parameter(new_down_weight, requires_grad=False)
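The new `padding_mlp` pass rounds the per-rank intermediate size up to the next multiple of 256 and zero-pads both projections: the merged gate/up weight holds two stacked blocks, so each block is copied to its re-based offset, while the down projection gains zero input columns. The same arithmetic on plain tensors, as a self-contained sketch (function and variable names are illustrative):

import torch

def pad_mlp_weights(gate_up: torch.Tensor, down: torch.Tensor, padding_size: int = 256):
    # gate_up: [2 * intermediate, hidden]; down: [hidden, intermediate]
    hidden, intermediate = down.shape
    padded = (intermediate + padding_size - 1) // padding_size * padding_size
    if intermediate % padding_size == 0:
        return gate_up, down  # already aligned, nothing to do
    new_gate_up = torch.zeros(2 * padded, hidden, dtype=gate_up.dtype)
    new_gate_up[:intermediate] = gate_up[:intermediate]                  # gate block
    new_gate_up[padded:padded + intermediate] = gate_up[intermediate:]  # up block, re-based
    new_down = torch.zeros(hidden, padded, dtype=down.dtype)
    new_down[:, :intermediate] = down                                   # padded columns stay zero
    return new_gate_up, new_down

g, d = pad_mlp_weights(torch.randn(2 * 1000, 64), torch.randn(64, 1000))
print(g.shape, d.shape)  # torch.Size([2048, 64]) torch.Size([64, 1024])

For a SiLU-and-multiply MLP this padding is numerically a no-op: the zero gate rows yield silu(0) * 0 = 0, so the extra down-projection columns contribute nothing to the output.
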
{ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.
+Version: 2.2.0b20250122
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250122 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Provides-Extra: cpp-arl
-Requires-Dist: bigdl-core-cpp ==2.6.
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250122 ; extra == 'cpp-arl'
 Requires-Dist: setuptools ; extra == 'cpp-arl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.
+Requires-Dist: bigdl-core-npu ==2.6.0b20250122 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250122 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250122 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250122 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250122 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250122 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250122 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250122 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -140,9 +140,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250122 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250122 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250122 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -163,9 +163,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250122 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250122 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250122 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -186,9 +186,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250122 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250122 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250122 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
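Every `bigdl-core-*` pin moves in lock-step with the wheel's own nightly tag, so the metadata cannot express a mismatched pair. A quick way to confirm the pins of an installed copy, sketched with only the standard library:

from importlib.metadata import requires, version

print(version("ipex-llm"))  # expected for this wheel: 2.2.0b20250122
for req in requires("ipex-llm") or []:
    if req.startswith("bigdl-core"):
        print(req)  # each extra pins the matching 2.6.0b20250122 build
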
{ipex_llm-2.2.0b20250120.dist-info → ipex_llm-2.2.0b20250122.dist-info}/RECORD
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=
-ipex_llm/libs/bloom.dll,sha256=
-ipex_llm/libs/gptneox-api.dll,sha256=
-ipex_llm/libs/gptneox.dll,sha256=
-ipex_llm/libs/libbloom_avx.dll,sha256=
-ipex_llm/libs/libbloom_vnni.dll,sha256=
-ipex_llm/libs/libgptneox_avx.dll,sha256=
-ipex_llm/libs/libgptneox_vnni.dll,sha256=
-ipex_llm/libs/libllama_avx.dll,sha256=
-ipex_llm/libs/libllama_vnni.dll,sha256=
-ipex_llm/libs/libstarcoder_avx.dll,sha256=
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=
-ipex_llm/libs/llama-api.dll,sha256=
-ipex_llm/libs/llama.dll,sha256=
-ipex_llm/libs/main-bloom.exe,sha256=
-ipex_llm/libs/main-gptneox.exe,sha256=
-ipex_llm/libs/main-llama.exe,sha256=
-ipex_llm/libs/main-starcoder.exe,sha256=
-ipex_llm/libs/pipeline.dll,sha256=
-ipex_llm/libs/quantize-bloom.exe,sha256=
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=
-ipex_llm/libs/quantize-gptneox.exe,sha256=
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=
-ipex_llm/libs/quantize-llama.exe,sha256=
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=
-ipex_llm/libs/quantize-starcoder.exe,sha256=
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=
-ipex_llm/libs/starcoder-api.dll,sha256=
-ipex_llm/libs/starcoder.dll,sha256=
+ipex_llm/libs/bloom-api.dll,sha256=quxNPcqDy7sOIEfeu_fPdJmVrkWuqGWTsO8bmQWW030,36352
+ipex_llm/libs/bloom.dll,sha256=CAX0tJlqYwMnmlYwXjQ2M7WeUmvWsRid51pMItEc0l4,507904
+ipex_llm/libs/gptneox-api.dll,sha256=ZRB3wAjxWce7RivyvgqGuwg_Fn4uJc1uAmBszhg63EI,24576
+ipex_llm/libs/gptneox.dll,sha256=fNbCKX4W844dEnTEdoMPVtQb3A8IGi4MOg44EAV4cYI,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=UZ94a_6kmXZLAdbUyq0uo3KHMj3UoLntdKAlXDL73zU,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=b1LdhjaBZfghPNjLdtiSMdNQqgj19DgOaqszo5dXZ-4,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=RlX5bX5OV_oCuelT1mLdU5oZy1V8jtG_ewUjjrDGrgE,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=NLIdy93bX-FWbP-3gYxawMKMyDcem7zuC-ytzx7z4aI,568832
+ipex_llm/libs/libllama_avx.dll,sha256=BwTPw65FSFWexwbAEyk8-LTGdsiTHIykdXMu5vrBENY,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=T8-4VsUbB7NJYjdbyw_ZDGYp71GYfKPllOKNX4L81I4,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=pEX0b5rCiVw-UBqOz8PSa04TXY6md94h1j8bCdj9sn8,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=U_JlHGgtYdcPXdkiacm4dWBqouke4Mf8lKAHXW7UcFE,599552
+ipex_llm/libs/llama-api.dll,sha256=zUqOq0weiUTPsbaUSiW6yuKzkp2fu1lYDUShzkZDKeg,25600
+ipex_llm/libs/llama.dll,sha256=nWO8ekh8KduP0M5jV6ABmdSyNN_DCF5zOwMOiCDB_I0,562688
+ipex_llm/libs/main-bloom.exe,sha256=Fn8MRfqGuw1zASWW0SEgbjBIMl-D1jLPTt9VQ6mc8Co,103424
+ipex_llm/libs/main-gptneox.exe,sha256=7_vPfyJ6xbP3xBvf3jwpk7EqCs0jNTTMqG5xeOaDoSE,98816
+ipex_llm/libs/main-llama.exe,sha256=RAkzWejQI2b_0RmYVum0QTxRvLq74V13SRMOqE2wetU,99840
+ipex_llm/libs/main-starcoder.exe,sha256=FvpnlhBOLlkrw9bgpU9eDRnzk0AHWQluwq4u-Y6hwUw,157696
+ipex_llm/libs/pipeline.dll,sha256=Y6JosovQjHe8fnBJiAPrf6Gd-5uR9larFZFJMHVP0vk,72704
+ipex_llm/libs/quantize-bloom.exe,sha256=PXpJ-8-UgthckwMV-15Cj4pf3_DRh_PIkyVHPNJ40F4,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=5x9Oxz2cURavrKui0YCJ6CQTNodu0_h14o2TjpyDKOs,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=9TawQ7WBCnJkDiTwq-RyukNofB_TxXAQSXu8JGiU-9A,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=WOtH_wUcm_0p6tlJ5ugilThl8zB9C9dpYtdYpxMxoag,104960
+ipex_llm/libs/quantize-llama.exe,sha256=ztv6PtFIbOHgBaDSfmP10PrEAoLMHYqsGHLDeN0GwXg,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=OERE5kHKLpHxyUa8aD8tNk_covf0x26ZsWbq9ubHbdE,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=SK7-9t9Q_qRTSKlChEDTFcvQmdkEDIvuwSAl9IofnRA,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=A6q48FyFaZRiRSSgDdTO7Ep1vdi4vvsZHgoZVYTkyYE,128512
+ipex_llm/libs/starcoder-api.dll,sha256=0TAn1ce4BRkQz9oIZ8nJGb3pW1GM_YNT-pE1Hd9njes,21504
+ipex_llm/libs/starcoder.dll,sha256=PRvZN0qYiX8C1adihh-ZU2VgwCvMYIX04yzJgnXp88s,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,25 +87,25 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=pJHs6GZXHIObVE4BUCuej-6BKBZZg9pYWKPrkhWSfB4,1192
-ipex_llm/transformers/convert.py,sha256=
+ipex_llm/transformers/convert.py,sha256=Ss4q1bsMgGMU2hauZJxUsEA2noPByR-fALrJGPN1sEk,99520
 ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
 ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
 ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
 ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
-ipex_llm/transformers/low_bit_linear.py,sha256=
-ipex_llm/transformers/model.py,sha256=
+ipex_llm/transformers/low_bit_linear.py,sha256=mFJRKU60ZVHm-V7gDsJYIz-ryntZi15XhS0eqSUPag4,41136
+ipex_llm/transformers/model.py,sha256=cQJNlAkdfoWmVbWd-TS2hf-Do41mMO9orPvG3FO4Nns,40855
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
 ipex_llm/transformers/npu_model.py,sha256=X8ZtvZJpzz64XrSPhUYXXZmdJcbZ9X6G3Vlzw-zgN1Q,39749
-ipex_llm/transformers/patches.py,sha256=
+ipex_llm/transformers/patches.py,sha256=G9KcXxo42H1HJEDaroq4JbBN5P0P0lty7U7kk7-g4tw,991
 ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
 ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
 ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
 ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
 ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
 ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
-ipex_llm/transformers/utils.py,sha256=
+ipex_llm/transformers/utils.py,sha256=JBekwpPD-CyMxt1OzvVsp7tu26pSA4v2mjuaUbqrAgI,16995
 ipex_llm/transformers/xpu_customize_fwd.py,sha256=wFpIhs5F6tkNs8gBOrLxWdhLzO3EDHovVkERPIAoAvg,7611
 ipex_llm/transformers/xpu_ops.py,sha256=vw4cUwvqUqDr45d-WMIkCpM2oiHfjN-VjF0bjMSF4kY,4830
 ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
@@ -233,10 +233,14 @@ ipex_llm/utils/common/lazyimport.py,sha256=AOxkmsRnqpr9zEGA5_0baqrWGhdWBmIgyKO8c
 ipex_llm/utils/common/log4Error.py,sha256=8UgIpEJYQasQO4gMOWO22nsOgr14w1emAJy4ts1VOb0,1763
 ipex_llm/vllm/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
 ipex_llm/vllm/cpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
-ipex_llm/vllm/cpu/
-ipex_llm/vllm/cpu/
-ipex_llm/vllm/cpu/
-ipex_llm/vllm/cpu/
+ipex_llm/vllm/cpu/ipex_llm_v1_wrapper.py,sha256=_hCFJKXW6_Pekss8brhIIFukErOILN8BNlUtP9OFNEo,825
+ipex_llm/vllm/cpu/ipex_llm_wrapper.py,sha256=wENN6x7qZPXiUKkkFnh7E2uqeePsBxy0xsXjhPDQ88g,892
+ipex_llm/vllm/cpu/model_convert.py,sha256=FJT1rxSZjpmsFYBeu7n_s91rdQwi-FoL95lYbGqXhh8,7853
+ipex_llm/vllm/cpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZjWqm5YiA,783
+ipex_llm/vllm/cpu/engine/engine.py,sha256=MzVIncDlokHhWC8nj05dio0KXh_I0qob_mIlwt6v31g,9906
+ipex_llm/vllm/cpu/entrypoints/api_server.py,sha256=RDe3GrGMw47c7dVZL2IMmTHtGegnzhms7I1yOnBbwjE,30156
+ipex_llm/vllm/cpu/entrypoints/openai/api_server.py,sha256=l0tHnCaD96WD2k3zLTrYtLrcigf1eqYvszs79fYgoK8,29812
+ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
 ipex_llm/vllm/xpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
 ipex_llm/vllm/xpu/ipex_llm_wrapper.py,sha256=_CbhvBuf_KPnmLfngYKtJl5gPAHVsG2mWth3wSeaH3M,892
 ipex_llm/vllm/xpu/model_convert.py,sha256=tDgaOliyBJdpcCctFNlMd9RK8fA7usLQi2RadgOj--A,7599
@@ -244,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
+ipex_llm-2.2.0b20250122.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250122.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250122.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250122.dist-info/METADATA,sha256=9Z4u6y119-W33jLX1WPjrNGbR3RtgbUfzeXDJNbK2W8,12879
+ipex_llm-2.2.0b20250122.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250122.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250122.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250122.dist-info/RECORD,,
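Each RECORD row has the form `path,sha256=<digest>,<size>`, where the digest is the URL-safe base64 SHA-256 of the file with trailing `=` padding stripped, per the wheel RECORD convention. A short sketch that recomputes a row, e.g. to check one of the rebuilt DLLs against its new hash:

import base64
import hashlib
from pathlib import Path

def record_row(path: str) -> str:
    # URL-safe base64 of the SHA-256 digest, '=' padding stripped, plus the byte size
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# record_row("ipex_llm/libs/bloom-api.dll") should reproduce the entry shown above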