ipex-llm 2.2.0b20250211__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250212__py3-none-manylinux2010_x86_64.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
--- a/ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py
+++ b/ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py
@@ -7,11 +7,14 @@ purposes.
 import argparse
 import json
 import ssl
-from typing import List, Optional, Sequence, Union
+from typing import List, Optional, Sequence, Union, get_args
 
 from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
+from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
+                                         validate_chat_template)
 from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
                                                     PromptAdapterPath)
+from vllm.entrypoints.openai.tool_parsers import ToolParserManager
 from vllm.utils import FlexibleArgumentParser
 
 
@@ -130,10 +133,23 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                         help="The file path to the chat template, "
                         "or the template in single-line form "
                         "for the specified model")
+    parser.add_argument(
+        '--chat-template-content-format',
+        type=str,
+        default="auto",
+        choices=get_args(ChatTemplateContentFormatOption),
+        help='The format to render message content within a chat template.'
+        '\n\n'
+        '* "string" will render the content as a string. '
+        'Example: "Hello World"\n'
+        '* "openai" will render the content as a list of dictionaries, '
+        'similar to OpenAI schema. '
+        'Example: [{"type": "text", "text": "Hello world!"}]')
     parser.add_argument("--response-role",
                         type=nullable_str,
                         default="assistant",
-                        help="The role name to return if `request.add_generation_prompt=true`.")
+                        help="The role name to return if "
+                        "`request.add_generation_prompt=true`.")
     parser.add_argument("--ssl-keyfile",
                         type=nullable_str,
                         default=None,
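
For context on the flag added above: its two values correspond to the two shapes the OpenAI chat schema allows for message content. A minimal Python illustration, with payloads expanded from the help text (the variable names are ours, not part of the diff):

    # --chat-template-content-format=string
    string_form = {"role": "user", "content": "Hello World"}
    # --chat-template-content-format=openai
    openai_form = {"role": "user",
                   "content": [{"type": "text", "text": "Hello world!"}]}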
@@ -180,28 +196,36 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         action="store_true",
         help="If specified, will run the OpenAI frontend server in the same "
         "process as the model serving engine.")
-
+    parser.add_argument(
+        "--enable-request-id-headers",
+        action="store_true",
+        help="If specified, API server will add X-Request-Id header to "
+        "responses. Caution: this hurts performance at high QPS.")
     parser.add_argument(
         "--enable-auto-tool-choice",
         action="store_true",
         default=False,
         help="Enable auto tool choice for supported models. Use --tool-call-parser"
-        "to specify which parser to use")
+        " to specify which parser to use")
 
+    valid_tool_parsers = ToolParserManager.tool_parsers.keys()
     parser.add_argument(
         "--tool-call-parser",
         type=str,
-        choices=["mistral", "hermes"],
+        metavar="{" + ",".join(valid_tool_parsers) + "} or name registered in "
+        "--tool-parser-plugin",
         default=None,
         help="Select the tool call parser depending on the model that you're using."
         " This is used to parse the model-generated tool call into OpenAI API "
         "format. Required for --enable-auto-tool-choice.")
 
     parser.add_argument(
-        "--load-in-low-bit",
+        "--tool-parser-plugin",
         type=str,
-        default="sym_int4",
-        help="Low-bit quantization for IPEX-LLM models")
+        default="",
+        help="Special the tool parser plugin write to parse the model-generated tool"
+        " into OpenAI API format, the name register in this plugin can be used "
+        "in --tool-call-parser.")
 
     parser = AsyncEngineArgs.add_cli_args(parser)
 
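
A hedged sketch of how the two tool-calling options combine; the argv values are illustrative, with "hermes" standing in for any key of ToolParserManager.tool_parsers or a name registered via --tool-parser-plugin:

    from vllm.utils import FlexibleArgumentParser
    from ipex_llm.vllm.xpu.entrypoints.openai.cli_args import make_arg_parser

    parser = make_arg_parser(FlexibleArgumentParser())
    args = parser.parse_args([
        "--enable-auto-tool-choice",     # requires naming a parser:
        "--tool-call-parser", "hermes",  # any registered parser name works
    ])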
@@ -218,10 +242,35 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         default=False,
         help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint"
     )
+    parser.add_argument(
+        "--enable-prompt-tokens-details",
+        action='store_true',
+        default=False,
+        help="If set to True, enable prompt_tokens_details in usage.")
+
+    parser.add_argument(
+        "--load-in-low-bit",
+        type=str,
+        default="sym_int4",
+        help="Low-bit quantization for IPEX-LLM models")
 
     return parser
 
 
+def validate_parsed_serve_args(args: argparse.Namespace):
+    """Quick checks for model serve args that raise prior to loading."""  # noqa
+    if hasattr(args, "subparser") and args.subparser != "serve":
+        return
+
+    # Ensure that the chat template is valid; raises if it likely isn't
+    validate_chat_template(args.chat_template)
+
+    # Enable auto tool needs a tool call parser to be valid
+    if args.enable_auto_tool_choice and not args.tool_call_parser:
+        raise TypeError("Error: --enable-auto-tool-choice requires "  # noqa
+                        "--tool-call-parser")
+
+
 def create_parser_for_docs() -> FlexibleArgumentParser:
     parser_for_docs = FlexibleArgumentParser(
         prog="-m vllm.entrypoints.openai.api_server")
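
The new validate_parsed_serve_args hook gives the server a cheap pre-flight check. A minimal sketch of the intended call order (the surrounding bootstrap code is assumed, not shown in this diff):

    from vllm.utils import FlexibleArgumentParser
    from ipex_llm.vllm.xpu.entrypoints.openai.cli_args import (
        make_arg_parser, validate_parsed_serve_args)

    parser = make_arg_parser(FlexibleArgumentParser())
    args = parser.parse_args()
    # Raises before any model weights load: invalid chat template, or
    # --enable-auto-tool-choice given without --tool-call-parser.
    validate_parsed_serve_args(args)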
--- /dev/null
+++ b/ipex_llm/vllm/xpu/ipex_llm_v1_wrapper.py
@@ -0,0 +1,23 @@
+from vllm.logger import init_logger
+from vllm.v1.executor.ray_utils import RayWorkerWrapper
+
+
+logger = init_logger(__name__)
+
+
+class IPEXLLMV1Wrapper(RayWorkerWrapper):
+    def __init__(self, load_in_low_bit="sym_int4", *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        from ipex_llm.vllm.xpu.model_convert import _ipex_llm_convert
+        _ipex_llm_convert(load_in_low_bit=load_in_low_bit)
+        self.compiled_dag_cuda_device_set = False
+
+
+def get_ipex_llm_v1_wrapper(load_in_low_bit):
+    # The reason why we not using functools.partial is that
+    # ray seems not work well with it.
+    class WrapperWithLoadBit(IPEXLLMV1Wrapper):
+        def __init__(self, *args, **kwargs) -> None:
+            super().__init__(load_in_low_bit=load_in_low_bit, *args, **kwargs)
+
+    return WrapperWithLoadBit
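
One note on the class factory at the bottom of the new file: Ray instantiates worker wrapper classes remotely, and per the in-file comment functools.partial does not survive that path, so the low-bit setting is frozen into a throwaway subclass instead. A hedged sketch of the call site:

    from ipex_llm.vllm.xpu.ipex_llm_v1_wrapper import get_ipex_llm_v1_wrapper

    # WrapperCls accepts whatever the stock RayWorkerWrapper accepts; every
    # instance additionally runs _ipex_llm_convert("sym_int4") on construction.
    WrapperCls = get_ipex_llm_v1_wrapper("sym_int4")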
--- a/ipex_llm/vllm/xpu/model_convert.py
+++ b/ipex_llm/vllm/xpu/model_convert.py
@@ -65,9 +65,14 @@ def _model_sample_convert():
 def _ipex_llm_convert(load_in_low_bit):
     from vllm.worker.xpu_model_runner import XPUModelRunner
     from ipex_llm.vllm.xpu.ipex_llm_wrapper import get_ipex_llm_wrapper
-    import vllm.executor.ray_utils as ray_utils
+    from ipex_llm.vllm.xpu.ipex_llm_v1_wrapper import get_ipex_llm_v1_wrapper
+    import vllm.executor.ray_utils as ray_utils_v0
+    import vllm.v1.executor.ray_utils as ray_utils_v1
+    from vllm.v1.worker.gpu_model_runner import GPUModelRunner
     setattr(XPUModelRunner, "load_model", get_load_function(load_in_low_bit))
-    setattr(ray_utils, "RayWorkerWrapper", get_ipex_llm_wrapper(load_in_low_bit))
+    setattr(GPUModelRunner, "load_model", get_load_function(load_in_low_bit))
+    setattr(ray_utils_v0, "RayWorkerWrapper", get_ipex_llm_wrapper(load_in_low_bit))
+    setattr(ray_utils_v1, "RayWorkerWrapper", get_ipex_llm_v1_wrapper(load_in_low_bit))
 
 
 def get_load_function(low_bit):
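
In effect _ipex_llm_convert is a module-level monkey patch: the v0 and v1 executors each resolve RayWorkerWrapper through their own ray_utils module, and each is rebound to its own IPEX-LLM subclass. A sketch of what holds after the call (the module aliases are ours):

    import vllm.executor.ray_utils as ray_v0
    import vllm.v1.executor.ray_utils as ray_v1
    from ipex_llm.vllm.xpu.model_convert import _ipex_llm_convert

    _ipex_llm_convert(load_in_low_bit="sym_int4")
    # Both modules now expose IPEX-LLM wrappers (a distinct class per engine
    # version), so Ray workers created afterwards pick up low-bit conversion.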
@@ -77,19 +82,16 @@ def get_load_function(low_bit):
         # from vllm.utils import measure_device_memory
         from vllm.utils import DeviceMemoryProfiler
         with DeviceMemoryProfiler() as m:
+            from dataclasses import replace
+            new_device_config = DeviceConfig("cpu")
+            new_vllm_config = replace(self.vllm_config, device_config=new_device_config)
             self.model = get_model(
-                model_config=self.model_config,
-                device_config=DeviceConfig("cpu"),
-                load_config=self.load_config,
-                lora_config=self.lora_config,
-                parallel_config=self.parallel_config,
-                scheduler_config=self.scheduler_config,
-                cache_config=self.cache_config,
+                vllm_config=new_vllm_config
             )
-            if "qwen" in self.model_config.model.lower() or \
-                    "baichuan" in self.model_config.model.lower() or \
-                    "codegeex4-all" in self.model_config.model.lower() or \
-                    "chatglm" in self.model_config.model.lower():
+            if "qwen" in self.vllm_config.model_config.model.lower() or \
+                    "baichuan" in self.vllm_config.model_config.model.lower() or \
+                    "codegeex4-all" in self.vllm_config.model_config.model.lower() or \
+                    "chatglm" in self.vllm_config.model_config.model.lower():
                 self.model.apply(padding_mlp)
                 from ipex_llm import optimize_model
                 import os
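
The rewritten load path clones vllm_config with only the device swapped to CPU: weights are materialized and optimized on CPU first, then moved to the XPU device further down. A self-contained sketch of the dataclasses.replace idiom, using a hypothetical config type in place of vllm_config:

    from dataclasses import dataclass, replace

    @dataclass(frozen=True)
    class EngineConfig:  # hypothetical stand-in for vllm_config
        device: str
        dtype: str

    cfg = EngineConfig(device="xpu", dtype="float16")
    cpu_cfg = replace(cfg, device="cpu")  # shallow copy with one field overridden
    assert cfg.device == "xpu" and cpu_cfg.device == "cpu"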
@@ -99,18 +101,22 @@ def get_load_function(low_bit):
                 modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
             else:
                 modules = None
-            if "minicpm" in self.model_config.model.lower():
+            if "minicpm" in self.vllm_config.model_config.model.lower():
                 modules = ["vpm", "resampler"]
             # only for minicpm_2_6
-            if "minicpm-v" in self.model_config.model.lower():
+            if "minicpm-v" in self.vllm_config.model_config.model.lower():
                 from ipex_llm.transformers.models.minicpmv import merge_qkv
                 self.model.vpm.apply(merge_qkv)
-            if "internvl2" in self.model_config.model.lower():
+            if "internvl2" in self.vllm_config.model_config.model.lower():
                 modules = ["vision_model", "mlp1"]
-            optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
+            if "deepseek-v2" in self.vllm_config.model_config.model.lower():
+                modules = ["down_proj"]
+            optimize_model(self.model,
+                           low_bit=low_bit,
+                           torch_dtype=self.vllm_config.model_config.dtype,
                            modules_to_not_convert=modules)
-            self.model = self.model.to(device=self.device_config.device,
-                                       dtype=self.model_config.dtype)
+            self.model = self.model.to(device=self.vllm_config.device_config.device,
+                                       dtype=self.vllm_config.model_config.dtype)
 
         self.model_memory_usage = m.consumed_memory
         logger = init_logger(__name__)
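
The dispatch above chooses modules_to_not_convert by model-name substring, with deepseek-v2 (down_proj kept unconverted) the newly handled family, then quantizes the rest of the model in place. A hedged sketch of the call pattern with illustrative values; the stand-in module and dtype are ours, the keyword arguments come from the diff:

    import torch
    from ipex_llm import optimize_model

    model = torch.nn.Sequential(torch.nn.Linear(8, 8))  # stand-in module
    optimize_model(model,
                   low_bit="sym_int4",               # quantization format
                   torch_dtype=torch.float16,        # illustrative dtype
                   modules_to_not_convert=None)      # e.g. ["down_proj"] for deepseek-v2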
--- a/ipex_llm-2.2.0b20250211.dist-info/METADATA
+++ b/ipex_llm-2.2.0b20250212.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250211
+Version: 2.2.0b20250212
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250211 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250212 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250211 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250212 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250211 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250211 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250211 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250212 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250212 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250212 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250211 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250211 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250211 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250212 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250212 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250212 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.0b20250211 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250212 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250211 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250211 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250211 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250212 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250212 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250212 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250211 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250211 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250211 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250212 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250212 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250212 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250211 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250211 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250211 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250212 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250212 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250212 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
--- a/ipex_llm-2.2.0b20250211.dist-info/RECORD
+++ b/ipex_llm-2.2.0b20250212.dist-info/RECORD
@@ -94,7 +94,7 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=pJHs6GZXHIObVE4BUCuej-6BKBZZg9pYWKPrkhWSfB4,1192
-ipex_llm/transformers/convert.py,sha256=42qHApc3hoL38ldQXTv9BkDVQ0Zb-xGJYA2DWYFzwMg,100795
+ipex_llm/transformers/convert.py,sha256=t-2tMK9ktT0GGszoN65gQfVtBvUV1Yka7uyp4CUFGok,100851
 ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
@@ -162,6 +162,7 @@ ipex_llm/transformers/models/gptbigcode.py,sha256=cP1_qGWoa43R2WacAMblShjku4Qupc
 ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5IiUBAEyoMUqQ,6172
 ipex_llm/transformers/models/internlm.py,sha256=JZFrI2HXsIAfM-6pA2RO0wcXopOliC1FggLMzNzaDZ4,17404
 ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
+ipex_llm/transformers/models/janus.py,sha256=0URo2NC8_2CGaOl3CiVB3IFTVsYyplMFgjBJdPDNBsY,1509
 ipex_llm/transformers/models/llama.py,sha256=rqrNjuZb_jeb9MKx0z-FSVoGx8YDBxQzPJ9ZUvYhgx0,9138
 ipex_llm/transformers/models/minicpm.py,sha256=eaPNVNrep0_xGoELhZd886ff0ceoKqB6cusdAhd52eE,10145
 ipex_llm/transformers/models/minicpm3.py,sha256=11cYl8KM2hoIJNMAOZMxiwCu6dMhup9ric_OEn8-VrQ,9363
@@ -182,7 +183,7 @@ ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6m
 ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
 ipex_llm/transformers/models/stablelm.py,sha256=fj-XtOnR6kggnFUQTMPCOOzolkPztN06WAv8QW-XRnI,7054
 ipex_llm/transformers/models/starcoder2.py,sha256=ONKvD7JCkRM0DI-R56x28QFBJ7CjD5hOZBQ_3WfOcNk,6626
-ipex_llm/transformers/models/utils.py,sha256=qI5ln8SQGTvR_IyxFkoZhefgOErnXUnJrifIyhiqT9c,14753
+ipex_llm/transformers/models/utils.py,sha256=c3hh0YDHE-Qg7SQBXhnNXf85Nx7jopZFfa1KS-Pe6kQ,14734
 ipex_llm/transformers/models/whisper.py,sha256=ju3WP8Eq-KvD7kb3Qy51r4FOfSX3NBxfp5RBcq__gzc,4241
 ipex_llm/transformers/models/yuan.py,sha256=JYAn_ZaSGK0NBJLEIxCACfAq084a66GFJkdd5NbpmMA,7732
 ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
@@ -250,17 +251,18 @@ ipex_llm/vllm/cpu/entrypoints/api_server.py,sha256=RDe3GrGMw47c7dVZL2IMmTHtGegnz
 ipex_llm/vllm/cpu/entrypoints/openai/api_server.py,sha256=l0tHnCaD96WD2k3zLTrYtLrcigf1eqYvszs79fYgoK8,29812
 ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
 ipex_llm/vllm/xpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
+ipex_llm/vllm/xpu/ipex_llm_v1_wrapper.py,sha256=pd939vFomKIg9Qn2NO4u0OF6hPgvQpqcfJSxqBzcqhA,825
 ipex_llm/vllm/xpu/ipex_llm_wrapper.py,sha256=_CbhvBuf_KPnmLfngYKtJl5gPAHVsG2mWth3wSeaH3M,892
-ipex_llm/vllm/xpu/model_convert.py,sha256=tDgaOliyBJdpcCctFNlMd9RK8fA7usLQi2RadgOj--A,7599
+ipex_llm/vllm/xpu/model_convert.py,sha256=0lfzHhCtTVSZZdlqXAOAEtK3FsouqEheSGr9MjpWz38,8137
 ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZjWqm5YiA,783
-ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
-ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
-ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.0b20250211.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
-ipex_llm-2.2.0b20250211.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
-ipex_llm-2.2.0b20250211.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
-ipex_llm-2.2.0b20250211.dist-info/METADATA,sha256=bXwvxJIBVIayYIeP1lVUyFJXiMqzVppGcvwttejTvOk,12369
-ipex_llm-2.2.0b20250211.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
-ipex_llm-2.2.0b20250211.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.2.0b20250211.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.2.0b20250211.dist-info/RECORD,,
+ipex_llm/vllm/xpu/engine/engine.py,sha256=ClvTHz_w52IU_vUw-LhotLduCMu0zPl9kebnQEKQq4A,9494
+ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=OScQFmAdrd-PyEyQ2Mr8fJrk_lC6MHqPraphVk-NDzw,33140
+ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
+ipex_llm-2.2.0b20250212.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
+ipex_llm-2.2.0b20250212.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
+ipex_llm-2.2.0b20250212.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
+ipex_llm-2.2.0b20250212.dist-info/METADATA,sha256=VFWO-HzZd_Ecx1J6HceoMwKM-u8x4aYVet1wDKSO7Ig,12369
+ipex_llm-2.2.0b20250212.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
+ipex_llm-2.2.0b20250212.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250212.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250212.dist-info/RECORD,,