bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/common.lib +0 -0
- bigdl-core-npu/ggml.dll +0 -0
- bigdl-core-npu/ggml.lib +0 -0
- bigdl-core-npu/include/llamacpp/arg.h +77 -0
- bigdl-core-npu/include/llamacpp/common.h +563 -0
- bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
- bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
- bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
- bigdl-core-npu/include/llamacpp/llama.h +1234 -0
- bigdl-core-npu/include/llamacpp/log.h +92 -0
- bigdl-core-npu/include/npu/npu_common.h +119 -0
- bigdl-core-npu/include/npu/npu_llm.h +77 -0
- bigdl-core-npu/llama-cli-npu.exe +0 -0
- bigdl-core-npu/llama.dll +0 -0
- bigdl-core-npu/llama.lib +0 -0
- bigdl-core-npu/llm-cli.exe +0 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl-core-npu/zlib1.dll +0 -0
- bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/base.py +39 -4
- intel_npu_acceleration_library/backend/bindings.py +109 -5
- intel_npu_acceleration_library/backend/factory.py +264 -47
- intel_npu_acceleration_library/backend/ops.py +2 -1
- intel_npu_acceleration_library/backend/qlinear.py +8 -4
- intel_npu_acceleration_library/backend/runtime.py +7 -2
- intel_npu_acceleration_library/backend/tensor.py +73 -3
- intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/device.py +2 -2
- intel_npu_acceleration_library/dtypes.py +34 -1
- intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
- intel_npu_acceleration_library/external/openvino/utils.py +17 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/nn/module.py +17 -17
intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py ADDED
@@ -0,0 +1,182 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# flake8: noqa
+# mypy: ignore-errors
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+from openvino.frontend.jax.passes import filter_element, filter_ivalue, filter_param
+from openvino.runtime import op, Type as OVType, Shape, OVAny
+
+numpy_to_ov_type_map = {
+    np.float32: OVType.f32,
+    bool: OVType.boolean,
+    jax.dtypes.bfloat16: OVType.bf16,  # TODO: check this
+    np.float16: OVType.f16,
+    np.float32: OVType.f32,
+    np.float64: OVType.f64,
+    np.uint8: OVType.u8,
+    np.int8: OVType.i8,
+    np.uint16: OVType.u16,
+    np.int16: OVType.i16,
+    np.uint32: OVType.u32,
+    np.int32: OVType.i32,
+    np.uint64: OVType.u64,
+    np.int64: OVType.i64,
+}
+
+jax_to_ov_type_map = {
+    jnp.float32: OVType.f32,
+    jnp.bfloat16: OVType.bf16,  # TODO: check this
+    jnp.float16: OVType.f16,
+    jnp.float64: OVType.f64,
+    jnp.uint8: OVType.u8,
+    jnp.int8: OVType.i8,
+    jnp.uint16: OVType.u16,
+    jnp.int16: OVType.i16,
+    jnp.uint32: OVType.u32,
+    jnp.int32: OVType.i32,
+    jnp.uint64: OVType.u64,
+    jnp.int64: OVType.i64,
+}
+
+try:
+    jax_to_ov_type_map[jnp.bool] = OVType.boolean
+except:
+    pass
+
+basic_to_ov_type_map = {
+    int: OVType.i64,
+    float: OVType.f32,
+    bool: OVType.boolean,
+}
+
+ov_type_to_int_map = {
+    OVType.u8: 0,
+    OVType.i8: 1,
+    OVType.i16: 2,
+    OVType.i32: 3,
+    OVType.i64: 4,
+    OVType.f16: 5,
+    OVType.f32: 6,
+    OVType.f64: 7,
+    OVType.u16: 8,
+    OVType.u32: 9,
+    OVType.u64: 10,
+    OVType.boolean: 11,
+    OVType.bf16: 15,
+}
+
+
+def get_type_from_py_type(value):
+    if isinstance(value, float):
+        return OVType.f32
+    if isinstance(value, bool):
+        return OVType.boolean
+    if isinstance(value, int):
+        return OVType.i64
+    return OVType.dynamic
+
+
+def get_type_from_np_type(value):
+    for np_dtype, ov_type in numpy_to_ov_type_map.items():
+        if isinstance(value, np_dtype):
+            return ov_type
+    return None
+
+
+def _get_ov_type_from_value(value):
+    ov_type = get_type_from_np_type(value)
+    if ov_type is None:
+        ov_type = get_type_from_py_type(value)
+    return ov_type
+
+
+def get_ov_type_for_value(value):
+    if isinstance(value, (jax.core.Var, jax.core.Literal)):
+        if value.aval.dtype in jax_to_ov_type_map:
+            return OVAny(jax_to_ov_type_map[value.aval.dtype])
+        for k, v in numpy_to_ov_type_map.items():
+            if value.aval.dtype == k:
+                return OVAny(v)
+        for k, v in basic_to_ov_type_map.items():
+            if isinstance(value.aval.dtype, k):
+                return OVAny(v)
+    elif isinstance(value, (int, float, bool)):
+        return OVAny(jax_to_ov_type_map[type(value)])
+    else:
+        raise NotImplementedError(f"dtype for {value} of type {type(value)} has not been supported yet.")
+
+
+def get_ov_type_from_jax_type(dtype):
+    if dtype in jax_to_ov_type_map:
+        return OVAny(jax_to_ov_type_map[dtype])
+    for k, v in numpy_to_ov_type_map.items():
+        if dtype == k:
+            return OVAny(v)
+    for k, v in basic_to_ov_type_map.items():
+        if isinstance(dtype, k):
+            return OVAny(v)
+    return None
+
+
+def jax_array_to_ov_const(arr: np.ndarray, shared_memory=True):
+    # TODO: deal with bfloat16 dtype here.
+    if isinstance(arr, np.ndarray):
+        return op.Constant(arr, shared_memory=shared_memory)
+    elif isinstance(arr, jax.Array):
+        return op.Constant(np.array(jax.device_get(arr)), shared_memory=shared_memory)
+    else:
+        raise ValueError(f"Constant is expected to be a numpy array or jax array but got {type(arr)}")
+
+
+def ivalue_to_constant(ivalue, shared_memory=True):
+    '''
+    Convert a python object to an openvino constant.
+    '''
+    # print('ivalue = ', ivalue)
+    ivalue = filter_ivalue(ivalue)
+    ov_type = _get_ov_type_from_value(ivalue)
+    if ov_type.is_static():
+        return op.Constant(ov_type, Shape([]), [ivalue]).outputs()
+    if isinstance(ivalue, (list, tuple)):
+        assert len(ivalue) > 0, "Can't deduce type for empty list"
+        if isinstance(ivalue[0], (list, tuple)):
+            second_len = len(ivalue[0])
+            flattened_ivalue = []
+            for value in ivalue:
+                assert isinstance(value, (list, tuple)), "Can't deduce type for a list with both list and basic types."
+                assert len(value) == second_len or len(value) == 0, "Can't deduce type for nested list with different lengths."
+                flattened_ivalue.extend([filter_element(item) for item in value])
+            flattened_ivalue = [item for sublist in ivalue for item in sublist]
+            ov_type = _get_ov_type_from_value(flattened_ivalue[0])
+            assert ov_type.is_static(), f"Can't deduce type {flattened_ivalue[0].__class__} for list"
+            return op.Constant(ov_type, Shape([len(ivalue), second_len]), flattened_ivalue).outputs()
+        ivalue = [filter_element(item) for item in ivalue]
+        ov_type = _get_ov_type_from_value(ivalue[0])
+        try:
+            assert ov_type.is_static(), f"Can't deduce type {ivalue[0].__class__} for list"
+        except:
+            # TODO 150596: remove this workaround
+            ivalue = [0]
+            ov_type = OVType.f32
+        return op.Constant(ov_type, Shape([len(ivalue)]), ivalue).outputs()
+
+    if isinstance(ivalue, (jax.Array, np.ndarray)):
+        return jax_array_to_ov_const(ivalue, shared_memory=shared_memory).outputs()
+
+    ov_dtype_value = get_ov_type_from_jax_type(ivalue)
+    if ov_dtype_value is not None:
+        return op.Constant(OVType.i64, Shape([]), [ov_type_to_int_map[ov_dtype_value]]).outputs()
+
+    return None
+
+
+def param_to_constants(primitive: str, param_name: str, jaxpr, shared_memory=True):
+    processed_params = filter_param(primitive, param_name, jaxpr)
+
+    for k, v in processed_params.items():
+        processed_params[k] = ivalue_to_constant(v, shared_memory=shared_memory)
+    return processed_params
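A quick way to read the new jax/utils.py helpers above: Python scalars are mapped to OVType values and then wrapped into scalar op.Constant nodes. Below is a minimal hedged sketch of that chain as a standalone toy function, not the frontend's own code path; it assumes an openvino install that provides openvino.runtime.op.

    # Hedged sketch (not part of the package): map plain Python scalars to
    # OpenVINO constants the way get_type_from_py_type()/ivalue_to_constant() do.
    from openvino.runtime import op, Shape, Type as OVType

    def scalar_to_constant(value):
        # bool is checked before int because isinstance(True, int) is True
        if isinstance(value, float):
            ov_type = OVType.f32
        elif isinstance(value, bool):
            ov_type = OVType.boolean
        elif isinstance(value, int):
            ov_type = OVType.i64
        else:
            raise TypeError(f"unsupported scalar {value!r}")
        return op.Constant(ov_type, Shape([]), [value])

    print(scalar_to_constant(3).get_element_type())    # i64
    print(scalar_to_constant(0.5).get_element_type())  # f32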
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py CHANGED
@@ -16,6 +16,11 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)


+class InlinedInput:
+    def __init__(self, data) -> None:
+        self.data = data
+
+
 class TorchFXPythonDecoder (Decoder):

     def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]):
@@ -30,6 +35,7 @@ class TorchFXPythonDecoder (Decoder):
         self.input_shapes = input_shapes

         self._input_signature = []
+        self._example_input = None

         if issubclass(type(pt_module), torch.fx.graph_module.GraphModule):

@@ -58,7 +64,7 @@ class TorchFXPythonDecoder (Decoder):
                             for arg in uargs if arg[1] is not None]
             for idx, shape in enumerate(found_shapes):
                 if shape is not None:
-                    new_shape=[]
+                    new_shape = []
                     for dim in range(0, len(shape)):
                         if (type(shape[dim]).__name__ == "SymInt"):
                             new_shape.append(-1)
@@ -80,7 +86,7 @@ class TorchFXPythonDecoder (Decoder):

            # None in inputs mean the input is inlined or None (also considered inlined)
            self._inputs = [self._nodes.index(
-                arg) if arg in self._nodes else (arg
+                arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args]

            # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments.
            new_inputs = []
@@ -91,22 +97,22 @@ class TorchFXPythonDecoder (Decoder):
                        if arg in self._nodes:
                            new_inputs.append(self._nodes.index(arg))
                        else:
-                            new_inputs.append((arg
+                            new_inputs.append(InlinedInput(arg))
                        self.input_types.append(OVAny(DecoderType.List(
                            TorchFXPythonDecoder.get_type_for_value(arg))))
                else:
                    v = self._inputs[i]
                    new_inputs.append(v)
                    self.input_types.append(
-                        TorchFXPythonDecoder.get_type_for_value(v
+                        TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v]))
        self._inputs = new_inputs

    def inputs(self):
        # Consider 0 a special case which may mean the input is inlined, but not guaranteed
-        return [x if not isinstance(x,
+        return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs]

    def is_input_inlined(self, index):
-        return isinstance(self._inputs[index],
+        return isinstance(self._inputs[index], InlinedInput)

    @staticmethod
    def unpack_containers(arg):
@@ -141,19 +147,24 @@ class TorchFXPythonDecoder (Decoder):
            return make_constant(OVType.i64, Shape([]), [arg])
        elif isinstance(arg, float):
            return make_constant(OVType.f32, Shape([]), [arg])
+        elif isinstance(arg, str):
+            u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8)
+            return torch_tensor_to_ov_const(u8_tensor, shared_memory=True)
        return None

    def inlined_input(self, index):
        assert index < len(self._inputs), "Requested input doesn't exist"
        assert isinstance(
-            self._inputs[index],
-
+            self._inputs[index], InlinedInput), "Requested input which is not inlined"
+        arg = self._inputs[index].data
+        assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}"
        constant = None
-        arg = self._inputs[index][0]
        constant = self.arg_to_constant(arg)

-
-
+        if constant is not None:
+            return constant.outputs()
+        else:
+            return []

    def input(self, index):  # TODO: remove
        return self.inputs()[index]  # TODO: find specialized method
@@ -256,9 +267,7 @@ class TorchFXPythonDecoder (Decoder):
        raise RuntimeError("This input is not a Node")

    def get_subgraph_size(self):
-
-        return 0
-        return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1
+        return len(self.get_subgraphs())

    def decoder_type_name(self) -> str:
        return "fx"
@@ -276,9 +285,7 @@ class TorchFXPythonDecoder (Decoder):
            node_visitor(decoder)

    def get_subgraphs(self):
-
-        return []
-        return list(self.pt_module.blocks())
+        return []

    def get_subgraph_decoder(self, index):
        decoder = TorchFXPythonDecoder(self.get_subgraphs()[index],
@@ -308,7 +315,7 @@ class TorchFXPythonDecoder (Decoder):
        return self._raw_outputs()[index]

    def _raw_inputs(self):
-        return [self._nodes[x] if not isinstance(x,
+        return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs]

    def _raw_input(self, index):
        return self._raw_inputs()[index]
@@ -316,6 +323,13 @@ class TorchFXPythonDecoder (Decoder):
    def num_of_outputs(self):
        return len(self.outputs())

+    def output_list_size(self):
+        max_out_id = -1
+        for user in self.pt_module.users:
+            if "<built-in function getitem>" == str(user.target) and max_out_id < user.args[1]:
+                max_out_id = user.args[1]
+        return max_out_id + 1
+
    def output(self, index):
        return self.outputs()[index]

@@ -339,7 +353,7 @@ class TorchFXPythonDecoder (Decoder):
            return None

    def input_is_none(self, index):
-        if index >= len(self._inputs) or (isinstance(self._inputs[index],
+        if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None):
            return True
        else:
            r_input = self._raw_input(index)
@@ -350,3 +364,7 @@ class TorchFXPythonDecoder (Decoder):

    def may_produce_alias(self, in_index: int, out_index: int) -> bool:
        return False
+
+    def get_rt_info(self):
+        rt_info = {}
+        return rt_info
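The InlinedInput change above replaces the old tuple-wrapping convention for non-graph-node arguments. A small hedged sketch of the idea in isolation (toy names, not the decoder itself):

    # Hedged sketch: wrap arguments that are not graph nodes so that "is this
    # input inlined?" becomes a plain isinstance() check.
    class InlinedInput:
        def __init__(self, data) -> None:
            self.data = data

    def index_args(args, nodes):
        return [nodes.index(a) if a in nodes else InlinedInput(a) for a in args]

    nodes = ["node_a", "node_b"]
    inputs = index_args(["node_b", 42, None], nodes)
    # inputs() analogue: inlined entries are reported as the special index 0
    print([x if not isinstance(x, InlinedInput) else 0 for x in inputs])  # [1, 0, 0]
    print([isinstance(x, InlinedInput) for x in inputs])                  # [False, True, True]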
intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py CHANGED
@@ -32,7 +32,7 @@ def patched_forward(self, *args, **kwargs):
     x = args[0]
     dtype = x.dtype
     outshape = x.shape[:-1] + (self.width,)
-    x = x.view(-1, x.shape[-1])
+    x = x.contiguous().view(-1, x.shape[-1])
     groups = self.qzeros.shape[0]
     height = self.qweight.shape[0]

@@ -43,8 +43,6 @@ def patched_forward(self, *args, **kwargs):
     unpacked_zp = decompression_pattern(
         self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)

-    unpacked_zp = unpacked_zp.to(dtype) + 1
-
     unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
     unpacked_weights = unpacked_weights.view(-1, self.width)

@@ -59,11 +57,50 @@ def patched_forward(self, *args, **kwargs):
     return out


+def patched_forward_sym(self, *args, **kwargs):
+    if hasattr(self, '_hf_hook'):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+
+    x = args[0]
+    dtype = x.dtype
+    outshape = x.shape[:-1] + (self.width,)
+    x = x.contiguous().view(-1, x.shape[-1])
+    height = self.qweight.shape[0]
+
+    unpacked_weights = decompression_pattern(
+        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
+    unpacked_weights = torch.transpose(
+        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
+
+    # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
+    unpacked_weights = unpacked_weights.to(dtype) * self.scales
+    unpacked_weights = unpacked_weights.view(-1, self.width)
+
+    out = x @ unpacked_weights
+
+    out = out.view(outshape)
+    if self.bias is not None:
+        out.add_(self.bias)
+
+    if hasattr(self, '_hf_hook'):
+        out = self._hf_hook.post_forward(self, out)
+    return out
+
+
 # All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
 supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']


 def patch_model(model):
+    is_symmetrical = False
+    config = None
+    if hasattr(model, "config"):
+        config = model.config
+    elif hasattr(model, "model") and hasattr(model.model, "config"):
+        # original model was wrapped
+        config = model.model.config
+    if config is not None and hasattr(config, 'quantization_config') and hasattr(config.quantization_config, 'sym'):
+        is_symmetrical = config.quantization_config.sym
     for name, m in model.named_modules():
         if hasattr(m, '_openvino_patch_orig_forward'):
             # already patched, skipping
@@ -87,7 +124,10 @@ def patch_model(model):
         assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups

         m._openvino_patch_orig_forward = m.forward
-
+        if is_symmetrical:
+            m.forward = partial(patched_forward_sym, m)
+        else:
+            m.forward = partial(patched_forward, m)

         # Keep original field properties to be used when model is returned back to its original state
         m._openvino_patch_orig_qweights_type = m.qweight.dtype
@@ -97,11 +137,12 @@ def patch_model(model):
         m.qweight = m.qweight.view(dtype=torch.uint8)
         m.qzeros = m.qzeros.view(dtype=torch.uint8)

-        # TODO: Redundant tensor copy? Try to remove m.
+        # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
         m.add_module(
             '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
+        # Adding 17 to move zp+1 step from after unpacking to before to have correct decompression pattern. Can it overflow?
         m.add_module('_openvino_u4_compression_submodule_qzeros',
-                     KeepWeight(m.qzeros))
+                     KeepWeight(m.qzeros + torch.tensor(17, dtype=torch.uint8)))

         m.scales = m.scales.view(-1, 1, m.width)

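The `+ torch.tensor(17, ...)` in the last hunk folds the former "zero point + 1" step into the stored uint8 qzeros: each byte packs two unsigned 4-bit zero points, and adding 0x11 == 17 bumps both nibbles by one (the code comment itself flags the possible nibble overflow). A small illustrative check, not package code:

    import numpy as np

    def unpack_u4(byte):
        # returns (low nibble, high nibble)
        return byte & 0x0F, (byte >> 4) & 0x0F

    packed = np.uint8((5 << 4) | 3)        # packs zero points 3 (low) and 5 (high)
    lo, hi = unpack_u4(int(packed) + 17)   # add 0x11 before unpacking
    assert (lo, hi) == (4, 6)              # both nibbles incremented by 1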
intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py CHANGED
@@ -30,6 +30,7 @@ def patch_model(model, module_extensions, orig_forward_name):

         if extension:
             # The Trampoline class is instantiated for every module replacement, so we can use class members individually for each module.
+
             class Trampoline(torch.autograd.Function):
                 target_extension = extension
                 original_module = m
@@ -83,16 +84,35 @@ def unpatch_model(model, orig_forward_name):


 def __make_16bit_traceable(model: torch.nn.Module):
-
-
-
-
-
-
-
+    """
+    Prepare a 16-bit PyTorch model for tracing with OpenVINO.
+    - Replace known list of modules with ModuleExtension.
+    - Convert other modules with weights to FP32.
+    """
+    extensions = {
+        torch.nn.Linear: ModuleExtension(
+            torch.nn.Linear, "ov_ext::linear",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)),
+        torch.nn.Embedding: ModuleExtension(
+            torch.nn.Embedding, "ov_ext::embedding",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape) + [module.embedding_dim], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, args[0], module.padding_idx, module.scale_grad_by_freq, module.sparse)),
     }
+    try:
+        from transformers.pytorch_utils import Conv1D
+        extensions[Conv1D] = ModuleExtension(
+            Conv1D, "ov_ext::conv1d",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias))
+    except:
+        pass
     patch_model(model, extensions,
                 "_openvino_module_extension_patch_orig_forward")
     for _, module in model.named_modules():
-        if module.__class__ not in extensions and
+        if module.__class__ not in extensions and (any([p.dtype in [torch.float16, torch.bfloat16] for p in module.parameters(False)])
+                                                   or any([b.dtype in [torch.float16, torch.bfloat16] for b in module.buffers(False)])):
             module.float()
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py CHANGED
@@ -13,6 +13,7 @@ import torch
 from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
 from torch._dynamo.backends.registry import register_backend
 from torch._inductor.compile_fx import compile_fx
+from torch._inductor.freezing import replace_params_with_constants
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch._decomp import decomposition_table, get_decompositions

@@ -54,10 +55,9 @@ def openvino(subgraph, example_inputs, options=None):
     if (_get_aot_autograd(options)):
         global openvino_options
         openvino_options = options
-        decompositions = _get_decompositions(options) + get_inf_decomposition_list()
-
-
-                            bw_compiler=fx_openvino,
+        decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
+        return aot_autograd(fw_compiler=fx_openvino,
+                            bw_compiler=fx_openvino,
                             decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
     return fx_openvino(subgraph, example_inputs, options)

@@ -86,7 +86,14 @@ def fx_openvino(subgraph, example_inputs, options=None):
        if inputs_reversed:
            example_inputs.reverse()

+        preserved_arg_indices = []
        if (_get_aot_autograd(options)):
+            if tracing_context := torch._guards.TracingContext.try_get():
+                fw_metadata = tracing_context.fw_metadata
+                params_flat = tracing_context.params_flat
+                assert fw_metadata is not None and params_flat is not None
+                preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
+                example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
            model = subgraph
        else:
            from torch._subclasses.fake_tensor import FakeTensorMode
@@ -96,7 +103,6 @@ def fx_openvino(subgraph, example_inputs, options=None):

        with torch.no_grad():
            model.eval()
-
        partitioner = Partitioner(options)
        compiled_model = partitioner.make_partitions(model, options)

@@ -107,9 +113,15 @@ def fx_openvino(subgraph, example_inputs, options=None):
                executor_parameters["model_hash_str"] += "_fs"

        def _call(*args):
+            if(_get_aot_autograd(options)):
+                args_list = args[0]
+                args_new = [args_list[i] for i in preserved_arg_indices]
+                args = args_new
            res = execute(compiled_model, *args, executor="openvino",
                          executor_parameters=executor_parameters, options=options)
            return res
+        if(_get_aot_autograd(options)):
+            _call._boxed_call = True  # type: ignore[attr-defined]
        return _call
    except Exception as e:
        logger.debug(f"Failed in OpenVINO execution: {e}")
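These hunks are exercised when OpenVINO is used as a torch.compile backend; the aot_autograd branch now freezes parameters into constants and marks the compiled callable as boxed. A hedged invocation sketch follows; the "aot_autograd" and "device" option keys are inferred from _get_aot_autograd() and this module, and should be treated as assumptions:

    import torch

    model = torch.nn.Linear(8, 8)
    compiled = torch.compile(model, backend="openvino",
                             options={"device": "CPU", "aot_autograd": True})
    print(compiled(torch.randn(1, 8)).shape)   # torch.Size([1, 8])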
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py CHANGED
@@ -241,6 +241,7 @@ class OperatorSupport(OperatorSupport):
             "torch.ops.aten.transpose.int": None,
             "torch.ops.aten.tril.default": None,
             "torch.ops.aten.tril_.default": None,
+            "torch.ops.aten.triu.default": None,
             "torch.ops.aten.unbind.int": None,
             "torch.ops.aten.unfold.default": None,
             "torch.ops.aten.unsqueeze.default": None,