bigdl-core-npu 2.6.0b20241120__cp310-cp310-win_amd64.whl → 2.6.0b20241122__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/include/common.h +82 -0
- bigdl-core-npu/include/npu_llm.h +62 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241122.dist-info}/METADATA +1 -2
- {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241122.dist-info}/RECORD +77 -79
- {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241122.dist-info}/WHEEL +1 -1
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +15 -5
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +66 -13
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +29 -19
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +46 -5
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +92 -63
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +5 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +131 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +13 -4
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +1 -1
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +29 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +0 -1
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_jax_frontend.dll +0 -0
- {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241122.dist-info}/top_level.txt +0 -0
@@ -5,34 +5,40 @@
|
|
5
5
|
# mypy: ignore-errors
|
6
6
|
|
7
7
|
import jax
|
8
|
-
import numpy as np
|
9
8
|
import jax.numpy as jnp
|
10
|
-
|
9
|
+
import numpy as np
|
10
|
+
from openvino.frontend.jax.passes import filter_element, filter_ivalue, filter_param
|
11
11
|
from openvino.runtime import op, Type as OVType, Shape, OVAny
|
12
12
|
|
13
13
|
numpy_to_ov_type_map = {
|
14
14
|
np.float32: OVType.f32,
|
15
15
|
bool: OVType.boolean,
|
16
|
-
jax.dtypes.bfloat16: OVType.bf16,
|
16
|
+
jax.dtypes.bfloat16: OVType.bf16, # TODO: check this
|
17
17
|
np.float16: OVType.f16,
|
18
18
|
np.float32: OVType.f32,
|
19
19
|
np.float64: OVType.f64,
|
20
20
|
np.uint8: OVType.u8,
|
21
21
|
np.int8: OVType.i8,
|
22
|
+
np.uint16: OVType.u16,
|
22
23
|
np.int16: OVType.i16,
|
24
|
+
np.uint32: OVType.u32,
|
23
25
|
np.int32: OVType.i32,
|
26
|
+
np.uint64: OVType.u64,
|
24
27
|
np.int64: OVType.i64,
|
25
28
|
}
|
26
29
|
|
27
30
|
jax_to_ov_type_map = {
|
28
31
|
jnp.float32: OVType.f32,
|
29
|
-
jnp.bfloat16: OVType.bf16,
|
32
|
+
jnp.bfloat16: OVType.bf16, # TODO: check this
|
30
33
|
jnp.float16: OVType.f16,
|
31
34
|
jnp.float64: OVType.f64,
|
32
35
|
jnp.uint8: OVType.u8,
|
33
36
|
jnp.int8: OVType.i8,
|
37
|
+
jnp.uint16: OVType.u16,
|
34
38
|
jnp.int16: OVType.i16,
|
39
|
+
jnp.uint32: OVType.u32,
|
35
40
|
jnp.int32: OVType.i32,
|
41
|
+
jnp.uint64: OVType.u64,
|
36
42
|
jnp.int64: OVType.i64,
|
37
43
|
}
|
38
44
|
|
@@ -56,10 +62,14 @@ ov_type_to_int_map = {
|
|
56
62
|
OVType.f16: 5,
|
57
63
|
OVType.f32: 6,
|
58
64
|
OVType.f64: 7,
|
65
|
+
OVType.u16: 8,
|
66
|
+
OVType.u32: 9,
|
67
|
+
OVType.u64: 10,
|
59
68
|
OVType.boolean: 11,
|
60
69
|
OVType.bf16: 15,
|
61
70
|
}
|
62
71
|
|
72
|
+
|
63
73
|
def get_type_from_py_type(value):
|
64
74
|
if isinstance(value, float):
|
65
75
|
return OVType.f32
|
@@ -69,6 +79,21 @@ def get_type_from_py_type(value):
|
|
69
79
|
return OVType.i64
|
70
80
|
return OVType.dynamic
|
71
81
|
|
82
|
+
|
83
|
+
def get_type_from_np_type(value):
|
84
|
+
for np_dtype, ov_type in numpy_to_ov_type_map.items():
|
85
|
+
if isinstance(value, np_dtype):
|
86
|
+
return ov_type
|
87
|
+
return None
|
88
|
+
|
89
|
+
|
90
|
+
def _get_ov_type_from_value(value):
|
91
|
+
ov_type = get_type_from_np_type(value)
|
92
|
+
if ov_type is None:
|
93
|
+
ov_type = get_type_from_py_type(value)
|
94
|
+
return ov_type
|
95
|
+
|
96
|
+
|
72
97
|
def get_ov_type_for_value(value):
|
73
98
|
if isinstance(value, (jax.core.Var, jax.core.Literal)):
|
74
99
|
if value.aval.dtype in jax_to_ov_type_map:
|
@@ -83,7 +108,8 @@ def get_ov_type_for_value(value):
|
|
83
108
|
return OVAny(jax_to_ov_type_map[type(value)])
|
84
109
|
else:
|
85
110
|
raise NotImplementedError(f"dtype for {value} of type {type(value)} has not been supported yet.")
|
86
|
-
|
111
|
+
|
112
|
+
|
87
113
|
def get_ov_type_from_jax_type(dtype):
|
88
114
|
if dtype in jax_to_ov_type_map:
|
89
115
|
return OVAny(jax_to_ov_type_map[dtype])
|
@@ -95,6 +121,7 @@ def get_ov_type_from_jax_type(dtype):
|
|
95
121
|
return OVAny(v)
|
96
122
|
return None
|
97
123
|
|
124
|
+
|
98
125
|
def jax_array_to_ov_const(arr: np.ndarray, shared_memory=True):
|
99
126
|
# TODO: deal with bfloat16 dtype here.
|
100
127
|
if isinstance(arr, np.ndarray):
|
@@ -104,26 +131,52 @@ def jax_array_to_ov_const(arr: np.ndarray, shared_memory=True):
|
|
104
131
|
else:
|
105
132
|
raise ValueError(f"Constant is expected to be a numpy array or jax array but got {type(arr)}")
|
106
133
|
|
134
|
+
|
107
135
|
def ivalue_to_constant(ivalue, shared_memory=True):
|
108
136
|
'''
|
109
137
|
Convert a python object to an openvino constant.
|
110
138
|
'''
|
111
|
-
|
139
|
+
# print('ivalue = ', ivalue)
|
140
|
+
ivalue = filter_ivalue(ivalue)
|
141
|
+
ov_type = _get_ov_type_from_value(ivalue)
|
112
142
|
if ov_type.is_static():
|
113
143
|
return op.Constant(ov_type, Shape([]), [ivalue]).outputs()
|
114
|
-
|
115
144
|
if isinstance(ivalue, (list, tuple)):
|
116
145
|
assert len(ivalue) > 0, "Can't deduce type for empty list"
|
117
|
-
|
118
|
-
|
146
|
+
if isinstance(ivalue[0], (list, tuple)):
|
147
|
+
second_len = len(ivalue[0])
|
148
|
+
flattened_ivalue = []
|
149
|
+
for value in ivalue:
|
150
|
+
assert isinstance(value, (list, tuple)), "Can't deduce type for a list with both list and basic types."
|
151
|
+
assert len(value) == second_len or len(value) == 0, "Can't deduce type for nested list with different lengths."
|
152
|
+
flattened_ivalue.extend([filter_element(item) for item in value])
|
153
|
+
flattened_ivalue = [item for sublist in ivalue for item in sublist]
|
154
|
+
ov_type = _get_ov_type_from_value(flattened_ivalue[0])
|
155
|
+
assert ov_type.is_static(), f"Can't deduce type {flattened_ivalue[0].__class__} for list"
|
156
|
+
return op.Constant(ov_type, Shape([len(ivalue), second_len]), flattened_ivalue).outputs()
|
157
|
+
ivalue = [filter_element(item) for item in ivalue]
|
158
|
+
ov_type = _get_ov_type_from_value(ivalue[0])
|
159
|
+
try:
|
160
|
+
assert ov_type.is_static(), f"Can't deduce type {ivalue[0].__class__} for list"
|
161
|
+
except:
|
162
|
+
# TODO 150596: remove this workaround
|
163
|
+
ivalue = [0]
|
164
|
+
ov_type = OVType.f32
|
119
165
|
return op.Constant(ov_type, Shape([len(ivalue)]), ivalue).outputs()
|
120
166
|
|
121
167
|
if isinstance(ivalue, (jax.Array, np.ndarray)):
|
122
168
|
return jax_array_to_ov_const(ivalue, shared_memory=shared_memory).outputs()
|
123
|
-
|
169
|
+
|
124
170
|
ov_dtype_value = get_ov_type_from_jax_type(ivalue)
|
125
171
|
if ov_dtype_value is not None:
|
126
172
|
return op.Constant(OVType.i64, Shape([]), [ov_type_to_int_map[ov_dtype_value]]).outputs()
|
127
|
-
|
128
|
-
|
129
|
-
|
173
|
+
|
174
|
+
return None
|
175
|
+
|
176
|
+
|
177
|
+
def param_to_constants(primitive: str, param_name: str, jaxpr, shared_memory=True):
|
178
|
+
processed_params = filter_param(primitive, param_name, jaxpr)
|
179
|
+
|
180
|
+
for k, v in processed_params.items():
|
181
|
+
processed_params[k] = ivalue_to_constant(v, shared_memory=shared_memory)
|
182
|
+
return processed_params
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd
CHANGED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd
CHANGED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd
CHANGED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd
CHANGED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -16,6 +16,11 @@ logger = logging.getLogger(__name__)
|
|
16
16
|
logger.setLevel(logging.WARNING)
|
17
17
|
|
18
18
|
|
19
|
+
class InlinedInput:
|
20
|
+
def __init__(self, data) -> None:
|
21
|
+
self.data = data
|
22
|
+
|
23
|
+
|
19
24
|
class TorchFXPythonDecoder (Decoder):
|
20
25
|
|
21
26
|
def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]):
|
@@ -59,7 +64,7 @@ class TorchFXPythonDecoder (Decoder):
|
|
59
64
|
for arg in uargs if arg[1] is not None]
|
60
65
|
for idx, shape in enumerate(found_shapes):
|
61
66
|
if shape is not None:
|
62
|
-
new_shape=[]
|
67
|
+
new_shape = []
|
63
68
|
for dim in range(0, len(shape)):
|
64
69
|
if (type(shape[dim]).__name__ == "SymInt"):
|
65
70
|
new_shape.append(-1)
|
@@ -81,7 +86,7 @@ class TorchFXPythonDecoder (Decoder):
|
|
81
86
|
|
82
87
|
# None in inputs mean the input is inlined or None (also considered inlined)
|
83
88
|
self._inputs = [self._nodes.index(
|
84
|
-
arg) if arg in self._nodes else (arg
|
89
|
+
arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args]
|
85
90
|
|
86
91
|
# FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments.
|
87
92
|
new_inputs = []
|
@@ -92,22 +97,22 @@ class TorchFXPythonDecoder (Decoder):
|
|
92
97
|
if arg in self._nodes:
|
93
98
|
new_inputs.append(self._nodes.index(arg))
|
94
99
|
else:
|
95
|
-
new_inputs.append((arg
|
100
|
+
new_inputs.append(InlinedInput(arg))
|
96
101
|
self.input_types.append(OVAny(DecoderType.List(
|
97
102
|
TorchFXPythonDecoder.get_type_for_value(arg))))
|
98
103
|
else:
|
99
104
|
v = self._inputs[i]
|
100
105
|
new_inputs.append(v)
|
101
106
|
self.input_types.append(
|
102
|
-
TorchFXPythonDecoder.get_type_for_value(v
|
107
|
+
TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v]))
|
103
108
|
self._inputs = new_inputs
|
104
109
|
|
105
110
|
def inputs(self):
|
106
111
|
# Consider 0 a special case which may mean the input is inlined, but not guaranteed
|
107
|
-
return [x if not isinstance(x,
|
112
|
+
return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs]
|
108
113
|
|
109
114
|
def is_input_inlined(self, index):
|
110
|
-
return isinstance(self._inputs[index],
|
115
|
+
return isinstance(self._inputs[index], InlinedInput)
|
111
116
|
|
112
117
|
@staticmethod
|
113
118
|
def unpack_containers(arg):
|
@@ -142,19 +147,24 @@ class TorchFXPythonDecoder (Decoder):
|
|
142
147
|
return make_constant(OVType.i64, Shape([]), [arg])
|
143
148
|
elif isinstance(arg, float):
|
144
149
|
return make_constant(OVType.f32, Shape([]), [arg])
|
150
|
+
elif isinstance(arg, str):
|
151
|
+
u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8)
|
152
|
+
return torch_tensor_to_ov_const(u8_tensor, shared_memory=True)
|
145
153
|
return None
|
146
154
|
|
147
155
|
def inlined_input(self, index):
|
148
156
|
assert index < len(self._inputs), "Requested input doesn't exist"
|
149
157
|
assert isinstance(
|
150
|
-
self._inputs[index],
|
151
|
-
|
158
|
+
self._inputs[index], InlinedInput), "Requested input which is not inlined"
|
159
|
+
arg = self._inputs[index].data
|
160
|
+
assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}"
|
152
161
|
constant = None
|
153
|
-
arg = self._inputs[index][0]
|
154
162
|
constant = self.arg_to_constant(arg)
|
155
163
|
|
156
|
-
|
157
|
-
|
164
|
+
if constant is not None:
|
165
|
+
return constant.outputs()
|
166
|
+
else:
|
167
|
+
return []
|
158
168
|
|
159
169
|
def input(self, index): # TODO: remove
|
160
170
|
return self.inputs()[index] # TODO: find specialized method
|
@@ -257,9 +267,7 @@ class TorchFXPythonDecoder (Decoder):
|
|
257
267
|
raise RuntimeError("This input is not a Node")
|
258
268
|
|
259
269
|
def get_subgraph_size(self):
|
260
|
-
|
261
|
-
return 0
|
262
|
-
return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1
|
270
|
+
return len(self.get_subgraphs())
|
263
271
|
|
264
272
|
def decoder_type_name(self) -> str:
|
265
273
|
return "fx"
|
@@ -277,9 +285,7 @@ class TorchFXPythonDecoder (Decoder):
|
|
277
285
|
node_visitor(decoder)
|
278
286
|
|
279
287
|
def get_subgraphs(self):
|
280
|
-
|
281
|
-
return []
|
282
|
-
return list(self.pt_module.blocks())
|
288
|
+
return []
|
283
289
|
|
284
290
|
def get_subgraph_decoder(self, index):
|
285
291
|
decoder = TorchFXPythonDecoder(self.get_subgraphs()[index],
|
@@ -309,7 +315,7 @@ class TorchFXPythonDecoder (Decoder):
|
|
309
315
|
return self._raw_outputs()[index]
|
310
316
|
|
311
317
|
def _raw_inputs(self):
|
312
|
-
return [self._nodes[x] if not isinstance(x,
|
318
|
+
return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs]
|
313
319
|
|
314
320
|
def _raw_input(self, index):
|
315
321
|
return self._raw_inputs()[index]
|
@@ -347,7 +353,7 @@ class TorchFXPythonDecoder (Decoder):
|
|
347
353
|
return None
|
348
354
|
|
349
355
|
def input_is_none(self, index):
|
350
|
-
if index >= len(self._inputs) or (isinstance(self._inputs[index],
|
356
|
+
if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None):
|
351
357
|
return True
|
352
358
|
else:
|
353
359
|
r_input = self._raw_input(index)
|
@@ -358,3 +364,7 @@ class TorchFXPythonDecoder (Decoder):
|
|
358
364
|
|
359
365
|
def may_produce_alias(self, in_index: int, out_index: int) -> bool:
|
360
366
|
return False
|
367
|
+
|
368
|
+
def get_rt_info(self):
|
369
|
+
rt_info = {}
|
370
|
+
return rt_info
|
@@ -43,8 +43,6 @@ def patched_forward(self, *args, **kwargs):
|
|
43
43
|
unpacked_zp = decompression_pattern(
|
44
44
|
self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)
|
45
45
|
|
46
|
-
unpacked_zp = unpacked_zp.to(dtype) + 1
|
47
|
-
|
48
46
|
unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
|
49
47
|
unpacked_weights = unpacked_weights.view(-1, self.width)
|
50
48
|
|
@@ -59,11 +57,50 @@ def patched_forward(self, *args, **kwargs):
|
|
59
57
|
return out
|
60
58
|
|
61
59
|
|
60
|
+
def patched_forward_sym(self, *args, **kwargs):
|
61
|
+
if hasattr(self, '_hf_hook'):
|
62
|
+
args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
|
63
|
+
|
64
|
+
x = args[0]
|
65
|
+
dtype = x.dtype
|
66
|
+
outshape = x.shape[:-1] + (self.width,)
|
67
|
+
x = x.contiguous().view(-1, x.shape[-1])
|
68
|
+
height = self.qweight.shape[0]
|
69
|
+
|
70
|
+
unpacked_weights = decompression_pattern(
|
71
|
+
self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
|
72
|
+
unpacked_weights = torch.transpose(
|
73
|
+
unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
|
74
|
+
|
75
|
+
# all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
|
76
|
+
unpacked_weights = unpacked_weights.to(dtype) * self.scales
|
77
|
+
unpacked_weights = unpacked_weights.view(-1, self.width)
|
78
|
+
|
79
|
+
out = x @ unpacked_weights
|
80
|
+
|
81
|
+
out = out.view(outshape)
|
82
|
+
if self.bias is not None:
|
83
|
+
out.add_(self.bias)
|
84
|
+
|
85
|
+
if hasattr(self, '_hf_hook'):
|
86
|
+
out = self._hf_hook.post_forward(self, out)
|
87
|
+
return out
|
88
|
+
|
89
|
+
|
62
90
|
# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
|
63
91
|
supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']
|
64
92
|
|
65
93
|
|
66
94
|
def patch_model(model):
|
95
|
+
is_symmetrical = False
|
96
|
+
config = None
|
97
|
+
if hasattr(model, "config"):
|
98
|
+
config = model.config
|
99
|
+
elif hasattr(model, "model") and hasattr(model.model, "config"):
|
100
|
+
# original model was wrapped
|
101
|
+
config = model.model.config
|
102
|
+
if config is not None and hasattr(config, 'quantization_config') and hasattr(config.quantization_config, 'sym'):
|
103
|
+
is_symmetrical = config.quantization_config.sym
|
67
104
|
for name, m in model.named_modules():
|
68
105
|
if hasattr(m, '_openvino_patch_orig_forward'):
|
69
106
|
# already patched, skipping
|
@@ -87,7 +124,10 @@ def patch_model(model):
|
|
87
124
|
assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups
|
88
125
|
|
89
126
|
m._openvino_patch_orig_forward = m.forward
|
90
|
-
|
127
|
+
if is_symmetrical:
|
128
|
+
m.forward = partial(patched_forward_sym, m)
|
129
|
+
else:
|
130
|
+
m.forward = partial(patched_forward, m)
|
91
131
|
|
92
132
|
# Keep original field properties to be used when model is returned back to its original state
|
93
133
|
m._openvino_patch_orig_qweights_type = m.qweight.dtype
|
@@ -97,11 +137,12 @@ def patch_model(model):
|
|
97
137
|
m.qweight = m.qweight.view(dtype=torch.uint8)
|
98
138
|
m.qzeros = m.qzeros.view(dtype=torch.uint8)
|
99
139
|
|
100
|
-
# TODO: Redundant tensor copy? Try to remove m.
|
140
|
+
# TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
|
101
141
|
m.add_module(
|
102
142
|
'_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
|
143
|
+
# Adding 17 to move zp+1 step from after unpacking to before to have correct decompression pattern. Can it overflow?
|
103
144
|
m.add_module('_openvino_u4_compression_submodule_qzeros',
|
104
|
-
KeepWeight(m.qzeros))
|
145
|
+
KeepWeight(m.qzeros + torch.tensor(17, dtype=torch.uint8)))
|
105
146
|
|
106
147
|
m.scales = m.scales.view(-1, 1, m.width)
|
107
148
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -13,6 +13,7 @@ import torch
|
|
13
13
|
from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
|
14
14
|
from torch._dynamo.backends.registry import register_backend
|
15
15
|
from torch._inductor.compile_fx import compile_fx
|
16
|
+
from torch._inductor.freezing import replace_params_with_constants
|
16
17
|
from torch.fx.experimental.proxy_tensor import make_fx
|
17
18
|
from torch._decomp import decomposition_table, get_decompositions
|
18
19
|
|
@@ -54,10 +55,9 @@ def openvino(subgraph, example_inputs, options=None):
|
|
54
55
|
if (_get_aot_autograd(options)):
|
55
56
|
global openvino_options
|
56
57
|
openvino_options = options
|
57
|
-
decompositions = _get_decompositions(options) + get_inf_decomposition_list()
|
58
|
-
|
59
|
-
|
60
|
-
bw_compiler=fx_openvino,
|
58
|
+
decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
|
59
|
+
return aot_autograd(fw_compiler=fx_openvino,
|
60
|
+
bw_compiler=fx_openvino,
|
61
61
|
decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
|
62
62
|
return fx_openvino(subgraph, example_inputs, options)
|
63
63
|
|
@@ -86,7 +86,14 @@ def fx_openvino(subgraph, example_inputs, options=None):
|
|
86
86
|
if inputs_reversed:
|
87
87
|
example_inputs.reverse()
|
88
88
|
|
89
|
+
preserved_arg_indices = []
|
89
90
|
if (_get_aot_autograd(options)):
|
91
|
+
if tracing_context := torch._guards.TracingContext.try_get():
|
92
|
+
fw_metadata = tracing_context.fw_metadata
|
93
|
+
params_flat = tracing_context.params_flat
|
94
|
+
assert fw_metadata is not None and params_flat is not None
|
95
|
+
preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
|
96
|
+
example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
|
90
97
|
model = subgraph
|
91
98
|
else:
|
92
99
|
from torch._subclasses.fake_tensor import FakeTensorMode
|
@@ -96,7 +103,6 @@ def fx_openvino(subgraph, example_inputs, options=None):
|
|
96
103
|
|
97
104
|
with torch.no_grad():
|
98
105
|
model.eval()
|
99
|
-
|
100
106
|
partitioner = Partitioner(options)
|
101
107
|
compiled_model = partitioner.make_partitions(model, options)
|
102
108
|
|
@@ -107,9 +113,15 @@ def fx_openvino(subgraph, example_inputs, options=None):
|
|
107
113
|
executor_parameters["model_hash_str"] += "_fs"
|
108
114
|
|
109
115
|
def _call(*args):
|
116
|
+
if(_get_aot_autograd(options)):
|
117
|
+
args_list = args[0]
|
118
|
+
args_new = [args_list[i] for i in preserved_arg_indices]
|
119
|
+
args = args_new
|
110
120
|
res = execute(compiled_model, *args, executor="openvino",
|
111
121
|
executor_parameters=executor_parameters, options=options)
|
112
122
|
return res
|
123
|
+
if(_get_aot_autograd(options)):
|
124
|
+
_call._boxed_call = True # type: ignore[attr-defined]
|
113
125
|
return _call
|
114
126
|
except Exception as e:
|
115
127
|
logger.debug(f"Failed in OpenVINO execution: {e}")
|
@@ -25,6 +25,13 @@ logger = logging.getLogger(__name__)
|
|
25
25
|
logger.setLevel(logging.WARNING)
|
26
26
|
|
27
27
|
|
28
|
+
class PatternNode:
|
29
|
+
op_types = {}
|
30
|
+
|
31
|
+
def __init__(self):
|
32
|
+
self.op_types = {}
|
33
|
+
|
34
|
+
|
28
35
|
class Partitioner:
|
29
36
|
def __init__(self, options):
|
30
37
|
self.supported_ops = OperatorSupport(options)
|
@@ -56,55 +63,56 @@ class Partitioner:
|
|
56
63
|
return True
|
57
64
|
return False
|
58
65
|
|
59
|
-
def
|
66
|
+
def check_pattern(self, node: torch.fx.Node, pattern: PatternNode, enabled_ops: list) -> bool:
|
67
|
+
if node.op == "call_function":
|
68
|
+
if ("call_function" + ":" + str(node.target)) in pattern.op_types:
|
69
|
+
pt_input_nodes = node.all_input_nodes
|
70
|
+
pattern_input_ops = pattern.op_types["call_function" + ":" + str(node.target)]
|
71
|
+
if pattern_input_ops is None:
|
72
|
+
enabled_ops.append(node)
|
73
|
+
return True
|
74
|
+
if len(pt_input_nodes) != len(pattern_input_ops):
|
75
|
+
return False
|
76
|
+
for i in range(len(pt_input_nodes)):
|
77
|
+
if not self.check_pattern(pt_input_nodes[i], pattern_input_ops[i], enabled_ops):
|
78
|
+
return False
|
79
|
+
enabled_ops.append(node)
|
80
|
+
return True
|
81
|
+
elif node.op == "get_attr":
|
82
|
+
if "get_attr" in pattern.op_types:
|
83
|
+
return True
|
84
|
+
else:
|
85
|
+
return False
|
86
|
+
return False
|
87
|
+
|
88
|
+
def capture_gptq_patterns(self, graph_module: GraphModule):
|
89
|
+
const_0_node = PatternNode
|
90
|
+
const_0_node.op_types["get_attr"] = None
|
91
|
+
unsqueeze_0_node = PatternNode
|
92
|
+
unsqueeze_0_node.op_types["call_function:aten.unsqueeze.default"] = [const_0_node,]
|
93
|
+
expand_node = PatternNode
|
94
|
+
expand_node.op_types["call_function:aten.expand.default"] = [unsqueeze_0_node,]
|
95
|
+
const_1_node = PatternNode
|
96
|
+
const_1_node.op_types["get_attr"] = None
|
97
|
+
unsqueeze_1_node = PatternNode
|
98
|
+
unsqueeze_1_node.op_types["call_function:aten.unsqueeze.default"] = [const_1_node,]
|
99
|
+
bitwise_right_shift_node = PatternNode
|
100
|
+
bitwise_right_shift_node.op_types["call_function:aten.bitwise_right_shift.Tensor"] = [expand_node, unsqueeze_1_node]
|
101
|
+
to_copy_node = PatternNode
|
102
|
+
to_copy_node.op_types["call_function:aten._to_copy.default"] = [bitwise_right_shift_node,]
|
103
|
+
add_or_to_copy_node = PatternNode
|
104
|
+
add_or_to_copy_node.op_types["call_function:aten._to_copy.default"] = [bitwise_right_shift_node,]
|
105
|
+
add_or_to_copy_node.op_types["call_function:aten.add.Tensor"] = [to_copy_node,]
|
106
|
+
bitwise_and_node = PatternNode
|
107
|
+
bitwise_and_node.op_types["call_function:aten.bitwise_and.Scalar"] = [add_or_to_copy_node,]
|
108
|
+
|
60
109
|
for node in graph_module.graph.nodes:
|
61
110
|
if str(node.op) == "call_function" and str(node.target) == "aten.bitwise_and.Scalar":
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
continue
|
68
|
-
to_copy_in_nodes = to_copy_node.all_input_nodes
|
69
|
-
if len(to_copy_in_nodes) != 1:
|
70
|
-
continue
|
71
|
-
bitwise_right_shift_node = to_copy_in_nodes[0]
|
72
|
-
if str(bitwise_right_shift_node.op) != "call_function" or str(bitwise_right_shift_node.target) != "aten.bitwise_right_shift.Tensor":
|
73
|
-
continue
|
74
|
-
bitwise_right_shift_in_nodes = bitwise_right_shift_node.all_input_nodes
|
75
|
-
if len(bitwise_right_shift_in_nodes) != 2:
|
76
|
-
continue
|
77
|
-
expand_node = bitwise_right_shift_in_nodes[0]
|
78
|
-
if str(expand_node.op) != "call_function" or str(expand_node.target) != "aten.expand.default":
|
79
|
-
continue
|
80
|
-
expand_in_nodes = expand_node.all_input_nodes
|
81
|
-
if len(expand_in_nodes) != 1:
|
82
|
-
continue
|
83
|
-
unsqueeze_0_node = expand_in_nodes[0]
|
84
|
-
if str(unsqueeze_0_node.op) != "call_function" or str(unsqueeze_0_node.target) != "aten.unsqueeze.default":
|
85
|
-
continue
|
86
|
-
unsqueeze_0_in_nodes = unsqueeze_0_node.all_input_nodes
|
87
|
-
if len(unsqueeze_0_in_nodes) != 1:
|
88
|
-
continue
|
89
|
-
const_0_node = unsqueeze_0_in_nodes[0]
|
90
|
-
if str(const_0_node.op) != "get_attr":
|
91
|
-
continue
|
92
|
-
unsqueeze_1_node = bitwise_right_shift_in_nodes[1]
|
93
|
-
if str(unsqueeze_1_node.op) != "call_function" or str(unsqueeze_1_node.target) != "aten.unsqueeze.default":
|
94
|
-
continue
|
95
|
-
unsqueeze_1_in_nodes = unsqueeze_1_node.all_input_nodes
|
96
|
-
if len(unsqueeze_1_in_nodes) != 1:
|
97
|
-
continue
|
98
|
-
const_1_node = unsqueeze_1_in_nodes[0]
|
99
|
-
if str(const_1_node.op) != "get_attr":
|
100
|
-
continue
|
101
|
-
|
102
|
-
self.supported_ops.enable_by_name(node)
|
103
|
-
self.supported_ops.enable_by_name(to_copy_node)
|
104
|
-
self.supported_ops.enable_by_name(bitwise_right_shift_node)
|
105
|
-
self.supported_ops.enable_by_name(expand_node)
|
106
|
-
self.supported_ops.enable_by_name(unsqueeze_0_node)
|
107
|
-
self.supported_ops.enable_by_name(unsqueeze_1_node)
|
111
|
+
enabled_ops = []
|
112
|
+
pattern_match = self.check_pattern(node, bitwise_and_node, enabled_ops)
|
113
|
+
if pattern_match:
|
114
|
+
for pattern_op in enabled_ops:
|
115
|
+
self.supported_ops.enable_by_name(pattern_op)
|
108
116
|
|
109
117
|
def make_partitions(self, graph_module: GraphModule, options) -> GraphModule:
|
110
118
|
allow_single_node_partition = _is_testing(options)
|