bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/common.lib +0 -0
  3. bigdl-core-npu/ggml.dll +0 -0
  4. bigdl-core-npu/ggml.lib +0 -0
  5. bigdl-core-npu/include/llamacpp/arg.h +77 -0
  6. bigdl-core-npu/include/llamacpp/common.h +563 -0
  7. bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
  8. bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
  9. bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
  10. bigdl-core-npu/include/llamacpp/llama.h +1234 -0
  11. bigdl-core-npu/include/llamacpp/log.h +92 -0
  12. bigdl-core-npu/include/npu/npu_common.h +119 -0
  13. bigdl-core-npu/include/npu/npu_llm.h +77 -0
  14. bigdl-core-npu/llama-cli-npu.exe +0 -0
  15. bigdl-core-npu/llama.dll +0 -0
  16. bigdl-core-npu/llama.lib +0 -0
  17. bigdl-core-npu/llm-cli.exe +0 -0
  18. bigdl-core-npu/npu_llm.dll +0 -0
  19. bigdl-core-npu/npu_llm.lib +0 -0
  20. bigdl-core-npu/zlib1.dll +0 -0
  21. bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
  22. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
  23. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
  24. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
  25. {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
  26. intel_npu_acceleration_library/_version.py +1 -1
  27. intel_npu_acceleration_library/backend/base.py +39 -4
  28. intel_npu_acceleration_library/backend/bindings.py +109 -5
  29. intel_npu_acceleration_library/backend/factory.py +264 -47
  30. intel_npu_acceleration_library/backend/ops.py +2 -1
  31. intel_npu_acceleration_library/backend/qlinear.py +8 -4
  32. intel_npu_acceleration_library/backend/runtime.py +7 -2
  33. intel_npu_acceleration_library/backend/tensor.py +73 -3
  34. intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
  35. intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
  36. intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
  37. intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
  38. intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
  39. intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
  40. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
  41. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
  42. intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
  43. intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
  44. intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
  45. intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
  46. intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
  47. intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
  48. intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
  49. intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
  50. intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
  51. intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
  52. intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
  53. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
  54. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
  55. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
  56. intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
  57. intel_npu_acceleration_library/device.py +2 -2
  58. intel_npu_acceleration_library/dtypes.py +34 -1
  59. intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
  60. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
  61. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  62. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
  82. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
  83. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
  84. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  85. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  86. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  87. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  88. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  89. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
  90. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
  91. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
  92. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
  93. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
  94. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  95. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  96. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  97. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  98. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  99. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
  100. intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
  101. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
  102. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
  103. intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
  105. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
  107. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
  108. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
  110. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
  111. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
  114. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
  115. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
  116. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
  117. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
  118. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  119. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
  120. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
  121. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
  122. intel_npu_acceleration_library/external/openvino/utils.py +17 -0
  123. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  124. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  125. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  126. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  127. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  128. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  129. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  130. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  131. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  132. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  133. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  134. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  135. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  136. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  137. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  138. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  139. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  140. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  141. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  142. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  143. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  144. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  145. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  146. intel_npu_acceleration_library/nn/module.py +17 -17
intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py (new file)
@@ -0,0 +1,182 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# flake8: noqa
+# mypy: ignore-errors
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+from openvino.frontend.jax.passes import filter_element, filter_ivalue, filter_param
+from openvino.runtime import op, Type as OVType, Shape, OVAny
+
+numpy_to_ov_type_map = {
+    np.float32: OVType.f32,
+    bool: OVType.boolean,
+    jax.dtypes.bfloat16: OVType.bf16,  # TODO: check this
+    np.float16: OVType.f16,
+    np.float32: OVType.f32,
+    np.float64: OVType.f64,
+    np.uint8: OVType.u8,
+    np.int8: OVType.i8,
+    np.uint16: OVType.u16,
+    np.int16: OVType.i16,
+    np.uint32: OVType.u32,
+    np.int32: OVType.i32,
+    np.uint64: OVType.u64,
+    np.int64: OVType.i64,
+}
+
+jax_to_ov_type_map = {
+    jnp.float32: OVType.f32,
+    jnp.bfloat16: OVType.bf16,  # TODO: check this
+    jnp.float16: OVType.f16,
+    jnp.float64: OVType.f64,
+    jnp.uint8: OVType.u8,
+    jnp.int8: OVType.i8,
+    jnp.uint16: OVType.u16,
+    jnp.int16: OVType.i16,
+    jnp.uint32: OVType.u32,
+    jnp.int32: OVType.i32,
+    jnp.uint64: OVType.u64,
+    jnp.int64: OVType.i64,
+}
+
+try:
+    jax_to_ov_type_map[jnp.bool] = OVType.boolean
+except:
+    pass
+
+basic_to_ov_type_map = {
+    int: OVType.i64,
+    float: OVType.f32,
+    bool: OVType.boolean,
+}
+
+ov_type_to_int_map = {
+    OVType.u8: 0,
+    OVType.i8: 1,
+    OVType.i16: 2,
+    OVType.i32: 3,
+    OVType.i64: 4,
+    OVType.f16: 5,
+    OVType.f32: 6,
+    OVType.f64: 7,
+    OVType.u16: 8,
+    OVType.u32: 9,
+    OVType.u64: 10,
+    OVType.boolean: 11,
+    OVType.bf16: 15,
+}
+
+
+def get_type_from_py_type(value):
+    if isinstance(value, float):
+        return OVType.f32
+    if isinstance(value, bool):
+        return OVType.boolean
+    if isinstance(value, int):
+        return OVType.i64
+    return OVType.dynamic
+
+
+def get_type_from_np_type(value):
+    for np_dtype, ov_type in numpy_to_ov_type_map.items():
+        if isinstance(value, np_dtype):
+            return ov_type
+    return None
+
+
+def _get_ov_type_from_value(value):
+    ov_type = get_type_from_np_type(value)
+    if ov_type is None:
+        ov_type = get_type_from_py_type(value)
+    return ov_type
+
+
+def get_ov_type_for_value(value):
+    if isinstance(value, (jax.core.Var, jax.core.Literal)):
+        if value.aval.dtype in jax_to_ov_type_map:
+            return OVAny(jax_to_ov_type_map[value.aval.dtype])
+        for k, v in numpy_to_ov_type_map.items():
+            if value.aval.dtype == k:
+                return OVAny(v)
+        for k, v in basic_to_ov_type_map.items():
+            if isinstance(value.aval.dtype, k):
+                return OVAny(v)
+    elif isinstance(value, (int, float, bool)):
+        return OVAny(jax_to_ov_type_map[type(value)])
+    else:
+        raise NotImplementedError(f"dtype for {value} of type {type(value)} has not been supported yet.")
+
+
+def get_ov_type_from_jax_type(dtype):
+    if dtype in jax_to_ov_type_map:
+        return OVAny(jax_to_ov_type_map[dtype])
+    for k, v in numpy_to_ov_type_map.items():
+        if dtype == k:
+            return OVAny(v)
+    for k, v in basic_to_ov_type_map.items():
+        if isinstance(dtype, k):
+            return OVAny(v)
+    return None
+
+
+def jax_array_to_ov_const(arr: np.ndarray, shared_memory=True):
+    # TODO: deal with bfloat16 dtype here.
+    if isinstance(arr, np.ndarray):
+        return op.Constant(arr, shared_memory=shared_memory)
+    elif isinstance(arr, jax.Array):
+        return op.Constant(np.array(jax.device_get(arr)), shared_memory=shared_memory)
+    else:
+        raise ValueError(f"Constant is expected to be a numpy array or jax array but got {type(arr)}")
+
+
+def ivalue_to_constant(ivalue, shared_memory=True):
+    '''
+    Convert a python object to an openvino constant.
+    '''
+    # print('ivalue = ', ivalue)
+    ivalue = filter_ivalue(ivalue)
+    ov_type = _get_ov_type_from_value(ivalue)
+    if ov_type.is_static():
+        return op.Constant(ov_type, Shape([]), [ivalue]).outputs()
+    if isinstance(ivalue, (list, tuple)):
+        assert len(ivalue) > 0, "Can't deduce type for empty list"
+        if isinstance(ivalue[0], (list, tuple)):
+            second_len = len(ivalue[0])
+            flattened_ivalue = []
+            for value in ivalue:
+                assert isinstance(value, (list, tuple)), "Can't deduce type for a list with both list and basic types."
+                assert len(value) == second_len or len(value) == 0, "Can't deduce type for nested list with different lengths."
+                flattened_ivalue.extend([filter_element(item) for item in value])
+            flattened_ivalue = [item for sublist in ivalue for item in sublist]
+            ov_type = _get_ov_type_from_value(flattened_ivalue[0])
+            assert ov_type.is_static(), f"Can't deduce type {flattened_ivalue[0].__class__} for list"
+            return op.Constant(ov_type, Shape([len(ivalue), second_len]), flattened_ivalue).outputs()
+        ivalue = [filter_element(item) for item in ivalue]
+        ov_type = _get_ov_type_from_value(ivalue[0])
+        try:
+            assert ov_type.is_static(), f"Can't deduce type {ivalue[0].__class__} for list"
+        except:
+            # TODO 150596: remove this workaround
+            ivalue = [0]
+            ov_type = OVType.f32
+        return op.Constant(ov_type, Shape([len(ivalue)]), ivalue).outputs()
+
+    if isinstance(ivalue, (jax.Array, np.ndarray)):
+        return jax_array_to_ov_const(ivalue, shared_memory=shared_memory).outputs()
+
+    ov_dtype_value = get_ov_type_from_jax_type(ivalue)
+    if ov_dtype_value is not None:
+        return op.Constant(OVType.i64, Shape([]), [ov_type_to_int_map[ov_dtype_value]]).outputs()
+
+    return None
+
+
+def param_to_constants(primitive: str, param_name: str, jaxpr, shared_memory=True):
+    processed_params = filter_param(primitive, param_name, jaxpr)

+    for k, v in processed_params.items():
+        processed_params[k] = ivalue_to_constant(v, shared_memory=shared_memory)
+    return processed_params
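
For orientation: the new jax/utils.py gives the JAX frontend one place for dtype translation (jax types first, then numpy types, then plain Python types) and for wrapping Python values as OpenVINO constants. A minimal usage sketch, assuming an OpenVINO build that ships this JAX frontend; the exact filtering applied by filter_ivalue comes from openvino.frontend.jax.passes:

    # Hedged sketch, not package code: scalars become rank-0 constants,
    # numpy arrays become (optionally shared-memory) constants.
    import numpy as np
    from openvino.frontend.jax.utils import ivalue_to_constant

    scalar_outputs = ivalue_to_constant(3.5)   # f32 Constant with Shape([])
    array_outputs = ivalue_to_constant(np.ones((2, 2), dtype=np.float32))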
intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py
@@ -16,6 +16,11 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
 
 
+class InlinedInput:
+    def __init__(self, data) -> None:
+        self.data = data
+
+
 class TorchFXPythonDecoder (Decoder):
 
     def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]):
@@ -30,6 +35,7 @@ class TorchFXPythonDecoder (Decoder):
         self.input_shapes = input_shapes
 
         self._input_signature = []
+        self._example_input = None
 
         if issubclass(type(pt_module), torch.fx.graph_module.GraphModule):
 
@@ -58,7 +64,7 @@ class TorchFXPythonDecoder (Decoder):
                             for arg in uargs if arg[1] is not None]
             for idx, shape in enumerate(found_shapes):
                 if shape is not None:
-                    new_shape=[]
+                    new_shape = []
                     for dim in range(0, len(shape)):
                         if (type(shape[dim]).__name__ == "SymInt"):
                             new_shape.append(-1)
@@ -80,7 +86,7 @@ class TorchFXPythonDecoder (Decoder):
 
             # None in inputs mean the input is inlined or None (also considered inlined)
             self._inputs = [self._nodes.index(
-                arg) if arg in self._nodes else (arg,) for arg in pt_module.args]
+                arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args]
 
             # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments.
             new_inputs = []
@@ -91,22 +97,22 @@ class TorchFXPythonDecoder (Decoder):
                     if arg in self._nodes:
                         new_inputs.append(self._nodes.index(arg))
                     else:
-                        new_inputs.append((arg,))
+                        new_inputs.append(InlinedInput(arg))
                     self.input_types.append(OVAny(DecoderType.List(
                         TorchFXPythonDecoder.get_type_for_value(arg))))
                 else:
                     v = self._inputs[i]
                     new_inputs.append(v)
                     self.input_types.append(
-                        TorchFXPythonDecoder.get_type_for_value(v[0] if isinstance(v, tuple) else self._nodes[v]))
+                        TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v]))
             self._inputs = new_inputs
 
     def inputs(self):
         # Consider 0 a special case which may mean the input is inlined, but not guaranteed
-        return [x if not isinstance(x, tuple) else 0 for x in self._inputs]
+        return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs]
 
     def is_input_inlined(self, index):
-        return isinstance(self._inputs[index], tuple)
+        return isinstance(self._inputs[index], InlinedInput)
 
     @staticmethod
     def unpack_containers(arg):
@@ -141,19 +147,24 @@ class TorchFXPythonDecoder (Decoder):
             return make_constant(OVType.i64, Shape([]), [arg])
         elif isinstance(arg, float):
             return make_constant(OVType.f32, Shape([]), [arg])
+        elif isinstance(arg, str):
+            u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8)
+            return torch_tensor_to_ov_const(u8_tensor, shared_memory=True)
         return None
 
     def inlined_input(self, index):
        assert index < len(self._inputs), "Requested input doesn't exist"
         assert isinstance(
-            self._inputs[index], tuple), "Requested input which is not inlined"
-        assert self._inputs[index][0] is not None, "Requested None inlined input"
+            self._inputs[index], InlinedInput), "Requested input which is not inlined"
+        arg = self._inputs[index].data
+        assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}"
         constant = None
-        arg = self._inputs[index][0]
         constant = self.arg_to_constant(arg)
 
-        assert constant is not None, f"Constant wasn't created for inlined input {index}"
-        return constant.outputs()
+        if constant is not None:
+            return constant.outputs()
+        else:
+            return []
 
     def input(self, index):  # TODO: remove
         return self.inputs()[index]  # TODO: find specialized method
@@ -256,9 +267,7 @@ class TorchFXPythonDecoder (Decoder):
             raise RuntimeError("This input is not a Node")
 
     def get_subgraph_size(self):
-        if issubclass(type(self.pt_module), torch.fx.Node):
-            return 0
-        return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1
+        return len(self.get_subgraphs())
 
     def decoder_type_name(self) -> str:
         return "fx"
@@ -276,9 +285,7 @@ class TorchFXPythonDecoder (Decoder):
             node_visitor(decoder)
 
     def get_subgraphs(self):
-        if issubclass(type(self.pt_module), torch.fx.Node):
-            return []
-        return list(self.pt_module.blocks())
+        return []
 
     def get_subgraph_decoder(self, index):
         decoder = TorchFXPythonDecoder(self.get_subgraphs()[index],
@@ -308,7 +315,7 @@ class TorchFXPythonDecoder (Decoder):
         return self._raw_outputs()[index]
 
     def _raw_inputs(self):
-        return [self._nodes[x] if not isinstance(x, tuple) and x < len(self._nodes) else x[0] for x in self._inputs]
+        return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs]
 
     def _raw_input(self, index):
         return self._raw_inputs()[index]
@@ -316,6 +323,13 @@ class TorchFXPythonDecoder (Decoder):
     def num_of_outputs(self):
         return len(self.outputs())
 
+    def output_list_size(self):
+        max_out_id = -1
+        for user in self.pt_module.users:
+            if "<built-in function getitem>" == str(user.target) and max_out_id < user.args[1]:
+                max_out_id = user.args[1]
+        return max_out_id + 1
+
     def output(self, index):
         return self.outputs()[index]
 
@@ -339,7 +353,7 @@ class TorchFXPythonDecoder (Decoder):
         return None
 
     def input_is_none(self, index):
-        if index >= len(self._inputs) or (isinstance(self._inputs[index], tuple) and self._inputs[index][0] is None):
+        if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None):
             return True
         else:
             r_input = self._raw_input(index)
@@ -350,3 +364,7 @@ class TorchFXPythonDecoder (Decoder):
 
     def may_produce_alias(self, in_index: int, out_index: int) -> bool:
         return False
+
+    def get_rt_info(self):
+        rt_info = {}
+        return rt_info
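
The recurring change in fx_decoder.py replaces the old 1-tuple sentinel for inlined inputs with the explicit InlinedInput wrapper. A standalone illustration of why (my example, not package code): a value that is itself a tuple is indistinguishable from the (arg,) sentinel under isinstance(x, tuple), while a dedicated wrapper class is unambiguous.

    class InlinedInput:
        def __init__(self, data) -> None:
            self.data = data

    # Mixed input list: node indices plus an inlined None and an inlined tuple.
    inputs = [0, InlinedInput(None), InlinedInput((3, 4)), 1]

    # inputs() maps inlined entries to the special index 0, as in the decoder.
    assert [x if not isinstance(x, InlinedInput) else 0 for x in inputs] == [0, 0, 0, 1]
    # is_input_inlined() is now a plain isinstance check; under the old encoding
    # a genuine tuple value would also have matched isinstance(x, tuple).
    assert isinstance(inputs[2], InlinedInput)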
intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py
@@ -32,7 +32,7 @@ def patched_forward(self, *args, **kwargs):
     x = args[0]
     dtype = x.dtype
     outshape = x.shape[:-1] + (self.width,)
-    x = x.view(-1, x.shape[-1])
+    x = x.contiguous().view(-1, x.shape[-1])
     groups = self.qzeros.shape[0]
     height = self.qweight.shape[0]
 
@@ -43,8 +43,6 @@ def patched_forward(self, *args, **kwargs):
     unpacked_zp = decompression_pattern(
         self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)
 
-    unpacked_zp = unpacked_zp.to(dtype) + 1
-
     unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
     unpacked_weights = unpacked_weights.view(-1, self.width)
 
@@ -59,11 +57,50 @@ def patched_forward(self, *args, **kwargs):
     return out
 
 
+def patched_forward_sym(self, *args, **kwargs):
+    if hasattr(self, '_hf_hook'):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+
+    x = args[0]
+    dtype = x.dtype
+    outshape = x.shape[:-1] + (self.width,)
+    x = x.contiguous().view(-1, x.shape[-1])
+    height = self.qweight.shape[0]
+
+    unpacked_weights = decompression_pattern(
+        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
+    unpacked_weights = torch.transpose(
+        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
+
+    # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
+    unpacked_weights = unpacked_weights.to(dtype) * self.scales
+    unpacked_weights = unpacked_weights.view(-1, self.width)
+
+    out = x @ unpacked_weights
+
+    out = out.view(outshape)
+    if self.bias is not None:
+        out.add_(self.bias)
+
+    if hasattr(self, '_hf_hook'):
+        out = self._hf_hook.post_forward(self, out)
+    return out
+
+
 # All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
 supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']
 
 
 def patch_model(model):
+    is_symmetrical = False
+    config = None
+    if hasattr(model, "config"):
+        config = model.config
+    elif hasattr(model, "model") and hasattr(model.model, "config"):
+        # original model was wrapped
+        config = model.model.config
+    if config is not None and hasattr(config, 'quantization_config') and hasattr(config.quantization_config, 'sym'):
+        is_symmetrical = config.quantization_config.sym
     for name, m in model.named_modules():
         if hasattr(m, '_openvino_patch_orig_forward'):
             # already patched, skipping
@@ -87,7 +124,10 @@ def patch_model(model):
         assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups
 
         m._openvino_patch_orig_forward = m.forward
-        m.forward = partial(patched_forward, m)
+        if is_symmetrical:
+            m.forward = partial(patched_forward_sym, m)
+        else:
+            m.forward = partial(patched_forward, m)
 
         # Keep original field properties to be used when model is returned back to its original state
         m._openvino_patch_orig_qweights_type = m.qweight.dtype
@@ -97,11 +137,12 @@ def patch_model(model):
         m.qweight = m.qweight.view(dtype=torch.uint8)
         m.qzeros = m.qzeros.view(dtype=torch.uint8)
 
-        # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
+        # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
         m.add_module(
             '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
+        # Adding 17 to move zp+1 step from after unpacking to before to have correct decompression pattern. Can it overflow?
         m.add_module('_openvino_u4_compression_submodule_qzeros',
-                     KeepWeight(m.qzeros))
+                     KeepWeight(m.qzeros + torch.tensor(17, dtype=torch.uint8)))
 
         m.scales = m.scales.view(-1, 1, m.width)
 
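A note on the qzeros change: after the view to torch.uint8, each byte of qzeros packs two u4 zero-points, so adding 17 (0x11) increments both nibbles by one. That folds the removed "+1 after unpacking" step into the packed data itself, which is what the new comment means by moving the zp+1 step before unpacking. A small demonstration (my illustration, not package code):

    import torch

    packed = torch.tensor([0x52], dtype=torch.uint8)        # nibbles: hi=5, lo=2
    shifted = packed + torch.tensor(17, dtype=torch.uint8)  # 0x52 + 0x11 = 0x63
    assert (shifted & 0xF).item() == 3 and (shifted >> 4).item() == 6  # both nibbles +1
    # The "Can it overflow?" caveat is real: a low nibble of 0xF carries into
    # its neighbour (0x0F + 0x11 = 0x20).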
intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py
@@ -30,6 +30,7 @@ def patch_model(model, module_extensions, orig_forward_name):
 
         if extension:
             # The Trampoline class is instantiated for every module replacement, so we can use class members individually for each module.
+
             class Trampoline(torch.autograd.Function):
                 target_extension = extension
                 original_module = m
@@ -83,16 +84,35 @@ def unpatch_model(model, orig_forward_name):
 
 
 def __make_16bit_traceable(model: torch.nn.Module):
-    # Replace torch.nn.Linear with ModuleExtension and move other modules to fp32
-    extensions = {torch.nn.Linear: ModuleExtension(
-        torch.nn.Linear,
-        "aten::linear",
-        evaluate=lambda module, *args, **kwargs: torch.ones(
-            list(args[0].shape[:-1]) + [module.out_features], dtype=torch.float32) * 0.5,
-        convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias))
+    """
+    Prepare a 16-bit PyTorch model for tracing with OpenVINO.
+    - Replace known list of modules with ModuleExtension.
+    - Convert other modules with weights to FP32.
+    """
+    extensions = {
+        torch.nn.Linear: ModuleExtension(
+            torch.nn.Linear, "ov_ext::linear",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)),
+        torch.nn.Embedding: ModuleExtension(
+            torch.nn.Embedding, "ov_ext::embedding",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape) + [module.embedding_dim], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, args[0], module.padding_idx, module.scale_grad_by_freq, module.sparse)),
     }
+    try:
+        from transformers.pytorch_utils import Conv1D
+        extensions[Conv1D] = ModuleExtension(
+            Conv1D, "ov_ext::conv1d",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias))
+    except:
+        pass
     patch_model(model, extensions,
                 "_openvino_module_extension_patch_orig_forward")
     for _, module in model.named_modules():
-        if module.__class__ not in extensions and hasattr(module, "weight") and module.weight.dtype in [torch.float16, torch.bfloat16]:
+        if module.__class__ not in extensions and (any([p.dtype in [torch.float16, torch.bfloat16] for p in module.parameters(False)])
+                                                   or any([b.dtype in [torch.float16, torch.bfloat16] for b in module.buffers(False)])):
             module.float()
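
For context on the ModuleExtension entries above: as I read the API, evaluate only has to return a placeholder tensor of the right output shape and dtype so tracing can proceed without running 16-bit kernels, and convert later maps the call onto the named frontend op. A shape-only sketch under that assumption (not package code):

    import torch

    # What evaluate produces for a Linear(4, 8) called on a (2, 4) input:
    linear = torch.nn.Linear(4, 8)
    x = torch.zeros(2, 4)
    placeholder = torch.full(list(x.shape[:-1]) + [linear.out_features], 0.5,
                             dtype=torch.float32)
    assert placeholder.shape == (2, 8)  # matches linear(x).shape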
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py
@@ -13,6 +13,7 @@ import torch
 from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
 from torch._dynamo.backends.registry import register_backend
 from torch._inductor.compile_fx import compile_fx
+from torch._inductor.freezing import replace_params_with_constants
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch._decomp import decomposition_table, get_decompositions
 
@@ -54,10 +55,9 @@ def openvino(subgraph, example_inputs, options=None):
     if (_get_aot_autograd(options)):
         global openvino_options
         openvino_options = options
-        decompositions = _get_decompositions(options) + get_inf_decomposition_list()
-        decompositions = decompositions + get_aot_decomposition_list()
-        return aot_autograd(fw_compiler=fx_openvino,
-                            bw_compiler=fx_openvino,
+        decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
+        return aot_autograd(fw_compiler=fx_openvino,
+                            bw_compiler=fx_openvino,
                             decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
     return fx_openvino(subgraph, example_inputs, options)
 
@@ -86,7 +86,14 @@ def fx_openvino(subgraph, example_inputs, options=None):
         if inputs_reversed:
             example_inputs.reverse()
 
+        preserved_arg_indices = []
         if (_get_aot_autograd(options)):
+            if tracing_context := torch._guards.TracingContext.try_get():
+                fw_metadata = tracing_context.fw_metadata
+                params_flat = tracing_context.params_flat
+                assert fw_metadata is not None and params_flat is not None
+                preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
+                example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
             model = subgraph
         else:
             from torch._subclasses.fake_tensor import FakeTensorMode
@@ -96,7 +103,6 @@ def fx_openvino(subgraph, example_inputs, options=None):
 
         with torch.no_grad():
             model.eval()
-
         partitioner = Partitioner(options)
         compiled_model = partitioner.make_partitions(model, options)
 
@@ -107,9 +113,15 @@ def fx_openvino(subgraph, example_inputs, options=None):
             executor_parameters["model_hash_str"] += "_fs"
 
         def _call(*args):
+            if(_get_aot_autograd(options)):
+                args_list = args[0]
+                args_new = [args_list[i] for i in preserved_arg_indices]
+                args = args_new
             res = execute(compiled_model, *args, executor="openvino",
                           executor_parameters=executor_parameters, options=options)
             return res
+        if(_get_aot_autograd(options)):
+            _call._boxed_call = True  # type: ignore[attr-defined]
         return _call
     except Exception as e:
         logger.debug(f"Failed in OpenVINO execution: {e}")
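
The _boxed_call flag opts the returned callable into PyTorch's boxed calling convention, which AOTAutograd uses for compiled functions: the callee receives one list of arguments rather than unpacked positionals, which is why _call peels args[0] and filters it by preserved_arg_indices. A minimal illustration of the convention itself (my sketch, independent of the package):

    # A boxed callee takes a single list instead of *args.
    def boxed_call(*args):
        args_list = args[0]
        return [t * 2 for t in args_list]

    boxed_call._boxed_call = True
    assert boxed_call([1, 2, 3]) == [2, 4, 6]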
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py
@@ -241,6 +241,7 @@ class OperatorSupport(OperatorSupport):
             "torch.ops.aten.transpose.int": None,
             "torch.ops.aten.tril.default": None,
             "torch.ops.aten.tril_.default": None,
+            "torch.ops.aten.triu.default": None,
             "torch.ops.aten.unbind.int": None,
             "torch.ops.aten.unfold.default": None,
             "torch.ops.aten.unsqueeze.default": None,