bigdl-core-npu 2.6.0b20241120__cp310-cp310-win_amd64.whl → 2.6.0b20241121__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. bigdl-core-npu/npu_llm.dll +0 -0
  2. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/METADATA +1 -1
  3. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/RECORD +75 -79
  4. intel_npu_acceleration_library/_version.py +1 -1
  5. intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
  6. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  7. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  8. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  9. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  10. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  11. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  12. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +15 -5
  13. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  14. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +66 -13
  15. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  16. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  17. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  18. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  19. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  20. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  21. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  22. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  23. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  24. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  25. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +29 -19
  26. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +46 -5
  27. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  28. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  29. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  30. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  31. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  32. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
  33. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
  34. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +92 -63
  35. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
  36. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
  42. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
  43. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +5 -0
  44. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +131 -1
  45. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +13 -4
  46. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +1 -1
  47. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +1 -0
  48. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
  49. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +29 -9
  50. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +0 -1
  51. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  52. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  53. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  54. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  55. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  56. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  57. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  58. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  59. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  60. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  61. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  62. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  63. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  64. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  65. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  66. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  67. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  68. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  69. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  70. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  71. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  72. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  73. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp310-win_amd64.pyd +0 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp311-win_amd64.pyd +0 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp312-win_amd64.pyd +0 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp38-win_amd64.pyd +0 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp39-win_amd64.pyd +0 -0
  79. intel_npu_acceleration_library/lib/Release/openvino_jax_frontend.dll +0 -0
  80. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/WHEEL +0 -0
  81. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/top_level.txt +0 -0

intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py

@@ -16,6 +16,11 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
 
 
+class InlinedInput:
+    def __init__(self, data) -> None:
+        self.data = data
+
+
 class TorchFXPythonDecoder (Decoder):
 
     def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]):
@@ -59,7 +64,7 @@ class TorchFXPythonDecoder (Decoder):
                             for arg in uargs if arg[1] is not None]
             for idx, shape in enumerate(found_shapes):
                 if shape is not None:
-                    new_shape=[]
+                    new_shape = []
                     for dim in range(0, len(shape)):
                         if (type(shape[dim]).__name__ == "SymInt"):
                             new_shape.append(-1)
@@ -81,7 +86,7 @@ class TorchFXPythonDecoder (Decoder):
 
             # None in inputs mean the input is inlined or None (also considered inlined)
             self._inputs = [self._nodes.index(
-                arg) if arg in self._nodes else (arg,) for arg in pt_module.args]
+                arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args]
 
             # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments.
             new_inputs = []
@@ -92,22 +97,22 @@ class TorchFXPythonDecoder (Decoder):
                         if arg in self._nodes:
                             new_inputs.append(self._nodes.index(arg))
                         else:
-                            new_inputs.append((arg,))
+                            new_inputs.append(InlinedInput(arg))
                         self.input_types.append(OVAny(DecoderType.List(
                             TorchFXPythonDecoder.get_type_for_value(arg))))
                 else:
                     v = self._inputs[i]
                     new_inputs.append(v)
                     self.input_types.append(
-                        TorchFXPythonDecoder.get_type_for_value(v[0] if isinstance(v, tuple) else self._nodes[v]))
+                        TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v]))
             self._inputs = new_inputs
 
     def inputs(self):
         # Consider 0 a special case which may mean the input is inlined, but not guaranteed
-        return [x if not isinstance(x, tuple) else 0 for x in self._inputs]
+        return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs]
 
     def is_input_inlined(self, index):
-        return isinstance(self._inputs[index], tuple)
+        return isinstance(self._inputs[index], InlinedInput)
 
     @staticmethod
     def unpack_containers(arg):
@@ -142,19 +147,24 @@ class TorchFXPythonDecoder (Decoder):
             return make_constant(OVType.i64, Shape([]), [arg])
         elif isinstance(arg, float):
             return make_constant(OVType.f32, Shape([]), [arg])
+        elif isinstance(arg, str):
+            u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8)
+            return torch_tensor_to_ov_const(u8_tensor, shared_memory=True)
         return None
 
     def inlined_input(self, index):
         assert index < len(self._inputs), "Requested input doesn't exist"
         assert isinstance(
-            self._inputs[index], tuple), "Requested input which is not inlined"
-        assert self._inputs[index][0] is not None, "Requested None inlined input"
+            self._inputs[index], InlinedInput), "Requested input which is not inlined"
+        arg = self._inputs[index].data
+        assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}"
         constant = None
-        arg = self._inputs[index][0]
         constant = self.arg_to_constant(arg)
 
-        assert constant is not None, f"Constant wasn't created for inlined input {index}"
-        return constant.outputs()
+        if constant is not None:
+            return constant.outputs()
+        else:
+            return []
 
     def input(self, index):  # TODO: remove
         return self.inputs()[index]  # TODO: find specialized method
@@ -257,9 +267,7 @@ class TorchFXPythonDecoder (Decoder):
         raise RuntimeError("This input is not a Node")
 
     def get_subgraph_size(self):
-        if issubclass(type(self.pt_module), torch.fx.Node):
-            return 0
-        return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1
+        return len(self.get_subgraphs())
 
     def decoder_type_name(self) -> str:
         return "fx"
@@ -277,9 +285,7 @@ class TorchFXPythonDecoder (Decoder):
             node_visitor(decoder)
 
     def get_subgraphs(self):
-        if issubclass(type(self.pt_module), torch.fx.Node):
-            return []
-        return list(self.pt_module.blocks())
+        return []
 
     def get_subgraph_decoder(self, index):
         decoder = TorchFXPythonDecoder(self.get_subgraphs()[index],
@@ -309,7 +315,7 @@ class TorchFXPythonDecoder (Decoder):
         return self._raw_outputs()[index]
 
     def _raw_inputs(self):
-        return [self._nodes[x] if not isinstance(x, tuple) and x < len(self._nodes) else x[0] for x in self._inputs]
+        return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs]
 
     def _raw_input(self, index):
         return self._raw_inputs()[index]
@@ -347,7 +353,7 @@ class TorchFXPythonDecoder (Decoder):
         return None
 
     def input_is_none(self, index):
-        if index >= len(self._inputs) or (isinstance(self._inputs[index], tuple) and self._inputs[index][0] is None):
+        if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None):
            return True
         else:
            r_input = self._raw_input(index)
@@ -358,3 +364,7 @@
 
     def may_produce_alias(self, in_index: int, out_index: int) -> bool:
         return False
+
+    def get_rt_info(self):
+        rt_info = {}
+        return rt_info
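
Note on the fx_decoder.py change above: inlined (non-graph) arguments were previously marked by wrapping them in a 1-tuple, which is ambiguous when the inlined value is itself a tuple; the new InlinedInput wrapper makes the marker explicit. A standalone sketch of the distinction (illustrative only, not code from the wheel):

    class InlinedInput:
        def __init__(self, data) -> None:
            self.data = data

    # Graph inputs stay as node indices; anything else is wrapped explicitly,
    # so a tuple-valued constant can no longer be mistaken for the marker.
    inputs = [0, 2, InlinedInput((1, 2, 3)), InlinedInput(None)]
    print([x if not isinstance(x, InlinedInput) else 0 for x in inputs])  # [0, 2, 0, 0]
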
intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py

@@ -43,8 +43,6 @@ def patched_forward(self, *args, **kwargs):
     unpacked_zp = decompression_pattern(
         self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)
 
-    unpacked_zp = unpacked_zp.to(dtype) + 1
-
     unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
     unpacked_weights = unpacked_weights.view(-1, self.width)
 
@@ -59,11 +57,50 @@ def patched_forward(self, *args, **kwargs):
     return out
 
 
+def patched_forward_sym(self, *args, **kwargs):
+    if hasattr(self, '_hf_hook'):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+
+    x = args[0]
+    dtype = x.dtype
+    outshape = x.shape[:-1] + (self.width,)
+    x = x.contiguous().view(-1, x.shape[-1])
+    height = self.qweight.shape[0]
+
+    unpacked_weights = decompression_pattern(
+        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
+    unpacked_weights = torch.transpose(
+        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
+
+    # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
+    unpacked_weights = unpacked_weights.to(dtype) * self.scales
+    unpacked_weights = unpacked_weights.view(-1, self.width)
+
+    out = x @ unpacked_weights
+
+    out = out.view(outshape)
+    if self.bias is not None:
+        out.add_(self.bias)
+
+    if hasattr(self, '_hf_hook'):
+        out = self._hf_hook.post_forward(self, out)
+    return out
+
+
 # All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
 supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']
 
 
 def patch_model(model):
+    is_symmetrical = False
+    config = None
+    if hasattr(model, "config"):
+        config = model.config
+    elif hasattr(model, "model") and hasattr(model.model, "config"):
+        # original model was wrapped
+        config = model.model.config
+    if config is not None and hasattr(config, 'quantization_config') and hasattr(config.quantization_config, 'sym'):
+        is_symmetrical = config.quantization_config.sym
     for name, m in model.named_modules():
         if hasattr(m, '_openvino_patch_orig_forward'):
             # already patched, skipping
@@ -87,7 +124,10 @@ def patch_model(model):
         assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups
 
         m._openvino_patch_orig_forward = m.forward
-        m.forward = partial(patched_forward, m)
+        if is_symmetrical:
+            m.forward = partial(patched_forward_sym, m)
+        else:
+            m.forward = partial(patched_forward, m)
 
         # Keep original field properties to be used when model is returned back to its original state
         m._openvino_patch_orig_qweights_type = m.qweight.dtype
@@ -97,11 +137,12 @@ def patch_model(model):
         m.qweight = m.qweight.view(dtype=torch.uint8)
         m.qzeros = m.qzeros.view(dtype=torch.uint8)
 
-        # TODO: Redundant tensor copy? Try to remove m.qweigh and m.qzeros after keeping modified values as submodules
+        # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
         m.add_module(
            '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
+        # Adding 17 to move zp+1 step from after unpacking to before to have correct decompression pattern. Can it overflow?
         m.add_module('_openvino_u4_compression_submodule_qzeros',
-                     KeepWeight(m.qzeros))
+                     KeepWeight(m.qzeros + torch.tensor(17, dtype=torch.uint8)))
 
         m.scales = m.scales.view(-1, 1, m.width)
 
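
Note on the gptq.py change above: each uint8 element of the reinterpreted qzeros packs two u4 zero-points, and since 17 = 0x11, adding 17 increments both nibbles by one. That is what lets the diff drop the separate `unpacked_zp.to(dtype) + 1` step in patched_forward. A small arithmetic sketch (plain Python, not code from the wheel), including the carry case the in-diff comment asks about:

    def nibbles(byte):
        return byte >> 4, byte & 0xF

    packed = 0x34                           # packs zero-points 3 (high) and 4 (low)
    print(nibbles((packed + 0x11) & 0xFF))  # (4, 5): both zero-points incremented by one

    edge = 0x3F                             # low nibble is already 15
    print(nibbles((edge + 0x11) & 0xFF))    # (5, 0): the +1 carries into the high nibble
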
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py

@@ -13,6 +13,7 @@ import torch
 from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
 from torch._dynamo.backends.registry import register_backend
 from torch._inductor.compile_fx import compile_fx
+from torch._inductor.freezing import replace_params_with_constants
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch._decomp import decomposition_table, get_decompositions
 
@@ -54,10 +55,9 @@ def openvino(subgraph, example_inputs, options=None):
     if (_get_aot_autograd(options)):
         global openvino_options
         openvino_options = options
-        decompositions = _get_decompositions(options) + get_inf_decomposition_list()
-        decompositions = decompositions + get_aot_decomposition_list()
-        return aot_autograd(fw_compiler=fx_openvino,
-                            bw_compiler=fx_openvino,
+        decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
+        return aot_autograd(fw_compiler=fx_openvino,
+                            bw_compiler=fx_openvino,
                             decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
     return fx_openvino(subgraph, example_inputs, options)
 
@@ -86,7 +86,14 @@ def fx_openvino(subgraph, example_inputs, options=None):
         if inputs_reversed:
             example_inputs.reverse()
 
+        preserved_arg_indices = []
         if (_get_aot_autograd(options)):
+            if tracing_context := torch._guards.TracingContext.try_get():
+                fw_metadata = tracing_context.fw_metadata
+                params_flat = tracing_context.params_flat
+                assert fw_metadata is not None and params_flat is not None
+                preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
+                example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
             model = subgraph
         else:
             from torch._subclasses.fake_tensor import FakeTensorMode
@@ -96,7 +103,6 @@ def fx_openvino(subgraph, example_inputs, options=None):
 
         with torch.no_grad():
             model.eval()
-
         partitioner = Partitioner(options)
         compiled_model = partitioner.make_partitions(model, options)
 
@@ -107,9 +113,15 @@ def fx_openvino(subgraph, example_inputs, options=None):
                 executor_parameters["model_hash_str"] += "_fs"
 
         def _call(*args):
+            if(_get_aot_autograd(options)):
+                args_list = args[0]
+                args_new = [args_list[i] for i in preserved_arg_indices]
+                args = args_new
             res = execute(compiled_model, *args, executor="openvino",
                           executor_parameters=executor_parameters, options=options)
             return res
+        if(_get_aot_autograd(options)):
+            _call._boxed_call = True  # type: ignore[attr-defined]
         return _call
     except Exception as e:
         logger.debug(f"Failed in OpenVINO execution: {e}")
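
Note on the backend.py change above: with aot_autograd enabled, parameters are folded into the graph via replace_params_with_constants, so only the runtime arguments at preserved_arg_indices are kept, and the returned callable is marked `_boxed_call = True`, PyTorch's boxed calling convention in which the compiled function receives its arguments as a single list. A minimal sketch of that convention (hypothetical indices and values, not code from the wheel):

    import torch

    preserved_arg_indices = [0, 2]      # assume the argument at index 1 was frozen out

    def _call(*args):
        args_list = args[0]             # boxed: one list of tensors comes in
        kept = [args_list[i] for i in preserved_arg_indices]
        return [kept[0] + kept[1]]

    _call._boxed_call = True            # callers invoke _call([t0, t1, t2])
    print(_call([torch.ones(2), torch.zeros(2), torch.full((2,), 3.0)]))  # [tensor([4., 4.])]
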
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py

@@ -25,6 +25,13 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
 
 
+class PatternNode:
+    op_types = {}
+
+    def __init__(self):
+        self.op_types = {}
+
+
 class Partitioner:
     def __init__(self, options):
         self.supported_ops = OperatorSupport(options)
@@ -56,55 +63,56 @@ class Partitioner:
             return True
         return False
 
-    def capture_gptq_patterns(self, graph_module: GraphModule) -> bool:
+    def check_pattern(self, node: torch.fx.Node, pattern: PatternNode, enabled_ops: list) -> bool:
+        if node.op == "call_function":
+            if ("call_function" + ":" + str(node.target)) in pattern.op_types:
+                pt_input_nodes = node.all_input_nodes
+                pattern_input_ops = pattern.op_types["call_function" + ":" + str(node.target)]
+                if pattern_input_ops is None:
+                    enabled_ops.append(node)
+                    return True
+                if len(pt_input_nodes) != len(pattern_input_ops):
+                    return False
+                for i in range(len(pt_input_nodes)):
+                    if not self.check_pattern(pt_input_nodes[i], pattern_input_ops[i], enabled_ops):
+                        return False
+                enabled_ops.append(node)
+                return True
+        elif node.op == "get_attr":
+            if "get_attr" in pattern.op_types:
+                return True
+            else:
+                return False
+        return False
+
+    def capture_gptq_patterns(self, graph_module: GraphModule):
+        const_0_node = PatternNode
+        const_0_node.op_types["get_attr"] = None
+        unsqueeze_0_node = PatternNode
+        unsqueeze_0_node.op_types["call_function:aten.unsqueeze.default"] = [const_0_node,]
+        expand_node = PatternNode
+        expand_node.op_types["call_function:aten.expand.default"] = [unsqueeze_0_node,]
+        const_1_node = PatternNode
+        const_1_node.op_types["get_attr"] = None
+        unsqueeze_1_node = PatternNode
+        unsqueeze_1_node.op_types["call_function:aten.unsqueeze.default"] = [const_1_node,]
+        bitwise_right_shift_node = PatternNode
+        bitwise_right_shift_node.op_types["call_function:aten.bitwise_right_shift.Tensor"] = [expand_node, unsqueeze_1_node]
+        to_copy_node = PatternNode
+        to_copy_node.op_types["call_function:aten._to_copy.default"] = [bitwise_right_shift_node,]
+        add_or_to_copy_node = PatternNode
+        add_or_to_copy_node.op_types["call_function:aten._to_copy.default"] = [bitwise_right_shift_node,]
+        add_or_to_copy_node.op_types["call_function:aten.add.Tensor"] = [to_copy_node,]
+        bitwise_and_node = PatternNode
+        bitwise_and_node.op_types["call_function:aten.bitwise_and.Scalar"] = [add_or_to_copy_node,]
+
         for node in graph_module.graph.nodes:
             if str(node.op) == "call_function" and str(node.target) == "aten.bitwise_and.Scalar":
-                bitwise_and_in_nodes = node.all_input_nodes
-                if len(bitwise_and_in_nodes) != 1:
-                    continue
-                to_copy_node = bitwise_and_in_nodes[0]
-                if str(to_copy_node.op) != "call_function" or str(to_copy_node.target) != "aten._to_copy.default":
-                    continue
-                to_copy_in_nodes = to_copy_node.all_input_nodes
-                if len(to_copy_in_nodes) != 1:
-                    continue
-                bitwise_right_shift_node = to_copy_in_nodes[0]
-                if str(bitwise_right_shift_node.op) != "call_function" or str(bitwise_right_shift_node.target) != "aten.bitwise_right_shift.Tensor":
-                    continue
-                bitwise_right_shift_in_nodes = bitwise_right_shift_node.all_input_nodes
-                if len(bitwise_right_shift_in_nodes) != 2:
-                    continue
-                expand_node = bitwise_right_shift_in_nodes[0]
-                if str(expand_node.op) != "call_function" or str(expand_node.target) != "aten.expand.default":
-                    continue
-                expand_in_nodes = expand_node.all_input_nodes
-                if len(expand_in_nodes) != 1:
-                    continue
-                unsqueeze_0_node = expand_in_nodes[0]
-                if str(unsqueeze_0_node.op) != "call_function" or str(unsqueeze_0_node.target) != "aten.unsqueeze.default":
-                    continue
-                unsqueeze_0_in_nodes = unsqueeze_0_node.all_input_nodes
-                if len(unsqueeze_0_in_nodes) != 1:
-                    continue
-                const_0_node = unsqueeze_0_in_nodes[0]
-                if str(const_0_node.op) != "get_attr":
-                    continue
-                unsqueeze_1_node = bitwise_right_shift_in_nodes[1]
-                if str(unsqueeze_1_node.op) != "call_function" or str(unsqueeze_1_node.target) != "aten.unsqueeze.default":
-                    continue
-                unsqueeze_1_in_nodes = unsqueeze_1_node.all_input_nodes
-                if len(unsqueeze_1_in_nodes) != 1:
-                    continue
-                const_1_node = unsqueeze_1_in_nodes[0]
-                if str(const_1_node.op) != "get_attr":
-                    continue
-
-                self.supported_ops.enable_by_name(node)
-                self.supported_ops.enable_by_name(to_copy_node)
-                self.supported_ops.enable_by_name(bitwise_right_shift_node)
-                self.supported_ops.enable_by_name(expand_node)
-                self.supported_ops.enable_by_name(unsqueeze_0_node)
-                self.supported_ops.enable_by_name(unsqueeze_1_node)
+                enabled_ops = []
+                pattern_match = self.check_pattern(node, bitwise_and_node, enabled_ops)
+                if pattern_match:
+                    for pattern_op in enabled_ops:
+                        self.supported_ops.enable_by_name(pattern_op)
 
     def make_partitions(self, graph_module: GraphModule, options) -> GraphModule:
         allow_single_node_partition = _is_testing(options)
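
Note on the partition.py change above: the hand-rolled chain of if/continue checks is replaced by a declarative description of the GPTQ decompression subgraph (bitwise_and → _to_copy/add → bitwise_right_shift → expand/unsqueeze → get_attr) that check_pattern walks recursively from the root node. A toy illustration of the same idea over plain dictionaries (not code from the wheel; op names follow the pattern in the diff):

    def matches(node, pattern):
        if node["op"] not in pattern:
            return False
        child_patterns = pattern[node["op"]]
        if child_patterns is None:               # leaf: any inputs are accepted
            return True
        inputs = node["inputs"]
        return len(inputs) == len(child_patterns) and all(
            matches(n, p) for n, p in zip(inputs, child_patterns))

    const = {"get_attr": None}
    unsqueeze = {"aten.unsqueeze.default": [const]}
    shift = {"aten.bitwise_right_shift.Tensor": [{"aten.expand.default": [unsqueeze]}, unsqueeze]}
    root = {"aten.bitwise_and.Scalar": [{"aten._to_copy.default": [shift]}]}

    leaf = {"op": "get_attr", "inputs": []}
    unsq = {"op": "aten.unsqueeze.default", "inputs": [leaf]}
    node = {"op": "aten.bitwise_and.Scalar", "inputs": [
        {"op": "aten._to_copy.default", "inputs": [
            {"op": "aten.bitwise_right_shift.Tensor", "inputs": [
                {"op": "aten.expand.default", "inputs": [unsq]}, unsq]}]}]}
    print(matches(node, root))  # True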