bigdl-core-npu 2.6.0b20241112__cp311-cp311-win_amd64.whl → 2.6.0b20241118__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/METADATA +1 -1
  2. {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/RECORD +96 -86
  3. {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/WHEEL +1 -1
  4. intel_npu_acceleration_library/_version.py +1 -1
  5. intel_npu_acceleration_library/backend/bindings.py +38 -3
  6. intel_npu_acceleration_library/backend/factory.py +77 -48
  7. intel_npu_acceleration_library/backend/ops.py +2 -1
  8. intel_npu_acceleration_library/backend/tensor.py +73 -3
  9. intel_npu_acceleration_library/device.py +2 -2
  10. intel_npu_acceleration_library/dtypes.py +34 -1
  11. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
  12. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  13. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  14. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  15. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  16. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  17. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  18. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +283 -0
  19. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp310-win_amd64.pyd +0 -0
  20. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp311-win_amd64.pyd +0 -0
  21. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp312-win_amd64.pyd +0 -0
  22. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp38-win_amd64.pyd +0 -0
  23. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp39-win_amd64.pyd +0 -0
  24. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +129 -0
  25. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  26. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  27. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  28. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  29. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  30. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  31. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  32. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  33. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  34. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  35. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +8 -0
  36. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +1 -1
  37. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
  38. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +3 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  50. intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
  51. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
  52. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
  53. intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
  54. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
  55. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
  56. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
  57. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +2 -0
  58. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +62 -1
  59. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +60 -43
  60. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
  61. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +1 -0
  62. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +67 -1
  63. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
  64. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
  65. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
  66. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  67. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
  68. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +70 -60
  69. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
  70. intel_npu_acceleration_library/external/openvino/utils.py +17 -0
  71. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  72. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  73. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  74. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  75. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  76. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  77. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  78. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  79. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  80. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  81. intel_npu_acceleration_library/lib/Release/openvino_jax_frontend.dll +0 -0
  82. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  83. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  84. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  85. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  86. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  87. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  88. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  89. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  90. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  91. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  92. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  93. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  94. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  95. intel_npu_acceleration_library/nn/module.py +17 -17
  96. {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/top_level.txt +0 -0
intel_npu_acceleration_library/backend/factory.py
@@ -7,7 +7,7 @@ from intel_npu_acceleration_library.backend.base import BaseNPUBackendWithPrefet
 from intel_npu_acceleration_library.backend.ops import get_supported_ops
 from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
 from intel_npu_acceleration_library.backend.tensor import Tensor
-from intel_npu_acceleration_library.dtypes import int4, bfloat16
+from intel_npu_acceleration_library.dtypes import int4, bfloat16, get_backend_dtype
 from typing import Optional, Tuple, Any, Union, Sequence, TypeVar, Callable, cast, List
 from functools import partial
 import numpy.typing as npt
@@ -71,17 +71,24 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                 Tensor: Tensor object
             """
             # Convert Tensor objects to their underlying node
-            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
             kwargs = {
                 k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
             }
 
+            if fn.__qualname__ == 'NNFactory.reshape':
+                output_idx = args[0].output_idx
+                kwargs["output_idx"] = output_idx
+            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+
+
             input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
                 v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
             ]
             # Call the function
             node = fn(self, *args, **kwargs)
 
+            output_len = backend_lib.op_output_size(node)
+
             # remove input nodes from output_nodes
             self.output_nodes = [
                 node for node in self.output_nodes if node not in input_nodes
@@ -91,7 +98,13 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                 self.output_nodes.append(node)
 
             # Wrap the node in a Tensor object
-            return Tensor(factory=self, node=node)
+            if output_len == 1:
+                return Tensor(factory=self, node=node, output_idx=0)
+            else:
+                output_tensor_list = []
+                for i in range(output_len):
+                    output_tensor_list.append(Tensor(factory=self, node=node, output_idx=i))
+                return output_tensor_list
 
         return cast(F, wrapper)
@@ -184,34 +197,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Args:
             dtype: numpy dtype
 
-        Raises:
-            RuntimeError: Unsupported datatype
-
         Returns:
             ctypes.c_char_p: string representation of the dtype
         """
-        if dtype in [np.int8, torch.int8]:
-            str_dtype = "int8"
-        elif dtype == np.uint8 or dtype == int4:
-            # u8 represents packed i4 dtypes
-            str_dtype = "int4"
-        elif dtype in [np.int16, torch.int16]:
-            str_dtype = "int16"
-        elif dtype in [np.int32, torch.int32]:
-            str_dtype = "int32"
-        elif dtype in [np.int64, torch.int64]:
-            str_dtype = "int64"
-        elif dtype in [np.float16, torch.float16]:
-            str_dtype = "float16"
-        elif dtype in [np.float32, torch.float32]:
-            str_dtype = "float32"
-        elif dtype in [np.float64, torch.float64]:
-            str_dtype = "float64"
-        elif dtype in [bfloat16, torch.bfloat16]:
-            str_dtype = "bfloat16"
-        else:
-            raise RuntimeError(f"DType is not supported {dtype}")
-        return ctypes.c_char_p(str_dtype.encode())
+        return get_backend_dtype(dtype)
 
     @return_tensor
     def parameter(
@@ -422,6 +411,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         wt_dtype: npt.DTypeLike = np.float16,
         scale_factor: bool = True,
         is_prefill: bool = False,
+        use_dq: bool = True,
     ) -> ctypes._Pointer:
         """Generate a linear layer for dynamic quantization linear layer.
 
@@ -439,7 +429,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             ctypes._Pointer: output node
         """
-        func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
+        if is_prefill:
+            func = backend_lib.dq_split_linear_prefill if use_dq else backend_lib.gw_linear_prefill
+        else:
+            func = backend_lib.dq_split_linear
         return func(self._mm, input_node, n_splits,
                     input_channels, outout_channels, bias,
                     self.get_backend_dtype(act_dtype),
@@ -448,7 +441,9 @@ class NNFactory(BaseNPUBackendWithPrefetch):
 
     @return_tensor
     def reshape(
-        self, input_node: ctypes._Pointer, shape: Sequence[int]
+        self, input_node: ctypes._Pointer, shape: Sequence[int],
+        special_zero: bool = True,
+        output_idx: int = 0
     ) -> ctypes._Pointer:
         """Generate a reshape layer.
 
@@ -460,7 +455,8 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         shape_node = self.constant(shape).node  # type: ignore
-        return backend_lib.reshape(self._mm, input_node, shape_node)
+        return backend_lib.reshape(self._mm, input_node, shape_node,
+                                   special_zero, output_idx)
 
     @return_tensor
     def broadcast(
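Editor's note: `special_zero` carries the semantics of OpenVINO's Reshape operation: when true, a 0 in the target shape means "copy the corresponding dimension from the input" rather than a literal zero. A hedged sketch; names and shapes are illustrative, not from this diff:

    import numpy as np
    from intel_npu_acceleration_library.backend import NNFactory

    nn = NNFactory()
    x = nn.parameter((2, 3, 4), np.float16)
    y = nn.reshape(x, [0, -1])                     # special_zero=True: keep dim 0 -> (2, 12)
    z = nn.reshape(x, [4, 6], special_zero=False)  # plain reshape -> (4, 6)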
@@ -612,7 +608,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node_1)
+            shape_size = backend_lib.op_shape_size(input_node_1, 0)
             axis = (axis + shape_size) % shape_size
         axis = np.int64(axis)
         return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)
@@ -631,7 +627,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_nodes[0])
+            shape_size = backend_lib.op_shape_size(input_nodes[0], 0)
             axis = (axis + shape_size) % shape_size
         axis = np.int64(axis)
 
@@ -656,7 +652,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_max(self._mm, input_node, axis_node, keep_dims)
@@ -679,7 +675,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_mean(self._mm, input_node, axis_node, keep_dims)
@@ -702,7 +698,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_min(self._mm, input_node, axis_node, keep_dims)
@@ -725,7 +721,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_prod(self._mm, input_node, axis_node, keep_dims)
@@ -748,7 +744,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_sum(self._mm, input_node, axis_node, keep_dims)
@@ -768,7 +764,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             axis = (axis + shape_size) % shape_size
         axis_node = self.constant(axis).node  # type: ignore
         return backend_lib.normL2(self._mm, input_node, axis_node, eps)
@@ -791,14 +787,14 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             ctypes._Pointer: output node
         """
-        input_shape_size = backend_lib.op_shape_size(input_node)
+        input_shape_size = backend_lib.op_shape_size(input_node, 0)
         input_shape = [
-            backend_lib.op_shape(input_node, i) for i in range(input_shape_size)
+            backend_lib.op_shape(input_node, i, 0) for i in range(input_shape_size)
         ]
         if isinstance(exponent, ctypes._Pointer):
-            exponent_shape_size = backend_lib.op_shape_size(input_node)
+            exponent_shape_size = backend_lib.op_shape_size(input_node, 0)
             exponent_shape = [
-                backend_lib.op_shape(exponent, i) for i in range(exponent_shape_size)
+                backend_lib.op_shape(exponent, i, 0) for i in range(exponent_shape_size)
             ]
         else:
             exponent_shape = list(exponent.shape)
@@ -807,6 +803,39 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             # raise ValueError("Input tensor shapes are not equal")
 
         return backend_lib.power(self._mm, input_node, exponent)
+
+    @return_tensor
+    def variadic_split(
+        self,
+        input: ctypes._Pointer,
+        axis: int,
+        split_lengths: Sequence[int],
+    ) -> ctypes._Pointer:
+        """Generate a variadic split layer.
+
+        Args:
+            input (ctypes._Pointer): layer input node
+            axis (int): split axis
+            split_lengths (Sequence[int]): a list containing the size of each output tensor
+                along the split axis. The length of split_lengths equals the number of
+                outputs, and the sum of split_lengths must match data.shape[axis].
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+
+        split_lens_ptr = np.array(split_lengths, dtype=np.uint32)
+
+        return backend_lib.variadic_split(
+            self._mm,
+            input,
+            axis,
+            split_lens_ptr,
+            split_lens_ptr.size,
+        )
 
     @return_tensor
     def avg_pooling(
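Editor's note: a hedged usage sketch for the new `variadic_split` op. The split lengths must sum to the size of the split axis (32 + 64 == 96 below), and each output comes back as its own `Tensor` thanks to the decorator change earlier in this diff; names and shapes are illustrative:

    import numpy as np
    from intel_npu_acceleration_library.backend import NNFactory

    nn = NNFactory()
    x = nn.parameter((1, 96), np.float16)
    first, second = nn.variadic_split(x, axis=1, split_lengths=[32, 64])
    print(first.shape, second.shape)  # [1, 32] [1, 64]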
@@ -962,7 +991,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                                       value, attn_mask,
                                       is_causal)
 
-    def get_tensor_shape(self, node):
+    def get_tensor_shape(self, node, output_idx=0):
         """Get tensor shape.
 
         Args:
@@ -971,10 +1000,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             tuple[int]: tensor shape
         """
-        size = backend_lib.op_shape_size(node)
-        return tuple([backend_lib.op_shape(node, idx) for idx in range(size)])
+        size = backend_lib.op_shape_size(node, output_idx)
+        return tuple([backend_lib.op_shape(node, idx, output_idx) for idx in range(size)])
 
-    def get_tensor_dtype(self, node):
+    def get_tensor_dtype(self, node, output_idx=0):
         """Get tensor dtype.
 
         Args:
@@ -986,7 +1015,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             str: tensor dtype
         """
-        dtype_int = backend_lib.op_dtype(node)
+        dtype_int = backend_lib.op_dtype(node, output_idx)
 
         if dtype_int == 2:
             return np.bool
intel_npu_acceleration_library/backend/ops.py
@@ -98,7 +98,7 @@ def get_supported_ops() -> List[SupportedOp]:
             inputs=3,
             parameters=[ctypes.c_int],
         ),
-        SupportedOp(name="reshape", inputs=2),
+        SupportedOp(name="reshape", inputs=2, parameters=[ctypes.c_bool, ctypes.c_int]),
         SupportedOp(name="transpose", inputs=2),
         SupportedOp(name="squeeze", inputs=1),
         SupportedOp(name="unsqueeze", inputs=2),
@@ -137,5 +137,6 @@ def get_supported_ops() -> List[SupportedOp]:
         SupportedOp(name="power", inputs=2),
         SupportedOp(name="broadcast", inputs=2),
         SupportedOp(name="log_softmax", inputs=1, parameters=[ctypes.c_int64]),
+        SupportedOp(name="rotate_half", inputs=1),
     ]
     return supported_ops
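Editor's note: `rotate_half` is the helper used by rotary position embeddings (RoPE). Assuming the backend op follows the usual definition, it splits the last dimension in half and recombines the halves as (-x2, x1); a plain-PyTorch reference sketch for comparison:

    import torch

    def rotate_half_reference(x: torch.Tensor) -> torch.Tensor:
        # Assumed semantics of the new backend op (the standard RoPE helper).
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)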
intel_npu_acceleration_library/backend/tensor.py
@@ -16,14 +16,83 @@ from intel_npu_acceleration_library.dtypes import (
     int32,
     int64,
     NPUDtype,
+    get_backend_dtype,
 )
 from dataclasses import dataclass
 import functools
+from math import prod
 import numpy as np
 import ctypes
 import torch
 
 
+class RemoteTensor(torch.Tensor):
+    """
+    Represent a remote tensor object.
+    Attrs:
+        _remote_tensor (ctypes._Pointer): The pointer to the underlying remote tensor.
+    Methods:
+        from_torch(x: torch.Tensor): Create a remote tensor from a torch tensor.
+    """
+
+    _remote_tensor = None
+
+    @staticmethod
+    def __new__(cls, x: Any, remote_tensor: ctypes._Pointer, *args: Any, **kwargs: Any):
+        """
+        Create a new remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+            args (Any): additional arguments
+            kwargs (Any): additional keyword arguments
+        Returns:
+            RemoteTensor: a RemoteTensor object
+        """
+        return super().__new__(cls, x, *args, **kwargs)
+
+    def __init__(self, x: Any, remote_tensor: ctypes._Pointer):
+        """
+        Initialize the remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+        """
+        self._remote_tensor = remote_tensor
+
+    # def __del__(self):
+    #     if self._remote_tensor and backend_lib:
+    #         backend_lib.del_remote_tensor(self._remote_tensor)
+
+    @staticmethod
+    def from_torch(x: torch.Tensor) -> "RemoteTensor":
+        """
+        Create a remote tensor from a torch tensor.
+        Args:
+            x (torch.Tensor): The torch tensor.
+        Returns:
+            RemoteTensor: The remote tensor.
+        """
+        shape_arr = np.array(x.shape, dtype=np.uint32)
+        dtype_str = get_backend_dtype(x.dtype)
+        p = ctypes.cast(x.data_ptr(), ctypes.c_void_p)
+
+        rt = backend_lib.to_npu(shape_arr.size, shape_arr, dtype_str, p)
+
+        pointer = ctypes.cast(
+            backend_lib.remote_tensor_data(rt),
+            ctypes.POINTER(ctypes.c_uint8),
+        )
+
+        arr = (pointer._type_ * prod(x.shape) * x.element_size()).from_address(
+            ctypes.addressof(pointer.contents)
+        )
+
+        pt_tensor = torch.frombuffer(arr, dtype=x.dtype).view(*x.shape)
+
+        return RemoteTensor(pt_tensor, rt)
+
+
 @dataclass
 class Tensor:
     """
@@ -88,6 +157,7 @@ class Tensor:
 
     factory: "NNFactory"  # type: ignore  # noqa: F821
     node: ctypes._Pointer
+    output_idx: int
 
     @property
    def shape(self) -> Sequence[int]:
@@ -97,8 +167,8 @@ class Tensor:
         Returns:
             Sequence[int]: The shape of the tensor.
         """
-        shape_size = backend_lib.op_shape_size(self.node)
-        return [backend_lib.op_shape(self.node, i) for i in range(shape_size)]
+        shape_size = backend_lib.op_shape_size(self.node, self.output_idx)
+        return [backend_lib.op_shape(self.node, i, self.output_idx) for i in range(shape_size)]
 
     @property
     def dtype(self) -> NPUDtype:
@@ -108,7 +178,7 @@ class Tensor:
         Returns:
             type: The data type of the tensor.
         """
-        dtype_int = backend_lib.op_dtype(self.node)
+        dtype_int = backend_lib.op_dtype(self.node, self.output_idx)
 
         if dtype_int == 2:
             return np.bool
intel_npu_acceleration_library/device.py
@@ -4,6 +4,7 @@
 #
 
 from intel_npu_acceleration_library.nn.module import convert_to_npu_module
+from intel_npu_acceleration_library.backend.tensor import RemoteTensor
 from torch.overrides import TorchFunctionMode
 from functools import lru_cache
 from typing import Any, MutableMapping
@@ -165,8 +166,7 @@ def to(super_fn: Any, self: Any, *args: Any, **kwargs: Any):
     """
     npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
     if npu_device:
-        # None for now, once the remote tensor feature lands, it can be converted to a remote tensor
-        pass
+        return super_fn(RemoteTensor.from_torch(self), *args, **kwargs)
     return super_fn(self, *args, **kwargs)
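Editor's note: with this change, moving a tensor to the NPU is no longer a no-op; it routes through `RemoteTensor.from_torch`, so the returned tensor is backed by an NPU-side allocation. A hedged sketch of the user-visible effect, assuming the library's "npu" device hooks are active once it is imported:

    import torch
    import intel_npu_acceleration_library  # assumed to install the "npu" device hooks

    x = torch.randn(1, 128, dtype=torch.float16)
    x_npu = x.to("npu")  # previously returned unchanged; now a RemoteTensor in NPU memory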
intel_npu_acceleration_library/dtypes.py
@@ -7,7 +7,7 @@ from dataclasses import dataclass
 from typing import Union
 import numpy as np
 import torch
-
+import ctypes
 
 @dataclass(frozen=True)
 class NPUDtype:
@@ -81,6 +81,39 @@ class NPUDtype:
         return self.name
 
 
+def get_backend_dtype(dtype) -> ctypes.c_char_p:
+    """Get the string representation of the dtype.
+    Args:
+        dtype: numpy dtype
+    Raises:
+        RuntimeError: Unsupported datatype
+    Returns:
+        ctypes.c_char_p: string representation of the dtype
+    """
+    if dtype in [np.int8, torch.int8]:
+        str_dtype = "int8"
+    elif dtype in [np.uint8, int4, torch.uint8]:
+        # u8 represents packed i4 dtypes
+        str_dtype = "int4"
+    elif dtype in [np.int16, torch.int16]:
+        str_dtype = "int16"
+    elif dtype in [np.int32, torch.int32]:
+        str_dtype = "int32"
+    elif dtype in [np.int64, torch.int64]:
+        str_dtype = "int64"
+    elif dtype in [np.float16, torch.float16]:
+        str_dtype = "float16"
+    elif dtype in [np.float32, torch.float32]:
+        str_dtype = "float32"
+    elif dtype in [np.float64, torch.float64]:
+        str_dtype = "float64"
+    elif dtype in [bfloat16, torch.bfloat16]:
+        str_dtype = "bfloat16"
+    else:
+        raise RuntimeError(f"DType is not supported {dtype}")
+    return ctypes.c_char_p(str_dtype.encode())
+
+
 float16 = NPUDtype(
     "fp16",
     16,
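Editor's note: `get_backend_dtype` moves here from `NNFactory` (see the factory.py hunk above) so that `RemoteTensor.from_torch` can reuse it; `torch.uint8` is now accepted as packed int4 as well. A quick usage sketch:

    import torch
    from intel_npu_acceleration_library.dtypes import get_backend_dtype

    assert get_backend_dtype(torch.float16).value == b"float16"
    assert get_backend_dtype(torch.uint8).value == b"int4"  # u8 carries packed i4 data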
intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py
@@ -18,3 +18,4 @@ from openvino._pyopenvino._offline_transformations import compress_model_transfo
 from openvino._pyopenvino._offline_transformations import compress_quantize_weights_transformation
 from openvino._pyopenvino._offline_transformations import convert_sequence_to_tensor_iterator_transformation
 from openvino._pyopenvino._offline_transformations import paged_attention_transformation
+from openvino._pyopenvino._offline_transformations import stateful_to_stateless_transformation
intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Package: openvino
+Low level wrappers for the FrontEnd C++ API.
+"""
+
+# flake8: noqa
+
+try:
+    from openvino.frontend.jax.py_jax_frontend import _FrontEndJaxDecoder as Decoder
+except ImportError as err:
+    raise ImportError("OpenVINO JAX frontend is not available, please make sure the frontend is built."
+                      "{}".format(err))