onnxruntime_extensions 0.11.0__cp39-cp39-win_amd64.whl → 0.13.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/onnxruntime_extensions/__init__.py
+++ b/onnxruntime_extensions/__init__.py
@@ -10,7 +10,6 @@ This enables more flexibility and control over model execution, thus expanding t
 
 __author__ = "Microsoft"
 
-
 from ._version import __version__
 from ._ocos import get_library_path
 from ._ocos import Opdef, PyCustomOpDef
@@ -66,6 +65,10 @@ if _lib_only:
     gen_processing_models = _unimplemented
     OrtPyFunction = _unimplemented
     ort_inference = _unimplemented
+    PyOrtFunction = _unimplemented
+    optimize_model = _unimplemented
+    make_onnx_model = _unimplemented
+    ONNXRuntimeError = _unimplemented
 
 else:
     __all__ += _offline_api
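The four added stubs mean a library-only build now fails with an explicit error when any offline API is touched, instead of a NameError. A minimal sketch of the pattern, assuming `_unimplemented` is a raising placeholder (the package's actual stub body may differ):

```python
# Hypothetical sketch of the stub pattern above; the real _unimplemented
# in onnxruntime_extensions may be defined differently.
def _unimplemented(*args, **kwargs):
    raise NotImplementedError(
        "this build of onnxruntime_extensions has no offline/Python API")

PyOrtFunction = _unimplemented  # PyOrtFunction(...) now raises a clear error
```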
--- a/onnxruntime_extensions/_hf_cvt.py
+++ b/onnxruntime_extensions/_hf_cvt.py
@@ -48,8 +48,9 @@ class HFTokenizerConverter(CustomOpConverter):
             model_dir = hf_tokenizer.name_or_path
         else:
             model_dir = os.path.dirname(vocab_file)
-        tokenizer_json = json.load(
-            open(os.path.join(model_dir, tokenizer_file), "r", encoding="utf-8"))
+        f = open(os.path.join(model_dir, tokenizer_file), "r", encoding="utf-8")
+        tokenizer_json = json.load(f)
+        f.close()
         # get vocab object from json file
         vocab = tokenizer_json.get("model", {}).get("vocab", {})
         sorted_merges = tokenizer_json.get("model", {}).get("merges", [])
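The new code closes the handle that the old one-liner leaked, though it still leaks if `json.load` raises. A context manager would cover that case as well; a suggestion, not what the package ships:

```python
import json
import os

model_dir = "."                    # illustrative; same names as the hunk above
tokenizer_file = "tokenizer.json"

# Closes the file even if json.load raises:
with open(os.path.join(model_dir, tokenizer_file), "r", encoding="utf-8") as f:
    tokenizer_json = json.load(f)
```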
--- a/onnxruntime_extensions/_torch_cvt.py
+++ b/onnxruntime_extensions/_torch_cvt.py
@@ -17,7 +17,7 @@ from onnx import numpy_helper
 from ._ortapi2 import make_onnx_model
 from ._cuops import SingleOpGraph
 from ._hf_cvt import HFTokenizerConverter
-from .util import remove_unused_initializers
+from .util import remove_unused_initializers, mel_filterbank
 
 
 class _WhisperHParams:
@@ -30,53 +30,15 @@ class _WhisperHParams:
     N_FRAMES = N_SAMPLES // HOP_LENGTH
 
 
-def _mel_filterbank(
-        n_fft: int, n_mels: int = 80, sr=16000, min_mel=0, max_mel=45.245640471924965, dtype=np.float32):
-    """
-    Compute a Mel-filterbank. The filters are stored in the rows, the columns,
-    and it is Slaney normalized mel-scale filterbank.
-    """
-    fbank = np.zeros((n_mels, n_fft // 2 + 1), dtype=dtype)
-
-    # the centers of the frequency bins for the DFT
-    freq_bins = np.fft.rfftfreq(n=n_fft, d=1.0 / sr)
-
-    mel = np.linspace(min_mel, max_mel, n_mels + 2)
-    # Fill in the linear scale
-    f_min = 0.0
-    f_sp = 200.0 / 3
-    freqs = f_min + f_sp * mel
-
-    # And now the nonlinear scale
-    min_log_hz = 1000.0  # beginning of log region (Hz)
-    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
-    logstep = np.log(6.4) / 27.0  # step size for log region
-
-    log_t = mel >= min_log_mel
-    freqs[log_t] = min_log_hz * np.exp(logstep * (mel[log_t] - min_log_mel))
-    mel_bins = freqs
-
-    mel_spacing = np.diff(mel_bins)
-
-    ramps = mel_bins.reshape(-1, 1) - freq_bins.reshape(1, -1)
-    for i in range(n_mels):
-        left = -ramps[i] / mel_spacing[i]
-        right = ramps[i + 2] / mel_spacing[i + 1]
-
-        # intersect them with each other and zero
-        fbank[i] = np.maximum(0, np.minimum(left, right))
-
-    energy_norm = 2.0 / (mel_bins[2: n_mels + 2] - mel_bins[:n_mels])
-    fbank *= energy_norm[:, np.newaxis]
-    return fbank
-
-
 class CustomOpStftNorm(torch.autograd.Function):
     @staticmethod
     def symbolic(g, self, n_fft, hop_length, window):
-        t_n_fft = g.op('Constant', value_t=torch.tensor(n_fft, dtype=torch.int64))
-        t_hop_length = g.op('Constant', value_t=torch.tensor(hop_length, dtype=torch.int64))
-        t_frame_size = g.op('Constant', value_t=torch.tensor(n_fft, dtype=torch.int64))
+        t_n_fft = g.op('Constant', value_t=torch.tensor(
+            n_fft, dtype=torch.int64))
+        t_hop_length = g.op('Constant', value_t=torch.tensor(
+            hop_length, dtype=torch.int64))
+        t_frame_size = g.op(
+            'Constant', value_t=torch.tensor(n_fft, dtype=torch.int64))
         return g.op("ai.onnx.contrib::StftNorm", self, t_n_fft, t_hop_length, window, t_frame_size)
 
     @staticmethod
@@ -97,7 +59,7 @@ class WhisperPrePipeline(torch.nn.Module):
         self.n_fft = n_fft
         self.window = torch.hann_window(n_fft)
         self.mel_filters = torch.from_numpy(
-            _mel_filterbank(sr=sr, n_fft=n_fft, n_mels=n_mels))
+            mel_filterbank(sr=sr, n_fft=n_fft, n_mels=n_mels))
 
     def forward(self, audio_pcm: torch.Tensor):
         stft_norm = CustomOpStftNorm.apply(audio_pcm,
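The private `_mel_filterbank` helper was deleted above and the call site now imports `mel_filterbank` from the package's `util` module, making the Slaney-normalized filterbank reusable outside this file. A sketch of direct use, assuming the public function keeps the removed helper's signature and defaults:

```python
from onnxruntime_extensions.util import mel_filterbank

# Whisper-style defaults: 400-point FFT, 80 mel bands, 16 kHz audio.
fbank = mel_filterbank(n_fft=400, n_mels=80, sr=16000)
assert fbank.shape == (80, 400 // 2 + 1)  # (n_mels, n_fft // 2 + 1)
```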
@@ -112,7 +74,8 @@ class WhisperPrePipeline(torch.nn.Module):
         spec_shape = log_spec.shape
         padding_spec = torch.ones(spec_shape[0],
                                   spec_shape[1],
-                                  self.n_samples // self.hop_length - spec_shape[2],
+                                  self.n_samples // self.hop_length -
+                                  spec_shape[2],
                                   dtype=torch.float)
         padding_spec *= spec_min
         log_spec = torch.cat((log_spec, padding_spec), dim=2)
@@ -165,15 +128,20 @@ def _to_onnx_stft(onnx_model, n_fft):
         make_node('Slice', inputs=['transpose_1_output_0', 'const_18_output_0', 'const_minus_1_output_0',
                                    'const_17_output_0', 'const_20_output_0'], outputs=['slice_1_output_0'],
                   name='slice_1'),
-        make_node('Constant', inputs=[], outputs=['const0_output_0'], name='const0', value_int=0),
-        make_node('Constant', inputs=[], outputs=['const1_output_0'], name='const1', value_int=1),
+        make_node('Constant', inputs=[], outputs=[
+                  'const0_output_0'], name='const0', value_int=0),
+        make_node('Constant', inputs=[], outputs=[
+                  'const1_output_0'], name='const1', value_int=1),
         make_node('Gather', inputs=['slice_1_output_0', 'const0_output_0'], outputs=['gather_4_output_0'],
                   name='gather_4', axis=3),
         make_node('Gather', inputs=['slice_1_output_0', 'const1_output_0'], outputs=['gather_5_output_0'],
                   name='gather_5', axis=3),
-        make_node('Mul', inputs=['gather_4_output_0', 'gather_4_output_0'], outputs=['mul_output_0'], name='mul0'),
-        make_node('Mul', inputs=['gather_5_output_0', 'gather_5_output_0'], outputs=['mul_1_output_0'], name='mul1'),
-        make_node('Add', inputs=['mul_output_0', 'mul_1_output_0'], outputs=[stft_norm_node.output[0]], name='add0'),
+        make_node('Mul', inputs=['gather_4_output_0', 'gather_4_output_0'], outputs=[
+                  'mul_output_0'], name='mul0'),
+        make_node('Mul', inputs=['gather_5_output_0', 'gather_5_output_0'], outputs=[
+                  'mul_1_output_0'], name='mul1'),
+        make_node('Add', inputs=['mul_output_0', 'mul_1_output_0'], outputs=[
+                  stft_norm_node.output[0]], name='add0'),
     ]
     new_stft_nodes.extend(onnx_model.graph.node[:node_idx])
     new_stft_nodes.extend(replaced_nodes)
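These reflows are behavior-preserving: the subgraph still gathers the real and imaginary components of the STFT output along axis 3, squares each, and sums them to get the power spectrum. A NumPy sketch of the same computation, with illustrative shapes:

```python
import numpy as np

# Last axis holds (real, imag), matching the axis=3 Gather nodes above.
stft = np.random.randn(1, 201, 3000, 2).astype(np.float32)  # illustrative
real, imag = stft[..., 0], stft[..., 1]   # gather_4 / gather_5
power = real * real + imag * imag         # mul0, mul1, then add0
```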
@@ -253,9 +221,11 @@ class WhisperDataProcGraph:
         del g.node[:]
         g.node.extend(nodes)
 
-        inputs = [onnx.helper.make_tensor_value_info("sequences", onnx.TensorProto.INT32, ['N', 'seq_len', 'ids'])]
+        inputs = [onnx.helper.make_tensor_value_info(
+            "sequences", onnx.TensorProto.INT32, ['N', 'seq_len', 'ids'])]
         del g.input[:]
         g.input.extend(inputs)
-        g.output[0].type.CopyFrom(onnx.helper.make_tensor_type_proto(onnx.TensorProto.STRING, ['N', 'text']))
+        g.output[0].type.CopyFrom(onnx.helper.make_tensor_type_proto(
+            onnx.TensorProto.STRING, ['N', 'text']))
 
         return make_onnx_model(g, opset_version=self.opset_version)
--- a/onnxruntime_extensions/_version.py
+++ b/onnxruntime_extensions/_version.py
@@ -1,2 +1,2 @@
 # Generated by setup.py, DON'T MANUALLY UPDATE IT!
-__version__ = "0.11.0"
+__version__ = "0.13.0"
--- /dev/null
+++ b/onnxruntime_extensions/pp_api.py
@@ -0,0 +1,81 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+###############################################################################
+
+import os
+from . import _extensions_pydll as _C
+if not hasattr(_C, "delete_object"):
+    raise ImportError(
+        "onnxruntime_extensions is not built with pre-processing C API\n"
+        "To enable it, please build the package with --ortx-user-option=pp_api")
+
+create_processor = _C.create_processor
+load_images = _C.load_images
+image_pre_process = _C.image_pre_process
+tensor_result_get_at = _C.tensor_result_get_at
+
+create_tokenizer = _C.create_tokenizer
+batch_tokenize = _C.batch_tokenize
+batch_detokenize = _C.batch_detokenize
+
+delete_object = _C.delete_object
+
+
+class Tokenizer:
+    def __init__(self, tokenizer_dir):
+        self.tokenizer = None
+        if os.path.isdir(tokenizer_dir):
+            self.tokenizer = create_tokenizer(tokenizer_dir)
+        else:
+            try:
+                from transformers.utils import cached_file
+                resolved_full_file = cached_file(
+                    tokenizer_dir, "tokenizer.json")
+                resolved_config_file = cached_file(
+                    tokenizer_dir, "tokenizer_config.json")
+            except ImportError:
+                raise ValueError(
+                    f"Directory '{tokenizer_dir}' not found and transformers is not available")
+            if not os.path.exists(resolved_full_file):
+                raise FileNotFoundError(
+                    f"Downloaded HF file '{resolved_full_file}' cannot be found")
+            if (os.path.dirname(resolved_full_file) != os.path.dirname(resolved_config_file)):
+                raise FileNotFoundError(
+                    f"Downloaded HF files '{resolved_full_file}' "
+                    f"and '{resolved_config_file}' are not in the same directory")
+
+            tokenizer_dir = os.path.dirname(resolved_full_file)
+            self.tokenizer = create_tokenizer(tokenizer_dir)
+
+    def tokenize(self, text):
+        return batch_tokenize(self.tokenizer, [text])[0]
+
+    def detokenize(self, tokens):
+        return batch_detokenize(self.tokenizer, [tokens])[0]
+
+    def __del__(self):
+        if delete_object and self.tokenizer:
+            delete_object(self.tokenizer)
+        self.tokenizer = None
+
+
+class ImageProcessor:
+    def __init__(self, processor_json):
+        self.processor = create_processor(processor_json)
+
+    def pre_process(self, images):
+        if isinstance(images, str):
+            images = [images]
+        if isinstance(images, list):
+            images = load_images(images)
+        return image_pre_process(self.processor, images)
+
+    @staticmethod
+    def to_numpy(result):
+        return tensor_result_get_at(result, 0)
+
+    def __del__(self):
+        if delete_object and self.processor:
+            delete_object(self.processor)
+        self.processor = None
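The new `pp_api` module is the headline addition in 0.13.0: Python wrappers over the pre-processing C API, available only in wheels built with `--ortx-user-option=pp_api`. A usage sketch based on the code above; the directory and file names are illustrative:

```python
from onnxruntime_extensions.pp_api import ImageProcessor, Tokenizer

# A local directory containing tokenizer.json works without transformers;
# a Hugging Face model id is resolved via transformers.utils.cached_file.
tok = Tokenizer("./my_tokenizer_dir")        # illustrative path
ids = tok.tokenize("hello world")
print(tok.detokenize(ids))

proc = ImageProcessor("./processor.json")    # illustrative processor config
result = proc.pre_process(["cat.png", "dog.png"])
pixels = ImageProcessor.to_numpy(result)
```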
--- a/onnxruntime_extensions-0.11.0.dist-info/METADATA
+++ b/onnxruntime_extensions-0.13.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxruntime_extensions
-Version: 0.11.0
+Version: 0.13.0
 Summary: ONNXRuntime Extensions
 Home-page: https://github.com/microsoft/onnxruntime-extensions
 Author: Microsoft Corporation
@@ -25,29 +25,17 @@ License-File: LICENSE
 
 ## What's ONNXRuntime-Extensions
 
-Introduction: ONNXRuntime-Extensions is a library that extends the capability of the ONNX models and inference with ONNX Runtime, via ONNX Runtime Custom Operator ABIs. It includes a set of [ONNX Runtime Custom Operator](https://onnxruntime.ai/docs/reference/operators/add-custom-op.html) to support the common pre- and post-processing operators for vision, text, and nlp models. And it supports multiple languages and platforms, like Python on Windows/Linux/macOS, some mobile platforms like Android and iOS, and Web-Assembly etc. The basic workflow is to enhance a ONNX model firstly and then do the model inference with ONNX Runtime and ONNXRuntime-Extensions package.
+Introduction: ONNXRuntime-Extensions is a C/C++ library that extends the capability of the ONNX models and inference with ONNX Runtime, via ONNX Runtime Custom Operator ABIs. It includes a set of [ONNX Runtime Custom Operator](https://onnxruntime.ai/docs/reference/operators/add-custom-op.html) to support the common pre- and post-processing operators for vision, text, and nlp models. And it supports multiple languages and platforms, like Python on Windows/Linux/macOS, some mobile platforms like Android and iOS, and Web-Assembly etc. The basic workflow is to enhance a ONNX model firstly and then do the model inference with ONNX Runtime and ONNXRuntime-Extensions package.
 
 
 ## Quickstart
+The library can be utilized as either a C/C++ library or other advance language packages like Python, Java, C#, etc. To build it as a shared library, you can use the `build.bat` or `build.sh` scripts located in the root folder. The CMake build definition is available in the `CMakeLists.txt` file and can be modified by appending options to `build.bat` or `build.sh`, such as `build.bat -DOCOS_BUILD_SHARED_LIB=OFF`. For more details, please refer to the [C API documentation](./docs/c_api.md).
 
 ### **Python installation**
 ```bash
 pip install onnxruntime-extensions
 ````
-
-
-### **Nightly Build**
-
-#### <strong>on Windows</strong>
-```cmd
-pip install --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ onnxruntime-extensions
-```
-Please ensure that you have met the prerequisites of onnxruntime-extensions (e.g., onnx and onnxruntime) in your Python environment.
-#### <strong>on Linux/macOS</strong>
-Please make sure the compiler toolkit like gcc(later than g++ 8.0) or clang are installed before the following command
-```bash
-python -m pip install git+https://github.com/microsoft/onnxruntime-extensions.git
-```
+The nightly build is also available for the latest features, please refer to [nightly build](./docs/development.md#nightly-build)
 
 
 ## Usage
--- a/onnxruntime_extensions-0.11.0.dist-info/RECORD
+++ b/onnxruntime_extensions-0.13.0.dist-info/RECORD
@@ -1,14 +1,15 @@
-onnxruntime_extensions/__init__.py,sha256=V21JqGUbsnBjatRb9Z83x7F8kjlDtw9pXlIrt4wEUbw,2239
+onnxruntime_extensions/__init__.py,sha256=GMnMIHJ-uqvJGPn5fpCZOi7OG16kFVpfOTTO88kYJWY,2387
 onnxruntime_extensions/_cuops.py,sha256=SUD2NhEWHeMem8ylCtCGBKutSuZQs4WMj1ke65-52vA,16193
-onnxruntime_extensions/_extensions_pydll.cp39-win_amd64.pyd,sha256=yDFhYAzduNnCSw2fOYuHGs0xVjzSzSlr61C-H_cCRxE,5211136
+onnxruntime_extensions/_extensions_pydll.cp39-win_amd64.pyd,sha256=E54SOjYcuHaJ9UBjtlYQi4QhDtHlLVIC7r4F4WiAJcQ,3374080
 onnxruntime_extensions/_extensions_pydll.pyi,sha256=mYXkqNaCgAbs161RDKgDjxIX9vWdYdVPDC-0X9cieco,1070
-onnxruntime_extensions/_hf_cvt.py,sha256=HJwpcdc02aYV9qgAYkrtSYbkargYi0xTqf7Ye60D84A,14062
+onnxruntime_extensions/_hf_cvt.py,sha256=3RDEr4uga_FYBReSDgqLqvj_2-7HgVOk073BRT8lK_E,14082
 onnxruntime_extensions/_ocos.py,sha256=OlDOlCH_vWFOBkjbp6Pujgw6rgk8Fd3_2Mi5ev1eeS0,4193
 onnxruntime_extensions/_ortapi2.py,sha256=Tfrf9fQMQ0e7Wa4R8s4SHdwMNBdmj33wH3y5vMkVVQE,9951
-onnxruntime_extensions/_torch_cvt.py,sha256=1EQI7t_Bz6MvGYwQ15O1bIanPImacKSS0A_JtZrAN58,11403
-onnxruntime_extensions/_version.py,sha256=NhimnAwvZjvrFikdniA7jaWFoc8VEjT3TZb9r4Rfhb0,76
+onnxruntime_extensions/_torch_cvt.py,sha256=hGOiw24QuFpK_3CLjg8Fs2GD_cCdM049xcJxkHVRbAk,10185
+onnxruntime_extensions/_version.py,sha256=uFV2Hf2pKZgA12fk8O9oCxhl_fUheVDYuvrK3A-Vjnc,76
 onnxruntime_extensions/cmd.py,sha256=eIiNNY0ohbUCPgmr9RwOfi0Gzw7nWL17i625L-ZKezI,2428
 onnxruntime_extensions/cvt.py,sha256=XMz0CZXBJQ9IwnixjzJwz-utKyu9HREIEUCviZg6v8A,3977
+onnxruntime_extensions/pp_api.py,sha256=MpW3frODcWXOmYaoTQiYWiM807rC8GjlWgpIYc-CDy8,3051
 onnxruntime_extensions/util.py,sha256=KxNFY0-5CG1i9HADcCc4V33PNukTO46Os_KIL8pj-l8,7394
 onnxruntime_extensions/onnxprocess/__init__.py,sha256=BnveHXnu2nTQNbCLeZujZgZwO9A3yWFbQGTDthCFbIc,534
 onnxruntime_extensions/onnxprocess/_builder.py,sha256=L_afKeE7Wc4mWJ47eVXQ2stvmal_37QVTQZgKmt0ZK8,1844
@@ -35,8 +36,8 @@ onnxruntime_extensions/tools/pre_post_processing/steps/__init__.py,sha256=pdVRZB
 onnxruntime_extensions/tools/pre_post_processing/steps/general.py,sha256=fF_XVFSKOCu482Sqjp-nVPbs-ZVGpPal2ekbO1gUO_4,13781
 onnxruntime_extensions/tools/pre_post_processing/steps/nlp.py,sha256=ZCxRNxqfANplxCe0I-6BfHziM1jDYJsNQKbHdM3Y1I0,15173
 onnxruntime_extensions/tools/pre_post_processing/steps/vision.py,sha256=BM6CGylOSu4l6UarPfW0I2tgkJDa1Q-gYz__CxZle-k,53183
-onnxruntime_extensions-0.11.0.dist-info/LICENSE,sha256=mQaUD2Gx8LUz-n2ZuvVReLKAj74RPqUd-_rYVyzNXys,1162
-onnxruntime_extensions-0.11.0.dist-info/METADATA,sha256=862ZX9u4FpIcC3g5P8oACkHj98sfgEBwq6H28mpmRb0,4452
-onnxruntime_extensions-0.11.0.dist-info/WHEEL,sha256=Z6c-bE0pUM47a70GvqO_SvH_XXU0lm62gEAKtoNJ08A,100
-onnxruntime_extensions-0.11.0.dist-info/top_level.txt,sha256=XyAgQDKyXsf6_0MJb58kRdHwigpTn7A7kl9diBEjs8M,23
-onnxruntime_extensions-0.11.0.dist-info/RECORD,,
+onnxruntime_extensions-0.13.0.dist-info/LICENSE,sha256=mQaUD2Gx8LUz-n2ZuvVReLKAj74RPqUd-_rYVyzNXys,1162
+onnxruntime_extensions-0.13.0.dist-info/METADATA,sha256=MmpoFbjh5Etph2BR7msz-Nrg6QNh56wtGqL46NUSiaE,4483
+onnxruntime_extensions-0.13.0.dist-info/WHEEL,sha256=UAgGEIlEKluxCX20ppULf3M0rfG_1DUMdncOI_parX8,99
+onnxruntime_extensions-0.13.0.dist-info/top_level.txt,sha256=XyAgQDKyXsf6_0MJb58kRdHwigpTn7A7kl9diBEjs8M,23
+onnxruntime_extensions-0.13.0.dist-info/RECORD,,
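Each RECORD entry has the form `path,sha256=<digest>,size`, where the digest is the urlsafe-base64 SHA-256 of the file with padding stripped (per the wheel spec). A small sketch for recomputing one entry's hash:

```python
import base64
import hashlib

def record_digest(path: str) -> str:
    # urlsafe base64 of the SHA-256 digest, '=' padding stripped,
    # as used in the sha256= fields above
    with open(path, "rb") as f:
        raw = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(raw).rstrip(b"=").decode()
```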
--- a/onnxruntime_extensions-0.11.0.dist-info/WHEEL
+++ b/onnxruntime_extensions-0.13.0.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.43.0)
+Generator: setuptools (75.2.0)
 Root-Is-Purelib: false
 Tag: cp39-cp39-win_amd64
 