onnxruntime_extensions 0.14.0__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxruntime_extensions/__init__.py +82 -0
- onnxruntime_extensions/_cuops.py +564 -0
- onnxruntime_extensions/_extensions_pydll.cpython-313-darwin.so +0 -0
- onnxruntime_extensions/_extensions_pydll.pyi +45 -0
- onnxruntime_extensions/_hf_cvt.py +331 -0
- onnxruntime_extensions/_ocos.py +133 -0
- onnxruntime_extensions/_ortapi2.py +274 -0
- onnxruntime_extensions/_torch_cvt.py +231 -0
- onnxruntime_extensions/_version.py +2 -0
- onnxruntime_extensions/cmd.py +66 -0
- onnxruntime_extensions/cvt.py +306 -0
- onnxruntime_extensions/onnxprocess/__init__.py +12 -0
- onnxruntime_extensions/onnxprocess/_builder.py +53 -0
- onnxruntime_extensions/onnxprocess/_onnx_ops.py +1507 -0
- onnxruntime_extensions/onnxprocess/_session.py +355 -0
- onnxruntime_extensions/onnxprocess/_tensor.py +628 -0
- onnxruntime_extensions/onnxprocess/torch_wrapper.py +31 -0
- onnxruntime_extensions/pnp/__init__.py +13 -0
- onnxruntime_extensions/pnp/_base.py +124 -0
- onnxruntime_extensions/pnp/_imagenet.py +65 -0
- onnxruntime_extensions/pnp/_nlp.py +148 -0
- onnxruntime_extensions/pnp/_onnx_ops.py +1544 -0
- onnxruntime_extensions/pnp/_torchext.py +310 -0
- onnxruntime_extensions/pnp/_unifier.py +45 -0
- onnxruntime_extensions/pnp/_utils.py +302 -0
- onnxruntime_extensions/pp_api.py +83 -0
- onnxruntime_extensions/tools/__init__.py +0 -0
- onnxruntime_extensions/tools/add_HuggingFace_CLIPImageProcessor_to_model.py +171 -0
- onnxruntime_extensions/tools/add_pre_post_processing_to_model.py +535 -0
- onnxruntime_extensions/tools/pre_post_processing/__init__.py +4 -0
- onnxruntime_extensions/tools/pre_post_processing/pre_post_processor.py +395 -0
- onnxruntime_extensions/tools/pre_post_processing/step.py +227 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/__init__.py +6 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/general.py +366 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/nlp.py +344 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/vision.py +1157 -0
- onnxruntime_extensions/tools/pre_post_processing/utils.py +139 -0
- onnxruntime_extensions/util.py +186 -0
- onnxruntime_extensions-0.14.0.dist-info/LICENSE +21 -0
- onnxruntime_extensions-0.14.0.dist-info/METADATA +102 -0
- onnxruntime_extensions-0.14.0.dist-info/RECORD +43 -0
- onnxruntime_extensions-0.14.0.dist-info/WHEEL +6 -0
- onnxruntime_extensions-0.14.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import onnx
|
|
3
|
+
import torch
|
|
4
|
+
from typing import Any
|
|
5
|
+
from onnx.onnx_pb import TensorProto
|
|
6
|
+
from torch.onnx import TrainingMode, export as _export
|
|
7
|
+
|
|
8
|
+
from ._onnx_ops import OPSET_TO_IR_VERSION
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _export_f(model, *args,
              opset_version=None,
              output_path=None,
              output_seq=0,
              export_params=True,
              verbose=False,
              input_names=None,
              output_names=None,
              operator_export_type=None,
              do_constant_folding=True,
              dynamic_axes=None,
              keep_initializers_as_inputs=None,
              custom_opsets=None):
    """Export a torch model to an in-memory ONNX ModelProto.

    Wraps ``torch.onnx.export`` (always in EVAL training mode), then rewrites
    the model's ``ir_version`` to match the default-domain opset, and
    optionally saves the model to ``output_path``.

    :param model: the torch module (or ScriptModule) to export.
    :param args: example inputs forwarded to ``torch.onnx.export``.
    :param opset_version: ONNX opset to target.
    :param output_path: if not None, the exported model is also saved here.
    :param output_seq: when > 0, inserted into the filename before '.onnx'
        so sequential exports do not overwrite each other.
    :return: the exported ``onnx.ModelProto``.
    """
    with io.BytesIO() as f:
        _export(model, args, f,
                export_params=export_params, verbose=verbose,
                training=TrainingMode.EVAL, input_names=input_names,
                output_names=output_names,
                operator_export_type=operator_export_type, opset_version=opset_version,
                do_constant_folding=do_constant_folding,
                dynamic_axes=dynamic_axes,
                keep_initializers_as_inputs=keep_initializers_as_inputs,
                custom_opsets=custom_opsets)

        mdl = onnx.load_model(io.BytesIO(f.getvalue()))
        # torch.onnx may stamp an IR version newer than the opset implies;
        # align ir_version with the default-domain opset for compatibility.
        for ops in mdl.opset_import:
            if ops.domain in ('', 'ai.onnx'):
                mdl.ir_version = OPSET_TO_IR_VERSION[ops.version]
        if output_path is not None:
            if output_seq > 0:
                # BUG FIX: str.replace returns a NEW string; the original code
                # discarded the result, so the sequence number never appeared
                # in the saved filename. Rebind output_path to the new name.
                output_path = output_path.replace('.onnx', '.{}.onnx'.format(output_seq))
            onnx.save_model(mdl, output_path)
        return mdl
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class _ProcessingModule:
    """Mixin for pre/post-processing modules exported through torch.onnx.

    On construction it ensures the package's custom-op symbolic functions are
    registered with torch.onnx (once per process), and it provides a common
    ``export`` entry point that delegates to ``_export_f``.
    """

    def __init__(self):
        super(_ProcessingModule, self).__init__()
        # Register custom symbolics eagerly so any later export sees them.
        _ProcessingModule.register_customops()

    @staticmethod
    @torch.jit.unused
    def _argsort(g, x, dim, descending):
        # Symbolic: lower torch's argsort to the contrib ArgSort custom op.
        # NOTE(review): `descending` is not forwarded to the op — confirm the
        # custom ArgSort's ordering matches what callers expect.
        return g.op('ai.onnx.contrib::ArgSort', x, dim)

    @classmethod
    @torch.jit.unused
    def register_customops(cls):
        """Register custom-op symbolic functions with torch.onnx.

        Idempotent: a `loaded` class attribute guards against re-registration.
        Always returns True.
        """
        if hasattr(cls, 'loaded'):
            return True

        torch.onnx.register_custom_op_symbolic('::argsort', cls._argsort, 1)
        # ... more

        cls.loaded = True
        return True

    @torch.jit.unused
    def export(self, *args, opset_version=None, script_mode=False, output_path=None, output_seq=0, **kwargs):
        """Export this module to ONNX via ``_export_f``.

        :param opset_version: required; raises RuntimeError when omitted.
        :param script_mode: when True, the module is torch.jit.script-ed first
            (unless it already is a ScriptModule).
        """
        if opset_version is None:
            raise RuntimeError('No opset_version found in the kwargs.')
        mod = self
        if script_mode and not isinstance(mod, torch.jit.ScriptModule):
            mod = torch.jit.script(mod)

        return _export_f(mod,
                         *args,
                         opset_version=opset_version,
                         output_path=output_path,
                         output_seq=output_seq, **kwargs)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ProcessingTracedModule(torch.nn.Module, _ProcessingModule):
    """A trace-mode processing module that delegates to an optional callable.

    When ``func_obj`` is provided, ``forward`` simply invokes it; subclasses
    may instead override ``forward`` directly and leave ``func_obj`` as None.
    """

    def __init__(self, func_obj=None):
        super().__init__()
        self.func_obj = func_obj

    def forward(self, *inputs):
        fn = self.func_obj
        assert fn is not None, "No forward method found."
        return fn(*inputs)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class ProcessingScriptModule(torch.nn.Module, _ProcessingModule):
    """A processing module that is exported via TorchScript (scripting mode)."""

    @torch.jit.unused
    def export(self, *args, **kwargs):
        # Force script_mode so the base export torch.jit.script()s the module.
        return super().export(*args, script_mode=True, **kwargs)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class CustomFunction(torch.autograd.Function):
    """Autograd-function base for custom contrib ops.

    Only the ONNX ``symbolic`` lowering is meaningful here: it emits a node in
    the ``ai.onnx.contrib`` domain named after the subclass. The autograd
    hooks are inert stubs (no gradient computation is intended).
    """

    @classmethod
    def forward(cls, ctx: Any, *args: Any, **kwargs: Any) -> Any:
        # Stub: subclasses used purely for export do not compute here.
        pass

    @staticmethod
    def backward(ctx: Any, *grad_outputs: Any) -> Any:
        # Identity pass-through; gradients are not actually used.
        return grad_outputs

    @staticmethod
    def jvp(ctx: Any, *grad_inputs: Any) -> Any:
        # Stub: forward-mode autodiff is not supported.
        pass

    @classmethod
    def symbolic(cls, g, *args):
        # The op type in the exported graph is the subclass's own name.
        return g.op('ai.onnx.contrib::' + cls.__name__, *args)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Alias so callers can reference ONNX element types (e.g. tensor_data_type.STRING)
# without importing onnx.TensorProto themselves.
tensor_data_type = TensorProto


def is_processing_module(m):
    """Return True if *m* is a pre/post-processing module from this package."""
    return isinstance(m, _ProcessingModule)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
from typing import Tuple
|
|
3
|
+
from torch.nn.functional import interpolate
|
|
4
|
+
from ._base import ProcessingTracedModule
|
|
5
|
+
from ._torchext import onnx_where, onnx_greater
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _resize_param(img, size):
    """Return the scale factor that brings the image's shorter side to *size*.

    Computes max(size / h, size / w) using ONNX-exportable where/greater ops,
    so the comparison survives tracing.
    """
    height, width = tuple(img.shape[-2:])
    scale_h = size / height
    scale_w = size / width
    # The larger ratio is the one that makes the smaller dimension reach `size`.
    return onnx_where(onnx_greater(scale_w, scale_h), scale_w, scale_h)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ImageNetPreProcessing(ProcessingTracedModule):
    """Standard ImageNet preprocessing: resize to 256 on the short side,
    center-crop to ``size``, scale to [0, 1], and normalize with the
    ImageNet channel mean/std. Implemented with traceable torch ops so it can
    be exported to ONNX.
    """
    def __init__(self, size, resize_image=True):
        super(ImageNetPreProcessing, self).__init__()
        # target_size: side length of the square center crop (e.g. 224).
        self.target_size = size
        # resize_image: skip the Resize(256) stage when False.
        self.resize_image = resize_image

    def forward(self, img):
        """Preprocess an HWC RGB image into a normalized NCHW float tensor.

        NOTE(review): the `/=` and `-=` below operate in place on a slice view;
        if the input is already a float32 tensor the caller's data may be
        mutated — confirm inputs are always uint8/ndarray in practice.
        """
        if not isinstance(img, torch.Tensor):
            img = torch.tensor(img)
        assert img.shape[-1] == 3, 'the input image should be in RGB channels'
        img = torch.permute(img, (2, 0, 1))  # HWC -> CHW
        img = img.to(torch.float32).unsqueeze(0)  # add batch dim -> NCHW
        # T.Resize(256),
        if self.resize_image:
            scale = _resize_param(img, torch.tensor(256))
            img = interpolate(img, scale_factor=scale,
                              recompute_scale_factor=True,
                              mode="bilinear", align_corners=False)
        # T.CenterCrop(224),
        width, height = self.target_size, self.target_size
        img_h, img_w = img.shape[-2:]
        # trunc division mirrors torchvision's center-crop offset computation.
        s_h = torch.div((img_h - height), 2, rounding_mode='trunc')
        s_w = torch.div((img_w - width), 2, rounding_mode='trunc')
        x = img[:, :, s_h:s_h + height, s_w:s_w + width]
        # T.ToTensor(),
        x /= 255.  # ToTensor
        # T.Normalize(
        #     mean=[0.485, 0.456, 0.406],
        #     std=[0.229, 0.224, 0.225]
        # )
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        # (3,1,1) reshape broadcasts per-channel constants over NCHW.
        x -= torch.reshape(torch.tensor(mean), (3, 1, 1))
        x /= torch.reshape(torch.tensor(std), (3, 1, 1))
        return x
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ImageNetPostProcessing(ProcessingTracedModule):
    """Turn raw classifier logits into the top-10 class ids and probabilities."""

    def forward(self, scores: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        probs = scores.softmax(dim=1)
        # Best 10 classes per row, highest probability first.
        top_prob, top_ids = torch.topk(probs, k=10, dim=1, largest=True, sorted=True)
        return top_ids, top_prob
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class PreMobileNet(ImageNetPreProcessing):
    """ImageNet preprocessing with MobileNet's default 224x224 crop size."""

    def __init__(self, size=None):
        super(PreMobileNet, self).__init__(size if size is not None else 224)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class PostMobileNet(ImageNetPostProcessing):
    # MobileNet reuses the generic ImageNet top-10 post-processing unchanged.
    pass
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections import OrderedDict
|
|
3
|
+
|
|
4
|
+
from ._base import ProcessingTracedModule, tensor_data_type as _dt
|
|
5
|
+
from ._torchext import create_op_function
|
|
6
|
+
from ._onnx_ops import schema
|
|
7
|
+
from .._ocos import default_opset_domain
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_custom_op(ctx, op_type, input_names, output_names, container, operator_name=None, **kwargs):
    """Append a node of *op_type* in the extensions' custom domain to *container*.

    :param operator_name: explicit node name; when None a unique one is
        generated from *op_type*.
    :param kwargs: forwarded as node attributes.
    """
    if operator_name is None:
        operator_name = container.get_unique_operator_name(op_type)
    container.add_node(op_type, input_names, output_names,
                       op_version=1, name=operator_name,
                       op_domain=default_opset_domain(), **kwargs)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_bert_tokenizer(ctx, name, input_names, output_names, container, operator_name=None, **kwargs):
    """Build a BERT-style tokenizer custom op node.

    The vocabulary is embedded in the node's ``vocab_file`` attribute as one
    token per line, ordered by token id. It can come either from a HuggingFace
    tokenizer object (``hf_tok``) or from a plain vocab text file
    (``vocab_file``); exactly one of the two must be supplied.

    :raises RuntimeError: when neither ``hf_tok`` nor ``vocab_file`` is given.
    """
    if 'hf_tok' in kwargs:
        hf_bert_tokenizer = kwargs['hf_tok']
        # Sort by token id so the line number equals the id in the attribute blob.
        ordered_vocab = OrderedDict(sorted(hf_bert_tokenizer.vocab.items(), key=lambda item: int(item[1])))
        vocab = '\n'.join(ordered_vocab.keys())
        attrs = dict(vocab_file=vocab)
        # Unfortunately, there's no specific accessor function on
        # transformers.BertTokenizer to query for strip_accents.
        attrs['strip_accents'] = 1 if 'strip_accents' in hf_bert_tokenizer.init_kwargs and hf_bert_tokenizer.init_kwargs.get('strip_accents') else 0
        attrs['do_lower_case'] = 1 if hasattr(hf_bert_tokenizer, 'do_lower_case') and hf_bert_tokenizer.do_lower_case else 0
    elif 'vocab_file' in kwargs:
        vocab = None
        vocab_file = kwargs['vocab_file']
        with open(vocab_file, "r", encoding='utf-8') as vf:
            # BUG FIX: the original joined readlines() with '\n', but readlines
            # keeps each line's trailing newline, so every separator was doubled
            # and blank vocab entries shifted token ids. splitlines() yields the
            # tokens without terminators, matching the hf_tok branch above.
            vocab = '\n'.join(vf.read().splitlines())
        if vocab is None:
            raise RuntimeError("Cannot load vocabulary file {}!".format(vocab_file))
        attrs = dict(vocab_file=vocab)
        if 'strip_accents' in kwargs:
            attrs['strip_accents'] = kwargs['strip_accents']
        if 'do_lower_case' in kwargs:
            attrs['do_lower_case'] = kwargs['do_lower_case']
    else:
        raise RuntimeError("Need hf_tok/vocab_file parameter to build the tokenizer")

    return make_custom_op(ctx, name, input_names,
                          output_names, container, operator_name=operator_name, **attrs)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@schema(inputs=((_dt.STRING, []),),
        outputs=((_dt.INT64, []), (_dt.INT64, []), (_dt.INT64, [])))
def bert_tokenizer(ctx, input_names, output_names, container, operator_name=None, **kwargs):
    # v1 BertTokenizer op: string in; input_ids, token_type_ids, attention_mask out.
    return create_bert_tokenizer(ctx, 'BertTokenizer', input_names, output_names,
                                 container, operator_name=operator_name, **kwargs)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@schema(inputs=((_dt.STRING, []),),
        outputs=((_dt.INT64, []), (_dt.INT64, []), (_dt.INT64, [])))
def hf_bert_tokenizer(ctx, input_names, output_names, container, operator_name=None, **kwargs):
    # v2 HfBertTokenizer op: output order matches HuggingFace
    # (input_ids, attention_mask, token_type_ids).
    return create_bert_tokenizer(ctx, 'HfBertTokenizer', input_names, output_names,
                                 container, operator_name=operator_name, **kwargs)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@schema(inputs=((_dt.STRING, []),),
        outputs=((_dt.INT64, []), (_dt.INT64, [])))
def gpt2_tokenize(ctx, input_names, output_names, container, operator_name=None, **kwargs):
    """Build a GPT2Tokenizer custom op node.

    The BPE tables come either from a HuggingFace tokenizer (``hf_tok``) or
    from explicit ``vocab``/``merges`` strings.
    NOTE(review): the ``vocab`` branch assumes ``merges`` is also present and
    raises KeyError otherwise — confirm callers always pass both.
    """
    if 'hf_tok' in kwargs:
        hf_gpt2_tokenizer = kwargs['hf_tok']
        # Compact JSON encoding of the token->id table.
        attrs = {'vocab': json.dumps(hf_gpt2_tokenizer.encoder, separators=(',', ':'))}
        # bpe_ranks maps pair -> rank; invert so merges can be emitted in rank order.
        sorted_merges = {v_: k_ for k_, v_ in hf_gpt2_tokenizer.bpe_ranks.items()}
        attrs['merges'] = '\n'.join("{} {}".format(*sorted_merges[n_]) for n_ in range(len(sorted_merges)))
    elif 'vocab' in kwargs:
        attrs = dict(
            vocab=kwargs['vocab'],
            merges=kwargs['merges'])
    else:
        raise RuntimeError("Need hf_tok/vocab parameter to build the tokenizer")
    # -1 means "no padding" for the custom op.
    padding_len = -1
    if 'padding_length' in kwargs:
        padding_len = kwargs['padding_length']
    attrs['padding_length'] = padding_len

    return make_custom_op(ctx, 'GPT2Tokenizer', input_names,
                          output_names, container, operator_name=operator_name, **attrs)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _get_file_content(path):
|
|
84
|
+
with open(path, "rb") as file:
|
|
85
|
+
return file.read()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _get_bound_object(func):
|
|
89
|
+
return func.__self__
|
|
90
|
+
|
|
91
|
+
# v1. Order of outputs - input_ids, token_type_ids, attention_mask
# (this is NOT consistent with the HuggingFace implementation of the tokenizer)
class PreHuggingFaceBert(ProcessingTracedModule):
    """Traced pre-processing module wrapping the v1 BertTokenizer custom op."""

    def __init__(self, hf_tok=None, vocab_file=None, do_lower_case=0, strip_accents=1):
        super(PreHuggingFaceBert, self).__init__()
        # Prefer the HuggingFace tokenizer object when supplied; otherwise
        # build the op from the raw vocab file and flags.
        if hf_tok is not None:
            tok_kwargs = dict(hf_tok=hf_tok)
        else:
            tok_kwargs = dict(vocab_file=vocab_file,
                              do_lower_case=do_lower_case,
                              strip_accents=strip_accents)
        self.onnx_bert_tokenizer = create_op_function('BertTokenizer', bert_tokenizer, **tok_kwargs)

    def forward(self, text):
        return self.onnx_bert_tokenizer(text)

    def export(self, *args, **kwargs):
        # Build the single-op model directly instead of tracing through torch.
        return _get_bound_object(self.onnx_bert_tokenizer).build_model(kwargs.get('opset_version', 0), *args)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# v2. Order of outputs - input_ids, attention_mask, token_type_ids
# (this is consistent with the HuggingFace implementation of the tokenizer)
class HfBertTokenizer(ProcessingTracedModule):
    """Traced pre-processing module wrapping the v2 HfBertTokenizer custom op."""

    def __init__(self, hf_tok=None, vocab_file=None, do_lower_case=0, strip_accents=1):
        super(HfBertTokenizer, self).__init__()
        # Prefer the HuggingFace tokenizer object when supplied; otherwise
        # build the op from the raw vocab file and flags.
        if hf_tok is not None:
            tok_kwargs = dict(hf_tok=hf_tok)
        else:
            tok_kwargs = dict(vocab_file=vocab_file,
                              do_lower_case=do_lower_case,
                              strip_accents=strip_accents)
        self.onnx_bert_tokenizer = create_op_function('HfBertTokenizer', hf_bert_tokenizer, **tok_kwargs)

    def forward(self, text):
        return self.onnx_bert_tokenizer(text)

    def export(self, *args, **kwargs):
        # Build the single-op model directly instead of tracing through torch.
        return _get_bound_object(self.onnx_bert_tokenizer).build_model(kwargs.get('opset_version', 0), *args)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class PreHuggingFaceGPT2(ProcessingTracedModule):
    """Traced pre-processing module wrapping the GPT2Tokenizer custom op."""

    def __init__(self, hf_tok=None, vocab_file=None, merges_file=None, padding_length=-1):
        super(PreHuggingFaceGPT2, self).__init__()
        # Either take the BPE tables from a HuggingFace tokenizer object, or
        # load the raw vocab/merges files from disk.
        if hf_tok is not None:
            self.onnx_gpt2_tokenize = create_op_function('GPT2Tokenizer', gpt2_tokenize, hf_tok=hf_tok)
        else:
            self.onnx_gpt2_tokenize = create_op_function('GPT2Tokenizer', gpt2_tokenize,
                                                         vocab=_get_file_content(vocab_file),
                                                         merges=_get_file_content(merges_file),
                                                         padding_length=padding_length)

    def forward(self, text):
        return self.onnx_gpt2_tokenize(text)

    def export(self, *args, **kwargs):
        # Build the single-op model directly instead of tracing through torch.
        return _get_bound_object(self.onnx_gpt2_tokenize).build_model(kwargs.get('opset_version', 0), *args)
|