onnxruntime_extensions 0.14.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. onnxruntime_extensions/__init__.py +82 -0
  2. onnxruntime_extensions/_cuops.py +564 -0
  3. onnxruntime_extensions/_extensions_pydll.cpython-313-darwin.so +0 -0
  4. onnxruntime_extensions/_extensions_pydll.pyi +45 -0
  5. onnxruntime_extensions/_hf_cvt.py +331 -0
  6. onnxruntime_extensions/_ocos.py +133 -0
  7. onnxruntime_extensions/_ortapi2.py +274 -0
  8. onnxruntime_extensions/_torch_cvt.py +231 -0
  9. onnxruntime_extensions/_version.py +2 -0
  10. onnxruntime_extensions/cmd.py +66 -0
  11. onnxruntime_extensions/cvt.py +306 -0
  12. onnxruntime_extensions/onnxprocess/__init__.py +12 -0
  13. onnxruntime_extensions/onnxprocess/_builder.py +53 -0
  14. onnxruntime_extensions/onnxprocess/_onnx_ops.py +1507 -0
  15. onnxruntime_extensions/onnxprocess/_session.py +355 -0
  16. onnxruntime_extensions/onnxprocess/_tensor.py +628 -0
  17. onnxruntime_extensions/onnxprocess/torch_wrapper.py +31 -0
  18. onnxruntime_extensions/pnp/__init__.py +13 -0
  19. onnxruntime_extensions/pnp/_base.py +124 -0
  20. onnxruntime_extensions/pnp/_imagenet.py +65 -0
  21. onnxruntime_extensions/pnp/_nlp.py +148 -0
  22. onnxruntime_extensions/pnp/_onnx_ops.py +1544 -0
  23. onnxruntime_extensions/pnp/_torchext.py +310 -0
  24. onnxruntime_extensions/pnp/_unifier.py +45 -0
  25. onnxruntime_extensions/pnp/_utils.py +302 -0
  26. onnxruntime_extensions/pp_api.py +83 -0
  27. onnxruntime_extensions/tools/__init__.py +0 -0
  28. onnxruntime_extensions/tools/add_HuggingFace_CLIPImageProcessor_to_model.py +171 -0
  29. onnxruntime_extensions/tools/add_pre_post_processing_to_model.py +535 -0
  30. onnxruntime_extensions/tools/pre_post_processing/__init__.py +4 -0
  31. onnxruntime_extensions/tools/pre_post_processing/pre_post_processor.py +395 -0
  32. onnxruntime_extensions/tools/pre_post_processing/step.py +227 -0
  33. onnxruntime_extensions/tools/pre_post_processing/steps/__init__.py +6 -0
  34. onnxruntime_extensions/tools/pre_post_processing/steps/general.py +366 -0
  35. onnxruntime_extensions/tools/pre_post_processing/steps/nlp.py +344 -0
  36. onnxruntime_extensions/tools/pre_post_processing/steps/vision.py +1157 -0
  37. onnxruntime_extensions/tools/pre_post_processing/utils.py +139 -0
  38. onnxruntime_extensions/util.py +186 -0
  39. onnxruntime_extensions-0.14.0.dist-info/LICENSE +21 -0
  40. onnxruntime_extensions-0.14.0.dist-info/METADATA +102 -0
  41. onnxruntime_extensions-0.14.0.dist-info/RECORD +43 -0
  42. onnxruntime_extensions-0.14.0.dist-info/WHEEL +6 -0
  43. onnxruntime_extensions-0.14.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,124 @@
1
+ import io
2
+ import onnx
3
+ import torch
4
+ from typing import Any
5
+ from onnx.onnx_pb import TensorProto
6
+ from torch.onnx import TrainingMode, export as _export
7
+
8
+ from ._onnx_ops import OPSET_TO_IR_VERSION
9
+
10
+
11
def _export_f(model, *args,
              opset_version=None,
              output_path=None,
              output_seq=0,
              export_params=True,
              verbose=False,
              input_names=None,
              output_names=None,
              operator_export_type=None,
              do_constant_folding=True,
              dynamic_axes=None,
              keep_initializers_as_inputs=None,
              custom_opsets=None):
    """Export *model* to ONNX in memory, align its ir_version with the opset,
    and optionally persist it.

    Args:
        model: the torch module (or ScriptModule) to export.
        *args: example inputs forwarded to ``torch.onnx.export`` for tracing.
        opset_version: ONNX opset to target.
        output_path: when not None, the model is also saved to this ``.onnx`` path.
        output_seq: when > 0, ``.onnx`` is rewritten to ``.{output_seq}.onnx``
            so repeated exports get distinct filenames.
        export_params/verbose/.../custom_opsets: passed straight through to
            ``torch.onnx.export``.

    Returns:
        The loaded ``onnx.ModelProto``.
    """
    with io.BytesIO() as f:
        _export(model, args, f,
                export_params=export_params, verbose=verbose,
                training=TrainingMode.EVAL, input_names=input_names,
                output_names=output_names,
                operator_export_type=operator_export_type, opset_version=opset_version,
                do_constant_folding=do_constant_folding,
                dynamic_axes=dynamic_axes,
                keep_initializers_as_inputs=keep_initializers_as_inputs,
                custom_opsets=custom_opsets)
        # Re-load from the in-memory bytes while the buffer is still open.
        mdl = onnx.load_model(io.BytesIO(f.getvalue()))

    # torch may stamp a newer default IR version than older runtimes accept;
    # pin it to the IR version matching the default-domain opset actually used.
    for ops in mdl.opset_import:
        if ops.domain in ('', 'ai.onnx'):
            mdl.ir_version = OPSET_TO_IR_VERSION[ops.version]
    if output_path is not None:
        if output_seq > 0:
            # BUG FIX: str.replace returns a new string; the original code
            # discarded the result, so sequenced exports overwrote output_path.
            output_path = output_path.replace('.onnx', '.{}.onnx'.format(output_seq))
        onnx.save_model(mdl, output_path)
    return mdl
45
+
46
+
47
class _ProcessingModule:
    """Mixin for pre/post-processing modules: registers the torch.onnx custom-op
    symbolic functions once per process and provides an ONNX ``export`` helper."""

    def __init__(self):
        super(_ProcessingModule, self).__init__()
        # Ensure the custom symbolics are registered before any export happens.
        _ProcessingModule.register_customops()

    @staticmethod
    @torch.jit.unused
    def _argsort(g, x, dim, descending):
        # Symbolic: lower torch ``argsort`` to the ai.onnx.contrib::ArgSort op.
        # NOTE(review): ``descending`` is accepted but not forwarded to the op —
        # confirm the custom op's default ordering matches the traced call sites.
        return g.op('ai.onnx.contrib::ArgSort', x, dim)

    @classmethod
    @torch.jit.unused
    def register_customops(cls):
        """Register custom-op symbolic functions with torch.onnx (idempotent).

        Returns True in all cases; the class attribute ``loaded`` acts as a
        once-only guard for the whole process.
        """
        if hasattr(cls, 'loaded'):
            return True

        torch.onnx.register_custom_op_symbolic('::argsort', cls._argsort, 1)
        # ... more

        cls.loaded = True
        return True

    @torch.jit.unused
    def export(self, *args, opset_version=None, script_mode=False, output_path=None, output_seq=0, **kwargs):
        """Export this module to ONNX.

        ``args`` are the example inputs used for tracing; ``opset_version`` is
        mandatory. With ``script_mode`` the module is first compiled via
        ``torch.jit.script`` unless it already is a ScriptModule.
        """
        if opset_version is None:
            raise RuntimeError('No opset_version found in the kwargs.')
        mod = self
        if script_mode and not isinstance(mod, torch.jit.ScriptModule):
            mod = torch.jit.script(mod)

        return _export_f(mod,
                         *args,
                         opset_version=opset_version,
                         output_path=output_path,
                         output_seq=output_seq, **kwargs)
83
+
84
+
85
class ProcessingTracedModule(torch.nn.Module, _ProcessingModule):
    """Trace-mode processing module that delegates to an optional callable."""

    def __init__(self, func_obj=None):
        super().__init__()
        # Callable invoked by forward(); subclasses may override forward instead.
        self.func_obj = func_obj

    def forward(self, *inputs):
        fn = self.func_obj
        assert fn is not None, "No forward method found."
        return fn(*inputs)
93
+
94
+
95
class ProcessingScriptModule(torch.nn.Module, _ProcessingModule):
    """Script-mode processing module: exports always go through torch.jit.script."""

    @torch.jit.unused
    def export(self, *args, **kwargs):
        # Force script_mode so _ProcessingModule.export compiles the module first.
        return super().export(*args, script_mode=True, **kwargs)
100
+
101
+
102
class CustomFunction(torch.autograd.Function):
    """Base autograd Function whose ONNX symbolic emits an ai.onnx.contrib op
    named after the concrete subclass. The autodiff hooks are placeholders —
    these functions exist to be traced into a graph, not trained."""

    @staticmethod
    def jvp(ctx: Any, *grad_inputs: Any) -> Any:
        # Forward-mode autodiff intentionally unimplemented (returns None).
        pass

    @staticmethod
    def backward(ctx: Any, *grad_outputs: Any) -> Any:
        # Pass-through gradient placeholder so autograd does not complain.
        return grad_outputs

    @classmethod
    def forward(cls, ctx: Any, *args: Any, **kwargs: Any) -> Any:
        # Subclasses supply the actual computation; the base returns None.
        pass

    @classmethod
    def symbolic(cls, g, *args):
        # The emitted op type is the subclass name in the contrib domain.
        return g.op('ai.onnx.contrib::' + cls.__name__, *args)
118
+
119
+
120
# Alias: the ONNX TensorProto enum serves as this package's tensor data-type
# namespace (e.g. tensor_data_type.STRING, tensor_data_type.INT64).
tensor_data_type = TensorProto


def is_processing_module(m):
    """Return True if *m* is one of this package's processing modules."""
    return isinstance(m, _ProcessingModule)
@@ -0,0 +1,65 @@
1
+ import torch
2
+ from typing import Tuple
3
+ from torch.nn.functional import interpolate
4
+ from ._base import ProcessingTracedModule
5
+ from ._torchext import onnx_where, onnx_greater
6
+
7
+
8
def _resize_param(img, size):
    """Scale factor mapping the image's shorter side onto *size* (aspect kept)."""
    height, width = tuple(img.shape[-2:])
    scale_h = size / height
    scale_w = size / width
    # The larger of the two factors corresponds to the shorter side.
    return onnx_where(onnx_greater(scale_w, scale_h), scale_w, scale_h)
13
+
14
+
15
class ImageNetPreProcessing(ProcessingTracedModule):
    """Torchvision-style ImageNet preprocessing (Resize(256) / CenterCrop /
    ToTensor / Normalize) written with ops that trace cleanly to ONNX."""

    def __init__(self, size, resize_image=True):
        super(ImageNetPreProcessing, self).__init__()
        # Side length of the square center crop (e.g. 224).
        self.target_size = size
        # When False, the Resize(256) stage is skipped entirely.
        self.resize_image = resize_image

    def forward(self, img):
        # assumes img is an HWC RGB image with 0..255 values — TODO confirm callers
        if not isinstance(img, torch.Tensor):
            img = torch.tensor(img)
        assert img.shape[-1] == 3, 'the input image should be in RGB channels'
        img = torch.permute(img, (2, 0, 1))          # HWC -> CHW
        img = img.to(torch.float32).unsqueeze(0)     # add batch dimension
        # T.Resize(256),
        if self.resize_image:
            # Single scale factor bringing the shorter side to 256.
            scale = _resize_param(img, torch.tensor(256))
            img = interpolate(img, scale_factor=scale,
                              recompute_scale_factor=True,
                              mode="bilinear", align_corners=False)
        # T.CenterCrop(224),
        width, height = self.target_size, self.target_size
        img_h, img_w = img.shape[-2:]
        # Top-left corner of the centered crop (trunc matches integer floor for
        # the non-negative sizes expected here).
        s_h = torch.div((img_h - height), 2, rounding_mode='trunc')
        s_w = torch.div((img_w - width), 2, rounding_mode='trunc')
        x = img[:, :, s_h:s_h + height, s_w:s_w + width]
        # T.ToTensor(),
        x /= 255.  # ToTensor
        # T.Normalize(
        #   mean=[0.485, 0.456, 0.406],
        #   std=[0.229, 0.224, 0.225]
        # )
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        x -= torch.reshape(torch.tensor(mean), (3, 1, 1))
        x /= torch.reshape(torch.tensor(std), (3, 1, 1))
        return x
50
+
51
+
52
class ImageNetPostProcessing(ProcessingTracedModule):
    """Turn classifier logits into the top-10 class ids and their probabilities."""

    def forward(self, scores: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        probs = scores.softmax(dim=1)
        topk = probs.topk(k=10, dim=1, largest=True, sorted=True)
        # (ids, probabilities) — note ids first, matching the original contract.
        return topk.indices, topk.values
57
+
58
+
59
class PreMobileNet(ImageNetPreProcessing):
    """ImageNet preprocessing with MobileNet's default 224x224 crop size."""

    def __init__(self, size=None):
        if size is None:
            size = 224
        super().__init__(size)
62
+
63
+
64
class PostMobileNet(ImageNetPostProcessing):
    # MobileNet post-processing is identical to the generic ImageNet top-10 step.
    pass
@@ -0,0 +1,148 @@
1
+ import json
2
+ from collections import OrderedDict
3
+
4
+ from ._base import ProcessingTracedModule, tensor_data_type as _dt
5
+ from ._torchext import create_op_function
6
+ from ._onnx_ops import schema
7
+ from .._ocos import default_opset_domain
8
+
9
+
10
def make_custom_op(ctx, op_type, input_names, output_names, container, operator_name=None, **kwargs):
    """Append a node of *op_type* from the extensions custom-op domain to *container*."""
    if operator_name is None:
        operator_name = container.get_unique_operator_name(op_type)
    container.add_node(op_type, input_names, output_names,
                       op_version=1, name=operator_name, op_domain=default_opset_domain(), **kwargs)
14
+
15
+
16
def create_bert_tokenizer(ctx, name, input_names, output_names, container, operator_name=None, **kwargs):
    """Add a BERT-tokenizer custom op node (*name*) to *container*.

    The vocabulary is supplied either as ``hf_tok`` (a transformers
    BertTokenizer, serialized ordered by token id so line number == id) or as
    ``vocab_file`` (a one-token-per-line file). Optional ``strip_accents`` and
    ``do_lower_case`` flags become op attributes.

    Raises:
        RuntimeError: if neither ``hf_tok`` nor ``vocab_file`` is given, or the
            vocabulary file is empty.
    """
    if 'hf_tok' in kwargs:
        hf_bert_tokenizer = kwargs['hf_tok']
        # Sort by token id so the serialized line number equals the id.
        ordered_vocab = OrderedDict(sorted(hf_bert_tokenizer.vocab.items(), key=lambda item: int(item[1])))
        vocab = '\n'.join(ordered_vocab.keys())
        attrs = dict(vocab_file=vocab)
        # Unfortunately, there's no specific accessor function on
        # transformers.BertTokenizer to query for strip_accents.
        attrs['strip_accents'] = 1 if 'strip_accents' in hf_bert_tokenizer.init_kwargs and hf_bert_tokenizer.init_kwargs.get('strip_accents') else 0
        attrs['do_lower_case'] = 1 if hasattr(hf_bert_tokenizer, 'do_lower_case') and hf_bert_tokenizer.do_lower_case else 0
    elif 'vocab_file' in kwargs:
        vocab_file = kwargs['vocab_file']
        with open(vocab_file, "r", encoding='utf-8') as vf:
            # BUG FIX: readlines() keeps each line's trailing '\n', so joining
            # with '\n' doubled every newline — the blank entries shifted all
            # token ids after the first. Strip line endings before joining.
            vocab = '\n'.join(line.rstrip('\n') for line in vf)
        if not vocab:
            # The original 'is None' check was unreachable; reject empty files.
            raise RuntimeError("Cannot load vocabulary file {}!".format(vocab_file))
        attrs = dict(vocab_file=vocab)
        if 'strip_accents' in kwargs:
            attrs['strip_accents'] = kwargs['strip_accents']
        if 'do_lower_case' in kwargs:
            attrs['do_lower_case'] = kwargs['do_lower_case']
    else:
        raise RuntimeError("Need hf_tok/vocab_file parameter to build the tokenizer")

    return make_custom_op(ctx, name, input_names,
                          output_names, container, operator_name=operator_name, **attrs)
44
+
45
+
46
@schema(inputs=((_dt.STRING, []),),
        outputs=((_dt.INT64, []), (_dt.INT64, []), (_dt.INT64, [])))
def bert_tokenizer(ctx, input_names, output_names, container, operator_name=None, **kwargs):
    """Schema-declared wrapper that emits a 'BertTokenizer' custom op node."""
    return create_bert_tokenizer(
        ctx, 'BertTokenizer', input_names, output_names, container,
        operator_name=operator_name, **kwargs)
51
+
52
+
53
@schema(inputs=((_dt.STRING, []),),
        outputs=((_dt.INT64, []), (_dt.INT64, []), (_dt.INT64, [])))
def hf_bert_tokenizer(ctx, input_names, output_names, container, operator_name=None, **kwargs):
    """Schema-declared wrapper that emits an 'HfBertTokenizer' custom op node."""
    return create_bert_tokenizer(
        ctx, 'HfBertTokenizer', input_names, output_names, container,
        operator_name=operator_name, **kwargs)
58
+
59
+
60
@schema(inputs=((_dt.STRING, []),),
        outputs=((_dt.INT64, []), (_dt.INT64, [])))
def gpt2_tokenize(ctx, input_names, output_names, container, operator_name=None, **kwargs):
    """Emit a 'GPT2Tokenizer' custom op from a HuggingFace tokenizer (``hf_tok``)
    or from raw ``vocab``/``merges`` attribute values."""
    if 'hf_tok' in kwargs:
        tok = kwargs['hf_tok']
        # Invert bpe_ranks (pair -> rank) so merges can be serialized rank order.
        rank_to_pair = {rank: pair for pair, rank in tok.bpe_ranks.items()}
        merge_lines = ("{} {}".format(*rank_to_pair[i]) for i in range(len(rank_to_pair)))
        attrs = {
            'vocab': json.dumps(tok.encoder, separators=(',', ':')),
            'merges': '\n'.join(merge_lines),
        }
    elif 'vocab' in kwargs:
        attrs = {'vocab': kwargs['vocab'], 'merges': kwargs['merges']}
    else:
        raise RuntimeError("Need hf_tok/vocab parameter to build the tokenizer")
    # -1 means "no padding" for the runtime op.
    attrs['padding_length'] = kwargs.get('padding_length', -1)

    return make_custom_op(ctx, 'GPT2Tokenizer', input_names,
                          output_names, container, operator_name=operator_name, **attrs)
81
+
82
+
83
def _get_file_content(path):
    """Read *path* and return its raw bytes."""
    with open(path, "rb") as stream:
        data = stream.read()
    return data
86
+
87
+
88
def _get_bound_object(func):
    """Return the instance a bound method *func* is attached to."""
    return getattr(func, '__self__')
90
+
91
+ # v1. Order of outputs - input_ids, token_type_ids, attention_mask
92
+ # (this is NOT consistent with the HuggingFace implementation of the tokenizer)
93
class PreHuggingFaceBert(ProcessingTracedModule):
    """BERT tokenization step whose outputs are ordered
    input_ids, token_type_ids, attention_mask (v1 order — NOT the
    HuggingFace order; see HfBertTokenizer for that)."""

    def __init__(self, hf_tok=None, vocab_file=None, do_lower_case=0, strip_accents=1):
        super(PreHuggingFaceBert, self).__init__()
        if hf_tok is not None:
            tok_kwargs = dict(hf_tok=hf_tok)
        else:
            tok_kwargs = dict(vocab_file=vocab_file,
                              do_lower_case=do_lower_case,
                              strip_accents=strip_accents)
        self.onnx_bert_tokenizer = create_op_function('BertTokenizer', bert_tokenizer, **tok_kwargs)

    def forward(self, text):
        return self.onnx_bert_tokenizer(text)

    def export(self, *args, **kwargs):
        opset = kwargs.get('opset_version', 0)
        return _get_bound_object(self.onnx_bert_tokenizer).build_model(opset, *args)
110
+
111
+
112
+ # v2. Order of outputs - input_ids, attention_mask, token_type_ids
113
+ # (this is consistent with the HuggingFace implementation of the tokenizer)
114
class HfBertTokenizer(ProcessingTracedModule):
    """BERT tokenization step whose outputs are ordered
    input_ids, attention_mask, token_type_ids (v2 order — consistent with the
    HuggingFace tokenizer implementation)."""

    def __init__(self, hf_tok=None, vocab_file=None, do_lower_case=0, strip_accents=1):
        super(HfBertTokenizer, self).__init__()
        if hf_tok is not None:
            tok_kwargs = dict(hf_tok=hf_tok)
        else:
            tok_kwargs = dict(vocab_file=vocab_file,
                              do_lower_case=do_lower_case,
                              strip_accents=strip_accents)
        self.onnx_bert_tokenizer = create_op_function('HfBertTokenizer', hf_bert_tokenizer, **tok_kwargs)

    def forward(self, text):
        return self.onnx_bert_tokenizer(text)

    def export(self, *args, **kwargs):
        opset = kwargs.get('opset_version', 0)
        return _get_bound_object(self.onnx_bert_tokenizer).build_model(opset, *args)
131
+
132
+
133
class PreHuggingFaceGPT2(ProcessingTracedModule):
    """GPT-2 BPE tokenization step built either from a HuggingFace tokenizer
    object or from on-disk vocab/merges files."""

    def __init__(self, hf_tok=None, vocab_file=None, merges_file=None, padding_length=-1):
        super(PreHuggingFaceGPT2, self).__init__()
        if hf_tok is not None:
            self.onnx_gpt2_tokenize = create_op_function('GPT2Tokenizer', gpt2_tokenize, hf_tok=hf_tok)
        else:
            self.onnx_gpt2_tokenize = create_op_function(
                'GPT2Tokenizer', gpt2_tokenize,
                vocab=_get_file_content(vocab_file),
                merges=_get_file_content(merges_file),
                padding_length=padding_length)

    def forward(self, text):
        return self.onnx_gpt2_tokenize(text)

    def export(self, *args, **kwargs):
        opset = kwargs.get('opset_version', 0)
        return _get_bound_object(self.onnx_gpt2_tokenize).build_model(opset, *args)