onnxruntime_extensions-0.12.0-cp39-cp39-win_amd64.whl → onnxruntime_extensions-0.14.0-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxruntime_extensions/_cuops.py +19 -2
- onnxruntime_extensions/_extensions_pydll.cp39-win_amd64.pyd +0 -0
- onnxruntime_extensions/_hf_cvt.py +59 -20
- onnxruntime_extensions/_version.py +1 -1
- onnxruntime_extensions/cvt.py +235 -25
- onnxruntime_extensions/pp_api.py +72 -2
- onnxruntime_extensions/tools/add_pre_post_processing_to_model.py +18 -12
- {onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/METADATA +13 -17
- {onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/RECORD +12 -12
- {onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/WHEEL +1 -1
- {onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/LICENSE +0 -0
- {onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/top_level.txt +0 -0
onnxruntime_extensions/_cuops.py
CHANGED
@@ -364,8 +364,15 @@ class SentencepieceDecoder(CustomOp):
     @classmethod
     def get_inputs(cls):
         return [
-            cls.io_def("ids", onnx.TensorProto.INT64, [None])
+            cls.io_def("ids", onnx.TensorProto.INT64, [None]),
+            cls.io_def('fairseq', onnx_proto.TensorProto.BOOL, [None])
         ]
+
+    @classmethod
+    def input_default_values(cls):
+        return {
+            'fairseq': [False]
+        }
 
     @classmethod
     def get_outputs(cls):

@@ -491,6 +498,16 @@ class StftNorm(CustomOp):
         ]
 
 
+class HfJsonTokenizer(CustomOp):
+    @classmethod
+    def get_inputs(cls):
+        return [cls.io_def('str', onnx_proto.TensorProto.STRING, ['N'])]
+
+    @classmethod
+    def get_outputs(cls):
+        return [cls.io_def("ids", onnx.TensorProto.INT64, ['N', None])]
+
+
 # TODO: have a C++ impl.
 def _argsort_op(x, dim):
     d = numpy.argsort(x, dim)

@@ -544,4 +561,4 @@ class SingleOpGraph:
 
     @staticmethod
     def get_op_class(op_type):
-        return globals()[op_type]
+        return globals()[op_type]
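As a companion to the `_cuops.py` changes above, a minimal sketch of how the new hooks surface through `SingleOpGraph`; only the class and method names come from the diff, the call pattern and expected outputs are illustrative:

```python
# Illustrative only: look up the op classes added/extended above.
# SentencepieceDecoder now declares a 'fairseq' BOOL input defaulting to [False],
# and HfJsonTokenizer is a new string-in/ids-out custom op.
from onnxruntime_extensions._cuops import SingleOpGraph

dec_cls = SingleOpGraph.get_op_class("SentencepieceDecoder")
print(dec_cls.input_default_values())           # expected: {'fairseq': [False]}

tok_cls = SingleOpGraph.get_op_class("HfJsonTokenizer")
print([i.name for i in tok_cls.get_inputs()])   # expected: ['str']
```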
onnxruntime_extensions/_extensions_pydll.cp39-win_amd64.pyd
CHANGED

Binary file
onnxruntime_extensions/_hf_cvt.py
CHANGED

@@ -48,8 +48,9 @@ class HFTokenizerConverter(CustomOpConverter):
             model_dir = hf_tokenizer.name_or_path
         else:
             model_dir = os.path.dirname(vocab_file)
-
-
+        f = open(os.path.join(model_dir, tokenizer_file), "r", encoding="utf-8")
+        tokenizer_json = json.load(f)
+        f.close()
         # get vocab object from json file
         vocab = tokenizer_json.get("model", {}).get("vocab", {})
         sorted_merges = tokenizer_json.get("model", {}).get("merges", [])

@@ -167,7 +168,8 @@ class HFTokenizerConverter(CustomOpConverter):
 TokenOpParam = namedtuple("TokenOpParam",
                           ["pre_op", "pre_attribute_cvt",
                            "post_op", "post_attribute_cvt",
-                           "
+                           "default_encoder_inputs",
+                           "default_decoder_inputs"],
                           defaults=(None, None, None, None, None))
 
 # Some tokenizers can be added by this table

@@ -175,35 +177,36 @@ TokenOpParam = namedtuple("TokenOpParam",
 # @formatter:off
 _PROCESSOR_DICT = {
     "BertTokenizer": TokenOpParam('BertTokenizer', HFTokenizerConverter.bert_tokenizer,
-                                  'BertDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                  'BertDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "DistilBertTokenizer": TokenOpParam('BertTokenizer', HFTokenizerConverter.bert_tokenizer,
-                                        'BertDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                        'BertDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "GPT2Tokenizer": TokenOpParam('GPT2Tokenizer', HFTokenizerConverter.bpe_tokenizer,
-                                  'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                  'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "CodeGenTokenizer": TokenOpParam('GPT2Tokenizer', HFTokenizerConverter.bpe_tokenizer,
-                                     'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                     'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "CLIPTokenizer": TokenOpParam('CLIPTokenizer', HFTokenizerConverter.clip_tokenizer,
-                                  'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                  'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "RobertaTokenizer": TokenOpParam('RobertaTokenizer', HFTokenizerConverter.roberta_tokenizer,
-                                     'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                     'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "BartTokenizer": TokenOpParam('RobertaTokenizer', HFTokenizerConverter.roberta_tokenizer,
-                                  'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                  'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "LayoutLMv3Tokenizer": TokenOpParam('RobertaTokenizer', HFTokenizerConverter.roberta_tokenizer,
-                                        'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                        'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "LongformerTokenizer": TokenOpParam('RobertaTokenizer', HFTokenizerConverter.roberta_tokenizer,
-                                        'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                        'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "LEDTokenizer": TokenOpParam('RobertaTokenizer', HFTokenizerConverter.roberta_tokenizer,
-                                 'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                 'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "MvpTokenizer": TokenOpParam('RobertaTokenizer', HFTokenizerConverter.roberta_tokenizer,
-                                 'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                 'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "T5Tokenizer": TokenOpParam('SentencepieceTokenizer', HFTokenizerConverter.spm_tokenizer,
                                 'SentencepieceDecoder', HFTokenizerConverter.spm_decoder,
-
+                                default_encoder_inputs={'add_eos': [True]}, default_decoder_inputs=None),
     "LlamaTokenizer": TokenOpParam('SpmTokenizer', HFTokenizerConverter.bpe_tokenizer,
-                                   'BpeDecoder', HFTokenizerConverter.bpe_decoder, None),
+                                   'BpeDecoder', HFTokenizerConverter.bpe_decoder, None, None),
     "XLMRobertaTokenizer": TokenOpParam('SentencepieceTokenizer', HFTokenizerConverter.spm_tokenizer,
                                 'SentencepieceDecoder', HFTokenizerConverter.spm_decoder,
-
+                                default_encoder_inputs={'add_bos': [True], 'add_eos': [True], 'fairseq': [True]},
+                                default_decoder_inputs={'fairseq': [True]}),
 }
 # @formatter:on
 

@@ -245,8 +248,8 @@ class HFTokenizerOnnxGraph:
 
         # add default_inputs into initializers to simplify the model input
         n_inputs = len(default_inputs)
-        if self.cvt_quadruple.
-        default_inputs.update(self.cvt_quadruple.
+        if self.cvt_quadruple.default_encoder_inputs is not None:
+            default_inputs.update(self.cvt_quadruple.default_encoder_inputs)
         if len(default_inputs) != n_inputs:
             raise ValueError(
                 "Op: {} does not have the inputs from its TokenOpParam.".format(_cvt_op))

@@ -286,7 +289,43 @@ class HFTokenizerOnnxGraph:
         return g
 
     def post_processing(self, **kwargs):
+        with_default_inputs = kwargs.pop("WITH_DEFAULT_INPUTS", True)
+
         _cvt_op = self.cvt_quadruple.post_op
         _cvt_func = self.cvt_quadruple.post_attribute_cvt
         cvt = partial(_cvt_func, self.cvt_obj)
-
+        g = SingleOpGraph.build_graph(_cvt_op, cvt=cvt, **kwargs)
+
+        default_inputs = {}
+        if with_default_inputs:
+            op_class = SingleOpGraph.get_op_class(_cvt_op)
+            default_inputs = op_class.input_default_values()
+            if default_inputs is None:
+                encoder_inputs = self.cvt_quadruple.default_encoder_inputs
+                if encoder_inputs is not None and encoder_inputs["fairseq"]:
+                    default_inputs = {}  # need to set to empty dict to call .update later
+                else:
+                    return g
+
+        # add default_inputs into initializers to simplify the model input
+        if self.cvt_quadruple.default_decoder_inputs is not None:
+            default_inputs.update(self.cvt_quadruple.default_decoder_inputs)
+
+        new_initializers = []
+
+        for k, v in default_inputs.items():
+            input_value_info = next((i for i in g.input if i.name == k), None)
+            if input_value_info is None:
+                raise ValueError(
+                    "The input {} is not found in the graph".format(k))
+
+            np_dtype = onnx.helper.tensor_dtype_to_np_dtype(
+                input_value_info.type.tensor_type.elem_type)
+            value = nparray(v, np_dtype)
+            new_initializers.append(onnx.numpy_helper.from_array(value, k))
+        g.initializer.extend(new_initializers)
+        new_inputs = [i for i in g.input if i.name not in default_inputs]
+        g.ClearField("input")
+        g.input.extend(new_inputs)
+
+        return g
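A hedged sketch of what these per-tokenizer defaults mean in practice: for XLM-RoBERTa the generated encoder/decoder graphs get their `add_bos`/`add_eos`/`fairseq` inputs folded into initializers, so the exported models keep a single data input. The checkpoint id below is only an example and is not part of the diff:

```python
# Sketch under assumptions: gen_processing_models dispatches to
# HFTokenizerOnnxGraph for supported slow HF tokenizers (see cvt.py below).
from transformers import AutoTokenizer
from onnxruntime_extensions import gen_processing_models

hf_tok = AutoTokenizer.from_pretrained("xlm-roberta-base", use_fast=False)
pre_m, post_m = gen_processing_models(hf_tok, pre_kwargs={}, post_kwargs={})
# The 'fairseq' flag is baked in as an initializer with value [True],
# so it no longer appears among the graph inputs of pre_m / post_m.
```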
onnxruntime_extensions/_version.py
CHANGED

@@ -1,2 +1,2 @@
 # Generated by setup.py, DON'T MANUALLY UPDATE IT!
-__version__ = "0.12.0"
+__version__ = "0.14.0"
onnxruntime_extensions/cvt.py
CHANGED
@@ -12,6 +12,24 @@ from typing import Union
 from ._hf_cvt import HFTokenizerConverter, HFTokenizerOnnxGraph  # noqa
 from ._ortapi2 import make_onnx_model, SingleOpGraph
 
+import os
+import numpy as np
+import tempfile
+import shutil
+
+# edit environment variables to avoid protobuf version mismatch
+os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
+
+from transformers.convert_slow_tokenizer import SpmConverter  # noqa: E402
+from transformers import AutoTokenizer  # noqa: E402
+from tokenizers import decoders, normalizers, pre_tokenizers, Regex  # noqa: E402
+
+
+OrtxTokenizer = None
+try:
+    from onnxruntime_extensions.pp_api import Tokenizer as OrtxTokenizer
+except ImportError:
+    pass
 
 _is_torch_available = False
 try:

@@ -24,11 +42,150 @@ except ImportError:
 
 _PRE_POST_PAIR = {'TrieTokenizer': "TrieDetokenizer"}
 
+def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
+    if add_prefix_space:
+        prepend_scheme = "always"
+        if not getattr(original_tokenizer, "legacy", True):
+            prepend_scheme = "first"
+    else:
+        prepend_scheme = "never"
+    return prepend_scheme
+
+
+class Baichuan2Converter(SpmConverter):
+    handle_byte_fallback = True
+
+    def __init__(self, original_tokenizer):
+        super().__init__(original_tokenizer)
+        original_tokenizer.add_prefix_space = False
+
+    def vocab(self, proto):
+        vocab = [
+            (self.original_tokenizer.convert_ids_to_tokens(0), 0.0),
+            (self.original_tokenizer.convert_ids_to_tokens(1), 0.0),
+            (self.original_tokenizer.convert_ids_to_tokens(2), 0.0),
+        ]
+        vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
+        return vocab
+
+    def unk_id(self, proto):
+        unk_id = 0
+        return unk_id
+
+    def decoder(self, replacement, add_prefix_space):
+        sequence = [
+            decoders.Replace("▁", " "),
+            decoders.ByteFallback(),
+            decoders.Fuse(),
+        ]
+        if add_prefix_space:
+            sequence += [decoders.Strip(content=" ", left=1)]
+        return decoders.Sequence(sequence)
+
+    def normalizer(self, proto):
+        if getattr(self.original_tokenizer, "legacy", True):
+            sequence = []
+            if getattr(self.original_tokenizer, "add_prefix_space", True):
+                sequence += [normalizers.Prepend(prepend="▁")]
+            sequence += [normalizers.Replace(pattern=" ", content="▁")]
+            return normalizers.Sequence(sequence)
+        return None  # non-legacy, no normalizer
+
+    def pre_tokenizer(self, replacement, add_prefix_space):
+        if not getattr(self.original_tokenizer, "legacy", True):  # non-legacy, we need a replace
+            prepend_scheme = _get_prepend_scheme(add_prefix_space, self.original_tokenizer)
+            return pre_tokenizers.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme, split=False)
+        else:
+            return super().pre_tokenizer(replacement, add_prefix_space)
+
+
+class ChatGlmConverter(SpmConverter):
+    def normalizer(self, proto):
+        precompiled_charsmap = proto.normalizer_spec.precompiled_charsmap
+        _normalizers = [
+            normalizers.Strip(left=False, right=True),  # stripping is important
+            normalizers.Replace(Regex(" {2,}"), "▁"),
+        ]
+        return normalizers.Sequence([normalizers.Precompiled(precompiled_charsmap)] + _normalizers)
+
+    def pre_tokenizer(self, replacement, add_prefix_space):
+        prepend_scheme = "always"
+        if hasattr(self.original_tokenizer, "legacy") and not self.original_tokenizer.legacy:
+            prepend_scheme = "first"
+        return pre_tokenizers.Metaspace(
+            replacement=replacement, add_prefix_space=add_prefix_space, prepend_scheme=prepend_scheme
+        )
+
+
+JSON_TOKEN_CONVERTERS = {
+    "BaichuanTokenizer": Baichuan2Converter,
+    "ChatGLMTokenizer": ChatGlmConverter,
+}
+
+# Save tokenizer JSON files using HuggingFace AutoTokenizer
+def convert_tokenizer(model_path, output_dir):
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    if output_dir is None:
+        if os.path.isdir(model_path):
+            output_dir = model_path
+        else:
+            # create a temporary directory
+            output_dir = tempfile.mkdtemp()
+            tokenizer.save_pretrained(output_dir)
+    json_path = os.path.join(output_dir, "tokenizer.json")
+
+    if type(tokenizer).__name__ in JSON_TOKEN_CONVERTERS:
+        GenericSpmConverter = JSON_TOKEN_CONVERTERS[type(tokenizer).__name__]
+
+    converted = GenericSpmConverter(tokenizer).converted()
+    converted.save(json_path)
+    print(f"**Tokenizer saved to {json_path}")
+    return output_dir
+
+# Validate tokenizer files downloaded from memory
+def validate_tokenizer(model_path, output_dir):
+    test_sentence = "I like walking my cute dog\n and\x17 then, 生活的真谛是 \t\t\t\t \n\n61"
+    if OrtxTokenizer is None:
+        print("onnxruntime_extensions package was built with C API enabled, skipping tokenization test")
+    ortx_tokenizer = OrtxTokenizer(output_dir)
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
+    expected_ids = tokenizer(test_sentence, return_tensors="np")["input_ids"]
+    ortx_ids = np.asarray(ortx_tokenizer.tokenize(test_sentence))
+    assert np.array_equal(expected_ids[0], ortx_ids), f"Tokenization mismatch: {expected_ids[0]} != {ortx_ids}"
+    print("Tokenization test passed")
+
+# Download tokenizer JSON files from memory
+def download_tokenizer(tokenizer_dir, output_dir):
+    try:
+        from transformers.utils import cached_file
+
+        resolved_full_file = cached_file(tokenizer_dir, "tokenizer.json")
+        resolved_config_file = cached_file(tokenizer_dir, "tokenizer_config.json")
+    except ImportError:
+        raise ValueError(f"Directory '{tokenizer_dir}' not found and transformers is not available")
+    if not os.path.exists(resolved_full_file):
+        raise FileNotFoundError(f"Downloaded HF file '{resolved_full_file}' cannot be found")
+    if os.path.dirname(resolved_full_file) != os.path.dirname(resolved_config_file):
+        raise FileNotFoundError(
+            f"Downloaded HF files '{resolved_full_file}' " f"and '{resolved_config_file}' are not in the same directory"
+        )
+
+    if output_dir is None or len(output_dir) == 0:
+        output_dir = os.path.dirname(resolved_full_file)
+        print(f"Using {output_dir} as output directory")
+        return output_dir
+    else:
+        # copy the files to the output directory
+        shutil.copy(resolved_full_file, output_dir)
+        shutil.copy(resolved_config_file, output_dir)
+    return output_dir
+
 
 def gen_processing_models(processor: Union[str, object],
                           pre_kwargs: dict = None,
                           post_kwargs: dict = None,
                           opset: int = None,
+                          schema_v2: bool = False,
                           **kwargs):
     """
     Generate the pre- and post-processing ONNX model, basing on the name or HF class.

@@ -47,6 +204,9 @@ def gen_processing_models(processor: Union[str, object],
         Keyword arguments for generating the post-processing model
     opset: int
         the target opset version of the model
+    schema_v2: bool
+        the flag for using embedded tokenizer files; this option leverages the blob-loading functionality
+        which loads HF tokenizers from memory rather than using the tokenizer files in HF JSON format.
     kwargs:
         The additional arguments for generating models
 

@@ -58,11 +218,42 @@ def gen_processing_models(processor: Union[str, object],
     if pre_kwargs is None and post_kwargs is None:
         raise ValueError(
             "Either pre_kwargs or post_kwargs should be provided. None means no processing graph output.")
-
+
+    # If true, we get the tokenizer JSON files by either downloading from cache or using HuggingFace AutoTokenizer
+    # to convert them, and then create an ONNX model with the JSON files as strings in the model attributes (attrs).
+    if schema_v2:
+        model_name = processor if isinstance(processor, str) else type(processor).__name__
+
+        converted_tokenizer = {"Baichuan2", "chatglm"}
+        need_convert = False
+        for token in converted_tokenizer:
+            if model_name.find(token) != -1:
+                need_convert = True
+                break
+
+        if need_convert:
+            model_dir = convert_tokenizer(model_name)
+            validate_tokenizer(model_name, None)
+        else:
+            model_dir = download_tokenizer(model_name, None)
+
+        # Load the content of tokenizer.json into a string
+        with open(f"{model_dir}/tokenizer.json", "r", encoding="utf-8") as f:
+            tokenizer_vocab = f.read()
+
+        # Load the content of tokenizer_config.json into a string
+        with open(f"{model_dir}/tokenizer_config.json", "r", encoding="utf-8") as f:
+            tokenizer_config = f.read()
+
+        # Create an ONNX model with these JSON file strings in attrs
         g_pre, g_post = (None, None)
-    if pre_kwargs:
-
-
+        if pre_kwargs is not None:
+            # Add tokenizer_vocab and tokenizer_config to the kwargs
+            # so they are added to attrs in build_graph
+            pre_kwargs['tokenizer_vocab'] = tokenizer_vocab
+            pre_kwargs['tokenizer_config'] = tokenizer_config
+            g_pre = SingleOpGraph.build_graph("HfJsonTokenizer", **pre_kwargs)
+        if post_kwargs is not None:
             if pre_kwargs is None:
                 cls_name = processor
             else:

@@ -70,27 +261,46 @@ def gen_processing_models(processor: Union[str, object],
                     raise RuntimeError(
                         f"Cannot locate the post processing operator name from {processor}")
                 cls_name = _PRE_POST_PAIR[processor]
+            # Add tokenizer_vocab and tokenizer_config to the kwargs
+            # so they are added to attrs in build_graph
+            post_kwargs['tokenizer_vocab'] = tokenizer_vocab
+            post_kwargs['tokenizer_config'] = tokenizer_config
            g_post = SingleOpGraph.build_graph(cls_name, **post_kwargs)
        return make_onnx_model(g_pre) if g_pre else None, make_onnx_model(g_post) if g_post else None
-
-    cls_name = type(processor).__name__
-    if cls_name == "WhisperProcessor":
-        if WhisperDataProcGraph is None:
-            raise ValueError(
-                "The Whisper processor needs torch.onnx support, please install pytorch 2.0 and above")
-        _converter = WhisperDataProcGraph(processor, opset=opset, **kwargs)
-        pre_m = _converter.pre_processing(
-            **pre_kwargs) if pre_kwargs is not None else None
-        post_m = _converter.post_processing(
-            **post_kwargs) if post_kwargs is not None else None
-        return pre_m, post_m
-    elif HFTokenizerOnnxGraph.is_supported(processor):
-        _converter = HFTokenizerOnnxGraph(processor)
-        pre_g = _converter.pre_processing(
-            **pre_kwargs) if pre_kwargs is not None else None
-        post_g = _converter.post_processing(
-            **post_kwargs) if post_kwargs is not None else None
-        return make_onnx_model(pre_g) if pre_g else None, \
-            make_onnx_model(post_g) if post_g else None
     else:
-
+        if isinstance(processor, str):
+            g_pre, g_post = (None, None)
+            if pre_kwargs:
+                g_pre = SingleOpGraph.build_graph(processor, **pre_kwargs)
+            if post_kwargs:
+                if pre_kwargs is None:
+                    cls_name = processor
+                else:
+                    if processor not in _PRE_POST_PAIR:
+                        raise RuntimeError(
+                            f"Cannot locate the post processing operator name from {processor}")
+                    cls_name = _PRE_POST_PAIR[processor]
+                g_post = SingleOpGraph.build_graph(cls_name, **post_kwargs)
+            return make_onnx_model(g_pre) if g_pre else None, make_onnx_model(g_post) if g_post else None
+
+        cls_name = type(processor).__name__
+        if cls_name == "WhisperProcessor":
+            if WhisperDataProcGraph is None:
+                raise ValueError(
+                    "The Whisper processor needs torch.onnx support, please install pytorch 2.0 and above")
+            _converter = WhisperDataProcGraph(processor, opset=opset, **kwargs)
+            pre_m = _converter.pre_processing(
+                **pre_kwargs) if pre_kwargs is not None else None
+            post_m = _converter.post_processing(
+                **post_kwargs) if post_kwargs is not None else None
+            return pre_m, post_m
+        elif HFTokenizerOnnxGraph.is_supported(processor):
+            _converter = HFTokenizerOnnxGraph(processor)
+            pre_g = _converter.pre_processing(
+                **pre_kwargs) if pre_kwargs is not None else None
+            post_g = _converter.post_processing(
+                **post_kwargs) if post_kwargs is not None else None
+            return make_onnx_model(pre_g) if pre_g else None, \
+                make_onnx_model(post_g) if post_g else None
+        else:
+            raise ValueError(f"Unsupported processor/tokenizer: {cls_name}")
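For context, a hypothetical invocation of the new `schema_v2` path; the checkpoint id is a placeholder, and only `gen_processing_models` and its `schema_v2` flag come from the diff:

```python
# Hypothetical sketch: with schema_v2=True the tokenizer.json /
# tokenizer_config.json contents are embedded as string attributes of a
# single HfJsonTokenizer node instead of being converted op-by-op.
from onnxruntime_extensions import gen_processing_models

pre_m, _ = gen_processing_models("some-org/some-hf-model",  # placeholder id
                                 pre_kwargs={}, schema_v2=True)
```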
onnxruntime_extensions/pp_api.py
CHANGED
@@ -3,11 +3,81 @@
 # license information.
 ###############################################################################
 
+import os
 from . import _extensions_pydll as _C
-if not hasattr(_C, "
-    raise ImportError(
+if not hasattr(_C, "delete_object"):
+    raise ImportError(
+        "onnxruntime_extensions is not built with pre-processing C API\n"
+        "To enable it, please build the package with --ortx-user-option=pp_api")
 
 create_processor = _C.create_processor
 load_images = _C.load_images
 image_pre_process = _C.image_pre_process
 tensor_result_get_at = _C.tensor_result_get_at
+
+create_tokenizer = _C.create_tokenizer
+batch_tokenize = _C.batch_tokenize
+batch_detokenize = _C.batch_detokenize
+
+delete_object = _C.delete_object
+
+
+class Tokenizer:
+    def __init__(self, tokenizer_dir):
+        self.tokenizer = None
+        if os.path.isdir(tokenizer_dir):
+            self.tokenizer = create_tokenizer(tokenizer_dir)
+        else:
+            try:
+                from transformers.utils import cached_file
+                resolved_full_file = cached_file(
+                    tokenizer_dir, "tokenizer.json")
+                resolved_config_file = cached_file(
+                    tokenizer_dir, "tokenizer_config.json")
+            except ImportError:
+                raise ValueError(
+                    f"Directory '{tokenizer_dir}' not found and transformers is not available")
+            if not os.path.exists(resolved_full_file):
+                raise FileNotFoundError(
+                    f"Downloaded HF file '{resolved_full_file}' cannot be found")
+            if (os.path.dirname(resolved_full_file) != os.path.dirname(resolved_config_file)):
+                raise FileNotFoundError(
+                    f"Downloaded HF files '{resolved_full_file}' "
+                    f"and '{resolved_config_file}' are not in the same directory")
+
+            tokenizer_dir = os.path.dirname(resolved_full_file)
+            self.tokenizer = create_tokenizer(tokenizer_dir)
+
+    def tokenize(self, text):
+        if isinstance(text, (list, tuple)):
+            return batch_tokenize(self.tokenizer, text)
+        return batch_tokenize(self.tokenizer, [text])[0]
+
+    def detokenize(self, tokens):
+        return batch_detokenize(self.tokenizer, [tokens])
+
+    def __del__(self):
+        if delete_object and self.tokenizer:
+            delete_object(self.tokenizer)
+        self.tokenizer = None
+
+
+class ImageProcessor:
+    def __init__(self, processor_json):
+        self.processor = create_processor(processor_json)
+
+    def pre_process(self, images):
+        if isinstance(images, str):
+            images = [images]
+        if isinstance(images, list):
+            images = load_images(images)
+        return image_pre_process(self.processor, images)
+
+    @staticmethod
+    def to_numpy(result, idx):
+        return tensor_result_get_at(result, idx)
+
+    def __del__(self):
+        if delete_object and self.processor:
+            delete_object(self.processor)
+        self.processor = None
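A short usage sketch for the two new wrappers; it requires a package built with `--ortx-user-option=pp_api`, and the paths and repo id are placeholders, not values from the diff:

```python
from onnxruntime_extensions.pp_api import ImageProcessor, Tokenizer

tok = Tokenizer("path/to/tokenizer_dir")        # local dir, or a HF repo id
ids = tok.tokenize("Hello, world!")             # single string -> token ids
print(tok.detokenize(ids))

proc = ImageProcessor("processor_config.json")  # placeholder config path
result = proc.pre_process(["sample.jpg"])       # file paths are loaded first
pixel_values = ImageProcessor.to_numpy(result, 0)
```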
onnxruntime_extensions/tools/add_pre_post_processing_to_model.py
CHANGED

@@ -163,7 +163,8 @@ def superresolution(model_file: Path, output_file: Path, output_format: str, onn
 
 
 def yolo_detection(model_file: Path, output_file: Path, output_format: str = 'jpg',
-                   onnx_opset: int = 16, num_classes: int = 80, input_shape: List[int] = None):
+                   onnx_opset: int = 16, num_classes: int = 80, input_shape: List[int] = None,
+                   output_as_image: bool = True):
     """
     SSD-like model and Faster-RCNN-like model are including NMS inside already, You can find it from onnx model zoo.
 

@@ -185,6 +186,7 @@ def yolo_detection(model_file: Path, output_file: Path, output_format: str = 'jp
     :param onnx_opset: The opset version of onnx model, default(16).
     :param num_classes: The number of classes, default(80).
     :param input_shape: The shape of input image (height,width), default will be asked from model input.
+    :param output_as_image: The flag that means that the model should have the image with boxes instead of the coordinates of the boxess
     """
     model = onnx.load(str(model_file.resolve(strict=True)))
     inputs = [create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])]

@@ -284,19 +286,23 @@ Because we need to execute the model to determine the output shape in order to a
             utils.IoMapEntry("Resize", producer_idx=0, consumer_idx=2),
             utils.IoMapEntry("LetterBox", producer_idx=0, consumer_idx=3),
         ]),
-        # DrawBoundingBoxes on the original image
-        # Model imported from pytorch has CENTER_XYWH format
-        # two mode for how to color box,
-        # 1. colour_by_classes=True, (colour_by_classes), 2. colour_by_classes=False,(colour_by_confidence)
-        (DrawBoundingBoxes(mode='CENTER_XYWH', num_classes=num_classes, colour_by_classes=True),
-        [
-            utils.IoMapEntry("ConvertImageToBGR", producer_idx=0, consumer_idx=0),
-            utils.IoMapEntry("ScaleBoundingBoxes", producer_idx=0, consumer_idx=1),
-        ]),
-        # Encode to jpg/png
-        ConvertBGRToImage(image_format=output_format),
     ]
 
+    if output_as_image:
+        post_processing_steps += [
+            # DrawBoundingBoxes on the original image
+            # Model imported from pytorch has CENTER_XYWH format
+            # two mode for how to color box,
+            # 1. colour_by_classes=True, (colour_by_classes), 2. colour_by_classes=False,(colour_by_confidence)
+            (DrawBoundingBoxes(mode='CENTER_XYWH', num_classes=num_classes, colour_by_classes=True),
+            [
+                utils.IoMapEntry("ConvertImageToBGR", producer_idx=0, consumer_idx=0),
+                utils.IoMapEntry("ScaleBoundingBoxes", producer_idx=0, consumer_idx=1),
+            ]),
+            # Encode to jpg/png
+            ConvertBGRToImage(image_format=output_format),
+        ]
+
     pipeline.add_post_processing(post_processing_steps)
 
     new_model = pipeline.run(model)
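A sketch of the new flag in use; the file names are examples only:

```python
# With output_as_image=False the post-processing pipeline stops after
# ScaleBoundingBoxes, so the model emits box coordinates rather than an
# encoded image with the boxes drawn on it.
from pathlib import Path
from onnxruntime_extensions.tools.add_pre_post_processing_to_model import yolo_detection

yolo_detection(Path("yolov8n.onnx"), Path("yolov8n.with_pp.onnx"),
               output_format="jpg", onnx_opset=16, output_as_image=False)
```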
{onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.2
 Name: onnxruntime_extensions
-Version: 0.12.0
+Version: 0.14.0
 Summary: ONNXRuntime Extensions
 Home-page: https://github.com/microsoft/onnxruntime-extensions
 Author: Microsoft Corporation

@@ -18,6 +18,14 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: License :: OSI Approved :: MIT License
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: summary
 
 # ONNXRuntime-Extensions
 

@@ -25,29 +33,17 @@ License-File: LICENSE
 
 ## What's ONNXRuntime-Extensions
 
-Introduction: ONNXRuntime-Extensions is a library that extends the capability of the ONNX models and inference with ONNX Runtime, via ONNX Runtime Custom Operator ABIs. It includes a set of [ONNX Runtime Custom Operator](https://onnxruntime.ai/docs/reference/operators/add-custom-op.html) to support the common pre- and post-processing operators for vision, text, and nlp models. And it supports multiple languages and platforms, like Python on Windows/Linux/macOS, some mobile platforms like Android and iOS, and Web-Assembly etc. The basic workflow is to enhance a ONNX model firstly and then do the model inference with ONNX Runtime and ONNXRuntime-Extensions package.
+Introduction: ONNXRuntime-Extensions is a C/C++ library that extends the capability of the ONNX models and inference with ONNX Runtime, via ONNX Runtime Custom Operator ABIs. It includes a set of [ONNX Runtime Custom Operator](https://onnxruntime.ai/docs/reference/operators/add-custom-op.html) to support the common pre- and post-processing operators for vision, text, and nlp models. And it supports multiple languages and platforms, like Python on Windows/Linux/macOS, some mobile platforms like Android and iOS, and Web-Assembly etc. The basic workflow is to enhance a ONNX model firstly and then do the model inference with ONNX Runtime and ONNXRuntime-Extensions package.
 
 
 ## Quickstart
+The library can be utilized as either a C/C++ library or other advance language packages like Python, Java, C#, etc. To build it as a shared library, you can use the `build.bat` or `build.sh` scripts located in the root folder. The CMake build definition is available in the `CMakeLists.txt` file and can be modified by appending options to `build.bat` or `build.sh`, such as `build.bat -DOCOS_BUILD_SHARED_LIB=OFF`. For more details, please refer to the [C API documentation](./docs/c_api.md).
 
 ### **Python installation**
 ```bash
 pip install onnxruntime-extensions
 ````
-
-
-### **Nightly Build**
-
-#### <strong>on Windows</strong>
-```cmd
-pip install --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ onnxruntime-extensions
-```
-Please ensure that you have met the prerequisites of onnxruntime-extensions (e.g., onnx and onnxruntime) in your Python environment.
-#### <strong>on Linux/macOS</strong>
-Please make sure the compiler toolkit like gcc(later than g++ 8.0) or clang are installed before the following command
-```bash
-python -m pip install git+https://github.com/microsoft/onnxruntime-extensions.git
-```
+The nightly build is also available for the latest features, please refer to [nightly build](./docs/development.md#nightly-build)
 
 
 ## Usage
{onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/RECORD
RENAMED

@@ -1,15 +1,15 @@
 onnxruntime_extensions/__init__.py,sha256=GMnMIHJ-uqvJGPn5fpCZOi7OG16kFVpfOTTO88kYJWY,2387
-onnxruntime_extensions/_cuops.py,sha256=
-onnxruntime_extensions/_extensions_pydll.cp39-win_amd64.pyd,sha256=
+onnxruntime_extensions/_cuops.py,sha256=W4hmBNoNvFk84V4UAUpltGNFjzcf0fju3iCeuatqXHE,16661
+onnxruntime_extensions/_extensions_pydll.cp39-win_amd64.pyd,sha256=0BtiZfy7lditfJwj-cOFbyntPzDZcERTbsHiVD7tiko,1900032
 onnxruntime_extensions/_extensions_pydll.pyi,sha256=mYXkqNaCgAbs161RDKgDjxIX9vWdYdVPDC-0X9cieco,1070
-onnxruntime_extensions/_hf_cvt.py,sha256=
+onnxruntime_extensions/_hf_cvt.py,sha256=7-nV40_lCydWHBMXUkfe3oaJSI7l0SDQdLT92yZG2oc,15945
 onnxruntime_extensions/_ocos.py,sha256=OlDOlCH_vWFOBkjbp6Pujgw6rgk8Fd3_2Mi5ev1eeS0,4193
 onnxruntime_extensions/_ortapi2.py,sha256=Tfrf9fQMQ0e7Wa4R8s4SHdwMNBdmj33wH3y5vMkVVQE,9951
 onnxruntime_extensions/_torch_cvt.py,sha256=hGOiw24QuFpK_3CLjg8Fs2GD_cCdM049xcJxkHVRbAk,10185
-onnxruntime_extensions/_version.py,sha256=
+onnxruntime_extensions/_version.py,sha256=u5KwYLG4_oeOTmNuRw2dLiPJ5hByZa12xh0VGidbJMU,76
 onnxruntime_extensions/cmd.py,sha256=eIiNNY0ohbUCPgmr9RwOfi0Gzw7nWL17i625L-ZKezI,2428
-onnxruntime_extensions/cvt.py,sha256=
-onnxruntime_extensions/pp_api.py,sha256
+onnxruntime_extensions/cvt.py,sha256=2cPsKj4weGDveV36mtoQ9yVUfjtqmFNUpFghrsppXOg,13409
+onnxruntime_extensions/pp_api.py,sha256=Fk1iEMPwcnr84V9ALhr-zuMPNi_fyIMPTrKPeOQooZs,3157
 onnxruntime_extensions/util.py,sha256=KxNFY0-5CG1i9HADcCc4V33PNukTO46Os_KIL8pj-l8,7394
 onnxruntime_extensions/onnxprocess/__init__.py,sha256=BnveHXnu2nTQNbCLeZujZgZwO9A3yWFbQGTDthCFbIc,534
 onnxruntime_extensions/onnxprocess/_builder.py,sha256=L_afKeE7Wc4mWJ47eVXQ2stvmal_37QVTQZgKmt0ZK8,1844

@@ -27,7 +27,7 @@ onnxruntime_extensions/pnp/_unifier.py,sha256=FPQYL1Z6f1Tv2qRsnhW_is9k7-GmCYhf6Z
 onnxruntime_extensions/pnp/_utils.py,sha256=xBh7-_VstgqXlhBaQ_6E5GV6341ywCRQsrJZZZtYaCc,13061
 onnxruntime_extensions/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnxruntime_extensions/tools/add_HuggingFace_CLIPImageProcessor_to_model.py,sha256=iNGAd9Ym0iKDQkXdWdka-R3S47TT3hMTihdGXg0uHL0,6786
-onnxruntime_extensions/tools/add_pre_post_processing_to_model.py,sha256=
+onnxruntime_extensions/tools/add_pre_post_processing_to_model.py,sha256=M2dSO2FdLo1Hs0GDVqYmKxmWDj7BsKCiyhpqxmCdDWg,24301
 onnxruntime_extensions/tools/pre_post_processing/__init__.py,sha256=YKxCtG2McBExYYmcf1tbqDquqIS1iTs4iPx86MBcfRo,125
 onnxruntime_extensions/tools/pre_post_processing/pre_post_processor.py,sha256=lnQ4TUKkZ-TvVC8U_ov3Nsz9gzES0ktnmD-DPTzutPA,19635
 onnxruntime_extensions/tools/pre_post_processing/step.py,sha256=SYFxtrDmXyFpnnlPl4c49Yg1THFZvh5Y9NwuvquHTVg,9394

@@ -36,8 +36,8 @@ onnxruntime_extensions/tools/pre_post_processing/steps/__init__.py,sha256=pdVRZB
 onnxruntime_extensions/tools/pre_post_processing/steps/general.py,sha256=fF_XVFSKOCu482Sqjp-nVPbs-ZVGpPal2ekbO1gUO_4,13781
 onnxruntime_extensions/tools/pre_post_processing/steps/nlp.py,sha256=ZCxRNxqfANplxCe0I-6BfHziM1jDYJsNQKbHdM3Y1I0,15173
 onnxruntime_extensions/tools/pre_post_processing/steps/vision.py,sha256=BM6CGylOSu4l6UarPfW0I2tgkJDa1Q-gYz__CxZle-k,53183
-onnxruntime_extensions-0.
-onnxruntime_extensions-0.
-onnxruntime_extensions-0.
-onnxruntime_extensions-0.
-onnxruntime_extensions-0.
+onnxruntime_extensions-0.14.0.dist-info/LICENSE,sha256=mQaUD2Gx8LUz-n2ZuvVReLKAj74RPqUd-_rYVyzNXys,1162
+onnxruntime_extensions-0.14.0.dist-info/METADATA,sha256=udEUfhboh5qT4rtnEBg94FkkPmjjAYRJx9rWfg5ZrJg,4657
+onnxruntime_extensions-0.14.0.dist-info/WHEEL,sha256=agy-BJge3afXwWznUXANATmKFW4eqelqRR0uf608A_0,99
+onnxruntime_extensions-0.14.0.dist-info/top_level.txt,sha256=XyAgQDKyXsf6_0MJb58kRdHwigpTn7A7kl9diBEjs8M,23
+onnxruntime_extensions-0.14.0.dist-info/RECORD,,
{onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/LICENSE
RENAMED

File without changes
{onnxruntime_extensions-0.12.0.dist-info → onnxruntime_extensions-0.14.0.dist-info}/top_level.txt
RENAMED

File without changes