clarifai 11.1.7rc3__py3-none-any.whl → 11.2.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- clarifai/__init__.py +1 -1
- clarifai/cli/base.py +18 -7
- clarifai/cli/compute_cluster.py +8 -1
- clarifai/cli/deployment.py +5 -1
- clarifai/cli/model.py +25 -38
- clarifai/cli/nodepool.py +4 -1
- clarifai/client/model.py +393 -157
- clarifai/runners/__init__.py +7 -2
- clarifai/runners/models/model_builder.py +12 -80
- clarifai/runners/models/model_class.py +28 -279
- clarifai/runners/models/model_run_locally.py +88 -19
- clarifai/runners/models/model_runner.py +0 -2
- clarifai/runners/models/model_servicer.py +2 -11
- clarifai/runners/utils/data_handler.py +210 -271
- clarifai/utils/cli.py +9 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/METADATA +16 -4
- clarifai-11.2.1.dist-info/RECORD +101 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/WHEEL +1 -1
- clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-310.pyc +0 -0
- clarifai/__pycache__/versions.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
- clarifai/client/cli/__init__.py +0 -0
- clarifai/client/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/base_cli.py +0 -88
- clarifai/client/cli/model_cli.py +0 -29
- clarifai/client/model_client.py +0 -448
- clarifai/constants/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/modules/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/dockerfile_template/Dockerfile.cpu.template +0 -31
- clarifai/runners/dockerfile_template/Dockerfile.cuda.template +0 -42
- clarifai/runners/dockerfile_template/Dockerfile.nim +0 -71
- clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310-pytest-7.1.2.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
- clarifai/runners/models/model_class_refract.py +0 -80
- clarifai/runners/models/model_upload.py +0 -607
- clarifai/runners/models/temp.py +0 -25
- clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_handler_refract.py +0 -213
- clarifai/runners/utils/data_types.py +0 -427
- clarifai/runners/utils/logger.py +0 -0
- clarifai/runners/utils/method_signatures.py +0 -477
- clarifai/runners/utils/serializers.py +0 -222
- clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
- clarifai-11.1.7rc3.dist-info/RECORD +0 -237
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/entry_points.txt +0 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info/licenses}/LICENSE +0 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/top_level.txt +0 -0
--- a/clarifai/runners/utils/method_signatures.py
+++ /dev/null
@@ -1,477 +0,0 @@
-import inspect
-import json
-from collections import namedtuple
-from typing import List, Tuple, get_args, get_origin
-
-import numpy as np
-import PIL.Image
-import yaml
-from clarifai_grpc.grpc.api import resources_pb2
-from google.protobuf.json_format import MessageToDict, ParseDict
-from google.protobuf.message import Message as MessageProto
-
-from clarifai.runners.utils import data_types
-from clarifai.runners.utils.serializers import (
-    AtomicFieldSerializer, JSONSerializer, ListSerializer, MessageSerializer,
-    NamedFieldsSerializer, NDArraySerializer, Serializer, TupleSerializer)
-
-
-def build_function_signature(func):
-  '''
-  Build a signature for the given function.
-  '''
-  sig = inspect.signature(func)
-
-  # check if func is bound, and if not, remove self/cls
-  if getattr(func, '__self__', None) is None and sig.parameters and list(
-      sig.parameters.values())[0].name in ('self', 'cls'):
-    sig = sig.replace(parameters=list(sig.parameters.values())[1:])
-
-  return_annotation = sig.return_annotation
-  if return_annotation == inspect.Parameter.empty:
-    raise TypeError('Function must have a return annotation')
-
-  input_sigs = []
-  input_streaming = []
-  for p in sig.parameters.values():
-    model_type_field, _, streaming = build_variable_signature(p.name, p.annotation, p.default)
-    input_sigs.append(model_type_field)
-    input_streaming.append(streaming)
-
-  output_sig, output_type, output_streaming = build_variable_signature(
-      'return', return_annotation, is_output=True)
-  # TODO: flatten out "return" layer if not needed
-
-  # check for streams and determine method type
-  if sum(input_streaming) > 1:
-    raise TypeError('streaming methods must have at most one streaming input')
-  input_streaming = any(input_streaming)
-  if not (input_streaming or output_streaming):
-    method_type = 'UNARY_UNARY'
-  elif not input_streaming and output_streaming:
-    method_type = 'UNARY_STREAMING'
-  elif input_streaming and output_streaming:
-    method_type = 'STREAMING_STREAMING'
-  else:
-    raise TypeError('stream methods with streaming inputs must have streaming outputs')
-
-  method_signature = resources_pb2.MethodSignature()
-
-  method_signature.name = func.__name__
-  method_signature.method_type = getattr(resources_pb2.RunnerMethodType, method_type)
-  assert method_type in ('UNARY_UNARY', 'UNARY_STREAMING', 'STREAMING_STREAMING')
-  # method_signature.method_type = method_type
-  method_signature.description = inspect.cleandoc(func.__doc__ or '')
-  # method_signature.annotations_json = json.dumps(_get_annotations_source(func))
-
-  method_signature.input_fields.extend(input_sigs)
-  method_signature.output_fields.append(output_sig)
-  return method_signature
-
-
-# def _get_annotations_source(func):
-#   """Extracts raw annotation strings from the function source."""
-#   source = inspect.getsource(func)  # Get function source code
-#   source = textwrap.dedent(source)  # Dedent source code
-#   tree = ast.parse(source)  # Parse into AST
-#   func_node = next(node for node in tree.body
-#                    if isinstance(node, ast.FunctionDef))  # Get function node
-
-#   annotations = {}
-#   for arg in func_node.args.args:  # Process arguments
-#     if arg.annotation:
-#       annotations[arg.arg] = ast.unparse(arg.annotation)  # Get raw annotation string
-
-#   if func_node.returns:  # Process return type
-#     annotations["return"] = ast.unparse(func_node.returns)
-
-#   return annotations
-
-
-def build_variable_signature(name, annotation, default=inspect.Parameter.empty, is_output=False):
-  '''
-  Build a data proto signature and get the normalized python type for the given annotation.
-  '''
-
-  # check valid names (should already be constrained by python naming, but check anyway)
-  if not name.isidentifier():
-    raise ValueError(f'Invalid variable name: {name}')
-
-  # get fields for each variable based on type
-  tp, streaming = _normalize_type(annotation)
-
-  sig = resources_pb2.ModelTypeField()
-  sig.name = name
-  sig.iterator = streaming
-
-  if not is_output:
-    sig.required = (default is inspect.Parameter.empty)
-    if not sig.required:
-      sig.default = str(default)
-
-  _fill_signature_type(sig, tp)
-
-  return sig, type, streaming
-
-
-def _fill_signature_type(sig, tp):
-  try:
-    if tp in _DATA_TYPES:
-      sig.type = _DATA_TYPES[tp].type
-      return
-  except TypeError:
-    pass  # not hashable type
-
-  # Handle NamedFields with annotations
-  # Check for dynamically generated NamedFields subclasses (from type annotations)
-  if inspect.isclass(tp) and issubclass(tp, data_types.NamedFields) and hasattr(
-      tp, '__annotations__'):
-    sig.type = resources_pb2.ModelTypeField.DataType.NAMED_FIELDS
-    for name, inner_type in tp.__annotations__.items():
-      inner_sig = sig.type_args.add()
-      inner_sig.name = name
-      _fill_signature_type(inner_sig, inner_type)
-    return
-
-  # Handle NamedFields instances (dict-like)
-  if isinstance(tp, data_types.NamedFields):
-    sig.type = resources_pb2.ModelTypeField.DataType.NAMED_FIELDS
-    for name, inner_type in tp.items():
-      inner_sig = sig.type_args.add()
-      inner_sig.name = name
-      _fill_signature_type(inner_sig, inner_type)
-    return
-
-  origin = get_origin(tp)
-  args = get_args(tp)
-
-  # Handle Tuple type
-  if origin == tuple:
-    sig.type = resources_pb2.ModelTypeField.DataType.TUPLE
-    for inner_type in args:
-      inner_sig = sig.type_args.add()
-      _fill_signature_type(inner_sig, inner_type)
-    return
-
-  # Handle List type
-  if origin == list:
-    sig.type = resources_pb2.ModelTypeField.DataType.LIST
-    inner_sig = sig.type_args.add()
-    _fill_signature_type(inner_sig, args[0])
-    return
-
-  raise TypeError(f'Unsupported type: {tp}')
-
-
-def serializer_from_signature(signature):
-  '''
-  Get the serializer for the given signature.
-  '''
-  if signature.type in _SERIALIZERS_BY_TYPE_ENUM:
-    return _SERIALIZERS_BY_TYPE_ENUM[signature.type]
-  if signature.type == resources_pb2.ModelTypeField.DataType.LIST:
-    return ListSerializer(serializer_from_signature(signature.type_args[0]))
-  if signature.type == resources_pb2.ModelTypeField.DataType.TUPLE:
-    return TupleSerializer([serializer_from_signature(sig) for sig in signature.type_args])
-  if signature.type == resources_pb2.ModelTypeField.DataType.NAMED_FIELDS:
-    return NamedFieldsSerializer(
-        {sig.name: serializer_from_signature(sig)
-         for sig in signature.type_args})
-  raise ValueError(f'Unsupported type: {signature.type}')
-
-
-def signatures_to_json(signatures):
-  assert isinstance(
-      signatures, dict), 'Expected dict of signatures {name: signature}, got %s' % type(signatures)
-  # TODO change to proto when ready
-  signatures = {name: MessageToDict(sig) for name, sig in signatures.items()}
-  return json.dumps(signatures)
-
-
-def signatures_from_json(json_str):
-  signatures_dict = json.loads(json_str)
-  assert isinstance(signatures_dict, dict), "Expected JSON to decode into a dictionary"
-
-  return {
-      name: ParseDict(sig_dict, resources_pb2.MethodSignature())
-      for name, sig_dict in signatures_dict.items()
-  }
-  # d = json.loads(json_str, object_pairs_hook=_SignatureDict)
-  # return d
-
-
-def signatures_to_yaml(signatures):
-  # XXX go in/out of json to get the correct format and python dict types
-  d = json.loads(signatures_to_json(signatures))
-
-  def _filter_empty(d):
-    if isinstance(d, (list, tuple)):
-      return [_filter_empty(v) for v in d if v]
-    if isinstance(d, dict):
-      return {k: _filter_empty(v) for k, v in d.items() if v}
-    return d
-
-  return yaml.dump(_filter_empty(d), default_flow_style=False)
-
-
-def signatures_from_yaml(yaml_str):
-  d = yaml.safe_load(yaml_str)
-  return signatures_from_json(json.dumps(d))
-
-
-def serialize(kwargs, signatures, proto=None, is_output=False):
-  '''
-  Serialize the given kwargs into the proto using the given signatures.
-  '''
-  if proto is None:
-    proto = resources_pb2.Data()
-  unknown = set(kwargs.keys()) - set(sig.name for sig in signatures)
-  if unknown:
-    if unknown == {'return'} and len(signatures) > 1:
-      raise TypeError('Got a single return value, but expected multiple outputs {%s}' %
-                      ', '.join(sig.name for sig in signatures))
-    raise TypeError('Got unexpected key: %s' % ', '.join(unknown))
-  inline_first_value = False
-  if (is_output and len(signatures) == 1 and signatures[0].name == 'return' and
-      len(kwargs) == 1 and 'return' in kwargs):
-    # if there is only one output, flatten it and return directly
-    inline_first_value = True
-  if signatures and signatures[0].type not in _NON_INLINABLE_TYPES:
-    inline_first_value = True
-  for sig_i, sig in enumerate(signatures):
-    if sig.name not in kwargs:
-      if sig.required:
-        raise TypeError(f'Missing required argument: {sig.name}')
-      continue  # skip missing fields, they can be set to default on the server
-    data = kwargs[sig.name]
-    serializer = serializer_from_signature(sig)
-    # TODO determine if any (esp the first) var can go in the proto without parts
-    # and whether to put this in the signature or dynamically determine it
-    if inline_first_value and sig_i == 0 and id(data) not in _ZERO_VALUE_IDS:
-      # inlined first value; note data must not be empty or 0 to inline, since that
-      # will correspond to the missing value case (which uses function defaults).
-      # empty values are put explicitly in parts.
-      serializer.serialize(proto, data)
-    else:
-      # add the part to the proto
-      part = proto.parts.add()
-      part.id = sig.name
-      serializer.serialize(part.data, data)
-  return proto
-
-
-def deserialize(proto, signatures, inference_params={}, is_output=False):
-  '''
-  Deserialize the given proto into kwargs using the given signatures.
-  '''
-  if isinstance(signatures, dict):
-    signatures = [signatures]  # TODO update return key level and make consistnet
-  kwargs = {}
-  parts_by_name = {part.id: part for part in proto.parts}
-  for sig_i, sig in enumerate(signatures):
-    serializer = serializer_from_signature(sig)
-    part = parts_by_name.get(sig.name)
-    inference_params_value = inference_params.get(sig.name)
-    if part is not None:
-      kwargs[sig.name] = serializer.deserialize(part.data)
-    elif inference_params_value is not None:
-      kwargs[sig.name] = inference_params_value
-    else:
-      if sig_i == 0:
-        # possible inlined first value
-        value = serializer.deserialize(proto)
-        if id(value) not in _ZERO_VALUE_IDS:
-          # note missing values are not set to defaults, since they are not in parts
-          # an actual zero value passed in must be set in an explicit part
-          kwargs[sig.name] = value
-          continue
-
-      if sig.required or is_output:  # TODO allow optional outputs?
-        raise ValueError(f'Missing required field: {sig.name}')
-      continue
-  if len(kwargs) == 1 and 'return' in kwargs:
-    return kwargs['return']
-  return kwargs
-
-
-def get_stream_from_signature(signatures):
-  '''
-  Get the stream signature from the given signatures.
-  '''
-  for sig in signatures:
-    if sig.iterator:
-      return sig
-  return None
-
-
-def _is_empty_proto_data(data):
-  if isinstance(data, np.ndarray):
-    return False
-  if isinstance(data, MessageProto):
-    return not data.ByteSize()
-  return not data
-
-
-def _normalize_type(tp):
-  '''
-  Normalize the types for the given parameter.
-  Returns the normalized type and whether the parameter is streaming.
-  '''
-  # stream type indicates streaming, not part of the data itself
-  # it can only be used at the top-level of the var type
-  streaming = (get_origin(tp) == data_types.Stream)
-  if streaming:
-    tp = get_args(tp)[0]
-
-  return _normalize_data_type(tp), streaming
-
-
-def _normalize_data_type(tp):
-
-  # jsonable list and dict, these can be serialized as json
-  # (tuple we want to keep as a tuple for args and returns, so don't include here)
-  if tp in (list, dict) or (get_origin(tp) in (list, dict) and _is_jsonable(tp)):
-    return data_types.JSON
-
-  # container types that need to be serialized as parts
-  if get_origin(tp) == list and get_args(tp):
-    return List[_normalize_data_type(get_args(tp)[0])]
-
-  if get_origin(tp) == tuple:
-    if not get_args(tp):
-      raise TypeError('Tuple must have types specified')
-    return Tuple[tuple(_normalize_data_type(val) for val in get_args(tp))]
-
-  if isinstance(tp, (tuple, list)):
-    return Tuple[tuple(_normalize_data_type(val) for val in tp)]
-
-  if tp == data_types.NamedFields:
-    raise TypeError('NamedFields must have types specified')
-
-  # Handle dynamically generated NamedFields subclasses with annotations
-  if isinstance(tp, type) and issubclass(tp, data_types.NamedFields) and hasattr(
-      tp, '__annotations__'):
-    return data_types.NamedFields(
-        **{k: _normalize_data_type(v)
-           for k, v in tp.__annotations__.items()})
-
-  if isinstance(tp, (dict, data_types.NamedFields)):
-    return data_types.NamedFields(**{name: _normalize_data_type(val) for name, val in tp.items()})
-
-  # check if numpy array type, and if so, use ndarray
-  if get_origin(tp) == np.ndarray:
-    return np.ndarray
-
-  # check for PIL images (sometimes types use the module, sometimes the class)
-  # set these to use the Image data handler
-  if tp in (data_types.Image, PIL.Image.Image):
-    return data_types.Image
-
-  if tp == PIL.Image:
-    raise TypeError('Use PIL.Image.Image instead of PIL.Image module')
-
-  # check for known data types
-  try:
-    if tp in _DATA_TYPES:
-      return tp
-  except TypeError:
-    pass  # not hashable type
-
-  raise TypeError(f'Unsupported type: {tp}')
-
-
-def _is_jsonable(tp):
-  if tp in (dict, list, tuple, str, int, float, bool, type(None)):
-    return True
-  if get_origin(tp) in (tuple, list, dict):
-    return all(_is_jsonable(val) for val in get_args(tp))
-  return False
-
-
-# type: name of the data type
-# data_field: name of the field in the data proto
-# serializer: serializer for the data type
-_DataType = namedtuple('_DataType', ('type', 'serializer'))
-
-_NON_INLINABLE_TYPES = {
-    resources_pb2.ModelTypeField.DataType.NAMED_FIELDS,
-    resources_pb2.ModelTypeField.DataType.TUPLE, resources_pb2.ModelTypeField.DataType.LIST
-}
-_ZERO_VALUE_IDS = {id(None), id(''), id(b''), id(0), id(0.0), id(False)}
-
-# simple, non-container types that correspond directly to a data field
-_DATA_TYPES = {
-    str:
-        _DataType(resources_pb2.ModelTypeField.DataType.STR,
-                  AtomicFieldSerializer('string_value')),
-    bytes:
-        _DataType(resources_pb2.ModelTypeField.DataType.BYTES,
-                  AtomicFieldSerializer('bytes_value')),
-    int:
-        _DataType(resources_pb2.ModelTypeField.DataType.INT, AtomicFieldSerializer('int_value')),
-    float:
-        _DataType(resources_pb2.ModelTypeField.DataType.FLOAT,
-                  AtomicFieldSerializer('float_value')),
-    bool:
-        _DataType(resources_pb2.ModelTypeField.DataType.BOOL, AtomicFieldSerializer('bool_value')),
-    np.ndarray:
-        _DataType(resources_pb2.ModelTypeField.DataType.NDARRAY, NDArraySerializer('ndarray')),
-    data_types.JSON:
-        _DataType(resources_pb2.ModelTypeField.DataType.JSON_DATA, JSONSerializer('string_value')
-                 ),  # TODO change to json_value when new proto is ready
-    data_types.Text:
-        _DataType(resources_pb2.ModelTypeField.DataType.TEXT,
-                  MessageSerializer('text', data_types.Text)),
-    data_types.Image:
-        _DataType(resources_pb2.ModelTypeField.DataType.IMAGE,
-                  MessageSerializer('image', data_types.Image)),
-    data_types.Concept:
-        _DataType(resources_pb2.ModelTypeField.DataType.CONCEPT,
-                  MessageSerializer('concepts', data_types.Concept)),
-    data_types.Region:
-        _DataType(resources_pb2.ModelTypeField.DataType.REGION,
-                  MessageSerializer('regions', data_types.Region)),
-    data_types.Frame:
-        _DataType(resources_pb2.ModelTypeField.DataType.FRAME,
-                  MessageSerializer('frames', data_types.Frame)),
-    data_types.Audio:
-        _DataType(resources_pb2.ModelTypeField.DataType.AUDIO,
-                  MessageSerializer('audio', data_types.Audio)),
-    data_types.Video:
-        _DataType(resources_pb2.ModelTypeField.DataType.VIDEO,
-                  MessageSerializer('video', data_types.Video)),
-}
-
-_SERIALIZERS_BY_TYPE_ENUM = {dt.type: dt.serializer for dt in _DATA_TYPES.values()}
-
-
-class CompatibilitySerializer(Serializer):
-  '''
-  Serialization of basic value types, used for backwards compatibility
-  with older models that don't have type signatures.
-  '''
-
-  def serialize(self, data_proto, value):
-    tp = _normalize_data_type(type(value))
-
-    try:
-      serializer = _DATA_TYPES[tp].serializer
-    except (KeyError, TypeError):
-      raise TypeError(f'serializer currently only supports basic types, got {tp}')
-
-    serializer.serialize(data_proto, value)
-
-  def deserialize(self, data_proto):
-    fields = [k.name for k, _ in data_proto.ListFields()]
-    if 'parts' in fields:
-      raise ValueError('serializer does not support parts')
-    serializers = [
-        serializer for serializer in _SERIALIZERS_BY_TYPE_ENUM.values()
-        if serializer.field_name in fields
-    ]
-    if not serializers:
-      raise ValueError('Returned data not recognized')
-    if len(serializers) != 1:
-      raise ValueError('Only single output supported for serializer')
-    serializer = serializers[0]
-    return serializer.deserialize(data_proto)
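For context, a minimal sketch of how the helpers deleted above fit together; it runs only against the old clarifai==11.1.7rc3 wheel (the module is gone in 11.2.1), and the MyModel class and its predict method are hypothetical examples, not part of either release:

# Hypothetical usage of the deleted module; works on clarifai==11.1.7rc3 only.
from clarifai.runners.utils import data_types
from clarifai.runners.utils.method_signatures import (build_function_signature,
                                                      deserialize, serialize)


class MyModel:

  def predict(self, prompt: str, temperature: float = 0.5) -> data_types.Text:
    """Hypothetical method; no Stream[...] annotations, so UNARY_UNARY."""
    return data_types.Text(prompt)


sig = build_function_signature(MyModel.predict)  # strips 'self', requires a return annotation
assert sig.name == 'predict'

# Round-trip call arguments through a resources_pb2.Data proto: the first
# inlinable value lands directly on the proto, the rest go into parts[].
proto = serialize({'prompt': 'hi', 'temperature': 0.25}, list(sig.input_fields))
kwargs = deserialize(proto, list(sig.input_fields))
assert kwargs == {'prompt': 'hi', 'temperature': 0.25}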
--- a/clarifai/runners/utils/serializers.py
+++ /dev/null
@@ -1,222 +0,0 @@
-import json
-from typing import Dict, Iterable
-
-import numpy as np
-from clarifai_grpc.grpc.api import resources_pb2
-
-from clarifai.runners.utils import data_types
-
-
-class Serializer:
-
-  def serialize(self, data_proto, value):
-    pass
-
-  def deserialize(self, data_proto):
-    pass
-
-  def handles_list(self):
-    return False
-
-
-def is_repeated_field(field_name):
-  descriptor = resources_pb2.Data.DESCRIPTOR.fields_by_name.get(field_name)
-  return descriptor and descriptor.label == descriptor.LABEL_REPEATED
-
-
-class AtomicFieldSerializer(Serializer):
-
-  def __init__(self, field_name):
-    self.field_name = field_name
-
-  def serialize(self, data_proto, value):
-    try:
-      setattr(data_proto, self.field_name, value)
-    except TypeError as e:
-      raise TypeError(f"Incompatible type for {self.field_name}: {type(value)}") from e
-
-  def deserialize(self, data_proto):
-    return getattr(data_proto, self.field_name)
-
-
-class MessageSerializer(Serializer):
-
-  def __init__(self, field_name, message_class):
-    self.field_name = field_name
-    self.message_class = message_class
-    self.is_repeated_field = is_repeated_field(field_name)
-
-  def handles_list(self):
-    return self.is_repeated_field
-
-  def serialize(self, data_proto, value):
-    value = self.message_class.from_value(value).to_proto()
-    dst = getattr(data_proto, self.field_name)
-    try:
-      if self.is_repeated_field:
-        dst.add().CopyFrom(value)
-      else:
-        dst.CopyFrom(value)
-    except TypeError as e:
-      raise TypeError(f"Incompatible type for {self.field_name}: {type(value)}") from e
-
-  def serialize_list(self, data_proto, values):
-    assert self.is_repeated_field
-    dst = getattr(data_proto, self.field_name)
-    dst.extend([self.message_class.from_value(value).to_proto() for value in values])
-
-  def deserialize(self, data_proto):
-    src = getattr(data_proto, self.field_name)
-    if self.is_repeated_field:
-      values = [self.message_class.from_proto(x) for x in src]
-      if len(values) == 1:
-        return values[0]
-      return values if values else None
-    else:
-      if not data_proto.HasField(self.field_name):
-        return None
-      return self.message_class.from_proto(src)
-
-  def deserialize_list(self, data_proto):
-    assert self.is_repeated_field
-    src = getattr(data_proto, self.field_name)
-    return [self.message_class.from_proto(x) for x in src]
-
-
-class NDArraySerializer(Serializer):
-
-  def __init__(self, field_name, as_list=False):
-    self.field_name = field_name
-    self.as_list = as_list
-
-  def serialize(self, data_proto, value):
-    if self.as_list and not isinstance(value, Iterable):
-      raise TypeError(f"Expected list, got {type(value)}")
-    value = np.asarray(value)
-    if not np.issubdtype(value.dtype, np.number):
-      raise TypeError(f"Expected number array, got {value.dtype}")
-    proto = getattr(data_proto, self.field_name)
-    proto.buffer = value.tobytes()
-    proto.shape.extend(value.shape)
-    proto.dtype = str(value.dtype)
-
-  def deserialize(self, data_proto):
-    proto = getattr(data_proto, self.field_name)
-    if not proto.buffer:
-      return None
-    array = np.frombuffer(proto.buffer, dtype=np.dtype(proto.dtype)).reshape(proto.shape)
-    if self.as_list:
-      return array.tolist()
-    return array
-
-
-class JSONSerializer(Serializer):
-
-  def __init__(self, field_name, type=None):
-    self.field_name = field_name
-    self.type = type
-
-  def serialize(self, data_proto, value):
-    #if self.type is not None and not isinstance(value, self.type):
-    #  raise TypeError(f"Expected {self.type}, got {type(value)}")
-    try:
-      setattr(data_proto, self.field_name, json.dumps(value))
-    except TypeError as e:
-      raise TypeError(f"Incompatible type for {self.field_name}: {type(value)}") from e
-
-  def deserialize(self, data_proto):
-    value = getattr(data_proto, self.field_name)
-    if not value:
-      return None
-    return json.loads(value)
-
-
-class ListSerializer(Serializer):
-
-  def __init__(self, inner_serializer):
-    self.field_name = 'parts'
-    self.inner_serializer = inner_serializer
-
-  def handles_list(self):
-    # if handles_list() is called on this serializer, it means that we're
-    # trying to serialize a list of lists. In this case, we need to use
-    # parts[] for the outer list, so we return False here (we can't inline it).
-    return False
-
-  def serialize(self, data_proto, value):
-    if not isinstance(value, Iterable):
-      raise TypeError(f"Expected iterable, got {type(value)}")
-    if self.inner_serializer.handles_list():
-      self.inner_serializer.serialize_list(data_proto, value)
-    else:
-      for item in value:
-        part = data_proto.parts.add()
-        self.inner_serializer.serialize(part.data, item)
-
-  def deserialize(self, data_proto):
-    if self.inner_serializer.handles_list():
-      return self.inner_serializer.deserialize_list(data_proto)
-    return [self.inner_serializer.deserialize(part.data) for part in data_proto.parts]
-
-
-class TupleSerializer(Serializer):
-
-  def __init__(self, inner_serializers):
-    self.field_name = 'parts'
-    self.inner_serializers = inner_serializers
-
-  def serialize(self, data_proto, value):
-    if not isinstance(value, (tuple, list)):
-      raise TypeError(f"Expected tuple, got {type(value)}")
-    if len(value) != len(self.inner_serializers):
-      raise ValueError(f"Expected tuple of length {len(self.inner_serializers)}, got {len(value)}")
-    for i, (serializer, item) in enumerate(zip(self.inner_serializers, value)):
-      part = data_proto.parts.add()
-      part.id = str(i)
-      serializer.serialize(part.data, item)
-
-  def deserialize(self, data_proto):
-    if not data_proto.parts and self.inner_serializers:
-      return None
-    if len(data_proto.parts) != len(self.inner_serializers):
-      raise ValueError(
-          f"Expected tuple of length {len(self.inner_serializers)}, got {len(data_proto.parts)}")
-    return tuple(
-        serializer.deserialize(part.data)
-        for serializer, part in zip(self.inner_serializers, data_proto.parts))
-
-
-class NamedFieldsSerializer(Serializer):
-
-  def __init__(self, named_field_serializers: Dict[str, Serializer]):
-    self.field_name = 'parts'
-    self.named_field_serializers = named_field_serializers
-
-  def serialize(self, data_proto, value):
-    for name, serializer in self.named_field_serializers.items():
-      if name not in value:
-        raise TypeError(f"Missing field {name}")
-      part = self._get_part(data_proto, name, add=True)
-      serializer.serialize(part.data, value[name])
-
-  def deserialize(self, data_proto):
-    if not data_proto.parts and self.named_field_serializers:
-      return None
-    value = data_types.NamedFields()
-    for name, serializer in self.named_field_serializers.items():
-      part = self._get_part(data_proto, name)
-      value[name] = serializer.deserialize(part.data)
-    return value
-
-  def _get_part(self, data_proto, name, add=False):
-    for part in data_proto.parts:
-      if part.id == name:
-        return part
-    if add:
-      part = data_proto.parts.add()
-      part.id = name
-      return part
-    raise TypeError(f"Missing part with key {name}")
-
-
-# TODO dict serializer, maybe json only?
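As a sanity check on the wire format these deleted serializers implemented, a minimal round-trip through a Data proto; again this is a sketch that runs only against clarifai==11.1.7rc3, using the same 'ndarray' field name the _DATA_TYPES table in the first hunk wires up:

# Works on clarifai==11.1.7rc3 only; serializers.py is deleted in 11.2.1.
import numpy as np
from clarifai_grpc.grpc.api import resources_pb2

from clarifai.runners.utils.serializers import NDArraySerializer

ser = NDArraySerializer('ndarray')  # writes into Data.ndarray

proto = resources_pb2.Data()
arr = np.arange(6, dtype=np.float32).reshape(2, 3)
ser.serialize(proto, arr)     # stores buffer, shape and dtype on the proto
out = ser.deserialize(proto)  # np.frombuffer + reshape rebuilds the array
assert np.array_equal(arr, out)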