clarifai 11.1.7rc3__py3-none-any.whl → 11.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. clarifai/__init__.py +1 -1
  2. clarifai/cli/base.py +18 -7
  3. clarifai/cli/compute_cluster.py +8 -1
  4. clarifai/cli/deployment.py +5 -1
  5. clarifai/cli/model.py +25 -38
  6. clarifai/cli/nodepool.py +4 -1
  7. clarifai/client/model.py +393 -157
  8. clarifai/runners/__init__.py +7 -2
  9. clarifai/runners/models/model_builder.py +12 -80
  10. clarifai/runners/models/model_class.py +28 -279
  11. clarifai/runners/models/model_run_locally.py +88 -19
  12. clarifai/runners/models/model_runner.py +0 -2
  13. clarifai/runners/models/model_servicer.py +2 -11
  14. clarifai/runners/utils/data_handler.py +210 -271
  15. clarifai/utils/cli.py +9 -0
  16. {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/METADATA +16 -4
  17. clarifai-11.2.1.dist-info/RECORD +101 -0
  18. {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/WHEEL +1 -1
  19. clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
  20. clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
  21. clarifai/__pycache__/errors.cpython-310.pyc +0 -0
  22. clarifai/__pycache__/versions.cpython-310.pyc +0 -0
  23. clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
  24. clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
  25. clarifai/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
  26. clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
  27. clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
  28. clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
  29. clarifai/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
  30. clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
  31. clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
  32. clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
  33. clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
  34. clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
  35. clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
  36. clarifai/client/__pycache__/compute_cluster.cpython-310.pyc +0 -0
  37. clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
  38. clarifai/client/__pycache__/deployment.cpython-310.pyc +0 -0
  39. clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
  40. clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
  41. clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
  42. clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
  43. clarifai/client/__pycache__/nodepool.cpython-310.pyc +0 -0
  44. clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
  45. clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
  46. clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
  47. clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
  48. clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
  49. clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
  50. clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
  51. clarifai/client/cli/__init__.py +0 -0
  52. clarifai/client/cli/__pycache__/__init__.cpython-310.pyc +0 -0
  53. clarifai/client/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
  54. clarifai/client/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
  55. clarifai/client/cli/base_cli.py +0 -88
  56. clarifai/client/cli/model_cli.py +0 -29
  57. clarifai/client/model_client.py +0 -448
  58. clarifai/constants/__pycache__/base.cpython-310.pyc +0 -0
  59. clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
  60. clarifai/constants/__pycache__/input.cpython-310.pyc +0 -0
  61. clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
  62. clarifai/constants/__pycache__/rag.cpython-310.pyc +0 -0
  63. clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
  64. clarifai/constants/__pycache__/workflow.cpython-310.pyc +0 -0
  65. clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
  66. clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  67. clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
  68. clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
  69. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
  70. clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
  71. clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
  72. clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
  73. clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
  74. clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
  75. clarifai/datasets/upload/__pycache__/multimodal.cpython-310.pyc +0 -0
  76. clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
  77. clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
  78. clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-39.pyc +0 -0
  79. clarifai/models/__pycache__/__init__.cpython-39.pyc +0 -0
  80. clarifai/modules/__pycache__/__init__.cpython-39.pyc +0 -0
  81. clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
  82. clarifai/rag/__pycache__/__init__.cpython-39.pyc +0 -0
  83. clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
  84. clarifai/rag/__pycache__/rag.cpython-39.pyc +0 -0
  85. clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
  86. clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
  87. clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
  88. clarifai/runners/dockerfile_template/Dockerfile.cpu.template +0 -31
  89. clarifai/runners/dockerfile_template/Dockerfile.cuda.template +0 -42
  90. clarifai/runners/dockerfile_template/Dockerfile.nim +0 -71
  91. clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
  92. clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
  93. clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
  94. clarifai/runners/models/__pycache__/base_typed_model.cpython-39.pyc +0 -0
  95. clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
  96. clarifai/runners/models/__pycache__/model_run_locally.cpython-310-pytest-7.1.2.pyc +0 -0
  97. clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
  98. clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
  99. clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
  100. clarifai/runners/models/model_class_refract.py +0 -80
  101. clarifai/runners/models/model_upload.py +0 -607
  102. clarifai/runners/models/temp.py +0 -25
  103. clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  104. clarifai/runners/utils/__pycache__/__init__.cpython-38.pyc +0 -0
  105. clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  106. clarifai/runners/utils/__pycache__/buffered_stream.cpython-310.pyc +0 -0
  107. clarifai/runners/utils/__pycache__/buffered_stream.cpython-38.pyc +0 -0
  108. clarifai/runners/utils/__pycache__/buffered_stream.cpython-39.pyc +0 -0
  109. clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
  110. clarifai/runners/utils/__pycache__/constants.cpython-310.pyc +0 -0
  111. clarifai/runners/utils/__pycache__/constants.cpython-38.pyc +0 -0
  112. clarifai/runners/utils/__pycache__/constants.cpython-39.pyc +0 -0
  113. clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
  114. clarifai/runners/utils/__pycache__/data_handler.cpython-38.pyc +0 -0
  115. clarifai/runners/utils/__pycache__/data_handler.cpython-39.pyc +0 -0
  116. clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
  117. clarifai/runners/utils/__pycache__/data_utils.cpython-38.pyc +0 -0
  118. clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
  119. clarifai/runners/utils/__pycache__/grpc_server.cpython-310.pyc +0 -0
  120. clarifai/runners/utils/__pycache__/grpc_server.cpython-38.pyc +0 -0
  121. clarifai/runners/utils/__pycache__/grpc_server.cpython-39.pyc +0 -0
  122. clarifai/runners/utils/__pycache__/health.cpython-310.pyc +0 -0
  123. clarifai/runners/utils/__pycache__/health.cpython-38.pyc +0 -0
  124. clarifai/runners/utils/__pycache__/health.cpython-39.pyc +0 -0
  125. clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
  126. clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
  127. clarifai/runners/utils/__pycache__/logging.cpython-38.pyc +0 -0
  128. clarifai/runners/utils/__pycache__/logging.cpython-39.pyc +0 -0
  129. clarifai/runners/utils/__pycache__/stream_source.cpython-310.pyc +0 -0
  130. clarifai/runners/utils/__pycache__/stream_source.cpython-39.pyc +0 -0
  131. clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
  132. clarifai/runners/utils/__pycache__/url_fetcher.cpython-38.pyc +0 -0
  133. clarifai/runners/utils/__pycache__/url_fetcher.cpython-39.pyc +0 -0
  134. clarifai/runners/utils/data_handler_refract.py +0 -213
  135. clarifai/runners/utils/data_types.py +0 -427
  136. clarifai/runners/utils/logger.py +0 -0
  137. clarifai/runners/utils/method_signatures.py +0 -477
  138. clarifai/runners/utils/serializers.py +0 -222
  139. clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
  140. clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
  141. clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  142. clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  143. clarifai/utils/__pycache__/cli.cpython-310.pyc +0 -0
  144. clarifai/utils/__pycache__/constants.cpython-310.pyc +0 -0
  145. clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
  146. clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
  147. clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
  148. clarifai/utils/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
  149. clarifai/utils/evaluation/__pycache__/main.cpython-39.pyc +0 -0
  150. clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
  151. clarifai/workflows/__pycache__/__init__.cpython-39.pyc +0 -0
  152. clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
  153. clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
  154. clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
  155. clarifai-11.1.7rc3.dist-info/RECORD +0 -237
  156. {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/entry_points.txt +0 -0
  157. {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info/licenses}/LICENSE +0 -0
  158. {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/top_level.txt +0 -0
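To reproduce a file-level comparison like the table above, here is a minimal sketch (not the tool that generated this page), assuming both wheels have already been downloaded into the working directory, e.g. with `pip download clarifai==11.1.7rc3 --no-deps` and `pip download clarifai==11.2.1 --no-deps`:

```python
import zipfile


def wheel_names(path):
    # A wheel is a zip archive; namelist() returns every file it packages.
    with zipfile.ZipFile(path) as zf:
        return set(zf.namelist())


old = wheel_names("clarifai-11.1.7rc3-py3-none-any.whl")
new = wheel_names("clarifai-11.2.1-py3-none-any.whl")
print("removed:", sorted(old - new))
print("added:", sorted(new - old))
```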
clarifai/runners/utils/method_signatures.py
@@ -1,477 +0,0 @@
- import inspect
- import json
- from collections import namedtuple
- from typing import List, Tuple, get_args, get_origin
-
- import numpy as np
- import PIL.Image
- import yaml
- from clarifai_grpc.grpc.api import resources_pb2
- from google.protobuf.json_format import MessageToDict, ParseDict
- from google.protobuf.message import Message as MessageProto
-
- from clarifai.runners.utils import data_types
- from clarifai.runners.utils.serializers import (
-     AtomicFieldSerializer, JSONSerializer, ListSerializer, MessageSerializer,
-     NamedFieldsSerializer, NDArraySerializer, Serializer, TupleSerializer)
-
-
- def build_function_signature(func):
-   '''
-   Build a signature for the given function.
-   '''
-   sig = inspect.signature(func)
-
-   # check if func is bound, and if not, remove self/cls
-   if getattr(func, '__self__', None) is None and sig.parameters and list(
-       sig.parameters.values())[0].name in ('self', 'cls'):
-     sig = sig.replace(parameters=list(sig.parameters.values())[1:])
-
-   return_annotation = sig.return_annotation
-   if return_annotation == inspect.Parameter.empty:
-     raise TypeError('Function must have a return annotation')
-
-   input_sigs = []
-   input_streaming = []
-   for p in sig.parameters.values():
-     model_type_field, _, streaming = build_variable_signature(p.name, p.annotation, p.default)
-     input_sigs.append(model_type_field)
-     input_streaming.append(streaming)
-
-   output_sig, output_type, output_streaming = build_variable_signature(
-       'return', return_annotation, is_output=True)
-   # TODO: flatten out "return" layer if not needed
-
-   # check for streams and determine method type
-   if sum(input_streaming) > 1:
-     raise TypeError('streaming methods must have at most one streaming input')
-   input_streaming = any(input_streaming)
-   if not (input_streaming or output_streaming):
-     method_type = 'UNARY_UNARY'
-   elif not input_streaming and output_streaming:
-     method_type = 'UNARY_STREAMING'
-   elif input_streaming and output_streaming:
-     method_type = 'STREAMING_STREAMING'
-   else:
-     raise TypeError('stream methods with streaming inputs must have streaming outputs')
-
-   method_signature = resources_pb2.MethodSignature()
-
-   method_signature.name = func.__name__
-   method_signature.method_type = getattr(resources_pb2.RunnerMethodType, method_type)
-   assert method_type in ('UNARY_UNARY', 'UNARY_STREAMING', 'STREAMING_STREAMING')
-   # method_signature.method_type = method_type
-   method_signature.description = inspect.cleandoc(func.__doc__ or '')
-   # method_signature.annotations_json = json.dumps(_get_annotations_source(func))
-
-   method_signature.input_fields.extend(input_sigs)
-   method_signature.output_fields.append(output_sig)
-   return method_signature
-
-
- # def _get_annotations_source(func):
- #   """Extracts raw annotation strings from the function source."""
- #   source = inspect.getsource(func)  # Get function source code
- #   source = textwrap.dedent(source)  # Dedent source code
- #   tree = ast.parse(source)  # Parse into AST
- #   func_node = next(node for node in tree.body
- #                    if isinstance(node, ast.FunctionDef))  # Get function node
-
- #   annotations = {}
- #   for arg in func_node.args.args:  # Process arguments
- #     if arg.annotation:
- #       annotations[arg.arg] = ast.unparse(arg.annotation)  # Get raw annotation string
-
- #   if func_node.returns:  # Process return type
- #     annotations["return"] = ast.unparse(func_node.returns)
-
- #   return annotations
-
-
- def build_variable_signature(name, annotation, default=inspect.Parameter.empty, is_output=False):
-   '''
-   Build a data proto signature and get the normalized python type for the given annotation.
-   '''
-
-   # check valid names (should already be constrained by python naming, but check anyway)
-   if not name.isidentifier():
-     raise ValueError(f'Invalid variable name: {name}')
-
-   # get fields for each variable based on type
-   tp, streaming = _normalize_type(annotation)
-
-   sig = resources_pb2.ModelTypeField()
-   sig.name = name
-   sig.iterator = streaming
-
-   if not is_output:
-     sig.required = (default is inspect.Parameter.empty)
-     if not sig.required:
-       sig.default = str(default)
-
-   _fill_signature_type(sig, tp)
-
-   return sig, type, streaming
-
-
- def _fill_signature_type(sig, tp):
-   try:
-     if tp in _DATA_TYPES:
-       sig.type = _DATA_TYPES[tp].type
-       return
-   except TypeError:
-     pass  # not hashable type
-
-   # Handle NamedFields with annotations
-   # Check for dynamically generated NamedFields subclasses (from type annotations)
-   if inspect.isclass(tp) and issubclass(tp, data_types.NamedFields) and hasattr(
-       tp, '__annotations__'):
-     sig.type = resources_pb2.ModelTypeField.DataType.NAMED_FIELDS
-     for name, inner_type in tp.__annotations__.items():
-       inner_sig = sig.type_args.add()
-       inner_sig.name = name
-       _fill_signature_type(inner_sig, inner_type)
-     return
-
-   # Handle NamedFields instances (dict-like)
-   if isinstance(tp, data_types.NamedFields):
-     sig.type = resources_pb2.ModelTypeField.DataType.NAMED_FIELDS
-     for name, inner_type in tp.items():
-       inner_sig = sig.type_args.add()
-       inner_sig.name = name
-       _fill_signature_type(inner_sig, inner_type)
-     return
-
-   origin = get_origin(tp)
-   args = get_args(tp)
-
-   # Handle Tuple type
-   if origin == tuple:
-     sig.type = resources_pb2.ModelTypeField.DataType.TUPLE
-     for inner_type in args:
-       inner_sig = sig.type_args.add()
-       _fill_signature_type(inner_sig, inner_type)
-     return
-
-   # Handle List type
-   if origin == list:
-     sig.type = resources_pb2.ModelTypeField.DataType.LIST
-     inner_sig = sig.type_args.add()
-     _fill_signature_type(inner_sig, args[0])
-     return
-
-   raise TypeError(f'Unsupported type: {tp}')
-
-
- def serializer_from_signature(signature):
-   '''
-   Get the serializer for the given signature.
-   '''
-   if signature.type in _SERIALIZERS_BY_TYPE_ENUM:
-     return _SERIALIZERS_BY_TYPE_ENUM[signature.type]
-   if signature.type == resources_pb2.ModelTypeField.DataType.LIST:
-     return ListSerializer(serializer_from_signature(signature.type_args[0]))
-   if signature.type == resources_pb2.ModelTypeField.DataType.TUPLE:
-     return TupleSerializer([serializer_from_signature(sig) for sig in signature.type_args])
-   if signature.type == resources_pb2.ModelTypeField.DataType.NAMED_FIELDS:
-     return NamedFieldsSerializer(
-         {sig.name: serializer_from_signature(sig)
-          for sig in signature.type_args})
-   raise ValueError(f'Unsupported type: {signature.type}')
-
-
- def signatures_to_json(signatures):
-   assert isinstance(
-       signatures, dict), 'Expected dict of signatures {name: signature}, got %s' % type(signatures)
-   # TODO change to proto when ready
-   signatures = {name: MessageToDict(sig) for name, sig in signatures.items()}
-   return json.dumps(signatures)
-
-
- def signatures_from_json(json_str):
-   signatures_dict = json.loads(json_str)
-   assert isinstance(signatures_dict, dict), "Expected JSON to decode into a dictionary"
-
-   return {
-       name: ParseDict(sig_dict, resources_pb2.MethodSignature())
-       for name, sig_dict in signatures_dict.items()
-   }
-   # d = json.loads(json_str, object_pairs_hook=_SignatureDict)
-   # return d
-
-
- def signatures_to_yaml(signatures):
-   # XXX go in/out of json to get the correct format and python dict types
-   d = json.loads(signatures_to_json(signatures))
-
-   def _filter_empty(d):
-     if isinstance(d, (list, tuple)):
-       return [_filter_empty(v) for v in d if v]
-     if isinstance(d, dict):
-       return {k: _filter_empty(v) for k, v in d.items() if v}
-     return d
-
-   return yaml.dump(_filter_empty(d), default_flow_style=False)
-
-
- def signatures_from_yaml(yaml_str):
-   d = yaml.safe_load(yaml_str)
-   return signatures_from_json(json.dumps(d))
-
-
- def serialize(kwargs, signatures, proto=None, is_output=False):
-   '''
-   Serialize the given kwargs into the proto using the given signatures.
-   '''
-   if proto is None:
-     proto = resources_pb2.Data()
-   unknown = set(kwargs.keys()) - set(sig.name for sig in signatures)
-   if unknown:
-     if unknown == {'return'} and len(signatures) > 1:
-       raise TypeError('Got a single return value, but expected multiple outputs {%s}' %
-                       ', '.join(sig.name for sig in signatures))
-     raise TypeError('Got unexpected key: %s' % ', '.join(unknown))
-   inline_first_value = False
-   if (is_output and len(signatures) == 1 and signatures[0].name == 'return' and
-       len(kwargs) == 1 and 'return' in kwargs):
-     # if there is only one output, flatten it and return directly
-     inline_first_value = True
-   if signatures and signatures[0].type not in _NON_INLINABLE_TYPES:
-     inline_first_value = True
-   for sig_i, sig in enumerate(signatures):
-     if sig.name not in kwargs:
-       if sig.required:
-         raise TypeError(f'Missing required argument: {sig.name}')
-       continue  # skip missing fields, they can be set to default on the server
-     data = kwargs[sig.name]
-     serializer = serializer_from_signature(sig)
-     # TODO determine if any (esp the first) var can go in the proto without parts
-     # and whether to put this in the signature or dynamically determine it
-     if inline_first_value and sig_i == 0 and id(data) not in _ZERO_VALUE_IDS:
-       # inlined first value; note data must not be empty or 0 to inline, since that
-       # will correspond to the missing value case (which uses function defaults).
-       # empty values are put explicitly in parts.
-       serializer.serialize(proto, data)
-     else:
-       # add the part to the proto
-       part = proto.parts.add()
-       part.id = sig.name
-       serializer.serialize(part.data, data)
-   return proto
-
-
- def deserialize(proto, signatures, inference_params={}, is_output=False):
-   '''
-   Deserialize the given proto into kwargs using the given signatures.
-   '''
-   if isinstance(signatures, dict):
-     signatures = [signatures]  # TODO update return key level and make consistnet
-   kwargs = {}
-   parts_by_name = {part.id: part for part in proto.parts}
-   for sig_i, sig in enumerate(signatures):
-     serializer = serializer_from_signature(sig)
-     part = parts_by_name.get(sig.name)
-     inference_params_value = inference_params.get(sig.name)
-     if part is not None:
-       kwargs[sig.name] = serializer.deserialize(part.data)
-     elif inference_params_value is not None:
-       kwargs[sig.name] = inference_params_value
-     else:
-       if sig_i == 0:
-         # possible inlined first value
-         value = serializer.deserialize(proto)
-         if id(value) not in _ZERO_VALUE_IDS:
-           # note missing values are not set to defaults, since they are not in parts
-           # an actual zero value passed in must be set in an explicit part
-           kwargs[sig.name] = value
-         continue
-
-       if sig.required or is_output:  # TODO allow optional outputs?
-         raise ValueError(f'Missing required field: {sig.name}')
-       continue
-   if len(kwargs) == 1 and 'return' in kwargs:
-     return kwargs['return']
-   return kwargs
-
-
- def get_stream_from_signature(signatures):
-   '''
-   Get the stream signature from the given signatures.
-   '''
-   for sig in signatures:
-     if sig.iterator:
-       return sig
-   return None
-
-
- def _is_empty_proto_data(data):
-   if isinstance(data, np.ndarray):
-     return False
-   if isinstance(data, MessageProto):
-     return not data.ByteSize()
-   return not data
-
-
- def _normalize_type(tp):
-   '''
-   Normalize the types for the given parameter.
-   Returns the normalized type and whether the parameter is streaming.
-   '''
-   # stream type indicates streaming, not part of the data itself
-   # it can only be used at the top-level of the var type
-   streaming = (get_origin(tp) == data_types.Stream)
-   if streaming:
-     tp = get_args(tp)[0]
-
-   return _normalize_data_type(tp), streaming
-
-
- def _normalize_data_type(tp):
-
-   # jsonable list and dict, these can be serialized as json
-   # (tuple we want to keep as a tuple for args and returns, so don't include here)
-   if tp in (list, dict) or (get_origin(tp) in (list, dict) and _is_jsonable(tp)):
-     return data_types.JSON
-
-   # container types that need to be serialized as parts
-   if get_origin(tp) == list and get_args(tp):
-     return List[_normalize_data_type(get_args(tp)[0])]
-
-   if get_origin(tp) == tuple:
-     if not get_args(tp):
-       raise TypeError('Tuple must have types specified')
-     return Tuple[tuple(_normalize_data_type(val) for val in get_args(tp))]
-
-   if isinstance(tp, (tuple, list)):
-     return Tuple[tuple(_normalize_data_type(val) for val in tp)]
-
-   if tp == data_types.NamedFields:
-     raise TypeError('NamedFields must have types specified')
-
-   # Handle dynamically generated NamedFields subclasses with annotations
-   if isinstance(tp, type) and issubclass(tp, data_types.NamedFields) and hasattr(
-       tp, '__annotations__'):
-     return data_types.NamedFields(
-         **{k: _normalize_data_type(v)
-            for k, v in tp.__annotations__.items()})
-
-   if isinstance(tp, (dict, data_types.NamedFields)):
-     return data_types.NamedFields(**{name: _normalize_data_type(val) for name, val in tp.items()})
-
-   # check if numpy array type, and if so, use ndarray
-   if get_origin(tp) == np.ndarray:
-     return np.ndarray
-
-   # check for PIL images (sometimes types use the module, sometimes the class)
-   # set these to use the Image data handler
-   if tp in (data_types.Image, PIL.Image.Image):
-     return data_types.Image
-
-   if tp == PIL.Image:
-     raise TypeError('Use PIL.Image.Image instead of PIL.Image module')
-
-   # check for known data types
-   try:
-     if tp in _DATA_TYPES:
-       return tp
-   except TypeError:
-     pass  # not hashable type
-
-   raise TypeError(f'Unsupported type: {tp}')
-
-
- def _is_jsonable(tp):
-   if tp in (dict, list, tuple, str, int, float, bool, type(None)):
-     return True
-   if get_origin(tp) in (tuple, list, dict):
-     return all(_is_jsonable(val) for val in get_args(tp))
-   return False
-
-
- # type: name of the data type
- # data_field: name of the field in the data proto
- # serializer: serializer for the data type
- _DataType = namedtuple('_DataType', ('type', 'serializer'))
-
- _NON_INLINABLE_TYPES = {
-     resources_pb2.ModelTypeField.DataType.NAMED_FIELDS,
-     resources_pb2.ModelTypeField.DataType.TUPLE, resources_pb2.ModelTypeField.DataType.LIST
- }
- _ZERO_VALUE_IDS = {id(None), id(''), id(b''), id(0), id(0.0), id(False)}
-
- # simple, non-container types that correspond directly to a data field
- _DATA_TYPES = {
-     str:
-         _DataType(resources_pb2.ModelTypeField.DataType.STR,
-                   AtomicFieldSerializer('string_value')),
-     bytes:
-         _DataType(resources_pb2.ModelTypeField.DataType.BYTES,
-                   AtomicFieldSerializer('bytes_value')),
-     int:
-         _DataType(resources_pb2.ModelTypeField.DataType.INT, AtomicFieldSerializer('int_value')),
-     float:
-         _DataType(resources_pb2.ModelTypeField.DataType.FLOAT,
-                   AtomicFieldSerializer('float_value')),
-     bool:
-         _DataType(resources_pb2.ModelTypeField.DataType.BOOL, AtomicFieldSerializer('bool_value')),
-     np.ndarray:
-         _DataType(resources_pb2.ModelTypeField.DataType.NDARRAY, NDArraySerializer('ndarray')),
-     data_types.JSON:
-         _DataType(resources_pb2.ModelTypeField.DataType.JSON_DATA, JSONSerializer('string_value')
-                  ),  # TODO change to json_value when new proto is ready
-     data_types.Text:
-         _DataType(resources_pb2.ModelTypeField.DataType.TEXT,
-                   MessageSerializer('text', data_types.Text)),
-     data_types.Image:
-         _DataType(resources_pb2.ModelTypeField.DataType.IMAGE,
-                   MessageSerializer('image', data_types.Image)),
-     data_types.Concept:
-         _DataType(resources_pb2.ModelTypeField.DataType.CONCEPT,
-                   MessageSerializer('concepts', data_types.Concept)),
-     data_types.Region:
-         _DataType(resources_pb2.ModelTypeField.DataType.REGION,
-                   MessageSerializer('regions', data_types.Region)),
-     data_types.Frame:
-         _DataType(resources_pb2.ModelTypeField.DataType.FRAME,
-                   MessageSerializer('frames', data_types.Frame)),
-     data_types.Audio:
-         _DataType(resources_pb2.ModelTypeField.DataType.AUDIO,
-                   MessageSerializer('audio', data_types.Audio)),
-     data_types.Video:
-         _DataType(resources_pb2.ModelTypeField.DataType.VIDEO,
-                   MessageSerializer('video', data_types.Video)),
- }
-
- _SERIALIZERS_BY_TYPE_ENUM = {dt.type: dt.serializer for dt in _DATA_TYPES.values()}
-
-
- class CompatibilitySerializer(Serializer):
-   '''
-   Serialization of basic value types, used for backwards compatibility
-   with older models that don't have type signatures.
-   '''
-
-   def serialize(self, data_proto, value):
-     tp = _normalize_data_type(type(value))
-
-     try:
-       serializer = _DATA_TYPES[tp].serializer
-     except (KeyError, TypeError):
-       raise TypeError(f'serializer currently only supports basic types, got {tp}')
-
-     serializer.serialize(data_proto, value)
-
-   def deserialize(self, data_proto):
-     fields = [k.name for k, _ in data_proto.ListFields()]
-     if 'parts' in fields:
-       raise ValueError('serializer does not support parts')
-     serializers = [
-         serializer for serializer in _SERIALIZERS_BY_TYPE_ENUM.values()
-         if serializer.field_name in fields
-     ]
-     if not serializers:
-       raise ValueError('Returned data not recognized')
-     if len(serializers) != 1:
-       raise ValueError('Only single output supported for serializer')
-     serializer = serializers[0]
-     return serializer.deserialize(data_proto)
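The hunk above deletes clarifai/runners/utils/method_signatures.py outright. For orientation, a hedged sketch of how its main entry point was driven under 11.1.7rc3; the `TextGenerator` class and its `predict` method are hypothetical, not part of the package:

```python
from clarifai.runners.utils.method_signatures import build_function_signature  # 11.1.7rc3 path


class TextGenerator:

    def predict(self, prompt: str, max_tokens: int = 64) -> str:
        """Generate a completion for the prompt."""
        ...


# Produces a resources_pb2.MethodSignature: name='predict', method_type
# UNARY_UNARY (no Stream annotations anywhere), input_fields 'prompt'
# (STR, required) and 'max_tokens' (INT, optional, default '64'), plus
# one STR output field named 'return'.
sig = build_function_signature(TextGenerator.predict)
```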
clarifai/runners/utils/serializers.py
@@ -1,222 +0,0 @@
- import json
- from typing import Dict, Iterable
-
- import numpy as np
- from clarifai_grpc.grpc.api import resources_pb2
-
- from clarifai.runners.utils import data_types
-
-
- class Serializer:
-
-   def serialize(self, data_proto, value):
-     pass
-
-   def deserialize(self, data_proto):
-     pass
-
-   def handles_list(self):
-     return False
-
-
- def is_repeated_field(field_name):
-   descriptor = resources_pb2.Data.DESCRIPTOR.fields_by_name.get(field_name)
-   return descriptor and descriptor.label == descriptor.LABEL_REPEATED
-
-
- class AtomicFieldSerializer(Serializer):
-
-   def __init__(self, field_name):
-     self.field_name = field_name
-
-   def serialize(self, data_proto, value):
-     try:
-       setattr(data_proto, self.field_name, value)
-     except TypeError as e:
-       raise TypeError(f"Incompatible type for {self.field_name}: {type(value)}") from e
-
-   def deserialize(self, data_proto):
-     return getattr(data_proto, self.field_name)
-
-
- class MessageSerializer(Serializer):
-
-   def __init__(self, field_name, message_class):
-     self.field_name = field_name
-     self.message_class = message_class
-     self.is_repeated_field = is_repeated_field(field_name)
-
-   def handles_list(self):
-     return self.is_repeated_field
-
-   def serialize(self, data_proto, value):
-     value = self.message_class.from_value(value).to_proto()
-     dst = getattr(data_proto, self.field_name)
-     try:
-       if self.is_repeated_field:
-         dst.add().CopyFrom(value)
-       else:
-         dst.CopyFrom(value)
-     except TypeError as e:
-       raise TypeError(f"Incompatible type for {self.field_name}: {type(value)}") from e
-
-   def serialize_list(self, data_proto, values):
-     assert self.is_repeated_field
-     dst = getattr(data_proto, self.field_name)
-     dst.extend([self.message_class.from_value(value).to_proto() for value in values])
-
-   def deserialize(self, data_proto):
-     src = getattr(data_proto, self.field_name)
-     if self.is_repeated_field:
-       values = [self.message_class.from_proto(x) for x in src]
-       if len(values) == 1:
-         return values[0]
-       return values if values else None
-     else:
-       if not data_proto.HasField(self.field_name):
-         return None
-       return self.message_class.from_proto(src)
-
-   def deserialize_list(self, data_proto):
-     assert self.is_repeated_field
-     src = getattr(data_proto, self.field_name)
-     return [self.message_class.from_proto(x) for x in src]
-
-
- class NDArraySerializer(Serializer):
-
-   def __init__(self, field_name, as_list=False):
-     self.field_name = field_name
-     self.as_list = as_list
-
-   def serialize(self, data_proto, value):
-     if self.as_list and not isinstance(value, Iterable):
-       raise TypeError(f"Expected list, got {type(value)}")
-     value = np.asarray(value)
-     if not np.issubdtype(value.dtype, np.number):
-       raise TypeError(f"Expected number array, got {value.dtype}")
-     proto = getattr(data_proto, self.field_name)
-     proto.buffer = value.tobytes()
-     proto.shape.extend(value.shape)
-     proto.dtype = str(value.dtype)
-
-   def deserialize(self, data_proto):
-     proto = getattr(data_proto, self.field_name)
-     if not proto.buffer:
-       return None
-     array = np.frombuffer(proto.buffer, dtype=np.dtype(proto.dtype)).reshape(proto.shape)
-     if self.as_list:
-       return array.tolist()
-     return array
-
-
- class JSONSerializer(Serializer):
-
-   def __init__(self, field_name, type=None):
-     self.field_name = field_name
-     self.type = type
-
-   def serialize(self, data_proto, value):
-     #if self.type is not None and not isinstance(value, self.type):
-     #  raise TypeError(f"Expected {self.type}, got {type(value)}")
-     try:
-       setattr(data_proto, self.field_name, json.dumps(value))
-     except TypeError as e:
-       raise TypeError(f"Incompatible type for {self.field_name}: {type(value)}") from e
-
-   def deserialize(self, data_proto):
-     value = getattr(data_proto, self.field_name)
-     if not value:
-       return None
-     return json.loads(value)
-
-
- class ListSerializer(Serializer):
-
-   def __init__(self, inner_serializer):
-     self.field_name = 'parts'
-     self.inner_serializer = inner_serializer
-
-   def handles_list(self):
-     # if handles_list() is called on this serializer, it means that we're
-     # trying to serialize a list of lists. In this case, we need to use
-     # parts[] for the outer list, so we return False here (we can't inline it).
-     return False
-
-   def serialize(self, data_proto, value):
-     if not isinstance(value, Iterable):
-       raise TypeError(f"Expected iterable, got {type(value)}")
-     if self.inner_serializer.handles_list():
-       self.inner_serializer.serialize_list(data_proto, value)
-     else:
-       for item in value:
-         part = data_proto.parts.add()
-         self.inner_serializer.serialize(part.data, item)
-
-   def deserialize(self, data_proto):
-     if self.inner_serializer.handles_list():
-       return self.inner_serializer.deserialize_list(data_proto)
-     return [self.inner_serializer.deserialize(part.data) for part in data_proto.parts]
-
-
- class TupleSerializer(Serializer):
-
-   def __init__(self, inner_serializers):
-     self.field_name = 'parts'
-     self.inner_serializers = inner_serializers
-
-   def serialize(self, data_proto, value):
-     if not isinstance(value, (tuple, list)):
-       raise TypeError(f"Expected tuple, got {type(value)}")
-     if len(value) != len(self.inner_serializers):
-       raise ValueError(f"Expected tuple of length {len(self.inner_serializers)}, got {len(value)}")
-     for i, (serializer, item) in enumerate(zip(self.inner_serializers, value)):
-       part = data_proto.parts.add()
-       part.id = str(i)
-       serializer.serialize(part.data, item)
-
-   def deserialize(self, data_proto):
-     if not data_proto.parts and self.inner_serializers:
-       return None
-     if len(data_proto.parts) != len(self.inner_serializers):
-       raise ValueError(
-           f"Expected tuple of length {len(self.inner_serializers)}, got {len(data_proto.parts)}")
-     return tuple(
-         serializer.deserialize(part.data)
-         for serializer, part in zip(self.inner_serializers, data_proto.parts))
-
-
- class NamedFieldsSerializer(Serializer):
-
-   def __init__(self, named_field_serializers: Dict[str, Serializer]):
-     self.field_name = 'parts'
-     self.named_field_serializers = named_field_serializers
-
-   def serialize(self, data_proto, value):
-     for name, serializer in self.named_field_serializers.items():
-       if name not in value:
-         raise TypeError(f"Missing field {name}")
-       part = self._get_part(data_proto, name, add=True)
-       serializer.serialize(part.data, value[name])
-
-   def deserialize(self, data_proto):
-     if not data_proto.parts and self.named_field_serializers:
-       return None
-     value = data_types.NamedFields()
-     for name, serializer in self.named_field_serializers.items():
-       part = self._get_part(data_proto, name)
-       value[name] = serializer.deserialize(part.data)
-     return value
-
-   def _get_part(self, data_proto, name, add=False):
-     for part in data_proto.parts:
-       if part.id == name:
-         return part
-     if add:
-       part = data_proto.parts.add()
-       part.id = name
-       return part
-     raise TypeError(f"Missing part with key {name}")
-
-
- # TODO dict serializer, maybe json only?
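serializers.py is likewise removed wholesale. A hedged round-trip sketch against the removed 11.1.7rc3 API; it relies only on the `ndarray` field of the `Data` proto, the same field the deleted `_DATA_TYPES` table in method_signatures.py binds to `NDArraySerializer`:

```python
import numpy as np
from clarifai_grpc.grpc.api import resources_pb2

from clarifai.runners.utils.serializers import NDArraySerializer  # 11.1.7rc3 path

proto = resources_pb2.Data()
serializer = NDArraySerializer('ndarray')

arr = np.arange(6, dtype=np.float32).reshape(2, 3)
serializer.serialize(proto, arr)     # writes buffer, shape and dtype into proto.ndarray
out = serializer.deserialize(proto)  # rebuilds the array from those fields

assert np.array_equal(arr, out)
```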