clarifai 11.1.7rc3__py3-none-any.whl → 11.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/cli/base.py +18 -7
- clarifai/cli/compute_cluster.py +8 -1
- clarifai/cli/deployment.py +5 -1
- clarifai/cli/model.py +25 -38
- clarifai/cli/nodepool.py +4 -1
- clarifai/client/model.py +393 -157
- clarifai/runners/__init__.py +7 -2
- clarifai/runners/models/model_builder.py +12 -80
- clarifai/runners/models/model_class.py +28 -279
- clarifai/runners/models/model_run_locally.py +88 -19
- clarifai/runners/models/model_runner.py +0 -2
- clarifai/runners/models/model_servicer.py +2 -11
- clarifai/runners/utils/data_handler.py +210 -271
- clarifai/utils/cli.py +9 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/METADATA +16 -4
- clarifai-11.2.1.dist-info/RECORD +101 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/WHEEL +1 -1
- clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-310.pyc +0 -0
- clarifai/__pycache__/versions.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
- clarifai/client/cli/__init__.py +0 -0
- clarifai/client/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/base_cli.py +0 -88
- clarifai/client/cli/model_cli.py +0 -29
- clarifai/client/model_client.py +0 -448
- clarifai/constants/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/modules/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/dockerfile_template/Dockerfile.cpu.template +0 -31
- clarifai/runners/dockerfile_template/Dockerfile.cuda.template +0 -42
- clarifai/runners/dockerfile_template/Dockerfile.nim +0 -71
- clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310-pytest-7.1.2.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
- clarifai/runners/models/model_class_refract.py +0 -80
- clarifai/runners/models/model_upload.py +0 -607
- clarifai/runners/models/temp.py +0 -25
- clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_handler_refract.py +0 -213
- clarifai/runners/utils/data_types.py +0 -427
- clarifai/runners/utils/logger.py +0 -0
- clarifai/runners/utils/method_signatures.py +0 -477
- clarifai/runners/utils/serializers.py +0 -222
- clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
- clarifai-11.1.7rc3.dist-info/RECORD +0 -237
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/entry_points.txt +0 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info/licenses}/LICENSE +0 -0
- {clarifai-11.1.7rc3.dist-info → clarifai-11.2.1.dist-info}/top_level.txt +0 -0
clarifai/client/model_client.py
DELETED
@@ -1,448 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
from typing import Any, Dict, Iterator, List
|
3
|
-
|
4
|
-
from clarifai_grpc.grpc.api import resources_pb2, service_pb2
|
5
|
-
from clarifai_grpc.grpc.api.status import status_code_pb2
|
6
|
-
|
7
|
-
from clarifai.constants.model import MAX_MODEL_PREDICT_INPUTS
|
8
|
-
from clarifai.errors import UserError
|
9
|
-
from clarifai.runners.utils.method_signatures import (CompatibilitySerializer, deserialize,
|
10
|
-
get_stream_from_signature, serialize,
|
11
|
-
signatures_from_json)
|
12
|
-
from clarifai.utils.logging import logger
|
13
|
-
from clarifai.utils.misc import BackoffIterator, status_is_retryable
|
14
|
-
|
15
|
-
|
16
|
-
class ModelClient:
  '''
  Client for calling model predict, generate, and stream methods.

  Method signatures are fetched from the model lazily: the first lookup of an
  attribute that does not exist yet triggers `fetch()`, which binds one Python
  callable per remote method onto this instance.
  '''

  def __init__(self, stub, request_template: service_pb2.PostModelOutputsRequest = None):
    '''
    Initialize the model client.

    Args:
      stub: The gRPC stub for the model.
      request_template: The template for the request to send to the model, including
        common fields like model_id, model_version, cluster, etc.
    '''
    self.STUB = stub
    self.request_template = request_template or service_pb2.PostModelOutputsRequest()
    self._method_signatures = None
    self._defined = False

  def fetch(self):
    '''
    Fetch function signature definitions from the model and define the functions in the client

    The client is flagged as defined even when fetching fails, so the failure is
    raised once and later attribute lookups do not re-enter the retry loop.
    '''
    if self._defined:
      return
    try:
      self._fetch_signatures()
      self._define_functions()
    finally:
      self._defined = True

  def __getattr__(self, name):
    '''
    Lazily define the remote methods the first time an unknown attribute is requested.

    Only called by Python when normal attribute lookup has already failed.

    Raises:
      AttributeError: If the attribute does not exist after the signatures were fetched,
        if `name` is a dunder name, or if the instance is not initialized yet.
    '''
    # Dunder probes (copy, pickle, hasattr checks, ...) must never trigger a network
    # fetch, and looking up `self._defined` before __init__ has set it would re-enter
    # __getattr__ and recurse without bound.
    is_dunder = name.startswith('__') and name.endswith('__')
    if is_dunder or '_defined' not in self.__dict__:
      raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
    if not self._defined:
      self.fetch()
    return self.__getattribute__(name)

  def _fetch_signatures(self):
    '''
    Fetch the method signatures from the model.

    The result is stored in `self._method_signatures`; nothing is returned. Models that
    do not support the `_GET_SIGNATURES` call get an empty signature dict and a single
    compatibility `predict` function instead.

    Raises:
      Exception: If the model responds with a non-success, non-retryable status.
    '''
    # TODO this could use a new endpoint to get the signatures
    # for local grpc models, we'll also have to add the endpoint to the model servicer
    # for now we'll just use the predict endpoint with a special method name
    request = service_pb2.PostModelOutputsRequest()
    request.CopyFrom(self.request_template)
    inp = request.inputs.add()  # empty input for this method
    inp.data.parts.add()  # empty part for this input
    inp.data.metadata['_method_name'] = '_GET_SIGNATURES'

    start_time = time.time()
    backoff_iterator = BackoffIterator(10)
    while True:
      response = self.STUB.PostModelOutputs(request)
      if status_is_retryable(
          response.status.code) and time.time() - start_time < 60 * 10:  # 10 minutes
        logger.info(f"Retrying model info fetch with response {response.status!r}")
        time.sleep(next(backoff_iterator))
        continue
      break

    if (response.status.code == status_code_pb2.INPUT_UNSUPPORTED_FORMAT or
        (response.status.code == status_code_pb2.SUCCESS and
         response.outputs[0].data.text.raw == '')):
      # return codes/values from older models that don't support _GET_SIGNATURES
      self._method_signatures = {}
      self._define_compatability_functions()
      return
    if response.status.code != status_code_pb2.SUCCESS:
      raise Exception(f"Model failed with response {response!r}")
    self._method_signatures = signatures_from_json(response.outputs[0].data.text.raw)

  def _define_functions(self):
    '''
    Define the functions based on the method signatures.

    Raises:
      ValueError: If a signature has a method type this client cannot call.
    '''
    for method_name, method_signature in self._method_signatures.items():
      # resolve the enum name once instead of once per branch
      method_type = resources_pb2.RunnerMethodType.Name(method_signature.method_type)
      if method_type == 'UNARY_UNARY':
        call_func = self._predict
      elif method_type == 'UNARY_STREAMING':
        call_func = self._generate
      elif method_type == 'STREAMING_STREAMING':
        call_func = self._stream
      else:
        raise ValueError(f"Unknown method type {method_signature.method_type}")

      # method argnames, in order, collapsing nested keys to corresponding user function args
      method_argnames = list(
          dict.fromkeys(var.name.split('.', 1)[0] for var in method_signature.input_fields))

      def bind_f(method_name, method_argnames, call_func):

        def f(*args, **kwargs):
          if len(args) > len(method_argnames):
            raise TypeError(
                f"{method_name}() takes {len(method_argnames)} positional arguments but {len(args)} were given"
            )
          for name, arg in zip(method_argnames, args):  # handle positional with zip shortest
            if name in kwargs:
              raise TypeError(f"Multiple values for argument {name}")
            kwargs[name] = arg
          return call_func(kwargs, method_name)

        return f

      # need to bind method_name to the value, not the mutating loop variable
      f = bind_f(method_name, method_argnames, call_func)

      # set names and docstrings
      f.__name__ = method_name
      f.__qualname__ = f'{self.__class__.__name__}.{method_name}'
      # TODO: set __signature__ from annotations once MethodSignature carries an
      # `annotations_json` field (it currently does not).
      f.__doc__ = method_signature.description
      setattr(self, method_name, f)

  def _define_compatability_functions(self):
    '''
    Define a single `predict` function for models without method signatures.

    Text is always exchanged through `text.raw` for these models, so the value is moved
    between `string_value` and `text.raw` around the call.
    '''
    serializer = CompatibilitySerializer()

    def predict(input: Any) -> Any:
      proto = resources_pb2.Input()
      serializer.serialize(proto.data, input)
      # always use text.raw for compat
      if proto.data.string_value:
        proto.data.text.raw = proto.data.string_value
        proto.data.string_value = ''
      response = self._predict_by_proto([proto])
      if response.status.code != status_code_pb2.SUCCESS:
        raise Exception(f"Model predict failed with response {response!r}")
      response_data = response.outputs[0].data
      if response_data.text.raw:
        response_data.string_value = response_data.text.raw
        response_data.text.raw = ''
      return serializer.deserialize(response_data)

    self.predict = predict

  def _predict(
      self,
      inputs,  # TODO set up functions according to fetched signatures?
      method_name: str = 'predict',
  ) -> Any:
    '''
    Call a unary-unary method.

    Args:
      inputs: Either one dict of keyword arguments, or a list of such dicts (batch).
      method_name: The remote method name to call.

    Returns:
      The deserialized output for a single dict, or a list of outputs for a batch.
    '''
    input_signature = self._method_signatures[method_name].input_fields
    output_signature = self._method_signatures[method_name].output_fields

    batch_input = True
    if isinstance(inputs, dict):
      inputs = [inputs]
      batch_input = False

    proto_inputs = []
    for item in inputs:
      proto = resources_pb2.Input()
      serialize(item, input_signature, proto.data)
      proto_inputs.append(proto)

    response = self._predict_by_proto(proto_inputs, method_name)

    outputs = [
        deserialize(output.data, output_signature, is_output=True) for output in response.outputs
    ]
    if batch_input:
      return outputs
    return outputs[0]

  def _predict_by_proto(
      self,
      inputs: List[resources_pb2.Input],
      method_name: str = None,
      inference_params: Dict = None,
      output_config: Dict = None,
  ) -> service_pb2.MultiOutputResponse:
    """Predicts the model based on the given inputs.

    Args:
      inputs (List[resources_pb2.Input]): The inputs to predict.
      method_name (str): The remote method name to call.
      inference_params (Dict): Inference parameters to override.
      output_config (Dict): Output configuration to override.

    Returns:
      service_pb2.MultiOutputResponse: The prediction response(s).

    Raises:
      UserError: If `inputs` is not a list or holds too many inputs.
      Exception: If the model responds with a non-success, non-retryable status.
    """
    if not isinstance(inputs, list):
      raise UserError('Invalid inputs, inputs must be a list of Input objects.')
    if len(inputs) > MAX_MODEL_PREDICT_INPUTS:
      raise UserError(f"Too many inputs. Max is {MAX_MODEL_PREDICT_INPUTS}.")

    request = service_pb2.PostModelOutputsRequest()
    request.CopyFrom(self.request_template)

    request.inputs.extend(inputs)

    if method_name:
      # TODO put in new proto field?
      for inp in request.inputs:
        inp.data.metadata['_method_name'] = method_name
    if inference_params:
      request.model.model_version.output_info.params.update(inference_params)
    if output_config:
      request.model.model_version.output_info.output_config.MergeFrom(
          resources_pb2.OutputConfig(**output_config))

    start_time = time.time()
    backoff_iterator = BackoffIterator(10)
    while True:
      response = self.STUB.PostModelOutputs(request)
      if status_is_retryable(
          response.status.code) and time.time() - start_time < 60 * 10:  # 10 minutes
        logger.info(f"Model predict failed with response {response!r}")
        time.sleep(next(backoff_iterator))
        continue

      if response.status.code != status_code_pb2.SUCCESS:
        raise Exception(f"Model predict failed with response {response!r}")
      break

    return response

  def _generate(
      self,
      inputs,  # TODO set up functions according to fetched signatures?
      method_name: str = 'generate',
  ) -> Any:
    '''
    Call a unary-streaming method.

    Args:
      inputs: Either one dict of keyword arguments, or a list of such dicts (batch).
      method_name: The remote method name to call.

    Yields:
      For every streamed response: the deserialized output for a single dict, or the
      list of outputs for a batch.
    '''
    input_signature = self._method_signatures[method_name].input_fields
    output_signature = self._method_signatures[method_name].output_fields

    batch_input = True
    if isinstance(inputs, dict):
      inputs = [inputs]
      batch_input = False

    proto_inputs = []
    for item in inputs:
      proto = resources_pb2.Input()
      serialize(item, input_signature, proto.data)
      proto_inputs.append(proto)

    response_stream = self._generate_by_proto(proto_inputs, method_name)

    for response in response_stream:
      outputs = [
          deserialize(output.data, output_signature, is_output=True)
          for output in response.outputs
      ]
      # Exactly one item per response: previously a batched call yielded the list and
      # then, additionally, its first element.
      if batch_input:
        yield outputs
      else:
        yield outputs[0]

  def _generate_by_proto(
      self,
      inputs: List[resources_pb2.Input],
      method_name: str = None,
      inference_params: Dict = None,
      output_config: Dict = None,
  ):
    """Generate the stream output on model based on the given inputs.

    Args:
      inputs (list[Input]): The inputs to generate, must be less than 128.
      method_name (str): The remote method name to call.
      inference_params (dict): The inference params to override.
      output_config (dict): The output config to override.

    Yields:
      The streamed responses, starting with the first successful one.

    Raises:
      UserError: If `inputs` is not a list or holds too many inputs.
      Exception: If the stream is empty or a response has a non-success status.
    """
    if not isinstance(inputs, list):
      raise UserError('Invalid inputs, inputs must be a list of Input objects.')
    if len(inputs) > MAX_MODEL_PREDICT_INPUTS:
      raise UserError(f"Too many inputs. Max is {MAX_MODEL_PREDICT_INPUTS}."
                     )  # TODO Use Chunker for inputs len > 128

    request = service_pb2.PostModelOutputsRequest()
    request.CopyFrom(self.request_template)

    request.inputs.extend(inputs)

    if method_name:
      # TODO put in new proto field?
      for inp in request.inputs:
        inp.data.metadata['_method_name'] = method_name
    if inference_params:
      request.model.model_version.output_info.params.update(inference_params)
    if output_config:
      # same merge as _predict_by_proto
      request.model.model_version.output_info.output_config.MergeFrom(
          resources_pb2.OutputConfig(**output_config))

    start_time = time.time()
    backoff_iterator = BackoffIterator(10)
    started = False
    while not started:
      stream_response = self.STUB.GenerateModelOutputs(request)
      try:
        response = next(stream_response)  # get the first response
      except StopIteration:
        raise Exception("Model Generate failed with no response")
      if status_is_retryable(response.status.code) and \
          time.time() - start_time < 60 * 10:
        logger.info("Model is still deploying, please wait...")
        time.sleep(next(backoff_iterator))
        continue
      if response.status.code != status_code_pb2.SUCCESS:
        raise Exception(f"Model Generate failed with response {response.status!r}")
      started = True

    yield response  # yield the first response

    for response in stream_response:
      if response.status.code != status_code_pb2.SUCCESS:
        raise Exception(f"Model Generate failed with response {response.status!r}")
      yield response

  def _stream(
      self,
      inputs,
      method_name: str = 'stream',
  ) -> Any:
    '''
    Call a streaming-streaming method.

    Args:
      inputs: A dict of keyword arguments (or a one-element list holding it). The
        argument matching the method's stream input must be an iterable of items.
      method_name: The remote method name to call.

    Yields:
      The deserialized output of each streamed response.

    Raises:
      ValueError: If the method signature has no stream input.
    '''
    input_signature = self._method_signatures[method_name].input_fields
    output_signature = self._method_signatures[method_name].output_fields

    if isinstance(inputs, list):
      assert len(inputs) == 1, 'streaming methods do not support batched calls'
      inputs = inputs[0]
    assert isinstance(inputs, dict)
    # work on a copy so the caller's dict is not modified by the pop/assignment below
    kwargs = dict(inputs)

    # find the streaming vars in the input signature, and the streaming input python param
    stream_sig = get_stream_from_signature(input_signature)
    if stream_sig is None:
      raise ValueError("Streaming method must have a Stream input")
    stream_argname = stream_sig.name

    # get the streaming input iterator from the user-provided function arg values;
    # iter() lets plain iterables (e.g. lists) work as well as generators
    user_inputs_generator = iter(kwargs.pop(stream_argname))

    def _input_proto_stream():
      # first item contains all the inputs and the first stream item
      proto = resources_pb2.Input()
      try:
        item = next(user_inputs_generator)
      except StopIteration:
        return  # no items to stream
      kwargs[stream_argname] = item
      serialize(kwargs, input_signature, proto.data)

      yield proto

      # subsequent items are just the stream items
      for item in user_inputs_generator:
        proto = resources_pb2.Input()
        serialize({stream_argname: item}, [stream_sig], proto.data)
        yield proto

    response_stream = self._stream_by_proto(_input_proto_stream(), method_name)

    for response in response_stream:
      assert len(response.outputs) == 1, 'streaming methods must have exactly one output'
      yield deserialize(response.outputs[0].data, output_signature, is_output=True)

  def _req_iterator(self,
                    input_iterator: Iterator[List[resources_pb2.Input]],
                    method_name: str = None,
                    inference_params: Dict = None,
                    output_config: Dict = None):
    '''
    Turn a stream of inputs into a stream of requests built from the request template.

    Args:
      input_iterator: Yields either a list of inputs or a single input per request.
      method_name: The remote method name, stored in each input's metadata.
      inference_params: Inference parameters to override.
      output_config: Output configuration to override.

    Yields:
      One PostModelOutputsRequest per item of `input_iterator`.
    '''
    request = service_pb2.PostModelOutputsRequest()
    request.CopyFrom(self.request_template)
    if inference_params:
      request.model.model_version.output_info.params.update(inference_params)
    if output_config:
      # same merge as _predict_by_proto
      request.model.model_version.output_info.output_config.MergeFrom(
          resources_pb2.OutputConfig(**output_config))
    for inputs in input_iterator:
      req = service_pb2.PostModelOutputsRequest()
      req.CopyFrom(request)
      if isinstance(inputs, list):
        req.inputs.extend(inputs)
      else:
        req.inputs.append(inputs)
      # TODO: put into new proto field?
      for inp in req.inputs:
        inp.data.metadata['_method_name'] = method_name
      yield req

  def _stream_by_proto(self,
                       inputs: Iterator[List[resources_pb2.Input]],
                       method_name: str = None,
                       inference_params: Dict = None,
                       output_config: Dict = None):
    """Generate the stream output on model based on the given stream of inputs.

    Args:
      inputs: Iterator of inputs (or lists of inputs), one entry per request.
      method_name (str): The remote method name to call.
      inference_params (dict): The inference params to override.
      output_config (dict): The output config to override.

    Yields:
      The streamed responses.

    Raises:
      Exception: If a response has a non-success status that is not retried.
    """
    request = self._req_iterator(inputs, method_name, inference_params, output_config)

    start_time = time.time()
    backoff_iterator = BackoffIterator(10)
    generation_started = False
    while not generation_started:
      retry = False
      # NOTE(review): `request` is a one-shot generator that is reused on retry, so
      # inputs already consumed by a failed attempt are not replayed - confirm whether
      # they need to be buffered.
      stream_response = self.STUB.StreamModelOutputs(request)
      for response in stream_response:
        # Retrying is only possible before any output was handed to the caller; a
        # retryable status after that is an error, not a silent end of the stream.
        if not generation_started and status_is_retryable(response.status.code) and \
            time.time() - start_time < 60 * 10:
          logger.info("Model is still deploying, please wait...")
          time.sleep(next(backoff_iterator))
          retry = True
          break
        if response.status.code != status_code_pb2.SUCCESS:
          raise Exception(f"Model Predict failed with response {response.status!r}")
        generation_started = True
        yield response
      if not retry:
        # the stream finished (possibly without any response): stop instead of
        # calling the endpoint again with an exhausted request iterator
        break
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# CPU image template for running a Clarifai model.
# The ${...} placeholders are filled in before the image is built.
ARG BASE_IMAGE=${BASE_IMAGE}
FROM ${BASE_IMAGE} AS build

# Set the working directory to /app
WORKDIR /app

COPY requirements.txt .
# Install requirements and cleanup before leaving this line.
# Note(zeiler): this could be in a future template as {{model_python_deps}}
RUN pip install --no-cache-dir -r requirements.txt

# Install Clarifai SDK
RUN pip install --no-cache-dir clarifai

# These will be set by the templating system.
# NOTE(review): values set with ENV are stored in the image configuration; confirm
# CLARIFAI_PAT is meant to be baked in rather than injected at run time.
ENV CLARIFAI_PAT=${CLARIFAI_PAT}
ENV CLARIFAI_USER_ID=${CLARIFAI_USER_ID}
ENV CLARIFAI_RUNNER_ID=${CLARIFAI_RUNNER_ID}
ENV CLARIFAI_NODEPOOL_ID=${CLARIFAI_NODEPOOL_ID}
ENV CLARIFAI_COMPUTE_CLUSTER_ID=${CLARIFAI_COMPUTE_CLUSTER_ID}
ENV CLARIFAI_API_BASE=${CLARIFAI_API_BASE}

# Copy the current folder into /app/model_dir that the SDK will expect.
COPY . /app/model_dir/${name}

# Add the model directory to the python path.
# Uses the key=value form of ENV (the space-separated form is legacy syntax).
ENV PYTHONPATH="${PYTHONPATH}:/app/model_dir/${name}"

# Finally run the clarifai entrypoint to start the runner loop and local dev server.
# Note(zeiler): we may want to make this a clarifai CLI call.
# NOTE(review): CMD names no executable, so this presumably relies on the base image's
# ENTRYPOINT being the Python interpreter - confirm.
CMD ["-m", "clarifai.runners.server", "--model_path", "/app/model_dir/${name}"]
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# CUDA image template for running a Clarifai model.
# The ${...} placeholders are filled in before the image is built.
ARG TARGET_PLATFORM=linux/amd64
FROM --platform=$TARGET_PLATFORM ${BASE_IMAGE} AS build

ENV DEBIAN_FRONTEND=noninteractive

#############################
# User specific requirements
#############################
COPY requirements.txt .

# Install requirements and cleanup before leaving this line.
# Note(zeiler): this could be in a future template as {{model_python_deps}}
RUN pip install --no-cache-dir -r requirements.txt

# Install Clarifai SDK
RUN pip install --no-cache-dir clarifai

# These will be set by the templating system.
# NOTE(review): values set with ENV are stored in the image configuration; confirm
# CLARIFAI_PAT is meant to be baked in rather than injected at run time.
ENV CLARIFAI_PAT=${CLARIFAI_PAT}
ENV CLARIFAI_USER_ID=${CLARIFAI_USER_ID}
ENV CLARIFAI_RUNNER_ID=${CLARIFAI_RUNNER_ID}
ENV CLARIFAI_NODEPOOL_ID=${CLARIFAI_NODEPOOL_ID}
ENV CLARIFAI_COMPUTE_CLUSTER_ID=${CLARIFAI_COMPUTE_CLUSTER_ID}
ENV CLARIFAI_API_BASE=${CLARIFAI_API_BASE}

# Set the NUMBA cache dir to /tmp
ENV NUMBA_CACHE_DIR=/tmp/numba_cache
ENV HOME=/tmp

# Set the working directory to /app
WORKDIR /app

# Copy the current folder into /app/model_dir that the SDK will expect.
# Note(zeiler): would be nice to exclude checkpoints in case they were pre-downloaded.
COPY . /app/model_dir/${name}

# Add the model directory to the python path.
ENV PYTHONPATH=${PYTHONPATH}:/app/model_dir/${name}

# Finally run the clarifai entrypoint to start the runner loop and local dev server.
# Note(zeiler): we may want to make this a clarifai CLI call.
# NOTE(review): CMD names no executable, so this presumably relies on the base image's
# ENTRYPOINT being the Python interpreter - confirm.
CMD ["-m", "clarifai.runners.server", "--model_path", "/app/model_dir/${name}"]
|
@@ -1,71 +0,0 @@
|
|
1
|
-
FROM nvcr.io/nim/meta/llama-3.1-8b-instruct:1.1.2 as build
|
2
|
-
|
3
|
-
FROM gcr.io/distroless/python3-debian12:debug
|
4
|
-
|
5
|
-
|
6
|
-
COPY --from=build /bin/bash /bin/rbash
|
7
|
-
COPY --from=build /bin/sh /bin/sh
|
8
|
-
COPY --from=build /bin/rsh /bin/rsh
|
9
|
-
|
10
|
-
# we have to overwrite the python3 binary that the distroless image uses
|
11
|
-
COPY --from=build /opt/nim/llm/.venv/bin/python3.10 /usr/bin/python3
|
12
|
-
COPY --from=build /opt/nim/llm/.venv/bin/python3.10 /usr/local/bin/python3.10
|
13
|
-
|
14
|
-
# also copy in all the lib files for it.
|
15
|
-
COPY --from=build /lib /lib
|
16
|
-
COPY --from=build /lib64 /lib64
|
17
|
-
COPY --from=build /usr/lib/ /usr/lib/
|
18
|
-
COPY --from=build /usr/local/lib/ /usr/local/lib/
|
19
|
-
# ldconfig is needed to update the shared library cache so system libraries (like CUDA) can be found
|
20
|
-
COPY --from=build /usr/sbin/ldconfig /sbin/ldconfig
|
21
|
-
COPY --from=build /usr/sbin/ldconfig.real /sbin/ldconfig.real
|
22
|
-
COPY --from=build /etc/ld.so.conf /etc/ld.so.conf
|
23
|
-
COPY --from=build /etc/ld.so.cache /etc/ld.so.cache
|
24
|
-
COPY --from=build /etc/ld.so.conf.d/ /etc/ld.so.conf.d/
|
25
|
-
|
26
|
-
# COPY NIM files
|
27
|
-
COPY --from=build /opt /opt
|
28
|
-
COPY --from=build /etc/nim /etc/nim
|
29
|
-
|
30
|
-
# Set environment variables to use the nim libraries and python
|
31
|
-
ENV PYTHONPATH=${PYTHONPATH}:/opt/nim/llm/.venv/lib/python3.10/site-packages:/opt/nim/llm
|
32
|
-
ENV PATH="/opt/nim/llm/.venv/bin:/opt/hpcx/ucc/bin:/opt/hpcx/ucx/bin:/opt/hpcx/ompi/bin:$PATH"
|
33
|
-
|
34
|
-
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib/ucc:/opt/hpcx/ucc/lib:/opt/hpcx/ucx/lib/ucx:/opt/hpcx/ucx/lib:/opt/hpcx/ompi/lib:/opt/hpcx/ompi/lib/openmpi:/opt/nim/llm/.venv/lib/python3.10/site-packages/tensorrt_llm/libs:/opt/nim/llm/.venv/lib/python3.10/site-packages/nvidia/cublas/lib:/opt/nim/llm/.venv/lib/python3.10/site-packages/tensorrt_libs:/opt/nim/llm/.venv/lib/python3.10/site-packages/nvidia/nccl/lib:$LD_LIBRARY_PATH"
|
35
|
-
|
36
|
-
ENV LIBRARY_PATH=/opt/hpcx/ucc/lib:/opt/hpcx/ucx/lib:/opt/hpcx/ompi/lib:$LIBRARY_PATH
|
37
|
-
|
38
|
-
ENV CPATH=/opt/hpcx/ompi/include:/opt/hpcx/ucc/include:/opt/hpcx/ucx/include:$CPATH
|
39
|
-
ENV LLM_PROJECT_DIR=/opt/nim/llm
|
40
|
-
|
41
|
-
# Set environment variables for MPI
|
42
|
-
ENV OMPI_HOME=/opt/hpcx/ompi
|
43
|
-
ENV HPCX_MPI_DIR=/opt/hpcx/ompi
|
44
|
-
ENV MPIf_HOME=/opt/hpcx/ompi
|
45
|
-
ENV OPAL_PREFIX=/opt/hpcx/ompi
|
46
|
-
|
47
|
-
# Set environment variables for UCC
|
48
|
-
ENV UCC_DIR=/opt/hpcx/ucc/lib/cmake/ucc
|
49
|
-
ENV UCC_HOME=/opt/hpcx/ucc
|
50
|
-
ENV HPCX_UCC_DIR=/opt/hpcx/ucc
|
51
|
-
ENV USE_UCC=1
|
52
|
-
ENV USE_SYSTEM_UCC=1
|
53
|
-
|
54
|
-
# Set environment variables for HPC-X
|
55
|
-
ENV HPCX_DIR=/opt/hpcx
|
56
|
-
ENV HPCX_UCX_DIR=/opt/hpcx/ucx
|
57
|
-
ENV HPCX_MPI_DIR=/opt/hpcx/ompi
|
58
|
-
|
59
|
-
# Set environment variables for UCX
|
60
|
-
ENV UCX_DIR=/opt/hpcx/ucx/lib/cmake/ucx
|
61
|
-
ENV UCX_HOME=/opt/hpcx/ucx
|
62
|
-
|
63
|
-
ENV HOME=/opt/nim/llm
|
64
|
-
|
65
|
-
# ln is needed to create symbolic links (needed by nvidia-container-runtime)
|
66
|
-
COPY --from=build /usr/bin/ln /usr/bin/ln
|
67
|
-
|
68
|
-
# Run ldconfig at image build time (in this final stage) to update the library cache; otherwise CUDA libraries won't be found
|
69
|
-
RUN ldconfig -v
|
70
|
-
|
71
|
-
SHELL ["/bin/rbash", "-c"]
|
Binary file
|
Binary file
|
Binary file
|