clarifai 10.11.0__py3-none-any.whl → 11.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/cli/model.py +206 -10
- clarifai/datasets/upload/features.py +1 -1
- clarifai/datasets/upload/multimodal.py +2 -1
- clarifai/datasets/upload/text.py +3 -2
- clarifai/rag/rag.py +6 -2
- clarifai/runners/dockerfile_template/Dockerfile.template +2 -0
- clarifai/runners/models/model_run_locally.py +358 -33
- clarifai/runners/models/model_upload.py +41 -55
- clarifai/runners/server.py +2 -0
- clarifai/runners/utils/const.py +40 -0
- {clarifai-10.11.0.dist-info → clarifai-11.0.0.dist-info}/METADATA +74 -4
- {clarifai-10.11.0.dist-info → clarifai-11.0.0.dist-info}/RECORD +17 -16
- {clarifai-10.11.0.dist-info → clarifai-11.0.0.dist-info}/WHEEL +1 -1
- {clarifai-10.11.0.dist-info → clarifai-11.0.0.dist-info}/LICENSE +0 -0
- {clarifai-10.11.0.dist-info → clarifai-11.0.0.dist-info}/entry_points.txt +0 -0
- {clarifai-10.11.0.dist-info → clarifai-11.0.0.dist-info}/top_level.txt +0 -0
clarifai/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "
|
1
|
+
__version__ = "11.0.0"
|
clarifai/cli/model.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
import click
|
2
|
+
|
2
3
|
from clarifai.cli.base import cli
|
3
4
|
|
4
5
|
|
5
6
|
@cli.group(['model'])
|
6
7
|
def model():
|
7
|
-
"""Manage models: upload, test locally"""
|
8
|
+
"""Manage models: upload, test locally, run_locally, predict"""
|
8
9
|
pass
|
9
10
|
|
10
11
|
|
@@ -39,17 +40,43 @@ def upload(model_path, download_checkpoints, skip_dockerfile):
|
|
39
40
|
type=click.Path(exists=True),
|
40
41
|
required=True,
|
41
42
|
help='Path to the model directory.')
|
43
|
+
@click.option(
|
44
|
+
'--mode',
|
45
|
+
type=click.Choice(['env', 'container'], case_sensitive=False),
|
46
|
+
default='env',
|
47
|
+
show_default=True,
|
48
|
+
help=
|
49
|
+
'Specify how to test the model locally: "env" for virtual environment or "container" for Docker container. Defaults to "env".'
|
50
|
+
)
|
42
51
|
@click.option(
|
43
52
|
'--keep_env',
|
44
53
|
is_flag=True,
|
45
|
-
help=
|
46
|
-
|
47
|
-
|
54
|
+
help=
|
55
|
+
'Keep the virtual environment after testing the model locally (applicable for virtualenv mode). Defaults to False.'
|
56
|
+
)
|
57
|
+
@click.option(
|
58
|
+
'--keep_image',
|
59
|
+
is_flag=True,
|
60
|
+
help=
|
61
|
+
'Keep the Docker image after testing the model locally (applicable for container mode). Defaults to False.'
|
62
|
+
)
|
63
|
+
def test_locally(model_path, keep_env=False, keep_image=False, mode='env'):
|
48
64
|
"""Test model locally."""
|
49
65
|
try:
|
50
66
|
from clarifai.runners.models import model_run_locally
|
51
|
-
|
52
|
-
|
67
|
+
if mode == 'env' and keep_image:
|
68
|
+
raise ValueError("'keep_image' is applicable only for 'container' mode")
|
69
|
+
if mode == 'container' and keep_env:
|
70
|
+
raise ValueError("'keep_env' is applicable only for 'env' mode")
|
71
|
+
|
72
|
+
if mode == "env":
|
73
|
+
click.echo("Testing model locally in a virtual environment...")
|
74
|
+
model_run_locally.main(model_path, run_model_server=False, keep_env=keep_env)
|
75
|
+
elif mode == "container":
|
76
|
+
click.echo("Testing model locally inside a container...")
|
77
|
+
model_run_locally.main(
|
78
|
+
model_path, inside_container=True, run_model_server=False, keep_image=keep_image)
|
79
|
+
click.echo("Model tested successfully.")
|
53
80
|
except Exception as e:
|
54
81
|
click.echo(f"Failed to test model locally: {e}", err=True)
|
55
82
|
|
@@ -60,11 +87,180 @@ def test_locally(model_path, keep_env=False):
|
|
60
87
|
type=click.Path(exists=True),
|
61
88
|
required=True,
|
62
89
|
help='Path to the model directory.')
|
63
|
-
|
64
|
-
|
90
|
+
@click.option(
|
91
|
+
'--port',
|
92
|
+
'-p',
|
93
|
+
type=int,
|
94
|
+
default=8000,
|
95
|
+
show_default=True,
|
96
|
+
help="The port to host the gRPC server for running the model locally. Defaults to 8000.")
|
97
|
+
@click.option(
|
98
|
+
'--mode',
|
99
|
+
type=click.Choice(['env', 'container'], case_sensitive=False),
|
100
|
+
default='env',
|
101
|
+
show_default=True,
|
102
|
+
help=
|
103
|
+
'Specifies how to run the model: "env" for virtual environment or "container" for Docker container. Defaults to "env".'
|
104
|
+
)
|
105
|
+
@click.option(
|
106
|
+
'--keep_env',
|
107
|
+
is_flag=True,
|
108
|
+
help=
|
109
|
+
'Keep the virtual environment after testing the model locally (applicable for virtualenv mode). Defaults to False.'
|
110
|
+
)
|
111
|
+
@click.option(
|
112
|
+
'--keep_image',
|
113
|
+
is_flag=True,
|
114
|
+
help=
|
115
|
+
'Keep the Docker image after testing the model locally (applicable for container mode). Defaults to False.'
|
116
|
+
)
|
117
|
+
def run_locally(model_path, port, mode, keep_env, keep_image):
|
118
|
+
"""Run the model locally and start a gRPC server to serve the model."""
|
65
119
|
try:
|
66
120
|
from clarifai.runners.models import model_run_locally
|
67
|
-
|
68
|
-
|
121
|
+
if mode == 'env' and keep_image:
|
122
|
+
raise ValueError("'keep_image' is applicable only for 'container' mode")
|
123
|
+
if mode == 'container' and keep_env:
|
124
|
+
raise ValueError("'keep_env' is applicable only for 'env' mode")
|
125
|
+
|
126
|
+
if mode == "env":
|
127
|
+
click.echo("Running model locally in a virtual environment...")
|
128
|
+
model_run_locally.main(model_path, run_model_server=True, keep_env=keep_env, port=port)
|
129
|
+
elif mode == "container":
|
130
|
+
click.echo("Running model locally inside a container...")
|
131
|
+
model_run_locally.main(
|
132
|
+
model_path,
|
133
|
+
inside_container=True,
|
134
|
+
run_model_server=True,
|
135
|
+
port=port,
|
136
|
+
keep_image=keep_image)
|
137
|
+
click.echo(f"Model server started locally from {model_path} in {mode} mode.")
|
69
138
|
except Exception as e:
|
70
139
|
click.echo(f"Failed to starts model server locally: {e}", err=True)
|
140
|
+
|
141
|
+
|
142
|
+
@model.command()
|
143
|
+
@click.option(
|
144
|
+
'--config',
|
145
|
+
type=click.Path(exists=True),
|
146
|
+
required=False,
|
147
|
+
help='Path to the model predict config file.')
|
148
|
+
@click.option('--model_id', required=False, help='Model ID of the model used to predict.')
|
149
|
+
@click.option('--user_id', required=False, help='User ID of the model used to predict.')
|
150
|
+
@click.option('--app_id', required=False, help='App ID of the model used to predict.')
|
151
|
+
@click.option('--model_url', required=False, help='Model URL of the model used to predict.')
|
152
|
+
@click.option('--file_path', required=False, help='File path of file for the model to predict')
|
153
|
+
@click.option('--url', required=False, help='URL to the file for the model to predict')
|
154
|
+
@click.option('--bytes', required=False, help='Bytes to the file for the model to predict')
|
155
|
+
@click.option(
|
156
|
+
'--input_id', required=False, help='Existing input id in the app for the model to predict')
|
157
|
+
@click.option('--input_type', required=False, help='Type of input')
|
158
|
+
@click.option(
|
159
|
+
'-cc_id',
|
160
|
+
'--compute_cluster_id',
|
161
|
+
required=False,
|
162
|
+
help='Compute Cluster ID to use for the model')
|
163
|
+
@click.option('-np_id', '--nodepool_id', required=False, help='Nodepool ID to use for the model')
|
164
|
+
@click.option(
|
165
|
+
'-dpl_id', '--deployment_id', required=False, help='Deployment ID to use for the model')
|
166
|
+
@click.option(
|
167
|
+
'--inference_params', required=False, default='{}', help='Inference parameters to override')
|
168
|
+
@click.option('--output_config', required=False, default='{}', help='Output config to override')
|
169
|
+
@click.pass_context
|
170
|
+
def predict(ctx, config, model_id, user_id, app_id, model_url, file_path, url, bytes, input_id,
|
171
|
+
input_type, compute_cluster_id, nodepool_id, deployment_id, inference_params,
|
172
|
+
output_config):
|
173
|
+
"""Predict using the given model"""
|
174
|
+
import json
|
175
|
+
|
176
|
+
from clarifai.client.deployment import Deployment
|
177
|
+
from clarifai.client.input import Input
|
178
|
+
from clarifai.client.model import Model
|
179
|
+
from clarifai.client.nodepool import Nodepool
|
180
|
+
from clarifai.utils.cli import from_yaml
|
181
|
+
if config:
|
182
|
+
config = from_yaml(config)
|
183
|
+
model_id, user_id, app_id, model_url, file_path, url, bytes, input_id, input_type, compute_cluster_id, nodepool_id, deployment_id, inference_params, output_config = (
|
184
|
+
config.get(k, v)
|
185
|
+
for k, v in [('model_id', model_id), ('user_id', user_id), ('app_id', app_id), (
|
186
|
+
'model_url', model_url), ('file_path', file_path), ('url', url), ('bytes', bytes), (
|
187
|
+
'input_id',
|
188
|
+
input_id), ('input_type',
|
189
|
+
input_type), ('compute_cluster_id',
|
190
|
+
compute_cluster_id), ('nodepool_id', nodepool_id), (
|
191
|
+
'deployment_id',
|
192
|
+
deployment_id), ('inference_params',
|
193
|
+
inference_params), ('output_config',
|
194
|
+
output_config)])
|
195
|
+
if sum([opt[1] for opt in [(model_id, 1), (user_id, 1), (app_id, 1), (model_url, 3)]
|
196
|
+
if opt[0]]) != 3:
|
197
|
+
raise ValueError("Either --model_id & --user_id & --app_id or --model_url must be provided.")
|
198
|
+
if sum([1 for opt in [file_path, url, bytes, input_id] if opt]) != 1:
|
199
|
+
raise ValueError("Exactly one of --file_path, --url, --bytes or --input_id must be provided.")
|
200
|
+
if compute_cluster_id or nodepool_id or deployment_id:
|
201
|
+
if sum([
|
202
|
+
opt[1] for opt in [(compute_cluster_id, 0.5), (nodepool_id, 0.5), (deployment_id, 1)]
|
203
|
+
if opt[0]
|
204
|
+
]) != 1:
|
205
|
+
raise ValueError(
|
206
|
+
"Either --compute_cluster_id & --nodepool_id or --deployment_id must be provided.")
|
207
|
+
if model_url:
|
208
|
+
model = Model(url=model_url, pat=ctx.obj['pat'], base_url=ctx.obj['base_url'])
|
209
|
+
else:
|
210
|
+
model = Model(
|
211
|
+
model_id=model_id,
|
212
|
+
user_id=user_id,
|
213
|
+
app_id=app_id,
|
214
|
+
pat=ctx.obj['pat'],
|
215
|
+
base_url=ctx.obj['base_url'])
|
216
|
+
|
217
|
+
if inference_params:
|
218
|
+
inference_params = json.loads(inference_params)
|
219
|
+
if output_config:
|
220
|
+
output_config = json.loads(output_config)
|
221
|
+
|
222
|
+
if file_path:
|
223
|
+
model_prediction = model.predict_by_filepath(
|
224
|
+
filepath=file_path,
|
225
|
+
input_type=input_type,
|
226
|
+
compute_cluster_id=compute_cluster_id,
|
227
|
+
nodepool_id=nodepool_id,
|
228
|
+
deployment_id=deployment_id,
|
229
|
+
inference_params=inference_params,
|
230
|
+
output_config=output_config)
|
231
|
+
elif url:
|
232
|
+
model_prediction = model.predict_by_url(
|
233
|
+
url=url,
|
234
|
+
input_type=input_type,
|
235
|
+
compute_cluster_id=compute_cluster_id,
|
236
|
+
nodepool_id=nodepool_id,
|
237
|
+
deployment_id=deployment_id,
|
238
|
+
inference_params=inference_params,
|
239
|
+
output_config=output_config)
|
240
|
+
elif bytes:
|
241
|
+
bytes = str.encode(bytes)
|
242
|
+
model_prediction = model.predict_by_bytes(
|
243
|
+
input_bytes=bytes,
|
244
|
+
input_type=input_type,
|
245
|
+
compute_cluster_id=compute_cluster_id,
|
246
|
+
nodepool_id=nodepool_id,
|
247
|
+
deployment_id=deployment_id,
|
248
|
+
inference_params=inference_params,
|
249
|
+
output_config=output_config)
|
250
|
+
elif input_id:
|
251
|
+
inputs = [Input.get_input(input_id)]
|
252
|
+
runner_selector = None
|
253
|
+
if deployment_id:
|
254
|
+
runner_selector = Deployment.get_runner_selector(
|
255
|
+
user_id=ctx.obj['user_id'], deployment_id=deployment_id)
|
256
|
+
elif compute_cluster_id and nodepool_id:
|
257
|
+
runner_selector = Nodepool.get_runner_selector(
|
258
|
+
user_id=ctx.obj['user_id'],
|
259
|
+
compute_cluster_id=compute_cluster_id,
|
260
|
+
nodepool_id=nodepool_id)
|
261
|
+
model_prediction = model.predict(
|
262
|
+
inputs=inputs,
|
263
|
+
runner_selector=runner_selector,
|
264
|
+
inference_params=inference_params,
|
265
|
+
output_config=output_config)
|
266
|
+
click.echo(model_prediction)
|
@@ -7,7 +7,7 @@ from typing import List, Optional, Union
|
|
7
7
|
class TextFeatures:
|
8
8
|
"""Text classification datasets preprocessing output features."""
|
9
9
|
text: str
|
10
|
-
labels: List[Union[str, int]] # List[str or int] to cater for multi-class tasks
|
10
|
+
labels: List[Union[str, int]] = None # List[str or int] to cater for multi-class tasks
|
11
11
|
id: Optional[int] = None # text_id
|
12
12
|
metadata: Optional[dict] = None
|
13
13
|
label_ids: Optional[List[str]] = None
|
@@ -34,7 +34,8 @@ class MultiModalDataset(ClarifaiDataset):
|
|
34
34
|
metadata = Struct()
|
35
35
|
image_bytes = data_item.image_bytes
|
36
36
|
text = data_item.text
|
37
|
-
labels = data_item.labels if
|
37
|
+
labels = data_item.labels if ((data_item.labels is None) or
|
38
|
+
isinstance(data_item.labels, list)) else [data_item.labels]
|
38
39
|
input_id = f"{self.dataset_id}-{id}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
|
39
40
|
if data_item.metadata is not None:
|
40
41
|
metadata.update(data_item.metadata)
|
clarifai/datasets/upload/text.py
CHANGED
@@ -34,8 +34,9 @@ class TextClassificationDataset(ClarifaiDataset):
|
|
34
34
|
data_item = self.data_generator[id]
|
35
35
|
metadata = Struct()
|
36
36
|
text = data_item.text
|
37
|
-
labels = data_item.labels if
|
38
|
-
|
37
|
+
labels = data_item.labels if ((data_item.labels is None) or
|
38
|
+
isinstance(data_item.labels, list)) else [data_item.labels
|
39
|
+
] # clarifai concept
|
39
40
|
label_ids = data_item.label_ids
|
40
41
|
input_id = f"{self.dataset_id}-{get_uuid(8)}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
|
41
42
|
if data_item.metadata is not None:
|
clarifai/rag/rag.py
CHANGED
@@ -45,10 +45,14 @@ class RAG:
|
|
45
45
|
self.logger.info("workflow_url:%s", workflow_url)
|
46
46
|
w = Workflow(workflow_url, base_url=base_url, pat=pat)
|
47
47
|
self._prompt_workflow = w
|
48
|
-
self._app = App(app_id=w.app_id, base_url=w.base, pat=w.pat)
|
48
|
+
self._app = App(app_id=w.app_id, user_id=w.user_id, base_url=w.base, pat=w.pat)
|
49
49
|
elif workflow_url is None and workflow is not None:
|
50
50
|
self._prompt_workflow = workflow
|
51
|
-
self._app = App(
|
51
|
+
self._app = App(
|
52
|
+
app_id=workflow.app_id,
|
53
|
+
user_id=workflow.user_id,
|
54
|
+
base_url=workflow.base,
|
55
|
+
pat=workflow.pat)
|
52
56
|
|
53
57
|
@classmethod
|
54
58
|
def setup(cls,
|
@@ -22,6 +22,8 @@ ENV CLARIFAI_API_BASE=${CLARIFAI_API_BASE}
|
|
22
22
|
|
23
23
|
# Set the NUMBA cache dir to /tmp
|
24
24
|
ENV NUMBA_CACHE_DIR=/tmp/numba_cache
|
25
|
+
# Set the TORCHINDUCTOR cache dir to /tmp
|
26
|
+
ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor_cache
|
25
27
|
ENV HOME=/tmp
|
26
28
|
|
27
29
|
# Set the working directory to /app
|
@@ -3,9 +3,11 @@ import importlib.util
|
|
3
3
|
import inspect
|
4
4
|
import os
|
5
5
|
import shutil
|
6
|
+
import signal
|
6
7
|
import subprocess
|
7
8
|
import sys
|
8
9
|
import tempfile
|
10
|
+
import time
|
9
11
|
import traceback
|
10
12
|
import venv
|
11
13
|
|
@@ -24,10 +26,18 @@ class ModelRunLocally:
|
|
24
26
|
self.model_path = model_path
|
25
27
|
self.requirements_file = os.path.join(self.model_path, "requirements.txt")
|
26
28
|
|
29
|
+
# ModelUploader contains multiple useful methods to interact with the model
|
30
|
+
self.uploader = ModelUploader(self.model_path)
|
31
|
+
self.config = self.uploader.config
|
32
|
+
|
33
|
+
def _requirements_hash(self):
|
34
|
+
"""Generate a hash of the requirements file."""
|
35
|
+
with open(self.requirements_file, "r") as f:
|
36
|
+
return hashlib.md5(f.read().encode('utf-8')).hexdigest()
|
37
|
+
|
27
38
|
def create_temp_venv(self):
|
28
39
|
"""Create a temporary virtual environment."""
|
29
|
-
|
30
|
-
requirements_hash = hashlib.md5(f.read().encode('utf-8')).hexdigest()
|
40
|
+
requirements_hash = self._requirements_hash()
|
31
41
|
|
32
42
|
temp_dir = os.path.join(tempfile.gettempdir(), str(requirements_hash))
|
33
43
|
venv_dir = os.path.join(temp_dir, "venv")
|
@@ -105,33 +115,91 @@ class ModelRunLocally:
|
|
105
115
|
text=resources_pb2.Text(raw="How many people live in new york?"),
|
106
116
|
image=resources_pb2.Image(url="https://samples.clarifai.com/metro-north.jpg"),
|
107
117
|
audio=resources_pb2.Audio(url="https://samples.clarifai.com/GoodMorning.wav"),
|
118
|
+
video=resources_pb2.Video(url="https://samples.clarifai.com/beer.mp4"),
|
108
119
|
))
|
109
120
|
],
|
110
121
|
)
|
111
122
|
|
123
|
+
def _build_stream_request(self):
|
124
|
+
request = self._build_request()
|
125
|
+
for i in range(1):
|
126
|
+
yield request
|
127
|
+
|
112
128
|
def _run_model_inference(self, runner):
|
113
129
|
"""Perform inference using the runner."""
|
114
130
|
request = self._build_request()
|
131
|
+
stream_request = self._build_stream_request()
|
115
132
|
|
116
133
|
ensure_urls_downloaded(request)
|
117
|
-
|
134
|
+
predict_response = None
|
135
|
+
generate_response = None
|
136
|
+
stream_response = None
|
118
137
|
try:
|
119
|
-
|
138
|
+
predict_response = runner.predict(request)
|
139
|
+
except NotImplementedError:
|
140
|
+
logger.info("Model does not implement predict() method.")
|
120
141
|
except Exception as e:
|
121
142
|
logger.error(f"Model Prediction failed: {e}")
|
122
143
|
traceback.print_exc()
|
123
|
-
|
144
|
+
predict_response = service_pb2.MultiOutputResponse(status=status_pb2.Status(
|
124
145
|
code=status_code_pb2.MODEL_PREDICTION_FAILED,
|
125
146
|
description="Prediction failed",
|
126
147
|
details="",
|
127
148
|
internal_details=str(e),
|
128
149
|
))
|
129
150
|
|
151
|
+
if predict_response:
|
152
|
+
if predict_response.outputs[0].status.code != status_code_pb2.SUCCESS:
|
153
|
+
logger.error(f"Moddel Prediction failed: {predict_response}")
|
154
|
+
else:
|
155
|
+
logger.info(f"Model Prediction succeeded: {predict_response}")
|
156
|
+
|
157
|
+
try:
|
158
|
+
generate_response = runner.generate(request)
|
159
|
+
except NotImplementedError:
|
160
|
+
logger.info("Model does not implement generate() method.")
|
161
|
+
except Exception as e:
|
162
|
+
logger.error(f"Model Generation failed: {e}")
|
163
|
+
traceback.print_exc()
|
164
|
+
generate_response = service_pb2.MultiOutputResponse(status=status_pb2.Status(
|
165
|
+
code=status_code_pb2.MODEL_GENERATION_FAILED,
|
166
|
+
description="Generation failed",
|
167
|
+
details="",
|
168
|
+
internal_details=str(e),
|
169
|
+
))
|
170
|
+
|
171
|
+
if generate_response:
|
172
|
+
generate_first_res = next(generate_response)
|
173
|
+
if generate_first_res.outputs[0].status.code != status_code_pb2.SUCCESS:
|
174
|
+
logger.error(f"Moddel Prediction failed: {generate_first_res}")
|
175
|
+
else:
|
176
|
+
logger.info(
|
177
|
+
f"Model Prediction succeeded for generate and first response: {generate_first_res}")
|
178
|
+
|
179
|
+
try:
|
180
|
+
stream_response = runner.stream(stream_request)
|
181
|
+
except NotImplementedError:
|
182
|
+
logger.info("Model does not implement stream() method.")
|
183
|
+
except Exception as e:
|
184
|
+
logger.error(f"Model Stream failed: {e}")
|
185
|
+
traceback.print_exc()
|
186
|
+
stream_response = service_pb2.MultiOutputResponse(status=status_pb2.Status(
|
187
|
+
code=status_code_pb2.MODEL_STREAM_FAILED,
|
188
|
+
description="Stream failed",
|
189
|
+
details="",
|
190
|
+
internal_details=str(e),
|
191
|
+
))
|
192
|
+
|
193
|
+
if stream_response:
|
194
|
+
stream_first_res = next(stream_response)
|
195
|
+
if stream_first_res.outputs[0].status.code != status_code_pb2.SUCCESS:
|
196
|
+
logger.error(f"Moddel Prediction failed: {stream_first_res}")
|
197
|
+
else:
|
198
|
+
logger.info(
|
199
|
+
f"Model Prediction succeeded for stream and first response: {stream_first_res}")
|
200
|
+
|
130
201
|
def _run_test(self):
|
131
202
|
"""Test the model locally by making a prediction."""
|
132
|
-
# validate that we have checkpoints downloaded before constructing MyRunner
|
133
|
-
uploader = ModelUploader(self.model_path)
|
134
|
-
uploader.download_checkpoints()
|
135
203
|
# construct MyRunner which will call load_model()
|
136
204
|
MyRunner = self._get_model_runner()
|
137
205
|
runner = MyRunner(
|
@@ -140,13 +208,8 @@ class ModelRunLocally:
|
|
140
208
|
compute_cluster_id="n/a",
|
141
209
|
user_id="n/a",
|
142
210
|
)
|
143
|
-
|
144
211
|
# send an inference.
|
145
|
-
|
146
|
-
if response.outputs[0].status.code != status_code_pb2.SUCCESS:
|
147
|
-
logger.error(f"Moddel Prediction failed: {response}")
|
148
|
-
else:
|
149
|
-
logger.info(f"Model Prediction succeeded: {response}")
|
212
|
+
self._run_model_inference(runner)
|
150
213
|
|
151
214
|
def test_model(self):
|
152
215
|
"""Test the model by running it locally in the virtual environment."""
|
@@ -156,31 +219,258 @@ class ModelRunLocally:
|
|
156
219
|
f"import sys; sys.path.append('{os.path.dirname(os.path.abspath(__file__))}'); "
|
157
220
|
f"from model_run_locally import ModelRunLocally; ModelRunLocally('{self.model_path}')._run_test()",
|
158
221
|
]
|
222
|
+
process = None
|
159
223
|
try:
|
160
224
|
logger.info("Testing the model locally...")
|
161
|
-
subprocess.
|
162
|
-
|
225
|
+
process = subprocess.Popen(command)
|
226
|
+
# Wait for the process to complete
|
227
|
+
process.wait()
|
228
|
+
if process.returncode == 0:
|
229
|
+
logger.info("Model tested successfully!")
|
230
|
+
if process.returncode != 0:
|
231
|
+
raise subprocess.CalledProcessError(process.returncode, command)
|
163
232
|
except subprocess.CalledProcessError as e:
|
164
233
|
logger.error(f"Error testing the model: {e}")
|
165
234
|
sys.exit(1)
|
235
|
+
except Exception as e:
|
236
|
+
logger.error(f"Unexpected error: {e}")
|
237
|
+
sys.exit(1)
|
238
|
+
finally:
|
239
|
+
# After the function runs, check if the process is still running
|
240
|
+
if process and process.poll() is None:
|
241
|
+
logger.info("Process is still running. Terminating process.")
|
242
|
+
process.terminate()
|
243
|
+
try:
|
244
|
+
process.wait(timeout=5)
|
245
|
+
except subprocess.TimeoutExpired:
|
246
|
+
logger.info("Process did not terminate gracefully. Killing process.")
|
247
|
+
# Kill the process if it doesn't terminate after 5 seconds
|
248
|
+
process.kill()
|
166
249
|
|
167
250
|
# run the model server
|
168
|
-
def run_model_server(self):
|
251
|
+
def run_model_server(self, port=8080):
|
169
252
|
"""Run the Clarifai Runners's model server."""
|
170
253
|
|
171
254
|
command = [
|
172
255
|
self.python_executable, "-m", "clarifai.runners.server", "--model_path", self.model_path,
|
173
|
-
"--start_dev_server"
|
256
|
+
"--start_dev_server", "--port",
|
257
|
+
str(port)
|
174
258
|
]
|
175
259
|
try:
|
176
|
-
logger.info(
|
260
|
+
logger.info(
|
261
|
+
f"Starting model server at localhost:{port} with the model at {self.model_path}...")
|
177
262
|
subprocess.check_call(command)
|
178
|
-
logger.info("Model server started successfully
|
263
|
+
logger.info("Model server started successfully and running at localhost:{port}")
|
179
264
|
except subprocess.CalledProcessError as e:
|
180
265
|
logger.error(f"Error running model server: {e}")
|
181
266
|
self.clean_up()
|
182
267
|
sys.exit(1)
|
183
268
|
|
269
|
+
def _docker_hash(self):
|
270
|
+
"""Generate a hash of the combined requirements file and Dockefile"""
|
271
|
+
with open(self.requirements_file, "r") as f:
|
272
|
+
requirements_hash = hashlib.md5(f.read().encode('utf-8')).hexdigest()
|
273
|
+
with open(os.path.join(self.model_path, "Dockerfile"), "r") as f:
|
274
|
+
dockerfile_hash = hashlib.md5(f.read().encode('utf-8')).hexdigest()
|
275
|
+
|
276
|
+
return hashlib.md5(f"{requirements_hash}{dockerfile_hash}".encode('utf-8')).hexdigest()
|
277
|
+
|
278
|
+
def is_docker_installed(self):
|
279
|
+
"""Checks if Docker is installed on the system."""
|
280
|
+
try:
|
281
|
+
logger.info("Checking if Docker is installed...")
|
282
|
+
subprocess.run(["docker", "--version"], check=True)
|
283
|
+
return True
|
284
|
+
except subprocess.CalledProcessError:
|
285
|
+
logger.error(
|
286
|
+
"Docker is not installed! Please install Docker to run the model in a container.")
|
287
|
+
return False
|
288
|
+
|
289
|
+
def build_docker_image(
|
290
|
+
self,
|
291
|
+
image_name="model_image",
|
292
|
+
):
|
293
|
+
"""Build the docker image using the Dockerfile in the model directory."""
|
294
|
+
try:
|
295
|
+
logger.info(f"Building docker image from Dockerfile in {self.model_path}...")
|
296
|
+
|
297
|
+
# since we don't want to copy the model directory into the container, we need to modify the Dockerfile and comment out the COPY instruction
|
298
|
+
dockerfile_path = os.path.join(self.model_path, "Dockerfile")
|
299
|
+
# Read the Dockerfile
|
300
|
+
with open(dockerfile_path, 'r') as file:
|
301
|
+
lines = file.readlines()
|
302
|
+
|
303
|
+
# Comment out the COPY instruction that copies the current folder
|
304
|
+
modified_lines = []
|
305
|
+
for line in lines:
|
306
|
+
if 'COPY .' in line and '/app/model_dir/main' in line:
|
307
|
+
modified_lines.append(f'# {line}')
|
308
|
+
else:
|
309
|
+
modified_lines.append(line)
|
310
|
+
|
311
|
+
# Create a temporary directory to store the modified Dockerfile
|
312
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
313
|
+
temp_dockerfile_path = os.path.join(temp_dir, "Dockerfile.temp")
|
314
|
+
|
315
|
+
# Write the modified Dockerfile to the temporary file
|
316
|
+
with open(temp_dockerfile_path, 'w') as file:
|
317
|
+
file.writelines(modified_lines)
|
318
|
+
|
319
|
+
# Build the Docker image using the temporary Dockerfile
|
320
|
+
subprocess.check_call(
|
321
|
+
['docker', 'build', '-t', image_name, '-f', temp_dockerfile_path, self.model_path])
|
322
|
+
logger.info(f"Docker image '{image_name}' built successfully!")
|
323
|
+
except subprocess.CalledProcessError as e:
|
324
|
+
logger.info(f"Error occurred while building the Docker image: {e}")
|
325
|
+
sys.exit(1)
|
326
|
+
|
327
|
+
def docker_image_exists(self, image_name):
|
328
|
+
"""Check if the Docker image exists."""
|
329
|
+
try:
|
330
|
+
logger.info(f"Checking if Docker image '{image_name}' exists...")
|
331
|
+
subprocess.run(["docker", "inspect", image_name], check=True)
|
332
|
+
logger.info(f"Docker image '{image_name}' exists!")
|
333
|
+
return True
|
334
|
+
except subprocess.CalledProcessError:
|
335
|
+
logger.info(f"Docker image '{image_name}' does not exist!")
|
336
|
+
return False
|
337
|
+
|
338
|
+
def run_docker_container(self,
|
339
|
+
image_name,
|
340
|
+
container_name="clarifai-model-container",
|
341
|
+
port=8080,
|
342
|
+
env_vars=None):
|
343
|
+
"""Runs a Docker container from the specified image."""
|
344
|
+
try:
|
345
|
+
logger.info(f"Running Docker container '{container_name}' from image '{image_name}'...")
|
346
|
+
# Base docker run command
|
347
|
+
cmd = [
|
348
|
+
"docker", "run", "--name", container_name, '--rm', "--gpus", "all", "--network", "host"
|
349
|
+
]
|
350
|
+
# Add volume mappings
|
351
|
+
cmd.extend(["-v", f"{self.model_path}:/app/model_dir/main"])
|
352
|
+
# Add environment variables
|
353
|
+
if env_vars:
|
354
|
+
for key, value in env_vars.items():
|
355
|
+
cmd.extend(["-e", f"{key}={value}"])
|
356
|
+
# Add the image name
|
357
|
+
cmd.append(image_name)
|
358
|
+
# update the CMD to run the server
|
359
|
+
cmd.extend(
|
360
|
+
["--model_path", "/app/model_dir/main", "--start_dev_server", "--port",
|
361
|
+
str(port)])
|
362
|
+
# Run the container
|
363
|
+
process = subprocess.Popen(cmd,)
|
364
|
+
logger.info(
|
365
|
+
f"Docker container '{container_name}' is running successfully! access the model at http://localhost:{port}"
|
366
|
+
)
|
367
|
+
|
368
|
+
# Function to handle Ctrl+C (SIGINT) gracefully
|
369
|
+
def signal_handler(sig, frame):
|
370
|
+
logger.info(f"Stopping Docker container '{container_name}'...")
|
371
|
+
subprocess.run(["docker", "stop", container_name], check=True)
|
372
|
+
process.terminate()
|
373
|
+
logger.info(f"Docker container '{container_name}' stopped successfully!")
|
374
|
+
time.sleep(1)
|
375
|
+
sys.exit(0)
|
376
|
+
|
377
|
+
# Register the signal handler for SIGINT (Ctrl+C)
|
378
|
+
signal.signal(signal.SIGINT, signal_handler)
|
379
|
+
# Wait for the process to finish (keeps the container running until it's stopped)
|
380
|
+
process.wait()
|
381
|
+
except subprocess.CalledProcessError as e:
|
382
|
+
logger.info(f"Error occurred while running the Docker container: {e}")
|
383
|
+
sys.exit(1)
|
384
|
+
except Exception as e:
|
385
|
+
logger.info(f"Error occurred while running the Docker container: {e}")
|
386
|
+
sys.exit(1)
|
387
|
+
|
388
|
+
def test_model_container(self,
|
389
|
+
image_name,
|
390
|
+
container_name="clarifai-model-container",
|
391
|
+
env_vars=None):
|
392
|
+
"""Test the model inside the Docker container."""
|
393
|
+
try:
|
394
|
+
logger.info("Testing the model inside the Docker container...")
|
395
|
+
# Base docker run command
|
396
|
+
cmd = [
|
397
|
+
"docker", "run", "--name", container_name, '--rm', "--gpus", "all", "--network", "host"
|
398
|
+
]
|
399
|
+
# update the entrypoint for testing the model
|
400
|
+
cmd.extend(["--entrypoint", "python"])
|
401
|
+
# Add volume mappings
|
402
|
+
cmd.extend(["-v", f"{self.model_path}:/app/model_dir/main"])
|
403
|
+
# Add environment variables
|
404
|
+
if env_vars:
|
405
|
+
for key, value in env_vars.items():
|
406
|
+
cmd.extend(["-e", f"{key}={value}"])
|
407
|
+
# Add the image name
|
408
|
+
cmd.append(image_name)
|
409
|
+
# update the CMD to test the model inside the container
|
410
|
+
cmd.extend([
|
411
|
+
"-c",
|
412
|
+
"from clarifai.runners.models.model_run_locally import ModelRunLocally; ModelRunLocally('/app/model_dir/main')._run_test()"
|
413
|
+
])
|
414
|
+
# Run the container
|
415
|
+
subprocess.check_call(cmd)
|
416
|
+
logger.info("Model tested successfully!")
|
417
|
+
except subprocess.CalledProcessError as e:
|
418
|
+
logger.error(f"Error testing the model inside the Docker container: {e}")
|
419
|
+
sys.exit(1)
|
420
|
+
|
421
|
+
def container_exists(self, container_name="clarifai-model-container"):
|
422
|
+
"""Check if the Docker container exists."""
|
423
|
+
try:
|
424
|
+
# Run docker ps -a to list all containers (running and stopped)
|
425
|
+
result = subprocess.run(
|
426
|
+
["docker", "ps", "-a", "--filter", f"name={container_name}", "--format", "{{.Names}}"],
|
427
|
+
check=True,
|
428
|
+
capture_output=True,
|
429
|
+
text=True)
|
430
|
+
# If the container name is returned, it exists
|
431
|
+
if result.stdout.strip() == container_name:
|
432
|
+
logger.info(f"Docker container '{container_name}' exists.")
|
433
|
+
return True
|
434
|
+
else:
|
435
|
+
return False
|
436
|
+
except subprocess.CalledProcessError as e:
|
437
|
+
logger.error(f"Error occurred while checking if container exists: {e}")
|
438
|
+
return False
|
439
|
+
|
440
|
+
def stop_docker_container(self, container_name="clarifai-model-container"):
|
441
|
+
"""Stop the Docker container if it's running."""
|
442
|
+
try:
|
443
|
+
# Check if the container is running
|
444
|
+
result = subprocess.run(
|
445
|
+
["docker", "ps", "--filter", f"name={container_name}", "--format", "{{.Names}}"],
|
446
|
+
check=True,
|
447
|
+
capture_output=True,
|
448
|
+
text=True)
|
449
|
+
if result.stdout.strip() == container_name:
|
450
|
+
logger.info(f"Docker container '{container_name}' is running. Stopping it...")
|
451
|
+
subprocess.run(["docker", "stop", container_name], check=True)
|
452
|
+
logger.info(f"Docker container '{container_name}' stopped successfully!")
|
453
|
+
except subprocess.CalledProcessError as e:
|
454
|
+
logger.error(f"Error occurred while stopping the Docker container: {e}")
|
455
|
+
|
456
|
+
def remove_docker_container(self, container_name="clarifai-model-container"):
|
457
|
+
"""Remove the Docker container."""
|
458
|
+
try:
|
459
|
+
logger.info(f"Removing Docker container '{container_name}'...")
|
460
|
+
subprocess.run(["docker", "rm", container_name], check=True)
|
461
|
+
logger.info(f"Docker container '{container_name}' removed successfully!")
|
462
|
+
except subprocess.CalledProcessError as e:
|
463
|
+
logger.error(f"Error occurred while removing the Docker container: {e}")
|
464
|
+
|
465
|
+
def remove_docker_image(self, image_name):
|
466
|
+
"""Remove the Docker image."""
|
467
|
+
try:
|
468
|
+
logger.info(f"Removing Docker image '{image_name}'...")
|
469
|
+
subprocess.run(["docker", "rmi", image_name], check=True)
|
470
|
+
logger.info(f"Docker image '{image_name}' removed successfully!")
|
471
|
+
except subprocess.CalledProcessError as e:
|
472
|
+
logger.error(f"Error occurred while removing the Docker image: {e}")
|
473
|
+
|
184
474
|
def clean_up(self):
|
185
475
|
"""Clean up the temporary virtual environment."""
|
186
476
|
if os.path.exists(self.temp_dir):
|
@@ -188,18 +478,53 @@ class ModelRunLocally:
|
|
188
478
|
shutil.rmtree(self.temp_dir)
|
189
479
|
|
190
480
|
|
191
|
-
def main(model_path,
|
481
|
+
def main(model_path,
|
482
|
+
run_model_server=False,
|
483
|
+
inside_container=False,
|
484
|
+
port=8080,
|
485
|
+
keep_env=False,
|
486
|
+
keep_image=False):
|
192
487
|
|
488
|
+
if not os.environ['CLARIFAI_PAT']:
|
489
|
+
logger.error(
|
490
|
+
"CLARIFAI_PAT environment variable is not set! Please set your PAT in the 'CLARIFAI_PAT' environment variable."
|
491
|
+
)
|
492
|
+
sys.exit(1)
|
193
493
|
manager = ModelRunLocally(model_path)
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
494
|
+
manager.uploader.download_checkpoints()
|
495
|
+
if inside_container:
|
496
|
+
if not manager.is_docker_installed():
|
497
|
+
sys.exit(1)
|
498
|
+
manager.uploader.create_dockerfile()
|
499
|
+
image_tag = manager._docker_hash()
|
500
|
+
image_name = f"{manager.config['model']['id']}:{image_tag}"
|
501
|
+
container_name = manager.config['model']['id']
|
502
|
+
if not manager.docker_image_exists(image_name):
|
503
|
+
manager.build_docker_image(image_name=image_name)
|
504
|
+
try:
|
505
|
+
envs = {'CLARIFAI_PAT': os.environ['CLARIFAI_PAT'], 'CLARIFAI_USER_ID': 'n/a'}
|
506
|
+
if run_model_server:
|
507
|
+
manager.run_docker_container(
|
508
|
+
image_name=image_name, container_name=container_name, port=port, env_vars=envs)
|
509
|
+
else:
|
510
|
+
manager.test_model_container(
|
511
|
+
image_name=image_name, container_name=container_name, env_vars=envs)
|
512
|
+
finally:
|
513
|
+
if manager.container_exists(container_name):
|
514
|
+
manager.stop_docker_container(container_name)
|
515
|
+
manager.remove_docker_container(container_name=container_name)
|
516
|
+
if not keep_image:
|
517
|
+
manager.remove_docker_image(image_name)
|
518
|
+
|
519
|
+
else:
|
520
|
+
try:
|
521
|
+
use_existing_env = manager.create_temp_venv()
|
522
|
+
if not use_existing_env:
|
523
|
+
manager.install_requirements()
|
524
|
+
if run_model_server:
|
525
|
+
manager.run_model_server(port)
|
526
|
+
else:
|
527
|
+
manager.test_model()
|
528
|
+
finally:
|
529
|
+
if not keep_env:
|
530
|
+
manager.clean_up()
|
@@ -10,6 +10,9 @@ from google.protobuf import json_format
|
|
10
10
|
from rich import print
|
11
11
|
|
12
12
|
from clarifai.client import BaseClient
|
13
|
+
from clarifai.runners.utils.const import (AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES,
|
14
|
+
CONCEPTS_REQUIRED_MODEL_TYPE, DEFAULT_PYTHON_VERSION,
|
15
|
+
PYTHON_BASE_IMAGE, TORCH_BASE_IMAGE)
|
13
16
|
from clarifai.runners.utils.loader import HuggingFaceLoader
|
14
17
|
from clarifai.urls.helper import ClarifaiUrlHelper
|
15
18
|
from clarifai.utils.logging import logger
|
@@ -23,48 +26,6 @@ def _clear_line(n: int = 1) -> None:
|
|
23
26
|
|
24
27
|
|
25
28
|
class ModelUploader:
|
26
|
-
DEFAULT_PYTHON_VERSION = 3.11
|
27
|
-
DEFAULT_TORCH_VERSION = '2.4.0'
|
28
|
-
DEFAULT_CUDA_VERSION = '124'
|
29
|
-
# List of available torch images for matrix
|
30
|
-
'''
|
31
|
-
python_version: ['3.8', '3.9', '3.10', '3.11']
|
32
|
-
torch_version: ['2.0.0', '2.1.0', '2.2.0', '2.3.0', '2.4.0', '2.4.1', '2.5.0']
|
33
|
-
cuda_version: ['124']
|
34
|
-
'''
|
35
|
-
AVAILABLE_TORCH_IMAGES = [
|
36
|
-
'2.0.0-py3.8-cuda124',
|
37
|
-
'2.0.0-py3.9-cuda124',
|
38
|
-
'2.0.0-py3.10-cuda124',
|
39
|
-
'2.0.0-py3.11-cuda124',
|
40
|
-
'2.1.0-py3.8-cuda124',
|
41
|
-
'2.1.0-py3.9-cuda124',
|
42
|
-
'2.1.0-py3.10-cuda124',
|
43
|
-
'2.1.0-py3.11-cuda124',
|
44
|
-
'2.2.0-py3.8-cuda124',
|
45
|
-
'2.2.0-py3.9-cuda124',
|
46
|
-
'2.2.0-py3.10-cuda124',
|
47
|
-
'2.2.0-py3.11-cuda124',
|
48
|
-
'2.3.0-py3.8-cuda124',
|
49
|
-
'2.3.0-py3.9-cuda124',
|
50
|
-
'2.3.0-py3.10-cuda124',
|
51
|
-
'2.3.0-py3.11-cuda124',
|
52
|
-
'2.4.0-py3.8-cuda124',
|
53
|
-
'2.4.0-py3.9-cuda124',
|
54
|
-
'2.4.0-py3.10-cuda124',
|
55
|
-
'2.4.0-py3.11-cuda124',
|
56
|
-
'2.4.1-py3.8-cuda124',
|
57
|
-
'2.4.1-py3.9-cuda124',
|
58
|
-
'2.4.1-py3.10-cuda124',
|
59
|
-
'2.4.1-py3.11-cuda124',
|
60
|
-
]
|
61
|
-
AVAILABLE_PYTHON_IMAGES = ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
|
62
|
-
PYTHON_BASE_IMAGE = 'public.ecr.aws/clarifai-models/python-base:{python_version}'
|
63
|
-
TORCH_BASE_IMAGE = 'public.ecr.aws/clarifai-models/torch:{torch_version}-py{python_version}-cuda{cuda_version}'
|
64
|
-
|
65
|
-
CONCEPTS_REQUIRED_MODEL_TYPE = [
|
66
|
-
'visual-classifier', 'visual-detector', 'visual-segmenter', 'text-classifier'
|
67
|
-
]
|
68
29
|
|
69
30
|
def __init__(self, folder: str):
|
70
31
|
self._client = None
|
@@ -233,33 +194,35 @@ class ModelUploader:
|
|
233
194
|
build_info = self.config.get('build_info', {})
|
234
195
|
if 'python_version' in build_info:
|
235
196
|
python_version = build_info['python_version']
|
236
|
-
if python_version not in
|
197
|
+
if python_version not in AVAILABLE_PYTHON_IMAGES:
|
237
198
|
logger.error(
|
238
|
-
f"Python version {python_version} not supported, please use one of the following versions: {
|
199
|
+
f"Python version {python_version} not supported, please use one of the following versions: {AVAILABLE_PYTHON_IMAGES}"
|
239
200
|
)
|
240
201
|
return
|
241
202
|
logger.info(
|
242
203
|
f"Using Python version {python_version} from the config file to build the Dockerfile")
|
243
204
|
else:
|
244
205
|
logger.info(
|
245
|
-
f"Python version not found in the config file, using default Python version: {
|
206
|
+
f"Python version not found in the config file, using default Python version: {DEFAULT_PYTHON_VERSION}"
|
246
207
|
)
|
247
|
-
python_version =
|
208
|
+
python_version = DEFAULT_PYTHON_VERSION
|
248
209
|
|
249
|
-
base_image =
|
210
|
+
base_image = PYTHON_BASE_IMAGE.format(python_version=python_version)
|
250
211
|
|
251
212
|
# Parse the requirements.txt file to determine the base image
|
252
213
|
dependencies = self._parse_requirements()
|
253
214
|
if 'torch' in dependencies and dependencies['torch']:
|
254
215
|
torch_version = dependencies['torch']
|
255
216
|
|
256
|
-
for image in
|
217
|
+
for image in AVAILABLE_TORCH_IMAGES:
|
257
218
|
if torch_version in image and f'py{python_version}' in image:
|
258
|
-
|
219
|
+
cuda_version = image.split('-')[-1].replace('cuda', '')
|
220
|
+
base_image = TORCH_BASE_IMAGE.format(
|
259
221
|
torch_version=torch_version,
|
260
222
|
python_version=python_version,
|
261
|
-
cuda_version=
|
262
|
-
|
223
|
+
cuda_version=cuda_version,
|
224
|
+
)
|
225
|
+
logger.info(f"Using Torch version {torch_version} base image to build the Docker image")
|
263
226
|
break
|
264
227
|
|
265
228
|
# Replace placeholders with actual values
|
@@ -314,7 +277,7 @@ class ModelUploader:
|
|
314
277
|
config = yaml.safe_load(file)
|
315
278
|
model = config.get('model')
|
316
279
|
model_type_id = model.get('model_type_id')
|
317
|
-
assert model_type_id in
|
280
|
+
assert model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"
|
318
281
|
concept_protos = self._concepts_protos_from_concepts(labels)
|
319
282
|
|
320
283
|
config['concepts'] = [{'id': concept.id, 'name': concept.name} for concept in concept_protos]
|
@@ -332,7 +295,7 @@ class ModelUploader:
|
|
332
295
|
)
|
333
296
|
|
334
297
|
model_type_id = self.config.get('model').get('model_type_id')
|
335
|
-
if model_type_id in
|
298
|
+
if model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE:
|
336
299
|
|
337
300
|
if 'concepts' in self.config:
|
338
301
|
labels = self.config.get('concepts')
|
@@ -347,7 +310,10 @@ class ModelUploader:
|
|
347
310
|
labels = sorted(labels.items(), key=lambda x: int(x[0]))
|
348
311
|
|
349
312
|
config_file = os.path.join(self.folder, 'config.yaml')
|
350
|
-
|
313
|
+
try:
|
314
|
+
self.hf_labels_to_config(labels, config_file)
|
315
|
+
except Exception as e:
|
316
|
+
logger.error(f"Failed to update the config.yaml file with the concepts: {e}")
|
351
317
|
|
352
318
|
model_version_proto.output_info.data.concepts.extend(
|
353
319
|
self._concepts_protos_from_concepts(labels))
|
@@ -359,7 +325,7 @@ class ModelUploader:
|
|
359
325
|
|
360
326
|
model_type_id = self.config.get('model').get('model_type_id')
|
361
327
|
|
362
|
-
if (model_type_id in
|
328
|
+
if (model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE) and 'concepts' not in self.config:
|
363
329
|
logger.info(
|
364
330
|
f"Model type {model_type_id} requires concepts to be specified in the config.yaml file.."
|
365
331
|
)
|
@@ -473,8 +439,21 @@ class ModelUploader:
|
|
473
439
|
is_v3=self.is_v3,
|
474
440
|
))
|
475
441
|
|
442
|
+
def get_model_build_logs(self):
|
443
|
+
logs_request = service_pb2.ListLogEntriesRequest(
|
444
|
+
log_type="builder",
|
445
|
+
user_app_id=self.client.user_app_id,
|
446
|
+
model_id=self.model_proto.id,
|
447
|
+
model_version_id=self.model_version_id,
|
448
|
+
page=1,
|
449
|
+
per_page=50)
|
450
|
+
response = self.client.STUB.ListLogEntries(logs_request)
|
451
|
+
|
452
|
+
return response
|
453
|
+
|
476
454
|
def monitor_model_build(self):
|
477
455
|
st = time.time()
|
456
|
+
seen_logs = set() # To avoid duplicate log messages
|
478
457
|
while True:
|
479
458
|
resp = self.client.STUB.GetModelVersion(
|
480
459
|
service_pb2.GetModelVersionRequest(
|
@@ -485,6 +464,13 @@ class ModelUploader:
|
|
485
464
|
status_code = resp.model_version.status.code
|
486
465
|
if status_code == status_code_pb2.MODEL_BUILDING:
|
487
466
|
print(f"Model is building... (elapsed {time.time() - st:.1f}s)", end='\r', flush=True)
|
467
|
+
|
468
|
+
# Fetch and display the logs
|
469
|
+
logs = self.get_model_build_logs()
|
470
|
+
for log_entry in logs.log_entries:
|
471
|
+
if log_entry.url not in seen_logs:
|
472
|
+
seen_logs.add(log_entry.url)
|
473
|
+
print(f"Model Building Logs...: {log_entry.message.strip()}")
|
488
474
|
time.sleep(1)
|
489
475
|
elif status_code == status_code_pb2.MODEL_TRAINED:
|
490
476
|
logger.info(f"\nModel build complete! (elapsed {time.time() - st:.1f}s)")
|
clarifai/runners/server.py
CHANGED
@@ -108,6 +108,7 @@ def main():
|
|
108
108
|
runner_id="n/a",
|
109
109
|
nodepool_id="n/a",
|
110
110
|
compute_cluster_id="n/a",
|
111
|
+
user_id="n/a",
|
111
112
|
health_check_port=None, # not needed when running local server
|
112
113
|
)
|
113
114
|
|
@@ -127,6 +128,7 @@ def main():
|
|
127
128
|
service_pb2_grpc.add_V2Servicer_to_server(servicer, server)
|
128
129
|
server.start()
|
129
130
|
logger.info("Started server on port %s", parsed_args.port)
|
131
|
+
logger.info(f"Access the model at http://localhost:{parsed_args.port}")
|
130
132
|
server.wait_for_termination()
|
131
133
|
else: # start the runner with the proper env variables and as a runner protocol.
|
132
134
|
|
@@ -0,0 +1,40 @@
|
|
1
|
+
PYTHON_BASE_IMAGE = 'public.ecr.aws/clarifai-models/python-base:{python_version}'
|
2
|
+
TORCH_BASE_IMAGE = 'public.ecr.aws/clarifai-models/torch:{torch_version}-py{python_version}-cuda{cuda_version}'
|
3
|
+
|
4
|
+
# List of available python base images
|
5
|
+
AVAILABLE_PYTHON_IMAGES = ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
|
6
|
+
|
7
|
+
DEFAULT_PYTHON_VERSION = 3.11
|
8
|
+
|
9
|
+
# List of available torch images
|
10
|
+
AVAILABLE_TORCH_IMAGES = [
|
11
|
+
'1.13.1-py3.8-cuda117',
|
12
|
+
'1.13.1-py3.9-cuda117',
|
13
|
+
'1.13.1-py3.10-cuda117',
|
14
|
+
'2.1.2-py3.8-cuda121',
|
15
|
+
'2.1.2-py3.9-cuda121',
|
16
|
+
'2.1.2-py3.10-cuda121',
|
17
|
+
'2.1.2-py3.11-cuda121',
|
18
|
+
'2.2.2-py3.8-cuda121',
|
19
|
+
'2.2.2-py3.9-cuda121',
|
20
|
+
'2.2.2-py3.10-cuda121',
|
21
|
+
'2.2.2-py3.11-cuda121',
|
22
|
+
'2.2.2-py3.12-cuda121',
|
23
|
+
'2.3.1-py3.8-cuda121',
|
24
|
+
'2.3.1-py3.9-cuda121',
|
25
|
+
'2.3.1-py3.10-cuda121',
|
26
|
+
'2.3.1-py3.11-cuda121',
|
27
|
+
'2.3.1-py3.12-cuda121',
|
28
|
+
'2.4.1-py3.8-cuda124',
|
29
|
+
'2.4.1-py3.9-cuda124',
|
30
|
+
'2.4.1-py3.10-cuda124',
|
31
|
+
'2.4.1-py3.11-cuda124',
|
32
|
+
'2.4.1-py3.12-cuda124',
|
33
|
+
'2.5.1-py3.9-cuda124',
|
34
|
+
'2.5.1-py3.10-cuda124',
|
35
|
+
'2.5.1-py3.11-cuda124',
|
36
|
+
'2.5.1-py3.12-cuda124',
|
37
|
+
]
|
38
|
+
CONCEPTS_REQUIRED_MODEL_TYPE = [
|
39
|
+
'visual-classifier', 'visual-detector', 'visual-segmenter', 'text-classifier'
|
40
|
+
]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: clarifai
|
3
|
-
Version:
|
3
|
+
Version: 11.0.0
|
4
4
|
Summary: Clarifai Python SDK
|
5
5
|
Home-page: https://github.com/Clarifai/clarifai-python
|
6
6
|
Author: Clarifai
|
@@ -20,8 +20,8 @@ Classifier: Operating System :: OS Independent
|
|
20
20
|
Requires-Python: >=3.8
|
21
21
|
Description-Content-Type: text/markdown
|
22
22
|
License-File: LICENSE
|
23
|
-
Requires-Dist: clarifai-grpc>=10.
|
24
|
-
Requires-Dist: clarifai-protocol>=0.0.
|
23
|
+
Requires-Dist: clarifai-grpc>=10.11.2
|
24
|
+
Requires-Dist: clarifai-protocol>=0.0.14
|
25
25
|
Requires-Dist: numpy>=1.22.0
|
26
26
|
Requires-Dist: tqdm>=4.65.0
|
27
27
|
Requires-Dist: tritonclient>=2.34.0
|
@@ -31,7 +31,6 @@ Requires-Dist: schema==0.7.5
|
|
31
31
|
Requires-Dist: Pillow>=9.5.0
|
32
32
|
Requires-Dist: inquirerpy==0.3.4
|
33
33
|
Requires-Dist: tabulate>=0.9.0
|
34
|
-
Requires-Dist: protobuf==5.27.3
|
35
34
|
Requires-Dist: fsspec==2024.6.1
|
36
35
|
Requires-Dist: click==8.1.7
|
37
36
|
Provides-Extra: all
|
@@ -73,6 +72,10 @@ Give the repo a star ⭐
|
|
73
72
|
|
74
73
|
* **[Installation](#rocket-installation)**
|
75
74
|
* **[Getting Started](#memo-getting-started)**
|
75
|
+
* **[Compute Orchestration](#rocket-compute-orchestration)**
|
76
|
+
* [Cluster Operations](#cluster-operations)
|
77
|
+
* [Nodepool Operations](#nodepool-operations)
|
78
|
+
* [Deployment Operations](#deployment-operations)
|
76
79
|
* **[Interacting with Datasets](#floppy_disk-interacting-with-datasets)**
|
77
80
|
* **[Interacting with Inputs](#floppy_disk-interacting-with-inputs)**
|
78
81
|
* [Input Upload](#input-upload)
|
@@ -157,6 +160,73 @@ client = User(user_id="user_id", pat="your personal access token")
|
|
157
160
|
```
|
158
161
|
|
159
162
|
|
163
|
+
## :rocket: Compute Orchestration
|
164
|
+
|
165
|
+
Clarifai’s Compute Orchestration offers a streamlined solution for managing the infrastructure required for training, deploying, and scaling machine learning models and workflows.
|
166
|
+
|
167
|
+
This flexible system supports any compute instance — across various hardware providers and deployment methods — and provides automatic scaling to match workload demands. [More Details](https://www.clarifai.com/products/compute-orchestration)
|
168
|
+
|
169
|
+
#### Cluster Operations
|
170
|
+
```python
|
171
|
+
from clarifai.client.user import User
|
172
|
+
client = User(user_id="user_id",base_url="https://api.clarifai.com")
|
173
|
+
|
174
|
+
# Create a new compute cluster
|
175
|
+
compute_cluster = client.create_compute_cluster(compute_cluster_id="demo-id",config_filepath="compute_cluster_config.yaml")
|
176
|
+
|
177
|
+
# List Clusters
|
178
|
+
all_compute_clusters = list(client.list_compute_clusters())
|
179
|
+
print(all_compute_clusters)
|
180
|
+
```
|
181
|
+
##### [Example Cluster Config](https://github.com/Clarifai/examples/blob/main/ComputeOrchestration/configs/compute_cluster_config.yaml)
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
#### Nodepool Operations
|
186
|
+
```python
|
187
|
+
from clarifai.client.compute_cluster import ComputeCluster
|
188
|
+
|
189
|
+
# Initialize the ComputeCluster instance
|
190
|
+
compute_cluster = ComputeCluster(user_id="user_id",compute_cluster_id="demo-id")
|
191
|
+
|
192
|
+
# Create a new nodepool
|
193
|
+
nodepool = compute_cluster.create_nodepool(nodepool_id="demo-nodepool-id",config_filepath="nodepool_config.yaml")
|
194
|
+
|
195
|
+
#Get a nodepool
|
196
|
+
nodepool = compute_cluster.nodepool(nodepool_id="demo-nodepool-id")
|
197
|
+
print(nodepool)
|
198
|
+
|
199
|
+
# List nodepools
|
200
|
+
all_nodepools = list(compute_cluster.list_nodepools())
|
201
|
+
print(all_nodepools)
|
202
|
+
```
|
203
|
+
##### [Example Nodepool config](https://github.com/Clarifai/examples/blob/main/ComputeOrchestration/configs/nodepool_config.yaml)
|
204
|
+
|
205
|
+
#### Deployment Operations
|
206
|
+
```python
|
207
|
+
from clarifai.client.nodepool import Nodepool
|
208
|
+
|
209
|
+
# Initialize the Nodepool instance
|
210
|
+
nodepool = Nodepool(user_id="user_id",nodepool_id="demo-nodepool-id")
|
211
|
+
|
212
|
+
# Create a new deployment
|
213
|
+
deployment = nodepool.create_deployment(deployment_id="demo-deployment-id",config_filepath="deployment_config.yaml")
|
214
|
+
|
215
|
+
#Get a deployment
|
216
|
+
deployment = nodepool.deployment(deployment_id="demo-deployment-id")
|
217
|
+
print(deployment)
|
218
|
+
|
219
|
+
# List deployments
|
220
|
+
all_deployments = list(nodepool.list_deployments())
|
221
|
+
print(all_deployments)
|
222
|
+
|
223
|
+
```
|
224
|
+
##### [Example Deployment config](https://github.com/Clarifai/examples/blob/main/ComputeOrchestration/configs/deployment_config.yaml)
|
225
|
+
|
226
|
+
#### Compute Orchestration CLI Operations
|
227
|
+
Refer Here: https://github.com/Clarifai/clarifai-python/tree/master/clarifai/cli
|
228
|
+
|
229
|
+
|
160
230
|
## :floppy_disk: Interacting with Datasets
|
161
231
|
|
162
232
|
Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets, uploading datasets, retrying failed uploads from logs, and exporting datasets as .zip files.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
clarifai/__init__.py,sha256=
|
1
|
+
clarifai/__init__.py,sha256=kddY_tek1AxisIg2ZL8YswPu4Sub-3LvRxixXkX8sSQ,23
|
2
2
|
clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
clarifai/errors.py,sha256=RwzTajwds51wLD0MVlMC5kcpBnzRpreDLlazPSBZxrg,2605
|
4
4
|
clarifai/versions.py,sha256=jctnczzfGk_S3EnVqb2FjRKfSREkNmvNEwAAa_VoKiQ,222
|
@@ -7,7 +7,7 @@ clarifai/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
clarifai/cli/base.py,sha256=okuBNlMmLEQw9-0f4yzemCtneNNRTVXUugCwD58-ZtQ,3417
|
8
8
|
clarifai/cli/compute_cluster.py,sha256=N2dNQNJEPg9nxsb8x2igEzYuGRzjn7l4kNttjFIxmhI,1827
|
9
9
|
clarifai/cli/deployment.py,sha256=sUEuz5-rtozMx8deVcJXLi6lHsP2jc8x3y2MpUAVfqY,2506
|
10
|
-
clarifai/cli/model.py,sha256=
|
10
|
+
clarifai/cli/model.py,sha256=_l-vIqQpVF54SyWadHz-_6-6tS8SYmdDJmpUhKGv9AM,10475
|
11
11
|
clarifai/cli/nodepool.py,sha256=yihxS_rIFoBBKzRlqBX8Ab42iPpBMJrJFsk8saph6ms,3049
|
12
12
|
clarifai/client/__init__.py,sha256=xI1U0l5AZdRThvQAXCLsd9axxyFzXXJ22m8LHqVjQRU,662
|
13
13
|
clarifai/client/app.py,sha256=6pckYme1urV2YJjLIYfeZ-vH0Z5YSQa51jzIMcEfwug,38342
|
@@ -39,10 +39,10 @@ clarifai/datasets/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
|
|
39
39
|
clarifai/datasets/export/inputs_annotations.py,sha256=3AtUBrMIjw8H3ehDsJFYcBFoAZ1QKQo1hXTMsHh8f20,10159
|
40
40
|
clarifai/datasets/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
41
41
|
clarifai/datasets/upload/base.py,sha256=UIc0ufyIBCrb83_sFpv21L8FshsX4nwsLYQkdlJfzD4,2357
|
42
|
-
clarifai/datasets/upload/features.py,sha256=
|
42
|
+
clarifai/datasets/upload/features.py,sha256=GK69WvUYnks5G26Z5L5XSisBIZILLv7lYhS2y8BJCt0,2031
|
43
43
|
clarifai/datasets/upload/image.py,sha256=HlCsfEMu_C4GVecGSv52RUJ6laLW8H64Pfj_FQyX6qg,8580
|
44
|
-
clarifai/datasets/upload/multimodal.py,sha256=
|
45
|
-
clarifai/datasets/upload/text.py,sha256=
|
44
|
+
clarifai/datasets/upload/multimodal.py,sha256=_NpNQak9KMn0NOiOr48MYnXL0GQZ1LXKhwdYF1HhrHs,2377
|
45
|
+
clarifai/datasets/upload/text.py,sha256=dpRMNz49EyKau0kwksEaNV6TLBUf5lSr7t5g3pG2byM,2298
|
46
46
|
clarifai/datasets/upload/utils.py,sha256=BerWhq40ZUN30z6VImlc93eZtT-1vI18AMgSOuNzJEM,9647
|
47
47
|
clarifai/datasets/upload/loaders/README.md,sha256=aNRutSCTzLp2ruIZx74ZkN5AxpzwKOxMa7OzabnKpwg,2980
|
48
48
|
clarifai/datasets/upload/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -58,19 +58,20 @@ clarifai/modules/css.py,sha256=kadCEunmyh5h2yf0-4aysE3ZcZ6qaQcxuAgDXS96yF8,2020
|
|
58
58
|
clarifai/modules/pages.py,sha256=iOoM3RNRMgXlV0qBqcdQofxoXo2RuRQh0h9c9BIS0-I,1383
|
59
59
|
clarifai/modules/style.css,sha256=j7FNPZVhLPj35vvBksAJ90RuX5sLuqzDR5iM2WIEhiA,6073
|
60
60
|
clarifai/rag/__init__.py,sha256=wu3PzAzo7uqgrEzuaC9lY_3gj1HFiR3GU3elZIKTT5g,40
|
61
|
-
clarifai/rag/rag.py,sha256=
|
61
|
+
clarifai/rag/rag.py,sha256=bqUWnfdf91OYMucEK0_rJXDwg0oKjz5c7eda-9CPXu8,12680
|
62
62
|
clarifai/rag/utils.py,sha256=yr1jAcbpws4vFGBqlAwPPE7v1DRba48g8gixLFw8OhQ,4070
|
63
63
|
clarifai/runners/__init__.py,sha256=3vr4RVvN1IRy2SxJpyycAAvrUBbH-mXR7pqUmu4w36A,412
|
64
|
-
clarifai/runners/server.py,sha256=
|
65
|
-
clarifai/runners/dockerfile_template/Dockerfile.template,sha256
|
64
|
+
clarifai/runners/server.py,sha256=Z46BCUqpb3GpcD59qJKb7rL5dpl6vzMYNG3o0MGzox8,5001
|
65
|
+
clarifai/runners/dockerfile_template/Dockerfile.template,sha256=mW3Bdu9elMpI75UbBD0JX8rS085FXEPUPDVXkHibxiE,1548
|
66
66
|
clarifai/runners/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
67
67
|
clarifai/runners/models/base_typed_model.py,sha256=DWEUK5ge9NVZE6LkT3BNTFYjYMPHz-nDgPA48Y0DGXU,7859
|
68
68
|
clarifai/runners/models/model_class.py,sha256=9JSPAr4U4K7xI0kSl-q0mHB06zknm2OR-8XIgBCto94,1611
|
69
|
-
clarifai/runners/models/model_run_locally.py,sha256=
|
69
|
+
clarifai/runners/models/model_run_locally.py,sha256=OhzQbmaV8Wwgs2H0KhdDF6Z7bYSaIh4RRA0QwSiv5vY,20644
|
70
70
|
clarifai/runners/models/model_runner.py,sha256=3vzoastQxkGRDK8T9aojDsLNBb9A3IiKm6YmbFrE9S0,6241
|
71
71
|
clarifai/runners/models/model_servicer.py,sha256=X4715PVA5PBurRTYcwSEudg8fShGV6InAF4mmRlRcHg,2826
|
72
|
-
clarifai/runners/models/model_upload.py,sha256=
|
72
|
+
clarifai/runners/models/model_upload.py,sha256=xQ0AqeBVePPwmMVM5uOiXRTRV09U-du2FduKv7Qgl-A,20087
|
73
73
|
clarifai/runners/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
|
+
clarifai/runners/utils/const.py,sha256=zINrIjDESUykq6xMLKNghwJ6N8qxDAtAJ6-1bH8VdOw,1238
|
74
75
|
clarifai/runners/utils/data_handler.py,sha256=sxy9zlAgI6ETuxCQhUgEXAn2GCsaW1GxpK6GTaMne0g,6966
|
75
76
|
clarifai/runners/utils/data_utils.py,sha256=R1iQ82TuQ9JwxCJk8yEB1Lyb0BYVhVbWJI9YDi1zGOs,318
|
76
77
|
clarifai/runners/utils/loader.py,sha256=1oktDUQA1Lpv0NiCXFwoxpp0jqqbvB7sWvpymwyWY2E,4243
|
@@ -91,9 +92,9 @@ clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
|
|
91
92
|
clarifai/workflows/export.py,sha256=vICRhIreqDSShxLKjHNM2JwzKsf1B4fdXB0ciMcA70k,1945
|
92
93
|
clarifai/workflows/utils.py,sha256=nGeB_yjVgUO9kOeKTg4OBBaBz-AwXI3m-huSVj-9W18,1924
|
93
94
|
clarifai/workflows/validate.py,sha256=yJq03MaJqi5AK3alKGJJBR89xmmjAQ31sVufJUiOqY8,2556
|
94
|
-
clarifai-
|
95
|
-
clarifai-
|
96
|
-
clarifai-
|
97
|
-
clarifai-
|
98
|
-
clarifai-
|
99
|
-
clarifai-
|
95
|
+
clarifai-11.0.0.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
|
96
|
+
clarifai-11.0.0.dist-info/METADATA,sha256=GTidVDMb8d5MHEf3yAtZkS1xbjytKCQXU5IqnefLnu0,22219
|
97
|
+
clarifai-11.0.0.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
|
98
|
+
clarifai-11.0.0.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
|
99
|
+
clarifai-11.0.0.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
|
100
|
+
clarifai-11.0.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|