clarifai 10.10.1__py3-none-any.whl → 10.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/cli/model.py +209 -8
- clarifai/client/model.py +1 -1
- clarifai/datasets/export/inputs_annotations.py +8 -0
- clarifai/rag/rag.py +6 -2
- clarifai/runners/dockerfile_template/Dockerfile.template +2 -0
- clarifai/runners/models/base_typed_model.py +6 -3
- clarifai/runners/models/model_run_locally.py +372 -32
- clarifai/runners/models/model_upload.py +42 -56
- clarifai/runners/server.py +2 -0
- clarifai/runners/utils/const.py +40 -0
- clarifai/runners/utils/url_fetcher.py +19 -12
- {clarifai-10.10.1.dist-info → clarifai-10.11.1.dist-info}/METADATA +86 -16
- {clarifai-10.10.1.dist-info → clarifai-10.11.1.dist-info}/RECORD +18 -17
- {clarifai-10.10.1.dist-info → clarifai-10.11.1.dist-info}/WHEEL +1 -1
- {clarifai-10.10.1.dist-info → clarifai-10.11.1.dist-info}/LICENSE +0 -0
- {clarifai-10.10.1.dist-info → clarifai-10.11.1.dist-info}/entry_points.txt +0 -0
- {clarifai-10.10.1.dist-info → clarifai-10.11.1.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,13 @@
|
|
1
|
+
import hashlib
|
1
2
|
import importlib.util
|
2
3
|
import inspect
|
3
4
|
import os
|
4
5
|
import shutil
|
6
|
+
import signal
|
5
7
|
import subprocess
|
6
8
|
import sys
|
7
9
|
import tempfile
|
10
|
+
import time
|
8
11
|
import traceback
|
9
12
|
import venv
|
10
13
|
|
@@ -13,6 +16,7 @@ from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
|
|
13
16
|
from clarifai_protocol import BaseRunner
|
14
17
|
|
15
18
|
from clarifai.runners.models.model_upload import ModelUploader
|
19
|
+
from clarifai.runners.utils.url_fetcher import ensure_urls_downloaded
|
16
20
|
from clarifai.utils.logging import logger
|
17
21
|
|
18
22
|
|
@@ -22,19 +26,36 @@ class ModelRunLocally:
|
|
22
26
|
self.model_path = model_path
|
23
27
|
self.requirements_file = os.path.join(self.model_path, "requirements.txt")
|
24
28
|
|
29
|
+
# ModelUploader contains multiple useful methods to interact with the model
|
30
|
+
self.uploader = ModelUploader(self.model_path)
|
31
|
+
self.config = self.uploader.config
|
32
|
+
|
33
|
+
def _requirements_hash(self):
|
34
|
+
"""Generate a hash of the requirements file."""
|
35
|
+
with open(self.requirements_file, "r") as f:
|
36
|
+
return hashlib.md5(f.read().encode('utf-8')).hexdigest()
|
37
|
+
|
25
38
|
def create_temp_venv(self):
|
26
39
|
"""Create a temporary virtual environment."""
|
27
|
-
|
28
|
-
|
40
|
+
requirements_hash = self._requirements_hash()
|
41
|
+
|
42
|
+
temp_dir = os.path.join(tempfile.gettempdir(), str(requirements_hash))
|
29
43
|
venv_dir = os.path.join(temp_dir, "venv")
|
30
|
-
|
44
|
+
|
45
|
+
if os.path.exists(temp_dir):
|
46
|
+
logger.info(f"Using previous virtual environment at {temp_dir}")
|
47
|
+
use_existing_venv = True
|
48
|
+
else:
|
49
|
+
logger.info("Creating temporary virtual environment...")
|
50
|
+
use_existing_venv = False
|
51
|
+
venv.create(venv_dir, with_pip=True)
|
52
|
+
logger.info(f"Created temporary virtual environment at {venv_dir}")
|
31
53
|
|
32
54
|
self.venv_dir = venv_dir
|
33
55
|
self.temp_dir = temp_dir
|
34
56
|
self.python_executable = os.path.join(venv_dir, "bin", "python")
|
35
57
|
|
36
|
-
|
37
|
-
return venv_dir, temp_dir
|
58
|
+
return use_existing_venv
|
38
59
|
|
39
60
|
def install_requirements(self):
|
40
61
|
"""Install the dependencies from requirements.txt and Clarifai."""
|
@@ -94,31 +115,91 @@ class ModelRunLocally:
|
|
94
115
|
text=resources_pb2.Text(raw="How many people live in new york?"),
|
95
116
|
image=resources_pb2.Image(url="https://samples.clarifai.com/metro-north.jpg"),
|
96
117
|
audio=resources_pb2.Audio(url="https://samples.clarifai.com/GoodMorning.wav"),
|
118
|
+
video=resources_pb2.Video(url="https://samples.clarifai.com/beer.mp4"),
|
97
119
|
))
|
98
120
|
],
|
99
121
|
)
|
100
122
|
|
123
|
+
def _build_stream_request(self):
|
124
|
+
request = self._build_request()
|
125
|
+
for i in range(1):
|
126
|
+
yield request
|
127
|
+
|
101
128
|
def _run_model_inference(self, runner):
|
102
129
|
"""Perform inference using the runner."""
|
103
130
|
request = self._build_request()
|
131
|
+
stream_request = self._build_stream_request()
|
104
132
|
|
133
|
+
ensure_urls_downloaded(request)
|
134
|
+
predict_response = None
|
135
|
+
generate_response = None
|
136
|
+
stream_response = None
|
105
137
|
try:
|
106
|
-
|
138
|
+
predict_response = runner.predict(request)
|
139
|
+
except NotImplementedError:
|
140
|
+
logger.info("Model does not implement predict() method.")
|
107
141
|
except Exception as e:
|
108
142
|
logger.error(f"Model Prediction failed: {e}")
|
109
143
|
traceback.print_exc()
|
110
|
-
|
144
|
+
predict_response = service_pb2.MultiOutputResponse(status=status_pb2.Status(
|
111
145
|
code=status_code_pb2.MODEL_PREDICTION_FAILED,
|
112
146
|
description="Prediction failed",
|
113
147
|
details="",
|
114
148
|
internal_details=str(e),
|
115
149
|
))
|
116
150
|
|
151
|
+
if predict_response:
|
152
|
+
if predict_response.outputs[0].status.code != status_code_pb2.SUCCESS:
|
153
|
+
logger.error(f"Moddel Prediction failed: {predict_response}")
|
154
|
+
else:
|
155
|
+
logger.info(f"Model Prediction succeeded: {predict_response}")
|
156
|
+
|
157
|
+
try:
|
158
|
+
generate_response = runner.generate(request)
|
159
|
+
except NotImplementedError:
|
160
|
+
logger.info("Model does not implement generate() method.")
|
161
|
+
except Exception as e:
|
162
|
+
logger.error(f"Model Generation failed: {e}")
|
163
|
+
traceback.print_exc()
|
164
|
+
generate_response = service_pb2.MultiOutputResponse(status=status_pb2.Status(
|
165
|
+
code=status_code_pb2.MODEL_GENERATION_FAILED,
|
166
|
+
description="Generation failed",
|
167
|
+
details="",
|
168
|
+
internal_details=str(e),
|
169
|
+
))
|
170
|
+
|
171
|
+
if generate_response:
|
172
|
+
generate_first_res = next(generate_response)
|
173
|
+
if generate_first_res.outputs[0].status.code != status_code_pb2.SUCCESS:
|
174
|
+
logger.error(f"Moddel Prediction failed: {generate_first_res}")
|
175
|
+
else:
|
176
|
+
logger.info(
|
177
|
+
f"Model Prediction succeeded for generate and first response: {generate_first_res}")
|
178
|
+
|
179
|
+
try:
|
180
|
+
stream_response = runner.stream(stream_request)
|
181
|
+
except NotImplementedError:
|
182
|
+
logger.info("Model does not implement stream() method.")
|
183
|
+
except Exception as e:
|
184
|
+
logger.error(f"Model Stream failed: {e}")
|
185
|
+
traceback.print_exc()
|
186
|
+
stream_response = service_pb2.MultiOutputResponse(status=status_pb2.Status(
|
187
|
+
code=status_code_pb2.MODEL_STREAM_FAILED,
|
188
|
+
description="Stream failed",
|
189
|
+
details="",
|
190
|
+
internal_details=str(e),
|
191
|
+
))
|
192
|
+
|
193
|
+
if stream_response:
|
194
|
+
stream_first_res = next(stream_response)
|
195
|
+
if stream_first_res.outputs[0].status.code != status_code_pb2.SUCCESS:
|
196
|
+
logger.error(f"Moddel Prediction failed: {stream_first_res}")
|
197
|
+
else:
|
198
|
+
logger.info(
|
199
|
+
f"Model Prediction succeeded for stream and first response: {stream_first_res}")
|
200
|
+
|
117
201
|
def _run_test(self):
|
118
202
|
"""Test the model locally by making a prediction."""
|
119
|
-
# validate that we have checkpoints downloaded before constructing MyRunner
|
120
|
-
uploader = ModelUploader(self.model_path)
|
121
|
-
uploader.download_checkpoints()
|
122
203
|
# construct MyRunner which will call load_model()
|
123
204
|
MyRunner = self._get_model_runner()
|
124
205
|
runner = MyRunner(
|
@@ -127,13 +208,8 @@ class ModelRunLocally:
|
|
127
208
|
compute_cluster_id="n/a",
|
128
209
|
user_id="n/a",
|
129
210
|
)
|
130
|
-
|
131
211
|
# send an inference.
|
132
|
-
|
133
|
-
if response.outputs[0].status.code != status_code_pb2.SUCCESS:
|
134
|
-
logger.error(f"Moddel Prediction failed: {response}")
|
135
|
-
else:
|
136
|
-
logger.info(f"Model Prediction succeeded: {response}")
|
212
|
+
self._run_model_inference(runner)
|
137
213
|
|
138
214
|
def test_model(self):
|
139
215
|
"""Test the model by running it locally in the virtual environment."""
|
@@ -143,31 +219,258 @@ class ModelRunLocally:
|
|
143
219
|
f"import sys; sys.path.append('{os.path.dirname(os.path.abspath(__file__))}'); "
|
144
220
|
f"from model_run_locally import ModelRunLocally; ModelRunLocally('{self.model_path}')._run_test()",
|
145
221
|
]
|
222
|
+
process = None
|
146
223
|
try:
|
147
224
|
logger.info("Testing the model locally...")
|
148
|
-
subprocess.
|
149
|
-
|
225
|
+
process = subprocess.Popen(command)
|
226
|
+
# Wait for the process to complete
|
227
|
+
process.wait()
|
228
|
+
if process.returncode == 0:
|
229
|
+
logger.info("Model tested successfully!")
|
230
|
+
if process.returncode != 0:
|
231
|
+
raise subprocess.CalledProcessError(process.returncode, command)
|
150
232
|
except subprocess.CalledProcessError as e:
|
151
233
|
logger.error(f"Error testing the model: {e}")
|
152
234
|
sys.exit(1)
|
235
|
+
except Exception as e:
|
236
|
+
logger.error(f"Unexpected error: {e}")
|
237
|
+
sys.exit(1)
|
238
|
+
finally:
|
239
|
+
# After the function runs, check if the process is still running
|
240
|
+
if process and process.poll() is None:
|
241
|
+
logger.info("Process is still running. Terminating process.")
|
242
|
+
process.terminate()
|
243
|
+
try:
|
244
|
+
process.wait(timeout=5)
|
245
|
+
except subprocess.TimeoutExpired:
|
246
|
+
logger.info("Process did not terminate gracefully. Killing process.")
|
247
|
+
# Kill the process if it doesn't terminate after 5 seconds
|
248
|
+
process.kill()
|
153
249
|
|
154
250
|
# run the model server
|
155
|
-
def run_model_server(self):
|
251
|
+
def run_model_server(self, port=8080):
|
156
252
|
"""Run the Clarifai Runners's model server."""
|
157
253
|
|
158
254
|
command = [
|
159
255
|
self.python_executable, "-m", "clarifai.runners.server", "--model_path", self.model_path,
|
160
|
-
"--start_dev_server"
|
256
|
+
"--start_dev_server", "--port",
|
257
|
+
str(port)
|
161
258
|
]
|
162
259
|
try:
|
163
|
-
logger.info(
|
260
|
+
logger.info(
|
261
|
+
f"Starting model server at localhost:{port} with the model at {self.model_path}...")
|
164
262
|
subprocess.check_call(command)
|
165
|
-
logger.info("Model server started successfully
|
263
|
+
logger.info("Model server started successfully and running at localhost:{port}")
|
166
264
|
except subprocess.CalledProcessError as e:
|
167
265
|
logger.error(f"Error running model server: {e}")
|
168
266
|
self.clean_up()
|
169
267
|
sys.exit(1)
|
170
268
|
|
269
|
+
def _docker_hash(self):
|
270
|
+
"""Generate a hash of the combined requirements file and Dockefile"""
|
271
|
+
with open(self.requirements_file, "r") as f:
|
272
|
+
requirements_hash = hashlib.md5(f.read().encode('utf-8')).hexdigest()
|
273
|
+
with open(os.path.join(self.model_path, "Dockerfile"), "r") as f:
|
274
|
+
dockerfile_hash = hashlib.md5(f.read().encode('utf-8')).hexdigest()
|
275
|
+
|
276
|
+
return hashlib.md5(f"{requirements_hash}{dockerfile_hash}".encode('utf-8')).hexdigest()
|
277
|
+
|
278
|
+
def is_docker_installed(self):
|
279
|
+
"""Checks if Docker is installed on the system."""
|
280
|
+
try:
|
281
|
+
logger.info("Checking if Docker is installed...")
|
282
|
+
subprocess.run(["docker", "--version"], check=True)
|
283
|
+
return True
|
284
|
+
except subprocess.CalledProcessError:
|
285
|
+
logger.error(
|
286
|
+
"Docker is not installed! Please install Docker to run the model in a container.")
|
287
|
+
return False
|
288
|
+
|
289
|
+
def build_docker_image(
|
290
|
+
self,
|
291
|
+
image_name="model_image",
|
292
|
+
):
|
293
|
+
"""Build the docker image using the Dockerfile in the model directory."""
|
294
|
+
try:
|
295
|
+
logger.info(f"Building docker image from Dockerfile in {self.model_path}...")
|
296
|
+
|
297
|
+
# since we don't want to copy the model directory into the container, we need to modify the Dockerfile and comment out the COPY instruction
|
298
|
+
dockerfile_path = os.path.join(self.model_path, "Dockerfile")
|
299
|
+
# Read the Dockerfile
|
300
|
+
with open(dockerfile_path, 'r') as file:
|
301
|
+
lines = file.readlines()
|
302
|
+
|
303
|
+
# Comment out the COPY instruction that copies the current folder
|
304
|
+
modified_lines = []
|
305
|
+
for line in lines:
|
306
|
+
if 'COPY .' in line and '/app/model_dir/main' in line:
|
307
|
+
modified_lines.append(f'# {line}')
|
308
|
+
else:
|
309
|
+
modified_lines.append(line)
|
310
|
+
|
311
|
+
# Create a temporary directory to store the modified Dockerfile
|
312
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
313
|
+
temp_dockerfile_path = os.path.join(temp_dir, "Dockerfile.temp")
|
314
|
+
|
315
|
+
# Write the modified Dockerfile to the temporary file
|
316
|
+
with open(temp_dockerfile_path, 'w') as file:
|
317
|
+
file.writelines(modified_lines)
|
318
|
+
|
319
|
+
# Build the Docker image using the temporary Dockerfile
|
320
|
+
subprocess.check_call(
|
321
|
+
['docker', 'build', '-t', image_name, '-f', temp_dockerfile_path, self.model_path])
|
322
|
+
logger.info(f"Docker image '{image_name}' built successfully!")
|
323
|
+
except subprocess.CalledProcessError as e:
|
324
|
+
logger.info(f"Error occurred while building the Docker image: {e}")
|
325
|
+
sys.exit(1)
|
326
|
+
|
327
|
+
def docker_image_exists(self, image_name):
|
328
|
+
"""Check if the Docker image exists."""
|
329
|
+
try:
|
330
|
+
logger.info(f"Checking if Docker image '{image_name}' exists...")
|
331
|
+
subprocess.run(["docker", "inspect", image_name], check=True)
|
332
|
+
logger.info(f"Docker image '{image_name}' exists!")
|
333
|
+
return True
|
334
|
+
except subprocess.CalledProcessError:
|
335
|
+
logger.info(f"Docker image '{image_name}' does not exist!")
|
336
|
+
return False
|
337
|
+
|
338
|
+
def run_docker_container(self,
|
339
|
+
image_name,
|
340
|
+
container_name="clarifai-model-container",
|
341
|
+
port=8080,
|
342
|
+
env_vars=None):
|
343
|
+
"""Runs a Docker container from the specified image."""
|
344
|
+
try:
|
345
|
+
logger.info(f"Running Docker container '{container_name}' from image '{image_name}'...")
|
346
|
+
# Base docker run command
|
347
|
+
cmd = [
|
348
|
+
"docker", "run", "--name", container_name, '--rm', "--gpus", "all", "--network", "host"
|
349
|
+
]
|
350
|
+
# Add volume mappings
|
351
|
+
cmd.extend(["-v", f"{self.model_path}:/app/model_dir/main"])
|
352
|
+
# Add environment variables
|
353
|
+
if env_vars:
|
354
|
+
for key, value in env_vars.items():
|
355
|
+
cmd.extend(["-e", f"{key}={value}"])
|
356
|
+
# Add the image name
|
357
|
+
cmd.append(image_name)
|
358
|
+
# update the CMD to run the server
|
359
|
+
cmd.extend(
|
360
|
+
["--model_path", "/app/model_dir/main", "--start_dev_server", "--port",
|
361
|
+
str(port)])
|
362
|
+
# Run the container
|
363
|
+
process = subprocess.Popen(cmd,)
|
364
|
+
logger.info(
|
365
|
+
f"Docker container '{container_name}' is running successfully! access the model at http://localhost:{port}"
|
366
|
+
)
|
367
|
+
|
368
|
+
# Function to handle Ctrl+C (SIGINT) gracefully
|
369
|
+
def signal_handler(sig, frame):
|
370
|
+
logger.info(f"Stopping Docker container '{container_name}'...")
|
371
|
+
subprocess.run(["docker", "stop", container_name], check=True)
|
372
|
+
process.terminate()
|
373
|
+
logger.info(f"Docker container '{container_name}' stopped successfully!")
|
374
|
+
time.sleep(1)
|
375
|
+
sys.exit(0)
|
376
|
+
|
377
|
+
# Register the signal handler for SIGINT (Ctrl+C)
|
378
|
+
signal.signal(signal.SIGINT, signal_handler)
|
379
|
+
# Wait for the process to finish (keeps the container running until it's stopped)
|
380
|
+
process.wait()
|
381
|
+
except subprocess.CalledProcessError as e:
|
382
|
+
logger.info(f"Error occurred while running the Docker container: {e}")
|
383
|
+
sys.exit(1)
|
384
|
+
except Exception as e:
|
385
|
+
logger.info(f"Error occurred while running the Docker container: {e}")
|
386
|
+
sys.exit(1)
|
387
|
+
|
388
|
+
def test_model_container(self,
|
389
|
+
image_name,
|
390
|
+
container_name="clarifai-model-container",
|
391
|
+
env_vars=None):
|
392
|
+
"""Test the model inside the Docker container."""
|
393
|
+
try:
|
394
|
+
logger.info("Testing the model inside the Docker container...")
|
395
|
+
# Base docker run command
|
396
|
+
cmd = [
|
397
|
+
"docker", "run", "--name", container_name, '--rm', "--gpus", "all", "--network", "host"
|
398
|
+
]
|
399
|
+
# update the entrypoint for testing the model
|
400
|
+
cmd.extend(["--entrypoint", "python"])
|
401
|
+
# Add volume mappings
|
402
|
+
cmd.extend(["-v", f"{self.model_path}:/app/model_dir/main"])
|
403
|
+
# Add environment variables
|
404
|
+
if env_vars:
|
405
|
+
for key, value in env_vars.items():
|
406
|
+
cmd.extend(["-e", f"{key}={value}"])
|
407
|
+
# Add the image name
|
408
|
+
cmd.append(image_name)
|
409
|
+
# update the CMD to test the model inside the container
|
410
|
+
cmd.extend([
|
411
|
+
"-c",
|
412
|
+
"from clarifai.runners.models.model_run_locally import ModelRunLocally; ModelRunLocally('/app/model_dir/main')._run_test()"
|
413
|
+
])
|
414
|
+
# Run the container
|
415
|
+
subprocess.check_call(cmd)
|
416
|
+
logger.info("Model tested successfully!")
|
417
|
+
except subprocess.CalledProcessError as e:
|
418
|
+
logger.error(f"Error testing the model inside the Docker container: {e}")
|
419
|
+
sys.exit(1)
|
420
|
+
|
421
|
+
def container_exists(self, container_name="clarifai-model-container"):
|
422
|
+
"""Check if the Docker container exists."""
|
423
|
+
try:
|
424
|
+
# Run docker ps -a to list all containers (running and stopped)
|
425
|
+
result = subprocess.run(
|
426
|
+
["docker", "ps", "-a", "--filter", f"name={container_name}", "--format", "{{.Names}}"],
|
427
|
+
check=True,
|
428
|
+
capture_output=True,
|
429
|
+
text=True)
|
430
|
+
# If the container name is returned, it exists
|
431
|
+
if result.stdout.strip() == container_name:
|
432
|
+
logger.info(f"Docker container '{container_name}' exists.")
|
433
|
+
return True
|
434
|
+
else:
|
435
|
+
return False
|
436
|
+
except subprocess.CalledProcessError as e:
|
437
|
+
logger.error(f"Error occurred while checking if container exists: {e}")
|
438
|
+
return False
|
439
|
+
|
440
|
+
def stop_docker_container(self, container_name="clarifai-model-container"):
|
441
|
+
"""Stop the Docker container if it's running."""
|
442
|
+
try:
|
443
|
+
# Check if the container is running
|
444
|
+
result = subprocess.run(
|
445
|
+
["docker", "ps", "--filter", f"name={container_name}", "--format", "{{.Names}}"],
|
446
|
+
check=True,
|
447
|
+
capture_output=True,
|
448
|
+
text=True)
|
449
|
+
if result.stdout.strip() == container_name:
|
450
|
+
logger.info(f"Docker container '{container_name}' is running. Stopping it...")
|
451
|
+
subprocess.run(["docker", "stop", container_name], check=True)
|
452
|
+
logger.info(f"Docker container '{container_name}' stopped successfully!")
|
453
|
+
except subprocess.CalledProcessError as e:
|
454
|
+
logger.error(f"Error occurred while stopping the Docker container: {e}")
|
455
|
+
|
456
|
+
def remove_docker_container(self, container_name="clarifai-model-container"):
|
457
|
+
"""Remove the Docker container."""
|
458
|
+
try:
|
459
|
+
logger.info(f"Removing Docker container '{container_name}'...")
|
460
|
+
subprocess.run(["docker", "rm", container_name], check=True)
|
461
|
+
logger.info(f"Docker container '{container_name}' removed successfully!")
|
462
|
+
except subprocess.CalledProcessError as e:
|
463
|
+
logger.error(f"Error occurred while removing the Docker container: {e}")
|
464
|
+
|
465
|
+
def remove_docker_image(self, image_name):
|
466
|
+
"""Remove the Docker image."""
|
467
|
+
try:
|
468
|
+
logger.info(f"Removing Docker image '{image_name}'...")
|
469
|
+
subprocess.run(["docker", "rmi", image_name], check=True)
|
470
|
+
logger.info(f"Docker image '{image_name}' removed successfully!")
|
471
|
+
except subprocess.CalledProcessError as e:
|
472
|
+
logger.error(f"Error occurred while removing the Docker image: {e}")
|
473
|
+
|
171
474
|
def clean_up(self):
|
172
475
|
"""Clean up the temporary virtual environment."""
|
173
476
|
if os.path.exists(self.temp_dir):
|
@@ -175,16 +478,53 @@ class ModelRunLocally:
|
|
175
478
|
shutil.rmtree(self.temp_dir)
|
176
479
|
|
177
480
|
|
178
|
-
def main(model_path,
|
481
|
+
def main(model_path,
|
482
|
+
run_model_server=False,
|
483
|
+
inside_container=False,
|
484
|
+
port=8080,
|
485
|
+
keep_env=False,
|
486
|
+
keep_image=False):
|
179
487
|
|
488
|
+
if not os.environ['CLARIFAI_PAT']:
|
489
|
+
logger.error(
|
490
|
+
"CLARIFAI_PAT environment variable is not set! Please set your PAT in the 'CLARIFAI_PAT' environment variable."
|
491
|
+
)
|
492
|
+
sys.exit(1)
|
180
493
|
manager = ModelRunLocally(model_path)
|
181
|
-
manager.
|
494
|
+
manager.uploader.download_checkpoints()
|
495
|
+
if inside_container:
|
496
|
+
if not manager.is_docker_installed():
|
497
|
+
sys.exit(1)
|
498
|
+
manager.uploader.create_dockerfile()
|
499
|
+
image_tag = manager._docker_hash()
|
500
|
+
image_name = f"{manager.config['model']['id']}:{image_tag}"
|
501
|
+
container_name = manager.config['model']['id']
|
502
|
+
if not manager.docker_image_exists(image_name):
|
503
|
+
manager.build_docker_image(image_name=image_name)
|
504
|
+
try:
|
505
|
+
envs = {'CLARIFAI_PAT': os.environ['CLARIFAI_PAT'], 'CLARIFAI_USER_ID': 'n/a'}
|
506
|
+
if run_model_server:
|
507
|
+
manager.run_docker_container(
|
508
|
+
image_name=image_name, container_name=container_name, port=port, env_vars=envs)
|
509
|
+
else:
|
510
|
+
manager.test_model_container(
|
511
|
+
image_name=image_name, container_name=container_name, env_vars=envs)
|
512
|
+
finally:
|
513
|
+
if manager.container_exists(container_name):
|
514
|
+
manager.stop_docker_container(container_name)
|
515
|
+
manager.remove_docker_container(container_name=container_name)
|
516
|
+
if not keep_image:
|
517
|
+
manager.remove_docker_image(image_name)
|
182
518
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
519
|
+
else:
|
520
|
+
try:
|
521
|
+
use_existing_env = manager.create_temp_venv()
|
522
|
+
if not use_existing_env:
|
523
|
+
manager.install_requirements()
|
524
|
+
if run_model_server:
|
525
|
+
manager.run_model_server(port)
|
526
|
+
else:
|
527
|
+
manager.test_model()
|
528
|
+
finally:
|
529
|
+
if not keep_env:
|
530
|
+
manager.clean_up()
|
@@ -10,6 +10,9 @@ from google.protobuf import json_format
|
|
10
10
|
from rich import print
|
11
11
|
|
12
12
|
from clarifai.client import BaseClient
|
13
|
+
from clarifai.runners.utils.const import (AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES,
|
14
|
+
CONCEPTS_REQUIRED_MODEL_TYPE, DEFAULT_PYTHON_VERSION,
|
15
|
+
PYTHON_BASE_IMAGE, TORCH_BASE_IMAGE)
|
13
16
|
from clarifai.runners.utils.loader import HuggingFaceLoader
|
14
17
|
from clarifai.urls.helper import ClarifaiUrlHelper
|
15
18
|
from clarifai.utils.logging import logger
|
@@ -23,48 +26,6 @@ def _clear_line(n: int = 1) -> None:
|
|
23
26
|
|
24
27
|
|
25
28
|
class ModelUploader:
|
26
|
-
DEFAULT_PYTHON_VERSION = 3.11
|
27
|
-
DEFAULT_TORCH_VERSION = '2.4.0'
|
28
|
-
DEFAULT_CUDA_VERSION = '124'
|
29
|
-
# List of available torch images for matrix
|
30
|
-
'''
|
31
|
-
python_version: ['3.8', '3.9', '3.10', '3.11']
|
32
|
-
torch_version: ['2.0.0', '2.1.0', '2.2.0', '2.3.0', '2.4.0', '2.4.1', '2.5.0']
|
33
|
-
cuda_version: ['124']
|
34
|
-
'''
|
35
|
-
AVAILABLE_TORCH_IMAGES = [
|
36
|
-
'2.0.0-py3.8-cuda124',
|
37
|
-
'2.0.0-py3.9-cuda124',
|
38
|
-
'2.0.0-py3.10-cuda124',
|
39
|
-
'2.0.0-py3.11-cuda124',
|
40
|
-
'2.1.0-py3.8-cuda124',
|
41
|
-
'2.1.0-py3.9-cuda124',
|
42
|
-
'2.1.0-py3.10-cuda124',
|
43
|
-
'2.1.0-py3.11-cuda124',
|
44
|
-
'2.2.0-py3.8-cuda124',
|
45
|
-
'2.2.0-py3.9-cuda124',
|
46
|
-
'2.2.0-py3.10-cuda124',
|
47
|
-
'2.2.0-py3.11-cuda124',
|
48
|
-
'2.3.0-py3.8-cuda124',
|
49
|
-
'2.3.0-py3.9-cuda124',
|
50
|
-
'2.3.0-py3.10-cuda124',
|
51
|
-
'2.3.0-py3.11-cuda124',
|
52
|
-
'2.4.0-py3.8-cuda124',
|
53
|
-
'2.4.0-py3.9-cuda124',
|
54
|
-
'2.4.0-py3.10-cuda124',
|
55
|
-
'2.4.0-py3.11-cuda124',
|
56
|
-
'2.4.1-py3.8-cuda124',
|
57
|
-
'2.4.1-py3.9-cuda124',
|
58
|
-
'2.4.1-py3.10-cuda124',
|
59
|
-
'2.4.1-py3.11-cuda124',
|
60
|
-
]
|
61
|
-
AVAILABLE_PYTHON_IMAGES = ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
|
62
|
-
PYTHON_BASE_IMAGE = 'public.ecr.aws/clarifai-models/python-base:{python_version}'
|
63
|
-
TORCH_BASE_IMAGE = 'public.ecr.aws/clarifai-models/torch:{torch_version}-py{python_version}-cuda{cuda_version}'
|
64
|
-
|
65
|
-
CONCEPTS_REQUIRED_MODEL_TYPE = [
|
66
|
-
'visual-classifier', 'visual-detector', 'visual-segmenter', 'text-classifier'
|
67
|
-
]
|
68
29
|
|
69
30
|
def __init__(self, folder: str):
|
70
31
|
self._client = None
|
@@ -126,7 +87,7 @@ class ModelUploader:
|
|
126
87
|
user_id = model.get('user_id')
|
127
88
|
app_id = model.get('app_id')
|
128
89
|
|
129
|
-
base = os.environ.get('CLARIFAI_API_BASE', 'https://api
|
90
|
+
base = os.environ.get('CLARIFAI_API_BASE', 'https://api.clarifai.com')
|
130
91
|
|
131
92
|
self._client = BaseClient(user_id=user_id, app_id=app_id, base=base)
|
132
93
|
|
@@ -233,33 +194,35 @@ class ModelUploader:
|
|
233
194
|
build_info = self.config.get('build_info', {})
|
234
195
|
if 'python_version' in build_info:
|
235
196
|
python_version = build_info['python_version']
|
236
|
-
if python_version not in
|
197
|
+
if python_version not in AVAILABLE_PYTHON_IMAGES:
|
237
198
|
logger.error(
|
238
|
-
f"Python version {python_version} not supported, please use one of the following versions: {
|
199
|
+
f"Python version {python_version} not supported, please use one of the following versions: {AVAILABLE_PYTHON_IMAGES}"
|
239
200
|
)
|
240
201
|
return
|
241
202
|
logger.info(
|
242
203
|
f"Using Python version {python_version} from the config file to build the Dockerfile")
|
243
204
|
else:
|
244
205
|
logger.info(
|
245
|
-
f"Python version not found in the config file, using default Python version: {
|
206
|
+
f"Python version not found in the config file, using default Python version: {DEFAULT_PYTHON_VERSION}"
|
246
207
|
)
|
247
|
-
python_version =
|
208
|
+
python_version = DEFAULT_PYTHON_VERSION
|
248
209
|
|
249
|
-
base_image =
|
210
|
+
base_image = PYTHON_BASE_IMAGE.format(python_version=python_version)
|
250
211
|
|
251
212
|
# Parse the requirements.txt file to determine the base image
|
252
213
|
dependencies = self._parse_requirements()
|
253
214
|
if 'torch' in dependencies and dependencies['torch']:
|
254
215
|
torch_version = dependencies['torch']
|
255
216
|
|
256
|
-
for image in
|
217
|
+
for image in AVAILABLE_TORCH_IMAGES:
|
257
218
|
if torch_version in image and f'py{python_version}' in image:
|
258
|
-
|
219
|
+
cuda_version = image.split('-')[-1].replace('cuda', '')
|
220
|
+
base_image = TORCH_BASE_IMAGE.format(
|
259
221
|
torch_version=torch_version,
|
260
222
|
python_version=python_version,
|
261
|
-
cuda_version=
|
262
|
-
|
223
|
+
cuda_version=cuda_version,
|
224
|
+
)
|
225
|
+
logger.info(f"Using Torch version {torch_version} base image to build the Docker image")
|
263
226
|
break
|
264
227
|
|
265
228
|
# Replace placeholders with actual values
|
@@ -314,7 +277,7 @@ class ModelUploader:
|
|
314
277
|
config = yaml.safe_load(file)
|
315
278
|
model = config.get('model')
|
316
279
|
model_type_id = model.get('model_type_id')
|
317
|
-
assert model_type_id in
|
280
|
+
assert model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"
|
318
281
|
concept_protos = self._concepts_protos_from_concepts(labels)
|
319
282
|
|
320
283
|
config['concepts'] = [{'id': concept.id, 'name': concept.name} for concept in concept_protos]
|
@@ -332,7 +295,7 @@ class ModelUploader:
|
|
332
295
|
)
|
333
296
|
|
334
297
|
model_type_id = self.config.get('model').get('model_type_id')
|
335
|
-
if model_type_id in
|
298
|
+
if model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE:
|
336
299
|
|
337
300
|
if 'concepts' in self.config:
|
338
301
|
labels = self.config.get('concepts')
|
@@ -347,7 +310,10 @@ class ModelUploader:
|
|
347
310
|
labels = sorted(labels.items(), key=lambda x: int(x[0]))
|
348
311
|
|
349
312
|
config_file = os.path.join(self.folder, 'config.yaml')
|
350
|
-
|
313
|
+
try:
|
314
|
+
self.hf_labels_to_config(labels, config_file)
|
315
|
+
except Exception as e:
|
316
|
+
logger.error(f"Failed to update the config.yaml file with the concepts: {e}")
|
351
317
|
|
352
318
|
model_version_proto.output_info.data.concepts.extend(
|
353
319
|
self._concepts_protos_from_concepts(labels))
|
@@ -359,7 +325,7 @@ class ModelUploader:
|
|
359
325
|
|
360
326
|
model_type_id = self.config.get('model').get('model_type_id')
|
361
327
|
|
362
|
-
if (model_type_id in
|
328
|
+
if (model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE) and 'concepts' not in self.config:
|
363
329
|
logger.info(
|
364
330
|
f"Model type {model_type_id} requires concepts to be specified in the config.yaml file.."
|
365
331
|
)
|
@@ -473,8 +439,21 @@ class ModelUploader:
|
|
473
439
|
is_v3=self.is_v3,
|
474
440
|
))
|
475
441
|
|
442
|
+
def get_model_build_logs(self):
|
443
|
+
logs_request = service_pb2.ListLogEntriesRequest(
|
444
|
+
log_type="builder",
|
445
|
+
user_app_id=self.client.user_app_id,
|
446
|
+
model_id=self.model_proto.id,
|
447
|
+
model_version_id=self.model_version_id,
|
448
|
+
page=1,
|
449
|
+
per_page=50)
|
450
|
+
response = self.client.STUB.ListLogEntries(logs_request)
|
451
|
+
|
452
|
+
return response
|
453
|
+
|
476
454
|
def monitor_model_build(self):
|
477
455
|
st = time.time()
|
456
|
+
seen_logs = set() # To avoid duplicate log messages
|
478
457
|
while True:
|
479
458
|
resp = self.client.STUB.GetModelVersion(
|
480
459
|
service_pb2.GetModelVersionRequest(
|
@@ -485,6 +464,13 @@ class ModelUploader:
|
|
485
464
|
status_code = resp.model_version.status.code
|
486
465
|
if status_code == status_code_pb2.MODEL_BUILDING:
|
487
466
|
print(f"Model is building... (elapsed {time.time() - st:.1f}s)", end='\r', flush=True)
|
467
|
+
|
468
|
+
# Fetch and display the logs
|
469
|
+
logs = self.get_model_build_logs()
|
470
|
+
for log_entry in logs.log_entries:
|
471
|
+
if log_entry.url not in seen_logs:
|
472
|
+
seen_logs.add(log_entry.url)
|
473
|
+
print(f"Model Building Logs...: {log_entry.message.strip()}")
|
488
474
|
time.sleep(1)
|
489
475
|
elif status_code == status_code_pb2.MODEL_TRAINED:
|
490
476
|
logger.info(f"\nModel build complete! (elapsed {time.time() - st:.1f}s)")
|