clarifai 10.10.1__py3-none-any.whl → 10.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,13 @@
1
+ import hashlib
1
2
  import importlib.util
2
3
  import inspect
3
4
  import os
4
5
  import shutil
6
+ import signal
5
7
  import subprocess
6
8
  import sys
7
9
  import tempfile
10
+ import time
8
11
  import traceback
9
12
  import venv
10
13
 
@@ -13,6 +16,7 @@ from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
13
16
  from clarifai_protocol import BaseRunner
14
17
 
15
18
  from clarifai.runners.models.model_upload import ModelUploader
19
+ from clarifai.runners.utils.url_fetcher import ensure_urls_downloaded
16
20
  from clarifai.utils.logging import logger
17
21
 
18
22
 
@@ -22,19 +26,36 @@ class ModelRunLocally:
22
26
  self.model_path = model_path
23
27
  self.requirements_file = os.path.join(self.model_path, "requirements.txt")
24
28
 
29
+ # ModelUploader contains multiple useful methods to interact with the model
30
+ self.uploader = ModelUploader(self.model_path)
31
+ self.config = self.uploader.config
32
+
33
+ def _requirements_hash(self):
34
+ """Generate a hash of the requirements file."""
35
+ with open(self.requirements_file, "r") as f:
36
+ return hashlib.md5(f.read().encode('utf-8')).hexdigest()
37
+
25
38
def create_temp_venv(self):
    """Create, or reuse, a virtual environment keyed by the requirements hash.

    The environment lives in ``<system temp dir>/<requirements hash>/venv``.
    The method records ``self.venv_dir``, ``self.temp_dir`` and
    ``self.python_executable`` for later use.

    Returns:
        bool: True when an existing environment is reused, False when a new
        one was created (the caller then has to install the requirements).
    """
    requirements_hash = self._requirements_hash()

    temp_dir = os.path.join(tempfile.gettempdir(), str(requirements_hash))
    venv_dir = os.path.join(temp_dir, "venv")
    python_executable = os.path.join(venv_dir, "bin", "python")

    # Reuse only when the interpreter is actually there. Checking just the
    # directory would treat the leftovers of an interrupted run as a ready
    # environment and skip the dependency installation.
    use_existing_venv = os.path.exists(python_executable)
    if use_existing_venv:
        logger.info(f"Using previous virtual environment at {temp_dir}")
    else:
        logger.info("Creating temporary virtual environment...")
        venv.create(venv_dir, with_pip=True)
        logger.info(f"Created temporary virtual environment at {venv_dir}")

    self.venv_dir = venv_dir
    self.temp_dir = temp_dir
    self.python_executable = python_executable

    return use_existing_venv
38
59
 
39
60
  def install_requirements(self):
40
61
  """Install the dependencies from requirements.txt and Clarifai."""
@@ -94,31 +115,91 @@ class ModelRunLocally:
94
115
  text=resources_pb2.Text(raw="How many people live in new york?"),
95
116
  image=resources_pb2.Image(url="https://samples.clarifai.com/metro-north.jpg"),
96
117
  audio=resources_pb2.Audio(url="https://samples.clarifai.com/GoodMorning.wav"),
118
+ video=resources_pb2.Video(url="https://samples.clarifai.com/beer.mp4"),
97
119
  ))
98
120
  ],
99
121
  )
100
122
 
123
+ def _build_stream_request(self):
124
+ request = self._build_request()
125
+ for i in range(1):
126
+ yield request
127
+
101
128
def _run_model_inference(self, runner):
    """Call predict(), generate() and stream() on the runner and log each outcome.

    A method that raises NotImplementedError is reported as not implemented
    and skipped. Any other exception is logged with its traceback and turned
    into a failure response, which is then logged as an error. For generate()
    and stream() only the first response is inspected.

    Args:
        runner: object exposing ``predict(request)``, ``generate(request)``
            and ``stream(request_iterator)``.
    """
    request = self._build_request()
    stream_request = self._build_stream_request()

    ensure_urls_downloaded(request)

    def failure_response(code, description, error):
        # Response carrying only a top-level failure status (no outputs).
        return service_pb2.MultiOutputResponse(status=status_pb2.Status(
            code=code,
            description=description,
            details="",
            internal_details=str(error),
        ))

    def succeeded(response):
        # Locally built failure responses have no outputs; indexing
        # outputs[0] on them would raise, so fall back to the top-level status.
        if len(response.outputs) > 0:
            return response.outputs[0].status.code == status_code_pb2.SUCCESS
        return response.status.code == status_code_pb2.SUCCESS

    def check_first_response(kind, call, payload, label, failure_code):
        try:
            # next() stays inside the try: a generator-based method only runs
            # (and raises) once its first item is requested.
            first = next(iter(call(payload)))
        except NotImplementedError:
            logger.info(f"Model does not implement {kind}() method.")
            return
        except StopIteration:
            logger.error(f"Model {label} failed: {kind}() returned no responses")
            return
        except Exception as e:
            logger.error(f"Model {label} failed: {e}")
            traceback.print_exc()
            first = failure_response(failure_code(), f"{label} failed", e)

        if succeeded(first):
            logger.info(f"Model Prediction succeeded for {kind} and first response: {first}")
        else:
            logger.error(f"Model {label} failed: {first}")

    predict_response = None
    try:
        predict_response = runner.predict(request)
    except NotImplementedError:
        logger.info("Model does not implement predict() method.")
    except Exception as e:
        logger.error(f"Model Prediction failed: {e}")
        traceback.print_exc()
        predict_response = failure_response(status_code_pb2.MODEL_PREDICTION_FAILED,
                                            "Prediction failed", e)

    if predict_response is not None:
        if succeeded(predict_response):
            logger.info(f"Model Prediction succeeded: {predict_response}")
        else:
            logger.error(f"Model Prediction failed: {predict_response}")

    # The failure codes are wrapped in lambdas so they are only looked up
    # when a failure actually has to be reported.
    check_first_response("generate", runner.generate, request, "Generation",
                         lambda: status_code_pb2.MODEL_GENERATION_FAILED)
    check_first_response("stream", runner.stream, stream_request, "Stream",
                         lambda: status_code_pb2.MODEL_STREAM_FAILED)
200
+
117
201
  def _run_test(self):
118
202
  """Test the model locally by making a prediction."""
119
- # validate that we have checkpoints downloaded before constructing MyRunner
120
- uploader = ModelUploader(self.model_path)
121
- uploader.download_checkpoints()
122
203
  # construct MyRunner which will call load_model()
123
204
  MyRunner = self._get_model_runner()
124
205
  runner = MyRunner(
@@ -127,13 +208,8 @@ class ModelRunLocally:
127
208
  compute_cluster_id="n/a",
128
209
  user_id="n/a",
129
210
  )
130
-
131
211
  # send an inference.
132
- response = self._run_model_inference(runner)
133
- if response.outputs[0].status.code != status_code_pb2.SUCCESS:
134
- logger.error(f"Moddel Prediction failed: {response}")
135
- else:
136
- logger.info(f"Model Prediction succeeded: {response}")
212
+ self._run_model_inference(runner)
137
213
 
138
214
  def test_model(self):
139
215
  """Test the model by running it locally in the virtual environment."""
@@ -143,31 +219,258 @@ class ModelRunLocally:
143
219
  f"import sys; sys.path.append('{os.path.dirname(os.path.abspath(__file__))}'); "
144
220
  f"from model_run_locally import ModelRunLocally; ModelRunLocally('{self.model_path}')._run_test()",
145
221
  ]
222
+ process = None
146
223
  try:
147
224
  logger.info("Testing the model locally...")
148
- subprocess.check_call(command)
149
- logger.info("Model tested successfully!")
225
+ process = subprocess.Popen(command)
226
+ # Wait for the process to complete
227
+ process.wait()
228
+ if process.returncode == 0:
229
+ logger.info("Model tested successfully!")
230
+ if process.returncode != 0:
231
+ raise subprocess.CalledProcessError(process.returncode, command)
150
232
  except subprocess.CalledProcessError as e:
151
233
  logger.error(f"Error testing the model: {e}")
152
234
  sys.exit(1)
235
+ except Exception as e:
236
+ logger.error(f"Unexpected error: {e}")
237
+ sys.exit(1)
238
+ finally:
239
+ # After the function runs, check if the process is still running
240
+ if process and process.poll() is None:
241
+ logger.info("Process is still running. Terminating process.")
242
+ process.terminate()
243
+ try:
244
+ process.wait(timeout=5)
245
+ except subprocess.TimeoutExpired:
246
+ logger.info("Process did not terminate gracefully. Killing process.")
247
+ # Kill the process if it doesn't terminate after 5 seconds
248
+ process.kill()
153
249
 
154
250
  # run the model server
155
def run_model_server(self, port=8080):
    """Run the Clarifai Runners's model server with the venv interpreter.

    Args:
        port: port passed to the server through ``--port``.

    On a non-zero exit status the environment is cleaned up and the process
    exits with status 1.
    """
    command = [
        self.python_executable, "-m", "clarifai.runners.server", "--model_path", self.model_path,
        "--start_dev_server", "--port",
        str(port)
    ]
    try:
        logger.info(
            f"Starting model server at localhost:{port} with the model at {self.model_path}...")
        # check_call blocks until the server process exits.
        subprocess.check_call(command)
        # f-string prefix added: the port placeholder was logged literally.
        logger.info(f"Model server started successfully and running at localhost:{port}")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running model server: {e}")
        self.clean_up()
        sys.exit(1)
170
268
 
269
+ def _docker_hash(self):
270
+ """Generate a hash of the combined requirements file and Dockefile"""
271
+ with open(self.requirements_file, "r") as f:
272
+ requirements_hash = hashlib.md5(f.read().encode('utf-8')).hexdigest()
273
+ with open(os.path.join(self.model_path, "Dockerfile"), "r") as f:
274
+ dockerfile_hash = hashlib.md5(f.read().encode('utf-8')).hexdigest()
275
+
276
+ return hashlib.md5(f"{requirements_hash}{dockerfile_hash}".encode('utf-8')).hexdigest()
277
+
278
def is_docker_installed(self):
    """Check whether the ``docker`` command can be run on this system.

    Returns:
        bool: True when ``docker --version`` succeeds, False when the
        command fails or the executable cannot be found.
    """
    try:
        logger.info("Checking if Docker is installed...")
        subprocess.run(["docker", "--version"], check=True)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        # A missing executable raises FileNotFoundError rather than
        # CalledProcessError; that is the usual "not installed" case.
        logger.error(
            "Docker is not installed! Please install Docker to run the model in a container.")
        return False
288
+
289
def build_docker_image(
        self,
        image_name="model_image",
):
    """Build the docker image using the Dockerfile in the model directory.

    The Dockerfile is not used as is: the COPY instruction that copies the
    current folder to ``/app/model_dir/main`` is commented out in a temporary
    copy, because the model directory is not meant to be copied into the image.

    Args:
        image_name: tag given to the built image.

    Exits the process with status 1 when ``docker build`` fails.
    """
    try:
        logger.info(f"Building docker image from Dockerfile in {self.model_path}...")

        dockerfile_path = os.path.join(self.model_path, "Dockerfile")
        with open(dockerfile_path, 'r') as file:
            lines = file.readlines()

        # Comment out the COPY instruction that copies the current folder.
        modified_lines = [
            f'# {line}' if 'COPY .' in line and '/app/model_dir/main' in line else line
            for line in lines
        ]

        # The modified Dockerfile is written to a temporary directory, while
        # the model directory remains the build context.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_dockerfile_path = os.path.join(temp_dir, "Dockerfile.temp")

            with open(temp_dockerfile_path, 'w') as file:
                file.writelines(modified_lines)

            subprocess.check_call(
                ['docker', 'build', '-t', image_name, '-f', temp_dockerfile_path, self.model_path])
        logger.info(f"Docker image '{image_name}' built successfully!")
    except subprocess.CalledProcessError as e:
        # A failed build is an error, not an informational message.
        logger.error(f"Error occurred while building the Docker image: {e}")
        sys.exit(1)
326
+
327
def docker_image_exists(self, image_name):
    """Report whether ``docker inspect <image_name>`` succeeds.

    Returns:
        bool: True when the command exits with status 0, False otherwise.
    """
    logger.info(f"Checking if Docker image '{image_name}' exists...")
    try:
        inspected = subprocess.run(["docker", "inspect", image_name])
        inspected.check_returncode()
    except subprocess.CalledProcessError:
        logger.info(f"Docker image '{image_name}' does not exist!")
        return False
    logger.info(f"Docker image '{image_name}' exists!")
    return True
337
+
338
def run_docker_container(self,
                         image_name,
                         container_name="clarifai-model-container",
                         port=8080,
                         env_vars=None):
    """Run the model server in a Docker container and wait for it to exit.

    The container is started with ``--rm``, ``--gpus all`` and host
    networking, and the model directory is mounted at ``/app/model_dir/main``.
    A SIGINT handler is installed that stops the container and exits with
    status 0.

    Args:
        image_name: image to run.
        container_name: name given to the container.
        port: port passed to the server through ``--port``.
        env_vars: optional mapping of environment variables for the container.

    Exits the process with status 1 when an error occurs.
    """
    try:
        logger.info(f"Running Docker container '{container_name}' from image '{image_name}'...")
        # Base docker run command
        cmd = [
            "docker", "run", "--name", container_name, '--rm', "--gpus", "all", "--network", "host"
        ]
        # Add volume mappings
        cmd.extend(["-v", f"{self.model_path}:/app/model_dir/main"])
        # Add environment variables
        if env_vars:
            for key, value in env_vars.items():
                cmd.extend(["-e", f"{key}={value}"])
        # Add the image name
        cmd.append(image_name)
        # Arguments after the image name are passed to the image's entrypoint
        cmd.extend(
            ["--model_path", "/app/model_dir/main", "--start_dev_server", "--port",
             str(port)])
        process = subprocess.Popen(cmd)
        # NOTE: logged right after launching; the container may still fail to start.
        logger.info(
            f"Docker container '{container_name}' is running successfully! access the model at http://localhost:{port}"
        )

        # Handle Ctrl+C (SIGINT) by stopping the container before exiting
        def signal_handler(sig, frame):
            logger.info(f"Stopping Docker container '{container_name}'...")
            subprocess.run(["docker", "stop", container_name], check=True)
            process.terminate()
            logger.info(f"Docker container '{container_name}' stopped successfully!")
            time.sleep(1)
            sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        # Block until the docker client process finishes
        process.wait()
    except Exception as e:
        # One handler is enough: CalledProcessError is an Exception and both
        # original handlers did the same thing. Failures are logged as errors.
        logger.error(f"Error occurred while running the Docker container: {e}")
        sys.exit(1)
387
+
388
def test_model_container(self,
                         image_name,
                         container_name="clarifai-model-container",
                         env_vars=None):
    """Run the local model test inside a Docker container.

    The container's entrypoint is replaced with ``python``, which imports
    ModelRunLocally and calls ``_run_test()`` on the mounted model directory.

    Args:
        image_name: image to run.
        container_name: name given to the container.
        env_vars: optional mapping of environment variables for the container.

    Exits the process with status 1 when the container command fails.
    """
    test_snippet = (
        "from clarifai.runners.models.model_run_locally import ModelRunLocally; ModelRunLocally('/app/model_dir/main')._run_test()"
    )
    try:
        logger.info("Testing the model inside the Docker container...")
        # docker run options: name, auto-remove, GPUs, host network, entrypoint, volume
        docker_args = [
            "docker", "run", "--name", container_name, '--rm', "--gpus", "all", "--network", "host",
            "--entrypoint", "python", "-v", f"{self.model_path}:/app/model_dir/main"
        ]
        # one "-e KEY=VALUE" pair per environment variable
        if env_vars:
            for name, val in env_vars.items():
                docker_args += ["-e", f"{name}={val}"]
        # image name, followed by the arguments handed to the python entrypoint
        docker_args += [image_name, "-c", test_snippet]
        subprocess.check_call(docker_args)
        logger.info("Model tested successfully!")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error testing the model inside the Docker container: {e}")
        sys.exit(1)
420
+
421
def container_exists(self, container_name="clarifai-model-container"):
    """Check whether a Docker container with exactly this name exists.

    Both running and stopped containers are listed (``docker ps -a``).

    Returns:
        bool: True when the container is listed, False when it is not or
        when the docker command fails.
    """
    try:
        result = subprocess.run(
            ["docker", "ps", "-a", "--filter", f"name={container_name}", "--format", "{{.Names}}"],
            check=True,
            capture_output=True,
            text=True)
        # The name filter also matches containers whose name merely contains
        # container_name, so several lines may come back. Compare each line
        # instead of the whole output.
        listed_names = [name.strip() for name in result.stdout.splitlines()]
        if container_name in listed_names:
            logger.info(f"Docker container '{container_name}' exists.")
            return True
        return False
    except subprocess.CalledProcessError as e:
        logger.error(f"Error occurred while checking if container exists: {e}")
        return False
439
+
440
def stop_docker_container(self, container_name="clarifai-model-container"):
    """Stop the Docker container if it's running.

    Errors from the docker commands are logged, not raised.
    """
    try:
        # List running containers only (no -a)
        result = subprocess.run(
            ["docker", "ps", "--filter", f"name={container_name}", "--format", "{{.Names}}"],
            check=True,
            capture_output=True,
            text=True)
        # The name filter also matches containers whose name merely contains
        # container_name; look for an exact match among the returned lines.
        running_names = [name.strip() for name in result.stdout.splitlines()]
        if container_name in running_names:
            logger.info(f"Docker container '{container_name}' is running. Stopping it...")
            subprocess.run(["docker", "stop", container_name], check=True)
            logger.info(f"Docker container '{container_name}' stopped successfully!")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error occurred while stopping the Docker container: {e}")
455
+
456
def remove_docker_container(self, container_name="clarifai-model-container"):
    """Run ``docker rm`` on the named container; a failure is logged, not raised."""
    logger.info(f"Removing Docker container '{container_name}'...")
    try:
        removal = subprocess.run(["docker", "rm", container_name])
        removal.check_returncode()
    except subprocess.CalledProcessError as e:
        logger.error(f"Error occurred while removing the Docker container: {e}")
    else:
        logger.info(f"Docker container '{container_name}' removed successfully!")
464
+
465
def remove_docker_image(self, image_name):
    """Run ``docker rmi`` on the named image; a failure is logged, not raised."""
    logger.info(f"Removing Docker image '{image_name}'...")
    try:
        removal = subprocess.run(["docker", "rmi", image_name])
        removal.check_returncode()
    except subprocess.CalledProcessError as e:
        logger.error(f"Error occurred while removing the Docker image: {e}")
    else:
        logger.info(f"Docker image '{image_name}' removed successfully!")
473
+
171
474
  def clean_up(self):
172
475
  """Clean up the temporary virtual environment."""
173
476
  if os.path.exists(self.temp_dir):
@@ -175,16 +478,53 @@ class ModelRunLocally:
175
478
  shutil.rmtree(self.temp_dir)
176
479
 
177
480
 
178
def main(model_path,
         run_model_server=False,
         inside_container=False,
         port=8080,
         keep_env=False,
         keep_image=False):
    """Test a model locally, or serve it, in a virtualenv or a Docker container.

    Args:
        model_path: path of the model directory.
        run_model_server: start the model server instead of running the test.
        inside_container: use a Docker container instead of a virtual environment.
        port: port for the model server.
        keep_env: keep the virtual environment instead of cleaning it up.
        keep_image: keep the Docker image instead of removing it.

    Exits with status 1 when CLARIFAI_PAT is unset or empty, or when Docker
    is requested but not installed.
    """
    # .get() so an unset variable reaches the error message below instead of
    # raising KeyError.
    if not os.environ.get('CLARIFAI_PAT'):
        logger.error(
            "CLARIFAI_PAT environment variable is not set! Please set your PAT in the 'CLARIFAI_PAT' environment variable."
        )
        sys.exit(1)
    manager = ModelRunLocally(model_path)
    manager.uploader.download_checkpoints()
    if inside_container:
        if not manager.is_docker_installed():
            sys.exit(1)
        manager.uploader.create_dockerfile()
        # The image tag is a hash of the requirements file and the Dockerfile,
        # so an image is rebuilt only when one of them changes.
        image_tag = manager._docker_hash()
        image_name = f"{manager.config['model']['id']}:{image_tag}"
        container_name = manager.config['model']['id']
        if not manager.docker_image_exists(image_name):
            manager.build_docker_image(image_name=image_name)
        try:
            envs = {'CLARIFAI_PAT': os.environ['CLARIFAI_PAT'], 'CLARIFAI_USER_ID': 'n/a'}
            if run_model_server:
                manager.run_docker_container(
                    image_name=image_name, container_name=container_name, port=port, env_vars=envs)
            else:
                manager.test_model_container(
                    image_name=image_name, container_name=container_name, env_vars=envs)
        finally:
            if manager.container_exists(container_name):
                manager.stop_docker_container(container_name)
                # Containers are started with --rm, so a stopped container is
                # normally gone already; only remove it if it is still listed.
                if manager.container_exists(container_name):
                    manager.remove_docker_container(container_name=container_name)
            if not keep_image:
                manager.remove_docker_image(image_name)

    else:
        try:
            use_existing_env = manager.create_temp_venv()
            # A reused environment already has its requirements installed.
            if not use_existing_env:
                manager.install_requirements()
            if run_model_server:
                manager.run_model_server(port)
            else:
                manager.test_model()
        finally:
            if not keep_env:
                manager.clean_up()
@@ -10,6 +10,9 @@ from google.protobuf import json_format
10
10
  from rich import print
11
11
 
12
12
  from clarifai.client import BaseClient
13
+ from clarifai.runners.utils.const import (AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES,
14
+ CONCEPTS_REQUIRED_MODEL_TYPE, DEFAULT_PYTHON_VERSION,
15
+ PYTHON_BASE_IMAGE, TORCH_BASE_IMAGE)
13
16
  from clarifai.runners.utils.loader import HuggingFaceLoader
14
17
  from clarifai.urls.helper import ClarifaiUrlHelper
15
18
  from clarifai.utils.logging import logger
@@ -23,48 +26,6 @@ def _clear_line(n: int = 1) -> None:
23
26
 
24
27
 
25
28
  class ModelUploader:
26
- DEFAULT_PYTHON_VERSION = 3.11
27
- DEFAULT_TORCH_VERSION = '2.4.0'
28
- DEFAULT_CUDA_VERSION = '124'
29
- # List of available torch images for matrix
30
- '''
31
- python_version: ['3.8', '3.9', '3.10', '3.11']
32
- torch_version: ['2.0.0', '2.1.0', '2.2.0', '2.3.0', '2.4.0', '2.4.1', '2.5.0']
33
- cuda_version: ['124']
34
- '''
35
- AVAILABLE_TORCH_IMAGES = [
36
- '2.0.0-py3.8-cuda124',
37
- '2.0.0-py3.9-cuda124',
38
- '2.0.0-py3.10-cuda124',
39
- '2.0.0-py3.11-cuda124',
40
- '2.1.0-py3.8-cuda124',
41
- '2.1.0-py3.9-cuda124',
42
- '2.1.0-py3.10-cuda124',
43
- '2.1.0-py3.11-cuda124',
44
- '2.2.0-py3.8-cuda124',
45
- '2.2.0-py3.9-cuda124',
46
- '2.2.0-py3.10-cuda124',
47
- '2.2.0-py3.11-cuda124',
48
- '2.3.0-py3.8-cuda124',
49
- '2.3.0-py3.9-cuda124',
50
- '2.3.0-py3.10-cuda124',
51
- '2.3.0-py3.11-cuda124',
52
- '2.4.0-py3.8-cuda124',
53
- '2.4.0-py3.9-cuda124',
54
- '2.4.0-py3.10-cuda124',
55
- '2.4.0-py3.11-cuda124',
56
- '2.4.1-py3.8-cuda124',
57
- '2.4.1-py3.9-cuda124',
58
- '2.4.1-py3.10-cuda124',
59
- '2.4.1-py3.11-cuda124',
60
- ]
61
- AVAILABLE_PYTHON_IMAGES = ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
62
- PYTHON_BASE_IMAGE = 'public.ecr.aws/clarifai-models/python-base:{python_version}'
63
- TORCH_BASE_IMAGE = 'public.ecr.aws/clarifai-models/torch:{torch_version}-py{python_version}-cuda{cuda_version}'
64
-
65
- CONCEPTS_REQUIRED_MODEL_TYPE = [
66
- 'visual-classifier', 'visual-detector', 'visual-segmenter', 'text-classifier'
67
- ]
68
29
 
69
30
  def __init__(self, folder: str):
70
31
  self._client = None
@@ -126,7 +87,7 @@ class ModelUploader:
126
87
  user_id = model.get('user_id')
127
88
  app_id = model.get('app_id')
128
89
 
129
- base = os.environ.get('CLARIFAI_API_BASE', 'https://api-dev.clarifai.com')
90
+ base = os.environ.get('CLARIFAI_API_BASE', 'https://api.clarifai.com')
130
91
 
131
92
  self._client = BaseClient(user_id=user_id, app_id=app_id, base=base)
132
93
 
@@ -233,33 +194,35 @@ class ModelUploader:
233
194
  build_info = self.config.get('build_info', {})
234
195
  if 'python_version' in build_info:
235
196
  python_version = build_info['python_version']
236
- if python_version not in self.AVAILABLE_PYTHON_IMAGES:
197
+ if python_version not in AVAILABLE_PYTHON_IMAGES:
237
198
  logger.error(
238
- f"Python version {python_version} not supported, please use one of the following versions: {self.AVAILABLE_PYTHON_IMAGES}"
199
+ f"Python version {python_version} not supported, please use one of the following versions: {AVAILABLE_PYTHON_IMAGES}"
239
200
  )
240
201
  return
241
202
  logger.info(
242
203
  f"Using Python version {python_version} from the config file to build the Dockerfile")
243
204
  else:
244
205
  logger.info(
245
- f"Python version not found in the config file, using default Python version: {self.DEFAULT_PYTHON_VERSION}"
206
+ f"Python version not found in the config file, using default Python version: {DEFAULT_PYTHON_VERSION}"
246
207
  )
247
- python_version = self.DEFAULT_PYTHON_VERSION
208
+ python_version = DEFAULT_PYTHON_VERSION
248
209
 
249
- base_image = self.PYTHON_BASE_IMAGE.format(python_version=python_version)
210
+ base_image = PYTHON_BASE_IMAGE.format(python_version=python_version)
250
211
 
251
212
  # Parse the requirements.txt file to determine the base image
252
213
  dependencies = self._parse_requirements()
253
214
  if 'torch' in dependencies and dependencies['torch']:
254
215
  torch_version = dependencies['torch']
255
216
 
256
- for image in self.AVAILABLE_TORCH_IMAGES:
217
+ for image in AVAILABLE_TORCH_IMAGES:
257
218
  if torch_version in image and f'py{python_version}' in image:
258
- base_image = self.TORCH_BASE_IMAGE.format(
219
+ cuda_version = image.split('-')[-1].replace('cuda', '')
220
+ base_image = TORCH_BASE_IMAGE.format(
259
221
  torch_version=torch_version,
260
222
  python_version=python_version,
261
- cuda_version=self.DEFAULT_CUDA_VERSION)
262
- logger.info(f"Using Torch version {torch_version} base image to build the Docker image")
223
+ cuda_version=cuda_version,
224
+ )
225
+ logger.info(f"Using Torch version {torch_version} base image to build the Docker image")
263
226
  break
264
227
 
265
228
  # Replace placeholders with actual values
@@ -314,7 +277,7 @@ class ModelUploader:
314
277
  config = yaml.safe_load(file)
315
278
  model = config.get('model')
316
279
  model_type_id = model.get('model_type_id')
317
- assert model_type_id in self.CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"
280
+ assert model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"
318
281
  concept_protos = self._concepts_protos_from_concepts(labels)
319
282
 
320
283
  config['concepts'] = [{'id': concept.id, 'name': concept.name} for concept in concept_protos]
@@ -332,7 +295,7 @@ class ModelUploader:
332
295
  )
333
296
 
334
297
  model_type_id = self.config.get('model').get('model_type_id')
335
- if model_type_id in self.CONCEPTS_REQUIRED_MODEL_TYPE:
298
+ if model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE:
336
299
 
337
300
  if 'concepts' in self.config:
338
301
  labels = self.config.get('concepts')
@@ -347,7 +310,10 @@ class ModelUploader:
347
310
  labels = sorted(labels.items(), key=lambda x: int(x[0]))
348
311
 
349
312
  config_file = os.path.join(self.folder, 'config.yaml')
350
- self.hf_labels_to_config(labels, config_file)
313
+ try:
314
+ self.hf_labels_to_config(labels, config_file)
315
+ except Exception as e:
316
+ logger.error(f"Failed to update the config.yaml file with the concepts: {e}")
351
317
 
352
318
  model_version_proto.output_info.data.concepts.extend(
353
319
  self._concepts_protos_from_concepts(labels))
@@ -359,7 +325,7 @@ class ModelUploader:
359
325
 
360
326
  model_type_id = self.config.get('model').get('model_type_id')
361
327
 
362
- if (model_type_id in self.CONCEPTS_REQUIRED_MODEL_TYPE) and 'concepts' not in self.config:
328
+ if (model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE) and 'concepts' not in self.config:
363
329
  logger.info(
364
330
  f"Model type {model_type_id} requires concepts to be specified in the config.yaml file.."
365
331
  )
@@ -473,8 +439,21 @@ class ModelUploader:
473
439
  is_v3=self.is_v3,
474
440
  ))
475
441
 
442
def get_model_build_logs(self):
    """Request the first page (up to 50 entries) of "builder" logs for this model version.

    Returns:
        The response of the ``ListLogEntries`` call, unmodified.
    """
    request_fields = dict(
        log_type="builder",
        user_app_id=self.client.user_app_id,
        model_id=self.model_proto.id,
        model_version_id=self.model_version_id,
        page=1,
        per_page=50,
    )
    return self.client.STUB.ListLogEntries(service_pb2.ListLogEntriesRequest(**request_fields))
453
+
476
454
  def monitor_model_build(self):
477
455
  st = time.time()
456
+ seen_logs = set() # To avoid duplicate log messages
478
457
  while True:
479
458
  resp = self.client.STUB.GetModelVersion(
480
459
  service_pb2.GetModelVersionRequest(
@@ -485,6 +464,13 @@ class ModelUploader:
485
464
  status_code = resp.model_version.status.code
486
465
  if status_code == status_code_pb2.MODEL_BUILDING:
487
466
  print(f"Model is building... (elapsed {time.time() - st:.1f}s)", end='\r', flush=True)
467
+
468
+ # Fetch and display the logs
469
+ logs = self.get_model_build_logs()
470
+ for log_entry in logs.log_entries:
471
+ if log_entry.url not in seen_logs:
472
+ seen_logs.add(log_entry.url)
473
+ print(f"Model Building Logs...: {log_entry.message.strip()}")
488
474
  time.sleep(1)
489
475
  elif status_code == status_code_pb2.MODEL_TRAINED:
490
476
  logger.info(f"\nModel build complete! (elapsed {time.time() - st:.1f}s)")