clarifai 11.0.2__py3-none-any.whl → 11.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "11.0.2"
1
+ __version__ = "11.0.4"
@@ -29,8 +29,17 @@ def _clear_line(n: int = 1) -> None:
29
29
 
30
30
  class ModelUploader:
31
31
 
32
- def __init__(self, folder: str):
32
+ def __init__(self, folder: str, validate_api_ids: bool = True, download_validation_only=False):
33
+ """
34
+ :param folder: The folder containing the model.py, config.yaml, requirements.txt and
35
+ checkpoints.
36
+ :param validate_api_ids: Whether to validate the user_id and app_id in the config file. TODO(zeiler):
37
+ deprecate in favor of download_validation_only.
38
+ :param download_validation_only: Whether to skip the API config validation. Set to True if
39
+ just downloading a checkpoint.
40
+ """
33
41
  self._client = None
42
+ self.download_validation_only = download_validation_only
34
43
  self.folder = self._validate_folder(folder)
35
44
  self.config = self._load_config(os.path.join(self.folder, 'config.yaml'))
36
45
  self._validate_config()
@@ -40,19 +49,23 @@ class ModelUploader:
40
49
  self.inference_compute_info = self._get_inference_compute_info()
41
50
  self.is_v3 = True # Do model build for v3
42
51
 
43
- @staticmethod
44
- def _validate_folder(folder):
52
+ def _validate_folder(self, folder):
53
+ if folder == ".":
54
+ folder = "" # will getcwd() next which ends with /
45
55
  if not folder.startswith("/"):
46
56
  folder = os.path.join(os.getcwd(), folder)
47
57
  logger.info(f"Validating folder: {folder}")
48
58
  if not os.path.exists(folder):
49
59
  raise FileNotFoundError(f"Folder {folder} not found, please provide a valid folder path")
50
60
  files = os.listdir(folder)
51
- assert "requirements.txt" in files, "requirements.txt not found in the folder"
52
61
  assert "config.yaml" in files, "config.yaml not found in the folder"
62
+ # If just downloading we don't need requirements.txt or the python code, we do need the
63
+ # 1/ folder to put 1/checkpoints into.
53
64
  assert "1" in files, "Subfolder '1' not found in the folder"
54
- subfolder_files = os.listdir(os.path.join(folder, '1'))
55
- assert 'model.py' in subfolder_files, "model.py not found in the folder"
65
+ if not self.download_validation_only:
66
+ assert "requirements.txt" in files, "requirements.txt not found in the folder"
67
+ subfolder_files = os.listdir(os.path.join(folder, '1'))
68
+ assert 'model.py' in subfolder_files, "model.py not found in the folder"
56
69
  return folder
57
70
 
58
71
  @staticmethod
@@ -62,18 +75,25 @@ class ModelUploader:
62
75
  return config
63
76
 
64
77
  def _validate_config_checkpoints(self):
65
-
78
+ """
79
+ Validates the checkpoints section in the config file.
80
+ :return: loader_type the type of loader or None if no checkpoints.
81
+ :return: repo_id location of checkpoint.
82
+ :return: hf_token token to access checkpoint.
83
+ """
66
84
  assert "type" in self.config.get("checkpoints"), "No loader type specified in the config file"
67
85
  loader_type = self.config.get("checkpoints").get("type")
68
86
  if not loader_type:
69
87
  logger.info("No loader type specified in the config file for checkpoints")
88
+ return None, None, None
70
89
  assert loader_type == "huggingface", "Only huggingface loader supported for now"
71
90
  if loader_type == "huggingface":
72
91
  assert "repo_id" in self.config.get("checkpoints"), "No repo_id specified in the config file"
73
92
  repo_id = self.config.get("checkpoints").get("repo_id")
74
93
 
75
- hf_token = self.config.get("checkpoints").get("hf_token", None)
76
- return repo_id, hf_token
94
+ # get from config.yaml otherwise fall back to HF_TOKEN env var.
95
+ hf_token = self.config.get("checkpoints").get("hf_token", os.environ.get("HF_TOKEN", None))
96
+ return loader_type, repo_id, hf_token
77
97
 
78
98
  def _check_app_exists(self):
79
99
  resp = self.client.STUB.GetApp(service_pb2.GetAppRequest(user_app_id=self.client.user_app_id))
@@ -105,21 +125,19 @@ class ModelUploader:
105
125
  sys.exit(1)
106
126
 
107
127
  def _validate_config(self):
108
- self._validate_config_model()
109
-
110
- if self.config.get("checkpoints"):
111
- self._validate_config_checkpoints()
128
+ if not self.download_validation_only:
129
+ self._validate_config_model()
112
130
 
113
- assert "inference_compute_info" in self.config, "inference_compute_info not found in the config file"
131
+ assert "inference_compute_info" in self.config, "inference_compute_info not found in the config file"
114
132
 
115
- if self.config.get("concepts"):
116
- model_type_id = self.config.get('model').get('model_type_id')
117
- assert model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"
133
+ if self.config.get("concepts"):
134
+ model_type_id = self.config.get('model').get('model_type_id')
135
+ assert model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"
118
136
 
119
137
  if self.config.get("checkpoints"):
120
- _, hf_token = self._validate_config_checkpoints()
138
+ loader_type, _, hf_token = self._validate_config_checkpoints()
121
139
 
122
- if hf_token:
140
+ if loader_type == "huggingface" and hf_token:
123
141
  is_valid_token = HuggingFaceLoader.validate_hftoken(hf_token)
124
142
  if not is_valid_token:
125
143
  logger.error(
@@ -303,16 +321,19 @@ class ModelUploader:
303
321
  logger.info("No checkpoints specified in the config file")
304
322
  return True
305
323
 
306
- repo_id, hf_token = self._validate_config_checkpoints()
324
+ loader_type, repo_id, hf_token = self._validate_config_checkpoints()
307
325
 
308
- loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token)
309
- success = loader.download_checkpoints(self.checkpoint_path)
326
+ success = True
327
+ if loader_type == "huggingface":
328
+ loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token)
329
+ success = loader.download_checkpoints(self.checkpoint_path)
310
330
 
311
- if not success:
312
- logger.error(f"Failed to download checkpoints for model {repo_id}")
313
- sys.exit(1)
314
- else:
315
- logger.info(f"Downloaded checkpoints for model {repo_id}")
331
+ if loader_type:
332
+ if not success:
333
+ logger.error(f"Failed to download checkpoints for model {repo_id}")
334
+ sys.exit(1)
335
+ else:
336
+ logger.info(f"Downloaded checkpoints for model {repo_id}")
316
337
  return success
317
338
 
318
339
  def _concepts_protos_from_concepts(self, concepts):
@@ -392,9 +413,10 @@ class ModelUploader:
392
413
  input(
393
414
  "Press Enter to download the HuggingFace model's config.json file to infer the concepts and continue..."
394
415
  )
395
- repo_id, hf_token = self._validate_config_checkpoints()
396
- loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token)
397
- loader.download_config(self.checkpoint_path)
416
+ loader_type, repo_id, hf_token = self._validate_config_checkpoints()
417
+ if loader_type == "huggingface":
418
+ loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token)
419
+ loader.download_config(self.checkpoint_path)
398
420
 
399
421
  else:
400
422
  logger.error(
@@ -405,10 +427,10 @@ class ModelUploader:
405
427
  model_version_proto = self.get_model_version_proto()
406
428
 
407
429
  if download_checkpoints:
408
- tar_cmd = f"tar --exclude=*~ -czvf {self.tar_file} -C {self.folder} ."
430
+ tar_cmd = f"tar --exclude=*~ --exclude={self.tar_file} -czvf {self.tar_file} -C {self.folder} ."
409
431
  else: # we don't want to send the checkpoints up even if they are in the folder.
410
432
  logger.info(f"Skipping {self.checkpoint_path} in the tar file that is uploaded.")
411
- tar_cmd = f"tar --exclude={self.checkpoint_suffix} --exclude=*~ -czvf {self.tar_file} -C {self.folder} ."
433
+ tar_cmd = f"tar --exclude={self.checkpoint_suffix} --exclude=*~ --exclude={self.tar_file} -czvf {self.tar_file} -C {self.folder} ."
412
434
  # Tar the folder
413
435
  logger.debug(tar_cmd)
414
436
  os.system(tar_cmd)
@@ -485,7 +507,7 @@ class ModelUploader:
485
507
  file_size = os.path.getsize(file_path)
486
508
  logger.info(f"Uploading model version of model {self.model_proto.id}")
487
509
  logger.info(f"Using file '{os.path.basename(file_path)}' of size: {file_size} bytes")
488
- return service_pb2.PostModelVersionsUploadRequest(
510
+ result = service_pb2.PostModelVersionsUploadRequest(
489
511
  upload_config=service_pb2.PostModelVersionsUploadConfig(
490
512
  user_app_id=self.client.user_app_id,
491
513
  model_id=self.model_proto.id,
@@ -493,6 +515,7 @@ class ModelUploader:
493
515
  total_size=file_size,
494
516
  is_v3=self.is_v3,
495
517
  ))
518
+ return result
496
519
 
497
520
  def get_model_build_logs(self):
498
521
  logs_request = service_pb2.ListLogEntriesRequest(
@@ -1,6 +1,8 @@
1
+ import fnmatch
1
2
  import importlib.util
2
3
  import json
3
4
  import os
5
+ import shutil
4
6
  import subprocess
5
7
 
6
8
  from clarifai.utils.logging import logger
@@ -39,7 +41,7 @@ class HuggingFaceLoader:
39
41
  def download_checkpoints(self, checkpoint_path: str):
40
42
  # throw error if huggingface_hub wasn't installed
41
43
  try:
42
- from huggingface_hub import list_repo_files, snapshot_download
44
+ from huggingface_hub import snapshot_download
43
45
  except ImportError:
44
46
  raise ImportError(self.HF_DOWNLOAD_TEXT)
45
47
  if os.path.exists(checkpoint_path) and self.validate_download(checkpoint_path):
@@ -53,16 +55,17 @@ class HuggingFaceLoader:
53
55
  logger.error("Model %s not found on Hugging Face" % (self.repo_id))
54
56
  return False
55
57
 
56
- ignore_patterns = None # Download everything.
57
- repo_files = list_repo_files(repo_id=self.repo_id, token=self.token)
58
- if any(f.endswith(".safetensors") for f in repo_files):
59
- logger.info(f"SafeTensors found in {self.repo_id}, downloading only .safetensors files.")
60
- ignore_patterns = ["original/*", "*.pth", "*.bin"]
58
+ self.ignore_patterns = self._get_ignore_patterns()
61
59
  snapshot_download(
62
60
  repo_id=self.repo_id,
63
61
  local_dir=checkpoint_path,
64
62
  local_dir_use_symlinks=False,
65
- ignore_patterns=ignore_patterns)
63
+ ignore_patterns=self.ignore_patterns)
64
+ # Remove the `.cache` folder if it exists
65
+ cache_path = os.path.join(checkpoint_path, ".cache")
66
+ if os.path.exists(cache_path) and os.path.isdir(cache_path):
67
+ shutil.rmtree(cache_path)
68
+
66
69
  except Exception as e:
67
70
  logger.error(f"Error downloading model checkpoints {e}")
68
71
  return False
@@ -109,11 +112,44 @@ class HuggingFaceLoader:
109
112
  from huggingface_hub import list_repo_files
110
113
  except ImportError:
111
114
  raise ImportError(self.HF_DOWNLOAD_TEXT)
112
- checkpoint_dir_files = [
113
- f for dp, dn, fn in os.walk(os.path.expanduser(checkpoint_path)) for f in fn
114
- ]
115
- return (len(checkpoint_dir_files) >= len(list_repo_files(self.repo_id))) and len(
116
- list_repo_files(self.repo_id)) > 0
115
+ # Get the list of files on the repo
116
+ repo_files = list_repo_files(self.repo_id, token=self.token)
117
+
118
+ self.ignore_patterns = self._get_ignore_patterns()
119
+ # Get the list of files on the repo that are not ignored
120
+ if getattr(self, "ignore_patterns", None):
121
+ patterns = self.ignore_patterns
122
+
123
+ def should_ignore(file_path):
124
+ return any(fnmatch.fnmatch(file_path, pattern) for pattern in patterns)
125
+
126
+ repo_files = [f for f in repo_files if not should_ignore(f)]
127
+
128
+ # Check if downloaded files match the files we expect (ignoring ignored patterns)
129
+ checkpoint_dir_files = []
130
+ for dp, dn, fn in os.walk(os.path.expanduser(checkpoint_path)):
131
+ checkpoint_dir_files.extend(
132
+ [os.path.relpath(os.path.join(dp, f), checkpoint_path) for f in fn])
133
+
134
+ # Validate by comparing file lists
135
+ return len(checkpoint_dir_files) >= len(repo_files) and not (
136
+ len(set(repo_files) - set(checkpoint_dir_files)) > 0) and len(repo_files) > 0
137
+
138
+ def _get_ignore_patterns(self):
139
+ # check if model exists on HF
140
+ try:
141
+ from huggingface_hub import list_repo_files
142
+ except ImportError:
143
+ raise ImportError(self.HF_DOWNLOAD_TEXT)
144
+
145
+ # Get the list of files on the repo that are not ignored
146
+ repo_files = list_repo_files(self.repo_id, token=self.token)
147
+ self.ignore_patterns = None
148
+ if any(f.endswith(".safetensors") for f in repo_files):
149
+ self.ignore_patterns = [
150
+ "**/original/*", "**/*.pth", "**/*.bin", "*.pth", "*.bin", "**/.cache/*"
151
+ ]
152
+ return self.ignore_patterns
117
153
 
118
154
  @staticmethod
119
155
  def validate_config(checkpoint_path: str):
clarifai/utils/logging.py CHANGED
@@ -282,6 +282,11 @@ class JsonFormatter(logging.Formatter):
282
282
  except Exception:
283
283
  self.source_host = ""
284
284
 
285
+ self.extra_blacklist_fields = []
286
+ extra_blacklist_fields = os.getenv('EXTRA_JSON_LOGGER_BLACKLIST_FIELDS', None)
287
+ if extra_blacklist_fields:
288
+ self.extra_blacklist_fields = extra_blacklist_fields.split(",")
289
+
285
290
  def _build_fields(self, defaults, fields):
286
291
  """Return provided fields including any in defaults
287
292
  """
@@ -302,6 +307,8 @@ class JsonFormatter(logging.Formatter):
302
307
  msg = record.getMessage()
303
308
  for k in FIELD_BLACKLIST:
304
309
  fields.pop(k, None)
310
+ for k in self.extra_blacklist_fields:
311
+ fields.pop(k, None)
305
312
  # Rename 'levelname' to 'level' and make the value lowercase to match Go logs
306
313
  level = fields.pop('levelname', None)
307
314
  if level:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: clarifai
3
- Version: 11.0.2
3
+ Version: 11.0.4
4
4
  Summary: Clarifai Python SDK
5
5
  Home-page: https://github.com/Clarifai/clarifai-python
6
6
  Author: Clarifai
@@ -1,4 +1,4 @@
1
- clarifai/__init__.py,sha256=RDbyFSHLm1s06WmNHjem0y67LhbGXNidjKUmieqBJ5c,23
1
+ clarifai/__init__.py,sha256=vo9MZbRrD_gywPOV0Tq9RYPDXhSDrU97AuJT6cCihkg,23
2
2
  clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  clarifai/errors.py,sha256=RwzTajwds51wLD0MVlMC5kcpBnzRpreDLlazPSBZxrg,2605
4
4
  clarifai/versions.py,sha256=jctnczzfGk_S3EnVqb2FjRKfSREkNmvNEwAAa_VoKiQ,222
@@ -69,19 +69,19 @@ clarifai/runners/models/model_class.py,sha256=9JSPAr4U4K7xI0kSl-q0mHB06zknm2OR-8
69
69
  clarifai/runners/models/model_run_locally.py,sha256=OhzQbmaV8Wwgs2H0KhdDF6Z7bYSaIh4RRA0QwSiv5vY,20644
70
70
  clarifai/runners/models/model_runner.py,sha256=3vzoastQxkGRDK8T9aojDsLNBb9A3IiKm6YmbFrE9S0,6241
71
71
  clarifai/runners/models/model_servicer.py,sha256=X4715PVA5PBurRTYcwSEudg8fShGV6InAF4mmRlRcHg,2826
72
- clarifai/runners/models/model_upload.py,sha256=ggUa1OwqZg57C-Dagf6U22fSC4SHKZ_mB4xMSynCtPg,22411
72
+ clarifai/runners/models/model_upload.py,sha256=pao_genthc8pW9ENFrJjd4TI3TRsWKgnceZtgD5M8sY,23829
73
73
  clarifai/runners/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  clarifai/runners/utils/const.py,sha256=eyBrj5ywuGKPF-IFipm7yjiYyLhnsKhMNZ6xF-OvykQ,1250
75
75
  clarifai/runners/utils/data_handler.py,sha256=sxy9zlAgI6ETuxCQhUgEXAn2GCsaW1GxpK6GTaMne0g,6966
76
76
  clarifai/runners/utils/data_utils.py,sha256=R1iQ82TuQ9JwxCJk8yEB1Lyb0BYVhVbWJI9YDi1zGOs,318
77
- clarifai/runners/utils/loader.py,sha256=O7L8Foc98CALA4FLUeQaRNw0d-y3K46lCJPXigsJd18,4858
77
+ clarifai/runners/utils/loader.py,sha256=Q4psyvHjGPDHA6GMFLEhSSVqPRVq4vJyYyDItKn1WwU,6100
78
78
  clarifai/runners/utils/url_fetcher.py,sha256=v_8JOWmkyFAzsBulsieKX7Nfjy1Yg7wGSZeqfEvw2cg,1640
79
79
  clarifai/schema/search.py,sha256=JjTi8ammJgZZ2OGl4K6tIA4zEJ1Fr2ASZARXavI1j5c,2448
80
80
  clarifai/urls/helper.py,sha256=tjoMGGHuWX68DUB0pk4MEjrmFsClUAQj2jmVEM_Sy78,4751
81
81
  clarifai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
82
  clarifai/utils/cli.py,sha256=O6ukcQb05pFIgdQKWn0tL0AALAjT3U3DFRjd_GgPCvk,1918
83
83
  clarifai/utils/constants.py,sha256=MG_iHnSwNEyUZOpvsrTicNwaT4CIjmlK_Ixk_qqEX8g,142
84
- clarifai/utils/logging.py,sha256=rhutBRQJLtkNRz8IErNCgbIpvtl2fQ3D2otYcGqd3-Q,11565
84
+ clarifai/utils/logging.py,sha256=Co2J4JzltrVEf-Fbll3X1n-iVxYOGXbfLVwx-X49bMg,11855
85
85
  clarifai/utils/misc.py,sha256=ptjt1NtteDT0EhrPoyQ7mgWtvoAQ-XNncQaZvNHb0KI,2253
86
86
  clarifai/utils/model_train.py,sha256=Mndqy5GNu7kjQHjDyNVyamL0hQFLGSHcWhOuPyOvr1w,8005
87
87
  clarifai/utils/evaluation/__init__.py,sha256=PYkurUrXrGevByj7RFb6CoU1iC7fllyQSfnnlo9WnY8,69
@@ -92,9 +92,9 @@ clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
92
92
  clarifai/workflows/export.py,sha256=vICRhIreqDSShxLKjHNM2JwzKsf1B4fdXB0ciMcA70k,1945
93
93
  clarifai/workflows/utils.py,sha256=nGeB_yjVgUO9kOeKTg4OBBaBz-AwXI3m-huSVj-9W18,1924
94
94
  clarifai/workflows/validate.py,sha256=yJq03MaJqi5AK3alKGJJBR89xmmjAQ31sVufJUiOqY8,2556
95
- clarifai-11.0.2.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
96
- clarifai-11.0.2.dist-info/METADATA,sha256=UR2QCaE64X7v_Lc3VfVyclCDTp1LUfRTScyTY4wOxrU,22456
97
- clarifai-11.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
98
- clarifai-11.0.2.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
99
- clarifai-11.0.2.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
100
- clarifai-11.0.2.dist-info/RECORD,,
95
+ clarifai-11.0.4.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
96
+ clarifai-11.0.4.dist-info/METADATA,sha256=pBd3sHEtOUF5sOlRGEixOA3cdZ39PhqWxEiLW-e6sSs,22456
97
+ clarifai-11.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
98
+ clarifai-11.0.4.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
99
+ clarifai-11.0.4.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
100
+ clarifai-11.0.4.dist-info/RECORD,,