clarifai 11.0.2__py3-none-any.whl → 11.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/runners/models/model_upload.py +56 -33
- clarifai/runners/utils/loader.py +48 -12
- clarifai/utils/logging.py +7 -0
- {clarifai-11.0.2.dist-info → clarifai-11.0.4.dist-info}/METADATA +1 -1
- {clarifai-11.0.2.dist-info → clarifai-11.0.4.dist-info}/RECORD +10 -10
- {clarifai-11.0.2.dist-info → clarifai-11.0.4.dist-info}/LICENSE +0 -0
- {clarifai-11.0.2.dist-info → clarifai-11.0.4.dist-info}/WHEEL +0 -0
- {clarifai-11.0.2.dist-info → clarifai-11.0.4.dist-info}/entry_points.txt +0 -0
- {clarifai-11.0.2.dist-info → clarifai-11.0.4.dist-info}/top_level.txt +0 -0
clarifai/__init__.py
CHANGED
@@ -1 +1 @@
-__version__ = "11.0.2"
+__version__ = "11.0.4"
clarifai/runners/models/model_upload.py
CHANGED
@@ -29,8 +29,17 @@ def _clear_line(n: int = 1) -> None:
 
 class ModelUploader:
 
-  def __init__(self, folder: str):
+  def __init__(self, folder: str, validate_api_ids: bool = True, download_validation_only=False):
+    """
+    :param folder: The folder containing the model.py, config.yaml, requirements.txt and
+      checkpoints.
+    :param validate_api_ids: Whether to validate the user_id and app_id in the config file. TODO(zeiler):
+      deprecate in favor of download_validation_only.
+    :param download_validation_only: Whether to skip the API config validation. Set to True if
+      just downloading a checkpoint.
+    """
     self._client = None
+    self.download_validation_only = download_validation_only
     self.folder = self._validate_folder(folder)
     self.config = self._load_config(os.path.join(self.folder, 'config.yaml'))
     self._validate_config()
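For a checkpoint-download-only flow, the uploader can now be constructed without the full upload-time validation. A minimal sketch, using the class and flag from the hunk above (the folder path is hypothetical):

    from clarifai.runners.models.model_upload import ModelUploader

    # With download_validation_only=True the requirements.txt/model.py checks and the
    # API-side model validation are skipped; the path below is a placeholder.
    uploader = ModelUploader("/path/to/model_folder", download_validation_only=True)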
@@ -40,19 +49,23 @@ class ModelUploader:
     self.inference_compute_info = self._get_inference_compute_info()
     self.is_v3 = True  # Do model build for v3
 
-
-
+  def _validate_folder(self, folder):
+    if folder == ".":
+      folder = ""  # will getcwd() next which ends with /
     if not folder.startswith("/"):
       folder = os.path.join(os.getcwd(), folder)
     logger.info(f"Validating folder: {folder}")
     if not os.path.exists(folder):
       raise FileNotFoundError(f"Folder {folder} not found, please provide a valid folder path")
     files = os.listdir(folder)
-    assert "requirements.txt" in files, "requirements.txt not found in the folder"
     assert "config.yaml" in files, "config.yaml not found in the folder"
+    # If just downloading we don't need requirements.txt or the python code, we do need the
+    # 1/ folder to put 1/checkpoints into.
     assert "1" in files, "Subfolder '1' not found in the folder"
-
-
+    if not self.download_validation_only:
+      assert "requirements.txt" in files, "requirements.txt not found in the folder"
+      subfolder_files = os.listdir(os.path.join(folder, '1'))
+      assert 'model.py' in subfolder_files, "model.py not found in the folder"
    return folder
 
   @staticmethod
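Read together, the asserts above expect a folder containing config.yaml, requirements.txt, and a 1/ subfolder holding model.py (with checkpoints going into 1/checkpoints/). A minimal sketch that creates such a skeleton, with a hypothetical path and empty files:

    import os

    # Hypothetical minimal layout satisfying _validate_folder; file contents are left empty.
    folder = "/tmp/my_model"
    os.makedirs(os.path.join(folder, "1", "checkpoints"), exist_ok=True)
    for name in ["config.yaml", "requirements.txt"]:
        open(os.path.join(folder, name), "a").close()
    open(os.path.join(folder, "1", "model.py"), "a").close()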
@@ -62,18 +75,25 @@ class ModelUploader:
     return config
 
   def _validate_config_checkpoints(self):
-
+    """
+    Validates the checkpoints section in the config file.
+    :return: loader_type the type of loader or None if no checkpoints.
+    :return: repo_id location of checkpoint.
+    :return: hf_token token to access checkpoint.
+    """
     assert "type" in self.config.get("checkpoints"), "No loader type specified in the config file"
     loader_type = self.config.get("checkpoints").get("type")
     if not loader_type:
       logger.info("No loader type specified in the config file for checkpoints")
+      return None, None, None
     assert loader_type == "huggingface", "Only huggingface loader supported for now"
     if loader_type == "huggingface":
       assert "repo_id" in self.config.get("checkpoints"), "No repo_id specified in the config file"
       repo_id = self.config.get("checkpoints").get("repo_id")
 
-
-
+      # get from config.yaml otherwise fall back to HF_TOKEN env var.
+      hf_token = self.config.get("checkpoints").get("hf_token", os.environ.get("HF_TOKEN", None))
+      return loader_type, repo_id, hf_token
 
   def _check_app_exists(self):
     resp = self.client.STUB.GetApp(service_pb2.GetAppRequest(user_app_id=self.client.user_app_id))
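The keys read in _validate_config_checkpoints above map to a checkpoints section in config.yaml; a hypothetical example of the parsed structure (values are placeholders):

    # Hypothetical parsed config.yaml checkpoints section; keys mirror the reads above.
    config = {
        "checkpoints": {
            "type": "huggingface",     # only the huggingface loader is supported for now
            "repo_id": "owner/model",  # placeholder Hugging Face repo id
            # "hf_token" is optional; os.environ["HF_TOKEN"] is used as the fallback
        },
    }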
@@ -105,21 +125,19 @@ class ModelUploader:
       sys.exit(1)
 
   def _validate_config(self):
-    self.
-
-    if self.config.get("checkpoints"):
-      self._validate_config_checkpoints()
+    if not self.download_validation_only:
+      self._validate_config_model()
 
-
+    assert "inference_compute_info" in self.config, "inference_compute_info not found in the config file"
 
-
-
-
+    if self.config.get("concepts"):
+      model_type_id = self.config.get('model').get('model_type_id')
+      assert model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"
 
     if self.config.get("checkpoints"):
-      _, hf_token = self._validate_config_checkpoints()
+      loader_type, _, hf_token = self._validate_config_checkpoints()
 
-      if hf_token:
+      if loader_type == "huggingface" and hf_token:
         is_valid_token = HuggingFaceLoader.validate_hftoken(hf_token)
         if not is_valid_token:
           logger.error(
@@ -303,16 +321,19 @@ class ModelUploader:
       logger.info("No checkpoints specified in the config file")
       return True
 
-    repo_id, hf_token = self._validate_config_checkpoints()
+    loader_type, repo_id, hf_token = self._validate_config_checkpoints()
 
-
-
+    success = True
+    if loader_type == "huggingface":
+      loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token)
+      success = loader.download_checkpoints(self.checkpoint_path)
 
-    if
-
-
-
-
+    if loader_type:
+      if not success:
+        logger.error(f"Failed to download checkpoints for model {repo_id}")
+        sys.exit(1)
+      else:
+        logger.info(f"Downloaded checkpoints for model {repo_id}")
     return success
 
   def _concepts_protos_from_concepts(self, concepts):
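The same loader can also be driven directly outside the uploader; a minimal sketch using the constructor and download_checkpoints call shown in this diff (the repo id and destination directory are hypothetical):

    import os
    from clarifai.runners.utils.loader import HuggingFaceLoader

    # repo_id and the checkpoints directory are placeholders; the token falls back to HF_TOKEN.
    loader = HuggingFaceLoader(repo_id="owner/model", token=os.environ.get("HF_TOKEN"))
    if not loader.download_checkpoints("/path/to/model_folder/1/checkpoints"):
        raise SystemExit("Failed to download checkpoints")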
@@ -392,9 +413,10 @@ class ModelUploader:
       input(
           "Press Enter to download the HuggingFace model's config.json file to infer the concepts and continue..."
       )
-      repo_id, hf_token = self._validate_config_checkpoints()
-
-
+      loader_type, repo_id, hf_token = self._validate_config_checkpoints()
+      if loader_type == "huggingface":
+        loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token)
+        loader.download_config(self.checkpoint_path)
 
     else:
       logger.error(
@@ -405,10 +427,10 @@ class ModelUploader:
     model_version_proto = self.get_model_version_proto()
 
     if download_checkpoints:
-      tar_cmd = f"tar --exclude=*~ -czvf {self.tar_file} -C {self.folder} ."
+      tar_cmd = f"tar --exclude=*~ --exclude={self.tar_file} -czvf {self.tar_file} -C {self.folder} ."
     else:  # we don't want to send the checkpoints up even if they are in the folder.
       logger.info(f"Skipping {self.checkpoint_path} in the tar file that is uploaded.")
-      tar_cmd = f"tar --exclude={self.checkpoint_suffix} --exclude=*~ -czvf {self.tar_file} -C {self.folder} ."
+      tar_cmd = f"tar --exclude={self.checkpoint_suffix} --exclude=*~ --exclude={self.tar_file} -czvf {self.tar_file} -C {self.folder} ."
     # Tar the folder
     logger.debug(tar_cmd)
     os.system(tar_cmd)
@@ -485,7 +507,7 @@ class ModelUploader:
     file_size = os.path.getsize(file_path)
     logger.info(f"Uploading model version of model {self.model_proto.id}")
     logger.info(f"Using file '{os.path.basename(file_path)}' of size: {file_size} bytes")
-
+    result = service_pb2.PostModelVersionsUploadRequest(
         upload_config=service_pb2.PostModelVersionsUploadConfig(
             user_app_id=self.client.user_app_id,
             model_id=self.model_proto.id,
@@ -493,6 +515,7 @@ class ModelUploader:
            total_size=file_size,
            is_v3=self.is_v3,
        ))
+    return result
 
   def get_model_build_logs(self):
     logs_request = service_pb2.ListLogEntriesRequest(
clarifai/runners/utils/loader.py
CHANGED
@@ -1,6 +1,8 @@
+import fnmatch
 import importlib.util
 import json
 import os
+import shutil
 import subprocess
 
 from clarifai.utils.logging import logger
@@ -39,7 +41,7 @@ class HuggingFaceLoader:
   def download_checkpoints(self, checkpoint_path: str):
     # throw error if huggingface_hub wasn't installed
     try:
-      from huggingface_hub import
+      from huggingface_hub import snapshot_download
     except ImportError:
       raise ImportError(self.HF_DOWNLOAD_TEXT)
     if os.path.exists(checkpoint_path) and self.validate_download(checkpoint_path):
@@ -53,16 +55,17 @@ class HuggingFaceLoader:
         logger.error("Model %s not found on Hugging Face" % (self.repo_id))
         return False
 
-      ignore_patterns =
-      repo_files = list_repo_files(repo_id=self.repo_id, token=self.token)
-      if any(f.endswith(".safetensors") for f in repo_files):
-        logger.info(f"SafeTensors found in {self.repo_id}, downloading only .safetensors files.")
-        ignore_patterns = ["original/*", "*.pth", "*.bin"]
+      self.ignore_patterns = self._get_ignore_patterns()
       snapshot_download(
           repo_id=self.repo_id,
           local_dir=checkpoint_path,
           local_dir_use_symlinks=False,
-          ignore_patterns=ignore_patterns)
+          ignore_patterns=self.ignore_patterns)
+      # Remove the `.cache` folder if it exists
+      cache_path = os.path.join(checkpoint_path, ".cache")
+      if os.path.exists(cache_path) and os.path.isdir(cache_path):
+        shutil.rmtree(cache_path)
+
     except Exception as e:
       logger.error(f"Error downloading model checkpoints {e}")
       return False
@@ -109,11 +112,44 @@ class HuggingFaceLoader:
       from huggingface_hub import list_repo_files
     except ImportError:
       raise ImportError(self.HF_DOWNLOAD_TEXT)
-
-
-
-
-
+    # Get the list of files on the repo
+    repo_files = list_repo_files(self.repo_id, token=self.token)
+
+    self.ignore_patterns = self._get_ignore_patterns()
+    # Get the list of files on the repo that are not ignored
+    if getattr(self, "ignore_patterns", None):
+      patterns = self.ignore_patterns
+
+      def should_ignore(file_path):
+        return any(fnmatch.fnmatch(file_path, pattern) for pattern in patterns)
+
+      repo_files = [f for f in repo_files if not should_ignore(f)]
+
+    # Check if downloaded files match the files we expect (ignoring ignored patterns)
+    checkpoint_dir_files = []
+    for dp, dn, fn in os.walk(os.path.expanduser(checkpoint_path)):
+      checkpoint_dir_files.extend(
+          [os.path.relpath(os.path.join(dp, f), checkpoint_path) for f in fn])
+
+    # Validate by comparing file lists
+    return len(checkpoint_dir_files) >= len(repo_files) and not (
+        len(set(repo_files) - set(checkpoint_dir_files)) > 0) and len(repo_files) > 0
+
+  def _get_ignore_patterns(self):
+    # check if model exists on HF
+    try:
+      from huggingface_hub import list_repo_files
+    except ImportError:
+      raise ImportError(self.HF_DOWNLOAD_TEXT)
+
+    # Get the list of files on the repo that are not ignored
+    repo_files = list_repo_files(self.repo_id, token=self.token)
+    self.ignore_patterns = None
+    if any(f.endswith(".safetensors") for f in repo_files):
+      self.ignore_patterns = [
+          "**/original/*", "**/*.pth", "**/*.bin", "*.pth", "*.bin", "**/.cache/*"
+      ]
+    return self.ignore_patterns
 
   @staticmethod
   def validate_config(checkpoint_path: str):
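Because validate_download now filters the repo listing with the same ignore patterns, only non-ignored files are expected on disk. A small sketch of the pattern matching, with hypothetical file names:

    import fnmatch

    # Patterns copied from _get_ignore_patterns (used when .safetensors files are present).
    ignore_patterns = ["**/original/*", "**/*.pth", "**/*.bin", "*.pth", "*.bin", "**/.cache/*"]

    def should_ignore(file_path):
        return any(fnmatch.fnmatch(file_path, pattern) for pattern in ignore_patterns)

    # Hypothetical repo file names, to illustrate which entries validation skips.
    for name in ["model.safetensors", "pytorch_model.bin", "original/consolidated.00.pth"]:
        print(name, "->", "ignored" if should_ignore(name) else "expected on disk")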
clarifai/utils/logging.py
CHANGED
@@ -282,6 +282,11 @@ class JsonFormatter(logging.Formatter):
     except Exception:
       self.source_host = ""
 
+    self.extra_blacklist_fields = []
+    extra_blacklist_fields = os.getenv('EXTRA_JSON_LOGGER_BLACKLIST_FIELDS', None)
+    if extra_blacklist_fields:
+      self.extra_blacklist_fields = extra_blacklist_fields.split(",")
+
   def _build_fields(self, defaults, fields):
     """Return provided fields including any in defaults
     """
@@ -302,6 +307,8 @@ class JsonFormatter(logging.Formatter):
     msg = record.getMessage()
     for k in FIELD_BLACKLIST:
       fields.pop(k, None)
+    for k in self.extra_blacklist_fields:
+      fields.pop(k, None)
     # Rename 'levelname' to 'level' and make the value lowercase to match Go logs
     level = fields.pop('levelname', None)
     if level:
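The new environment hook lets deployments drop extra fields from the JSON log output; a minimal sketch (the field names here are hypothetical, and the variable must be set before the formatter is constructed since it is read once in __init__):

    import os

    # Comma-separated field names to drop from every JSON log record.
    os.environ["EXTRA_JSON_LOGGER_BLACKLIST_FIELDS"] = "filename,funcName"
    # JsonFormatter splits this on commas and pops each key from the record's fields.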
{clarifai-11.0.2.dist-info → clarifai-11.0.4.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-clarifai/__init__.py,sha256=
+clarifai/__init__.py,sha256=vo9MZbRrD_gywPOV0Tq9RYPDXhSDrU97AuJT6cCihkg,23
 clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/errors.py,sha256=RwzTajwds51wLD0MVlMC5kcpBnzRpreDLlazPSBZxrg,2605
 clarifai/versions.py,sha256=jctnczzfGk_S3EnVqb2FjRKfSREkNmvNEwAAa_VoKiQ,222
@@ -69,19 +69,19 @@ clarifai/runners/models/model_class.py,sha256=9JSPAr4U4K7xI0kSl-q0mHB06zknm2OR-8
 clarifai/runners/models/model_run_locally.py,sha256=OhzQbmaV8Wwgs2H0KhdDF6Z7bYSaIh4RRA0QwSiv5vY,20644
 clarifai/runners/models/model_runner.py,sha256=3vzoastQxkGRDK8T9aojDsLNBb9A3IiKm6YmbFrE9S0,6241
 clarifai/runners/models/model_servicer.py,sha256=X4715PVA5PBurRTYcwSEudg8fShGV6InAF4mmRlRcHg,2826
-clarifai/runners/models/model_upload.py,sha256=
+clarifai/runners/models/model_upload.py,sha256=pao_genthc8pW9ENFrJjd4TI3TRsWKgnceZtgD5M8sY,23829
 clarifai/runners/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/runners/utils/const.py,sha256=eyBrj5ywuGKPF-IFipm7yjiYyLhnsKhMNZ6xF-OvykQ,1250
 clarifai/runners/utils/data_handler.py,sha256=sxy9zlAgI6ETuxCQhUgEXAn2GCsaW1GxpK6GTaMne0g,6966
 clarifai/runners/utils/data_utils.py,sha256=R1iQ82TuQ9JwxCJk8yEB1Lyb0BYVhVbWJI9YDi1zGOs,318
-clarifai/runners/utils/loader.py,sha256=
+clarifai/runners/utils/loader.py,sha256=Q4psyvHjGPDHA6GMFLEhSSVqPRVq4vJyYyDItKn1WwU,6100
 clarifai/runners/utils/url_fetcher.py,sha256=v_8JOWmkyFAzsBulsieKX7Nfjy1Yg7wGSZeqfEvw2cg,1640
 clarifai/schema/search.py,sha256=JjTi8ammJgZZ2OGl4K6tIA4zEJ1Fr2ASZARXavI1j5c,2448
 clarifai/urls/helper.py,sha256=tjoMGGHuWX68DUB0pk4MEjrmFsClUAQj2jmVEM_Sy78,4751
 clarifai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/utils/cli.py,sha256=O6ukcQb05pFIgdQKWn0tL0AALAjT3U3DFRjd_GgPCvk,1918
 clarifai/utils/constants.py,sha256=MG_iHnSwNEyUZOpvsrTicNwaT4CIjmlK_Ixk_qqEX8g,142
-clarifai/utils/logging.py,sha256=
+clarifai/utils/logging.py,sha256=Co2J4JzltrVEf-Fbll3X1n-iVxYOGXbfLVwx-X49bMg,11855
 clarifai/utils/misc.py,sha256=ptjt1NtteDT0EhrPoyQ7mgWtvoAQ-XNncQaZvNHb0KI,2253
 clarifai/utils/model_train.py,sha256=Mndqy5GNu7kjQHjDyNVyamL0hQFLGSHcWhOuPyOvr1w,8005
 clarifai/utils/evaluation/__init__.py,sha256=PYkurUrXrGevByj7RFb6CoU1iC7fllyQSfnnlo9WnY8,69
@@ -92,9 +92,9 @@ clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 clarifai/workflows/export.py,sha256=vICRhIreqDSShxLKjHNM2JwzKsf1B4fdXB0ciMcA70k,1945
 clarifai/workflows/utils.py,sha256=nGeB_yjVgUO9kOeKTg4OBBaBz-AwXI3m-huSVj-9W18,1924
 clarifai/workflows/validate.py,sha256=yJq03MaJqi5AK3alKGJJBR89xmmjAQ31sVufJUiOqY8,2556
-clarifai-11.0.
-clarifai-11.0.
-clarifai-11.0.
-clarifai-11.0.
-clarifai-11.0.
-clarifai-11.0.
+clarifai-11.0.4.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
+clarifai-11.0.4.dist-info/METADATA,sha256=pBd3sHEtOUF5sOlRGEixOA3cdZ39PhqWxEiLW-e6sSs,22456
+clarifai-11.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+clarifai-11.0.4.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
+clarifai-11.0.4.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
+clarifai-11.0.4.dist-info/RECORD,,