clarifai 10.1.1__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/dataset.py +131 -41
- clarifai/client/input.py +8 -3
- clarifai/client/model.py +91 -1
- clarifai/client/search.py +2 -1
- clarifai/constants/dataset.py +2 -0
- clarifai/models/model_serving/cli/_utils.py +1 -1
- clarifai/models/model_serving/cli/build.py +1 -1
- clarifai/models/model_serving/cli/upload.py +1 -1
- clarifai/models/model_serving/utils.py +3 -1
- clarifai/utils/logging.py +30 -0
- clarifai/versions.py +1 -1
- clarifai/workflows/validate.py +1 -1
- {clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/METADATA +16 -4
- {clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/RECORD +18 -18
- {clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/LICENSE +0 -0
- {clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/WHEEL +0 -0
- {clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/entry_points.txt +0 -0
- {clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/top_level.txt +0 -0
clarifai/client/dataset.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import time
 import uuid
@@ -12,12 +13,13 @@ from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
+from tabulate import tabulate
 from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
 from clarifai.client.lister import Lister
-from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
+from clarifai.constants.dataset import DATASET_UPLOAD_TASKS, MAX_RETRIES
 from clarifai.datasets.export.inputs_annotations import (DatasetExportReader,
                                                          InputAnnotationDownloader)
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -27,7 +29,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import add_file_handler, get_logger
+from clarifai.utils.logging import add_file_handler, get_logger, process_log_files
 from clarifai.utils.misc import BackoffIterator, Chunker
 
 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -68,7 +70,8 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(
+    self.input_object = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(
         self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
@@ -192,11 +195,11 @@ class Dataset(Lister, BaseClient):
 
     return retry_annot_upload
 
-  def _delete_failed_inputs(
-
-
-
-
+  def _delete_failed_inputs(self,
+                            batch_input_ids: List[int],
+                            dataset_obj: ClarifaiDatasetType,
+                            upload_response: MultiInputResponse = None,
+                            batch_no: Optional[int] = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.
 
     Args:
@@ -231,8 +234,19 @@ class Dataset(Lister, BaseClient):
     if duplicate_input_ids:
       success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
       failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      duplicate_details = [[
+          input_ids[id], id, "Input has a duplicate ID.",
+          dataset_obj.data_generator[input_ids[id]].image_path,
+          dataset_obj.data_generator[input_ids[id]].labels,
+          dataset_obj.data_generator[input_ids[id]].metadata
+      ] for id in duplicate_input_ids]
+      duplicate_table = tabulate(
+          duplicate_details,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"
+          f"{timestamp}\nFailed to upload {len(duplicate_input_ids)} inputs due to duplicate IDs in current batch {batch_no}:\n{duplicate_table}\n\n"
       )
 
       #delete failed inputs
@@ -243,7 +257,11 @@ class Dataset(Lister, BaseClient):
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]
 
   def _upload_inputs_annotations(
-      self,
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      batch_no: Optional[int] = None,
+      is_retry_duplicates: bool = False,
   ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.
 
@@ -257,12 +275,16 @@ class Dataset(Lister, BaseClient):
       response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
+    if is_retry_duplicates:
+      for inp in input_protos:
+        inp.id = uuid.uuid4().hex
+
     input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []
 
     self.input_object._wait_for_inputs(input_job_id)
     success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
-                                                                     _response)
+                                                                     _response, batch_no)
 
     if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
@@ -273,7 +295,7 @@ class Dataset(Lister, BaseClient):
 
   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
-                     dataset_obj: ClarifaiDatasetType) -> None:
+                     dataset_obj: ClarifaiDatasetType, batch_no: Optional[int]) -> None:
     """Retry failed uploads.
 
     Args:
@@ -281,56 +303,87 @@ class Dataset(Lister, BaseClient):
       retry_annot_protos: failed annot protos
       dataset_obj: ClarifaiDataset object
     """
+
+    for _retry in range(MAX_RETRIES):
+      if not failed_input_ids and not retry_annot_protos:
+        break
+      if failed_input_ids:
+        retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+        logging.warning(
+            f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}\n"
+        )
+        failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+            failed_input_ids, dataset_obj, batch_no)
+        failed_input_ids = failed_retrying_inputs
+      if retry_annot_protos:
+        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
+        _ = self._concurrent_annot_upload(chunked_annotation_protos)
+
+    #Log failed inputs
     if failed_input_ids:
-
-
+      failed_inputs_logs = []
+      input_map = {input.id: input for input in retry_response.inputs}
+      for index in failed_retrying_inputs:
+        failed_id = dataset_obj.all_input_ids[index]
+        input_details = input_map.get(failed_id)
+        if input_details:
+          failed_input_details = [
+              index, failed_id, input_details.status.details,
+              dataset_obj.data_generator[index].image_path,
+              dataset_obj.data_generator[index].labels, dataset_obj.data_generator[index].metadata
+          ]
+          failed_inputs_logs.append(failed_input_details)
+
+      failed_table = tabulate(
+          failed_inputs_logs,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"
+          f"{timestamp}\nFailed to upload {len(failed_retrying_inputs)} inputs in current batch {batch_no}:\n{failed_table}\n\n"
       )
-      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
-          failed_input_ids, dataset_obj)
-      #Log failed inputs
-      if failed_retrying_inputs:
-        failed_retrying_input_ids = [
-            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
-        ]
-        failed_inputs_logs = {
-            input.id: input.status.details
-            for input in retry_response.inputs if input.id in failed_retrying_input_ids
-        }
-        self.logger.warning(
-            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
-        )
-      if retry_annot_protos:
-        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
-        _ = self._concurrent_annot_upload(chunked_annotation_protos)
 
-  def _data_upload(self,
+  def _data_upload(self,
+                   dataset_obj: ClarifaiDatasetType,
+                   is_log_retry: bool = False,
+                   log_retry_ids: List[int] = None,
+                   **kwargs) -> None:
     """Uploads inputs and annotations to clarifai platform dataset.
 
     Args:
-      dataset_obj: ClarifaiDataset object
+      dataset_obj: ClarifaiDataset object,
+      is_log_retry: True if the iteration is to retry uploads from logs.
+      **kwargs: Additional keyword arguments for retry uploading functionality..
+
+    Returns:
+      None
     """
-
+    if is_log_retry:
+      input_ids = log_retry_ids
+    else:
+      input_ids = list(range(len(dataset_obj)))
+
     chunk_input_ids = Chunker(input_ids, self.batch_size).chunk()
     with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
       with tqdm(total=len(chunk_input_ids), desc='Uploading Dataset') as progress:
         # Submit all jobs to the executor and store the returned futures
         futures = [
-            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj
-
+            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj,
+                            batch_no, **kwargs)
+            for batch_no, batch_input_ids in enumerate(chunk_input_ids)
         ]
 
-        for job in as_completed(futures):
+        for batch_no, job in enumerate(as_completed(futures)):
           retry_input_ids, retry_annot_protos, _ = job.result()
-          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
+          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj, batch_no)
           progress.update()
 
   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
                      get_upload_status: bool = False,
-                     log_warnings: bool = False
+                     log_warnings: bool = False,
+                     **kwargs) -> None:
     """Uploads a dataset to the app.
 
     Args:
@@ -338,6 +391,7 @@ class Dataset(Lister, BaseClient):
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
       log_warnings (bool): True if you want to save log warnings in a file
+      kwargs: Additional keyword arguments for retry uploading functionality..
     """
     #add file handler to log warnings
     if log_warnings:
@@ -365,11 +419,47 @@ class Dataset(Lister, BaseClient):
     if get_upload_status:
       pre_upload_stats = self.get_upload_status(pre_upload=True)
 
-    self._data_upload(dataset_obj)
+    self._data_upload(dataset_obj, **kwargs)
 
     if get_upload_status:
       self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)
 
+  def retry_upload_from_logs(self,
+                             log_file_path: str,
+                             dataloader: Type[ClarifaiDataLoader],
+                             retry_duplicates: bool = False,
+                             log_warnings: bool = False,
+                             **kwargs) -> None:
+    """Retries failed uploads from the log file.
+
+    Args:
+      log_file_path (str): path to the log file
+      dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
+      retry_duplicate (bool): True if you want to retry duplicate inputs
+      kwargs: Additional keyword arguments for retry uploading functionality..
+    """
+
+    duplicate_input_ids, failed_input_ids = process_log_files(log_file_path)
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+
+    if retry_duplicates and duplicate_input_ids:
+      logging.warning(f"Retrying upload for {len(duplicate_input_ids)} duplicate inputs...\n")
+      duplicate_inputs_indexes = [input["Index"] for input in duplicate_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader,
+          log_retry_ids=duplicate_inputs_indexes,
+          is_retry_duplicates=True,
+          is_log_retry=True,
+          **kwargs)
+
+    if failed_input_ids:
+      #failed_inputs= ([input["Input_ID"] for input in failed_input_ids])
+      logging.warning(f"Retrying upload for {len(failed_input_ids)} failed inputs...\n")
+      failed_input_indexes = [input["Index"] for input in failed_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader, log_retry_ids=failed_input_indexes, is_log_retry=True, **kwargs)
+
   def upload_from_csv(self,
                       csv_path: str,
                       input_type: str = 'text',
clarifai/client/input.py
CHANGED
@@ -18,6 +18,7 @@ from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.lister import Lister
+from clarifai.constants.dataset import MAX_RETRIES
 from clarifai.errors import UserError
 from clarifai.utils.logging import get_logger
 from clarifai.utils.misc import BackoffIterator, Chunker
@@ -936,10 +937,14 @@ class Inputs(Lister, BaseClient):
     """Retry failed uploads.
 
     Args:
-      failed_inputs (List[Input]): failed input
+      failed_inputs (List[Input]): failed input protos
     """
-
-
+    for _retry in range(MAX_RETRIES):
+      if failed_inputs:
+        self.logger.info(f"Retrying upload for {len(failed_inputs)} Failed inputs..\n")
+        failed_inputs = self._upload_batch(failed_inputs)
+
+    self.logger.warning(f"Failed to upload {len(failed_inputs)} inputs..\n ")
 
   def _delete_failed_inputs(self, inputs: List[Input]) -> List[Input]:
     """Delete failed input ids from clarifai platform dataset.
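The retry helper above bounds what was previously an open-ended retry: up to MAX_RETRIES passes, each feeding the batch uploader only the inputs that are still failing, followed by a single warning for anything left over. The same pattern in isolation, with `upload_batch` standing in for the client's internal `_upload_batch` and an assumed constant value:

```python
MAX_RETRIES = 10  # assumed value; the real constant is defined in clarifai.constants.dataset

def upload_with_retries(failed_inputs, upload_batch):
  """Re-submit failures up to MAX_RETRIES times; upload_batch returns the still-failing subset."""
  for _retry in range(MAX_RETRIES):
    if not failed_inputs:
      break
    failed_inputs = upload_batch(failed_inputs)
  return failed_inputs  # whatever survived every attempt
```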
clarifai/client/model.py
CHANGED
@@ -9,6 +9,7 @@ from clarifai_grpc.grpc.api.resources_pb2 import Input
 from clarifai_grpc.grpc.api.status import status_code_pb2
 from google.protobuf.json_format import MessageToDict
 from google.protobuf.struct_pb2 import Struct
+from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
@@ -381,7 +382,9 @@ class Model(Lister, BaseClient):
       except KeyError:
         pass
       yield Model.from_auth_helper(
-
+          auth=self.auth_helper,
+          model_id=self.id,
+          **dict(self.kwargs, model_version=model_version_info))
 
   def predict(self, inputs: List[Input], inference_params: Dict = {}, output_config: Dict = {}):
     """Predicts the model based on the given inputs.
@@ -757,3 +760,90 @@ class Model(Lister, BaseClient):
         metrics_by_area=metrics_by_area)
 
     return result
+
+  def export(self, export_dir: str = None) -> None:
+    """Export the model, stores the exported model as model.tar file
+
+    Args:
+      export_dir (str): The directory to save the exported model.
+
+    Example:
+      >>> from clarifai.client.model import Model
+      >>> model = Model("url")
+      >>> model.export('/path/to/export_model_dir')
+    """
+    assert self.model_info.model_version.id, "Model version ID is missing. Please provide a `model_version` with a valid `id` as an argument or as a URL in the following format: '{user_id}/{app_id}/models/{your_model_id}/model_version_id/{your_version_model_id}' when initializing."
+    try:
+      if not os.path.exists(export_dir):
+        os.makedirs(export_dir)
+    except OSError as e:
+      raise Exception(f"An error occurred while creating the directory: {e}")
+
+    def _get_export_response():
+      get_export_request = service_pb2.GetModelVersionExportRequest(
+          user_app_id=self.user_app_id,
+          model_id=self.id,
+          version_id=self.model_info.model_version.id,
+      )
+      response = self._grpc_request(self.STUB.GetModelVersionExport, get_export_request)
+
+      if response.status.code != status_code_pb2.SUCCESS and response.status.code != status_code_pb2.CONN_DOES_NOT_EXIST:
+        raise Exception(response.status)
+
+      return response
+
+    def _download_exported_model(
+        get_model_export_response: service_pb2.SingleModelVersionExportResponse,
+        local_filepath: str):
+      model_export_url = get_model_export_response.export.url
+      model_export_file_size = get_model_export_response.export.size
+
+      response = requests.get(model_export_url, stream=True)
+      response.raise_for_status()
+
+      with open(local_filepath, 'wb') as f:
+        progress = tqdm(
+            total=model_export_file_size, unit='B', unit_scale=True, desc="Exporting model")
+        for chunk in response.iter_content(chunk_size=8192):
+          f.write(chunk)
+          progress.update(len(chunk))
+        progress.close()
+
+      self.logger.info(
+          f"Model ID {self.id} with version {self.model_info.model_version.id} exported successfully to {export_dir}/model.tar"
+      )
+
+    get_export_response = _get_export_response()
+    if get_export_response.status.code == status_code_pb2.CONN_DOES_NOT_EXIST:
+      put_export_request = service_pb2.PutModelVersionExportsRequest(
+          user_app_id=self.user_app_id,
+          model_id=self.id,
+          version_id=self.model_info.model_version.id,
+      )
+
+      response = self._grpc_request(self.STUB.PutModelVersionExports, put_export_request)
+      if response.status.code != status_code_pb2.SUCCESS:
+        raise Exception(response.status)
+
+      self.logger.info(
+          f"Model ID {self.id} with version {self.model_info.model_version.id} export started, please wait..."
+      )
+      time.sleep(5)
+      start_time = time.time()
+      backoff_iterator = BackoffIterator()
+      while True:
+        get_export_response = _get_export_response()
+        if get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTING and \
+                time.time() - start_time < 60 * 30:  # 30 minutes
+          self.logger.info(
+              f"Model ID {self.id} with version {self.model_info.model_version.id} is still exporting, please wait..."
+          )
+          time.sleep(next(backoff_iterator))
+        elif get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTED:
+          _download_exported_model(get_export_response, os.path.join(export_dir, "model.tar"))
+        elif time.time() - start_time > 60 * 30:
+          raise Exception(
+              f"""Model Export took too long. Please try again or contact support@clarifai.com
+              Req ID: {get_export_response.status.req_id}""")
+    elif get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTED:
+      _download_exported_model(get_export_response, os.path.join(export_dir, "model.tar"))
clarifai/client/search.py
CHANGED
@@ -48,7 +48,8 @@ class Search(Lister, BaseClient):
     self.data_proto = resources_pb2.Data()
     self.top_k = top_k
 
-    self.inputs = Inputs(
+    self.inputs = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.rank_filter_schema = get_schema()
     BaseClient.__init__(
         self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
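This hunk and the Dataset one above fix the same omission: the internally constructed Inputs client previously did not receive a custom base_url, so input-related calls could target the default endpoint even when the parent client was pointed elsewhere. A hedged sketch, assuming the keywords visible in the hunk (pat, token, base_url) are also Search constructor parameters:

```python
from clarifai.client.search import Search

# With this fix, base_url now reaches the internal Inputs client too (values illustrative).
search = Search(user_id="user_id", app_id="app_id", top_k=5, base_url="https://api.clarifai.com")
```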
clarifai/constants/dataset.py
CHANGED
clarifai/models/model_serving/cli/_utils.py
CHANGED
@@ -11,7 +11,7 @@ from ..constants import (CLARIFAI_EXAMPLES_REPO, CLARIFAI_EXAMPLES_REPO_PATH,
 def download_examples_repo(forced_download: bool = False):
 
   def _pull():
-    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}")
+    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}", shell=True)
 
   if not os.path.isdir(CLARIFAI_EXAMPLES_REPO_PATH):
     print(f"Download examples to {CLARIFAI_EXAMPLES_REPO_PATH}")
clarifai/models/model_serving/cli/build.py
CHANGED
@@ -70,7 +70,7 @@ class BuildModelSubCli(BaseClarifaiCli):
     if not self.no_test:
       assert os.path.exists(
           self.test_path), FileNotFoundError(f"Could not find `test.py` in {self.path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
 
     # build
clarifai/models/model_serving/cli/upload.py
CHANGED
@@ -126,7 +126,7 @@ class UploadModelSubCli(BaseClarifaiCli):
     # Run test before uploading
     if not self.no_test:
       assert os.path.exists(self.test_path), FileNotFoundError(f"Not found {self.test_path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
 
     deploy(
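All three subprocess fixes address the same failure mode: on POSIX systems, `subprocess.run` given a single command string without `shell=True` treats the entire string as the executable name and raises FileNotFoundError. A small illustration of the failing call and two working alternatives (the command string is illustrative):

```python
import shlex
import subprocess

cmd = "pytest -s --log-level=INFO tests/test_model.py"  # illustrative command

# subprocess.run(cmd)               # POSIX: FileNotFoundError, no executable with this name
subprocess.run(cmd, shell=True)     # the fix applied here: let the shell parse the string
subprocess.run(shlex.split(cmd))    # shell-free equivalent, avoids shell injection concerns
```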
clarifai/models/model_serving/utils.py
CHANGED
@@ -18,4 +18,6 @@ def _read_pat():
 
 def login(pat=None):
   """ if pat provided, set pat to CLARIFAI_PAT otherwise read pat from file"""
-
+  pat = pat or _read_pat()
+  assert pat, Exception("PAT is not found, please run `clarifai login` to persist your PAT")
+  os.environ["CLARIFAI_PAT"] = pat
clarifai/utils/logging.py
CHANGED
@@ -106,3 +106,33 @@ def add_file_handler(logger: logging.Logger, file_path: str, log_level: str = 'W
   file_handler = logging.FileHandler(file_path)
   file_handler.setLevel(log_level)
   logger.addHandler(file_handler)
+
+
+def process_log_files(log_file_path: str,) -> tuple:
+  """Processes log files to get failed inputs and annotations.
+
+  Args:
+    log_file_path (str): path to the log file
+  """
+  import re
+  duplicate_input_ids = []
+  failed_input_ids = []
+  pattern = re.compile(r'\| +(\d+) +\| +(\S+) +\| +(.+?) +\| +(.+?) +\| +(.+?) +\| +(.+?) \|')
+  try:
+    with open(log_file_path, 'r') as file:
+      log_content = file.read()
+    matches = pattern.findall(log_content)
+    for match in matches:
+      index = int(match[0])
+      input_id = match[1]
+      status = match[2]
+      if status == "Input has a duplicate ID.":
+        duplicate_input_ids.append({"Index": index, "Input_ID": input_id})
+      else:
+        failed_input_ids.append({"Index": index, "Input_ID": input_id})
+
+  except Exception as e:
+    print(f"Error Processing log file {log_file_path}:{e}")
+    return [], []
+
+  return duplicate_input_ids, failed_input_ids
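`process_log_files` pairs with the tabulate grid tables that dataset.py now writes: it regex-matches each table row and buckets it by the Status column into duplicate-ID rows versus other failures. A sketch of a matching row and the call, assuming the log file below was produced by an earlier `upload_dataset(..., log_warnings=True)` run:

```python
from clarifai.utils.logging import process_log_files

# A data row of the logged grid table looks like (values illustrative):
# | 42 | input-42 | Input has a duplicate ID. | /data/img42.jpg | ['cat'] | {} |
duplicates, failures = process_log_files("Dataset_Upload1711000000.log")  # assumed file name
print(duplicates)  # e.g. [{'Index': 42, 'Input_ID': 'input-42'}]
```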
clarifai/versions.py
CHANGED
clarifai/workflows/validate.py
CHANGED
@@ -16,7 +16,7 @@ def _model_does_not_have_model_version_id_and_other_fields(m):
 
 
 def _model_has_other_fields(m):
-  return any(k not in ['model_id', 'model_version_id'] for k in m.keys())
+  return any(k not in ['model_id', 'model_version_id', 'user_id', 'app_id'] for k in m.keys())
 
 
 def _workflow_nodes_have_valid_dependencies(nodes):
{clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.1.1
+Version: 10.2.0
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -20,7 +20,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: clarifai-grpc (~=10.1
+Requires-Dist: clarifai-grpc (~=10.2.1)
 Requires-Dist: numpy (>=1.22.0)
 Requires-Dist: tqdm (>=4.65.0)
 Requires-Dist: tritonclient (>=2.34.0)
@@ -29,6 +29,7 @@ Requires-Dist: PyYAML (>=6.0.1)
 Requires-Dist: schema (>=0.7.5)
 Requires-Dist: Pillow (>=9.5.0)
 Requires-Dist: inquirerpy (==0.3.4)
+Requires-Dist: tabulate (>=0.9.0)
 Provides-Extra: all
 Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'
 
@@ -150,7 +151,7 @@ client = User(user_id="user_id", pat="your personal access token")
 
 ## :floppy_disk: Interacting with Datasets
 
-Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets and exporting datasets as .zip files.
+Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets, retrying failed uploads from logs and exporting datasets as .zip files.
 
 ```python
 # Note: CLARIFAI_PAT must be set as env variable.
@@ -162,7 +163,18 @@ dataset = app.create_dataset(dataset_id="demo_dataset")
 # execute data upload to Clarifai app dataset
 from clarifai.datasets.upload.laoders.coco_detection import COCODetectionDataLoader
 coco_dataloader = COCODetectionDataLoader("images_dir", "coco_annotation_filepath")
-dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True)
+dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True, log_warnings =True)
+
+
+#Try upload and record the failed outputs in log file.
+from clarifai.datasets.upload.utils import load_module_dataloader
+cifar_dataloader = load_module_dataloader('./image_classification/cifar10')
+dataset.upload_dataset(dataloader=cifar_dataloader, get_upload_status=True, log_warnings =True)
+
+#Retry upload from logs for `upload_dataset`
+dataset.retry_upload_from_logs(dataloader=cifar_dataloader, log_file_path='log_file.log',
+                retry_duplicates=False,
+                log_warnings=True)
 
 #upload text from csv
 dataset.upload_from_csv(csv_path='csv_path', input_type='text', csv_type='raw', labels=True)
{clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/RECORD
CHANGED
@@ -1,24 +1,24 @@
 clarifai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/errors.py,sha256=RwzTajwds51wLD0MVlMC5kcpBnzRpreDLlazPSBZxrg,2605
-clarifai/versions.py,sha256=
+clarifai/versions.py,sha256=i2k90NbuweI5RIEMwbwrZQUBIKcpkV97mANYDaTFaSo,186
 clarifai/client/__init__.py,sha256=xI1U0l5AZdRThvQAXCLsd9axxyFzXXJ22m8LHqVjQRU,662
 clarifai/client/app.py,sha256=_wDiHrMVhtofVHLZ2-4JHk_WoGCETPvHFe8ZQ3rRjFE,26700
 clarifai/client/base.py,sha256=4XQU_cPyo8cCGUcZarCBXra_IVdT1KZGt_5c3OtdKig,6489
-clarifai/client/dataset.py,sha256=
-clarifai/client/input.py,sha256=
+clarifai/client/dataset.py,sha256=u4OkBZLYME-B8sOAcE7RkTohtFWdFxVZ53yrNfadC5g,27841
+clarifai/client/input.py,sha256=yZB9R1VMJvL4g5SdSgsbre4j4v18KUGPFvypMTFOF5k,40046
 clarifai/client/lister.py,sha256=03KGMvs5RVyYqxLsSrWhNc34I8kiF1Ph0NeyEwu7nMU,2082
-clarifai/client/model.py,sha256=
+clarifai/client/model.py,sha256=oMudm5ACGK29dnvZbR90huH2czuECaZ3JxjXuGPJH28,36333
 clarifai/client/module.py,sha256=BunlC4Uv7TX9JaZ0Kciwy_1_Mtg2GPZV5OLLZZcGz6I,3977
 clarifai/client/runner.py,sha256=oZkydj1Lfxn6pVx4_-CLzyaneE-dHvBIGL44usW45gA,9867
-clarifai/client/search.py,sha256=
+clarifai/client/search.py,sha256=g9VZ7WmWeaSAy6jMwH5fzZCjnaB40xFscwmUvgPTvHs,10795
 clarifai/client/user.py,sha256=QYngaFYINw-U-3FUwyrN2rFbwGyaHavuCXMGqV34pWA,10139
 clarifai/client/workflow.py,sha256=oALMJfdgTqiilfpDT3H_nepqX9mexLu-uWV0NvtxUs0,10291
 clarifai/client/auth/__init__.py,sha256=7EwR0NrozkAUwpUnCsqXvE_p0wqx_SelXlSpKShKJK0,136
 clarifai/client/auth/helper.py,sha256=3lCKo24ZIOlcSh50juJh3ZDagOo_pxEKyoPjWUokYoA,13450
 clarifai/client/auth/register.py,sha256=2CMdBsoVLoTfjyksE6j7BM2tiEc73WKYvxnwDDgNn1k,536
 clarifai/client/auth/stub.py,sha256=KIzJZ8aRB1RzXJeWHDAx19HNdBsblPPHwYLfAkgI3rY,3779
-clarifai/constants/dataset.py,sha256=
+clarifai/constants/dataset.py,sha256=OXYirr0iaoN_47V6wxO0H6ptV81y8zNGapPBz9qqD8o,516
 clarifai/constants/model.py,sha256=LsMkLVkuBpfS4j4yDW9M4O7HxzRpIuSo9qU5T8Wg2Co,217
 clarifai/constants/rag.py,sha256=WcHwToUVIK9ItAhDefaSohQHCLNeR55PSjZ0BFnoZ3U,28
 clarifai/constants/search.py,sha256=_g3S-JEvuygiFfMVK3cl4Ry9erZpt8Zo4ilXL2i3DAE,52
@@ -43,16 +43,16 @@ clarifai/models/api.py,sha256=d3FQQlG0mNDLrfEvchqaVcq4Tgb_TqryNnJtwp3c7sE,10961
 clarifai/models/model_serving/README.md,sha256=Ln8hsyE38J3yiLZruKHjU_hdq9CjzzbDUAO28Xyw1dQ,4060
 clarifai/models/model_serving/__init__.py,sha256=78fiK9LvdGvpMxICmZWqSIyS6BFATjW2s5R6_GgtbPA,645
 clarifai/models/model_serving/constants.py,sha256=uoi8TqEFkdsHhSZu90HOO3R0BmPC3G0z9qA5ER-5H7w,688
-clarifai/models/model_serving/utils.py,sha256=
+clarifai/models/model_serving/utils.py,sha256=D2UZo90Afd9f7OGKXdjRqys_6N26psY018V7R-rcNO4,629
 clarifai/models/model_serving/cli/__init__.py,sha256=Nls28G-fedNw2oQZIkPQSN__TgjJXbG9RDzzuHIM0VI,575
-clarifai/models/model_serving/cli/_utils.py,sha256=
+clarifai/models/model_serving/cli/_utils.py,sha256=CZTKKiaoO1Mg5MKQS2Qhgy4JRjnkEHqy8zY5U6b6C0w,1734
 clarifai/models/model_serving/cli/base.py,sha256=k4ARNU1koNzGAi9ach6Vpk7hpISZySiYHyKjkBLuHLg,283
-clarifai/models/model_serving/cli/build.py,sha256
+clarifai/models/model_serving/cli/build.py,sha256=-C4PBt-9xO9YsyUagz3kF4J0_PsYb6YVKFY8y-VmY5I,2786
 clarifai/models/model_serving/cli/clarifai_clis.py,sha256=sGDDj7MrlU3goWLQm4H9dCf4lPD2Ojx50_jdIoxb5QM,663
 clarifai/models/model_serving/cli/create.py,sha256=wtKcVi8XSPN-Fx0RrSUxEwH1hm5TbZ_FrCEMIS9yszM,5598
 clarifai/models/model_serving/cli/example_cli.py,sha256=tCm0J4EI0kuuSRhEiPTuraSA-bUYwtEFEHcL1eOXzRI,1039
 clarifai/models/model_serving/cli/login.py,sha256=TYRQALJZUhNvtx2VcChO0y41YXs8-yP9BrShYb9tcOM,743
-clarifai/models/model_serving/cli/upload.py,sha256=
+clarifai/models/model_serving/cli/upload.py,sha256=ctAp_bckMLm8twO8KdilD4CnawbQ4o2FvixfBUd-91Y,6883
 clarifai/models/model_serving/docs/cli.md,sha256=AM45FZag3520ri4Terb0t7_MmLTs7gjHXAf7TYVZjZk,3942
 clarifai/models/model_serving/docs/concepts.md,sha256=ppQADibKQInf9JpfcH7wIpcMndTZ3618or5yzMhGNOE,9376
 clarifai/models/model_serving/docs/dependencies.md,sha256=apwg_IxDBzovtQYXRpWMU9pUqdf0VaS10yMVOYYXhoc,728
@@ -98,7 +98,7 @@ clarifai/runners/example_llama2.py,sha256=WMGTqv3v9t3ID1rjW9BTLMkIuvyTESL6xHcOO6
 clarifai/schema/search.py,sha256=JjTi8ammJgZZ2OGl4K6tIA4zEJ1Fr2ASZARXavI1j5c,2448
 clarifai/urls/helper.py,sha256=tjoMGGHuWX68DUB0pk4MEjrmFsClUAQj2jmVEM_Sy78,4751
 clarifai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-clarifai/utils/logging.py,sha256=
+clarifai/utils/logging.py,sha256=xJTteoUodQ7RfsbO676QgidKa5EVPbdUu89Xlwwso2s,4533
 clarifai/utils/misc.py,sha256=cC_j0eEsJ8bfnj0oRd2z-Rms1mQbAfLwrSs07hwQuCE,1420
 clarifai/utils/model_train.py,sha256=JlMJAclOQ6Nx4_30DiQrlgHbQnNedl9UKQILq_HwK7I,8001
 clarifai/utils/evaluation/__init__.py,sha256=0gmQxbzejnv1tKLj4lKcV7DHQX69irBJkWhA9oYXL1k,15813
@@ -106,10 +106,10 @@ clarifai/utils/evaluation/helpers.py,sha256=d_dcASRI_lhsHIRukAF1S-w7XazLpK9y6E_u
 clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/workflows/export.py,sha256=vICRhIreqDSShxLKjHNM2JwzKsf1B4fdXB0ciMcA70k,1945
 clarifai/workflows/utils.py,sha256=nGeB_yjVgUO9kOeKTg4OBBaBz-AwXI3m-huSVj-9W18,1924
-clarifai/workflows/validate.py,sha256=
-clarifai-10.1.1.dist-info/LICENSE,sha256=
-clarifai-10.1.1.dist-info/METADATA,sha256=
-clarifai-10.1.1.dist-info/WHEEL,sha256=
-clarifai-10.1.1.dist-info/entry_points.txt,sha256=
-clarifai-10.1.1.dist-info/top_level.txt,sha256=
-clarifai-10.1.1.dist-info/RECORD,,
+clarifai/workflows/validate.py,sha256=yJq03MaJqi5AK3alKGJJBR89xmmjAQ31sVufJUiOqY8,2556
+clarifai-10.2.0.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
+clarifai-10.2.0.dist-info/METADATA,sha256=dxbFGbjs-EeDTbIrtt9SMibie7VgP0KNDFpfilZ_hF4,18632
+clarifai-10.2.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+clarifai-10.2.0.dist-info/entry_points.txt,sha256=qZOr_MIPG0dBBE1zringDJS_wXNGTAA_SQ-zcbmDHOw,82
+clarifai-10.2.0.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
+clarifai-10.2.0.dist-info/RECORD,,
{clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/LICENSE
File without changes
{clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/WHEEL
File without changes
{clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/entry_points.txt
File without changes
{clarifai-10.1.1.dist-info → clarifai-10.2.0.dist-info}/top_level.txt
File without changes