clarifai-10.1.1-py3-none-any.whl → clarifai-10.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/client/dataset.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import time
 import uuid
@@ -12,12 +13,13 @@ from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
+from tabulate import tabulate
 from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
 from clarifai.client.lister import Lister
-from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
+from clarifai.constants.dataset import DATASET_UPLOAD_TASKS, MAX_RETRIES
 from clarifai.datasets.export.inputs_annotations import (DatasetExportReader,
                                                          InputAnnotationDownloader)
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -27,7 +29,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import add_file_handler, get_logger
+from clarifai.utils.logging import add_file_handler, get_logger, process_log_files
 from clarifai.utils.misc import BackoffIterator, Chunker
 
 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -68,7 +70,8 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
+    self.input_object = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(
         self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
@@ -192,11 +195,11 @@ class Dataset(Lister, BaseClient):
 
     return retry_annot_upload
 
-  def _delete_failed_inputs(
-      self,
-      batch_input_ids: List[int],
-      dataset_obj: ClarifaiDatasetType,
-      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
+  def _delete_failed_inputs(self,
+                            batch_input_ids: List[int],
+                            dataset_obj: ClarifaiDatasetType,
+                            upload_response: MultiInputResponse = None,
+                            batch_no: Optional[int] = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.
 
     Args:
@@ -231,8 +234,19 @@ class Dataset(Lister, BaseClient):
     if duplicate_input_ids:
       success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
       failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      duplicate_details = [[
+          input_ids[id], id, "Input has a duplicate ID.",
+          dataset_obj.data_generator[input_ids[id]].image_path,
+          dataset_obj.data_generator[input_ids[id]].labels,
+          dataset_obj.data_generator[input_ids[id]].metadata
+      ] for id in duplicate_input_ids]
+      duplicate_table = tabulate(
+          duplicate_details,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+          f"{timestamp}\nFailed to upload {len(duplicate_input_ids)} inputs due to duplicate IDs in current batch {batch_no}:\n{duplicate_table}\n\n"
       )
 
     #delete failed inputs
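
Note: the new duplicate-input warning above renders its details through `tabulate`, which 10.2.0 adds as a dependency (`tabulate >=0.9.0`). A minimal sketch of the grid format it writes to the log (row values are invented):

```python
from tabulate import tabulate

# Invented row, mirroring the columns the uploader logs.
rows = [[4, "img_0004", "Input has a duplicate ID.", "images/4.jpg", "['cat']", "{}"]]
print(
    tabulate(
        rows,
        headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
        tablefmt="grid"))
# Prints an ASCII grid table; the same text is what process_log_files()
# later parses back out of the log file.
```
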
@@ -243,7 +257,11 @@ class Dataset(Lister, BaseClient):
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]
 
   def _upload_inputs_annotations(
-      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      batch_no: Optional[int] = None,
+      is_retry_duplicates: bool = False,
   ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.
 
@@ -257,12 +275,16 @@ class Dataset(Lister, BaseClient):
       response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
+    if is_retry_duplicates:
+      for inp in input_protos:
+        inp.id = uuid.uuid4().hex
+
     input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []
 
     self.input_object._wait_for_inputs(input_job_id)
     success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
                                                                      _response)
-                                                                     _response)
+                                                                     _response, batch_no)
 
     if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
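
Note: when `is_retry_duplicates` is set, each retried proto gets a fresh random ID so it no longer collides with an input already in the dataset; `uuid.uuid4().hex` yields a 32-character lowercase hex string:

```python
import uuid

new_id = uuid.uuid4().hex  # e.g. '9f1c2b7a0d4e4f3f8a6b1c2d3e4f5a6b' (random per call)
assert len(new_id) == 32
```
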
@@ -273,7 +295,7 @@ class Dataset(Lister, BaseClient):
 
   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
-                     dataset_obj: ClarifaiDatasetType) -> None:
+                     dataset_obj: ClarifaiDatasetType, batch_no: Optional[int]) -> None:
     """Retry failed uploads.
 
     Args:
@@ -281,56 +303,87 @@ class Dataset(Lister, BaseClient):
       retry_annot_protos: failed annot protos
       dataset_obj: ClarifaiDataset object
     """
+
+    for _retry in range(MAX_RETRIES):
+      if not failed_input_ids and not retry_annot_protos:
+        break
+      if failed_input_ids:
+        retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+        logging.warning(
+            f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}\n"
+        )
+        failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+            failed_input_ids, dataset_obj, batch_no)
+        failed_input_ids = failed_retrying_inputs
+      if retry_annot_protos:
+        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
+        _ = self._concurrent_annot_upload(chunked_annotation_protos)
+
+    #Log failed inputs
     if failed_input_ids:
-      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
-      #Log Retrying inputs
+      failed_inputs_logs = []
+      input_map = {input.id: input for input in retry_response.inputs}
+      for index in failed_retrying_inputs:
+        failed_id = dataset_obj.all_input_ids[index]
+        input_details = input_map.get(failed_id)
+        if input_details:
+          failed_input_details = [
+              index, failed_id, input_details.status.details,
+              dataset_obj.data_generator[index].image_path,
+              dataset_obj.data_generator[index].labels, dataset_obj.data_generator[index].metadata
+          ]
+          failed_inputs_logs.append(failed_input_details)
+
+      failed_table = tabulate(
+          failed_inputs_logs,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+          f"{timestamp}\nFailed to upload {len(failed_retrying_inputs)} inputs in current batch {batch_no}:\n{failed_table}\n\n"
       )
-      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
-          failed_input_ids, dataset_obj)
-      #Log failed inputs
-      if failed_retrying_inputs:
-        failed_retrying_input_ids = [
-            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
-        ]
-        failed_inputs_logs = {
-            input.id: input.status.details
-            for input in retry_response.inputs if input.id in failed_retrying_input_ids
-        }
-        self.logger.warning(
-            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
-        )
-    if retry_annot_protos:
-      chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
-      _ = self._concurrent_annot_upload(chunked_annotation_protos)
 
-  def _data_upload(self, dataset_obj: ClarifaiDatasetType) -> None:
+  def _data_upload(self,
+                   dataset_obj: ClarifaiDatasetType,
+                   is_log_retry: bool = False,
+                   log_retry_ids: List[int] = None,
+                   **kwargs) -> None:
     """Uploads inputs and annotations to clarifai platform dataset.
 
     Args:
-      dataset_obj: ClarifaiDataset object
+      dataset_obj: ClarifaiDataset object,
+      is_log_retry: True if the iteration is to retry uploads from logs.
+      **kwargs: Additional keyword arguments for retry uploading functionality..
+
+    Returns:
+      None
     """
-    input_ids = list(range(len(dataset_obj)))
+    if is_log_retry:
+      input_ids = log_retry_ids
+    else:
+      input_ids = list(range(len(dataset_obj)))
+
     chunk_input_ids = Chunker(input_ids, self.batch_size).chunk()
     with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
       with tqdm(total=len(chunk_input_ids), desc='Uploading Dataset') as progress:
         # Submit all jobs to the executor and store the returned futures
         futures = [
-            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj)
-            for batch_input_ids in chunk_input_ids
+            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj,
+                            batch_no, **kwargs)
+            for batch_no, batch_input_ids in enumerate(chunk_input_ids)
         ]
 
-        for job in as_completed(futures):
+        for batch_no, job in enumerate(as_completed(futures)):
          retry_input_ids, retry_annot_protos, _ = job.result()
-          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
+          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj, batch_no)
          progress.update()
 
   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
                      get_upload_status: bool = False,
-                     log_warnings: bool = False) -> None:
+                     log_warnings: bool = False,
+                     **kwargs) -> None:
     """Uploads a dataset to the app.
 
     Args:
@@ -338,6 +391,7 @@ class Dataset(Lister, BaseClient):
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
       log_warnings (bool): True if you want to save log warnings in a file
+      kwargs: Additional keyword arguments for retry uploading functionality..
     """
     #add file handler to log warnings
     if log_warnings:
@@ -365,11 +419,47 @@ class Dataset(Lister, BaseClient):
     if get_upload_status:
       pre_upload_stats = self.get_upload_status(pre_upload=True)
 
-    self._data_upload(dataset_obj)
+    self._data_upload(dataset_obj, **kwargs)
 
     if get_upload_status:
       self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)
 
+  def retry_upload_from_logs(self,
+                             log_file_path: str,
+                             dataloader: Type[ClarifaiDataLoader],
+                             retry_duplicates: bool = False,
+                             log_warnings: bool = False,
+                             **kwargs) -> None:
+    """Retries failed uploads from the log file.
+
+    Args:
+      log_file_path (str): path to the log file
+      dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
+      retry_duplicate (bool): True if you want to retry duplicate inputs
+      kwargs: Additional keyword arguments for retry uploading functionality..
+    """
+
+    duplicate_input_ids, failed_input_ids = process_log_files(log_file_path)
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+
+    if retry_duplicates and duplicate_input_ids:
+      logging.warning(f"Retrying upload for {len(duplicate_input_ids)} duplicate inputs...\n")
+      duplicate_inputs_indexes = [input["Index"] for input in duplicate_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader,
+          log_retry_ids=duplicate_inputs_indexes,
+          is_retry_duplicates=True,
+          is_log_retry=True,
+          **kwargs)
+
+    if failed_input_ids:
+      #failed_inputs= ([input["Input_ID"] for input in failed_input_ids])
+      logging.warning(f"Retrying upload for {len(failed_input_ids)} failed inputs...\n")
+      failed_input_indexes = [input["Index"] for input in failed_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader, log_retry_ids=failed_input_indexes, is_log_retry=True, **kwargs)
+
   def upload_from_csv(self,
                       csv_path: str,
                       input_type: str = 'text',
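
Note: taken together, the additions above enable a two-pass upload workflow. A usage sketch assembled from the SDK's own README examples (app, dataset, loader path, and log file name are placeholders):

```python
from clarifai.client.app import App
from clarifai.datasets.upload.utils import load_module_dataloader

# Note: CLARIFAI_PAT must be set as an env variable.
app = App(app_id="demo_app", user_id="user_id")
dataset = app.create_dataset(dataset_id="demo_dataset")
cifar_dataloader = load_module_dataloader('./image_classification/cifar10')

# Pass 1: upload, writing failures/duplicates as tabulate grids to a log file.
dataset.upload_dataset(dataloader=cifar_dataloader, get_upload_status=True, log_warnings=True)

# Pass 2: parse that log and re-upload; with retry_duplicates=True the
# duplicates are resubmitted under fresh uuid4 IDs.
dataset.retry_upload_from_logs(
    log_file_path='Dataset_Upload<timestamp>.log',  # file written by pass 1
    dataloader=cifar_dataloader,
    retry_duplicates=True,
    log_warnings=True)
```
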
clarifai/client/input.py CHANGED
@@ -18,6 +18,7 @@ from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.lister import Lister
+from clarifai.constants.dataset import MAX_RETRIES
 from clarifai.errors import UserError
 from clarifai.utils.logging import get_logger
 from clarifai.utils.misc import BackoffIterator, Chunker
@@ -936,10 +937,14 @@ class Inputs(Lister, BaseClient):
     """Retry failed uploads.
 
     Args:
-      failed_inputs (List[Input]): failed input prots
+      failed_inputs (List[Input]): failed input protos
     """
-    if failed_inputs:
-      self._upload_batch(failed_inputs)
+    for _retry in range(MAX_RETRIES):
+      if failed_inputs:
+        self.logger.info(f"Retrying upload for {len(failed_inputs)} Failed inputs..\n")
+        failed_inputs = self._upload_batch(failed_inputs)
+
+    self.logger.warning(f"Failed to upload {len(failed_inputs)} inputs..\n ")
 
   def _delete_failed_inputs(self, inputs: List[Input]) -> List[Input]:
     """Delete failed input ids from clarifai platform dataset.
clarifai/client/model.py CHANGED
@@ -9,6 +9,7 @@ from clarifai_grpc.grpc.api.resources_pb2 import Input
 from clarifai_grpc.grpc.api.status import status_code_pb2
 from google.protobuf.json_format import MessageToDict
 from google.protobuf.struct_pb2 import Struct
+from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
@@ -381,7 +382,9 @@ class Model(Lister, BaseClient):
       except KeyError:
         pass
       yield Model.from_auth_helper(
-          model_id=self.id, **dict(self.kwargs, model_version=model_version_info))
+          auth=self.auth_helper,
+          model_id=self.id,
+          **dict(self.kwargs, model_version=model_version_info))
 
   def predict(self, inputs: List[Input], inference_params: Dict = {}, output_config: Dict = {}):
     """Predicts the model based on the given inputs.
@@ -757,3 +760,90 @@ class Model(Lister, BaseClient):
         metrics_by_area=metrics_by_area)
 
     return result
+
+  def export(self, export_dir: str = None) -> None:
+    """Export the model, stores the exported model as model.tar file
+
+    Args:
+      export_dir (str): The directory to save the exported model.
+
+    Example:
+      >>> from clarifai.client.model import Model
+      >>> model = Model("url")
+      >>> model.export('/path/to/export_model_dir')
+    """
+    assert self.model_info.model_version.id, "Model version ID is missing. Please provide a `model_version` with a valid `id` as an argument or as a URL in the following format: '{user_id}/{app_id}/models/{your_model_id}/model_version_id/{your_version_model_id}' when initializing."
+    try:
+      if not os.path.exists(export_dir):
+        os.makedirs(export_dir)
+    except OSError as e:
+      raise Exception(f"An error occurred while creating the directory: {e}")
+
+    def _get_export_response():
+      get_export_request = service_pb2.GetModelVersionExportRequest(
+          user_app_id=self.user_app_id,
+          model_id=self.id,
+          version_id=self.model_info.model_version.id,
+      )
+      response = self._grpc_request(self.STUB.GetModelVersionExport, get_export_request)
+
+      if response.status.code != status_code_pb2.SUCCESS and response.status.code != status_code_pb2.CONN_DOES_NOT_EXIST:
+        raise Exception(response.status)
+
+      return response
+
+    def _download_exported_model(
+        get_model_export_response: service_pb2.SingleModelVersionExportResponse,
+        local_filepath: str):
+      model_export_url = get_model_export_response.export.url
+      model_export_file_size = get_model_export_response.export.size
+
+      response = requests.get(model_export_url, stream=True)
+      response.raise_for_status()
+
+      with open(local_filepath, 'wb') as f:
+        progress = tqdm(
+            total=model_export_file_size, unit='B', unit_scale=True, desc="Exporting model")
+        for chunk in response.iter_content(chunk_size=8192):
+          f.write(chunk)
+          progress.update(len(chunk))
+        progress.close()
+
+      self.logger.info(
+          f"Model ID {self.id} with version {self.model_info.model_version.id} exported successfully to {export_dir}/model.tar"
+      )
+
+    get_export_response = _get_export_response()
+    if get_export_response.status.code == status_code_pb2.CONN_DOES_NOT_EXIST:
+      put_export_request = service_pb2.PutModelVersionExportsRequest(
+          user_app_id=self.user_app_id,
+          model_id=self.id,
+          version_id=self.model_info.model_version.id,
+      )
+
+      response = self._grpc_request(self.STUB.PutModelVersionExports, put_export_request)
+      if response.status.code != status_code_pb2.SUCCESS:
+        raise Exception(response.status)
+
+      self.logger.info(
+          f"Model ID {self.id} with version {self.model_info.model_version.id} export started, please wait..."
+      )
+      time.sleep(5)
+      start_time = time.time()
+      backoff_iterator = BackoffIterator()
+      while True:
+        get_export_response = _get_export_response()
+        if get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTING and \
+            time.time() - start_time < 60 * 30:  # 30 minutes
+          self.logger.info(
+              f"Model ID {self.id} with version {self.model_info.model_version.id} is still exporting, please wait..."
+          )
+          time.sleep(next(backoff_iterator))
+        elif get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTED:
+          _download_exported_model(get_export_response, os.path.join(export_dir, "model.tar"))
+        elif time.time() - start_time > 60 * 30:
+          raise Exception(
+              f"""Model Export took too long. Please try again or contact support@clarifai.com
+              Req ID: {get_export_response.status.req_id}""")
+    elif get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTED:
+      _download_exported_model(get_export_response, os.path.join(export_dir, "model.tar"))
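
Note: the docstring above already sketches the call; slightly expanded (the IDs in the URL are placeholders, and the version segment is required by the assertion at the top of `export`):

```python
from clarifai.client.model import Model

# Placeholder URL; per the assert, it must pin a model version:
# '{user_id}/{app_id}/models/{your_model_id}/model_version_id/{your_version_model_id}'
model = Model("https://clarifai.com/user_id/app_id/models/model_id/model_version_id/version_id")

# Starts (or resumes) a platform-side export, polls with backoff for up to
# 30 minutes, then streams model.tar down with a tqdm progress bar.
model.export('/tmp/exported_model')
```
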
clarifai/client/search.py CHANGED
@@ -48,7 +48,8 @@ class Search(Lister, BaseClient):
     self.data_proto = resources_pb2.Data()
     self.top_k = top_k
 
-    self.inputs = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
+    self.inputs = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.rank_filter_schema = get_schema()
     BaseClient.__init__(
         self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
clarifai/constants/dataset.py CHANGED
@@ -20,3 +20,5 @@ TASK_TO_ANNOTATION_TYPE = {
         "polygons": "polygons"
     },
 }
+
+MAX_RETRIES = 2
clarifai/models/model_serving/cli/_utils.py CHANGED
@@ -11,7 +11,7 @@ from ..constants import (CLARIFAI_EXAMPLES_REPO, CLARIFAI_EXAMPLES_REPO_PATH,
 def download_examples_repo(forced_download: bool = False):
 
   def _pull():
-    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}")
+    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}", shell=True)
 
   if not os.path.isdir(CLARIFAI_EXAMPLES_REPO_PATH):
     print(f"Download examples to {CLARIFAI_EXAMPLES_REPO_PATH}")
clarifai/models/model_serving/cli/build.py CHANGED
@@ -70,7 +70,7 @@ class BuildModelSubCli(BaseClarifaiCli):
     if not self.no_test:
       assert os.path.exists(
           self.test_path), FileNotFoundError(f"Could not find `test.py` in {self.path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
 
     # build
clarifai/models/model_serving/cli/upload.py CHANGED
@@ -126,7 +126,7 @@ class UploadModelSubCli(BaseClarifaiCli):
     # Run test before uploading
     if not self.no_test:
       assert os.path.exists(self.test_path), FileNotFoundError(f"Not found {self.test_path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
 
     deploy(
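
Note: the three `subprocess.run` changes above fix the same bug: a single command string without `shell=True` is treated as the name of one executable and raises `FileNotFoundError`. Either form below works; 10.2.0 picks the first:

```python
import subprocess

# As patched: keep one string and let the shell split it.
subprocess.run("pytest -s --log-level=INFO tests/test.py", shell=True)

# Shell-free alternative: pass the argv list explicitly.
subprocess.run(["pytest", "-s", "--log-level=INFO", "tests/test.py"])
```
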
clarifai/models/model_serving/utils.py CHANGED
@@ -18,4 +18,6 @@ def _read_pat():
 
 def login(pat=None):
   """ if pat provided, set pat to CLARIFAI_PAT otherwise read pat from file"""
-  os.environ["CLARIFAI_PAT"] = pat or _read_pat()
+  pat = pat or _read_pat()
+  assert pat, Exception("PAT is not found, please run `clarifai login` to persist your PAT")
+  os.environ["CLARIFAI_PAT"] = pat
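
Note: previously a missing PAT made `os.environ["CLARIFAI_PAT"] = None` fail with an opaque `TypeError: str expected, not NoneType`; the assert turns that into an actionable message. A condensed restatement of the new behavior (with `_read_pat` stubbed out by an env lookup):

```python
import os

def login(pat=None):
  pat = pat or os.environ.get("CLARIFAI_PAT")  # stand-in for _read_pat()
  assert pat, "PAT is not found, please run `clarifai login` to persist your PAT"
  os.environ["CLARIFAI_PAT"] = pat
```
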
clarifai/utils/logging.py CHANGED
@@ -106,3 +106,33 @@ def add_file_handler(logger: logging.Logger, file_path: str, log_level: str = 'W
   file_handler = logging.FileHandler(file_path)
   file_handler.setLevel(log_level)
   logger.addHandler(file_handler)
+
+
+def process_log_files(log_file_path: str,) -> tuple:
+  """Processes log files to get failed inputs and annotations.
+
+  Args:
+    log_file_path (str): path to the log file
+  """
+  import re
+  duplicate_input_ids = []
+  failed_input_ids = []
+  pattern = re.compile(r'\| +(\d+) +\| +(\S+) +\| +(.+?) +\| +(.+?) +\| +(.+?) +\| +(.+?) \|')
+  try:
+    with open(log_file_path, 'r') as file:
+      log_content = file.read()
+      matches = pattern.findall(log_content)
+      for match in matches:
+        index = int(match[0])
+        input_id = match[1]
+        status = match[2]
+        if status == "Input has a duplicate ID.":
+          duplicate_input_ids.append({"Index": index, "Input_ID": input_id})
+        else:
+          failed_input_ids.append({"Index": index, "Input_ID": input_id})
+
+  except Exception as e:
+    print(f"Error Processing log file {log_file_path}:{e}")
+    return [], []
+
+  return duplicate_input_ids, failed_input_ids
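
Note: the regex in `process_log_files` matches the data rows of the `tablefmt="grid"` tables the uploader logs. A hedged round-trip check (file name and row values are invented):

```python
from tabulate import tabulate
from clarifai.utils.logging import process_log_files

# Write a log fragment in the same grid format the uploader emits.
rows = [[7, "input_7", "Input has a duplicate ID.", "imgs/7.jpg", "['dog']", "{}"]]
with open("sample_upload.log", "w") as f:
  f.write(
      tabulate(
          rows,
          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
          tablefmt="grid"))

duplicates, failures = process_log_files("sample_upload.log")
print(duplicates)  # expected: [{'Index': 7, 'Input_ID': 'input_7'}]
print(failures)    # expected: []
```
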
clarifai/versions.py CHANGED
@@ -1,6 +1,6 @@
 import os
 
-CLIENT_VERSION = "10.1.1"
+CLIENT_VERSION = "10.2.0"
 OS_VER = os.sys.platform
 PYTHON_VERSION = '.'.join(
     map(str, [os.sys.version_info.major, os.sys.version_info.minor, os.sys.version_info.micro]))
clarifai/workflows/validate.py CHANGED
@@ -16,7 +16,7 @@ def _model_does_not_have_model_version_id_and_other_fields(m):
 
 
 def _model_has_other_fields(m):
-  return any(k not in ['model_id', 'model_version_id'] for k in m.keys())
+  return any(k not in ['model_id', 'model_version_id', 'user_id', 'app_id'] for k in m.keys())
 
 
 def _workflow_nodes_have_valid_dependencies(nodes):
clarifai-10.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.1.1
+Version: 10.2.0
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -20,7 +20,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: clarifai-grpc (~=10.1.6)
+Requires-Dist: clarifai-grpc (~=10.2.1)
 Requires-Dist: numpy (>=1.22.0)
 Requires-Dist: tqdm (>=4.65.0)
 Requires-Dist: tritonclient (>=2.34.0)
@@ -29,6 +29,7 @@ Requires-Dist: PyYAML (>=6.0.1)
 Requires-Dist: schema (>=0.7.5)
 Requires-Dist: Pillow (>=9.5.0)
 Requires-Dist: inquirerpy (==0.3.4)
+Requires-Dist: tabulate (>=0.9.0)
 Provides-Extra: all
 Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'
 
@@ -150,7 +151,7 @@ client = User(user_id="user_id", pat="your personal access token")
 
 ## :floppy_disk: Interacting with Datasets
 
-Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets and exporting datasets as .zip files.
+Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets, retrying failed uploads from logs and exporting datasets as .zip files.
 
 ```python
 # Note: CLARIFAI_PAT must be set as env variable.
@@ -162,7 +163,18 @@ dataset = app.create_dataset(dataset_id="demo_dataset")
 # execute data upload to Clarifai app dataset
 from clarifai.datasets.upload.laoders.coco_detection import COCODetectionDataLoader
 coco_dataloader = COCODetectionDataLoader("images_dir", "coco_annotation_filepath")
-dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True)
+dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True, log_warnings =True)
+
+
+#Try upload and record the failed outputs in log file.
+from clarifai.datasets.upload.utils import load_module_dataloader
+cifar_dataloader = load_module_dataloader('./image_classification/cifar10')
+dataset.upload_dataset(dataloader=cifar_dataloader, get_upload_status=True, log_warnings =True)
+
+#Retry upload from logs for `upload_dataset`
+dataset.retry_upload_from_logs(dataloader=cifar_dataloader, log_file_path='log_file.log',
+                               retry_duplicates=False,
+                               log_warnings=True)
 
 #upload text from csv
 dataset.upload_from_csv(csv_path='csv_path', input_type='text', csv_type='raw', labels=True)
clarifai-10.2.0.dist-info/RECORD CHANGED
@@ -1,24 +1,24 @@
 clarifai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/errors.py,sha256=RwzTajwds51wLD0MVlMC5kcpBnzRpreDLlazPSBZxrg,2605
-clarifai/versions.py,sha256=bk6R6cGyCh8H_XRfcozmi8J8jsQ_tIACnIy4a-o0gbI,186
+clarifai/versions.py,sha256=i2k90NbuweI5RIEMwbwrZQUBIKcpkV97mANYDaTFaSo,186
 clarifai/client/__init__.py,sha256=xI1U0l5AZdRThvQAXCLsd9axxyFzXXJ22m8LHqVjQRU,662
 clarifai/client/app.py,sha256=_wDiHrMVhtofVHLZ2-4JHk_WoGCETPvHFe8ZQ3rRjFE,26700
 clarifai/client/base.py,sha256=4XQU_cPyo8cCGUcZarCBXra_IVdT1KZGt_5c3OtdKig,6489
-clarifai/client/dataset.py,sha256=hA7fmUcCPOE_Of1pYKqX_9e5pEdmTkODaZaC9adXMJ8,23820
-clarifai/client/input.py,sha256=GZ7JWhS79GTQOqJ8KvexqLfWCyR-ANHACzciKE-wWxI,39769
+clarifai/client/dataset.py,sha256=u4OkBZLYME-B8sOAcE7RkTohtFWdFxVZ53yrNfadC5g,27841
+clarifai/client/input.py,sha256=yZB9R1VMJvL4g5SdSgsbre4j4v18KUGPFvypMTFOF5k,40046
 clarifai/client/lister.py,sha256=03KGMvs5RVyYqxLsSrWhNc34I8kiF1Ph0NeyEwu7nMU,2082
-clarifai/client/model.py,sha256=NoCfJ9vU9NvhXBszEV1Bi0O9xkNVzjWmmP6SFi8ZG1g,32311
+clarifai/client/model.py,sha256=oMudm5ACGK29dnvZbR90huH2czuECaZ3JxjXuGPJH28,36333
 clarifai/client/module.py,sha256=BunlC4Uv7TX9JaZ0Kciwy_1_Mtg2GPZV5OLLZZcGz6I,3977
 clarifai/client/runner.py,sha256=oZkydj1Lfxn6pVx4_-CLzyaneE-dHvBIGL44usW45gA,9867
-clarifai/client/search.py,sha256=XadJjdV1PqM288LcU6DSnKmaiuVi7kzA5Tt1q0mS_Js,10767
+clarifai/client/search.py,sha256=g9VZ7WmWeaSAy6jMwH5fzZCjnaB40xFscwmUvgPTvHs,10795
 clarifai/client/user.py,sha256=QYngaFYINw-U-3FUwyrN2rFbwGyaHavuCXMGqV34pWA,10139
 clarifai/client/workflow.py,sha256=oALMJfdgTqiilfpDT3H_nepqX9mexLu-uWV0NvtxUs0,10291
 clarifai/client/auth/__init__.py,sha256=7EwR0NrozkAUwpUnCsqXvE_p0wqx_SelXlSpKShKJK0,136
 clarifai/client/auth/helper.py,sha256=3lCKo24ZIOlcSh50juJh3ZDagOo_pxEKyoPjWUokYoA,13450
 clarifai/client/auth/register.py,sha256=2CMdBsoVLoTfjyksE6j7BM2tiEc73WKYvxnwDDgNn1k,536
 clarifai/client/auth/stub.py,sha256=KIzJZ8aRB1RzXJeWHDAx19HNdBsblPPHwYLfAkgI3rY,3779
-clarifai/constants/dataset.py,sha256=2QlHF0NMXfAdFlOpEzkNYVZcxSL-dIxq-ZsY_LsIPBA,499
+clarifai/constants/dataset.py,sha256=OXYirr0iaoN_47V6wxO0H6ptV81y8zNGapPBz9qqD8o,516
 clarifai/constants/model.py,sha256=LsMkLVkuBpfS4j4yDW9M4O7HxzRpIuSo9qU5T8Wg2Co,217
 clarifai/constants/rag.py,sha256=WcHwToUVIK9ItAhDefaSohQHCLNeR55PSjZ0BFnoZ3U,28
 clarifai/constants/search.py,sha256=_g3S-JEvuygiFfMVK3cl4Ry9erZpt8Zo4ilXL2i3DAE,52
@@ -43,16 +43,16 @@ clarifai/models/api.py,sha256=d3FQQlG0mNDLrfEvchqaVcq4Tgb_TqryNnJtwp3c7sE,10961
 clarifai/models/model_serving/README.md,sha256=Ln8hsyE38J3yiLZruKHjU_hdq9CjzzbDUAO28Xyw1dQ,4060
 clarifai/models/model_serving/__init__.py,sha256=78fiK9LvdGvpMxICmZWqSIyS6BFATjW2s5R6_GgtbPA,645
 clarifai/models/model_serving/constants.py,sha256=uoi8TqEFkdsHhSZu90HOO3R0BmPC3G0z9qA5ER-5H7w,688
-clarifai/models/model_serving/utils.py,sha256=MXeOHsNHiwx9qsRoX-FzBO2Tmbgo_IVwTf3EUmgdtSQ,524
+clarifai/models/model_serving/utils.py,sha256=D2UZo90Afd9f7OGKXdjRqys_6N26psY018V7R-rcNO4,629
 clarifai/models/model_serving/cli/__init__.py,sha256=Nls28G-fedNw2oQZIkPQSN__TgjJXbG9RDzzuHIM0VI,575
-clarifai/models/model_serving/cli/_utils.py,sha256=oDd885kwX7u5vf-8dssJFyrR3lEof8x4BXt32egaoKA,1722
+clarifai/models/model_serving/cli/_utils.py,sha256=CZTKKiaoO1Mg5MKQS2Qhgy4JRjnkEHqy8zY5U6b6C0w,1734
 clarifai/models/model_serving/cli/base.py,sha256=k4ARNU1koNzGAi9ach6Vpk7hpISZySiYHyKjkBLuHLg,283
-clarifai/models/model_serving/cli/build.py,sha256=Bfa-PuLIGcreiBr_72XKqCS_IlVJdzJudZkINmX082Y,2774
+clarifai/models/model_serving/cli/build.py,sha256=-C4PBt-9xO9YsyUagz3kF4J0_PsYb6YVKFY8y-VmY5I,2786
 clarifai/models/model_serving/cli/clarifai_clis.py,sha256=sGDDj7MrlU3goWLQm4H9dCf4lPD2Ojx50_jdIoxb5QM,663
 clarifai/models/model_serving/cli/create.py,sha256=wtKcVi8XSPN-Fx0RrSUxEwH1hm5TbZ_FrCEMIS9yszM,5598
 clarifai/models/model_serving/cli/example_cli.py,sha256=tCm0J4EI0kuuSRhEiPTuraSA-bUYwtEFEHcL1eOXzRI,1039
 clarifai/models/model_serving/cli/login.py,sha256=TYRQALJZUhNvtx2VcChO0y41YXs8-yP9BrShYb9tcOM,743
-clarifai/models/model_serving/cli/upload.py,sha256=8wYviCTLZYjnXhGykGlm0HhjBd_x5PKp7IKiB8BeOGc,6871
+clarifai/models/model_serving/cli/upload.py,sha256=ctAp_bckMLm8twO8KdilD4CnawbQ4o2FvixfBUd-91Y,6883
 clarifai/models/model_serving/docs/cli.md,sha256=AM45FZag3520ri4Terb0t7_MmLTs7gjHXAf7TYVZjZk,3942
 clarifai/models/model_serving/docs/concepts.md,sha256=ppQADibKQInf9JpfcH7wIpcMndTZ3618or5yzMhGNOE,9376
 clarifai/models/model_serving/docs/dependencies.md,sha256=apwg_IxDBzovtQYXRpWMU9pUqdf0VaS10yMVOYYXhoc,728
@@ -98,7 +98,7 @@ clarifai/runners/example_llama2.py,sha256=WMGTqv3v9t3ID1rjW9BTLMkIuvyTESL6xHcOO6
 clarifai/schema/search.py,sha256=JjTi8ammJgZZ2OGl4K6tIA4zEJ1Fr2ASZARXavI1j5c,2448
 clarifai/urls/helper.py,sha256=tjoMGGHuWX68DUB0pk4MEjrmFsClUAQj2jmVEM_Sy78,4751
 clarifai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-clarifai/utils/logging.py,sha256=F19UmdeJKwIy8Nqo8o0hegf-qJGqzqtQ5Bi0Rz2NP4Q,3582
+clarifai/utils/logging.py,sha256=xJTteoUodQ7RfsbO676QgidKa5EVPbdUu89Xlwwso2s,4533
 clarifai/utils/misc.py,sha256=cC_j0eEsJ8bfnj0oRd2z-Rms1mQbAfLwrSs07hwQuCE,1420
 clarifai/utils/model_train.py,sha256=JlMJAclOQ6Nx4_30DiQrlgHbQnNedl9UKQILq_HwK7I,8001
 clarifai/utils/evaluation/__init__.py,sha256=0gmQxbzejnv1tKLj4lKcV7DHQX69irBJkWhA9oYXL1k,15813
@@ -106,10 +106,10 @@ clarifai/utils/evaluation/helpers.py,sha256=d_dcASRI_lhsHIRukAF1S-w7XazLpK9y6E_u
 clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/workflows/export.py,sha256=vICRhIreqDSShxLKjHNM2JwzKsf1B4fdXB0ciMcA70k,1945
 clarifai/workflows/utils.py,sha256=nGeB_yjVgUO9kOeKTg4OBBaBz-AwXI3m-huSVj-9W18,1924
-clarifai/workflows/validate.py,sha256=iCEKBTtB-57uE3LVU7D4AI9BRHxIxahk3U1Ro08HP-o,2535
-clarifai-10.1.1.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
-clarifai-10.1.1.dist-info/METADATA,sha256=oSrsyv-IDTTXBLdKGNaIeyHVh2vxkUj5FXNNbRcFa2c,18007
-clarifai-10.1.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
-clarifai-10.1.1.dist-info/entry_points.txt,sha256=qZOr_MIPG0dBBE1zringDJS_wXNGTAA_SQ-zcbmDHOw,82
-clarifai-10.1.1.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
-clarifai-10.1.1.dist-info/RECORD,,
+clarifai/workflows/validate.py,sha256=yJq03MaJqi5AK3alKGJJBR89xmmjAQ31sVufJUiOqY8,2556
+clarifai-10.2.0.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
+clarifai-10.2.0.dist-info/METADATA,sha256=dxbFGbjs-EeDTbIrtt9SMibie7VgP0KNDFpfilZ_hF4,18632
+clarifai-10.2.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+clarifai-10.2.0.dist-info/entry_points.txt,sha256=qZOr_MIPG0dBBE1zringDJS_wXNGTAA_SQ-zcbmDHOw,82
+clarifai-10.2.0.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
+clarifai-10.2.0.dist-info/RECORD,,