clarifai-10.1.1-py3-none-any.whl → clarifai-10.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/client/dataset.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import time
 import uuid
@@ -12,12 +13,13 @@ from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
+from tabulate import tabulate
 from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
 from clarifai.client.lister import Lister
-from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
+from clarifai.constants.dataset import DATASET_UPLOAD_TASKS, MAX_RETRIES
 from clarifai.datasets.export.inputs_annotations import (DatasetExportReader,
                                                          InputAnnotationDownloader)
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -27,7 +29,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import add_file_handler, get_logger
+from clarifai.utils.logging import add_file_handler, get_logger, process_log_files
 from clarifai.utils.misc import BackoffIterator, Chunker
 
 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -68,7 +70,8 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
+    self.input_object = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(
         self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
@@ -192,11 +195,11 @@ class Dataset(Lister, BaseClient):
 
     return retry_annot_upload
 
-  def _delete_failed_inputs(
-      self,
-      batch_input_ids: List[int],
-      dataset_obj: ClarifaiDatasetType,
-      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
+  def _delete_failed_inputs(self,
+                            batch_input_ids: List[int],
+                            dataset_obj: ClarifaiDatasetType,
+                            upload_response: MultiInputResponse = None,
+                            batch_no: Optional[int] = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.
 
     Args:
@@ -231,8 +234,19 @@ class Dataset(Lister, BaseClient):
     if duplicate_input_ids:
       success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
       failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      duplicate_details = [[
+          input_ids[id], id, "Input has a duplicate ID.",
+          dataset_obj.data_generator[input_ids[id]].image_path,
+          dataset_obj.data_generator[input_ids[id]].labels,
+          dataset_obj.data_generator[input_ids[id]].metadata
+      ] for id in duplicate_input_ids]
+      duplicate_table = tabulate(
+          duplicate_details,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+          f"{timestamp}\nFailed to upload {len(duplicate_input_ids)} inputs due to duplicate IDs in current batch {batch_no}:\n{duplicate_table}\n\n"
       )
 
     #delete failed inputs
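
Note: the new duplicate-input warning above renders its details through `tabulate`, which 10.2.0 adds as a dependency (`tabulate >=0.9.0`). A minimal sketch of the grid format it writes to the log (row values are invented):

```python
from tabulate import tabulate

# Invented row, mirroring the columns the uploader logs.
rows = [[4, "img_0004", "Input has a duplicate ID.", "images/4.jpg", "['cat']", "{}"]]
print(
    tabulate(
        rows,
        headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
        tablefmt="grid"))
# Prints an ASCII grid table; the same text is what process_log_files()
# later parses back out of the log file.
```
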
@@ -243,7 +257,11 @@ class Dataset(Lister, BaseClient):
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]
 
   def _upload_inputs_annotations(
-      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      batch_no: Optional[int] = None,
+      is_retry_duplicates: bool = False,
   ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.
 
@@ -257,12 +275,16 @@ class Dataset(Lister, BaseClient):
       response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
+    if is_retry_duplicates:
+      for inp in input_protos:
+        inp.id = uuid.uuid4().hex
+
     input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []
 
     self.input_object._wait_for_inputs(input_job_id)
     success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
                                                                      _response)
-                                                                     _response)
+                                                                     _response, batch_no)
 
     if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
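
Note: when `is_retry_duplicates` is set, each retried proto gets a fresh random ID so it no longer collides with an input already in the dataset; `uuid.uuid4().hex` yields a 32-character lowercase hex string:

```python
import uuid

new_id = uuid.uuid4().hex  # e.g. '9f1c2b7a0d4e4f3f8a6b1c2d3e4f5a6b' (random per call)
assert len(new_id) == 32
```
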
@@ -273,7 +295,7 @@ class Dataset(Lister, BaseClient):
 
   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
-                     dataset_obj: ClarifaiDatasetType) -> None:
+                     dataset_obj: ClarifaiDatasetType, batch_no: Optional[int]) -> None:
     """Retry failed uploads.
 
     Args:
@@ -281,56 +303,87 @@ class Dataset(Lister, BaseClient):
       retry_annot_protos: failed annot protos
       dataset_obj: ClarifaiDataset object
     """
+
+    for _retry in range(MAX_RETRIES):
+      if not failed_input_ids and not retry_annot_protos:
+        break
+      if failed_input_ids:
+        retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+        logging.warning(
+            f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}\n"
+        )
+        failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+            failed_input_ids, dataset_obj, batch_no)
+        failed_input_ids = failed_retrying_inputs
+      if retry_annot_protos:
+        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
+        _ = self._concurrent_annot_upload(chunked_annotation_protos)
+
+    #Log failed inputs
     if failed_input_ids:
-      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
-      #Log Retrying inputs
+      failed_inputs_logs = []
+      input_map = {input.id: input for input in retry_response.inputs}
+      for index in failed_retrying_inputs:
+        failed_id = dataset_obj.all_input_ids[index]
+        input_details = input_map.get(failed_id)
+        if input_details:
+          failed_input_details = [
+              index, failed_id, input_details.status.details,
+              dataset_obj.data_generator[index].image_path,
+              dataset_obj.data_generator[index].labels, dataset_obj.data_generator[index].metadata
+          ]
+          failed_inputs_logs.append(failed_input_details)
+
+      failed_table = tabulate(
+          failed_inputs_logs,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+          f"{timestamp}\nFailed to upload {len(failed_retrying_inputs)} inputs in current batch {batch_no}:\n{failed_table}\n\n"
       )
-      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
-          failed_input_ids, dataset_obj)
-      #Log failed inputs
-      if failed_retrying_inputs:
-        failed_retrying_input_ids = [
-            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
-        ]
-        failed_inputs_logs = {
-            input.id: input.status.details
-            for input in retry_response.inputs if input.id in failed_retrying_input_ids
-        }
-        self.logger.warning(
-            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
-        )
-    if retry_annot_protos:
-      chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
-      _ = self._concurrent_annot_upload(chunked_annotation_protos)
 
-  def _data_upload(self, dataset_obj: ClarifaiDatasetType) -> None:
+  def _data_upload(self,
+                   dataset_obj: ClarifaiDatasetType,
+                   is_log_retry: bool = False,
+                   log_retry_ids: List[int] = None,
+                   **kwargs) -> None:
     """Uploads inputs and annotations to clarifai platform dataset.
 
     Args:
-      dataset_obj: ClarifaiDataset object
+      dataset_obj: ClarifaiDataset object,
+      is_log_retry: True if the iteration is to retry uploads from logs.
+      **kwargs: Additional keyword arguments for retry uploading functionality..
+
+    Returns:
+      None
     """
-    input_ids = list(range(len(dataset_obj)))
+    if is_log_retry:
+      input_ids = log_retry_ids
+    else:
+      input_ids = list(range(len(dataset_obj)))
+
     chunk_input_ids = Chunker(input_ids, self.batch_size).chunk()
     with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
       with tqdm(total=len(chunk_input_ids), desc='Uploading Dataset') as progress:
         # Submit all jobs to the executor and store the returned futures
         futures = [
-            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj)
-            for batch_input_ids in chunk_input_ids
+            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj,
+                            batch_no, **kwargs)
+            for batch_no, batch_input_ids in enumerate(chunk_input_ids)
         ]
 
-        for job in as_completed(futures):
+        for batch_no, job in enumerate(as_completed(futures)):
          retry_input_ids, retry_annot_protos, _ = job.result()
-          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
+          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj, batch_no)
          progress.update()
 
   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
                      get_upload_status: bool = False,
-                     log_warnings: bool = False) -> None:
+                     log_warnings: bool = False,
+                     **kwargs) -> None:
     """Uploads a dataset to the app.
 
     Args:
@@ -338,6 +391,7 @@ class Dataset(Lister, BaseClient):
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
       log_warnings (bool): True if you want to save log warnings in a file
+      kwargs: Additional keyword arguments for retry uploading functionality..
     """
     #add file handler to log warnings
     if log_warnings:
@@ -365,11 +419,47 @@ class Dataset(Lister, BaseClient):
     if get_upload_status:
       pre_upload_stats = self.get_upload_status(pre_upload=True)
 
-    self._data_upload(dataset_obj)
+    self._data_upload(dataset_obj, **kwargs)
 
     if get_upload_status:
       self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)
 
+  def retry_upload_from_logs(self,
+                             log_file_path: str,
+                             dataloader: Type[ClarifaiDataLoader],
+                             retry_duplicates: bool = False,
+                             log_warnings: bool = False,
+                             **kwargs) -> None:
+    """Retries failed uploads from the log file.
+
+    Args:
+      log_file_path (str): path to the log file
+      dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
+      retry_duplicate (bool): True if you want to retry duplicate inputs
+      kwargs: Additional keyword arguments for retry uploading functionality..
+    """
+
+    duplicate_input_ids, failed_input_ids = process_log_files(log_file_path)
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+
+    if retry_duplicates and duplicate_input_ids:
+      logging.warning(f"Retrying upload for {len(duplicate_input_ids)} duplicate inputs...\n")
+      duplicate_inputs_indexes = [input["Index"] for input in duplicate_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader,
+          log_retry_ids=duplicate_inputs_indexes,
+          is_retry_duplicates=True,
+          is_log_retry=True,
+          **kwargs)
+
+    if failed_input_ids:
+      #failed_inputs= ([input["Input_ID"] for input in failed_input_ids])
+      logging.warning(f"Retrying upload for {len(failed_input_ids)} failed inputs...\n")
+      failed_input_indexes = [input["Index"] for input in failed_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader, log_retry_ids=failed_input_indexes, is_log_retry=True, **kwargs)
+
   def upload_from_csv(self,
                       csv_path: str,
                       input_type: str = 'text',
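
Note: taken together, the additions above enable a two-pass upload workflow. A usage sketch assembled from the SDK's own README examples (app, dataset, loader path, and log file name are placeholders):

```python
from clarifai.client.app import App
from clarifai.datasets.upload.utils import load_module_dataloader

# Note: CLARIFAI_PAT must be set as an env variable.
app = App(app_id="demo_app", user_id="user_id")
dataset = app.create_dataset(dataset_id="demo_dataset")
cifar_dataloader = load_module_dataloader('./image_classification/cifar10')

# Pass 1: upload, writing failures/duplicates as tabulate grids to a log file.
dataset.upload_dataset(dataloader=cifar_dataloader, get_upload_status=True, log_warnings=True)

# Pass 2: parse that log and re-upload; with retry_duplicates=True the
# duplicates are resubmitted under fresh uuid4 IDs.
dataset.retry_upload_from_logs(
    log_file_path='Dataset_Upload<timestamp>.log',  # file written by pass 1
    dataloader=cifar_dataloader,
    retry_duplicates=True,
    log_warnings=True)
```
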
clarifai/client/input.py CHANGED
@@ -18,6 +18,7 @@ from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.lister import Lister
+from clarifai.constants.dataset import MAX_RETRIES
 from clarifai.errors import UserError
 from clarifai.utils.logging import get_logger
 from clarifai.utils.misc import BackoffIterator, Chunker
@@ -936,10 +937,14 @@ class Inputs(Lister, BaseClient):
     """Retry failed uploads.
 
     Args:
-      failed_inputs (List[Input]): failed input prots
+      failed_inputs (List[Input]): failed input protos
     """
-    if failed_inputs:
-      self._upload_batch(failed_inputs)
+    for _retry in range(MAX_RETRIES):
+      if failed_inputs:
+        self.logger.info(f"Retrying upload for {len(failed_inputs)} Failed inputs..\n")
+        failed_inputs = self._upload_batch(failed_inputs)
+
+    self.logger.warning(f"Failed to upload {len(failed_inputs)} inputs..\n ")
 
   def _delete_failed_inputs(self, inputs: List[Input]) -> List[Input]:
     """Delete failed input ids from clarifai platform dataset.
clarifai/client/model.py CHANGED
@@ -9,6 +9,7 @@ from clarifai_grpc.grpc.api.resources_pb2 import Input
 from clarifai_grpc.grpc.api.status import status_code_pb2
 from google.protobuf.json_format import MessageToDict
 from google.protobuf.struct_pb2 import Struct
+from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
@@ -381,7 +382,9 @@ class Model(Lister, BaseClient):
       except KeyError:
         pass
       yield Model.from_auth_helper(
-          model_id=self.id, **dict(self.kwargs, model_version=model_version_info))
+          auth=self.auth_helper,
+          model_id=self.id,
+          **dict(self.kwargs, model_version=model_version_info))
 
   def predict(self, inputs: List[Input], inference_params: Dict = {}, output_config: Dict = {}):
     """Predicts the model based on the given inputs.
@@ -757,3 +760,90 @@ class Model(Lister, BaseClient):
         metrics_by_area=metrics_by_area)
 
     return result
+
+  def export(self, export_dir: str = None) -> None:
+    """Export the model, stores the exported model as model.tar file
+
+    Args:
+      export_dir (str): The directory to save the exported model.
+
+    Example:
+      >>> from clarifai.client.model import Model
+      >>> model = Model("url")
+      >>> model.export('/path/to/export_model_dir')
+    """
+    assert self.model_info.model_version.id, "Model version ID is missing. Please provide a `model_version` with a valid `id` as an argument or as a URL in the following format: '{user_id}/{app_id}/models/{your_model_id}/model_version_id/{your_version_model_id}' when initializing."
+    try:
+      if not os.path.exists(export_dir):
+        os.makedirs(export_dir)
+    except OSError as e:
+      raise Exception(f"An error occurred while creating the directory: {e}")
+
+    def _get_export_response():
+      get_export_request = service_pb2.GetModelVersionExportRequest(
+          user_app_id=self.user_app_id,
+          model_id=self.id,
+          version_id=self.model_info.model_version.id,
+      )
+      response = self._grpc_request(self.STUB.GetModelVersionExport, get_export_request)
+
+      if response.status.code != status_code_pb2.SUCCESS and response.status.code != status_code_pb2.CONN_DOES_NOT_EXIST:
+        raise Exception(response.status)
+
+      return response
+
+    def _download_exported_model(
+        get_model_export_response: service_pb2.SingleModelVersionExportResponse,
+        local_filepath: str):
+      model_export_url = get_model_export_response.export.url
+      model_export_file_size = get_model_export_response.export.size
+
+      response = requests.get(model_export_url, stream=True)
+      response.raise_for_status()
+
+      with open(local_filepath, 'wb') as f:
+        progress = tqdm(
+            total=model_export_file_size, unit='B', unit_scale=True, desc="Exporting model")
+        for chunk in response.iter_content(chunk_size=8192):
+          f.write(chunk)
+          progress.update(len(chunk))
+        progress.close()
+
+      self.logger.info(
+          f"Model ID {self.id} with version {self.model_info.model_version.id} exported successfully to {export_dir}/model.tar"
+      )
+
+    get_export_response = _get_export_response()
+    if get_export_response.status.code == status_code_pb2.CONN_DOES_NOT_EXIST:
+      put_export_request = service_pb2.PutModelVersionExportsRequest(
+          user_app_id=self.user_app_id,
+          model_id=self.id,
+          version_id=self.model_info.model_version.id,
+      )
+
+      response = self._grpc_request(self.STUB.PutModelVersionExports, put_export_request)
+      if response.status.code != status_code_pb2.SUCCESS:
+        raise Exception(response.status)
+
+      self.logger.info(
+          f"Model ID {self.id} with version {self.model_info.model_version.id} export started, please wait..."
+      )
+      time.sleep(5)
+      start_time = time.time()
+      backoff_iterator = BackoffIterator()
+      while True:
+        get_export_response = _get_export_response()
+        if get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTING and \
+            time.time() - start_time < 60 * 30:  # 30 minutes
+          self.logger.info(
+              f"Model ID {self.id} with version {self.model_info.model_version.id} is still exporting, please wait..."
+          )
+          time.sleep(next(backoff_iterator))
+        elif get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTED:
+          _download_exported_model(get_export_response, os.path.join(export_dir, "model.tar"))
+        elif time.time() - start_time > 60 * 30:
+          raise Exception(
+              f"""Model Export took too long. Please try again or contact support@clarifai.com
+              Req ID: {get_export_response.status.req_id}""")
+    elif get_export_response.export.status.code == status_code_pb2.MODEL_EXPORTED:
+      _download_exported_model(get_export_response, os.path.join(export_dir, "model.tar"))
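
Note: the docstring above already sketches the call; slightly expanded (the IDs in the URL are placeholders, and the version segment is required by the assertion at the top of `export`):

```python
from clarifai.client.model import Model

# Placeholder URL; per the assert, it must pin a model version:
# '{user_id}/{app_id}/models/{your_model_id}/model_version_id/{your_version_model_id}'
model = Model("https://clarifai.com/user_id/app_id/models/model_id/model_version_id/version_id")

# Starts (or resumes) a platform-side export, polls with backoff for up to
# 30 minutes, then streams model.tar down with a tqdm progress bar.
model.export('/tmp/exported_model')
```
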
clarifai/client/search.py CHANGED
@@ -48,7 +48,8 @@ class Search(Lister, BaseClient):
     self.data_proto = resources_pb2.Data()
     self.top_k = top_k
 
-    self.inputs = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
+    self.inputs = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.rank_filter_schema = get_schema()
     BaseClient.__init__(
         self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
clarifai/constants/dataset.py CHANGED
@@ -20,3 +20,5 @@ TASK_TO_ANNOTATION_TYPE = {
         "polygons": "polygons"
     },
 }
+
+MAX_RETRIES = 2
clarifai/models/model_serving/cli/_utils.py CHANGED
@@ -11,7 +11,7 @@ from ..constants import (CLARIFAI_EXAMPLES_REPO, CLARIFAI_EXAMPLES_REPO_PATH,
 def download_examples_repo(forced_download: bool = False):
 
   def _pull():
-    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}")
+    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}", shell=True)
 
   if not os.path.isdir(CLARIFAI_EXAMPLES_REPO_PATH):
     print(f"Download examples to {CLARIFAI_EXAMPLES_REPO_PATH}")
clarifai/models/model_serving/cli/build.py CHANGED
@@ -70,7 +70,7 @@ class BuildModelSubCli(BaseClarifaiCli):
     if not self.no_test:
       assert os.path.exists(
           self.test_path), FileNotFoundError(f"Could not find `test.py` in {self.path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
 
     # build
clarifai/models/model_serving/cli/upload.py CHANGED
@@ -126,7 +126,7 @@ class UploadModelSubCli(BaseClarifaiCli):
     # Run test before uploading
     if not self.no_test:
       assert os.path.exists(self.test_path), FileNotFoundError(f"Not found {self.test_path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
 
     deploy(
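
Note: the three `subprocess.run` changes above fix the same bug: a single command string without `shell=True` is treated as the name of one executable and raises `FileNotFoundError`. Either form below works; 10.2.0 picks the first:

```python
import subprocess

# As patched: keep one string and let the shell split it.
subprocess.run("pytest -s --log-level=INFO tests/test.py", shell=True)

# Shell-free alternative: pass the argv list explicitly.
subprocess.run(["pytest", "-s", "--log-level=INFO", "tests/test.py"])
```
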
clarifai/models/model_serving/utils.py CHANGED
@@ -18,4 +18,6 @@ def _read_pat():
 
 def login(pat=None):
   """ if pat provided, set pat to CLARIFAI_PAT otherwise read pat from file"""
-  os.environ["CLARIFAI_PAT"] = pat or _read_pat()
+  pat = pat or _read_pat()
+  assert pat, Exception("PAT is not found, please run `clarifai login` to persist your PAT")
+  os.environ["CLARIFAI_PAT"] = pat
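
Note: previously a missing PAT made `os.environ["CLARIFAI_PAT"] = None` fail with an opaque `TypeError: str expected, not NoneType`; the assert turns that into an actionable message. A condensed restatement of the new behavior (with `_read_pat` stubbed out by an env lookup):

```python
import os

def login(pat=None):
  pat = pat or os.environ.get("CLARIFAI_PAT")  # stand-in for _read_pat()
  assert pat, "PAT is not found, please run `clarifai login` to persist your PAT"
  os.environ["CLARIFAI_PAT"] = pat
```
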
clarifai/utils/logging.py CHANGED
@@ -106,3 +106,33 @@ def add_file_handler(logger: logging.Logger, file_path: str, log_level: str = 'W
   file_handler = logging.FileHandler(file_path)
   file_handler.setLevel(log_level)
   logger.addHandler(file_handler)
+
+
+def process_log_files(log_file_path: str,) -> tuple:
+  """Processes log files to get failed inputs and annotations.
+
+  Args:
+    log_file_path (str): path to the log file
+  """
+  import re
+  duplicate_input_ids = []
+  failed_input_ids = []
+  pattern = re.compile(r'\| +(\d+) +\| +(\S+) +\| +(.+?) +\| +(.+?) +\| +(.+?) +\| +(.+?) \|')
+  try:
+    with open(log_file_path, 'r') as file:
+      log_content = file.read()
+      matches = pattern.findall(log_content)
+      for match in matches:
+        index = int(match[0])
+        input_id = match[1]
+        status = match[2]
+        if status == "Input has a duplicate ID.":
+          duplicate_input_ids.append({"Index": index, "Input_ID": input_id})
+        else:
+          failed_input_ids.append({"Index": index, "Input_ID": input_id})
+
+  except Exception as e:
+    print(f"Error Processing log file {log_file_path}:{e}")
+    return [], []
+
+  return duplicate_input_ids, failed_input_ids
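
Note: the regex in `process_log_files` matches the data rows of the `tablefmt="grid"` tables the uploader logs. A hedged round-trip check (file name and row values are invented):

```python
from tabulate import tabulate
from clarifai.utils.logging import process_log_files

# Write a log fragment in the same grid format the uploader emits.
rows = [[7, "input_7", "Input has a duplicate ID.", "imgs/7.jpg", "['dog']", "{}"]]
with open("sample_upload.log", "w") as f:
  f.write(
      tabulate(
          rows,
          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
          tablefmt="grid"))

duplicates, failures = process_log_files("sample_upload.log")
print(duplicates)  # expected: [{'Index': 7, 'Input_ID': 'input_7'}]
print(failures)    # expected: []
```
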
clarifai/versions.py CHANGED
@@ -1,6 +1,6 @@
 import os
 
-CLIENT_VERSION = "10.1.1"
+CLIENT_VERSION = "10.2.0"
 OS_VER = os.sys.platform
 PYTHON_VERSION = '.'.join(
     map(str, [os.sys.version_info.major, os.sys.version_info.minor, os.sys.version_info.micro]))
clarifai/workflows/validate.py CHANGED
@@ -16,7 +16,7 @@ def _model_does_not_have_model_version_id_and_other_fields(m):
 
 
 def _model_has_other_fields(m):
-  return any(k not in ['model_id', 'model_version_id'] for k in m.keys())
+  return any(k not in ['model_id', 'model_version_id', 'user_id', 'app_id'] for k in m.keys())
 
 
 def _workflow_nodes_have_valid_dependencies(nodes):
clarifai-10.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.1.1
+Version: 10.2.0
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -20,7 +20,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: clarifai-grpc (~=10.1.6)
+Requires-Dist: clarifai-grpc (~=10.2.1)
 Requires-Dist: numpy (>=1.22.0)
 Requires-Dist: tqdm (>=4.65.0)
 Requires-Dist: tritonclient (>=2.34.0)
@@ -29,6 +29,7 @@ Requires-Dist: PyYAML (>=6.0.1)
 Requires-Dist: schema (>=0.7.5)
 Requires-Dist: Pillow (>=9.5.0)
 Requires-Dist: inquirerpy (==0.3.4)
+Requires-Dist: tabulate (>=0.9.0)
 Provides-Extra: all
 Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'
 
@@ -150,7 +151,7 @@ client = User(user_id="user_id", pat="your personal access token")
 
 ## :floppy_disk: Interacting with Datasets
 
-Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets and exporting datasets as .zip files.
+Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets, retrying failed uploads from logs and exporting datasets as .zip files.
 
 ```python
 # Note: CLARIFAI_PAT must be set as env variable.
@@ -162,7 +163,18 @@ dataset = app.create_dataset(dataset_id="demo_dataset")
 # execute data upload to Clarifai app dataset
 from clarifai.datasets.upload.laoders.coco_detection import COCODetectionDataLoader
 coco_dataloader = COCODetectionDataLoader("images_dir", "coco_annotation_filepath")
-dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True)
+dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True, log_warnings =True)
+
+
+#Try upload and record the failed outputs in log file.
+from clarifai.datasets.upload.utils import load_module_dataloader
+cifar_dataloader = load_module_dataloader('./image_classification/cifar10')
+dataset.upload_dataset(dataloader=cifar_dataloader, get_upload_status=True, log_warnings =True)
+
+#Retry upload from logs for `upload_dataset`
+dataset.retry_upload_from_logs(dataloader=cifar_dataloader, log_file_path='log_file.log',
+                               retry_duplicates=False,
+                               log_warnings=True)
 
 #upload text from csv
 dataset.upload_from_csv(csv_path='csv_path', input_type='text', csv_type='raw', labels=True)
clarifai-10.2.0.dist-info/RECORD CHANGED
@@ -1,24 +1,24 @@
 clarifai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/errors.py,sha256=RwzTajwds51wLD0MVlMC5kcpBnzRpreDLlazPSBZxrg,2605
-clarifai/versions.py,sha256=bk6R6cGyCh8H_XRfcozmi8J8jsQ_tIACnIy4a-o0gbI,186
+clarifai/versions.py,sha256=i2k90NbuweI5RIEMwbwrZQUBIKcpkV97mANYDaTFaSo,186
 clarifai/client/__init__.py,sha256=xI1U0l5AZdRThvQAXCLsd9axxyFzXXJ22m8LHqVjQRU,662
 clarifai/client/app.py,sha256=_wDiHrMVhtofVHLZ2-4JHk_WoGCETPvHFe8ZQ3rRjFE,26700
 clarifai/client/base.py,sha256=4XQU_cPyo8cCGUcZarCBXra_IVdT1KZGt_5c3OtdKig,6489
-clarifai/client/dataset.py,sha256=hA7fmUcCPOE_Of1pYKqX_9e5pEdmTkODaZaC9adXMJ8,23820
-clarifai/client/input.py,sha256=GZ7JWhS79GTQOqJ8KvexqLfWCyR-ANHACzciKE-wWxI,39769
+clarifai/client/dataset.py,sha256=u4OkBZLYME-B8sOAcE7RkTohtFWdFxVZ53yrNfadC5g,27841
+clarifai/client/input.py,sha256=yZB9R1VMJvL4g5SdSgsbre4j4v18KUGPFvypMTFOF5k,40046
 clarifai/client/lister.py,sha256=03KGMvs5RVyYqxLsSrWhNc34I8kiF1Ph0NeyEwu7nMU,2082
-clarifai/client/model.py,sha256=NoCfJ9vU9NvhXBszEV1Bi0O9xkNVzjWmmP6SFi8ZG1g,32311
+clarifai/client/model.py,sha256=oMudm5ACGK29dnvZbR90huH2czuECaZ3JxjXuGPJH28,36333
 clarifai/client/module.py,sha256=BunlC4Uv7TX9JaZ0Kciwy_1_Mtg2GPZV5OLLZZcGz6I,3977
 clarifai/client/runner.py,sha256=oZkydj1Lfxn6pVx4_-CLzyaneE-dHvBIGL44usW45gA,9867
-clarifai/client/search.py,sha256=XadJjdV1PqM288LcU6DSnKmaiuVi7kzA5Tt1q0mS_Js,10767
+clarifai/client/search.py,sha256=g9VZ7WmWeaSAy6jMwH5fzZCjnaB40xFscwmUvgPTvHs,10795
 clarifai/client/user.py,sha256=QYngaFYINw-U-3FUwyrN2rFbwGyaHavuCXMGqV34pWA,10139
 clarifai/client/workflow.py,sha256=oALMJfdgTqiilfpDT3H_nepqX9mexLu-uWV0NvtxUs0,10291
 clarifai/client/auth/__init__.py,sha256=7EwR0NrozkAUwpUnCsqXvE_p0wqx_SelXlSpKShKJK0,136
 clarifai/client/auth/helper.py,sha256=3lCKo24ZIOlcSh50juJh3ZDagOo_pxEKyoPjWUokYoA,13450
 clarifai/client/auth/register.py,sha256=2CMdBsoVLoTfjyksE6j7BM2tiEc73WKYvxnwDDgNn1k,536
 clarifai/client/auth/stub.py,sha256=KIzJZ8aRB1RzXJeWHDAx19HNdBsblPPHwYLfAkgI3rY,3779
-clarifai/constants/dataset.py,sha256=2QlHF0NMXfAdFlOpEzkNYVZcxSL-dIxq-ZsY_LsIPBA,499
+clarifai/constants/dataset.py,sha256=OXYirr0iaoN_47V6wxO0H6ptV81y8zNGapPBz9qqD8o,516
 clarifai/constants/model.py,sha256=LsMkLVkuBpfS4j4yDW9M4O7HxzRpIuSo9qU5T8Wg2Co,217
 clarifai/constants/rag.py,sha256=WcHwToUVIK9ItAhDefaSohQHCLNeR55PSjZ0BFnoZ3U,28
 clarifai/constants/search.py,sha256=_g3S-JEvuygiFfMVK3cl4Ry9erZpt8Zo4ilXL2i3DAE,52
@@ -43,16 +43,16 @@ clarifai/models/api.py,sha256=d3FQQlG0mNDLrfEvchqaVcq4Tgb_TqryNnJtwp3c7sE,10961
 clarifai/models/model_serving/README.md,sha256=Ln8hsyE38J3yiLZruKHjU_hdq9CjzzbDUAO28Xyw1dQ,4060
 clarifai/models/model_serving/__init__.py,sha256=78fiK9LvdGvpMxICmZWqSIyS6BFATjW2s5R6_GgtbPA,645
 clarifai/models/model_serving/constants.py,sha256=uoi8TqEFkdsHhSZu90HOO3R0BmPC3G0z9qA5ER-5H7w,688
-clarifai/models/model_serving/utils.py,sha256=MXeOHsNHiwx9qsRoX-FzBO2Tmbgo_IVwTf3EUmgdtSQ,524
+clarifai/models/model_serving/utils.py,sha256=D2UZo90Afd9f7OGKXdjRqys_6N26psY018V7R-rcNO4,629
 clarifai/models/model_serving/cli/__init__.py,sha256=Nls28G-fedNw2oQZIkPQSN__TgjJXbG9RDzzuHIM0VI,575
-clarifai/models/model_serving/cli/_utils.py,sha256=oDd885kwX7u5vf-8dssJFyrR3lEof8x4BXt32egaoKA,1722
+clarifai/models/model_serving/cli/_utils.py,sha256=CZTKKiaoO1Mg5MKQS2Qhgy4JRjnkEHqy8zY5U6b6C0w,1734
 clarifai/models/model_serving/cli/base.py,sha256=k4ARNU1koNzGAi9ach6Vpk7hpISZySiYHyKjkBLuHLg,283
-clarifai/models/model_serving/cli/build.py,sha256=Bfa-PuLIGcreiBr_72XKqCS_IlVJdzJudZkINmX082Y,2774
+clarifai/models/model_serving/cli/build.py,sha256=-C4PBt-9xO9YsyUagz3kF4J0_PsYb6YVKFY8y-VmY5I,2786
 clarifai/models/model_serving/cli/clarifai_clis.py,sha256=sGDDj7MrlU3goWLQm4H9dCf4lPD2Ojx50_jdIoxb5QM,663
 clarifai/models/model_serving/cli/create.py,sha256=wtKcVi8XSPN-Fx0RrSUxEwH1hm5TbZ_FrCEMIS9yszM,5598
 clarifai/models/model_serving/cli/example_cli.py,sha256=tCm0J4EI0kuuSRhEiPTuraSA-bUYwtEFEHcL1eOXzRI,1039
 clarifai/models/model_serving/cli/login.py,sha256=TYRQALJZUhNvtx2VcChO0y41YXs8-yP9BrShYb9tcOM,743
-clarifai/models/model_serving/cli/upload.py,sha256=8wYviCTLZYjnXhGykGlm0HhjBd_x5PKp7IKiB8BeOGc,6871
+clarifai/models/model_serving/cli/upload.py,sha256=ctAp_bckMLm8twO8KdilD4CnawbQ4o2FvixfBUd-91Y,6883
 clarifai/models/model_serving/docs/cli.md,sha256=AM45FZag3520ri4Terb0t7_MmLTs7gjHXAf7TYVZjZk,3942
 clarifai/models/model_serving/docs/concepts.md,sha256=ppQADibKQInf9JpfcH7wIpcMndTZ3618or5yzMhGNOE,9376
 clarifai/models/model_serving/docs/dependencies.md,sha256=apwg_IxDBzovtQYXRpWMU9pUqdf0VaS10yMVOYYXhoc,728
@@ -98,7 +98,7 @@ clarifai/runners/example_llama2.py,sha256=WMGTqv3v9t3ID1rjW9BTLMkIuvyTESL6xHcOO6
 clarifai/schema/search.py,sha256=JjTi8ammJgZZ2OGl4K6tIA4zEJ1Fr2ASZARXavI1j5c,2448
 clarifai/urls/helper.py,sha256=tjoMGGHuWX68DUB0pk4MEjrmFsClUAQj2jmVEM_Sy78,4751
 clarifai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-clarifai/utils/logging.py,sha256=F19UmdeJKwIy8Nqo8o0hegf-qJGqzqtQ5Bi0Rz2NP4Q,3582
+clarifai/utils/logging.py,sha256=xJTteoUodQ7RfsbO676QgidKa5EVPbdUu89Xlwwso2s,4533
 clarifai/utils/misc.py,sha256=cC_j0eEsJ8bfnj0oRd2z-Rms1mQbAfLwrSs07hwQuCE,1420
 clarifai/utils/model_train.py,sha256=JlMJAclOQ6Nx4_30DiQrlgHbQnNedl9UKQILq_HwK7I,8001
 clarifai/utils/evaluation/__init__.py,sha256=0gmQxbzejnv1tKLj4lKcV7DHQX69irBJkWhA9oYXL1k,15813
@@ -106,10 +106,10 @@ clarifai/utils/evaluation/helpers.py,sha256=d_dcASRI_lhsHIRukAF1S-w7XazLpK9y6E_u
 clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/workflows/export.py,sha256=vICRhIreqDSShxLKjHNM2JwzKsf1B4fdXB0ciMcA70k,1945
 clarifai/workflows/utils.py,sha256=nGeB_yjVgUO9kOeKTg4OBBaBz-AwXI3m-huSVj-9W18,1924
-clarifai/workflows/validate.py,sha256=iCEKBTtB-57uE3LVU7D4AI9BRHxIxahk3U1Ro08HP-o,2535
-clarifai-10.1.1.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
-clarifai-10.1.1.dist-info/METADATA,sha256=oSrsyv-IDTTXBLdKGNaIeyHVh2vxkUj5FXNNbRcFa2c,18007
-clarifai-10.1.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
-clarifai-10.1.1.dist-info/entry_points.txt,sha256=qZOr_MIPG0dBBE1zringDJS_wXNGTAA_SQ-zcbmDHOw,82
-clarifai-10.1.1.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
-clarifai-10.1.1.dist-info/RECORD,,
+clarifai/workflows/validate.py,sha256=yJq03MaJqi5AK3alKGJJBR89xmmjAQ31sVufJUiOqY8,2556
+clarifai-10.2.0.dist-info/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
+clarifai-10.2.0.dist-info/METADATA,sha256=dxbFGbjs-EeDTbIrtt9SMibie7VgP0KNDFpfilZ_hF4,18632
+clarifai-10.2.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+clarifai-10.2.0.dist-info/entry_points.txt,sha256=qZOr_MIPG0dBBE1zringDJS_wXNGTAA_SQ-zcbmDHOw,82
+clarifai-10.2.0.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
+clarifai-10.2.0.dist-info/RECORD,,