clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/client/app.py CHANGED
@@ -32,6 +32,7 @@ class App(Lister, BaseClient):
                app_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an App object.
 
@@ -40,6 +41,7 @@ class App(Lister, BaseClient):
         app_id (str): The App ID for the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the App.
             - name (str): The name of the app.
             - description (str): The description of the app.
@@ -52,7 +54,8 @@ class App(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': app_id}
     self.app_info = resources_pb2.App(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
 
   def list_datasets(self, page_no: int = None,
@@ -85,7 +88,7 @@ class App(Lister, BaseClient):
     for dataset_info in all_datasets_info:
       if 'version' in list(dataset_info.keys()):
         del dataset_info['version']['metrics']
-      yield Dataset(base_url=self.base, pat=self.pat, **dataset_info)
+      yield Dataset.from_auth_helper(auth=self.auth_helper, **dataset_info)
 
   def list_models(self,
                   filter_by: Dict[str, Any] = {},
@@ -126,7 +129,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if model_info['app_id'] != self.id:
           continue
-      yield Model(base_url=self.base, pat=self.pat, **model_info)
+      yield Model.from_auth_helper(auth=self.auth_helper, **model_info)
 
   def list_workflows(self,
                      filter_by: Dict[str, Any] = {},
@@ -165,7 +168,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if workflow_info['app_id'] != self.id:
           continue
-      yield Workflow(base_url=self.base, pat=self.pat, **workflow_info)
+      yield Workflow.from_auth_helper(auth=self.auth_helper, **workflow_info)
 
   def list_modules(self,
                    filter_by: Dict[str, Any] = {},
@@ -204,7 +207,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if module_info['app_id'] != self.id:
           continue
-      yield Module(base_url=self.base, pat=self.pat, **module_info)
+      yield Module.from_auth_helper(auth=self.auth_helper, **module_info)
 
   def list_installed_module_versions(self,
                                      filter_by: Dict[str, Any] = {},
@@ -239,11 +242,8 @@ class App(Lister, BaseClient):
     for imv_info in all_imv_infos:
       del imv_info['deploy_url']
       del imv_info['installed_module_version_id']  # TODO: remove this after the backend fix
-      yield Module(
-          module_id=imv_info['module_version']['module_id'],
-          base_url=self.base,
-          pat=self.pat,
-          **imv_info)
+      yield Module.from_auth_helper(
+          auth=self.auth_helper, module_id=imv_info['module_version']['module_id'], **imv_info)
 
   def list_concepts(self, page_no: int = None,
                     per_page: int = None) -> Generator[Concept, None, None]:
@@ -308,14 +308,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nDataset created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })
 
-    return Dataset(dataset_id=dataset_id, **kwargs)
+    return Dataset.from_auth_helper(self.auth_helper, dataset_id=dataset_id, **kwargs)
 
   def create_model(self, model_id: str, **kwargs) -> Model:
     """Creates a model for the app.
@@ -339,14 +333,11 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     self.logger.info("\nModel created\n%s", response.status)
     kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
+        'model_id': model_id,
         'model_type_id': response.model.model_type_id,
-        'base_url': self.base,
-        'pat': self.pat
     })
 
-    return Model(model_id=model_id, **kwargs)
+    return Model.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def create_workflow(self,
                       config_filepath: str,
@@ -436,9 +427,8 @@ class App(Lister, BaseClient):
       display_workflow_tree(dict_response["workflows"][0]["nodes"])
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]][0],
                                         "workflow")
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def create_module(self, module_id: str, description: str, **kwargs) -> Module:
     """Creates a module for the app.
@@ -464,14 +454,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nModule created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })
 
-    return Module(module_id=module_id, **kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, module_id=module_id, **kwargs)
 
   def dataset(self, dataset_id: str, **kwargs) -> Dataset:
     """Returns a Dataset object for the existing dataset ID.
@@ -496,8 +480,7 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
     kwargs['version'] = response.dataset.version if response.dataset.version else None
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
-    return Dataset(**kwargs)
+    return Dataset.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def model(self, model_id: str, model_version_id: str = "", **kwargs) -> Model:
     """Returns a Model object for the existing model ID.
@@ -532,9 +515,8 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response['model'], 'model')
     kwargs[
         'model_version'] = response.model.model_version if response.model.model_version else None
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Model(**kwargs)
+    return Model.from_auth_helper(self.auth_helper, **kwargs)
 
   def workflow(self, workflow_id: str, **kwargs) -> Workflow:
     """Returns a workflow object for the existing workflow ID.
@@ -558,9 +540,8 @@ class App(Lister, BaseClient):
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def module(self, module_id: str, module_version_id: str = "", **kwargs) -> Module:
     """Returns a Module object for the existing module ID.
@@ -585,9 +566,8 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response['module'], 'module')
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Module(**kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def inputs(self,):
     """Returns an Input object.
@@ -595,7 +575,7 @@ class App(Lister, BaseClient):
     Returns:
         Inputs: An input object.
     """
-    return Inputs(self.user_id, self.id, base_url=self.base, pat=self.pat)
+    return Inputs.from_auth_helper(self.auth_helper)
 
   def delete_dataset(self, dataset_id: str) -> None:
     """Deletes an dataset for the user.
@@ -684,9 +664,9 @@ class App(Lister, BaseClient):
         >>> app = App(app_id="app_id", user_id="user_id")
         >>> search_client = app.search(top_k=12, metric="euclidean")
     """
-    user_id = kwargs.get("user_id", self.user_app_id.user_id)
-    app_id = kwargs.get("app_id", self.user_app_id.app_id)
-    return Search(user_id=user_id, app_id=app_id, base_url=self.base, pat=self.pat, **kwargs)
+    kwargs.get("user_id", self.user_app_id.user_id)
+    kwargs.get("app_id", self.user_app_id.app_id)
+    return Search.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def __getattr__(self, name):
     return getattr(self.app_info, name)
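
Taken together, the app.py changes make a session token a first-class alternative to a PAT, and every child object (Dataset, Model, Workflow, Module, Inputs, Search) is now built from the shared auth_helper instead of re-reading base_url/pat keyword arguments. A minimal usage sketch, assuming placeholder IDs and credentials (not values from this diff):

from clarifai.client.app import App

# token= may be passed directly or exported as CLARIFAI_SESSION_TOKEN;
# pat= / CLARIFAI_PAT keeps working as before.
app = App(user_id="my-user", app_id="my-app", token="<session-token>")

# Listing helpers now yield objects created via *.from_auth_helper(auth=self.auth_helper, ...),
# so each Dataset below reuses the App's token/PAT and base URL automatically.
for dataset in app.list_datasets():
  print(dataset.id)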
clarifai/client/base.py CHANGED
@@ -7,7 +7,7 @@ from google.protobuf.wrappers_pb2 import BoolValue
 
 from clarifai.client.auth import create_stub
 from clarifai.client.auth.helper import ClarifaiAuthHelper
-from clarifai.errors import ApiError
+from clarifai.errors import ApiError, UserError
 from clarifai.utils.misc import get_from_dict_or_env
 
 
@@ -19,9 +19,11 @@ class BaseClient:
         - user_id (str): A user ID for authentication.
         - app_id (str): An app ID for the application to interact with.
         - pat (str): A personal access token for authentication.
+        - token (str): A session token for authentication. Accepts either a session token or a pat.
         - base (str): The base URL for the API endpoint. Defaults to 'https://api.clarifai.com'.
         - ui (str): The URL for the UI. Defaults to 'https://clarifai.com'.
 
+
     Attributes:
         auth_helper (ClarifaiAuthHelper): An instance of ClarifaiAuthHelper for authentication.
         STUB (Stub): The gRPC Stub object for API interaction.
@@ -31,15 +33,53 @@ class BaseClient:
   """
 
   def __init__(self, **kwargs):
-    pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
-    kwargs.update({'pat': pat})
+    token, pat = "", ""
+    try:
+      pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
+    except UserError:
+      token = get_from_dict_or_env(key="token", env_key="CLARIFAI_SESSION_TOKEN", **kwargs)
+    finally:
+      assert token or pat, Exception(
+          "Need 'pat' or 'token' in args or use one of the CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN env vars"
+      )
+    kwargs.update({'token': token, 'pat': pat})
+
     self.auth_helper = ClarifaiAuthHelper(**kwargs, validate=False)
     self.STUB = create_stub(self.auth_helper)
     self.metadata = self.auth_helper.metadata
     self.pat = self.auth_helper.pat
+    self.token = self.auth_helper._token
     self.user_app_id = self.auth_helper.get_user_app_id_proto()
     self.base = self.auth_helper.base
 
+  @classmethod
+  def from_auth_helper(cls, auth: ClarifaiAuthHelper, **kwargs):
+    default_kwargs = {
+        "user_id": kwargs.get("user_id", None) or auth.user_id,
+        "app_id": kwargs.get("app_id", None) or auth.app_id,
+        "pat": kwargs.get("pat", None) or auth.pat,
+        "token": kwargs.get("token", None) or auth._token,
+    }
+    _base = kwargs.get("base", None) or auth.base
+    _clss = cls.__mro__[0]
+    if _clss == BaseClient:
+      kwargs = {
+          **default_kwargs,
+          "base": _base,  # Baseclient uses `base`
+          "ui": kwargs.get("ui", None) or auth.ui
+      }
+    else:
+      # Remove user_id and app_id if a custom URL is provided
+      if kwargs.get("url"):
+        default_kwargs.pop("user_id", "")
+        default_kwargs.pop("app_id", "")
+      # Remove app_id if the class name contains "Runner"
+      if 'Runner' in _clss.__name__:
+        default_kwargs.pop("app_id", "")
+      kwargs.update({**default_kwargs, "base_url": _base})
+
+    return cls(**kwargs)
+
   def _grpc_request(self, method: Callable, argument: Any):
     """Makes a gRPC request to the API.
 
@@ -52,7 +92,7 @@ class BaseClient:
     """
 
     try:
-      res = method(argument)
+      res = method(argument, metadata=self.auth_helper.metadata)
       # MessageToDict(res) TODO global debug logger
       return res
     except ApiError:
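
The new BaseClient.from_auth_helper classmethod is what the App and Dataset call sites above rely on: it copies user_id, app_id, pat, token, and the base URL out of an existing ClarifaiAuthHelper and forwards them to the subclass constructor. A rough sketch of calling it directly, with placeholder credentials (assumptions, not values from this diff):

from clarifai.client.auth.helper import ClarifaiAuthHelper
from clarifai.client.dataset import Dataset

# One helper holds the credentials; validate=False mirrors how BaseClient builds it internally.
helper = ClarifaiAuthHelper(user_id="my-user", app_id="my-app", pat="<pat>", validate=False)

# Roughly equivalent to Dataset(dataset_id=..., user_id=..., app_id=..., pat=..., base_url=helper.base),
# but pat/token/base are read from the helper, so the credentials live in one place.
ds = Dataset.from_auth_helper(helper, dataset_id="my-dataset")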
clarifai/client/dataset.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import time
 import uuid
@@ -12,12 +13,13 @@ from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
+from tabulate import tabulate
 from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
 from clarifai.client.lister import Lister
-from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
+from clarifai.constants.dataset import DATASET_UPLOAD_TASKS, MAX_RETRIES
 from clarifai.datasets.export.inputs_annotations import (DatasetExportReader,
                                                          InputAnnotationDownloader)
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -27,7 +29,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import add_file_handler, get_logger
+from clarifai.utils.logging import add_file_handler, get_logger, process_log_files
 from clarifai.utils.misc import BackoffIterator, Chunker
 
 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -43,6 +45,7 @@ class Dataset(Lister, BaseClient):
                dataset_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes a Dataset object.
 
@@ -51,6 +54,7 @@ class Dataset(Lister, BaseClient):
         dataset_id (str): The Dataset ID within the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Dataset.
     """
     if url and dataset_id:
@@ -66,9 +70,11 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
+    self.input_object = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
 
   def create_version(self, **kwargs) -> 'Dataset':
@@ -98,13 +104,10 @@ class Dataset(Lister, BaseClient):
     self.logger.info("\nDataset Version created\n%s", response.status)
     kwargs.update({
         'dataset_id': self.id,
-        'app_id': self.app_id,
-        'user_id': self.user_id,
         'version': response.dataset_versions[0],
-        'base_url': self.base,
-        'pat': self.pat
     })
-    return Dataset(**kwargs)
+
+    return Dataset.from_auth_helper(self.auth_helper, **kwargs)
 
   def delete_version(self, version_id: str) -> None:
     """Deletes a dataset version for the Dataset.
@@ -162,13 +165,9 @@ class Dataset(Lister, BaseClient):
       del dataset_version_info['metrics']
       kwargs = {
          'dataset_id': self.id,
-          'app_id': self.app_id,
-          'user_id': self.user_id,
          'version': resources_pb2.DatasetVersion(**dataset_version_info),
-          'base_url': self.base,
-          'pat': self.pat
      }
-      yield Dataset(**kwargs)
+      yield Dataset.from_auth_helper(self.auth_helper, **kwargs)
 
   def _concurrent_annot_upload(self, annots: List[List[resources_pb2.Annotation]]
                               ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -196,11 +195,11 @@ class Dataset(Lister, BaseClient):
 
     return retry_annot_upload
 
-  def _delete_failed_inputs(
-      self,
-      batch_input_ids: List[int],
-      dataset_obj: ClarifaiDatasetType,
-      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
+  def _delete_failed_inputs(self,
+                            batch_input_ids: List[int],
+                            dataset_obj: ClarifaiDatasetType,
+                            upload_response: MultiInputResponse = None,
+                            batch_no: Optional[int] = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.
 
     Args:
@@ -235,8 +234,19 @@ class Dataset(Lister, BaseClient):
     if duplicate_input_ids:
       success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
       failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      duplicate_details = [[
+          input_ids[id], id, "Input has a duplicate ID.",
+          dataset_obj.data_generator[input_ids[id]].image_path,
+          dataset_obj.data_generator[input_ids[id]].labels,
+          dataset_obj.data_generator[input_ids[id]].metadata
+      ] for id in duplicate_input_ids]
+      duplicate_table = tabulate(
+          duplicate_details,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+          f"{timestamp}\nFailed to upload {len(duplicate_input_ids)} inputs due to duplicate IDs in current batch {batch_no}:\n{duplicate_table}\n\n"
       )
 
     #delete failed inputs
@@ -247,7 +257,11 @@ class Dataset(Lister, BaseClient):
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]
 
   def _upload_inputs_annotations(
-      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      batch_no: Optional[int] = None,
+      is_retry_duplicates: bool = False,
   ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.
 
@@ -261,12 +275,16 @@ class Dataset(Lister, BaseClient):
       response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
+    if is_retry_duplicates:
+      for inp in input_protos:
+        inp.id = uuid.uuid4().hex
+
     input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []
 
     self.input_object._wait_for_inputs(input_job_id)
     success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
-                                                                     _response)
+                                                                     _response, batch_no)
 
     if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
@@ -277,7 +295,7 @@ class Dataset(Lister, BaseClient):
 
   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
-                     dataset_obj: ClarifaiDatasetType) -> None:
+                     dataset_obj: ClarifaiDatasetType, batch_no: Optional[int]) -> None:
     """Retry failed uploads.
 
     Args:
@@ -285,56 +303,87 @@ class Dataset(Lister, BaseClient):
       retry_annot_protos: failed annot protos
       dataset_obj: ClarifaiDataset object
     """
+
+    for _retry in range(MAX_RETRIES):
+      if not failed_input_ids and not retry_annot_protos:
+        break
+      if failed_input_ids:
+        retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+        logging.warning(
+            f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}\n"
+        )
+        failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+            failed_input_ids, dataset_obj, batch_no)
+        failed_input_ids = failed_retrying_inputs
+      if retry_annot_protos:
+        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
+        _ = self._concurrent_annot_upload(chunked_annotation_protos)
+
+    #Log failed inputs
     if failed_input_ids:
-      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
-      #Log Retrying inputs
+      failed_inputs_logs = []
+      input_map = {input.id: input for input in retry_response.inputs}
+      for index in failed_retrying_inputs:
+        failed_id = dataset_obj.all_input_ids[index]
+        input_details = input_map.get(failed_id)
+        if input_details:
+          failed_input_details = [
+              index, failed_id, input_details.status.details,
+              dataset_obj.data_generator[index].image_path,
+              dataset_obj.data_generator[index].labels, dataset_obj.data_generator[index].metadata
+          ]
+          failed_inputs_logs.append(failed_input_details)
+
+      failed_table = tabulate(
+          failed_inputs_logs,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+          f"{timestamp}\nFailed to upload {len(failed_retrying_inputs)} inputs in current batch {batch_no}:\n{failed_table}\n\n"
      )
-      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
-          failed_input_ids, dataset_obj)
-      #Log failed inputs
-      if failed_retrying_inputs:
-        failed_retrying_input_ids = [
-            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
-        ]
-        failed_inputs_logs = {
-            input.id: input.status.details
-            for input in retry_response.inputs if input.id in failed_retrying_input_ids
-        }
-        self.logger.warning(
-            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
-        )
-    if retry_annot_protos:
-      chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
-      _ = self._concurrent_annot_upload(chunked_annotation_protos)
 
-  def _data_upload(self, dataset_obj: ClarifaiDatasetType) -> None:
+  def _data_upload(self,
+                   dataset_obj: ClarifaiDatasetType,
+                   is_log_retry: bool = False,
+                   log_retry_ids: List[int] = None,
+                   **kwargs) -> None:
     """Uploads inputs and annotations to clarifai platform dataset.
 
     Args:
-      dataset_obj: ClarifaiDataset object
+      dataset_obj: ClarifaiDataset object,
+      is_log_retry: True if the iteration is to retry uploads from logs.
+      **kwargs: Additional keyword arguments for retry uploading functionality..
+
+    Returns:
+      None
     """
-    input_ids = list(range(len(dataset_obj)))
+    if is_log_retry:
+      input_ids = log_retry_ids
+    else:
+      input_ids = list(range(len(dataset_obj)))
+
     chunk_input_ids = Chunker(input_ids, self.batch_size).chunk()
     with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
      with tqdm(total=len(chunk_input_ids), desc='Uploading Dataset') as progress:
        # Submit all jobs to the executor and store the returned futures
        futures = [
-            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj)
-            for batch_input_ids in chunk_input_ids
+            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj,
+                            batch_no, **kwargs)
+            for batch_no, batch_input_ids in enumerate(chunk_input_ids)
        ]
 
-        for job in as_completed(futures):
+        for batch_no, job in enumerate(as_completed(futures)):
          retry_input_ids, retry_annot_protos, _ = job.result()
-          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
+          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj, batch_no)
          progress.update()
 
   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
                      get_upload_status: bool = False,
-                     log_warnings: bool = False) -> None:
+                     log_warnings: bool = False,
+                     **kwargs) -> None:
     """Uploads a dataset to the app.
 
     Args:
@@ -342,6 +391,7 @@ class Dataset(Lister, BaseClient):
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
      get_upload_status (bool): True if you want to get the upload status of the dataset
      log_warnings (bool): True if you want to save log warnings in a file
+      kwargs: Additional keyword arguments for retry uploading functionality..
    """
    #add file handler to log warnings
    if log_warnings:
@@ -369,11 +419,47 @@ class Dataset(Lister, BaseClient):
    if get_upload_status:
      pre_upload_stats = self.get_upload_status(pre_upload=True)
 
-    self._data_upload(dataset_obj)
+    self._data_upload(dataset_obj, **kwargs)
 
    if get_upload_status:
      self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)
 
+  def retry_upload_from_logs(self,
+                             log_file_path: str,
+                             dataloader: Type[ClarifaiDataLoader],
+                             retry_duplicates: bool = False,
+                             log_warnings: bool = False,
+                             **kwargs) -> None:
+    """Retries failed uploads from the log file.
+
+    Args:
+      log_file_path (str): path to the log file
+      dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
+      retry_duplicate (bool): True if you want to retry duplicate inputs
+      kwargs: Additional keyword arguments for retry uploading functionality..
+    """
+
+    duplicate_input_ids, failed_input_ids = process_log_files(log_file_path)
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+
+    if retry_duplicates and duplicate_input_ids:
+      logging.warning(f"Retrying upload for {len(duplicate_input_ids)} duplicate inputs...\n")
+      duplicate_inputs_indexes = [input["Index"] for input in duplicate_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader,
+          log_retry_ids=duplicate_inputs_indexes,
+          is_retry_duplicates=True,
+          is_log_retry=True,
+          **kwargs)
+
+    if failed_input_ids:
+      #failed_inputs= ([input["Input_ID"] for input in failed_input_ids])
+      logging.warning(f"Retrying upload for {len(failed_input_ids)} failed inputs...\n")
+      failed_input_indexes = [input["Index"] for input in failed_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader, log_retry_ids=failed_input_indexes, is_log_retry=True, **kwargs)
+
   def upload_from_csv(self,
                       csv_path: str,
                       input_type: str = 'text',
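
dataset.py also gains a log-driven retry path: duplicate and failed inputs are now logged as tabulate grids with a batch number and timestamp, and retry_upload_from_logs() parses such a log via process_log_files and re-submits only those inputs, optionally regenerating IDs for duplicates. A hedged usage sketch; my_loader stands in for any existing ClarifaiDataLoader implementation, the IDs and file name are placeholders, and credentials are assumed to come from CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN:

from clarifai.client.dataset import Dataset

dataset = Dataset(user_id="my-user", app_id="my-app", dataset_id="my-dataset")

# 1) Upload with log_warnings=True so the duplicate/failed input tables above are also written
#    to a Dataset_Upload<timestamp>.log file; my_loader is an assumed ClarifaiDataLoader instance.
dataset.upload_dataset(dataloader=my_loader, log_warnings=True)

# 2) Re-upload only the inputs recorded in that log; retry_duplicates=True re-submits duplicate
#    inputs with fresh uuid4 IDs (the is_retry_duplicates path in _upload_inputs_annotations).
dataset.retry_upload_from_logs(
    log_file_path="Dataset_Upload1700000000.log",  # placeholder log file name
    dataloader=my_loader,
    retry_duplicates=True)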