clarifai 10.0.1__py3-none-any.whl → 10.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/app.py +23 -43
- clarifai/client/base.py +46 -4
- clarifai/client/dataset.py +85 -33
- clarifai/client/input.py +35 -7
- clarifai/client/model.py +192 -11
- clarifai/client/module.py +8 -6
- clarifai/client/runner.py +3 -1
- clarifai/client/search.py +6 -3
- clarifai/client/user.py +14 -12
- clarifai/client/workflow.py +8 -5
- clarifai/datasets/upload/features.py +3 -0
- clarifai/datasets/upload/image.py +57 -26
- clarifai/datasets/upload/loaders/README.md +3 -4
- clarifai/datasets/upload/loaders/xview_detection.py +9 -5
- clarifai/datasets/upload/utils.py +23 -7
- clarifai/models/model_serving/README.md +113 -121
- clarifai/models/model_serving/__init__.py +2 -0
- clarifai/models/model_serving/cli/_utils.py +53 -0
- clarifai/models/model_serving/cli/base.py +14 -0
- clarifai/models/model_serving/cli/build.py +79 -0
- clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
- clarifai/models/model_serving/cli/create.py +171 -0
- clarifai/models/model_serving/cli/example_cli.py +34 -0
- clarifai/models/model_serving/cli/login.py +26 -0
- clarifai/models/model_serving/cli/upload.py +182 -0
- clarifai/models/model_serving/constants.py +20 -0
- clarifai/models/model_serving/docs/cli.md +150 -0
- clarifai/models/model_serving/docs/concepts.md +229 -0
- clarifai/models/model_serving/docs/dependencies.md +1 -1
- clarifai/models/model_serving/docs/inference_parameters.md +112 -107
- clarifai/models/model_serving/docs/model_types.md +16 -17
- clarifai/models/model_serving/model_config/__init__.py +4 -2
- clarifai/models/model_serving/model_config/base.py +369 -0
- clarifai/models/model_serving/model_config/config.py +219 -224
- clarifai/models/model_serving/model_config/inference_parameter.py +5 -0
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -24
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -28
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -18
- clarifai/models/model_serving/{models → model_config}/output.py +8 -0
- clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
- clarifai/models/model_serving/model_config/{serializer.py → triton/serializer.py} +3 -1
- clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
- clarifai/models/model_serving/{models/model_types.py → model_config/triton/wrappers.py} +4 -4
- clarifai/models/model_serving/{models → repo_build}/__init__.py +2 -0
- clarifai/models/model_serving/repo_build/build.py +198 -0
- clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
- clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
- clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
- clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
- clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
- clarifai/models/model_serving/{models/pb_model.py → repo_build/static_files/triton/model.py} +15 -14
- clarifai/models/model_serving/utils.py +21 -0
- clarifai/rag/rag.py +67 -23
- clarifai/rag/utils.py +21 -5
- clarifai/utils/evaluation/__init__.py +427 -0
- clarifai/utils/evaluation/helpers.py +522 -0
- clarifai/utils/logging.py +7 -0
- clarifai/utils/model_train.py +3 -1
- clarifai/versions.py +1 -1
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/METADATA +58 -10
- clarifai-10.1.1.dist-info/RECORD +115 -0
- clarifai-10.1.1.dist-info/entry_points.txt +2 -0
- clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
- clarifai/models/model_serving/cli/deploy_cli.py +0 -123
- clarifai/models/model_serving/cli/model_zip.py +0 -61
- clarifai/models/model_serving/cli/repository.py +0 -89
- clarifai/models/model_serving/docs/custom_config.md +0 -33
- clarifai/models/model_serving/docs/output.md +0 -28
- clarifai/models/model_serving/models/default_test.py +0 -281
- clarifai/models/model_serving/models/inference.py +0 -50
- clarifai/models/model_serving/models/test.py +0 -64
- clarifai/models/model_serving/pb_model_repository.py +0 -108
- clarifai-10.0.1.dist-info/RECORD +0 -103
- clarifai-10.0.1.dist-info/entry_points.txt +0 -4
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/LICENSE +0 -0
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/WHEEL +0 -0
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/top_level.txt +0 -0
clarifai/client/app.py
CHANGED
```diff
@@ -32,6 +32,7 @@ class App(Lister, BaseClient):
                app_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an App object.

@@ -40,6 +41,7 @@ class App(Lister, BaseClient):
         app_id (str): The App ID for the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the App.
             - name (str): The name of the app.
             - description (str): The description of the app.
@@ -52,7 +54,8 @@ class App(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': app_id}
     self.app_info = resources_pb2.App(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)

   def list_datasets(self, page_no: int = None,
@@ -85,7 +88,7 @@ class App(Lister, BaseClient):
     for dataset_info in all_datasets_info:
       if 'version' in list(dataset_info.keys()):
         del dataset_info['version']['metrics']
-      yield Dataset(
+      yield Dataset.from_auth_helper(auth=self.auth_helper, **dataset_info)

   def list_models(self,
                   filter_by: Dict[str, Any] = {},
@@ -126,7 +129,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if model_info['app_id'] != self.id:
           continue
-      yield Model(
+      yield Model.from_auth_helper(auth=self.auth_helper, **model_info)

   def list_workflows(self,
                      filter_by: Dict[str, Any] = {},
@@ -165,7 +168,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if workflow_info['app_id'] != self.id:
           continue
-      yield Workflow(
+      yield Workflow.from_auth_helper(auth=self.auth_helper, **workflow_info)

   def list_modules(self,
                    filter_by: Dict[str, Any] = {},
@@ -204,7 +207,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if module_info['app_id'] != self.id:
           continue
-      yield Module(
+      yield Module.from_auth_helper(auth=self.auth_helper, **module_info)

   def list_installed_module_versions(self,
                                      filter_by: Dict[str, Any] = {},
@@ -239,11 +242,8 @@ class App(Lister, BaseClient):
     for imv_info in all_imv_infos:
       del imv_info['deploy_url']
       del imv_info['installed_module_version_id']  # TODO: remove this after the backend fix
-      yield Module(
-          module_id=imv_info['module_version']['module_id'],
-          base_url=self.base,
-          pat=self.pat,
-          **imv_info)
+      yield Module.from_auth_helper(
+          auth=self.auth_helper, module_id=imv_info['module_version']['module_id'], **imv_info)

   def list_concepts(self, page_no: int = None,
                     per_page: int = None) -> Generator[Concept, None, None]:
@@ -308,14 +308,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nDataset created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })

-    return Dataset(dataset_id=dataset_id, **kwargs)
+    return Dataset.from_auth_helper(self.auth_helper, dataset_id=dataset_id, **kwargs)

   def create_model(self, model_id: str, **kwargs) -> Model:
     """Creates a model for the app.
@@ -339,14 +333,11 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     self.logger.info("\nModel created\n%s", response.status)
     kwargs.update({
-        '
-        'user_id': self.user_id,
+        'model_id': model_id,
         'model_type_id': response.model.model_type_id,
-        'base_url': self.base,
-        'pat': self.pat
     })

-    return Model(
+    return Model.from_auth_helper(auth=self.auth_helper, **kwargs)

   def create_workflow(self,
                       config_filepath: str,
@@ -436,9 +427,8 @@ class App(Lister, BaseClient):
     display_workflow_tree(dict_response["workflows"][0]["nodes"])
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]][0],
                                         "workflow")
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)

   def create_module(self, module_id: str, description: str, **kwargs) -> Module:
     """Creates a module for the app.
@@ -464,14 +454,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nModule created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })

-    return Module(module_id=module_id, **kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, module_id=module_id, **kwargs)

   def dataset(self, dataset_id: str, **kwargs) -> Dataset:
     """Returns a Dataset object for the existing dataset ID.
@@ -496,8 +480,7 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
     kwargs['version'] = response.dataset.version if response.dataset.version else None
-
-    return Dataset(**kwargs)
+    return Dataset.from_auth_helper(auth=self.auth_helper, **kwargs)

   def model(self, model_id: str, model_version_id: str = "", **kwargs) -> Model:
     """Returns a Model object for the existing model ID.
@@ -532,9 +515,8 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response['model'], 'model')
     kwargs[
         'model_version'] = response.model.model_version if response.model.model_version else None
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Model(**kwargs)
+    return Model.from_auth_helper(self.auth_helper, **kwargs)

   def workflow(self, workflow_id: str, **kwargs) -> Workflow:
     """Returns a workflow object for the existing workflow ID.
@@ -558,9 +540,8 @@ class App(Lister, BaseClient):
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)

   def module(self, module_id: str, module_version_id: str = "", **kwargs) -> Module:
     """Returns a Module object for the existing module ID.
@@ -585,9 +566,8 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response['module'], 'module')
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Module(**kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, **kwargs)

   def inputs(self,):
     """Returns an Input object.

@@ -595,7 +575,7 @@ class App(Lister, BaseClient):
     Returns:
         Inputs: An input object.
     """
-    return Inputs(self.
+    return Inputs.from_auth_helper(self.auth_helper)

   def delete_dataset(self, dataset_id: str) -> None:
     """Deletes an dataset for the user.
@@ -684,9 +664,9 @@ class App(Lister, BaseClient):
         >>> app = App(app_id="app_id", user_id="user_id")
         >>> search_client = app.search(top_k=12, metric="euclidean")
     """
-
-
-    return Search(
+    kwargs.get("user_id", self.user_app_id.user_id)
+    kwargs.get("app_id", self.user_app_id.app_id)
+    return Search.from_auth_helper(auth=self.auth_helper, **kwargs)

   def __getattr__(self, name):
     return getattr(self.app_info, name)
```
clarifai/client/base.py
CHANGED
```diff
@@ -7,7 +7,7 @@ from google.protobuf.wrappers_pb2 import BoolValue

 from clarifai.client.auth import create_stub
 from clarifai.client.auth.helper import ClarifaiAuthHelper
-from clarifai.errors import ApiError
+from clarifai.errors import ApiError, UserError
 from clarifai.utils.misc import get_from_dict_or_env


@@ -19,9 +19,11 @@ class BaseClient:
     - user_id (str): A user ID for authentication.
     - app_id (str): An app ID for the application to interact with.
     - pat (str): A personal access token for authentication.
+    - token (str): A session token for authentication. Accepts either a session token or a pat.
     - base (str): The base URL for the API endpoint. Defaults to 'https://api.clarifai.com'.
     - ui (str): The URL for the UI. Defaults to 'https://clarifai.com'.

+
   Attributes:
     auth_helper (ClarifaiAuthHelper): An instance of ClarifaiAuthHelper for authentication.
     STUB (Stub): The gRPC Stub object for API interaction.
@@ -31,15 +33,53 @@ class BaseClient:
   """

   def __init__(self, **kwargs):
-    pat =
-
+    token, pat = "", ""
+    try:
+      pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
+    except UserError:
+      token = get_from_dict_or_env(key="token", env_key="CLARIFAI_SESSION_TOKEN", **kwargs)
+    finally:
+      assert token or pat, Exception(
+          "Need 'pat' or 'token' in args or use one of the CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN env vars"
+      )
+    kwargs.update({'token': token, 'pat': pat})
+
     self.auth_helper = ClarifaiAuthHelper(**kwargs, validate=False)
     self.STUB = create_stub(self.auth_helper)
     self.metadata = self.auth_helper.metadata
     self.pat = self.auth_helper.pat
+    self.token = self.auth_helper._token
     self.user_app_id = self.auth_helper.get_user_app_id_proto()
     self.base = self.auth_helper.base

+  @classmethod
+  def from_auth_helper(cls, auth: ClarifaiAuthHelper, **kwargs):
+    default_kwargs = {
+        "user_id": kwargs.get("user_id", None) or auth.user_id,
+        "app_id": kwargs.get("app_id", None) or auth.app_id,
+        "pat": kwargs.get("pat", None) or auth.pat,
+        "token": kwargs.get("token", None) or auth._token,
+    }
+    _base = kwargs.get("base", None) or auth.base
+    _clss = cls.__mro__[0]
+    if _clss == BaseClient:
+      kwargs = {
+          **default_kwargs,
+          "base": _base,  # Baseclient uses `base`
+          "ui": kwargs.get("ui", None) or auth.ui
+      }
+    else:
+      # Remove user_id and app_id if a custom URL is provided
+      if kwargs.get("url"):
+        default_kwargs.pop("user_id", "")
+        default_kwargs.pop("app_id", "")
+      # Remove app_id if the class name contains "Runner"
+      if 'Runner' in _clss.__name__:
+        default_kwargs.pop("app_id", "")
+      kwargs.update({**default_kwargs, "base_url": _base})
+
+    return cls(**kwargs)
+
   def _grpc_request(self, method: Callable, argument: Any):
     """Makes a gRPC request to the API.

@@ -52,7 +92,7 @@ class BaseClient:
     """

     try:
-      res = method(argument)
+      res = method(argument, metadata=self.auth_helper.metadata)
       # MessageToDict(res) TODO global debug logger
       return res
     except ApiError:
@@ -118,6 +158,8 @@ class BaseClient:
         value_s = struct_pb2.Struct()
         value_s.update(value)
         value = value_s
+      elif key == 'metrics':
+        continue
       elif key in ['metadata']:
         if isinstance(value, dict) and value != {}:
           value_s = struct_pb2.Struct()
```
clarifai/client/dataset.py
CHANGED
```diff
@@ -2,11 +2,13 @@ import os
 import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
 from multiprocessing import cpu_count
-from typing import Generator, List, Tuple, Type, TypeVar, Union
+from typing import Dict, Generator, List, Optional, Tuple, Type, TypeVar, Union

 import requests
 from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
@@ -25,7 +27,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import get_logger
+from clarifai.utils.logging import add_file_handler, get_logger
 from clarifai.utils.misc import BackoffIterator, Chunker

 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -41,6 +43,7 @@ class Dataset(Lister, BaseClient):
                dataset_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes a Dataset object.

@@ -49,6 +52,7 @@ class Dataset(Lister, BaseClient):
         dataset_id (str): The Dataset ID within the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Dataset.
     """
     if url and dataset_id:
@@ -64,9 +68,10 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
-    self.logger = get_logger(logger_level="INFO")
-    BaseClient.__init__(
+    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
+    self.logger = get_logger(logger_level="INFO", name=__name__)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)

   def create_version(self, **kwargs) -> 'Dataset':
@@ -96,13 +101,10 @@ class Dataset(Lister, BaseClient):
     self.logger.info("\nDataset Version created\n%s", response.status)
     kwargs.update({
         'dataset_id': self.id,
-        'app_id': self.app_id,
-        'user_id': self.user_id,
         'version': response.dataset_versions[0],
-        'base_url': self.base,
-        'pat': self.pat
     })
-
+
+    return Dataset.from_auth_helper(self.auth_helper, **kwargs)

   def delete_version(self, version_id: str) -> None:
     """Deletes a dataset version for the Dataset.
@@ -160,13 +162,9 @@ class Dataset(Lister, BaseClient):
       del dataset_version_info['metrics']
       kwargs = {
           'dataset_id': self.id,
-          'app_id': self.app_id,
-          'user_id': self.user_id,
           'version': resources_pb2.DatasetVersion(**dataset_version_info),
-          'base_url': self.base,
-          'pat': self.pat
       }
-      yield Dataset(**kwargs)
+      yield Dataset.from_auth_helper(self.auth_helper, **kwargs)

   def _concurrent_annot_upload(self, annots: List[List[resources_pb2.Annotation]]
                                ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -194,13 +192,17 @@ class Dataset(Lister, BaseClient):

     return retry_annot_upload

-  def _delete_failed_inputs(
-
+  def _delete_failed_inputs(
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.

     Args:
       batch_input_ids: batch input ids
       dataset_obj: ClarifaiDataset object
+      upload_response: upload response proto

     Returns:
       success_inputs: upload success input ids
@@ -220,7 +222,19 @@ class Dataset(Lister, BaseClient):
     success_inputs = response_dict.get('inputs', [])

     success_input_ids = [input.get('id') for input in success_inputs]
-    failed_input_ids = list(set(input_ids) - set(success_input_ids))
+    failed_input_ids = list(set(input_ids) - set(success_input_ids.copy()))
+    #check duplicate input ids
+    duplicate_input_ids = [
+        input.id for input in upload_response.inputs
+        if input.status.details == 'Input has a duplicate ID.'
+    ]  #handling duplicte ID failures.
+    if duplicate_input_ids:
+      success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
+      failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      self.logger.warning(
+          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+      )
+
     #delete failed inputs
     self._grpc_request(
         self.STUB.DeleteInputs,
@@ -228,8 +242,9 @@ class Dataset(Lister, BaseClient):
     )
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]

-  def _upload_inputs_annotations(
-
+  def _upload_inputs_annotations(
+      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+  ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.

     Args:
@@ -239,20 +254,22 @@ class Dataset(Lister, BaseClient):
     Returns:
       failed_input_ids: failed input ids
       retry_annot_protos: failed annot protos
+      response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
-    input_job_id = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
+    input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []

     self.input_object._wait_for_inputs(input_job_id)
-    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj
+    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
+                                                                     _response)

-    if self.task in ["visual_detection", "visual_segmentation"]:
+    if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
       chunked_annotation_protos = Chunker(annotation_protos, self.batch_size).chunk()
       retry_annot_protos.extend(self._concurrent_annot_upload(chunked_annotation_protos))

-    return failed_input_ids, retry_annot_protos
+    return failed_input_ids, retry_annot_protos, _response

   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
@@ -265,7 +282,25 @@ class Dataset(Lister, BaseClient):
       dataset_obj: ClarifaiDataset object
     """
     if failed_input_ids:
-
+      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+      #Log Retrying inputs
+      self.logger.warning(
+          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+      )
+      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+          failed_input_ids, dataset_obj)
+      #Log failed inputs
+      if failed_retrying_inputs:
+        failed_retrying_input_ids = [
+            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
+        ]
+        failed_inputs_logs = {
+            input.id: input.status.details
+            for input in retry_response.inputs if input.id in failed_retrying_input_ids
+        }
+        self.logger.warning(
+            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
+        )
     if retry_annot_protos:
       chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
       _ = self._concurrent_annot_upload(chunked_annotation_protos)
@@ -287,21 +322,27 @@ class Dataset(Lister, BaseClient):
     ]

     for job in as_completed(futures):
-      retry_input_ids, retry_annot_protos = job.result()
+      retry_input_ids, retry_annot_protos, _ = job.result()
       self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
       progress.update()

   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
-                     get_upload_status: bool = False
+                     get_upload_status: bool = False,
+                     log_warnings: bool = False) -> None:
     """Uploads a dataset to the app.

     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
+      log_warnings (bool): True if you want to save log warnings in a file
     """
+    #add file handler to log warnings
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+    #set batch size and task
     self.batch_size = min(self.batch_size, batch_size)
     self.task = dataloader.task
     if self.task not in DATASET_UPLOAD_TASKS:
@@ -321,10 +362,13 @@ class Dataset(Lister, BaseClient):
     else:  # visual_classification & visual_captioning
       dataset_obj = VisualClassificationDataset(dataloader, self.id)

+    if get_upload_status:
+      pre_upload_stats = self.get_upload_status(pre_upload=True)
+
     self._data_upload(dataset_obj)

     if get_upload_status:
-      self.get_upload_status(dataloader)
+      self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)

   def upload_from_csv(self,
                       csv_path: str,
@@ -398,16 +442,21 @@ class Dataset(Lister, BaseClient):
         folder_path=folder_path, dataset_id=self.id, labels=labels)
     self.input_object._bulk_upload(inputs=input_protos, batch_size=batch_size)

-  def get_upload_status(
-
-
-
+  def get_upload_status(
+      self,
+      dataloader: Type[ClarifaiDataLoader] = None,
+      delete_version: bool = False,
+      timeout: int = 600,
+      pre_upload_stats: Tuple[Dict[str, int], Dict[str, int]] = None,
+      pre_upload: bool = False) -> Optional[Tuple[Dict[str, int], Dict[str, int]]]:
     """Creates a new dataset version and displays the upload status of the dataset.

     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       delete_version (bool): True if you want to delete the version after getting the upload status
       timeout (int): Timeout in seconds for getting the upload status. Default is 600 seconds.
+      pre_upload_stats (Tuple[Dict[str, int], Dict[str, int]]): The pre upload stats for the dataset.
+      pre_upload (bool): True if you want to get the pre upload stats for the dataset.

     Example:
         >>> from clarifai.client.dataset import Dataset
@@ -450,9 +499,12 @@ class Dataset(Lister, BaseClient):
           raise UserError(
              "Dataset metrics are taking too long to process. Please try again later.")
         break
+    #get pre upload stats
+    if pre_upload:
+      return DisplayUploadStatus.get_dataset_version_stats(dataset_metrics_response)

     dataset_info_dict = dict(user_id=self.user_id, app_id=self.app_id, dataset_id=self.id)
-    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict)
+    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict, pre_upload_stats)

     if delete_version:
       self.delete_version(dataset_version_id)
```
clarifai/client/input.py
CHANGED
```diff
@@ -32,6 +32,7 @@ class Inputs(Lister, BaseClient):
                logger_level: str = "INFO",
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an Input object.

@@ -39,6 +40,8 @@ class Inputs(Lister, BaseClient):
         user_id (str): A user ID for authentication.
         app_id (str): An app ID for the application to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
+        pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Input
     """
     self.user_id = user_id
@@ -46,7 +49,8 @@ class Inputs(Lister, BaseClient):
     self.kwargs = {**kwargs}
     self.input_info = resources_pb2.Input(**self.kwargs)
     self.logger = get_logger(logger_level=logger_level, name=__name__)
-    BaseClient.__init__(
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)

   @staticmethod
@@ -660,15 +664,39 @@ class Inputs(Lister, BaseClient):
         user_app_id=self.user_app_id, inputs=inputs, inputs_add_job_id=input_job_id)
     response = self._grpc_request(self.STUB.PostInputs, request)
     if response.status.code != status_code_pb2.SUCCESS:
-
-      self.logger.warning(response
-
-
+      if show_log:
+        self.logger.warning(response)
+      else:
+        return input_job_id, response
     else:
       if show_log:
         self.logger.info("\nInputs Uploaded\n%s", response.status)

-    return input_job_id
+    return input_job_id, response
+
+  def patch_inputs(self, inputs: List[Input], action: str = 'merge') -> str:
+    """Patch list of input objects to the app.
+
+    Args:
+        inputs (list): List of input objects to upload.
+        action (str): Action to perform on the input. Options: 'merge', 'overwrite', 'remove'.
+
+    Returns:
+        response: Response from the grpc request.
+    """
+    if not isinstance(inputs, list):
+      raise UserError("inputs must be a list of Input objects")
+    uuid.uuid4().hex  # generate a unique id for this job
+    request = service_pb2.PatchInputsRequest(
+        user_app_id=self.user_app_id, inputs=inputs, action=action)
+    response = self._grpc_request(self.STUB.PatchInputs, request)
+    if response.status.code != status_code_pb2.SUCCESS:
+      try:
+        self.logger.warning(f"Patch inputs failed, status: {response.annotations[0].status}")
+      except Exception:
+        self.logger.warning(f"Patch inputs failed, status: {response.status.details}")
+
+    self.logger.info("\nPatch Inputs Successful\n%s", response.status)

   def upload_annotations(self, batch_annot: List[resources_pb2.Annotation], show_log: bool = True
                          ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -705,7 +733,7 @@ class Inputs(Lister, BaseClient):
     Returns:
         input_job_id: job id for the upload request.
     """
-    input_job_id = self.upload_inputs(inputs, False)
+    input_job_id, _ = self.upload_inputs(inputs, False)
     self._wait_for_inputs(input_job_id)
     failed_inputs = self._delete_failed_inputs(inputs)
```
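With this change `upload_inputs` returns an `(input_job_id, response)` pair, and the new `patch_inputs` method exposes the PatchInputs endpoint directly. A hedged usage sketch, assuming `Inputs.get_input_from_url` (an existing helper on `Inputs`, not shown in this diff) and placeholder IDs and URLs:

```python
# Sketch of the new patch_inputs API; the IDs and URL are placeholders.
from clarifai.client.input import Inputs

inputs = Inputs(user_id="user_id", app_id="app_id")

# get_input_from_url builds an Input proto; here it targets an input ID
# that is assumed to already exist in the app.
updated = Inputs.get_input_from_url(
    input_id="existing_input_id",
    image_url="https://samples.clarifai.com/metro-north.jpg")

# action is one of 'merge', 'overwrite', 'remove' ('merge' is the default).
inputs.patch_inputs([updated], action="merge")
```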