PyPI - clarifai - Versions diffs - 10.1.0__py3-none-any.whl → 10.1.1__py3-none-any.whl - Mend

clarifai 10.1.0__py3-none-any.whl → 10.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

clarifai/client/app.py +23 -43
clarifai/client/base.py +44 -4
clarifai/client/dataset.py +8 -12
clarifai/client/input.py +29 -1
clarifai/client/model.py +191 -10
clarifai/client/module.py +7 -5
clarifai/client/runner.py +3 -1
clarifai/client/search.py +6 -3
clarifai/client/user.py +14 -12
clarifai/client/workflow.py +7 -4
clarifai/datasets/upload/loaders/README.md +3 -4
clarifai/datasets/upload/loaders/xview_detection.py +5 -5
clarifai/rag/rag.py +25 -11
clarifai/rag/utils.py +21 -6
clarifai/utils/evaluation/__init__.py +427 -0
clarifai/utils/evaluation/helpers.py +522 -0
clarifai/utils/model_train.py +3 -1
clarifai/versions.py +1 -1
{clarifai-10.1.0.dist-info → clarifai-10.1.1.dist-info}/METADATA +32 -7
{clarifai-10.1.0.dist-info → clarifai-10.1.1.dist-info}/RECORD +24 -23
clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
{clarifai-10.1.0.dist-info → clarifai-10.1.1.dist-info}/LICENSE +0 -0
{clarifai-10.1.0.dist-info → clarifai-10.1.1.dist-info}/WHEEL +0 -0
{clarifai-10.1.0.dist-info → clarifai-10.1.1.dist-info}/entry_points.txt +0 -0
{clarifai-10.1.0.dist-info → clarifai-10.1.1.dist-info}/top_level.txt +0 -0

clarifai/client/module.py CHANGED Viewed

@@ -18,6 +18,7 @@ class Module(Lister, BaseClient):
                module_version: Dict = {'id': ""},
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes a Module object.
@@ -26,7 +27,8 @@ class Module(Lister, BaseClient):
             module_id (str): The Module ID to interact with.
             module_version (dict): The Module Version to interact with.
             base_url (str): Base API url. Default "https://api.clarifai.com"
-            pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+            pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT.
+            token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN.
             **kwargs: Additional keyword arguments to be passed to the Module.
         """
     if url and module_id:
@@ -41,7 +43,8 @@ class Module(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': module_id, 'module_version': module_version}
     self.module_info = resources_pb2.Module(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
   def list_versions(self, page_no: int = None,
@@ -78,10 +81,9 @@ class Module(Lister, BaseClient):
     for module_version_info in all_module_versions_info:
       module_version_info['id'] = module_version_info['module_version_id']
       del module_version_info['module_version_id']
-      yield Module(
+      yield Module.from_auth_helper(
+          self.auth_helper,
           module_id=self.id,
-          base_url=self.base,
-          pat=self.pat,
           **dict(self.kwargs, module_version=module_version_info))
   def __getattr__(self, name):

clarifai/client/runner.py CHANGED Viewed

@@ -39,6 +39,7 @@ class Runner(BaseClient):
                check_runner_exists: bool = True,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                num_parallel_polls: int = 4,
                **kwargs) -> None:
     """
@@ -47,6 +48,7 @@ class Runner(BaseClient):
       user_id (str): Clarifai User ID
       base_url (str): Base API url. Default "https://api.clarifai.com"
       pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+      token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
       num_parallel_polls (int): the max number of threads for parallel run loops to be fetching work from
     """
     user_id = user_id or os.environ.get("CLARIFAI_USER_ID", "")
@@ -60,7 +62,7 @@ class Runner(BaseClient):
     self.kwargs = {**kwargs, 'id': runner_id, 'user_id': user_id}
     self.runner_info = resources_pb2.Runner(**self.kwargs)
     self.num_parallel_polls = min(10, num_parallel_polls)
-    BaseClient.__init__(self, user_id=self.user_id, app_id="", base=base_url, pat=pat)
+    BaseClient.__init__(self, user_id=self.user_id, app_id="", base=base_url, pat=pat, token=token)
     # Check that the runner exists.
     if check_runner_exists:

clarifai/client/search.py CHANGED Viewed

@@ -23,7 +23,8 @@ class Search(Lister, BaseClient):
                top_k: int = DEFAULT_TOP_K,
                metric: str = DEFAULT_SEARCH_METRIC,
                base_url: str = "https://api.clarifai.com",
-               pat: str = None):
+               pat: str = None,
+               token: str = None):
     """Initialize the Search object.
     Args:
@@ -33,6 +34,7 @@ class Search(Lister, BaseClient):
         metric (str, optional): Similarity metric (either 'cosine' or 'euclidean'). Defaults to 'cosine'.
         base_url (str, optional): Base API url. Defaults to "https://api.clarifai.com".
         pat (str, optional): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
     Raises:
         UserError: If the metric is not 'cosine' or 'euclidean'.
@@ -46,9 +48,10 @@ class Search(Lister, BaseClient):
     self.data_proto = resources_pb2.Data()
     self.top_k = top_k
-    self.inputs = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
+    self.inputs = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
     self.rank_filter_schema = get_schema()
-    BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self, page_size=1000)
   def _get_annot_proto(self, **kwargs):

clarifai/client/user.py CHANGED Viewed

@@ -19,6 +19,7 @@ class User(Lister, BaseClient):
                user_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an User object.
@@ -26,12 +27,13 @@ class User(Lister, BaseClient):
         user_id (str): The user ID for the user to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the User.
     """
     self.kwargs = {**kwargs, 'id': user_id}
     self.user_info = resources_pb2.User(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(self, user_id=self.id, app_id="", base=base_url, pat=pat)
+    BaseClient.__init__(self, user_id=self.id, app_id="", base=base_url, pat=pat, token=token)
     Lister.__init__(self)
   def list_apps(self, filter_by: Dict[str, Any] = {}, page_no: int = None,
@@ -62,7 +64,9 @@ class User(Lister, BaseClient):
         per_page=per_page,
         page_no=page_no)
     for app_info in all_apps_info:
-      yield App(base_url=self.base, pat=self.pat, **app_info)
+      yield App.from_auth_helper(
+          self.auth_helper,
+          **app_info)  #(base_url=self.base, pat=self.pat, token=self.token, **app_info)
   def list_runners(self, filter_by: Dict[str, Any] = {}, page_no: int = None,
                    per_page: int = None) -> Generator[Runner, None, None]:
@@ -94,7 +98,8 @@ class User(Lister, BaseClient):
         page_no=page_no)
     for runner_info in all_runners_info:
-      yield Runner(check_runner_exists=False, base_url=self.base, pat=self.pat, **runner_info)
+      yield Runner.from_auth_helper(
+          auth=self.auth_helper, check_runner_exists=False, **runner_info)
   def create_app(self, app_id: str, base_workflow: str = 'Empty', **kwargs) -> App:
     """Creates an app for the user.
@@ -120,8 +125,7 @@ class User(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nApp created\n%s", response.status)
-    kwargs.update({'user_id': self.id, 'base_url': self.base, 'pat': self.pat})
-    return App(app_id=app_id, **kwargs)
+    return App.from_auth_helper(auth=self.auth_helper, app_id=app_id)
   def create_runner(self, runner_id: str, labels: List[str], description: str) -> Runner:
     """Create a runner
@@ -151,14 +155,13 @@ class User(Lister, BaseClient):
       raise Exception(response.status)
     self.logger.info("\nRunner created\n%s", response.status)
-    return Runner(
+    return Runner.from_auth_helper(
+        auth=self.auth_helper,
         runner_id=runner_id,
         user_id=self.id,
         labels=labels,
         description=description,
-        check_runner_exists=False,
-        base_url=self.base,
-        pat=self.pat)
+        check_runner_exists=False)
   def app(self, app_id: str, **kwargs) -> App:
     """Returns an App object for the specified app ID.
@@ -181,8 +184,7 @@ class User(Lister, BaseClient):
       raise Exception(response.status)
     kwargs['user_id'] = self.id
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
-    return App(app_id=app_id, **kwargs)
+    return App.from_auth_helper(auth=self.auth_helper, app_id=app_id, **kwargs)
   def runner(self, runner_id: str) -> Runner:
     """Returns a Runner object if exists.
@@ -210,7 +212,7 @@ class User(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
-    return Runner(check_runner_exists=False, base_url=self.base, pat=self.pat, **kwargs)
+    return Runner.from_auth_helper(self.auth_helper, check_runner_exists=False, **kwargs)
   def delete_app(self, app_id: str) -> None:
     """Deletes an app for the user.

clarifai/client/workflow.py CHANGED Viewed

@@ -27,6 +27,7 @@ class Workflow(Lister, BaseClient):
                output_config: Dict = {'min_value': 0},
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes a Workflow object.
@@ -40,6 +41,8 @@ class Workflow(Lister, BaseClient):
           select_concepts (list[Concept]): The concepts to select.
           sample_ms (int): The number of milliseconds to sample.
         base_url (str): Base API url. Default "https://api.clarifai.com"
+        pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Workflow.
     """
     if url and workflow_id:
@@ -55,7 +58,8 @@ class Workflow(Lister, BaseClient):
     self.output_config = output_config
     self.workflow_info = resources_pb2.Workflow(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
   def predict(self, inputs: List[Input], workflow_state_id: str = None):
@@ -206,10 +210,9 @@ class Workflow(Lister, BaseClient):
     for workflow_version_info in all_workflow_versions_info:
       workflow_version_info['id'] = workflow_version_info['workflow_version_id']
       del workflow_version_info['workflow_version_id']
-      yield Workflow(
+      yield Workflow.from_auth_helper(
+          auth=self.auth_helper,
           workflow_id=self.id,
-          base_url=self.base,
-          pat=self.pat,
           **dict(self.kwargs, version=workflow_version_info))
   def export(self, out_path: str):

clarifai/datasets/upload/loaders/README.md CHANGED Viewed

@@ -8,15 +8,15 @@ If a dataset module exists in the zoo, uploading the specific dataset can be eas
 ```python
 from clarifai.client.app import App
-from clarifai.datasets.upload.loaders.coco_segmentation import COCOSegmentationDataLoader
+from clarifai.datasets.upload.loaders.coco_detection import COCODetectionDataLoader
 app = App(app_id="", user_id="")
 # Create a dataset in Clarifai App
 dataset = app.create_dataset(dataset_id="")
 # instantiate dataloader object
-coco_seg_dataloader = COCOSegmentationDataLoader(images_dir="", label_filepath="")
+coco_det_dataloader = COCODetectionDataLoader(images_dir="", label_filepath="")
 # execute data upload to Clarifai app dataset
-dataset.upload_dataset(dataloader=coco_seg_dataloader)
+dataset.upload_dataset(dataloader=coco_det_dataloader)
 ```
 ## Dataset Loaders
@@ -24,7 +24,6 @@ dataset.upload_dataset(dataloader=coco_seg_dataloader)
  | dataset name | task | module name (.py)
  | --- | --- | ---
  | [COCO 2017](https://cocodataset.org/#download) | Detection | `coco_detection` |
- |        | Segmentation | `coco_segmentation` |
  |       | Captions | `coco_captions` |
  |[xVIEW](http://xviewdataset.org/)  | Detection | `xview_detection` |
  | [ImageNet](https://www.image-net.org/)  | Classification | `imagenet_classification` |

clarifai/datasets/upload/loaders/xview_detection.py CHANGED Viewed

@@ -6,7 +6,7 @@ from concurrent.futures import ThreadPoolExecutor
 from multiprocessing import cpu_count
 from typing import DefaultDict, Dict, List
-import cv2
+from PIL import Image
 from tqdm import tqdm
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -54,9 +54,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
   def compress_tiff(self, img_path: str) -> None:
     """Compress tiff image"""
     img_comp_path = os.path.join(self.img_comp_dir, os.path.basename(img_path))
-    img_arr = cv2.imread(img_path)
-    cv2.imwrite(
-        img_comp_path, img_arr, params=(cv2.IMWRITE_TIFF_COMPRESSION, 8))  # 8: Adobe Deflate
+    img_arr = Image.open(img_path)
+    img_arr.save(img_comp_path, 'TIFF', compression='tiff_deflate')
   def preprocess(self):
     """Compress the tiff images to comply with clarifai grpc image encoding limit(<20MB) Uses ADOBE_DEFLATE compression algorithm"""
@@ -133,7 +132,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
     _id = os.path.splitext(os.path.basename(self.image_paths[index]))[0]
     image_path = self.image_paths[index]
-    image_height, image_width = cv2.imread(image_path).shape[:2]
+    image = Image.open(image_path)
+    image_width, image_height = image.size
     annots = []
     class_names = []
     for bbox, concept in zip(self.all_data[_id]['bboxes'], self.all_data[_id]['concepts']):

clarifai/rag/rag.py CHANGED Viewed

@@ -76,16 +76,17 @@ class RAG:
         >>> rag_agent = RAG.setup(app_url=YOUR_APP_URL)
         >>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
     """
+    now_ts = str(int(datetime.now().timestamp()))
     if user_id and not app_url:
       user = User(user_id=user_id, base_url=base_url, pat=pat)
       ## Create an App
-      now_ts = str(int(datetime.now().timestamp()))
       app_id = f"rag_app_{now_ts}"
       app = user.create_app(app_id=app_id, base_workflow=base_workflow)
     if not user_id and app_url:
       app = App(url=app_url, pat=pat)
+      uid = app_url.split(".com/")[1].split("/")[0]
+      user = User(user_id=uid, base_url=base_url, pat=pat)
     if user_id and app_url:
       raise UserError("Must provide one of user_id or app_url, not both.")
@@ -95,7 +96,7 @@ class RAG:
           "user_id or app_url must be provided. The user_id can be found at https://clarifai.com/settings."
       )
-    llm = Model(llm_url)
+    llm = Model(url=llm_url, pat=pat)
     min_score = kwargs.get("min_score", 0.95)
     max_results = kwargs.get("max_results", 5)
@@ -109,8 +110,8 @@ class RAG:
     prompter_model_params = {"params": params}
     ## Create rag-prompter model and version
-    prompter_model = app.create_model(
-        model_id=f"rag_prompter_{now_ts}", model_type_id="rag-prompter")
+    model_id = f"prompter-{workflow_id}" if workflow_id is not None else f"rag-prompter-{now_ts}"
+    prompter_model = app.create_model(model_id=model_id, model_type_id="rag-prompter")
     prompter_model = prompter_model.create_version(output_info=prompter_model_params)
     ## Generate a tmp yaml file for workflow creation
@@ -153,6 +154,8 @@ class RAG:
              batch_size: int = 128,
              chunk_size: int = 1024,
              chunk_overlap: int = 200,
+             dataset_id: str = None,
+             metadata: dict = None,
              **kwargs) -> None:
     """Uploads documents to the app.
         - Read from a local directory or public url or local filename.
@@ -192,14 +195,15 @@ class RAG:
     #splitting documents into chunks
     text_chunks = []
-    metadata = []
+    metadata_list = []
     #iterate through documents
     for doc in documents:
+      doc_i = 0
       cur_text_chunks = split_document(
           text=doc.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
       text_chunks.extend(cur_text_chunks)
-      metadata.extend([doc.metadata for _ in range(len(cur_text_chunks))])
+      metadata_list.extend([doc.metadata for _ in range(len(cur_text_chunks))])
       #if batch size is reached, upload the batch
       if len(text_chunks) > batch_size:
         for idx in range(0, len(text_chunks), batch_size):
@@ -208,18 +212,23 @@ class RAG:
           batch_texts = text_chunks[0:batch_size]
           batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
           #metadata
-          batch_metadatas = metadata[0:batch_size]
+          batch_metadatas = metadata_list[0:batch_size]
           meta_list = []
           for meta in batch_metadatas:
             meta_struct = Struct()
             meta_struct.update(meta)
+            meta_struct.update({"doc_chunk_no": doc_i})
+            if metadata and isinstance(metadata, dict):
+              meta_struct.update(metadata)
             meta_list.append(meta_struct)
+            doc_i += 1
           del batch_metadatas
           #creating input proto
           input_batch = [
               self._app.inputs().get_text_input(
                   input_id=batch_ids[i],
                   raw_text=text,
+                  dataset_id=dataset_id,
                   metadata=meta_list[i],
               ) for i, text in enumerate(batch_texts)
           ]
@@ -227,32 +236,37 @@ class RAG:
           self._app.inputs().upload_inputs(inputs=input_batch)
           #delete uploaded chunks
           del text_chunks[0:batch_size]
-          del metadata[0:batch_size]
+          del metadata_list[0:batch_size]
     #uploading the remaining chunks
     if len(text_chunks) > 0:
       batch_size = len(text_chunks)
       batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
       #metadata
-      batch_metadatas = metadata[0:batch_size]
+      batch_metadatas = metadata_list[0:batch_size]
       meta_list = []
       for meta in batch_metadatas:
         meta_struct = Struct()
         meta_struct.update(meta)
+        meta_struct.update({"doc_chunk_no": doc_i})
+        if metadata and isinstance(metadata, dict):
+          meta_struct.update(metadata)
         meta_list.append(meta_struct)
+        doc_i += 1
       del batch_metadatas
       #creating input proto
       input_batch = [
           self._app.inputs().get_text_input(
               input_id=batch_ids[i],
               raw_text=text,
+              dataset_id=dataset_id,
               metadata=meta_list[i],
           ) for i, text in enumerate(text_chunks)
       ]
       #uploading input with metadata
       self._app.inputs().upload_inputs(inputs=input_batch)
       del text_chunks
-      del metadata
+      del metadata_list
   def chat(self, messages: List[dict], client_manage_state: bool = False) -> List[dict]:
     """Chat interface in OpenAI API format.

clarifai/rag/utils.py CHANGED Viewed

@@ -3,10 +3,6 @@ from pathlib import Path
 from typing import List
 import requests
-from llama_index.core import Document, SimpleDirectoryReader
-from llama_index.core.node_parser.text import SentenceSplitter
-from llama_index.core.readers.download import download_loader
-from pypdf import PdfReader
 ## TODO: Make this token-aware.
@@ -36,8 +32,7 @@ def format_assistant_message(raw_text: str) -> dict:
   return {"role": "assistant", "content": raw_text}
-def load_documents(file_path: str = None, folder_path: str = None,
-                   url: str = None) -> List[Document]:
+def load_documents(file_path: str = None, folder_path: str = None, url: str = None) -> List[any]:
   """Loads documents from a local directory or public url or local filename.
   Args:
@@ -45,6 +40,13 @@ def load_documents(file_path: str = None, folder_path: str = None,
       folder_path (str): The path to the folder.
       url (str): The url to the file.
   """
+  #check import packages
+  try:
+    from llama_index.core import Document, SimpleDirectoryReader
+    from llama_index.core.readers.download import download_loader
+  except ImportError:
+    raise ImportError("Could not import llama index package. "
+                      "Please install it with `pip install llama-index-core==0.10.1`.")
   #document loaders for filepath
   if file_path:
     if file_path.endswith(".pdf"):
@@ -77,6 +79,12 @@ def load_documents(file_path: str = None, folder_path: str = None,
       documents = [Document(text=response.content)]
     #for pdf files
     except Exception:
+      #check import packages
+      try:
+        from pypdf import PdfReader
+      except ImportError:
+        raise ImportError("Could not import pypdf package. "
+                          "Please install it with `pip install pypdf==3.17.4`.")
       documents = []
       pdf_file = PdfReader(io.BytesIO(response.content))
       num_pages = len(pdf_file.pages)
@@ -98,6 +106,13 @@ def split_document(text: str, chunk_size: int, chunk_overlap: int, **kwargs) ->
       chunk_overlap (int): The amount of overlap between each chunk.
       **kwargs: Additional keyword arguments for the SentenceSplitter.
   """
+  #check import packages
+  try:
+    from llama_index.core.node_parser.text import SentenceSplitter
+  except ImportError:
+    raise ImportError("Could not import llama index package. "
+                      "Please install it with `pip install llama-index-core==0.10.1`.")
+  #document
   text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
   text_chunks = text_parser.split_text(text)
   return text_chunks

clarifai 10.1.0__py3-none-any.whl → 10.1.1__py3-none-any.whl

clarifai 10.1.0__py3-none-any.whl → 10.1.1__py3-none-any.whl