PyPI - clarifai - Versions diffs - 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl - Mend

clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

clarifai/client/app.py +23 -43
clarifai/client/base.py +44 -4
clarifai/client/dataset.py +138 -52
clarifai/client/input.py +37 -4
clarifai/client/model.py +279 -8
clarifai/client/module.py +7 -5
clarifai/client/runner.py +3 -1
clarifai/client/search.py +7 -3
clarifai/client/user.py +14 -12
clarifai/client/workflow.py +7 -4
clarifai/constants/dataset.py +2 -0
clarifai/datasets/upload/loaders/README.md +3 -4
clarifai/datasets/upload/loaders/xview_detection.py +5 -5
clarifai/models/model_serving/cli/_utils.py +1 -1
clarifai/models/model_serving/cli/build.py +1 -1
clarifai/models/model_serving/cli/upload.py +1 -1
clarifai/models/model_serving/utils.py +3 -1
clarifai/rag/rag.py +25 -11
clarifai/rag/utils.py +21 -6
clarifai/utils/evaluation/__init__.py +427 -0
clarifai/utils/evaluation/helpers.py +522 -0
clarifai/utils/logging.py +30 -0
clarifai/utils/model_train.py +3 -1
clarifai/versions.py +1 -1
clarifai/workflows/validate.py +1 -1
{clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/METADATA +46 -9
{clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/RECORD +31 -30
clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
{clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/LICENSE +0 -0
{clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/WHEEL +0 -0
{clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/entry_points.txt +0 -0
{clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/top_level.txt +0 -0

clarifai/datasets/upload/loaders/README.md CHANGED Viewed

@@ -8,15 +8,15 @@ If a dataset module exists in the zoo, uploading the specific dataset can be eas
 ```python
 from clarifai.client.app import App
-from clarifai.datasets.upload.loaders.coco_segmentation import COCOSegmentationDataLoader
+from clarifai.datasets.upload.loaders.coco_detection import COCODetectionDataLoader
 app = App(app_id="", user_id="")
 # Create a dataset in Clarifai App
 dataset = app.create_dataset(dataset_id="")
 # instantiate dataloader object
-coco_seg_dataloader = COCOSegmentationDataLoader(images_dir="", label_filepath="")
+coco_det_dataloader = COCODetectionDataLoader(images_dir="", label_filepath="")
 # execute data upload to Clarifai app dataset
-dataset.upload_dataset(dataloader=coco_seg_dataloader)
+dataset.upload_dataset(dataloader=coco_det_dataloader)
 ```
 ## Dataset Loaders
@@ -24,7 +24,6 @@ dataset.upload_dataset(dataloader=coco_seg_dataloader)
  | dataset name | task | module name (.py)
  | --- | --- | ---
  | [COCO 2017](https://cocodataset.org/#download) | Detection | `coco_detection` |
- |        | Segmentation | `coco_segmentation` |
  |       | Captions | `coco_captions` |
  |[xVIEW](http://xviewdataset.org/)  | Detection | `xview_detection` |
  | [ImageNet](https://www.image-net.org/)  | Classification | `imagenet_classification` |

clarifai/datasets/upload/loaders/xview_detection.py CHANGED Viewed

@@ -6,7 +6,7 @@ from concurrent.futures import ThreadPoolExecutor
 from multiprocessing import cpu_count
 from typing import DefaultDict, Dict, List
-import cv2
+from PIL import Image
 from tqdm import tqdm
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -54,9 +54,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
   def compress_tiff(self, img_path: str) -> None:
     """Compress tiff image"""
     img_comp_path = os.path.join(self.img_comp_dir, os.path.basename(img_path))
-    img_arr = cv2.imread(img_path)
-    cv2.imwrite(
-        img_comp_path, img_arr, params=(cv2.IMWRITE_TIFF_COMPRESSION, 8))  # 8: Adobe Deflate
+    img_arr = Image.open(img_path)
+    img_arr.save(img_comp_path, 'TIFF', compression='tiff_deflate')
   def preprocess(self):
     """Compress the tiff images to comply with clarifai grpc image encoding limit(<20MB) Uses ADOBE_DEFLATE compression algorithm"""
@@ -133,7 +132,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
     _id = os.path.splitext(os.path.basename(self.image_paths[index]))[0]
     image_path = self.image_paths[index]
-    image_height, image_width = cv2.imread(image_path).shape[:2]
+    image = Image.open(image_path)
+    image_width, image_height = image.size
     annots = []
     class_names = []
     for bbox, concept in zip(self.all_data[_id]['bboxes'], self.all_data[_id]['concepts']):

clarifai/models/model_serving/cli/_utils.py CHANGED Viewed

@@ -11,7 +11,7 @@ from ..constants import (CLARIFAI_EXAMPLES_REPO, CLARIFAI_EXAMPLES_REPO_PATH,
 def download_examples_repo(forced_download: bool = False):
   def _pull():
-    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}")
+    subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}", shell=True)
   if not os.path.isdir(CLARIFAI_EXAMPLES_REPO_PATH):
     print(f"Download examples to {CLARIFAI_EXAMPLES_REPO_PATH}")

clarifai/models/model_serving/cli/build.py CHANGED Viewed

@@ -70,7 +70,7 @@ class BuildModelSubCli(BaseClarifaiCli):
     if not self.no_test:
       assert os.path.exists(
           self.test_path), FileNotFoundError(f"Could not find `test.py` in {self.path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
     # build

clarifai/models/model_serving/cli/upload.py CHANGED Viewed

@@ -126,7 +126,7 @@ class UploadModelSubCli(BaseClarifaiCli):
     # Run test before uploading
     if not self.no_test:
       assert os.path.exists(self.test_path), FileNotFoundError(f"Not found {self.test_path}")
-      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
+      result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
     deploy(

clarifai/models/model_serving/utils.py CHANGED Viewed

@@ -18,4 +18,6 @@ def _read_pat():
 def login(pat=None):
   """ if pat provided, set pat to CLARIFAI_PAT otherwise read pat from file"""
-  os.environ["CLARIFAI_PAT"] = pat or _read_pat()
+  pat = pat or _read_pat()
+  assert pat, Exception("PAT is not found, please run `clarifai login` to persist your PAT")
+  os.environ["CLARIFAI_PAT"] = pat

clarifai/rag/rag.py CHANGED Viewed

@@ -76,16 +76,17 @@ class RAG:
         >>> rag_agent = RAG.setup(app_url=YOUR_APP_URL)
         >>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
     """
+    now_ts = str(int(datetime.now().timestamp()))
     if user_id and not app_url:
       user = User(user_id=user_id, base_url=base_url, pat=pat)
       ## Create an App
-      now_ts = str(int(datetime.now().timestamp()))
       app_id = f"rag_app_{now_ts}"
       app = user.create_app(app_id=app_id, base_workflow=base_workflow)
     if not user_id and app_url:
       app = App(url=app_url, pat=pat)
+      uid = app_url.split(".com/")[1].split("/")[0]
+      user = User(user_id=uid, base_url=base_url, pat=pat)
     if user_id and app_url:
       raise UserError("Must provide one of user_id or app_url, not both.")
@@ -95,7 +96,7 @@ class RAG:
           "user_id or app_url must be provided. The user_id can be found at https://clarifai.com/settings."
       )
-    llm = Model(llm_url)
+    llm = Model(url=llm_url, pat=pat)
     min_score = kwargs.get("min_score", 0.95)
     max_results = kwargs.get("max_results", 5)
@@ -109,8 +110,8 @@ class RAG:
     prompter_model_params = {"params": params}
     ## Create rag-prompter model and version
-    prompter_model = app.create_model(
-        model_id=f"rag_prompter_{now_ts}", model_type_id="rag-prompter")
+    model_id = f"prompter-{workflow_id}" if workflow_id is not None else f"rag-prompter-{now_ts}"
+    prompter_model = app.create_model(model_id=model_id, model_type_id="rag-prompter")
     prompter_model = prompter_model.create_version(output_info=prompter_model_params)
     ## Generate a tmp yaml file for workflow creation
@@ -153,6 +154,8 @@ class RAG:
              batch_size: int = 128,
              chunk_size: int = 1024,
              chunk_overlap: int = 200,
+             dataset_id: str = None,
+             metadata: dict = None,
              **kwargs) -> None:
     """Uploads documents to the app.
         - Read from a local directory or public url or local filename.
@@ -192,14 +195,15 @@ class RAG:
     #splitting documents into chunks
     text_chunks = []
-    metadata = []
+    metadata_list = []
     #iterate through documents
     for doc in documents:
+      doc_i = 0
       cur_text_chunks = split_document(
           text=doc.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
       text_chunks.extend(cur_text_chunks)
-      metadata.extend([doc.metadata for _ in range(len(cur_text_chunks))])
+      metadata_list.extend([doc.metadata for _ in range(len(cur_text_chunks))])
       #if batch size is reached, upload the batch
       if len(text_chunks) > batch_size:
         for idx in range(0, len(text_chunks), batch_size):
@@ -208,18 +212,23 @@ class RAG:
           batch_texts = text_chunks[0:batch_size]
           batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
           #metadata
-          batch_metadatas = metadata[0:batch_size]
+          batch_metadatas = metadata_list[0:batch_size]
           meta_list = []
           for meta in batch_metadatas:
             meta_struct = Struct()
             meta_struct.update(meta)
+            meta_struct.update({"doc_chunk_no": doc_i})
+            if metadata and isinstance(metadata, dict):
+              meta_struct.update(metadata)
             meta_list.append(meta_struct)
+            doc_i += 1
           del batch_metadatas
           #creating input proto
           input_batch = [
               self._app.inputs().get_text_input(
                   input_id=batch_ids[i],
                   raw_text=text,
+                  dataset_id=dataset_id,
                   metadata=meta_list[i],
               ) for i, text in enumerate(batch_texts)
           ]
@@ -227,32 +236,37 @@ class RAG:
           self._app.inputs().upload_inputs(inputs=input_batch)
           #delete uploaded chunks
           del text_chunks[0:batch_size]
-          del metadata[0:batch_size]
+          del metadata_list[0:batch_size]
     #uploading the remaining chunks
     if len(text_chunks) > 0:
       batch_size = len(text_chunks)
       batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
       #metadata
-      batch_metadatas = metadata[0:batch_size]
+      batch_metadatas = metadata_list[0:batch_size]
       meta_list = []
       for meta in batch_metadatas:
         meta_struct = Struct()
         meta_struct.update(meta)
+        meta_struct.update({"doc_chunk_no": doc_i})
+        if metadata and isinstance(metadata, dict):
+          meta_struct.update(metadata)
         meta_list.append(meta_struct)
+        doc_i += 1
       del batch_metadatas
       #creating input proto
       input_batch = [
           self._app.inputs().get_text_input(
               input_id=batch_ids[i],
               raw_text=text,
+              dataset_id=dataset_id,
               metadata=meta_list[i],
           ) for i, text in enumerate(text_chunks)
       ]
       #uploading input with metadata
       self._app.inputs().upload_inputs(inputs=input_batch)
       del text_chunks
-      del metadata
+      del metadata_list
   def chat(self, messages: List[dict], client_manage_state: bool = False) -> List[dict]:
     """Chat interface in OpenAI API format.

clarifai/rag/utils.py CHANGED Viewed

@@ -3,10 +3,6 @@ from pathlib import Path
 from typing import List
 import requests
-from llama_index.core import Document, SimpleDirectoryReader
-from llama_index.core.node_parser.text import SentenceSplitter
-from llama_index.core.readers.download import download_loader
-from pypdf import PdfReader
 ## TODO: Make this token-aware.
@@ -36,8 +32,7 @@ def format_assistant_message(raw_text: str) -> dict:
   return {"role": "assistant", "content": raw_text}
-def load_documents(file_path: str = None, folder_path: str = None,
-                   url: str = None) -> List[Document]:
+def load_documents(file_path: str = None, folder_path: str = None, url: str = None) -> List[any]:
   """Loads documents from a local directory or public url or local filename.
   Args:
@@ -45,6 +40,13 @@ def load_documents(file_path: str = None, folder_path: str = None,
       folder_path (str): The path to the folder.
       url (str): The url to the file.
   """
+  #check import packages
+  try:
+    from llama_index.core import Document, SimpleDirectoryReader
+    from llama_index.core.readers.download import download_loader
+  except ImportError:
+    raise ImportError("Could not import llama index package. "
+                      "Please install it with `pip install llama-index-core==0.10.1`.")
   #document loaders for filepath
   if file_path:
     if file_path.endswith(".pdf"):
@@ -77,6 +79,12 @@ def load_documents(file_path: str = None, folder_path: str = None,
       documents = [Document(text=response.content)]
     #for pdf files
     except Exception:
+      #check import packages
+      try:
+        from pypdf import PdfReader
+      except ImportError:
+        raise ImportError("Could not import pypdf package. "
+                          "Please install it with `pip install pypdf==3.17.4`.")
       documents = []
       pdf_file = PdfReader(io.BytesIO(response.content))
       num_pages = len(pdf_file.pages)
@@ -98,6 +106,13 @@ def split_document(text: str, chunk_size: int, chunk_overlap: int, **kwargs) ->
       chunk_overlap (int): The amount of overlap between each chunk.
       **kwargs: Additional keyword arguments for the SentenceSplitter.
   """
+  #check import packages
+  try:
+    from llama_index.core.node_parser.text import SentenceSplitter
+  except ImportError:
+    raise ImportError("Could not import llama index package. "
+                      "Please install it with `pip install llama-index-core==0.10.1`.")
+  #document
   text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
   text_chunks = text_parser.split_text(text)
   return text_chunks

clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl