clarifai-9.0.0-py3-none-any.whl → clarifai-9.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/data_upload/datasets/__init__.py +0 -0
- clarifai/data_upload/datasets/base.py +67 -0
- clarifai/data_upload/datasets/features.py +45 -0
- clarifai/data_upload/datasets/image.py +236 -0
- clarifai/data_upload/datasets/text.py +62 -0
- clarifai/data_upload/datasets/zoo/__init__.py +0 -0
- clarifai/data_upload/datasets/zoo/coco_captions.py +99 -0
- clarifai/data_upload/datasets/zoo/coco_detection.py +129 -0
- clarifai/data_upload/datasets/zoo/coco_segmentation.py +158 -0
- clarifai/data_upload/examples.py +19 -0
- clarifai/data_upload/upload.py +269 -168
- clarifai/listing/installed_module_versions.py +3 -14
- clarifai/listing/lister.py +40 -0
- clarifai/listing/module_versions.py +42 -0
- clarifai/listing/modules.py +36 -0
- clarifai/modules/style.css +7 -4
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/METADATA +3 -3
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/RECORD +37 -13
- clarifai_utils/data_upload/datasets/__init__.py +0 -0
- clarifai_utils/data_upload/datasets/base.py +67 -0
- clarifai_utils/data_upload/datasets/features.py +45 -0
- clarifai_utils/data_upload/datasets/image.py +236 -0
- clarifai_utils/data_upload/datasets/text.py +62 -0
- clarifai_utils/data_upload/datasets/zoo/__init__.py +0 -0
- clarifai_utils/data_upload/datasets/zoo/coco_captions.py +99 -0
- clarifai_utils/data_upload/datasets/zoo/coco_detection.py +129 -0
- clarifai_utils/data_upload/datasets/zoo/coco_segmentation.py +158 -0
- clarifai_utils/data_upload/examples.py +19 -0
- clarifai_utils/data_upload/upload.py +269 -168
- clarifai_utils/listing/installed_module_versions.py +3 -14
- clarifai_utils/listing/lister.py +40 -0
- clarifai_utils/listing/module_versions.py +42 -0
- clarifai_utils/listing/modules.py +36 -0
- clarifai_utils/modules/style.css +7 -4
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/LICENSE +0 -0
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/WHEEL +0 -0
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/top_level.txt +0 -0
clarifai/data_upload/datasets/zoo/coco_segmentation.py
ADDED
@@ -0,0 +1,158 @@
+#! COCO 2017 Image Segmentation dataset
+
+import gc
+import os
+import zipfile
+from functools import reduce
+from glob import glob
+
+import cv2
+import numpy as np
+import requests
+from pycocotools import mask as maskUtils
+from pycocotools.coco import COCO
+from tqdm import tqdm
+
+from ..features import VisualSegmentationFeatures
+
+
+class COCOSegmentationDataset:
+  """COCO 2017 Image Segmentation Dataset."""
+
+  def __init__(self, split: str = "train"):
+    """
+    Initialize coco dataset.
+    Args:
+      split: "train" or "val"
+    Sets `self.filenames` (the coco zip files to download) and
+    `self.data_dir` (the local coco dataset directory).
+    """
+    self.filenames = {
+        "train": "train2017.zip",
+        "val": "val2017.zip",
+        "annotations": "annotations_trainval2017.zip"
+    }
+    self.split = split
+    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
+    self.data_dir = os.path.join(os.curdir, ".data")  # data storage dir
+    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
+
+  def coco_download(self, save_dir):
+    """Download coco dataset."""
+    if not os.path.exists(save_dir):
+      os.mkdir(save_dir)
+
+    # check if train, val and annotation dirs exist
+    # so that the coco2017 data isn't re-downloaded
+    for key, filename in self.filenames.items():
+      if glob(f"{save_dir}/{key}*"):
+        print("dataset already downloaded and extracted")
+        continue
+
+      print("-" * 80)
+      print(f"Downloading {filename}")
+      print("-" * 80)
+
+      if "annotations" in filename:
+        self.url = "http://images.cocodataset.org/annotations/"
+
+      response = requests.get(self.url + filename, stream=True)
+      response.raise_for_status()
+      with open(os.path.join(save_dir, filename), "wb") as _file:
+        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
+          if chunk:
+            _file.write(chunk)
+      print("Coco data download complete...")
+
+      # extract files
+      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
+      print(f" Extracting {filename} file")
+      zf.extractall(path=save_dir)
+      # Delete coco zip
+      print(f" Deleting {filename}")
+      os.remove(path=os.path.join(save_dir, filename))
+
+  def dataloader(self):
+    """
+    Transform coco data into clarifai proto compatible format for upload.
+    Returns:
+      VisualSegmentationFeatures type generator.
+    """
+    if isinstance(self.filenames, dict) and len(self.filenames) == 3:
+      self.coco_download(self.data_dir)
+      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
+          for i in os.listdir(self.data_dir) if "train" in i][0]
+      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
+          for i in os.listdir(self.data_dir) if "val" in i][0]
+
+      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
+          for i in os.listdir(self.data_dir) if "annotations" in i][0]
+    else:
+      raise Exception(f"`filenames` must hold the train, val and annotations \
+          coco zip file names. Found {len(self.filenames)} items instead.")
+
+    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" + \
+        f"instances_{self.split}*")[0]
+    coco = COCO(annot_file)
+    categories = coco.loadCats(coco.getCatIds())
+    cat_id_map = {category["id"]: category["name"] for category in categories}
+    cat_img_ids = {}
+    for cat_id in list(cat_id_map.keys()):
+      cat_img_ids[cat_id] = coco.getImgIds(catIds=[cat_id])
+
+    img_ids = []
+    for i in list(cat_img_ids.values()):
+      img_ids.extend(i)
+
+    # get annotations for each image id
+    for _id in img_ids:
+      annots = []  # polygons
+      class_names = []
+      labels = [i for i in list(filter(lambda x: _id in cat_img_ids[x], cat_img_ids))]
+      image_path = glob(self.extracted_coco_dirs[self.split] + "/" + \
+          f"{str(_id).zfill(12)}*")[0]
+
+      image_height, image_width = cv2.imread(image_path).shape[:2]
+      for cat_id in labels:
+        annot_ids = coco.getAnnIds(imgIds=_id, catIds=[cat_id])
+        if len(annot_ids) > 0:
+          img_annotations = coco.loadAnns(annot_ids)
+          for ann in img_annotations:
+            class_names.append(cat_id_map[cat_id])
+            # get polygons
+            if type(ann['segmentation']) == list:
+              for seg in ann['segmentation']:
+                poly = np.array(seg).reshape((int(len(seg) / 2), 2))
+                poly[:, 0], poly[:, 1] = poly[:, 0] / image_width, poly[:, 1] / image_height
+                annots.append(poly.tolist())  # [[x=col, y=row],...]
+            else:  # seg: {"counts":[...]}
+              if type(ann['segmentation']['counts']) == list:
+                rle = maskUtils.frPyObjects([ann['segmentation']], image_height, image_width)
+              else:
+                rle = ann['segmentation']
+              mask = maskUtils.decode(rle)  # binary mask
+              # convert mask to polygons and add to annots
+              contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+              polygons = []
+              for cont in contours:
+                if cont.size >= 6:
+                  polygons.append(cont.astype(float).flatten().tolist())
+              # store polygons in (x,y) pairs
+              polygons_flattened = reduce(lambda x, y: x + y, polygons)
+              del polygons
+              del contours
+              del mask
+              gc.collect()
+
+              polygons = np.array(polygons_flattened).reshape((int(len(polygons_flattened) / 2),
+                                                               2))
+              polygons[:, 0] = polygons[:, 0] / image_width
+              polygons[:, 1] = polygons[:, 1] / image_height
+
+              annots.append(polygons.tolist())  # [[x=col, y=row],...,[x=col, y=row]]
+        else:  # if no annotations for given image_id-cat_id pair
+          continue
+      assert len(class_names) == len(annots), f"Num classes must match num annotations \
+          for a single image. Found {len(class_names)} classes and {len(annots)} polygons."
+
+      yield VisualSegmentationFeatures(image_path, class_names, annots, _id)
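The new zoo modules are plain generator classes, so the file above can be exercised on its own. A minimal sketch, assuming clarifai 9.3.1 is installed so the relative `..features` import resolves via the package path (note the first run downloads and extracts the COCO 2017 zips into ./.data):

# Minimal usage sketch for the zoo module above.
from clarifai.data_upload.datasets.zoo.coco_segmentation import COCOSegmentationDataset

dataset = COCOSegmentationDataset(split="val")  # "train" or "val"
for i, feature in enumerate(dataset.dataloader()):
  # each item is a VisualSegmentationFeatures built positionally as
  # (image_path, class_names, polygons, image_id), per the yield above
  print(feature)
  if i == 2:  # peek at a few samples only
    break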
clarifai/data_upload/examples.py
ADDED
@@ -0,0 +1,19 @@
+#! Execute dataset upload using the `from_module` upload feature
+
+from clarifai.data_upload.upload import UploadConfig
+
+text_upload_obj = UploadConfig(
+    user_id="",
+    app_id="",
+    pat="",
+    dataset_id="",
+    task="visual_clf",
+    from_module="./examples/image_classification/cifar10",
+    split="train",
+    portal="clarifai"  # clarifai(prod), dev or staging
+)
+## change the task and from_module arguments in UploadConfig() to upload
+## the example food-101 dataset
+
+if __name__ == "__main__":
+  text_upload_obj.upload_to_clarifai()
clarifai/data_upload/upload.py
CHANGED
@@ -1,201 +1,302 @@
 #! Clarifai data upload
 
+import importlib
+import inspect
+import os
+import sys
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from multiprocessing import cpu_count
+from typing import Iterator, Optional, Tuple, Union
 
-from
-from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
 from clarifai_grpc.grpc.api.status import status_code_pb2
-from datasets import (ImageClassificationDataset, TextClassificationDataset,
-                      VisualDetectionDataset, VisualSegmentationDataset)
-from omegaconf import OmegaConf
 from tqdm import tqdm
 
 from clarifai.client import create_stub
+from clarifai.data_upload.datasets.base import Chunker
+from clarifai.data_upload.datasets.image import (VisualClassificationDataset,
+                                                 VisualDetectionDataset, VisualSegmentationDataset)
+from clarifai.data_upload.datasets.text import TextClassificationDataset
 
 
-def
+def load_dataset(module_dir: Union[str, os.PathLike], split: str) -> Iterator:
   """
-
+  Validate and import dataset module data generator.
   Args:
-
-
-
+    `module_dir`: relative path to the module directory
+      The directory must contain a `dataset.py` script and the data itself.
+    `split`: "train" or "val"/"test" dataset split
+  Module Directory Structure:
+  ---------------------------
+      <folder_name>/
+      ├──__init__.py
+      ├──<Your local dir dataset>/
+      └──dataset.py
+  dataset.py must implement a class named following the convention,
+  <dataset_name>Dataset and this class must have a dataloader()
+  generator method.
   """
-
-
-
-
-
-
-
-
-
-
-
-  if
-
-      print(f"Post inputs failed, status:\n{response.inputs[0].status.details}\n")
-    except:
-      print(f"Post inputs failed, status:\n{response.status.details}\n")
-      retry_upload.append(inp_proto)
+  sys.path.append(str(module_dir))
+
+  if not os.path.exists(os.path.join(module_dir, "__init__.py")):
+    with open(os.path.join(module_dir, "__init__.py"), "w"):
+      pass
+
+  import dataset  # dataset module
+
+  # get main module class
+  main_module_cls = None
+  for name, obj in dataset.__dict__.items():
+    if inspect.isclass(obj) and "Dataset" in name:
+      main_module_cls = obj
     else:
-
+      continue
 
-  return
+  return main_module_cls(split).dataloader()
 
 
-def
+def load_zoo_dataset(name: str, split: str) -> Iterator:
   """
-
+  Get dataset generator object from dataset zoo.
+  Args:
+    `name`: dataset module name in datasets/zoo/.
+    `split`: "train" or "val"/"test" dataset split
+  Returns:
+    Data generator object
   """
-
-
-
-
-
-
-
-  for annot_proto in inputs:
-    response = STUB.PostAnnotations(
-        service_pb2.PostAnnotationsRequest(user_app_id=USER_APP_ID, annotations=[annot_proto]),)
-
-    if response.status.code != status_code_pb2.SUCCESS:
-      try:
-        print(f"Post annotations failed, status:\n{response.annotations[0].status.details}\n")
-      except:
-        print(f"Post annotations failed, status:\n{response.status.details}\n")
-        retry_upload.append(annot_proto)
+  zoo_dataset = importlib.import_module(f"datasets.zoo.{name}")
+  # get main module class
+  main_module_cls = None
+  for name, obj in zoo_dataset.__dict__.items():
+    if inspect.isclass(obj) and "Dataset" in name:
+      main_module_cls = obj
     else:
-
+      continue
+
+  return main_module_cls(split).dataloader()
+
+
+class UploadConfig:
+
+  def __init__(
+      self,
+      user_id: str,
+      app_id: str,
+      pat: str,
+      dataset_id: str,
+      task: str,
+      from_module: Optional[Union[str, os.PathLike]] = None,
+      from_zoo: Optional[str] = None,  # load dataset from zoo
+      split: str = "train",  # train or test/val
+      chunk_size: int = 16,
+      portal: str = "clarifai"):
+    """
+    Initialize upload configs.
+    Args:
+      `user_id`: Clarifai user id.
+      `app_id`: Clarifai app id.
+      `pat`: Clarifai PAT (Personal Access Token).
+      `dataset_id`: Clarifai dataset id (where data is to be uploaded).
+      `task`: either of `visual_clf`, `visual_detection`, `visual_segmentation` or `text_clf`.
+      `from_module`: Path to dataset module directory.
+        Should be left as None if `from_zoo` is to be used.
+      `from_zoo`: Name of dataset to upload from the zoo.
+        The name must match the dataset module name excluding the file extension.
+        Should be left as None if `from_module` is to be used.
+      `split`: Dataset split to upload. Either of train or test/val.
+      `chunk_size`: size of chunks for parallel data upload.
+    """
+    self.USER_ID = user_id
+    self.APP_ID = app_id
+    self.PAT = pat
+    self.dataset_id = dataset_id
+    self.task = task
+    self.module_dir = from_module
+    self.zoo_dataset = from_zoo
+    self.split = split
+    self.chunk_size = chunk_size
+    self.num_workers: int = cpu_count()
+    self.__base: str = ""
+    if portal == "dev":
+      self.__base = "https://api-dev.clarifai.com"
+    elif portal == "staging":
+      self.__base = "https://api-staging.clarifai.com"
+    else:  # prod
+      self.__base = "https://api.clarifai.com"
+
+    # Set auth vars as env variables
+    os.environ["CLARIFAI_USER_ID"] = self.USER_ID
+    os.environ["CLARIFAI_APP_ID"] = self.APP_ID
+    os.environ["CLARIFAI_API_BASE"] = self.__base
+    os.environ["CLARIFAI_PAT"] = self.PAT
+
+    self.STUB: service_pb2_grpc.V2Stub = create_stub()
+    self.metadata: Tuple = (('authorization', 'Key ' + self.PAT),)
+    self.user_app_id = resources_pb2.UserAppIDSet(user_id=self.USER_ID, app_id=self.APP_ID)
+
+  def _upload_inputs(self, inputs):
+    """
+    Upload inputs to clarifai platform dataset.
+    Args:
+      inputs: input protos
+    """
+    upload_count = 0
+    retry_upload = []  # those that fail to upload are stored for retries
+
+    for inp_proto in inputs:
+      response = self.STUB.PostInputs(
+          service_pb2.PostInputsRequest(user_app_id=self.user_app_id, inputs=[inp_proto]),)
+
+      MESSAGE_DUPLICATE_ID = "Input has a duplicate ID."
+      if response.status.code != status_code_pb2.SUCCESS:
+        try:
+          if response.inputs[0].status.details != MESSAGE_DUPLICATE_ID:
+            retry_upload.append(inp_proto)
+          print(f"Post inputs failed, status: {response.inputs[0].status.details}\n")
+          continue
+        except:
+          print(f"Post inputs failed, status: {response.status.details}\n")
+      else:
+        upload_count += 1
+
+    return retry_upload
+
+  def upload_annotations(self, inputs):
+    """
+    Upload image annotations to clarifai detection dataset.
+    """
+    upload_count = 0
+    retry_upload = []  # those that fail to upload are stored for retries
+
+    for annot_proto in inputs:
+      response = self.STUB.PostAnnotations(
+          service_pb2.PostAnnotationsRequest(
+              user_app_id=self.user_app_id, annotations=[annot_proto]),)
+
+      if response.status.code != status_code_pb2.SUCCESS:
+        try:
+          print(f"Post annotations failed, status:\n{response.annotations[0].status.details}\n")
+          continue
+        except:
+          print(f"Post annotations failed, status:\n{response.status.details}\n")
+          retry_upload.append(annot_proto)
+      else:
+        upload_count += 1
+
+    return retry_upload
+
+  def concurrent_inp_upload(self, inputs, chunks):
+    """
+    Upload images concurrently.
+    """
+    inp_threads = []
+    retry_upload = []
+
+    with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+      for inp_batch in tqdm(inputs, total=chunks + 1, desc="uploading inputs..."):
+        inp_threads.append(executor.submit(self._upload_inputs, inp_batch))
+        time.sleep(0.1)
+
+    for job in tqdm(
+        as_completed(inp_threads), total=chunks + 1, desc="retry uploading failed protos..."):
+      if job.result():
+        retry_upload.extend(job.result())
+
+    if len(
+        list(retry_upload)) > 0:  ## TODO: use api_with_retries functionality via upload_inputs()
+      _ = self._upload_inputs(retry_upload)
+
+  def concurrent_annot_upload(self, inputs, chunks):
+    """
+    Uploads annotations concurrently.
+    """
+    annot_threads = []
+    retry_annot_upload = []
+
+    with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+      for annot_batch in tqdm(inputs, total=chunks + 1, desc="uploading..."):
+        annot_threads.append(executor.submit(self.upload_annotations, annot_batch))
+        time.sleep(0.2)
+
+    for job in tqdm(
+        as_completed(annot_threads), total=chunks + 1, desc="retry uploading failed protos..."):
+      if job.result():
+        retry_annot_upload.extend(job.result())
+    if len(retry_annot_upload) > 0:
+      ## TODO: use api_with_retries functionality via upload_annotations()
+      _ = self.upload_annotations(retry_annot_upload)
+
+  def upload_to_clarifai(self):
+    """
+    Execute data upload.
+    """
+    datagen_object = None
+    if self.module_dir is None and self.zoo_dataset is None:
+      raise Exception("One of `from_module` and `from_zoo` must be \
+          specified. Both can't be None or defined at the same time.")
+    elif self.module_dir is not None and self.zoo_dataset is not None:
+      raise Exception("Use either of `from_module` or `from_zoo` \
+          but NOT both.")
+    elif self.module_dir is not None:
+      datagen_object = load_dataset(self.module_dir, self.split)
+    else:
+      datagen_object = load_zoo_dataset(self.zoo_dataset, self.split)
 
-
+    if self.task == "text_clf":
+      dataset_obj = TextClassificationDataset(datagen_object, self.dataset_id, self.split)
+      text_protos = dataset_obj._get_input_protos()
+      text_protos = dataset_obj._to_list(text_protos)
 
+      # Upload text
+      chunks = len(text_protos) // self.num_workers
+      chunked_text_protos = Chunker(text_protos, self.chunk_size).chunk()
 
-
-  """
-  Upload images concurrently for efficiency.
-  """
-  inp_threads = []
-  retry_upload = []
+      self.concurrent_inp_upload(chunked_text_protos, chunks)
 
-
-
-
-
+    elif self.task == "visual_detection":
+      dataset_obj = VisualDetectionDataset(datagen_object, self.dataset_id, self.split)
+      img_protos, annotation_protos = dataset_obj._get_input_protos()
+      img_protos = dataset_obj._to_list(img_protos)
 
-
-
-
-      retry_upload.extend(job.result())
-  if len(list(retry_upload)) > 0:  ## TODO: use api_with_retries functionality via upload_data()
-    _ = upload_data(config, retry_upload, stub)
+      # Upload images
+      chunks = len(img_protos) // self.num_workers
+      chunked_img_protos = Chunker(img_protos, self.chunk_size).chunk()
 
+      self.concurrent_inp_upload(chunked_img_protos, chunks)
 
-
-
-
-
-
-  retry_annot_upload = []
+      # Upload annotations:
+      print("Uploading annotations.......")
+      annotation_protos = dataset_obj._to_list(annotation_protos)
+      chunks_ = len(annotation_protos) // self.num_workers
+      chunked_annot_protos = Chunker(annotation_protos, self.chunk_size).chunk()
 
-
-    for annot_batch in tqdm(inputs, total=chunks + 1, desc="uploading..."):
-      annot_threads.append(executor.submit(upload_annotations, config, annot_batch, stub))
-      time.sleep(0.2)
+      self.concurrent_annot_upload(chunked_annot_protos, chunks_)
 
-
-
-
-
-
-  ## TODO: use api_with_retries functionality via upload_annotations()
-  _ = upload_annotations(config, retry_annot_upload, stub)
+    elif self.task == "visual_segmentation":
+      dataset_obj = VisualSegmentationDataset(datagen_object, self.dataset_id, self.split)
+      img_protos, mask_protos = dataset_obj._get_input_protos()
+      img_protos = dataset_obj._to_list(img_protos)
+      mask_protos = dataset_obj._to_list(mask_protos)
 
+      # Upload images
+      chunks = len(img_protos) // self.num_workers
+      chunked_img_protos = Chunker(img_protos, self.chunk_size).chunk()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  # Upload text
-  chunks = len(text_protos) // workers
-  chunked_text_protos = Chunker(text_protos, config["chunk_size"]).chunk()
-
-  concurrent_inp_upload(config, chunked_text_protos, workers, chunks, STUB)
-
-  elif task == "visual_det":
-    dataset_obj = VisualDetectionDataset(
-        config.data["visual_det_image_dir"],
-        config.data["visual_det_labels_dir"],
-        config.data["dataset_id"],
-        config["split"],
-        labels_from_text_file=False)
-    img_protos, annotation_protos = dataset_obj._get_input_protos()
-    img_protos = dataset_obj.to_list(img_protos)
-
-    # Upload images
-    chunks = len(img_protos) // workers
-    chunked_img_protos = Chunker(img_protos, config["chunk_size"]).chunk()
-
-    concurrent_inp_upload(config, chunked_img_protos, workers, chunks, STUB)
-
-    # Upload annotations:
-    print("Uploading annotations.......")
-    annotation_protos = dataset_obj.to_list(annotation_protos)
-    chunks_ = len(annotation_protos) // workers
-    chunked_annot_protos = Chunker(annotation_protos, config["chunk_size"]).chunk()
-
-    concurrent_annot_upload(config, chunked_annot_protos, workers, chunks_, STUB)
-
-  elif task == "visual_seg":
-    dataset_obj = VisualSegmentationDataset(config.data["visual_seg_image_dir"],
-                                            config.data["visual_seg_masks_dir"],
-                                            config.data["dataset_id"], config["split"])
-    img_protos, mask_protos = dataset_obj._get_input_protos()
-    img_protos = dataset_obj.to_list(img_protos)
-    mask_protos = dataset_obj.to_list(mask_protos)
-
-    # Upload images
-    chunks = len(img_protos) // workers
-    chunked_img_protos = Chunker(img_protos, config["chunk_size"]).chunk()
-
-    concurrent_inp_upload(config, chunked_img_protos, workers, chunks, STUB)
-
-    # Upload masks:
-    print("Uploading masks.......")
-    chunks_ = len(mask_protos) // workers
-    chunked_mask_protos = Chunker(mask_protos, config["chunk_size"]).chunk()
-
-    concurrent_annot_upload(config, chunked_mask_protos, workers, chunks_, STUB)
-
-  else:
-    dataset_obj = ImageClassificationDataset(config.data["clf_image_dir"],
-                                             config.data["dataset_id"], config["split"])
-    img_protos = dataset_obj._get_input_protos()
-    img_protos = dataset_obj.to_list(img_protos)
-
-    # Upload images
-    chunks = len(img_protos) // workers
-    chunked_img_protos = Chunker(img_protos, config["chunk_size"]).chunk()
-
-    concurrent_inp_upload(config, chunked_img_protos, workers, chunks, STUB)
-
-
-if __name__ == "__main__":
-  yaml_path = "./config.yaml"
-  config = OmegaConf.load(yaml_path)
-  upload_to_clarifai(config, task=config["task"])
+      #self.concurrent_inp_upload(chunked_img_protos, chunks)
+      # Upload masks:
+      print("Uploading masks.......")
+      chunks_ = len(mask_protos) // self.num_workers
+      chunked_mask_protos = Chunker(mask_protos, self.chunk_size).chunk()
+
+      self.concurrent_annot_upload(chunked_mask_protos, chunks_)
+    else:  # visual-classification & visual-captioning
+      dataset_obj = VisualClassificationDataset(datagen_object, self.dataset_id, self.split)
+      img_protos = dataset_obj._get_input_protos()
+      img_protos = dataset_obj._to_list(img_protos)
+
+      # Upload images
+      chunks = len(img_protos) // self.num_workers
+      chunked_img_protos = Chunker(img_protos, self.chunk_size).chunk()
+
+      self.concurrent_inp_upload(chunked_img_protos, chunks)
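For the `from_module` path, `load_dataset` above imports a `dataset.py` from the given directory and instantiates its `<name>Dataset` class. A minimal sketch of such a module (the directory layout and label values are hypothetical; the yield mirrors the positional `VisualSegmentationFeatures(image_path, class_names, annots, _id)` call in the COCO zoo module above):

#! dataset.py -- hypothetical from_module dataset following the documented convention
import os

from clarifai.data_upload.datasets.features import VisualSegmentationFeatures


class MySegmentationDataset:

  def __init__(self, split: str = "train"):
    self.split = split
    self.image_dir = os.path.join(os.curdir, split, "images")  # hypothetical layout

  def dataloader(self):
    """Yield one VisualSegmentationFeatures per image."""
    for i, fname in enumerate(sorted(os.listdir(self.image_dir))):
      image_path = os.path.join(self.image_dir, fname)
      # a single dummy full-image polygon with normalized (x, y) points;
      # real code would read polygons from your annotation files
      polygons = [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]]]
      class_names = ["object"]
      yield VisualSegmentationFeatures(image_path, class_names, polygons, i)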
clarifai/listing/installed_module_versions.py
CHANGED
@@ -24,12 +24,6 @@ def installed_module_versions_generator(stub: V2Stub,
 
   imv_success_status = {status_code_pb2.SUCCESS}
 
-  # HACK(zeiler): this is the number of default installed module versions every app has.
-  # so with pagination
-  seen = {
-      "module_manager_install": False,
-  }
-
   page = 1
   while True:
     response = stub.ListInstalledModuleVersions(
@@ -39,13 +33,8 @@ def installed_module_versions_generator(stub: V2Stub,
     if response.status.code not in imv_success_status:
       raise Exception("ListInstalledModuleVersions failed with response %r" % response)
     for item in response.installed_module_versions:
-
-      if not seen[item.id]:  # yield it once.
-        seen[item.id] = True
-        yield item
-      else:
-        yield item
+      yield item
     page += 1
-    # if we don't get a full page back
-    if len(response.installed_module_versions) < page_size:
+    # if we don't get a full page back we know we're done.
+    if len(response.installed_module_versions) < page_size:
       break