PyPI - clarifai - Versions diffs - 9.0.0__py3-none-any.whl → 9.3.1__py3-none-any.whl - Mend

clarifai 9.0.0py3-none-any.whl → 9.3.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

clarifai/data_upload/datasets/__init__.py +0 -0
clarifai/data_upload/datasets/base.py +67 -0
clarifai/data_upload/datasets/features.py +45 -0
clarifai/data_upload/datasets/image.py +236 -0
clarifai/data_upload/datasets/text.py +62 -0
clarifai/data_upload/datasets/zoo/__init__.py +0 -0
clarifai/data_upload/datasets/zoo/coco_captions.py +99 -0
clarifai/data_upload/datasets/zoo/coco_detection.py +129 -0
clarifai/data_upload/datasets/zoo/coco_segmentation.py +158 -0
clarifai/data_upload/examples.py +19 -0
clarifai/data_upload/upload.py +269 -168
clarifai/listing/installed_module_versions.py +3 -14
clarifai/listing/lister.py +40 -0
clarifai/listing/module_versions.py +42 -0
clarifai/listing/modules.py +36 -0
clarifai/modules/style.css +7 -4
{clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/METADATA +3 -3
{clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/RECORD +37 -13
clarifai_utils/data_upload/datasets/__init__.py +0 -0
clarifai_utils/data_upload/datasets/base.py +67 -0
clarifai_utils/data_upload/datasets/features.py +45 -0
clarifai_utils/data_upload/datasets/image.py +236 -0
clarifai_utils/data_upload/datasets/text.py +62 -0
clarifai_utils/data_upload/datasets/zoo/__init__.py +0 -0
clarifai_utils/data_upload/datasets/zoo/coco_captions.py +99 -0
clarifai_utils/data_upload/datasets/zoo/coco_detection.py +129 -0
clarifai_utils/data_upload/datasets/zoo/coco_segmentation.py +158 -0
clarifai_utils/data_upload/examples.py +19 -0
clarifai_utils/data_upload/upload.py +269 -168
clarifai_utils/listing/installed_module_versions.py +3 -14
clarifai_utils/listing/lister.py +40 -0
clarifai_utils/listing/module_versions.py +42 -0
clarifai_utils/listing/modules.py +36 -0
clarifai_utils/modules/style.css +7 -4
{clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/LICENSE +0 -0
{clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/WHEEL +0 -0
{clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/top_level.txt +0 -0

clarifai_utils/data_upload/datasets/text.py ADDED Viewed

@@ -0,0 +1,62 @@
+from typing import Iterator, List
+from clarifai_grpc.grpc.api import resources_pb2
+from google.protobuf.struct_pb2 import Struct
+from tqdm import tqdm
+from .base import ClarifaiDataset
+class TextClassificationDataset(ClarifaiDataset):
+  """
+  Upload text classification datasets to clarifai datasets
+  """
+  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
+    super().__init__(datagen_object, dataset_id, split)
+  def create_input_protos(self, text_input: str, labels: List[str], input_id: str, dataset_id: str,
+                          metadata: Struct) -> resources_pb2.Input:
+    """
+    Create input protos for each text, label input pairs.
+    Args:
+    	`text_input`: text string.
+    	`labels`: text labels
+    	`input_id: unique input id
+    	`dataset_id`: Clarifai dataset id
+    	`metadata`:input metadata
+    Returns:
+    	An input proto representing a single row input
+    """
+    input_proto = resources_pb2.Input(
+        id=input_id,
+        dataset_ids=[dataset_id],
+        data=resources_pb2.Data(
+            text=resources_pb2.Text(raw=text_input),
+            concepts=[
+                resources_pb2.Concept(
+                    id=f"id-{''.join(_label.split(' '))}", name=_label, value=1.)
+                for _label in labels
+            ],
+            metadata=metadata))
+    return input_proto
+  def _get_input_protos(self) -> Iterator:
+    """
+    Creates input protos for each data generator item.
+    Returns:
+    	A list of input protos
+    """
+    for i, item in tqdm(enumerate(self.datagen_object), desc="Loading text data"):
+      metadata = Struct()
+      text = item.text
+      labels = item.labels if isinstance(item.labels, list) else [item.labels]  # clarifai concept
+      input_id = f"{self.dataset_id}-{self.split}-{i}" if item.id is None else f"{self.split}-{str(item.id)}"
+      metadata.update({"label": labels, "split": self.split})
+      input_proto = self.create_input_protos(text, labels, input_id, self.dataset_id, metadata)
+      self._all_input_protos.append(input_proto)
+    return iter(self._all_input_protos)

clarifai_utils/data_upload/datasets/zoo/__init__.py ADDED Viewed

File without changes

clarifai_utils/data_upload/datasets/zoo/coco_captions.py ADDED Viewed

@@ -0,0 +1,99 @@
+#! COCO 2017 image captioning dataset
+import os
+import zipfile
+from glob import glob
+import requests
+from pycocotools.coco import COCO
+from tqdm import tqdm
+from ..features import VisualClassificationFeatures
+class COCOCaptionsDataset:
+  """COCO 2017 Image Captioning Dataset."""
+  def __init__(self, split: str = "train"):
+    """
+    Initialize coco dataset.
+    Args:
+      filenames: the coco zip filenames: List[str] to be downloaded if download=True,
+      data_dir: the local coco dataset directory.
+      split: "train" or "val"
+    """
+    self.filenames = {
+        "train": "train2017.zip",
+        "val": "val2017.zip",
+        "annotations": "annotations_trainval2017.zip"
+    }
+    self.split = split
+    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
+    self.data_dir = os.path.join(os.curdir, ".data")  # data storage directory
+    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
+  def coco_download(self, save_dir):
+    """Download coco dataset."""
+    if not os.path.exists(save_dir):
+      os.mkdir(save_dir)
+    #check if train, val and annotation dirs exist
+    #so that the coco2017 data isn't downloaded
+    for key, filename in self.filenames.items():
+      if os.path.exists(glob(f"{save_dir}/{key}*")[0]):
+        print("Dataset already downloded and extracted")
+        continue
+      print("-" * 80)
+      print(f"Downloading {filename}")
+      print("-" * 80)
+      if "annotations" in filename:
+        self.url = "http://images.cocodataset.org/annotations/"
+      response = requests.get(self.url + filename, stream=True)
+      response.raise_for_status()
+      with open(os.path.join(save_dir, filename), "wb") as _file:
+        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
+          if chunk:
+            _file.write(chunk)
+      print("Data download complete...")
+      #extract files
+      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
+      print(f" Extracting {filename} file")
+      zf.extractall(path=save_dir)
+      # Delete coco zip
+      print(f" Deleting {filename}")
+      os.remove(path=os.path.join(save_dir, filename))
+  def dataloader(self):
+    """
+    Transform coco image captioning data into clarifai proto compatible
+    format for upload.
+    Returns:
+      VisualClassificationFeatures type generator.
+    """
+    if isinstance(self.filenames, list) and len(self.filenames) == 3:  #train, val, annotations
+      self.coco_download(self.data_dir)
+      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "train" in i][0]
+      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "val" in i][0]
+      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "annotations" in i][0]
+    else:
+      raise Exception(f"`filenames` must be a list of atleast 3 coco zip file names; \
+      train, val and annotations. Found {len(self.filenames)} items instead.")
+    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" + f"captions_{self.split}*")[0]
+    coco = COCO(annot_file)
+    annot_ids = coco.getAnnIds()
+    annotations = coco.loadAnns(annot_ids)
+    for annot in annotations:
+      image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
+      f"{str(annot['image_id']).zfill(12)}*")[0]
+      # image_captioning and image classification datasets have the same
+      # image-label input feature formats
+      yield VisualClassificationFeatures(image_path, annot["caption"], annot["image_id"])

clarifai_utils/data_upload/datasets/zoo/coco_detection.py ADDED Viewed

@@ -0,0 +1,129 @@
+#! COCO 2017 detection dataset
+import os
+import zipfile
+from glob import glob
+import cv2
+import requests
+from pycocotools.coco import COCO
+from tqdm import tqdm
+from ..features import VisualDetectionFeatures
+class COCODetectionDataset:
+  """COCO 2017 Image Detection Dataset."""
+  def __init__(self, split: str = "train"):
+    """
+    Initialize coco dataset.
+    Args:
+      filenames: the coco zip filenames: List[str] to be downloaded if download=True,
+      data_dir: the local coco dataset directory.
+      split: "train" or "val"
+    """
+    self.filenames = {
+        "train": "train2017.zip",
+        "val": "val2017.zip",
+        "annotations": "annotations_trainval2017.zip"
+    }
+    self.split = split
+    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
+    self.data_dir = os.path.join(os.curdir, ".data")  # data storage directory
+    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
+  def coco_download(self, save_dir):
+    """Download coco dataset."""
+    if not os.path.exists(save_dir):
+      os.mkdir(save_dir)
+    #check if train*, val* and annotation* dirs exist
+    #so that the coco2017 data isn't downloaded
+    for key, filename in self.filenames.items():
+      if os.path.exists(glob(f"{save_dir}/{key}*")[0]):
+        print("dataset already downloded and extracted")
+        continue
+      print("-" * 80)
+      print(f"Downloading {filename}")
+      print("-" * 80)
+      if "annotations" in filename:
+        self.url = "http://images.cocodataset.org/annotations/"
+      response = requests.get(self.url + filename, stream=True)
+      response.raise_for_status()
+      with open(os.path.join(save_dir, filename), "wb") as _file:
+        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
+          if chunk:
+            _file.write(chunk)
+      print("Coco data download complete...")
+      #extract files
+      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
+      print(f" Extracting {filename} file")
+      zf.extractall(path=save_dir)
+      # Delete coco zip
+      print(f" Deleting {filename}")
+      os.remove(path=os.path.join(save_dir, filename))
+  def dataloader(self):
+    """
+    Transform coco object detection data into clarifai proto compatible
+    format for upload.
+    Returns:
+      VisualDetectionFeatures type generator.
+    """
+    if isinstance(self.filenames, list) and len(self.filenames) == 3:
+      self.coco_download(self.data_dir)
+      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "train" in i][0]
+      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "val" in i][0]
+      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "annotations" in i][0]
+    else:
+      raise Exception(f"`filenames` must be a list of atleast 2 coco zip file names; \
+      train, val and annotations. Found {len(self.filenames)} items instead.")
+    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" +\
+     f"instances_{self.split}*")[0]
+    coco = COCO(annot_file)
+    categories = coco.loadCats(coco.getCatIds())
+    cat_id_map = {category["id"]: category["name"] for category in categories}
+    cat_img_ids = {}
+    for cat_id in list(cat_id_map.keys()):
+      cat_img_ids[cat_id] = coco.getImgIds(catIds=[cat_id])
+    img_ids = []
+    for i in list(cat_img_ids.values()):
+      img_ids.extend(i)
+    #get annotations for each image id
+    for _id in img_ids:
+      annots = []  # bboxes
+      class_names = []
+      labels = [i for i in list(filter(lambda x: _id in cat_img_ids[x], cat_img_ids))]
+      image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
+      f"{str(_id).zfill(12)}*")[0]
+      image_height, image_width = cv2.imread(image_path).shape[:2]
+      for cat_id in labels:
+        annot_ids = coco.getAnnIds(imgIds=_id, catIds=[cat_id])
+        if len(annot_ids) > 0:
+          img_annotations = coco.loadAnns(annot_ids)
+          for ann in img_annotations:
+            class_names.append(cat_id_map[cat_id])
+            x_min = ann['bbox'][0] / image_width  #left_col
+            y_min = ann['bbox'][1] / image_height  #top_row
+            x_max = (ann['bbox'][0] + ann['bbox'][2]) / image_width  #right_col
+            y_max = (ann['bbox'][1] + ann['bbox'][3]) / image_height  #bottom_row
+            annots.append([x_min, y_min, x_max, y_max])
+        else:  # if no annotations for given image_id-cat_id pair
+          continue
+      assert len(class_names) == len(annots), f"Num classes must match num bbox annotations\
+      for a single image. Found {len(class_names)} classes and {len(annots)} bboxes."
+      yield VisualDetectionFeatures(image_path, class_names, annots, _id)

clarifai_utils/data_upload/datasets/zoo/coco_segmentation.py ADDED Viewed

@@ -0,0 +1,158 @@
+#! COCO 2017 Image Segmentation dataset
+import gc
+import os
+import zipfile
+from functools import reduce
+from glob import glob
+import cv2
+import numpy as np
+import requests
+from pycocotools import mask as maskUtils
+from pycocotools.coco import COCO
+from tqdm import tqdm
+from ..features import VisualSegmentationFeatures
+class COCOSegmentationDataset:
+  """COCO 2017 Image Segmentation Dataset."""
+  def __init__(self, split: str = "train"):
+    """
+    Initialize coco dataset.
+    Args:
+      filenames: the coco zip filenames: List[str] to be downloaded if download=True,
+      data_dir: the local coco dataset directory
+      split: "train" or "val"
+    """
+    self.filenames = {
+        "train": "train2017.zip",
+        "val": "val2017.zip",
+        "annotations": "annotations_trainval2017.zip"
+    }
+    self.split = split
+    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
+    self.data_dir = os.path.join(os.curdir, ".data")  # data storage dir
+    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
+  def coco_download(self, save_dir):
+    """Download coco dataset."""
+    if not os.path.exists(save_dir):
+      os.mkdir(save_dir)
+    #check if train, val and annotation dirs exist
+    #so that the coco2017 data isn't downloaded
+    for key, filename in self.filenames.items():
+      if os.path.exists(glob(f"{save_dir}/{key}*")[0]):
+        print("dataset already downloded and extracted")
+        continue
+      print("-" * 80)
+      print(f"Downloading {filename}")
+      print("-" * 80)
+      if "annotations" in filename:
+        self.url = "http://images.cocodataset.org/annotations/"
+      response = requests.get(self.url + filename, stream=True)
+      response.raise_for_status()
+      with open(os.path.join(save_dir, filename), "wb") as _file:
+        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
+          if chunk:
+            _file.write(chunk)
+      print("Coco data download complete...")
+      #extract files
+      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
+      print(f" Extracting {filename} file")
+      zf.extractall(path=save_dir)
+      # Delete coco zip
+      print(f" Deleting {filename}")
+      os.remove(path=os.path.join(save_dir, filename))
+  def dataloader(self):
+    """
+    Transform coco data into clarifai proto compatible format for upload.
+    Returns:
+      VisualSegmentationFeatures type generator.
+    """
+    if isinstance(self.filenames, list) and len(self.filenames) == 3:
+      self.coco_download(self.data_dir)
+      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "train" in i][0]
+      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "val" in i][0]
+      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
+      for i in os.listdir(self.data_dir) if "annotations" in i][0]
+    else:
+      raise Exception(f"`filenames` must be a list of atleast 3 coco zip file names; \
+      train, val and annotations. Found {len(self.filenames)} items instead.")
+    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" +\
+     f"instances_{self.split}*")[0]
+    coco = COCO(annot_file)
+    categories = coco.loadCats(coco.getCatIds())
+    cat_id_map = {category["id"]: category["name"] for category in categories}
+    cat_img_ids = {}
+    for cat_id in list(cat_id_map.keys()):
+      cat_img_ids[cat_id] = coco.getImgIds(catIds=[cat_id])
+    img_ids = []
+    for i in list(cat_img_ids.values()):
+      img_ids.extend(i)
+    #get annotations for each image id
+    for _id in img_ids:
+      annots = []  # polygons
+      class_names = []
+      labels = [i for i in list(filter(lambda x: _id in cat_img_ids[x], cat_img_ids))]
+      image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
+      f"{str(_id).zfill(12)}*")[0]
+      image_height, image_width = cv2.imread(image_path).shape[:2]
+      for cat_id in labels:
+        annot_ids = coco.getAnnIds(imgIds=_id, catIds=[cat_id])
+        if len(annot_ids) > 0:
+          img_annotations = coco.loadAnns(annot_ids)
+          for ann in img_annotations:
+            class_names.append(cat_id_map[cat_id])
+            # get polygons
+            if type(ann['segmentation']) == list:
+              for seg in ann['segmentation']:
+                poly = np.array(seg).reshape((int(len(seg) / 2), 2))
+                poly[:, 0], poly[:, 1] = poly[:, 0] / image_width, poly[:, 1] / image_height
+                annots.append(poly.tolist())  #[[x=col, y=row],...]
+            else:  # seg: {"counts":[...]}
+              if type(ann['segmentation']['counts']) == list:
+                rle = maskUtils.frPyObjects([ann['segmentation']], image_height, image_width)
+              else:
+                rle = ann['segmentation']
+              mask = maskUtils.decode(rle)  #binary mask
+              #convert mask to polygons and add to annots
+              contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+              polygons = []
+              for cont in contours:
+                if cont.size >= 6:
+                  polygons.append(cont.astype(float).flatten().tolist())
+              # store polygons in (x,y) pairs
+              polygons_flattened = reduce(lambda x, y: x + y, polygons)
+              del polygons
+              del contours
+              del mask
+              gc.collect()
+              polygons = np.array(polygons_flattened).reshape((int(len(polygons_flattened) / 2),
+                                                               2))
+              polygons[:, 0] = polygons[:, 0] / image_width
+              polygons[:, 1] = polygons[:, 1] / image_height
+              annots.append(polygons.tolist())  #[[x=col, y=row],...,[x=col, y=row]]
+        else:  # if no annotations for given image_id-cat_id pair
+          continue
+      assert len(class_names) == len(annots), f"Num classes must match num annotations\
+      for a single image. Found {len(class_names)} classes and {len(annots)} polygons."
+      yield VisualSegmentationFeatures(image_path, class_names, annots, _id)

clarifai_utils/data_upload/examples.py ADDED Viewed

@@ -0,0 +1,19 @@
+#! Execute dataset upload using the `from_module` upload feature
+from clarifai.data_upload.upload import UploadConfig
+text_upload_obj = UploadConfig(
+    user_id="",
+    app_id="",
+    pat="",
+    dataset_id="",
+    task="visual_clf",
+    from_module="./examples/image_classification/cifar10",
+    split="train",
+    portal="clarifai"  #clarifai(prod), dev or staging
+)
+## change the task and from_module arguments in UploadConfig() to upload
+## example food-101 dataset
+if __name__ == "__main__":
+  text_upload_obj.upload_to_clarifai()

clarifai 9.0.0__py3-none-any.whl → 9.3.1__py3-none-any.whl

clarifai 9.0.0py3-none-any.whl → 9.3.1py3-none-any.whl