clarifai 9.10.1__py3-none-any.whl → 9.10.3__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- clarifai/client/__init__.py +3 -2
- clarifai/client/app.py +39 -23
- clarifai/client/base.py +6 -6
- clarifai/client/dataset.py +113 -55
- clarifai/client/input.py +47 -55
- clarifai/client/model.py +27 -25
- clarifai/client/module.py +13 -11
- clarifai/client/runner.py +5 -3
- clarifai/client/search.py +29 -10
- clarifai/client/user.py +14 -8
- clarifai/client/workflow.py +22 -20
- clarifai/constants/dataset.py +22 -0
- clarifai/datasets/upload/base.py +9 -7
- clarifai/datasets/upload/features.py +3 -3
- clarifai/datasets/upload/image.py +49 -50
- clarifai/datasets/upload/loaders/coco_captions.py +26 -80
- clarifai/datasets/upload/loaders/coco_detection.py +56 -115
- clarifai/datasets/upload/loaders/coco_segmentation.py +69 -137
- clarifai/datasets/upload/loaders/imagenet_classification.py +2 -3
- clarifai/datasets/upload/loaders/xview_detection.py +3 -3
- clarifai/datasets/upload/text.py +16 -16
- clarifai/datasets/upload/utils.py +196 -21
- clarifai/utils/misc.py +21 -0
- clarifai/versions.py +1 -1
- {clarifai-9.10.1.dist-info → clarifai-9.10.3.dist-info}/METADATA +3 -3
- clarifai-9.10.3.dist-info/RECORD +96 -0
- clarifai-9.10.3.dist-info/top_level.txt +1 -0
- clarifai/auth/__init__.py +0 -6
- clarifai/auth/helper.py +0 -367
- clarifai/auth/register.py +0 -23
- clarifai/auth/stub.py +0 -127
- clarifai/datasets/upload/examples/README.md +0 -31
- clarifai/datasets/upload/examples/image_classification/__init__.py +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/__init__.py +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/cifar_small_test.csv +0 -10
- clarifai/datasets/upload/examples/image_classification/cifar10/cifar_small_train.csv +0 -10
- clarifai/datasets/upload/examples/image_classification/cifar10/dataset.py +0 -46
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_700.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_701.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_702.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_703.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_704.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_705.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_706.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_707.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_708.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/images/test_batch_709.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/__init__.py +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/dataset.py +0 -39
- clarifai/datasets/upload/examples/image_classification/food-101/images/beignets/1420783.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/beignets/3287885.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/beignets/3617075.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/beignets/38052.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/beignets/39147.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/hamburger/139558.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/hamburger/1636096.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/hamburger/2480925.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/hamburger/3385808.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/hamburger/3647386.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/prime_rib/1826869.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/prime_rib/2243245.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/prime_rib/259212.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/prime_rib/2842688.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/prime_rib/3035414.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/ramen/1545393.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/ramen/2427642.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/ramen/3520891.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/ramen/377566.jpg +0 -0
- clarifai/datasets/upload/examples/image_classification/food-101/images/ramen/503504.jpg +0 -0
- clarifai/datasets/upload/examples/text_classification/__init__.py +0 -0
- clarifai/datasets/upload/examples/text_classification/imdb_dataset/__init__.py +0 -0
- clarifai/datasets/upload/examples/text_classification/imdb_dataset/dataset.py +0 -42
- clarifai/datasets/upload/examples/text_classification/imdb_dataset/test.csv +0 -201
- clarifai/datasets/upload/examples/text_classification/imdb_dataset/train.csv +0 -201
- clarifai/datasets/upload/loaders/README.md +0 -49
- clarifai/models/model_serving/README.md +0 -155
- clarifai/models/model_serving/docs/custom_config.md +0 -33
- clarifai/models/model_serving/docs/dependencies.md +0 -11
- clarifai/models/model_serving/docs/inference_parameters.md +0 -134
- clarifai/models/model_serving/docs/model_types.md +0 -20
- clarifai/models/model_serving/docs/output.md +0 -28
- clarifai/models/model_serving/examples/README.md +0 -7
- clarifai/models/model_serving/examples/image_classification/README.md +0 -9
- clarifai/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/README.md +0 -11
- clarifai/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/config.json +0 -42
- clarifai/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/preprocessor_config.json +0 -15
- clarifai/models/model_serving/examples/image_classification/age_vit/config.pbtxt +0 -23
- clarifai/models/model_serving/examples/image_classification/age_vit/labels.txt +0 -9
- clarifai/models/model_serving/examples/image_classification/age_vit/requirements.txt +0 -7
- clarifai/models/model_serving/examples/text_classification/README.md +0 -9
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/README.md +0 -12
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/config.json +0 -34
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/special_tokens_map.json +0 -1
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/config.pbtxt +0 -21
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/labels.txt +0 -3
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/requirements.txt +0 -7
- clarifai/models/model_serving/examples/text_embedding/README.md +0 -9
- clarifai/models/model_serving/examples/text_to_image/README.md +0 -9
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/1/inference.py +0 -52
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/1/model.py +0 -60
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/config.pbtxt +0 -22
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/requirements.txt +0 -6
- clarifai/models/model_serving/examples/text_to_text/README.md +0 -10
- clarifai/models/model_serving/examples/text_to_text/bart-summarize/config.pbtxt +0 -20
- clarifai/models/model_serving/examples/text_to_text/bart-summarize/requirements.txt +0 -4
- clarifai/models/model_serving/examples/visual_detection/README.md +0 -11
- clarifai/models/model_serving/examples/visual_detection/yolov5x/config.pbtxt +0 -36
- clarifai/models/model_serving/examples/visual_detection/yolov5x/labels.txt +0 -80
- clarifai/models/model_serving/examples/visual_detection/yolov5x/requirements.txt +0 -12
- clarifai/models/model_serving/examples/visual_embedding/README.md +0 -9
- clarifai/models/model_serving/examples/visual_embedding/vit-base/config.pbtxt +0 -22
- clarifai/models/model_serving/examples/visual_embedding/vit-base/requirements.txt +0 -5
- clarifai/models/model_serving/examples/visual_segmentation/README.md +0 -9
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/config.pbtxt +0 -24
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/labels.txt +0 -18
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/requirements.txt +0 -5
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +0 -24
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +0 -18
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +0 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +0 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +0 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +0 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +0 -28
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +0 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +0 -18
- clarifai/modules/README.md +0 -5
- clarifai/modules/style.css +0 -217
- clarifai-9.10.1.dist-info/RECORD +0 -386
- clarifai-9.10.1.dist-info/top_level.txt +0 -2
- clarifai_utils/__init__.py +0 -0
- clarifai_utils/auth/__init__.py +0 -6
- clarifai_utils/auth/helper.py +0 -367
- clarifai_utils/auth/register.py +0 -23
- clarifai_utils/auth/stub.py +0 -127
- clarifai_utils/cli.py +0 -0
- clarifai_utils/client/__init__.py +0 -16
- clarifai_utils/client/app.py +0 -684
- clarifai_utils/client/auth/__init__.py +0 -4
- clarifai_utils/client/auth/helper.py +0 -367
- clarifai_utils/client/auth/register.py +0 -23
- clarifai_utils/client/auth/stub.py +0 -127
- clarifai_utils/client/base.py +0 -131
- clarifai_utils/client/dataset.py +0 -442
- clarifai_utils/client/input.py +0 -892
- clarifai_utils/client/lister.py +0 -54
- clarifai_utils/client/model.py +0 -575
- clarifai_utils/client/module.py +0 -94
- clarifai_utils/client/runner.py +0 -161
- clarifai_utils/client/search.py +0 -239
- clarifai_utils/client/user.py +0 -253
- clarifai_utils/client/workflow.py +0 -223
- clarifai_utils/constants/model.py +0 -4
- clarifai_utils/constants/search.py +0 -2
- clarifai_utils/datasets/__init__.py +0 -0
- clarifai_utils/datasets/export/__init__.py +0 -0
- clarifai_utils/datasets/export/inputs_annotations.py +0 -222
- clarifai_utils/datasets/upload/__init__.py +0 -0
- clarifai_utils/datasets/upload/base.py +0 -66
- clarifai_utils/datasets/upload/examples/README.md +0 -31
- clarifai_utils/datasets/upload/examples/image_classification/__init__.py +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/__init__.py +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/cifar_small_test.csv +0 -10
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/cifar_small_train.csv +0 -10
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/dataset.py +0 -46
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_700.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_701.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_702.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_703.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_704.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_705.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_706.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_707.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_708.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/images/test_batch_709.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/__init__.py +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/dataset.py +0 -39
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/beignets/1420783.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/beignets/3287885.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/beignets/3617075.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/beignets/38052.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/beignets/39147.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/hamburger/139558.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/hamburger/1636096.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/hamburger/2480925.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/hamburger/3385808.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/hamburger/3647386.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/prime_rib/1826869.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/prime_rib/2243245.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/prime_rib/259212.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/prime_rib/2842688.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/prime_rib/3035414.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/ramen/1545393.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/ramen/2427642.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/ramen/3520891.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/ramen/377566.jpg +0 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/images/ramen/503504.jpg +0 -0
- clarifai_utils/datasets/upload/examples/text_classification/__init__.py +0 -0
- clarifai_utils/datasets/upload/examples/text_classification/imdb_dataset/__init__.py +0 -0
- clarifai_utils/datasets/upload/examples/text_classification/imdb_dataset/dataset.py +0 -42
- clarifai_utils/datasets/upload/examples/text_classification/imdb_dataset/test.csv +0 -201
- clarifai_utils/datasets/upload/examples/text_classification/imdb_dataset/train.csv +0 -201
- clarifai_utils/datasets/upload/features.py +0 -44
- clarifai_utils/datasets/upload/image.py +0 -165
- clarifai_utils/datasets/upload/loaders/README.md +0 -49
- clarifai_utils/datasets/upload/loaders/__init__.py +0 -0
- clarifai_utils/datasets/upload/loaders/coco_captions.py +0 -103
- clarifai_utils/datasets/upload/loaders/coco_detection.py +0 -134
- clarifai_utils/datasets/upload/loaders/coco_segmentation.py +0 -166
- clarifai_utils/datasets/upload/loaders/imagenet_classification.py +0 -59
- clarifai_utils/datasets/upload/loaders/xview_detection.py +0 -148
- clarifai_utils/datasets/upload/text.py +0 -53
- clarifai_utils/datasets/upload/utils.py +0 -63
- clarifai_utils/errors.py +0 -89
- clarifai_utils/models/__init__.py +0 -0
- clarifai_utils/models/api.py +0 -283
- clarifai_utils/models/model_serving/README.md +0 -155
- clarifai_utils/models/model_serving/__init__.py +0 -12
- clarifai_utils/models/model_serving/cli/__init__.py +0 -12
- clarifai_utils/models/model_serving/cli/deploy_cli.py +0 -123
- clarifai_utils/models/model_serving/cli/model_zip.py +0 -61
- clarifai_utils/models/model_serving/cli/repository.py +0 -87
- clarifai_utils/models/model_serving/constants.py +0 -1
- clarifai_utils/models/model_serving/docs/custom_config.md +0 -33
- clarifai_utils/models/model_serving/docs/dependencies.md +0 -11
- clarifai_utils/models/model_serving/docs/inference_parameters.md +0 -134
- clarifai_utils/models/model_serving/docs/model_types.md +0 -20
- clarifai_utils/models/model_serving/docs/output.md +0 -28
- clarifai_utils/models/model_serving/examples/README.md +0 -7
- clarifai_utils/models/model_serving/examples/image_classification/README.md +0 -9
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/1/__init__.py +0 -0
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/1/inference.py +0 -56
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/1/model.py +0 -61
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/README.md +0 -11
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/config.json +0 -42
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/preprocessor_config.json +0 -15
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/config.pbtxt +0 -23
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/labels.txt +0 -9
- clarifai_utils/models/model_serving/examples/image_classification/age_vit/requirements.txt +0 -7
- clarifai_utils/models/model_serving/examples/text_classification/README.md +0 -9
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/1/__init__.py +0 -0
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/1/inference.py +0 -55
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/1/model.py +0 -61
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/README.md +0 -12
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/config.json +0 -34
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/special_tokens_map.json +0 -1
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/config.pbtxt +0 -21
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/labels.txt +0 -3
- clarifai_utils/models/model_serving/examples/text_classification/xlm-roberta/requirements.txt +0 -7
- clarifai_utils/models/model_serving/examples/text_embedding/README.md +0 -9
- clarifai_utils/models/model_serving/examples/text_to_image/README.md +0 -9
- clarifai_utils/models/model_serving/examples/text_to_image/sd-v1.5/1/__init__.py +0 -0
- clarifai_utils/models/model_serving/examples/text_to_image/sd-v1.5/1/inference.py +0 -52
- clarifai_utils/models/model_serving/examples/text_to_image/sd-v1.5/1/model.py +0 -60
- clarifai_utils/models/model_serving/examples/text_to_image/sd-v1.5/config.pbtxt +0 -22
- clarifai_utils/models/model_serving/examples/text_to_image/sd-v1.5/requirements.txt +0 -6
- clarifai_utils/models/model_serving/examples/text_to_text/README.md +0 -10
- clarifai_utils/models/model_serving/examples/text_to_text/bart-summarize/1/__init__.py +0 -0
- clarifai_utils/models/model_serving/examples/text_to_text/bart-summarize/1/inference.py +0 -47
- clarifai_utils/models/model_serving/examples/text_to_text/bart-summarize/1/model.py +0 -60
- clarifai_utils/models/model_serving/examples/text_to_text/bart-summarize/config.pbtxt +0 -20
- clarifai_utils/models/model_serving/examples/text_to_text/bart-summarize/requirements.txt +0 -4
- clarifai_utils/models/model_serving/examples/visual_detection/README.md +0 -11
- clarifai_utils/models/model_serving/examples/visual_detection/yolov5x/1/inference.py +0 -72
- clarifai_utils/models/model_serving/examples/visual_detection/yolov5x/1/model.py +0 -61
- clarifai_utils/models/model_serving/examples/visual_detection/yolov5x/config.pbtxt +0 -36
- clarifai_utils/models/model_serving/examples/visual_detection/yolov5x/labels.txt +0 -80
- clarifai_utils/models/model_serving/examples/visual_detection/yolov5x/requirements.txt +0 -12
- clarifai_utils/models/model_serving/examples/visual_embedding/README.md +0 -9
- clarifai_utils/models/model_serving/examples/visual_embedding/vit-base/1/__init__.py +0 -0
- clarifai_utils/models/model_serving/examples/visual_embedding/vit-base/1/inference.py +0 -51
- clarifai_utils/models/model_serving/examples/visual_embedding/vit-base/1/model.py +0 -60
- clarifai_utils/models/model_serving/examples/visual_embedding/vit-base/config.pbtxt +0 -22
- clarifai_utils/models/model_serving/examples/visual_embedding/vit-base/requirements.txt +0 -5
- clarifai_utils/models/model_serving/examples/visual_segmentation/README.md +0 -9
- clarifai_utils/models/model_serving/examples/visual_segmentation/segformer-b2/1/__init__.py +0 -0
- clarifai_utils/models/model_serving/examples/visual_segmentation/segformer-b2/1/inference.py +0 -55
- clarifai_utils/models/model_serving/examples/visual_segmentation/segformer-b2/1/model.py +0 -60
- clarifai_utils/models/model_serving/examples/visual_segmentation/segformer-b2/config.pbtxt +0 -24
- clarifai_utils/models/model_serving/examples/visual_segmentation/segformer-b2/labels.txt +0 -18
- clarifai_utils/models/model_serving/examples/visual_segmentation/segformer-b2/requirements.txt +0 -5
- clarifai_utils/models/model_serving/model_config/__init__.py +0 -14
- clarifai_utils/models/model_serving/model_config/config.py +0 -302
- clarifai_utils/models/model_serving/model_config/inference_parameter.py +0 -124
- clarifai_utils/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +0 -24
- clarifai_utils/models/model_serving/model_config/model_types_config/text-classifier.yaml +0 -18
- clarifai_utils/models/model_serving/model_config/model_types_config/text-embedder.yaml +0 -18
- clarifai_utils/models/model_serving/model_config/model_types_config/text-to-image.yaml +0 -18
- clarifai_utils/models/model_serving/model_config/model_types_config/text-to-text.yaml +0 -18
- clarifai_utils/models/model_serving/model_config/model_types_config/visual-classifier.yaml +0 -18
- clarifai_utils/models/model_serving/model_config/model_types_config/visual-detector.yaml +0 -28
- clarifai_utils/models/model_serving/model_config/model_types_config/visual-embedder.yaml +0 -18
- clarifai_utils/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +0 -18
- clarifai_utils/models/model_serving/model_config/serializer.py +0 -134
- clarifai_utils/models/model_serving/models/__init__.py +0 -12
- clarifai_utils/models/model_serving/models/default_test.py +0 -275
- clarifai_utils/models/model_serving/models/inference.py +0 -42
- clarifai_utils/models/model_serving/models/model_types.py +0 -265
- clarifai_utils/models/model_serving/models/output.py +0 -124
- clarifai_utils/models/model_serving/models/pb_model.py +0 -74
- clarifai_utils/models/model_serving/models/test.py +0 -64
- clarifai_utils/models/model_serving/pb_model_repository.py +0 -101
- clarifai_utils/modules/README.md +0 -5
- clarifai_utils/modules/__init__.py +0 -0
- clarifai_utils/modules/css.py +0 -60
- clarifai_utils/modules/pages.py +0 -42
- clarifai_utils/modules/style.css +0 -217
- clarifai_utils/runners/__init__.py +0 -0
- clarifai_utils/runners/example.py +0 -33
- clarifai_utils/schema/search.py +0 -69
- clarifai_utils/urls/helper.py +0 -103
- clarifai_utils/utils/__init__.py +0 -0
- clarifai_utils/utils/logging.py +0 -90
- clarifai_utils/utils/misc.py +0 -33
- clarifai_utils/utils/model_train.py +0 -157
- clarifai_utils/versions.py +0 -6
- clarifai_utils/workflows/__init__.py +0 -0
- clarifai_utils/workflows/export.py +0 -68
- clarifai_utils/workflows/utils.py +0 -59
- clarifai_utils/workflows/validate.py +0 -67
- {clarifai-9.10.1.dist-info → clarifai-9.10.3.dist-info}/LICENSE +0 -0
- {clarifai-9.10.1.dist-info → clarifai-9.10.3.dist-info}/WHEEL +0 -0
- {clarifai-9.10.1.dist-info → clarifai-9.10.3.dist-info}/entry_points.txt +0 -0
clarifai_utils/datasets/upload/features.py +0 -44

```diff
@@ -1,44 +0,0 @@
-#! dataset output features (output from preprocessing & input to clarifai data proto builders)
-from dataclasses import dataclass
-from typing import List, Optional, Union
-
-
-@dataclass
-class TextFeatures:
-  """Text classification datasets preprocessing output features."""
-  text: str
-  labels: List[Union[str, int]]  # List[str or int] to cater for multi-class tasks
-  id: Optional[int] = None  # text_id
-  metadata: Optional[dict] = None
-
-
-@dataclass
-class VisualClassificationFeatures:
-  """Image classification datasets preprocessing output features."""
-  image_path: str
-  label: Union[str, int]
-  geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
-  id: Optional[int] = None  # image_id
-  metadata: Optional[dict] = None
-
-
-@dataclass
-class VisualDetectionFeatures:
-  """Image Detection datasets preprocessing output features."""
-  image_path: str
-  classes: List[Union[str, int]]
-  bboxes: List[List[float]]
-  geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
-  id: Optional[int] = None  # image_id
-  metadata: Optional[dict] = None
-
-
-@dataclass
-class VisualSegmentationFeatures:
-  """Image Segmentation datasets preprocessing output features."""
-  image_path: str
-  classes: List[Union[str, int]]
-  polygons: List[List[List[float]]]
-  geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
-  id: Optional[int] = None  # image_id
-  metadata: Optional[dict] = None
```
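These dataclasses are the contract between a loader's `__getitem__` and the Clarifai upload pipeline. As a minimal sketch of how one item is populated (the path, ids, and coordinates below are made up for illustration, and the import assumes the surviving `clarifai.datasets.upload.features` module keeps the same field layout):

```python
from clarifai.datasets.upload.features import VisualDetectionFeatures

# One image with two bounding boxes, already normalized to [x_min, y_min, x_max, y_max].
item = VisualDetectionFeatures(
    image_path="data/val2017/000000000139.jpg",  # hypothetical local path
    classes=["person", "chair"],
    bboxes=[[0.10, 0.20, 0.45, 0.90], [0.50, 0.55, 0.70, 0.80]],
    geo_info=[-122.4194, 37.7749],  # [Longitude, Latitude]
    id=139,  # image_id
    metadata={"split": "val"})
```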
clarifai_utils/datasets/upload/image.py +0 -165

```diff
@@ -1,165 +0,0 @@
-import os
-from concurrent.futures import ThreadPoolExecutor
-from typing import Iterator, List, Tuple
-
-from clarifai_grpc.grpc.api import resources_pb2
-from google.protobuf.struct_pb2 import Struct
-
-from .base import ClarifaiDataset
-
-
-class VisualClassificationDataset(ClarifaiDataset):
-
-  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
-    super().__init__(datagen_object, dataset_id, split)
-
-  def _extract_protos(self, batch_input_ids: List[str]
-                     ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
-    """Create input image and annotation protos for batch of input ids.
-    Args:
-      batch_input_ids: List of input IDs to retrieve the protos for.
-    Returns:
-      input_protos: List of input protos.
-      annotation_protos: List of annotation protos.
-    """
-    input_protos, annotation_protos = [], []
-
-    def process_datagen_item(id):
-      datagen_item = self.datagen_object[id]
-      metadata = Struct()
-      image_path = datagen_item.image_path
-      label = datagen_item.label if isinstance(datagen_item.label,
-                                               list) else [datagen_item.label]  # clarifai concept
-      input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.dataset_id}-{self.split}-{str(datagen_item.id)}"
-      geo_info = datagen_item.geo_info
-      if datagen_item.metadata is not None:
-        metadata.update(datagen_item.metadata)
-      else:
-        metadata.update({"filename": os.path.basename(image_path), "split": self.split})
-
-      self.all_input_ids[id] = input_id
-      input_protos.append(
-          self.input_object.get_input_from_file(
-              input_id=input_id,
-              image_file=image_path,
-              dataset_id=self.dataset_id,
-              labels=label,
-              geo_info=geo_info,
-              metadata=metadata))
-
-    with ThreadPoolExecutor(max_workers=4) as executor:
-      futures = [executor.submit(process_datagen_item, id) for id in batch_input_ids]
-      for job in futures:
-        job.result()
-
-    return input_protos, annotation_protos
-
-
-class VisualDetectionDataset(ClarifaiDataset):
-  """Visual detection dataset proto class."""
-
-  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
-    super().__init__(datagen_object, dataset_id, split)
-
-  def _extract_protos(self, batch_input_ids: List[int]
-                     ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
-    """Create input image protos for each data generator item.
-    Args:
-      batch_input_ids: List of input IDs to retrieve the protos for.
-    Returns:
-      input_protos: List of input protos.
-      annotation_protos: List of annotation protos.
-    """
-    input_protos, annotation_protos = [], []
-
-    def process_datagen_item(id):
-      datagen_item = self.datagen_object[id]
-      metadata = Struct()
-      image = datagen_item.image_path
-      labels = datagen_item.classes  # list:[l1,...,ln]
-      bboxes = datagen_item.bboxes  # [[xmin,ymin,xmax,ymax],...,[xmin,ymin,xmax,ymax]]
-      input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.dataset_id}-{self.split}-{str(datagen_item.id)}"
-      if datagen_item.metadata is not None:
-        metadata.update(datagen_item.metadata)
-      else:
-        metadata.update({"filename": os.path.basename(image), "split": self.split})
-      geo_info = datagen_item.geo_info
-
-      self.all_input_ids[id] = input_id
-      input_protos.append(
-          self.input_object.get_input_from_file(
-              input_id=input_id,
-              image_file=image,
-              dataset_id=self.dataset_id,
-              geo_info=geo_info,
-              metadata=metadata))
-      # iter over bboxes and classes
-      # one id could have more than one bbox and label
-      for i in range(len(bboxes)):
-        annotation_protos.append(
-            self.input_object.get_annotation_proto(
-                input_id=input_id, label=labels[i], annotations=bboxes[i]))
-
-    with ThreadPoolExecutor(max_workers=4) as executor:
-      futures = [executor.submit(process_datagen_item, id) for id in batch_input_ids]
-      for job in futures:
-        job.result()
-
-    return input_protos, annotation_protos
-
-
-class VisualSegmentationDataset(ClarifaiDataset):
-  """Visual segmentation dataset proto class."""
-
-  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
-    super().__init__(datagen_object, dataset_id, split)
-
-  def _extract_protos(self, batch_input_ids: List[str]
-                     ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
-    """Create input image and annotation protos for batch of input ids.
-    Args:
-      batch_input_ids: List of input IDs to retrieve the protos for.
-    Returns:
-      input_protos: List of input protos.
-      annotation_protos: List of annotation protos.
-    """
-    input_protos, annotation_protos = [], []
-
-    def process_datagen_item(id):
-      datagen_item = self.datagen_object[id]
-      metadata = Struct()
-      image = datagen_item.image_path
-      labels = datagen_item.classes
-      _polygons = datagen_item.polygons  # list of polygons: [[[x,y],...,[x,y]],...]
-      input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.dataset_id}-{self.split}-{str(datagen_item.id)}"
-      if datagen_item.metadata is not None:
-        metadata.update(datagen_item.metadata)
-      else:
-        metadata.update({"filename": os.path.basename(image), "split": self.split})
-      geo_info = datagen_item.geo_info
-
-      self.all_input_ids[id] = input_id
-      input_protos.append(
-          self.input_object.get_input_from_file(
-              input_id=input_id,
-              image_file=image,
-              dataset_id=self.dataset_id,
-              geo_info=geo_info,
-              metadata=metadata))
-
-      ## Iterate over each masked image and create a proto for upload to clarifai
-      ## The length of masks/polygons-list and labels must be equal
-      for i, _polygon in enumerate(_polygons):
-        try:
-          annotation_protos.append(
-              self.input_object.get_mask_proto(
-                  input_id=input_id, label=labels[i], polygons=_polygon))
-        except IndexError:
-          continue
-
-    with ThreadPoolExecutor(max_workers=4) as executor:
-      futures = [executor.submit(process_datagen_item, id) for id in batch_input_ids]
-      for job in futures:
-        job.result()
-
-    return input_protos, annotation_protos
```
clarifai_utils/datasets/upload/loaders/README.md +0 -49

```diff
@@ -1,49 +0,0 @@
-## Dataset Loaders
-
-A collection of data preprocessing modules for popular public datasets to allow for compatible upload into Clarifai user app datasets.
-
-## Usage
-
-If a dataset module exists in the zoo, uploading the specific dataset can be easily done by simply creating a python script (or via commandline) and specifying the dataset module name in the `dataset_loader` parameter of the `Dataset` class, `upload_dataset` method .i.e.
-
-```python
-from clarifai.client.app import App
-
-app = App(app_id="", user_id="")
-# Create a dataset in Clarifai App
-dataset = app.create_dataset(dataset_id="")
-# execute data upload to Clarifai app dataset
-dataset.upload_dataset(task='visual_segmentation', split="train", dataset_loader='coco_segmentation')
-```
-
-## Dataset Loaders
-
-| dataset name | task | module name (.py) | splits |
-| --- | --- | --- | --- |
-| [COCO 2017](https://cocodataset.org/#download) | Detection | `coco_detection` | `train`, `val` |
-| | Segmentation | `coco_segmentation` | `train`, `val` |
-| | Captions | `coco_captions` | `train`, `val` |
-|[xVIEW](http://xviewdataset.org/) | Detection | `xview_detection` | `train`
-| [ImageNet](https://www.image-net.org/) | Classification | `imagenet_classification` | `train`
-## Contributing Modules
-
-A dataloader (preprocessing) module is a python script that contains a dataloader class which implements data download (to download the dataloader from a source to local disk dir) & extraction and dataloader methods.
-
-The class naming convention is `<datasetname>DataLoader`. The dataset class must accept `split` as the only argument in the `__init__` method and the `__getitem__` method must return either of `VisualClassificationFeatures()`, `VisualDetectionFeatures()`, `VisualSegmentationFeatures()` or `TextFeatures()` as defined in [clarifai/datasets/upload/features.py](../features.py). Other methods can be added as seen fit but must be inherited from parent `ClarifaiDataLoader` base class [clarifai/datasets/upload/base.py](../base.py).
-Reference can be taken from the existing dataset modules in the zoo for development.
-
-## Notes
-
-* Dataloaders in the zoo by default first create a `data` directory in the zoo directory then download the data into this `data` directory, preprocess the data and finally execute upload to a Clarifai app dataset. For instance with the COCO dataset modules above, the coco2017 dataset is by default downloaded first into a `data` directory, extracted and then preprocessing is performed on it and finally uploaded to Clarifai.
-
-* Taking the above into consideration, to avoid the scripts re-downloading data you already have locally, create a `data` directory in the loaders directory and move your extracted data there. **Ensure that the extracted folder/file names and file structure MATCH those when the downloaded zips are extracted.**
-
-* COCO Format: To reuse the coco modules above on your coco format data, ensure the criteria in the two points above is adhered to first. If so, pass the coco module name from any of the above in the loaders to the `dataset_loader=` parameter in `upload_dataset()`.
-
-* xVIEW Dataset: To upload, you have to register and download images,label from [xviewdataset](http://xviewdataset.org/#dataset) follow the above mentioned steps to place extracted folder in `data` directory. Finally pass the xview module name to `dataset_loader=` parameter in `upload_dataset()`.
-
-* ImageNet Dataset: ImageNet Dataset should be downloaded and placed in the 'data' folder along with the [label mapping file](https://www.kaggle.com/competitions/imagenet-object-localization-challenge/data?select=LOC_synset_mapping.txt).
-
-    <data>/
-    ├── train/
-    ├── LOC_synset_mapping.txt
```
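The contract that README describes — a `<datasetname>DataLoader` class whose `__init__` accepts only `split` and whose `__getitem__` returns one of the feature dataclasses — can be sketched as follows. The dataset name, CSV layout, and paths here are hypothetical:

```python
import csv
import os

from clarifai.datasets.upload.base import ClarifaiDataLoader
from clarifai.datasets.upload.features import VisualClassificationFeatures


class PetsDataLoader(ClarifaiDataLoader):  # hypothetical dataset
  """Reads image_path,label rows from a data/<split>.csv next to this module."""

  def __init__(self, split: str = "train"):
    self.split = split
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
    with open(os.path.join(data_dir, f"{split}.csv")) as f:
      self.rows = list(csv.reader(f))  # each row: [image_path, label]

  def __len__(self):
    return len(self.rows)

  def __getitem__(self, idx):
    image_path, label = self.rows[idx]
    return VisualClassificationFeatures(image_path=image_path, label=label, id=idx)
```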
clarifai_utils/datasets/upload/loaders/__init__.py +0 -0

File without changes
clarifai_utils/datasets/upload/loaders/coco_captions.py +0 -103

```diff
@@ -1,103 +0,0 @@
-#! COCO 2017 image captioning dataset
-
-import os
-import zipfile
-from glob import glob
-
-import requests
-from pycocotools.coco import COCO
-from tqdm import tqdm
-
-from clarifai.datasets.upload.base import ClarifaiDataLoader
-
-from ..features import VisualClassificationFeatures
-
-
-class COCOCaptionsDataLoader(ClarifaiDataLoader):
-  """COCO 2017 Image Captioning Dataset."""
-
-  def __init__(self, split: str = "train"):
-    """Initialize coco dataset.
-    Args:
-      filenames: the coco zip filenames: Dict[str, str] to be downloaded if download=True,
-      data_dir: the local coco dataset directory.
-      split: "train" or "val"
-    """
-    self.filenames = {
-        "train": "train2017.zip",
-        "val": "val2017.zip",
-        "annotations": "annotations_trainval2017.zip"
-    }
-    self.split = split
-    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
-    self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                                 "data")  # data storage directory
-    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
-
-    self.load_data()
-
-  def coco_download(self, save_dir):
-    """Download coco dataset."""
-    if not os.path.exists(save_dir):
-      os.mkdir(save_dir)
-
-    #check if train, val and annotation dirs exist
-    #so that the coco2017 data isn't downloaded
-    for key, filename in self.filenames.items():
-      existing_files = glob(f"{save_dir}/{key}*")
-      if existing_files:
-        print(f"{key} dataset already downloded and extracted")
-        continue
-
-      print("-" * 80)
-      print(f"Downloading {filename}")
-      print("-" * 80)
-
-      if "annotations" in filename:
-        self.url = "http://images.cocodataset.org/annotations/"
-
-      response = requests.get(self.url + filename, stream=True)
-      response.raise_for_status()
-      with open(os.path.join(save_dir, filename), "wb") as _file:
-        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
-          if chunk:
-            _file.write(chunk)
-      print("Data download complete...")
-
-      #extract files
-      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
-      print(f" Extracting {filename} file")
-      zf.extractall(path=save_dir)
-      # Delete coco zip
-      print(f" Deleting {filename}")
-      os.remove(path=os.path.join(save_dir, filename))
-
-  def load_data(self):
-    if isinstance(self.filenames, dict) and len(self.filenames) == 3:
-      self.coco_download(self.data_dir)
-      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "train" in i][0]
-      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "val" in i][0]
-
-      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "annotations" in i][0]
-    else:
-      raise Exception(f"`filenames` must be a dict of atleast 2 coco zip file names; \
-      train, val and annotations. Found {len(self.filenames)} items instead.")
-
-    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" + f"captions_{self.split}*")[0]
-    coco = COCO(annot_file)
-    annot_ids = coco.getAnnIds()
-    self.annotations = coco.loadAnns(annot_ids)
-
-  def __len__(self):
-    return len(self.annotations)
-
-  def __getitem__(self, idx):
-    annot = self.annotations[idx]
-    image_path = glob(
-        os.path.join(self.extracted_coco_dirs[self.split],
-                     f"{str(annot['image_id']).zfill(12)}*"))[0]
-
-    return VisualClassificationFeatures(image_path, annot["caption"], id=annot["image_id"])
```
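The `glob` in `__getitem__` can resolve the image file from the annotation alone because COCO 2017 names each image with its zero-padded 12-digit image id:

```python
str(139).zfill(12)  # -> '000000000139', matching e.g. val2017/000000000139.jpg
```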
clarifai_utils/datasets/upload/loaders/coco_detection.py +0 -134

```diff
@@ -1,134 +0,0 @@
-#! COCO 2017 detection dataset
-
-import os
-import zipfile
-from glob import glob
-
-import cv2
-import requests
-from pycocotools.coco import COCO
-from tqdm import tqdm
-
-from clarifai.datasets.upload.base import ClarifaiDataLoader
-
-from ..features import VisualDetectionFeatures
-
-
-class COCODetectionDataLoader(ClarifaiDataLoader):
-  """COCO 2017 Image Detection Dataset."""
-
-  def __init__(self, split: str = "train"):
-    """
-    Initialize coco dataset.
-    Args:
-      filenames: the coco zip filenames: Dict[str, str] to be downloaded if download=True,
-      data_dir: the local coco dataset directory.
-      split: "train" or "val"
-    """
-    self.filenames = {
-        "train": "train2017.zip",
-        "val": "val2017.zip",
-        "annotations": "annotations_trainval2017.zip"
-    }
-    self.split = split
-    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
-    self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                                 "data")  # data storage directory
-    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
-
-    self.load_data()
-
-  def coco_download(self, save_dir):
-    """Download coco dataset."""
-    if not os.path.exists(save_dir):
-      os.mkdir(save_dir)
-
-    #check if train*, val* and annotation* dirs exist
-    #so that the coco2017 data isn't downloaded
-    for key, filename in self.filenames.items():
-      existing_files = glob(f"{save_dir}/{key}*")
-      if existing_files:
-        print(f"{key} dataset already downloded and extracted")
-        continue
-
-      print("-" * 80)
-      print(f"Downloading {filename}")
-      print("-" * 80)
-
-      if "annotations" in filename:
-        self.url = "http://images.cocodataset.org/annotations/"
-
-      response = requests.get(self.url + filename, stream=True)
-      response.raise_for_status()
-      with open(os.path.join(save_dir, filename), "wb") as _file:
-        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
-          if chunk:
-            _file.write(chunk)
-      print("Coco data download complete...")
-
-      #extract files
-      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
-      print(f" Extracting {filename} file")
-      zf.extractall(path=save_dir)
-      # Delete coco zip
-      print(f" Deleting {filename}")
-      os.remove(path=os.path.join(save_dir, filename))
-
-  def load_data(self):
-    if isinstance(self.filenames, dict) and len(self.filenames) == 3:
-      self.coco_download(self.data_dir)
-      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "train" in i][0]
-      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "val" in i][0]
-
-      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "annotations" in i][0]
-    else:
-      raise Exception(f"`filenames` must be a dict of atleast 2 coco zip file names; \
-      train, val and annotations. Found {len(self.filenames)} items instead.")
-
-    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" +\
-    f"instances_{self.split}*")[0]
-    self.coco = COCO(annot_file)
-    categories = self.coco.loadCats(self.coco.getCatIds())
-    self.cat_id_map = {category["id"]: category["name"] for category in categories}
-    self.cat_img_ids = {}
-    for cat_id in list(self.cat_id_map.keys()):
-      self.cat_img_ids[cat_id] = self.coco.getImgIds(catIds=[cat_id])
-
-    img_ids = []
-    for i in list(self.cat_img_ids.values()):
-      img_ids.extend(i)
-
-    self.img_ids = list(set(img_ids))
-
-  def __len__(self):
-    return len(self.img_ids)
-
-  def __getitem__(self, idx):
-    _id = self.img_ids[idx]
-    annots = []  # bboxes
-    class_names = []
-    labels = [i for i in list(filter(lambda x: _id in self.cat_img_ids[x], self.cat_img_ids))]
-    image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
-    f"{str(_id).zfill(12)}*")[0]
-
-    image_height, image_width = cv2.imread(image_path).shape[:2]
-    for cat_id in labels:
-      annot_ids = self.coco.getAnnIds(imgIds=_id, catIds=[cat_id])
-      if len(annot_ids) > 0:
-        img_annotations = self.coco.loadAnns(annot_ids)
-        for ann in img_annotations:
-          class_names.append(self.cat_id_map[cat_id])
-          x_min = ann['bbox'][0] / image_width  #left_col
-          y_min = ann['bbox'][1] / image_height  #top_row
-          x_max = (ann['bbox'][0] + ann['bbox'][2]) / image_width  #right_col
-          y_max = (ann['bbox'][1] + ann['bbox'][3]) / image_height  #bottom_row
-          annots.append([x_min, y_min, x_max, y_max])
-      else:  # if no annotations for given image_id-cat_id pair
-        continue
-    assert len(class_names) == len(annots), f"Num classes must match num bbox annotations\
-    for a single image. Found {len(class_names)} classes and {len(annots)} bboxes."
-
-    return VisualDetectionFeatures(image_path, class_names, annots, id=str(_id))
```
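The bbox arithmetic in `__getitem__` converts COCO's pixel-space `[x, y, width, height]` annotations into the normalized `[x_min, y_min, x_max, y_max]` corners stored in `VisualDetectionFeatures`. The same step as a standalone sketch with made-up numbers:

```python
def normalize_coco_bbox(bbox, image_width, image_height):
  """COCO [x, y, w, h] in pixels -> [x_min, y_min, x_max, y_max] in [0, 1]."""
  x, y, w, h = bbox
  return [x / image_width, y / image_height,
          (x + w) / image_width, (y + h) / image_height]

# A 100x50 box whose top-left corner is at pixel (320, 240) in a 640x480 image:
print(normalize_coco_bbox([320, 240, 100, 50], 640, 480))
# [0.5, 0.5, 0.65625, 0.6041666666666666]
```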
clarifai_utils/datasets/upload/loaders/coco_segmentation.py +0 -166

```diff
@@ -1,166 +0,0 @@
-#! COCO 2017 Image Segmentation dataset
-
-import gc
-import os
-import zipfile
-from functools import reduce
-from glob import glob
-
-import cv2
-import numpy as np
-import requests
-from pycocotools import mask as maskUtils
-from pycocotools.coco import COCO
-from tqdm import tqdm
-
-from clarifai.datasets.upload.base import ClarifaiDataLoader
-
-from ..features import VisualSegmentationFeatures
-
-
-class COCOSegmentationDataLoader(ClarifaiDataLoader):
-  """COCO 2017 Image Segmentation Dataset."""
-
-  def __init__(self, split: str = "train"):
-    """
-    Initialize coco dataset.
-    Args:
-      filenames: the coco zip filenames: Dict[str, str] to be downloaded if download=True,
-      data_dir: the local coco dataset directory
-      split: "train" or "val"
-    """
-    self.filenames = {
-        "train": "train2017.zip",
-        "val": "val2017.zip",
-        "annotations": "annotations_trainval2017.zip"
-    }
-    self.split = split
-    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
-    self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                                 "data")  # data storage dir
-    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
-
-    self.load_data()
-
-  def coco_download(self, save_dir):
-    """Download coco dataset."""
-    if not os.path.exists(save_dir):
-      os.mkdir(save_dir)
-
-    #check if train, val and annotation dirs exist
-    #so that the coco2017 data isn't downloaded
-    for key, filename in self.filenames.items():
-      existing_files = glob(f"{save_dir}/{key}*")
-      if existing_files:
-        print(f"{key} dataset already downloded and extracted")
-        continue
-
-      print("-" * 80)
-      print(f"Downloading {filename}")
-      print("-" * 80)
-
-      if "annotations" in filename:
-        self.url = "http://images.cocodataset.org/annotations/"
-
-      response = requests.get(self.url + filename, stream=True)
-      response.raise_for_status()
-      with open(os.path.join(save_dir, filename), "wb") as _file:
-        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
-          if chunk:
-            _file.write(chunk)
-      print("Coco data download complete...")
-
-      #extract files
-      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
-      print(f" Extracting {filename} file")
-      zf.extractall(path=save_dir)
-      # Delete coco zip
-      print(f" Deleting {filename}")
-      os.remove(path=os.path.join(save_dir, filename))
-
-  def load_data(self):
-    """Load coco dataset image ids or filenames."""
-    if isinstance(self.filenames, dict) and len(self.filenames) == 3:
-      self.coco_download(self.data_dir)
-      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "train" in i][0]
-      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "val" in i][0]
-
-      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
-      for i in os.listdir(self.data_dir) if "annotations" in i][0]
-    else:
-      raise Exception(f"`filenames` must be a dict of atleast 3 coco zip file names; \
-      train, val and annotations. Found {len(self.filenames)} items instead.")
-
-    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" + f"instances_{self.split}*")[
-        0]
-    self.coco = COCO(annot_file)
-    categories = self.coco.loadCats(self.coco.getCatIds())
-    self.cat_id_map = {category["id"]: category["name"] for category in categories}
-    self.cat_img_ids = {}
-    for cat_id in list(self.cat_id_map.keys()):
-      self.cat_img_ids[cat_id] = self.coco.getImgIds(catIds=[cat_id])
-
-    img_ids = set()
-    for i in list(self.cat_img_ids.values()):
-      img_ids.update(i)
-
-    self.img_ids = list(img_ids)
-
-  def __len__(self):
-    return len(self.img_ids)
-
-  def __getitem__(self, idx):
-    """Get image and annotations for a given index."""
-    _id = self.img_ids[idx]
-    annots = []  # polygons
-    class_names = []
-    labels = [i for i in list(filter(lambda x: _id in self.cat_img_ids[x], self.cat_img_ids))]
-    image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
-    f"{str(_id).zfill(12)}*")[0]
-
-    image_height, image_width = cv2.imread(image_path).shape[:2]
-    for cat_id in labels:
-      annot_ids = self.coco.getAnnIds(imgIds=_id, catIds=[cat_id])
-      if len(annot_ids) > 0:
-        img_annotations = self.coco.loadAnns(annot_ids)
-        for ann in img_annotations:
-          # get polygons
-          if isinstance(ann['segmentation'], list):
-            for seg in ann['segmentation']:
-              poly = np.array(seg).reshape((int(len(seg) / 2), 2))
-              poly[:, 0], poly[:, 1] = poly[:, 0] / image_width, poly[:, 1] / image_height
-              annots.append(poly.tolist())  #[[x=col, y=row],...]
-              class_names.append(self.cat_id_map[cat_id])
-          else:  # seg: {"counts":[...]}
-            if isinstance(ann['segmentation']['counts'], list):
-              rle = maskUtils.frPyObjects([ann['segmentation']], image_height, image_width)
-            else:
-              rle = ann['segmentation']
-            mask = maskUtils.decode(rle)  #binary mask
-            #convert mask to polygons and add to annots
-            contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-            polygons = []
-            for cont in contours:
-              if cont.size >= 6:
-                polygons.append(cont.astype(float).flatten().tolist())
-            # store polygons in (x,y) pairs
-            polygons_flattened = reduce(lambda x, y: x + y, polygons)
-            del polygons
-            del contours
-            del mask
-            gc.collect()
-
-            polygons = np.array(polygons_flattened).reshape((int(len(polygons_flattened) / 2), 2))
-            polygons[:, 0] = polygons[:, 0] / image_width
-            polygons[:, 1] = polygons[:, 1] / image_height
-
-            annots.append(polygons.tolist())  #[[x=col, y=row],...,[x=col, y=row]]
-            class_names.append(self.cat_id_map[cat_id])
-      else:  # if no annotations for given image_id-cat_id pair
-        continue
-    assert len(class_names) == len(annots), f"Num classes must match num annotations\
-    for a single image. Found {len(class_names)} classes and {len(annots)} polygons."
-
-    return VisualSegmentationFeatures(image_path, class_names, annots, id=str(_id))
```
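In the polygon branch above, each flat COCO segmentation list `[x1, y1, x2, y2, ...]` is reshaped into (x, y) pairs and scaled into [0, 1] by the image dimensions. That reshape-and-scale step in isolation, with made-up triangle coordinates:

```python
import numpy as np

def normalize_coco_polygon(seg, image_width, image_height):
  """Flat COCO [x1, y1, x2, y2, ...] -> [[x, y], ...] scaled into [0, 1]."""
  poly = np.array(seg, dtype=float).reshape(-1, 2)
  poly[:, 0] /= image_width   # x = column
  poly[:, 1] /= image_height  # y = row
  return poly.tolist()

# A triangle in a 640x480 image:
print(normalize_coco_polygon([0, 0, 640, 0, 320, 480], 640, 480))
# [[0.0, 0.0], [1.0, 0.0], [0.5, 1.0]]
```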