PyPI - clarifai - Versions diffs - 9.7.1__py3-none-any.whl → 9.7.2__py3-none-any.whl - Mend

clarifai 9.7.1__py3-none-any.whl → 9.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (336) hide show

{clarifai/data_upload/datasets/zoo → clarifai_utils/datasets/upload/loaders}/coco_detection.py RENAMED Viewed

@@ -9,10 +9,12 @@ import requests
 from pycocotools.coco import COCO
 from tqdm import tqdm
+from clarifai.datasets.upload.base import ClarifaiDataLoader
 from ..features import VisualDetectionFeatures
-class COCODetectionDataset:
+class COCODetectionDataLoader(ClarifaiDataLoader):
   """COCO 2017 Image Detection Dataset."""
   def __init__(self, split: str = "train"):
@@ -30,9 +32,12 @@ class COCODetectionDataset:
     }
     self.split = split
     self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
-    self.data_dir = os.path.join(os.curdir, "data")  # data storage directory
+    self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                 "data")  # data storage directory
     self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
+    self.load_data()
   def coco_download(self, save_dir):
     """Download coco dataset."""
     if not os.path.exists(save_dir):
@@ -69,13 +74,7 @@ class COCODetectionDataset:
       print(f" Deleting {filename}")
       os.remove(path=os.path.join(save_dir, filename))
-  def dataloader(self):
-    """
-    Transform coco object detection data into clarifai proto compatible
-    format for upload.
-    Returns:
-      VisualDetectionFeatures type generator.
-    """
+  def load_data(self):
     if isinstance(self.filenames, dict) and len(self.filenames) == 3:
       self.coco_download(self.data_dir)
       self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
@@ -91,40 +90,45 @@ class COCODetectionDataset:
     annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" +\
      f"instances_{self.split}*")[0]
-    coco = COCO(annot_file)
-    categories = coco.loadCats(coco.getCatIds())
-    cat_id_map = {category["id"]: category["name"] for category in categories}
-    cat_img_ids = {}
-    for cat_id in list(cat_id_map.keys()):
-      cat_img_ids[cat_id] = coco.getImgIds(catIds=[cat_id])
+    self.coco = COCO(annot_file)
+    categories = self.coco.loadCats(self.coco.getCatIds())
+    self.cat_id_map = {category["id"]: category["name"] for category in categories}
+    self.cat_img_ids = {}
+    for cat_id in list(self.cat_id_map.keys()):
+      self.cat_img_ids[cat_id] = self.coco.getImgIds(catIds=[cat_id])
     img_ids = []
-    for i in list(cat_img_ids.values()):
+    for i in list(self.cat_img_ids.values()):
       img_ids.extend(i)
-    #get annotations for each image id
-    for _id in set(img_ids):
-      annots = []  # bboxes
-      class_names = []
-      labels = [i for i in list(filter(lambda x: _id in cat_img_ids[x], cat_img_ids))]
-      image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
-      f"{str(_id).zfill(12)}*")[0]
-      image_height, image_width = cv2.imread(image_path).shape[:2]
-      for cat_id in labels:
-        annot_ids = coco.getAnnIds(imgIds=_id, catIds=[cat_id])
-        if len(annot_ids) > 0:
-          img_annotations = coco.loadAnns(annot_ids)
-          for ann in img_annotations:
-            class_names.append(cat_id_map[cat_id])
-            x_min = ann['bbox'][0] / image_width  #left_col
-            y_min = ann['bbox'][1] / image_height  #top_row
-            x_max = (ann['bbox'][0] + ann['bbox'][2]) / image_width  #right_col
-            y_max = (ann['bbox'][1] + ann['bbox'][3]) / image_height  #bottom_row
-            annots.append([x_min, y_min, x_max, y_max])
-        else:  # if no annotations for given image_id-cat_id pair
-          continue
-      assert len(class_names) == len(annots), f"Num classes must match num bbox annotations\
-      for a single image. Found {len(class_names)} classes and {len(annots)} bboxes."
-      yield VisualDetectionFeatures(image_path, class_names, annots, id=_id)
+    self.img_ids = list(set(img_ids))
+  def __len__(self):
+    """Return the number of entries in `self.img_ids`."""
+    return len(self.img_ids)
+  def __getitem__(self, idx):
+    _id = self.img_ids[idx]
+    annots = []  # bboxes
+    class_names = []
+    labels = [i for i in list(filter(lambda x: _id in self.cat_img_ids[x], self.cat_img_ids))]
+    image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
+    f"{str(_id).zfill(12)}*")[0]
+    image_height, image_width = cv2.imread(image_path).shape[:2]
+    for cat_id in labels:
+      annot_ids = self.coco.getAnnIds(imgIds=_id, catIds=[cat_id])
+      if len(annot_ids) > 0:
+        img_annotations = self.coco.loadAnns(annot_ids)
+        for ann in img_annotations:
+          class_names.append(self.cat_id_map[cat_id])
+          x_min = ann['bbox'][0] / image_width  #left_col
+          y_min = ann['bbox'][1] / image_height  #top_row
+          x_max = (ann['bbox'][0] + ann['bbox'][2]) / image_width  #right_col
+          y_max = (ann['bbox'][1] + ann['bbox'][3]) / image_height  #bottom_row
+          annots.append([x_min, y_min, x_max, y_max])
+      else:  # if no annotations for given image_id-cat_id pair
+        continue
+    assert len(class_names) == len(annots), f"Num classes must match num bbox annotations\
+    for a single image. Found {len(class_names)} classes and {len(annots)} bboxes."
+    return VisualDetectionFeatures(image_path, class_names, annots, id=str(_id))

clarifai_utils/datasets/upload/loaders/coco_segmentation.py ADDED Viewed

@@ -0,0 +1,166 @@
+#! COCO 2017 Image Segmentation dataset
+import gc
+import os
+import zipfile
+from functools import reduce
+from glob import glob
+import cv2
+import numpy as np
+import requests
+from pycocotools import mask as maskUtils
+from pycocotools.coco import COCO
+from tqdm import tqdm
+from clarifai.datasets.upload.base import ClarifaiDataLoader
+from ..features import VisualSegmentationFeatures
+class COCOSegmentationDataLoader(ClarifaiDataLoader):
+  """COCO 2017 Image Segmentation Dataset."""
+  def __init__(self, split: str = "train"):
+    """
+    Initialize coco dataset.
+    Args:
+      filenames: the coco zip filenames: Dict[str, str] to be downloaded if download=True,
+      data_dir: the local coco dataset directory
+      split: "train" or "val"
+    """
+    self.filenames = {
+        "train": "train2017.zip",
+        "val": "val2017.zip",
+        "annotations": "annotations_trainval2017.zip"
+    }
+    self.split = split
+    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
+    self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                 "data")  # data storage dir
+    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
+    self.load_data()
+  def coco_download(self, save_dir):
+    """Download coco dataset."""
+    if not os.path.exists(save_dir):
+      os.mkdir(save_dir)
+    #check if train, val and annotation dirs exist
+    #so that the coco2017 data isn't downloaded
+    for key, filename in self.filenames.items():
+      existing_files = glob(f"{save_dir}/{key}*")
+      if existing_files:
+        print(f"{key} dataset already downloded and extracted")
+        continue
+      print("-" * 80)
+      print(f"Downloading {filename}")
+      print("-" * 80)
+      if "annotations" in filename:
+        self.url = "http://images.cocodataset.org/annotations/"
+      response = requests.get(self.url + filename, stream=True)
+      response.raise_for_status()
+      with open(os.path.join(save_dir, filename), "wb") as _file:
+        for chunk in tqdm(response.iter_content(chunk_size=5124000)):
+          if chunk:
+            _file.write(chunk)
+      print("Coco data download complete...")
+      #extract files
+      zf = zipfile.ZipFile(os.path.join(save_dir, filename))
+      print(f" Extracting {filename} file")
+      zf.extractall(path=save_dir)
+      # Delete coco zip
+      print(f" Deleting {filename}")
+      os.remove(path=os.path.join(save_dir, filename))
+  def load_data(self):
+    """Load coco dataset image ids or filenames."""
+    if isinstance(self.filenames, dict) and len(self.filenames) == 3:
+      self.coco_download(self.data_dir)
+      self.extracted_coco_dirs["train"] = [os.path.join(self.data_dir, i) \
+          for i in os.listdir(self.data_dir) if "train" in i][0]
+      self.extracted_coco_dirs["val"] = [os.path.join(self.data_dir, i) \
+          for i in os.listdir(self.data_dir) if "val" in i][0]
+      self.extracted_coco_dirs["annotations"] = [os.path.join(self.data_dir, i) \
+          for i in os.listdir(self.data_dir) if "annotations" in i][0]
+    else:
+      raise Exception(f"`filenames` must be a dict of atleast 3 coco zip file names; \
+            train, val and annotations. Found {len(self.filenames)} items instead.")
+    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" + f"instances_{self.split}*")[
+        0]
+    self.coco = COCO(annot_file)
+    categories = self.coco.loadCats(self.coco.getCatIds())
+    self.cat_id_map = {category["id"]: category["name"] for category in categories}
+    self.cat_img_ids = {}
+    for cat_id in list(self.cat_id_map.keys()):
+      self.cat_img_ids[cat_id] = self.coco.getImgIds(catIds=[cat_id])
+    img_ids = set()
+    for i in list(self.cat_img_ids.values()):
+      img_ids.update(i)
+    self.img_ids = list(img_ids)
+  def __len__(self):
+    return len(self.img_ids)
+  def __getitem__(self, idx):
+    """Get image and annotations for a given index."""
+    _id = self.img_ids[idx]
+    annots = []  # polygons
+    class_names = []
+    labels = [i for i in list(filter(lambda x: _id in self.cat_img_ids[x], self.cat_img_ids))]
+    image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
+    f"{str(_id).zfill(12)}*")[0]
+    image_height, image_width = cv2.imread(image_path).shape[:2]
+    for cat_id in labels:
+      annot_ids = self.coco.getAnnIds(imgIds=_id, catIds=[cat_id])
+      if len(annot_ids) > 0:
+        img_annotations = self.coco.loadAnns(annot_ids)
+        for ann in img_annotations:
+          # get polygons
+          if type(ann['segmentation']) == list:
+            for seg in ann['segmentation']:
+              poly = np.array(seg).reshape((int(len(seg) / 2), 2))
+              poly[:, 0], poly[:, 1] = poly[:, 0] / image_width, poly[:, 1] / image_height
+              annots.append(poly.tolist())  #[[x=col, y=row],...]
+              class_names.append(self.cat_id_map[cat_id])
+          else:  # seg: {"counts":[...]}
+            if type(ann['segmentation']['counts']) == list:
+              rle = maskUtils.frPyObjects([ann['segmentation']], image_height, image_width)
+            else:
+              rle = ann['segmentation']
+            mask = maskUtils.decode(rle)  #binary mask
+            #convert mask to polygons and add to annots
+            contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+            polygons = []
+            for cont in contours:
+              if cont.size >= 6:
+                polygons.append(cont.astype(float).flatten().tolist())
+            # store polygons in (x,y) pairs
+            polygons_flattened = reduce(lambda x, y: x + y, polygons)
+            del polygons
+            del contours
+            del mask
+            gc.collect()
+            polygons = np.array(polygons_flattened).reshape((int(len(polygons_flattened) / 2), 2))
+            polygons[:, 0] = polygons[:, 0] / image_width
+            polygons[:, 1] = polygons[:, 1] / image_height
+            annots.append(polygons.tolist())  #[[x=col, y=row],...,[x=col, y=row]]
+            class_names.append(self.cat_id_map[cat_id])
+      else:  # if no annotations for given image_id-cat_id pair
+        continue
+    assert len(class_names) == len(annots), f"Num classes must match num annotations\
+    for a single image. Found {len(class_names)} classes and {len(annots)} polygons."
+    return VisualSegmentationFeatures(image_path, class_names, annots, id=str(_id))

clarifai_utils/{data_upload/datasets/zoo → datasets/upload/loaders}/imagenet_classification.py RENAMED Viewed

@@ -2,10 +2,11 @@
 import os
-from clarifai.data_upload.datasets.features import VisualClassificationFeatures
+from clarifai.datasets.upload.base import ClarifaiDataLoader
+from ..features import VisualClassificationFeatures
-class ImageNetDataset:
+class ImageNetDataLoader(ClarifaiDataLoader):
   """ImageNet Dataset."""
   def __init__(self, split: str = "train"):
@@ -16,16 +17,15 @@ class ImageNetDataset:
       split: "train" or "test"
     """
     self.split = split
-    self.data_dir = os.path.join(os.curdir, "data")  # data storage directory
+    self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                 "data")  # data storage directory
     self.label_map = dict()
+    self.concepts = []
+    self.image_paths = []
-  def dataloader(self):
-    """
-    Transform text data into clarifai proto compatible
-    format for upload.
-    Returns:
-      VisualClassificationFeatures type generator.
-    """
+    self.load_data()
+  def load_data(self):
     #Creating label map
     with open(os.path.join(self.data_dir, "LOC_synset_mapping.txt")) as _file:
       for _id in _file:
@@ -43,5 +43,15 @@ class ImageNetDataset:
       folder_path = os.path.join(self.data_dir, self.split) + "/" + _folder
       for _img in os.listdir(folder_path):
         if _img.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff')):
-          yield VisualClassificationFeatures(
-              image_path=folder_path + "/" + _img, label=concept, id=None)
+          self.concepts.append(concept)
+          self.image_paths.append(folder_path + "/" + _img)
+    assert len(self.concepts) == len(self.image_paths)
+    "Number of concepts and images are not equal"
+  def __len__(self):
+    """Return the number of entries in `self.image_paths`."""
+    return len(self.image_paths)
+  def __getitem__(self, idx):
+    return VisualClassificationFeatures(
+        image_path=self.image_paths[idx], label=self.concepts[idx], id=None)

clarifai_utils/{data_upload/datasets/zoo → datasets/upload/loaders}/xview_detection.py RENAMED Viewed

@@ -1,6 +1,5 @@
 import glob
 import json
-import math
 import os
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
@@ -10,10 +9,12 @@ from typing import DefaultDict, Dict, List
 import cv2
 from tqdm import tqdm
+from clarifai.datasets.upload.base import ClarifaiDataLoader
 from ..features import VisualDetectionFeatures
-class xviewDetectionDataset:
+class xviewDetectionDataLoader(ClarifaiDataLoader):
   """xview Image Detection Dataset"""
   xview_concepts = [
@@ -31,21 +32,21 @@ class xviewDetectionDataset:
   ]
   def __init__(self, split: str = "train") -> None:
+    """Initialize and Compress xview dataset.
+    Args:
+    split: "train"
     """
-        Initialize and Compress xview dataset.
-        Args:
-        split: "train"
-        """
-    self.data_dir = os.path.join(os.curdir, "data")
+    self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
     self.img_dir = os.path.join(self.data_dir, "train_images")
     self.img_comp_dir = os.path.join(self.data_dir, "train_images_comp")
     self.label_file = os.path.join(self.data_dir, "xview_train.geojson")
-    self.max_annotations_per_img = 700  # Max annotations per image(due to max grpc 10KB limit)
-    self.preprocess()
+    # self.preprocess()
     self.all_data = self.xview_data_parser()
+    self.load_data()
   def compress_tiff(self, img_path: str) -> None:
     """Compress tiff image"""
     img_comp_path = os.path.join(self.img_comp_dir, os.path.basename(img_path))
@@ -54,10 +55,7 @@ class xviewDetectionDataset:
         img_comp_path, img_arr, params=(cv2.IMWRITE_TIFF_COMPRESSION, 8))  # 8: Adobe Deflate
   def preprocess(self):
-    """
-        Compress the tiff images to comply with clarifai grpc image encoding limit(<20MB)
-        Uses ADOBE_DEFLATE compression algorithm
-        """
+    """Compress the tiff images to comply with clarifai grpc image encoding limit(<20MB) Uses ADOBE_DEFLATE compression algorithm"""
     all_img_ids = glob.glob(os.path.join(self.img_dir, "*.tif"))
     if not os.path.exists(self.img_comp_dir):
@@ -80,7 +78,7 @@ class xviewDetectionDataset:
           results.append(result)
   def xview_classes2indices(self, classes: List) -> List:
-    """ remap xview classes 11-94 to 0-59 """
+    """remap xview classes 11-94 to 0-59"""
     indices = [
         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10,
         11, 12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28,
@@ -91,13 +89,11 @@ class xviewDetectionDataset:
     return [indices[int(c)] for c in classes]
   def xview_indices2concepts(self, indices: List) -> List:
-    """ remap classes to concept names """
+    """remap classes to concept names"""
     return [self.xview_concepts[i] for i in indices]
   def xview_data_parser(self) -> DefaultDict[str, Dict[List, List]]:
-    """
-        Parse geojson data into nested dict of imageid w.r.t bounding boxes, concepts
-        """
+    """Parse geojson data into nested dict of imageid w.r.t bounding boxes, concepts"""
     all_data = defaultdict(lambda: dict(bboxes=[], concepts=[]))
     with open(self.label_file) as f:
@@ -110,48 +106,43 @@ class xviewDetectionDataset:
       xview_classid = feature['properties']['type_id']
       bbox = list(map(int, feature['properties']['bounds_imcoords'].split(",")))
       concept = self.xview_indices2concepts(self.xview_classes2indices([xview_classid]))
-      geo_loc = feature['geometry']['coordinates']  # Long,Lat
       all_data[image_id]['bboxes'].append(bbox)
       all_data[image_id]['concepts'].append(concept[0])
     return all_data
-  def dataloader(self):
+  def load_data(self):
+    """Load image paths"""
+    self.image_paths = []
+    all_img_ids = glob.glob(os.path.join(self.img_comp_dir, "*.tif"))
+    self.image_paths = all_img_ids
+  def __len__(self):
+    """Return the number of entries in `self.image_paths`."""
+    return len(self.image_paths)
+  def __getitem__(self, index: int):
+    """Get dataset for a given index.
+    Returns:
+        VisualDetectionFeature type.
     """
-        Transform xview object detection data into clarifai proto compatible
-        format for upload.
-        Returns:
-            VisualDetectionFeatures type generator.
-        """
-    img_ids = list(self.all_data.keys())
-    #get annotations for each image id
-    for _id in img_ids:
-      image_path = os.path.join(self.img_comp_dir, _id + ".tif")
-      if not os.path.exists(image_path):
-        print(f"Invalid Image path... {image_path}")
+    _id = os.path.splitext(os.path.basename(self.image_paths[index]))[0]
+    image_path = self.image_paths[index]
+    image_height, image_width = cv2.imread(image_path).shape[:2]
+    annots = []
+    class_names = []
+    for bbox, concept in zip(self.all_data[_id]['bboxes'], self.all_data[_id]['concepts']):
+      x_min = max(min(bbox[0] / image_width, 1.0), 0.0)  #left_col
+      y_min = max(min(bbox[1] / image_height, 1.0), 0.0)  #top_row
+      x_max = max(min(bbox[2] / image_width, 1.0), 0.0)  #right_col
+      y_max = max(min(bbox[3] / image_height, 1.0), 0.0)  #bottom_row
+      if (x_min >= x_max) or (y_min >= y_max):
         continue
+      annots.append([x for x in [x_min, y_min, x_max, y_max]])
+      class_names.append(concept)
-      image_size_MB = os.path.getsize(image_path) / 1048576  #Skip compressed image > 20MB
-      if math.ceil(image_size_MB) > 20:
-        continue
+    assert len(class_names) == len(annots), f"Num classes must match num bbox annotations\
+        for a single image. Found {len(class_names)} classes and {len(annots)} bboxes."
-      image_height, image_width = cv2.imread(image_path).shape[:2]
-      annots = []
-      class_names = []
-      for bbox, concept in zip(self.all_data[_id]['bboxes'][:self.max_annotations_per_img],
-                               self.all_data[_id]['concepts'][:self.max_annotations_per_img]):
-        x_min = max(min(bbox[0] / image_width, 1.0), 0.0)  #left_col
-        y_min = max(min(bbox[1] / image_height, 1.0), 0.0)  #top_row
-        x_max = max(min(bbox[2] / image_width, 1.0), 0.0)  #right_col
-        y_max = max(min(bbox[3] / image_height, 1.0), 0.0)  #bottom_row
-        if (x_min >= x_max) or (y_min >= y_max):
-          continue
-        annots.append([round(x, 3) for x in [x_min, y_min, x_max, y_max]])
-        class_names.append(concept)
-      assert len(class_names) == len(annots), f"Num classes must match num bbox annotations\
-            for a single image. Found {len(class_names)} classes and {len(annots)} bboxes."
-      yield VisualDetectionFeatures(image_path, class_names, annots, id=_id)
+    return VisualDetectionFeatures(image_path, class_names, annots, id=_id)

clarifai_utils/datasets/upload/text.py ADDED Viewed

@@ -0,0 +1,50 @@
+from concurrent.futures import ThreadPoolExecutor
+from typing import Iterator, List, Tuple
+from clarifai_grpc.grpc.api import resources_pb2
+from google.protobuf.struct_pb2 import Struct
+from .base import ClarifaiDataset
+class TextClassificationDataset(ClarifaiDataset):
+  """Upload text classification datasets to clarifai datasets"""
+  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
+    super().__init__(datagen_object, dataset_id, split)
+  def _extract_protos(self, batch_input_ids: List[int]
+                     ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
+    """Create input image and annotation protos for batch of input ids.
+    Args:
+      batch_input_ids: List of input IDs to retrieve the protos for.
+    Returns:
+      input_protos: List of input protos.
+      annotation_protos: List of annotation protos.
+    """
+    input_protos, annotation_protos = [], []
+    def process_datagen_item(id):
+      datagen_item = self.datagen_object[id]
+      metadata = Struct()
+      text = datagen_item.text
+      labels = datagen_item.labels if isinstance(
+          datagen_item.labels, list) else [datagen_item.labels]  # clarifai concept
+      input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.split}-{str(datagen_item.id)}"
+      metadata.update({"split": self.split})
+      self.all_input_ids[id] = input_id
+      input_protos.append(
+          self.input_object.get_text_input(
+              input_id=input_id,
+              raw_text=text,
+              dataset_id=self.dataset_id,
+              labels=labels,
+              metadata=metadata))
+    with ThreadPoolExecutor(max_workers=4) as executor:
+      futures = [executor.submit(process_datagen_item, id) for id in batch_input_ids]
+      for job in futures:
+        job.result()
+    return input_protos, annotation_protos

clarifai_utils/datasets/upload/utils.py ADDED Viewed

@@ -0,0 +1,62 @@
+import importlib
+import inspect
+import os
+import sys
+from typing import Union
+from .base import ClarifaiDataLoader
+def load_module_dataloader(module_dir: Union[str, os.PathLike], split: str) -> ClarifaiDataLoader:
+  """Validate and import dataset module data generator.
+  Args:
+    `module_dir`: relative path to the module directory
+    The directory must contain a `dataset.py` script and the data itself.
+    `split`: "train" or "val"/"test" dataset split
+  Module Directory Structure:
+  ---------------------------
+      <folder_name>/
+      ├──__init__.py
+      ├──<Your local dir dataset>/
+      └──dataset.py
+  dataset.py must implement a class named following the convention,
+  <dataset_name>Dataset and this class must have a dataloader()
+  generator method
+  """
+  sys.path.append(str(module_dir))
+  if not os.path.exists(os.path.join(module_dir, "__init__.py")):
+    with open(os.path.join(module_dir, "__init__.py"), "w"):
+      pass
+  import dataset  # dataset module
+  # get main module class
+  main_module_cls = None
+  for name, obj in dataset.__dict__.items():
+    if inspect.isclass(obj) and "DataLoader" in name:
+      main_module_cls = obj
+    else:
+      continue
+  return main_module_cls(split)
+def load_dataloader(name: str, split: str) -> ClarifaiDataLoader:
+  """Get dataset generator object from dataset loaders.
+  Args:
+    `name`: dataset module name in datasets/upload/loaders/.
+    `split`: "train" or "val"/"test" dataset split
+  Returns:
+    Data generator object
+  """
+  loader_dataset = importlib.import_module(f"clarifai.datasets.upload.loaders.{name}")
+  # get main module class
+  main_module_cls = None
+  for name, obj in loader_dataset.__dict__.items():
+    if inspect.isclass(obj) and "DataLoader" in name:
+      main_module_cls = obj
+    else:
+      continue
+  return main_module_cls(split)

clarifai 9.7.1__py3-none-any.whl → 9.7.2__py3-none-any.whl

clarifai 9.7.1__py3-none-any.whl → 9.7.2__py3-none-any.whl