PyPI - clarifai - Versions diffs - 9.7.0__py3-none-any.whl → 9.7.2__py3-none-any.whl - Mend

clarifai 9.7.0py3-none-any.whl → 9.7.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (336) hide show

clarifai/data_upload/datasets/image.py DELETED Viewed

@@ -1,253 +0,0 @@
-import os
-from typing import Iterator, List, Union
-from clarifai_grpc.grpc.api import resources_pb2
-from google.protobuf.struct_pb2 import Struct
-from tqdm import tqdm
-from .base import ClarifaiDataset
-class VisualClassificationDataset(ClarifaiDataset):
-  """
-  Visual classification dataset proto class.
-  Builds one input proto per data generator item when the object is constructed.
-  """
-  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
-    """
-    Args:
-      `datagen_object`: iterable of items exposing `image_path`, `label`, `id` and `geo_info`
-      `dataset_id`: Clarifai dataset id
-      `split`: dataset split name; written into every input id and its metadata
-    """
-    super().__init__(datagen_object, dataset_id, split)
-    self._extract_protos()
-  def create_input_protos(self, image_path: str, labels: List[Union[str, int]], input_id: str,
-                          dataset_id: str, geo_info: Union[List[float], None],
-                          metadata: Struct) -> resources_pb2.Input:
-    """
-    Create an input proto for a single image and its label(s).
-    Args:
-      `image_path`: image path.
-      `labels`: image label(s); non-string labels are converted with `str()`
-      `input_id`: unique input id
-      `dataset_id`: Clarifai dataset id
-      `geo_info`: image [longitude, latitude] info, or None to leave geo unset
-      `metadata`: image metadata
-    Returns:
-      An input proto representing a single row input
-    Raises:
-      OSError: if `image_path` cannot be opened or read.
-    """
-    geo_pb = None
-    if geo_info is not None:
-      geo_pb = resources_pb2.Geo(geo_point=resources_pb2.GeoPoint(
-          longitude=geo_info[0], latitude=geo_info[1]))
-    # Read through a context manager so the file handle is closed right away
-    # instead of lingering until garbage collection (matters for large datasets).
-    with open(image_path, 'rb') as image_file:
-      image_bytes = image_file.read()
-    concepts = []
-    for _label in labels:
-      # The signature allows int labels; `.split` and the proto `name` field need a str.
-      label_name = str(_label)
-      concepts.append(
-          resources_pb2.Concept(
-              id=f"id-{''.join(label_name.split(' '))}", name=label_name, value=1.))
-    input_proto = resources_pb2.Input(
-        id=input_id,
-        dataset_ids=[dataset_id],
-        data=resources_pb2.Data(
-            image=resources_pb2.Image(base64=image_bytes),
-            geo=geo_pb,
-            concepts=concepts,
-            metadata=metadata))
-    return input_proto
-  def _extract_protos(self) -> None:
-    """
-    Create input image protos for each data generator item.
-    Appends every input id to `self.input_ids` and stores the proto in
-    `self._all_input_protos` keyed by that id (both containers are presumably
-    initialised by `ClarifaiDataset` - confirm in the base class).
-    """
-    for i, item in tqdm(enumerate(self.datagen_object), desc="Creating input protos..."):
-      metadata = Struct()
-      image_path = item.image_path
-      label = item.label if isinstance(item.label, list) else [item.label]  # clarifai concept
-      # Fall back to a positional id when the item does not carry its own id.
-      if item.id is None:
-        input_id = f"{self.dataset_id}-{self.split}-{i}"
-      else:
-        input_id = f"{self.split}-{str(item.id)}"
-      geo_info = item.geo_info
-      metadata.update({"filename": os.path.basename(image_path), "split": self.split})
-      self.input_ids.append(input_id)
-      input_proto = self.create_input_protos(image_path, label, input_id, self.dataset_id,
-                                             geo_info, metadata)
-      self._all_input_protos[input_id] = input_proto
-class VisualDetectionDataset(ClarifaiDataset):
-  """
-  Visual detection dataset proto class.
-  Builds one input proto per image and one annotation proto per bounding box
-  when the object is constructed.
-  """
-  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
-    """
-    Args:
-      `datagen_object`: iterable of items exposing `image_path`, `classes`, `bboxes`, `id` and `geo_info`
-      `dataset_id`: Clarifai dataset id
-      `split`: dataset split name; written into every input id and its metadata
-    """
-    super().__init__(datagen_object, dataset_id, split)
-    self._extract_protos()
-  def create_input_protos(self, image_path: str, input_id: str, dataset_id: str,
-                          geo_info: Union[List[float], None],
-                          metadata: Struct) -> resources_pb2.Input:
-    """
-    Create the input proto for a single image (annotations are created separately).
-    Args:
-      `image_path`: file path to image
-      `input_id`: unique input id
-      `dataset_id`: Clarifai dataset id
-      `geo_info`: image [longitude, latitude] info, or None to leave geo unset
-      `metadata`: image metadata
-    Returns:
-      An input proto representing a single row input
-    Raises:
-      OSError: if `image_path` cannot be opened or read.
-    """
-    geo_pb = None
-    if geo_info is not None:
-      geo_pb = resources_pb2.Geo(geo_point=resources_pb2.GeoPoint(
-          longitude=geo_info[0], latitude=geo_info[1]))
-    # Close the file handle deterministically rather than relying on garbage collection.
-    with open(image_path, 'rb') as image_file:
-      image_bytes = image_file.read()
-    input_image_proto = resources_pb2.Input(
-        id=input_id,
-        dataset_ids=[dataset_id],
-        data=resources_pb2.Data(
-            image=resources_pb2.Image(base64=image_bytes),
-            geo=geo_pb,
-            metadata=metadata))
-    return input_image_proto
-  def create_annotation_proto(self, label: str, annotations: List, input_id: str,
-                              dataset_id: str) -> resources_pb2.Annotation:
-    """
-    Create an annotation proto for a single bounding box and its label.
-    Args:
-      `label`: annotation label
-      `annotations`: a single bbox's coordinates ordered [xmin, ymin, xmax, ymax]
-      `input_id`: id of the input the annotation belongs to
-      `dataset_id`: Clarifai dataset id (accepted for signature parity; not used here)
-    Returns:
-      An annotation proto holding one region with one concept
-    """
-    x_min, y_min, x_max, y_max = annotations[0], annotations[1], annotations[2], annotations[3]
-    # top_row must be less than bottom row
-    # left_col must be less than right col
-    bounding_box = resources_pb2.BoundingBox(
-        top_row=y_min, left_col=x_min, bottom_row=y_max, right_col=x_max)
-    input_annot_proto = resources_pb2.Annotation(
-        input_id=input_id,
-        data=resources_pb2.Data(regions=[
-            resources_pb2.Region(
-                region_info=resources_pb2.RegionInfo(bounding_box=bounding_box),
-                data=resources_pb2.Data(concepts=[
-                    resources_pb2.Concept(
-                        id=f"id-{''.join(label.split(' '))}", name=label, value=1.)
-                ]))
-        ]))
-    return input_annot_proto
-  def _extract_protos(self) -> None:
-    """
-    Create input image protos and bounding-box annotation protos for each data generator item.
-    Input protos go to `self._all_input_protos[input_id]`; annotation protos are
-    appended to `self._all_annotation_protos[input_id]` (containers presumably
-    initialised by `ClarifaiDataset` - confirm in the base class).
-    Raises:
-      IndexError: if an item has more bboxes than class labels.
-    """
-    for i, item in tqdm(enumerate(self.datagen_object), desc="Creating input protos..."):
-      metadata = Struct()
-      image = item.image_path
-      labels = item.classes  # list:[l1,...,ln]
-      bboxes = item.bboxes  # [[xmin,ymin,xmax,ymax],...,[xmin,ymin,xmax,ymax]]
-      # Fall back to a positional id when the item does not carry its own id.
-      if item.id is None:
-        input_id = f"{self.dataset_id}-{self.split}-{i}"
-      else:
-        input_id = f"{self.split}-{str(item.id)}"
-      metadata.update({"filename": os.path.basename(image), "split": self.split})
-      geo_info = item.geo_info
-      self.input_ids.append(input_id)
-      input_image_proto = self.create_input_protos(image, input_id, self.dataset_id, geo_info,
-                                                   metadata)
-      self._all_input_protos[input_id] = input_image_proto
-      # iter over bboxes and classes
-      # one id could have more than one bbox and label
-      # A dedicated index name avoids shadowing the outer enumerate counter `i`.
-      for box_idx, bbox in enumerate(bboxes):
-        input_annot_proto = self.create_annotation_proto(labels[box_idx], bbox, input_id,
-                                                         self.dataset_id)
-        self._all_annotation_protos[input_id].append(input_annot_proto)
-class VisualSegmentationDataset(ClarifaiDataset):
-  """
-  Visual segmentation dataset proto class.
-  Builds one input proto per image and one polygon annotation proto per mask
-  when the object is constructed.
-  """
-  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
-    """
-    Args:
-      `datagen_object`: iterable of items exposing `image_path`, `classes`, `polygons`, `id` and `geo_info`
-      `dataset_id`: Clarifai dataset id
-      `split`: dataset split name; written into every input id and its metadata
-    """
-    super().__init__(datagen_object, dataset_id, split)
-    self._extract_protos()
-  def create_input_protos(self, image_path: str, input_id: str, dataset_id: str,
-                          geo_info: Union[List[float], None],
-                          metadata: Struct) -> resources_pb2.Input:
-    """
-    Create the input proto for a single image (mask annotations are created separately).
-    Args:
-      `image_path`: absolute image file path
-      `input_id`: unique input id
-      `dataset_id`: Clarifai dataset id
-      `geo_info`: image [longitude, latitude] info, or None to leave geo unset
-      `metadata`: image metadata
-    Returns:
-      An input proto representing a single input item
-    Raises:
-      OSError: if `image_path` cannot be opened or read.
-    """
-    geo_pb = None
-    if geo_info is not None:
-      geo_pb = resources_pb2.Geo(geo_point=resources_pb2.GeoPoint(
-          longitude=geo_info[0], latitude=geo_info[1]))
-    # Close the file handle deterministically rather than relying on garbage collection.
-    with open(image_path, 'rb') as image_file:
-      image_bytes = image_file.read()
-    input_image_proto = resources_pb2.Input(
-        id=input_id,
-        dataset_ids=[dataset_id],
-        data=resources_pb2.Data(
-            image=resources_pb2.Image(base64=image_bytes),
-            geo=geo_pb,
-            metadata=metadata))
-    return input_image_proto
-  def create_mask_proto(self, label: str, polygons: List[List[float]], input_id: str,
-                        dataset_id: str) -> resources_pb2.Annotation:
-    """
-    Create an annotation proto for a single polygon/mask and its label.
-    Args:
-      `label`: image label
-      `polygons`: iterable of [x, y] points describing one polygon
-      `input_id`: id of the input the annotation belongs to
-      `dataset_id`: Clarifai dataset id (accepted for signature parity; not used here)
-    Returns:
-      An annotation proto holding one polygon region with one concept
-    Raises:
-      IndexError: if a point has fewer than two coordinates.
-    """
-    points = [
-        resources_pb2.Point(
-            row=_point[1],  # row is y point
-            col=_point[0],  # col is x point
-            visibility="VISIBLE") for _point in polygons
-    ]
-    input_mask_proto = resources_pb2.Annotation(
-        input_id=input_id,
-        data=resources_pb2.Data(regions=[
-            resources_pb2.Region(
-                region_info=resources_pb2.RegionInfo(polygon=resources_pb2.Polygon(
-                    points=points)),
-                data=resources_pb2.Data(concepts=[
-                    resources_pb2.Concept(
-                        id=f"id-{''.join(label.split(' '))}", name=label, value=1.)
-                ]))
-        ]))
-    return input_mask_proto
-  def _extract_protos(self) -> None:
-    """
-    Create input image and annotation protos for each data generator item.
-    Input protos go to `self._all_input_protos[input_id]`; mask protos are
-    appended to `self._all_annotation_protos[input_id]` (containers presumably
-    initialised by `ClarifaiDataset` - confirm in the base class).
-    """
-    for i, item in tqdm(enumerate(self.datagen_object), desc="Creating input protos..."):
-      metadata = Struct()
-      image = item.image_path  # image path
-      labels = item.classes  # list of class labels
-      _polygons = item.polygons  # list of polygons: [[[x,y],...,[x,y]],...]
-      # Fall back to a positional id when the item does not carry its own id.
-      if item.id is None:
-        input_id = f"{self.dataset_id}-{self.split}-{i}"
-      else:
-        input_id = f"{self.split}-{str(item.id)}"
-      metadata.update({"filename": os.path.basename(image), "split": self.split})
-      geo_info = item.geo_info
-      self.input_ids.append(input_id)
-      input_image_proto = self.create_input_protos(image, input_id, self.dataset_id, geo_info,
-                                                   metadata)
-      self._all_input_protos[input_id] = input_image_proto
-      ## Iterate over each masked image and create a proto for upload to clarifai
-      ## The length of masks/polygons-list and labels must be equal
-      # A dedicated index name avoids shadowing the outer enumerate counter `i`.
-      for poly_idx, _polygon in enumerate(_polygons):
-        try:
-          input_mask_proto = self.create_mask_proto(labels[poly_idx], _polygon, input_id,
-                                                    self.dataset_id)
-        except IndexError:
-          # Best-effort, as before: a polygon with no matching label (or with a
-          # point that has fewer than two coordinates) is skipped, not fatal.
-          continue
-        self._all_annotation_protos[input_id].append(input_mask_proto)

clarifai/data_upload/datasets/text.py DELETED Viewed

@@ -1,60 +0,0 @@
-from typing import Iterator, List
-from clarifai_grpc.grpc.api import resources_pb2
-from google.protobuf.struct_pb2 import Struct
-from tqdm import tqdm
-from .base import ClarifaiDataset
-class TextClassificationDataset(ClarifaiDataset):
-  """
-  Text classification dataset proto class for upload to Clarifai datasets.
-  Builds one input proto per data generator item when the object is constructed.
-  """
-  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
-    super().__init__(datagen_object, dataset_id, split)
-    self._extract_protos()
-  def create_input_protos(self, text_input: str, labels: List[str], input_id: str, dataset_id: str,
-                          metadata: Struct) -> resources_pb2.Input:
-    """
-    Build the input proto for one text and its label(s).
-    Args:
-      `text_input`: raw text string
-      `labels`: text labels; each becomes one concept with value 1
-      `input_id`: unique input id
-      `dataset_id`: Clarifai dataset id
-      `metadata`: input metadata
-    Returns:
-      An input proto representing a single row input
-    """
-    concept_protos = []
-    for _label in labels:
-      # Concept id is the label with its spaces removed, prefixed by "id-".
-      concept_id = f"id-{''.join(_label.split(' '))}"
-      concept_protos.append(resources_pb2.Concept(id=concept_id, name=_label, value=1.))
-    text_data = resources_pb2.Data(
-        text=resources_pb2.Text(raw=text_input), concepts=concept_protos, metadata=metadata)
-    return resources_pb2.Input(id=input_id, dataset_ids=[dataset_id], data=text_data)
-  def _extract_protos(self) -> None:
-    """
-    Walk the data generator and register one input proto per item.
-    Each id is appended to `self.input_ids` and the proto is stored in
-    `self._all_input_protos` under that id.
-    """
-    for idx, item in tqdm(enumerate(self.datagen_object), desc="Loading text data"):
-      text = item.text
-      # A single label is wrapped so downstream code always sees a list.
-      if isinstance(item.labels, list):
-        labels = item.labels  # clarifai concept
-      else:
-        labels = [item.labels]
-      # Positional id when the item has none of its own.
-      if item.id is None:
-        input_id = f"{self.dataset_id}-{self.split}-{idx}"
-      else:
-        input_id = f"{self.split}-{str(item.id)}"
-      metadata = Struct()
-      metadata.update({"split": self.split})
-      self.input_ids.append(input_id)
-      self._all_input_protos[input_id] = self.create_input_protos(text, labels, input_id,
-                                                                   self.dataset_id, metadata)

clarifai/data_upload/datasets/zoo/README.md DELETED Viewed

@@ -1,55 +0,0 @@
-## Datasets Zoo
-A collection of data preprocessing modules for popular public datasets to allow for compatible upload into Clarifai user app datasets.
-## Usage
-If a dataset module exists in the zoo, uploading that dataset is as simple as creating a python script (or using the command line) and specifying the dataset module name in the `from_zoo` parameter of the `UploadConfig` class, i.e.:
-```python
-from clarifai.data_upload.upload import UploadConfig
-upload_obj = UploadConfig(
-	user_id="",
-	app_id="",
-	pat="", # Clarifai user PAT (not Clarifai app PAT)
-	dataset_id="",
-	task="",
-	from_zoo="coco_detection",
-	split="val" # train, val or test depending on the dataset
-	)
-# execute data upload to Clarifai app dataset
-upload_obj.upload_to_clarifai()
-```
-## Zoo Datasets
- | dataset name | task | module name (.py) | splits |
- | --- | --- | --- | --- |
- | [COCO 2017](https://cocodataset.org/#download) | Detection | `coco_detection` | `train`, `val` |
- |        | Segmentation | `coco_segmentation` | `train`, `val` |
- |       | Captions | `coco_captions` | `train`, `val` |
- | [xVIEW](http://xviewdataset.org/) | Detection | `xview_detection` | `train` |
- | [ImageNet](https://www.image-net.org/) | Classification | `imagenet_classification` | `train` |
-## Contributing Modules
-A dataset (preprocessing) module is a python script that contains a dataset class which implements data download (to download the dataset from a source to local disk dir) & extraction and dataloader methods.
-The class naming convention is `<datasetname>Dataset`. The dataset class must accept `split` as the only argument in the `__init__` method and the `dataloader` method must be a generator that yields either of `VisualClassificationFeatures()`, `VisualDetectionFeatures()`, `VisualSegmentationFeatures()` or `TextFeatures()` as defined in [clarifai/data_upload/datasets/features.py](datasets/features.py). Other methods can be added as seen fit but `dataloader()` is the main method and must strictly be named `dataloader`.
-Reference can be taken from the existing dataset modules in the zoo for development.
-## Notes
-* Datasets in the zoo by default first create a `data` directory in the local directory where the call to `UploadConfig(...).upload_to_clarifai()` is made, then download the data into this `data` directory, preprocess it and finally upload it to a Clarifai app dataset. For instance, with the COCO dataset modules above, the coco2017 dataset is by default first downloaded into a `data` directory and extracted; preprocessing is then performed on it and it is finally uploaded to Clarifai.
-* Taking the above into consideration, to avoid the scripts re-downloading data you already have locally, create a `data` directory in the same directory where you'll make a call to `UploadConfig(...).upload_to_clarifai()` and move your extracted data there. **Ensure that the extracted folder/file names and file structure MATCH those when the downloaded zips are extracted.**
-* COCO Format: To reuse the coco modules above on your own coco-format data, first ensure that the criteria in the two points above are met. If so, pass the name of any of the coco modules in the zoo to the `from_zoo=` parameter in `UploadConfig()` and finally invoke the `upload_to_clarifai()` method.
-* xVIEW Dataset: To upload, you have to register and download the images and labels from [xviewdataset](http://xviewdataset.org/#dataset), then follow the steps mentioned above to place the extracted folder in the `data` directory. Finally, pass the xview module name to the `from_zoo=` parameter in `UploadConfig()` and invoke the `upload_to_clarifai()` method.
-* ImageNet Dataset: ImageNet Dataset should be downloaded and placed in the 'data' folder along with the [label mapping file](https://www.kaggle.com/competitions/imagenet-object-localization-challenge/data?select=LOC_synset_mapping.txt).
-		<data>/
-      	├── train/
-      	├── LOC_synset_mapping.txt

clarifai/data_upload/datasets/zoo/coco_segmentation.py DELETED Viewed

@@ -1,160 +0,0 @@
-#! COCO 2017 Image Segmentation dataset
-import gc
-import os
-import zipfile
-from functools import reduce
-from glob import glob
-import cv2
-import numpy as np
-import requests
-from pycocotools import mask as maskUtils
-from pycocotools.coco import COCO
-from tqdm import tqdm
-from ..features import VisualSegmentationFeatures
-class COCOSegmentationDataset:
-  """COCO 2017 Image Segmentation Dataset."""
-  def __init__(self, split: str = "train"):
-    """
-    Initialize coco dataset.
-    Args:
-      split: "train" or "val"; selects the annotation file and image directory
-        that `dataloader` reads.
-    """
-    # Zip archive names, keyed by the prefix used to detect already-extracted data.
-    self.filenames = {
-        "train": "train2017.zip",
-        "val": "val2017.zip",
-        "annotations": "annotations_trainval2017.zip"
-    }
-    self.split = split
-    self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
-    self.data_dir = os.path.join(os.curdir, "data")  # data storage dir
-    self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}
-  def coco_download(self, save_dir):
-    """
-    Download and extract any coco archive that is not yet present in `save_dir`.
-    An archive is skipped when `save_dir` already contains an entry whose name
-    starts with its key ("train", "val" or "annotations"). Each downloaded zip
-    is extracted into `save_dir` and then deleted.
-    Args:
-      save_dir: directory to download into; created if missing.
-    Raises:
-      requests.HTTPError: if a download responds with an error status.
-    """
-    os.makedirs(save_dir, exist_ok=True)
-    #check if train, val and annotation dirs exist
-    #so that the coco2017 data isn't downloaded
-    for key, filename in self.filenames.items():
-      if glob(f"{save_dir}/{key}*"):
-        print(f"{key} dataset already downloded and extracted")
-        continue
-      print("-" * 80)
-      print(f"Downloading {filename}")
-      print("-" * 80)
-      # Pick the base url per file. The original overwrote `self.url` here, so
-      # any later call on the same instance fetched image zips from the
-      # annotations url.
-      if "annotations" in filename:
-        base_url = "http://images.cocodataset.org/annotations/"
-      else:
-        base_url = self.url
-      archive_path = os.path.join(save_dir, filename)
-      # Context managers release the connection and file handles even on failure.
-      with requests.get(base_url + filename, stream=True) as response:
-        response.raise_for_status()
-        with open(archive_path, "wb") as _file:
-          for chunk in tqdm(response.iter_content(chunk_size=5124000)):
-            if chunk:
-              _file.write(chunk)
-      print("Coco data download complete...")
-      #extract files
-      with zipfile.ZipFile(archive_path) as zf:
-        print(f" Extracting {filename} file")
-        zf.extractall(path=save_dir)
-      # Delete coco zip
-      print(f" Deleting {filename}")
-      os.remove(path=archive_path)
-  def dataloader(self):
-    """
-    Transform coco data into clarifai proto compatible format for upload.
-    Downloads/extracts the dataset into `self.data_dir` if needed, then yields
-    one item per annotated image with polygon coordinates normalised by the
-    image width (x) and height (y).
-    Returns:
-      VisualSegmentationFeatures type generator.
-    Raises:
-      Exception: if `self.filenames` is not a dict of exactly 3 entries.
-      IndexError: if an expected directory, annotation file or image is missing.
-    """
-    if not (isinstance(self.filenames, dict) and len(self.filenames) == 3):
-      raise Exception(f"`filenames` must be a dict of atleast 3 coco zip file names; \
-      train, val and annotations. Found {len(self.filenames)} items instead.")
-    self.coco_download(self.data_dir)
-    dir_entries = os.listdir(self.data_dir)
-    for key in ("train", "val", "annotations"):
-      # First directory entry whose name contains the key.
-      self.extracted_coco_dirs[key] = [
-          os.path.join(self.data_dir, entry) for entry in dir_entries if key in entry
-      ][0]
-    annot_file = glob(self.extracted_coco_dirs["annotations"] + "/" +\
-     f"instances_{self.split}*")[0]
-    coco = COCO(annot_file)
-    categories = coco.loadCats(coco.getCatIds())
-    cat_id_map = {category["id"]: category["name"] for category in categories}
-    # Image ids per category are kept as sets so the per-image membership test
-    # below is O(1) instead of scanning a list for every category.
-    cat_img_ids = {}
-    img_ids = []
-    for cat_id in cat_id_map:
-      ids_for_cat = coco.getImgIds(catIds=[cat_id])
-      img_ids.extend(ids_for_cat)
-      cat_img_ids[cat_id] = set(ids_for_cat)
-    #get annotations for each image id
-    for _id in set(img_ids):
-      annots = []  # polygons
-      class_names = []
-      labels = [cat_id for cat_id, ids_for_cat in cat_img_ids.items() if _id in ids_for_cat]
-      image_path = glob(self.extracted_coco_dirs[self.split]+"/"+\
-      f"{str(_id).zfill(12)}*")[0]
-      image_height, image_width = cv2.imread(image_path).shape[:2]
-      for cat_id in labels:
-        annot_ids = coco.getAnnIds(imgIds=_id, catIds=[cat_id])
-        if not annot_ids:  # no annotations for given image_id-cat_id pair
-          continue
-        for ann in coco.loadAnns(annot_ids):
-          segmentation = ann['segmentation']
-          # get polygons
-          if isinstance(segmentation, list):
-            for seg in segmentation:
-              # dtype=float is required: integer-only coordinates would produce
-              # an int array and the in-place division would truncate to 0/1.
-              poly = np.array(seg, dtype=float).reshape((len(seg) // 2, 2))
-              poly[:, 0] = poly[:, 0] / image_width
-              poly[:, 1] = poly[:, 1] / image_height
-              annots.append(poly.tolist())  #[[x=col, y=row],...]
-              class_names.append(cat_id_map[cat_id])
-            continue
-          # seg: {"counts":[...]}
-          if isinstance(segmentation['counts'], list):
-            rle = maskUtils.frPyObjects([segmentation], image_height, image_width)
-          else:
-            rle = segmentation
-          mask = maskUtils.decode(rle)  #binary mask
-          #convert mask to polygons and add to annots
-          contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-          # Keep contours with at least 3 points and concatenate their x,y values
-          # into one flat list (all contours of a mask form a single polygon entry).
-          polygons_flattened = [
-              coord for cont in contours if cont.size >= 6
-              for coord in cont.astype(float).flatten().tolist()
-          ]
-          del contours
-          del mask
-          gc.collect()
-          if not polygons_flattened:
-            # No usable contour: skip instead of crashing on an empty reduce().
-            continue
-          # store polygons in (x,y) pairs
-          polygons = np.array(polygons_flattened).reshape((len(polygons_flattened) // 2, 2))
-          polygons[:, 0] = polygons[:, 0] / image_width
-          polygons[:, 1] = polygons[:, 1] / image_height
-          annots.append(polygons.tolist())  #[[x=col, y=row],...,[x=col, y=row]]
-          class_names.append(cat_id_map[cat_id])
-      assert len(class_names) == len(annots), f"Num classes must match num annotations\
-      for a single image. Found {len(class_names)} classes and {len(annots)} polygons."
-      yield VisualSegmentationFeatures(image_path, class_names, annots, id=_id)

clarifai/data_upload/examples/README.md DELETED Viewed

@@ -1,5 +0,0 @@
-## Data upload from packages
-Examples of how to upload your local directory datasets into clarifai app datasets using data_upload utils `from_module` feature.
-The `examples.py` script imports the data upload module and executes the upload of the IMDB_reviews text_classification and [food-101 image classification](https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/) datasets by passing the package paths as a parameter in UploadConfig().

clarifai/data_upload/examples/image_classification/cifar10/dataset.py DELETED Viewed

@@ -1,40 +0,0 @@
-#! Cifar10 Dataset
-import csv
-import os
-from clarifai.data_upload.datasets.features import VisualClassificationFeatures
-class Cifar10Dataset:
-  """Cifar10 Dataset."""
-  def __init__(self, split: str = "train"):
-    """
-    Initialize dataset params.
-    Args:
-      split: "train" or "test"; selects which csv file next to this module is read.
-    """
-    self.split = split
-    # csv files are resolved relative to this module, one per split.
-    self.data_dirs = {
-        "train": os.path.join(os.path.dirname(__file__), "cifar_small_train.csv"),
-        "test": os.path.join(os.path.dirname(__file__), "cifar_small_test.csv")
-    }
-  def dataloader(self):
-    """
-    Transform the cifar10 csv rows into clarifai proto compatible
-    format for upload.
-    The header row is skipped; column 0 of each row is appended to the
-    'examples/image_classification/cifar10/' prefix to form the image path
-    and column 1 is used as the label.
-    Returns:
-      VisualClassificationFeatures type generator.
-    Raises:
-      KeyError: if `self.split` is not "train" or "test".
-    """
-    ## Your preprocessing code here
-    # newline='' is what the csv module documents for files passed to csv.reader.
-    with open(self.data_dirs[self.split], newline='') as _file:
-      reader = csv.reader(_file)
-      next(reader, None)  # skip header
-      for row in reader:
-        yield VisualClassificationFeatures(
-            image_path='examples/image_classification/cifar10/' + row[0],
-            label=row[1],
-            id=None)

clarifai/data_upload/examples/image_classification/food-101/dataset.py DELETED Viewed

@@ -1,39 +0,0 @@
-#! Food-101 image classification dataset
-import os
-from clarifai.data_upload.datasets.features import VisualClassificationFeatures
-class Food101Dataset:
-  """Food-101 Image Classification Dataset.
-  url: https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/
-  """
-  def __init__(self, split: str = "train"):
-    """
-    Store the split and the per-split image directories.
-    Args:
-      split: dataset split to load. Only "train" has a directory configured
-        here (the `images` folder next to this module).
-    """
-    self.split = split
-    module_dir = os.path.dirname(__file__)
-    self.image_dir = {"train": os.path.join(module_dir, "images")}
-  def dataloader(self):
-    """
-    Walk the split's image directory and convert it into clarifai proto
-    compatible features for upload.
-    Every sub-directory name is used as the class label for the files inside it.
-    Returns:
-      VisualClassificationFeatures type generator.
-    """
-    ## Your preprocessing code here
-    split_root = self.image_dir[self.split]
-    for class_name in os.listdir(split_root):
-      class_dir = os.path.join(split_root, class_name)
-      for image in os.listdir(class_dir):
-        yield VisualClassificationFeatures(
-            image_path=os.path.join(split_root, class_name, image),
-            label=class_name,
-            id=None  # or image_id
-        )