clarifai 9.0.0__py3-none-any.whl → 9.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. clarifai/data_upload/datasets/__init__.py +0 -0
  2. clarifai/data_upload/datasets/base.py +67 -0
  3. clarifai/data_upload/datasets/features.py +45 -0
  4. clarifai/data_upload/datasets/image.py +236 -0
  5. clarifai/data_upload/datasets/text.py +62 -0
  6. clarifai/data_upload/datasets/zoo/__init__.py +0 -0
  7. clarifai/data_upload/datasets/zoo/coco_captions.py +99 -0
  8. clarifai/data_upload/datasets/zoo/coco_detection.py +129 -0
  9. clarifai/data_upload/datasets/zoo/coco_segmentation.py +158 -0
  10. clarifai/data_upload/examples.py +19 -0
  11. clarifai/data_upload/upload.py +269 -168
  12. clarifai/listing/installed_module_versions.py +3 -14
  13. clarifai/listing/lister.py +40 -0
  14. clarifai/listing/module_versions.py +42 -0
  15. clarifai/listing/modules.py +36 -0
  16. clarifai/modules/style.css +7 -4
  17. {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/METADATA +3 -3
  18. {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/RECORD +37 -13
  19. clarifai_utils/data_upload/datasets/__init__.py +0 -0
  20. clarifai_utils/data_upload/datasets/base.py +67 -0
  21. clarifai_utils/data_upload/datasets/features.py +45 -0
  22. clarifai_utils/data_upload/datasets/image.py +236 -0
  23. clarifai_utils/data_upload/datasets/text.py +62 -0
  24. clarifai_utils/data_upload/datasets/zoo/__init__.py +0 -0
  25. clarifai_utils/data_upload/datasets/zoo/coco_captions.py +99 -0
  26. clarifai_utils/data_upload/datasets/zoo/coco_detection.py +129 -0
  27. clarifai_utils/data_upload/datasets/zoo/coco_segmentation.py +158 -0
  28. clarifai_utils/data_upload/examples.py +19 -0
  29. clarifai_utils/data_upload/upload.py +269 -168
  30. clarifai_utils/listing/installed_module_versions.py +3 -14
  31. clarifai_utils/listing/lister.py +40 -0
  32. clarifai_utils/listing/module_versions.py +42 -0
  33. clarifai_utils/listing/modules.py +36 -0
  34. clarifai_utils/modules/style.css +7 -4
  35. {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/LICENSE +0 -0
  36. {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/WHEEL +0 -0
  37. {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,62 @@
1
+ from typing import Iterator, List
2
+
3
+ from clarifai_grpc.grpc.api import resources_pb2
4
+ from google.protobuf.struct_pb2 import Struct
5
+ from tqdm import tqdm
6
+
7
+ from .base import ClarifaiDataset
8
+
9
+
10
class TextClassificationDataset(ClarifaiDataset):
    """
    Builds Clarifai input protos from text classification samples.
    """

    def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
        # Nothing extra to set up; all state is initialised by the base class.
        super().__init__(datagen_object, dataset_id, split)

    def create_input_protos(self, text_input: str, labels: List[str], input_id: str,
                            dataset_id: str, metadata: Struct) -> resources_pb2.Input:
        """
        Build the input proto for one (text, labels) sample.
        Args:
          `text_input`: raw text of the sample.
          `labels`: label names; each one becomes a concept with value 1.
          `input_id`: id assigned to the created input.
          `dataset_id`: id of the dataset the input is attached to.
          `metadata`: metadata struct stored alongside the text.
        Returns:
          A `resources_pb2.Input` describing the sample.
        """
        concepts = []
        for _label in labels:
            # The concept id is the label with every space character removed.
            concept_id = "id-" + _label.replace(' ', '')
            concepts.append(resources_pb2.Concept(id=concept_id, name=_label, value=1.))

        data = resources_pb2.Data(
            text=resources_pb2.Text(raw=text_input), concepts=concepts, metadata=metadata)
        return resources_pb2.Input(id=input_id, dataset_ids=[dataset_id], data=data)

    def _get_input_protos(self) -> Iterator:
        """
        Convert every item of the data generator into an input proto.
        Each proto is appended to `self._all_input_protos`.
        Returns:
          An iterator over `self._all_input_protos`.
        """
        for idx, sample in tqdm(enumerate(self.datagen_object), desc="Loading text data"):
            text = sample.text
            raw_labels = sample.labels
            # A single label is wrapped so that concepts are always built from a list.
            labels = raw_labels if isinstance(raw_labels, list) else [raw_labels]

            if sample.id is None:
                # No id supplied by the sample: fall back to its position in the generator.
                input_id = f"{self.dataset_id}-{self.split}-{idx}"
            else:
                input_id = f"{self.split}-{str(sample.id)}"

            metadata = Struct()
            metadata.update({"label": labels, "split": self.split})

            self._all_input_protos.append(
                self.create_input_protos(text, labels, input_id, self.dataset_id, metadata))

        return iter(self._all_input_protos)
File without changes
@@ -0,0 +1,99 @@
1
+ #! COCO 2017 image captioning dataset
2
+
3
+ import os
4
+ import zipfile
5
+ from glob import glob
6
+
7
+ import requests
8
+ from pycocotools.coco import COCO
9
+ from tqdm import tqdm
10
+
11
+ from ..features import VisualClassificationFeatures
12
+
13
+
14
class COCOCaptionsDataset:
    """COCO 2017 Image Captioning Dataset.

    Downloads (when missing) and extracts the COCO 2017 archives into
    `self.data_dir`, then yields one feature object per caption annotation.
    """

    def __init__(self, split: str = "train"):
        """
        Initialize coco dataset.
        Args:
          split: "train" or "val"
        """
        # Archive name per extracted-directory prefix.
        self.filenames = {
            "train": "train2017.zip",
            "val": "val2017.zip",
            "annotations": "annotations_trainval2017.zip"
        }
        self.split = split
        self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
        self.data_dir = os.path.join(os.curdir, ".data")  # data storage directory
        self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}

    @staticmethod
    def _first_match(pattern):
        """Return the first (sorted) path matching `pattern`.

        Raises:
          FileNotFoundError: if nothing matches the pattern.
        """
        matches = sorted(glob(pattern))
        if not matches:
            raise FileNotFoundError(f"No file or directory matches `{pattern}`.")
        return matches[0]

    def coco_download(self, save_dir):
        """Download and extract every coco archive that is not yet in `save_dir`.

        Args:
          save_dir: directory that receives the extracted archives; created
            (including parents) when it does not exist.
        """
        os.makedirs(save_dir, exist_ok=True)

        for key, filename in self.filenames.items():
            # An archive counts as present only if a *directory* with the key
            # prefix exists; a leftover zip from a failed run must not match.
            if any(os.path.isdir(path) for path in glob(f"{save_dir}/{key}*")):
                print("Dataset already downloded and extracted")
                continue

            print("-" * 80)
            print(f"Downloading {filename}")
            print("-" * 80)

            # Keep the base url local so `self.url` is not overwritten and the
            # result does not depend on the order of `self.filenames`.
            base_url = self.url
            if "annotations" in filename:
                base_url = "http://images.cocodataset.org/annotations/"

            zip_path = os.path.join(save_dir, filename)
            with requests.get(base_url + filename, stream=True) as response:
                response.raise_for_status()
                with open(zip_path, "wb") as _file:
                    for chunk in tqdm(response.iter_content(chunk_size=5124000)):
                        if chunk:
                            _file.write(chunk)
            print("Data download complete...")

            # extract files
            with zipfile.ZipFile(zip_path) as zf:
                print(f" Extracting {filename} file")
                zf.extractall(path=save_dir)
            # Delete coco zip
            print(f" Deleting {filename}")
            os.remove(path=zip_path)

    def _resolve_extracted_dirs(self):
        """Validate `self.filenames`, fetch missing data and fill `self.extracted_coco_dirs`.

        Raises:
          ValueError: if `self.filenames` is not a dict with three entries.
          FileNotFoundError: if an expected extracted directory is missing.
        """
        if not (isinstance(self.filenames, dict) and len(self.filenames) == 3):
            raise ValueError("`filenames` must be a dict of 3 coco zip file names; "
                             "train, val and annotations. "
                             f"Found {len(self.filenames)} items instead.")

        self.coco_download(self.data_dir)
        for key in self.extracted_coco_dirs:
            # Same prefix rule as the download check in `coco_download`.
            candidates = sorted(
                path for path in glob(os.path.join(self.data_dir, f"{key}*"))
                if os.path.isdir(path))
            if not candidates:
                raise FileNotFoundError(
                    f"No extracted `{key}` directory found in `{self.data_dir}`.")
            self.extracted_coco_dirs[key] = candidates[0]

    def dataloader(self):
        """
        Transform coco image captioning data into clarifai proto compatible
        format for upload.
        Returns:
          VisualClassificationFeatures type generator.
        """
        self._resolve_extracted_dirs()

        annot_file = self._first_match(
            self.extracted_coco_dirs["annotations"] + "/" + f"captions_{self.split}*")
        coco = COCO(annot_file)
        annotations = coco.loadAnns(coco.getAnnIds())
        for annot in annotations:
            # Image files are looked up by the zero-padded 12 digit image id.
            image_path = self._first_match(
                self.extracted_coco_dirs[self.split] + "/" +
                f"{str(annot['image_id']).zfill(12)}*")
            # image_captioning and image classification datasets have the same
            # image-label input feature formats
            yield VisualClassificationFeatures(image_path, annot["caption"], annot["image_id"])
@@ -0,0 +1,129 @@
1
+ #! COCO 2017 detection dataset
2
+
3
+ import os
4
+ import zipfile
5
+ from glob import glob
6
+
7
+ import cv2
8
+ import requests
9
+ from pycocotools.coco import COCO
10
+ from tqdm import tqdm
11
+
12
+ from ..features import VisualDetectionFeatures
13
+
14
+
15
class COCODetectionDataset:
    """COCO 2017 Image Detection Dataset.

    Downloads (when missing) and extracts the COCO 2017 archives into
    `self.data_dir`, then yields one feature object per annotated image.
    """

    def __init__(self, split: str = "train"):
        """
        Initialize coco dataset.
        Args:
          split: "train" or "val"
        """
        # Archive name per extracted-directory prefix.
        self.filenames = {
            "train": "train2017.zip",
            "val": "val2017.zip",
            "annotations": "annotations_trainval2017.zip"
        }
        self.split = split
        self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
        self.data_dir = os.path.join(os.curdir, ".data")  # data storage directory
        self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}

    @staticmethod
    def _first_match(pattern):
        """Return the first (sorted) path matching `pattern`.

        Raises:
          FileNotFoundError: if nothing matches the pattern.
        """
        matches = sorted(glob(pattern))
        if not matches:
            raise FileNotFoundError(f"No file or directory matches `{pattern}`.")
        return matches[0]

    def coco_download(self, save_dir):
        """Download and extract every coco archive that is not yet in `save_dir`.

        Args:
          save_dir: directory that receives the extracted archives; created
            (including parents) when it does not exist.
        """
        os.makedirs(save_dir, exist_ok=True)

        for key, filename in self.filenames.items():
            # An archive counts as present only if a *directory* with the key
            # prefix exists; a leftover zip from a failed run must not match.
            if any(os.path.isdir(path) for path in glob(f"{save_dir}/{key}*")):
                print("dataset already downloded and extracted")
                continue

            print("-" * 80)
            print(f"Downloading {filename}")
            print("-" * 80)

            # Keep the base url local so `self.url` is not overwritten and the
            # result does not depend on the order of `self.filenames`.
            base_url = self.url
            if "annotations" in filename:
                base_url = "http://images.cocodataset.org/annotations/"

            zip_path = os.path.join(save_dir, filename)
            with requests.get(base_url + filename, stream=True) as response:
                response.raise_for_status()
                with open(zip_path, "wb") as _file:
                    for chunk in tqdm(response.iter_content(chunk_size=5124000)):
                        if chunk:
                            _file.write(chunk)
            print("Coco data download complete...")

            # extract files
            with zipfile.ZipFile(zip_path) as zf:
                print(f" Extracting {filename} file")
                zf.extractall(path=save_dir)
            # Delete coco zip
            print(f" Deleting {filename}")
            os.remove(path=zip_path)

    def _resolve_extracted_dirs(self):
        """Validate `self.filenames`, fetch missing data and fill `self.extracted_coco_dirs`.

        Raises:
          ValueError: if `self.filenames` is not a dict with three entries.
          FileNotFoundError: if an expected extracted directory is missing.
        """
        if not (isinstance(self.filenames, dict) and len(self.filenames) == 3):
            raise ValueError("`filenames` must be a dict of 3 coco zip file names; "
                             "train, val and annotations. "
                             f"Found {len(self.filenames)} items instead.")

        self.coco_download(self.data_dir)
        for key in self.extracted_coco_dirs:
            # Same prefix rule as the download check in `coco_download`.
            candidates = sorted(
                path for path in glob(os.path.join(self.data_dir, f"{key}*"))
                if os.path.isdir(path))
            if not candidates:
                raise FileNotFoundError(
                    f"No extracted `{key}` directory found in `{self.data_dir}`.")
            self.extracted_coco_dirs[key] = candidates[0]

    def dataloader(self):
        """
        Transform coco object detection data into clarifai proto compatible
        format for upload.
        Returns:
          VisualDetectionFeatures type generator. Each image is yielded once,
          with boxes as [x_min, y_min, x_max, y_max] divided by the image
          width/height.
        """
        self._resolve_extracted_dirs()

        annot_file = self._first_match(
            self.extracted_coco_dirs["annotations"] + "/" + f"instances_{self.split}*")
        coco = COCO(annot_file)
        categories = coco.loadCats(coco.getCatIds())
        cat_id_map = {category["id"]: category["name"] for category in categories}

        # Per-category image ids are kept as sets for cheap membership tests.
        # The dict collects every image id exactly once, in first-seen order:
        # an image with several categories must not be yielded repeatedly.
        cat_img_ids = {}
        unique_img_ids = {}
        for cat_id in cat_id_map:
            ids = coco.getImgIds(catIds=[cat_id])
            cat_img_ids[cat_id] = set(ids)
            unique_img_ids.update(dict.fromkeys(ids))

        # get annotations for each image id
        for _id in unique_img_ids:
            annots = []  # bboxes
            class_names = []
            labels = [cat_id for cat_id, ids in cat_img_ids.items() if _id in ids]
            image_path = self._first_match(
                self.extracted_coco_dirs[self.split] + "/" + f"{str(_id).zfill(12)}*")

            image = cv2.imread(image_path)
            if image is None:  # cv2.imread signals an unreadable file with None
                raise ValueError(f"Unable to read image `{image_path}`.")
            image_height, image_width = image.shape[:2]

            for cat_id in labels:
                annot_ids = coco.getAnnIds(imgIds=_id, catIds=[cat_id])
                if not annot_ids:  # no annotations for given image_id-cat_id pair
                    continue
                for ann in coco.loadAnns(annot_ids):
                    # coco bbox layout is [x, y, width, height] in pixels
                    left, top, box_width, box_height = ann['bbox'][:4]
                    class_names.append(cat_id_map[cat_id])
                    annots.append([
                        left / image_width,  # left_col
                        top / image_height,  # top_row
                        (left + box_width) / image_width,  # right_col
                        (top + box_height) / image_height,  # bottom_row
                    ])
            assert len(class_names) == len(annots), (
                "Num classes must match num bbox annotations for a single image. "
                f"Found {len(class_names)} classes and {len(annots)} bboxes.")

            yield VisualDetectionFeatures(image_path, class_names, annots, _id)
@@ -0,0 +1,158 @@
1
+ #! COCO 2017 Image Segmentation dataset
2
+
3
+ import gc
4
+ import os
5
+ import zipfile
6
+ from functools import reduce
7
+ from glob import glob
8
+
9
+ import cv2
10
+ import numpy as np
11
+ import requests
12
+ from pycocotools import mask as maskUtils
13
+ from pycocotools.coco import COCO
14
+ from tqdm import tqdm
15
+
16
+ from ..features import VisualSegmentationFeatures
17
+
18
+
19
class COCOSegmentationDataset:
    """COCO 2017 Image Segmentation Dataset.

    Downloads (when missing) and extracts the COCO 2017 archives into
    `self.data_dir`, then yields one feature object per annotated image.
    """

    def __init__(self, split: str = "train"):
        """
        Initialize coco dataset.
        Args:
          split: "train" or "val"
        """
        # Archive name per extracted-directory prefix.
        self.filenames = {
            "train": "train2017.zip",
            "val": "val2017.zip",
            "annotations": "annotations_trainval2017.zip"
        }
        self.split = split
        self.url = "http://images.cocodataset.org/zips/"  # coco base image-zip url
        self.data_dir = os.path.join(os.curdir, ".data")  # data storage dir
        self.extracted_coco_dirs = {"train": None, "val": None, "annotations": None}

    @staticmethod
    def _first_match(pattern):
        """Return the first (sorted) path matching `pattern`.

        Raises:
          FileNotFoundError: if nothing matches the pattern.
        """
        matches = sorted(glob(pattern))
        if not matches:
            raise FileNotFoundError(f"No file or directory matches `{pattern}`.")
        return matches[0]

    @staticmethod
    def _normalize_polygon(flat_points, image_width, image_height):
        """Turn a flat [x0, y0, x1, y1, ...] list into [[x, y], ...] scaled by the image size.

        The array is created as float so that integer pixel coordinates are
        not truncated to 0 by the in-place division.
        """
        points = np.array(flat_points, dtype=float).reshape((len(flat_points) // 2, 2))
        points[:, 0] /= image_width
        points[:, 1] /= image_height
        return points.tolist()

    def coco_download(self, save_dir):
        """Download and extract every coco archive that is not yet in `save_dir`.

        Args:
          save_dir: directory that receives the extracted archives; created
            (including parents) when it does not exist.
        """
        os.makedirs(save_dir, exist_ok=True)

        for key, filename in self.filenames.items():
            # An archive counts as present only if a *directory* with the key
            # prefix exists; a leftover zip from a failed run must not match.
            if any(os.path.isdir(path) for path in glob(f"{save_dir}/{key}*")):
                print("dataset already downloded and extracted")
                continue

            print("-" * 80)
            print(f"Downloading {filename}")
            print("-" * 80)

            # Keep the base url local so `self.url` is not overwritten and the
            # result does not depend on the order of `self.filenames`.
            base_url = self.url
            if "annotations" in filename:
                base_url = "http://images.cocodataset.org/annotations/"

            zip_path = os.path.join(save_dir, filename)
            with requests.get(base_url + filename, stream=True) as response:
                response.raise_for_status()
                with open(zip_path, "wb") as _file:
                    for chunk in tqdm(response.iter_content(chunk_size=5124000)):
                        if chunk:
                            _file.write(chunk)
            print("Coco data download complete...")

            # extract files
            with zipfile.ZipFile(zip_path) as zf:
                print(f" Extracting {filename} file")
                zf.extractall(path=save_dir)
            # Delete coco zip
            print(f" Deleting {filename}")
            os.remove(path=zip_path)

    def _resolve_extracted_dirs(self):
        """Validate `self.filenames`, fetch missing data and fill `self.extracted_coco_dirs`.

        Raises:
          ValueError: if `self.filenames` is not a dict with three entries.
          FileNotFoundError: if an expected extracted directory is missing.
        """
        if not (isinstance(self.filenames, dict) and len(self.filenames) == 3):
            raise ValueError("`filenames` must be a dict of 3 coco zip file names; "
                             "train, val and annotations. "
                             f"Found {len(self.filenames)} items instead.")

        self.coco_download(self.data_dir)
        for key in self.extracted_coco_dirs:
            # Same prefix rule as the download check in `coco_download`.
            candidates = sorted(
                path for path in glob(os.path.join(self.data_dir, f"{key}*"))
                if os.path.isdir(path))
            if not candidates:
                raise FileNotFoundError(
                    f"No extracted `{key}` directory found in `{self.data_dir}`.")
            self.extracted_coco_dirs[key] = candidates[0]

    def dataloader(self):
        """
        Transform coco data into clarifai proto compatible format for upload.
        Returns:
          VisualSegmentationFeatures type generator. Each image is yielded
          once; every polygon is a list of [x, y] points divided by the image
          width/height and is paired with exactly one class name.
        """
        self._resolve_extracted_dirs()

        annot_file = self._first_match(
            self.extracted_coco_dirs["annotations"] + "/" + f"instances_{self.split}*")
        coco = COCO(annot_file)
        categories = coco.loadCats(coco.getCatIds())
        cat_id_map = {category["id"]: category["name"] for category in categories}

        # Per-category image ids are kept as sets for cheap membership tests.
        # The dict collects every image id exactly once, in first-seen order:
        # an image with several categories must not be yielded repeatedly.
        cat_img_ids = {}
        unique_img_ids = {}
        for cat_id in cat_id_map:
            ids = coco.getImgIds(catIds=[cat_id])
            cat_img_ids[cat_id] = set(ids)
            unique_img_ids.update(dict.fromkeys(ids))

        # get annotations for each image id
        for _id in unique_img_ids:
            annots = []  # polygons
            class_names = []
            labels = [cat_id for cat_id, ids in cat_img_ids.items() if _id in ids]
            image_path = self._first_match(
                self.extracted_coco_dirs[self.split] + "/" + f"{str(_id).zfill(12)}*")

            image = cv2.imread(image_path)
            if image is None:  # cv2.imread signals an unreadable file with None
                raise ValueError(f"Unable to read image `{image_path}`.")
            image_height, image_width = image.shape[:2]

            for cat_id in labels:
                annot_ids = coco.getAnnIds(imgIds=_id, catIds=[cat_id])
                if not annot_ids:  # no annotations for given image_id-cat_id pair
                    continue
                for ann in coco.loadAnns(annot_ids):
                    segmentation = ann['segmentation']
                    if isinstance(segmentation, list):
                        # One annotation may hold several polygons; the class
                        # name is recorded per polygon so both lists stay aligned.
                        for seg in segmentation:
                            annots.append(
                                self._normalize_polygon(seg, image_width, image_height))
                            class_names.append(cat_id_map[cat_id])  # [[x=col, y=row],...]
                        continue

                    # seg: {"counts":[...]}
                    if isinstance(segmentation['counts'], list):
                        rle = maskUtils.frPyObjects([segmentation], image_height, image_width)
                    else:
                        rle = segmentation
                    mask = maskUtils.decode(rle)  # binary mask
                    # convert mask to polygons and add to annots
                    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                    # Contours with fewer than three points are dropped; the
                    # remaining ones are concatenated into one flat x,y list.
                    flat_points = [
                        coord for cont in contours if cont.size >= 6
                        for coord in cont.astype(float).flatten().tolist()
                    ]
                    if not flat_points:  # mask produced no usable contour
                        continue
                    annots.append(
                        self._normalize_polygon(flat_points, image_width, image_height))
                    class_names.append(cat_id_map[cat_id])  # [[x=col, y=row],...,[x=col, y=row]]
            assert len(class_names) == len(annots), (
                "Num classes must match num annotations for a single image. "
                f"Found {len(class_names)} classes and {len(annots)} polygons.")

            yield VisualSegmentationFeatures(image_path, class_names, annots, _id)
@@ -0,0 +1,19 @@
1
+ #! Execute dataset upload using the `from_module` upload feature
2
+
3
+ from clarifai.data_upload.upload import UploadConfig
4
+
5
# Keyword arguments handed to UploadConfig. The empty strings are placeholders
# that must be filled in before the example is run.
_upload_settings = {
    "user_id": "",
    "app_id": "",
    "pat": "",
    "dataset_id": "",
    "task": "visual_clf",
    "from_module": "./examples/image_classification/cifar10",
    "split": "train",
    "portal": "clarifai",  #clarifai(prod), dev or staging
}

text_upload_obj = UploadConfig(**_upload_settings)
## change the task and from_module arguments in UploadConfig() to upload
## example food-101 dataset

if __name__ == "__main__":
    # Only start the upload when the file is executed as a script.
    text_upload_obj.upload_to_clarifai()