clarifai 9.0.0__py3-none-any.whl → 9.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/data_upload/datasets/__init__.py +0 -0
- clarifai/data_upload/datasets/base.py +67 -0
- clarifai/data_upload/datasets/features.py +45 -0
- clarifai/data_upload/datasets/image.py +236 -0
- clarifai/data_upload/datasets/text.py +62 -0
- clarifai/data_upload/datasets/zoo/__init__.py +0 -0
- clarifai/data_upload/datasets/zoo/coco_captions.py +99 -0
- clarifai/data_upload/datasets/zoo/coco_detection.py +129 -0
- clarifai/data_upload/datasets/zoo/coco_segmentation.py +158 -0
- clarifai/data_upload/examples.py +19 -0
- clarifai/data_upload/upload.py +269 -168
- clarifai/listing/installed_module_versions.py +3 -14
- clarifai/listing/lister.py +40 -0
- clarifai/listing/module_versions.py +42 -0
- clarifai/listing/modules.py +36 -0
- clarifai/modules/style.css +7 -4
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/METADATA +3 -3
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/RECORD +37 -13
- clarifai_utils/data_upload/datasets/__init__.py +0 -0
- clarifai_utils/data_upload/datasets/base.py +67 -0
- clarifai_utils/data_upload/datasets/features.py +45 -0
- clarifai_utils/data_upload/datasets/image.py +236 -0
- clarifai_utils/data_upload/datasets/text.py +62 -0
- clarifai_utils/data_upload/datasets/zoo/__init__.py +0 -0
- clarifai_utils/data_upload/datasets/zoo/coco_captions.py +99 -0
- clarifai_utils/data_upload/datasets/zoo/coco_detection.py +129 -0
- clarifai_utils/data_upload/datasets/zoo/coco_segmentation.py +158 -0
- clarifai_utils/data_upload/examples.py +19 -0
- clarifai_utils/data_upload/upload.py +269 -168
- clarifai_utils/listing/installed_module_versions.py +3 -14
- clarifai_utils/listing/lister.py +40 -0
- clarifai_utils/listing/module_versions.py +42 -0
- clarifai_utils/listing/modules.py +36 -0
- clarifai_utils/modules/style.css +7 -4
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/LICENSE +0 -0
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/WHEEL +0 -0
- {clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/top_level.txt +0 -0
clarifai_utils/data_upload/upload.py CHANGED

@@ -1,201 +1,302 @@
 #! Clarifai data upload
 
+import importlib
+import inspect
+import os
+import sys
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from multiprocessing import cpu_count
+from typing import Iterator, Optional, Tuple, Union
 
-from
-from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
 from clarifai_grpc.grpc.api.status import status_code_pb2
-from datasets import (ImageClassificationDataset, TextClassificationDataset,
-                      VisualDetectionDataset, VisualSegmentationDataset)
-from omegaconf import OmegaConf
 from tqdm import tqdm
 
 from clarifai.client import create_stub
+from clarifai.data_upload.datasets.base import Chunker
+from clarifai.data_upload.datasets.image import (VisualClassificationDataset,
+                                                 VisualDetectionDataset, VisualSegmentationDataset)
+from clarifai.data_upload.datasets.text import TextClassificationDataset
 
 
-def
+def load_dataset(module_dir: Union[str, os.PathLike], split: str) -> Iterator:
   """
-
+  Validate and import dataset module data generator.
   Args:
-
-
-
+    `module_dir`: relative path to the module directory
+      The directory must contain a `dataset.py` script and the data itself.
+    `split`: "train" or "val"/"test" dataset split
+  Module Directory Structure:
+  ---------------------------
+      <folder_name>/
+      ├──__init__.py
+      ├──<Your local dir dataset>/
+      └──dataset.py
+  dataset.py must implement a class named following the convention,
+  <dataset_name>Dataset and this class must have a dataloader()
+  generator method
   """
-
-
-
-
-
-
-
-
-
-
-
-  if
-
-      print(f"Post inputs failed, status:\n{response.inputs[0].status.details}\n")
-    except:
-      print(f"Post inputs failed, status:\n{response.status.details}\n")
-      retry_upload.append(inp_proto)
+  sys.path.append(str(module_dir))
+
+  if not os.path.exists(os.path.join(module_dir, "__init__.py")):
+    with open(os.path.join(module_dir, "__init__.py"), "w"):
+      pass
+
+  import dataset  # dataset module
+
+  # get main module class
+  main_module_cls = None
+  for name, obj in dataset.__dict__.items():
+    if inspect.isclass(obj) and "Dataset" in name:
+      main_module_cls = obj
     else:
-
+      continue
 
-  return
+  return main_module_cls(split).dataloader()
 
 
-def
+def load_zoo_dataset(name: str, split: str) -> Iterator:
   """
-
+  Get dataset generator object from dataset zoo.
+  Args:
+    `name`: dataset module name in datasets/zoo/.
+    `split`: "train" or "val"/"test" dataset split
+  Returns:
+    Data generator object
   """
-
-
-
-
-
-
-
-  for annot_proto in inputs:
-    response = STUB.PostAnnotations(
-        service_pb2.PostAnnotationsRequest(user_app_id=USER_APP_ID, annotations=[annot_proto]),)
-
-    if response.status.code != status_code_pb2.SUCCESS:
-      try:
-        print(f"Post annotations failed, status:\n{response.annotations[0].status.details}\n")
-      except:
-        print(f"Post annotations failed, status:\n{response.status.details}\n")
-        retry_upload.append(annot_proto)
+  zoo_dataset = importlib.import_module(f"datasets.zoo.{name}")
+  # get main module class
+  main_module_cls = None
+  for name, obj in zoo_dataset.__dict__.items():
+    if inspect.isclass(obj) and "Dataset" in name:
+      main_module_cls = obj
     else:
-
+      continue
+
+  return main_module_cls(split).dataloader()
+
+
+class UploadConfig:
+
+  def __init__(
+      self,
+      user_id: str,
+      app_id: str,
+      pat: str,
+      dataset_id: str,
+      task: str,
+      from_module: Optional[Union[str, os.PathLike]] = None,
+      from_zoo: Optional[str] = None,  # load dataset from zoo
+      split: str = "train",  # train or test/val
+      chunk_size: int = 16,
+      portal: str = "clarifai"):
+    """
+    Initialize upload configs.
+    Args:
+      `user_id`: Clarifai user id.
+      `app_id`: Clarifai app id.
+      `pat`: Clarifai PAT(Personal Access Token).
+      `dataset_id`: Clarifai dataset id (where data is to be uploaded).
+      `task`: either of `visual_clf`, `visual_detection`, `visual_segmentation` or `text_clf`.
+      `from_module`: Path to dataset module directory.
+        Should be left as None if `from_zoo` is to be used.
+      `from_zoo`: Name of dataset to upload from the zoo.
+        The name must match the dataset module name excluding the file extension.
+        Should be left as None if `from_module` is to be used.
+      `split`: Dataset split to upload. Either of train or test/val
+      `chunk_size`: size of chunks for parallel data upload.
+    """
+    self.USER_ID = user_id
+    self.APP_ID = app_id
+    self.PAT = pat
+    self.dataset_id = dataset_id
+    self.task = task
+    self.module_dir = from_module
+    self.zoo_dataset = from_zoo
+    self.split = split
+    self.chunk_size = chunk_size
+    self.num_workers: int = cpu_count()
+    self.__base: str = ""
+    if portal == "dev":
+      self.__base = "https://api-dev.clarifai.com"
+    elif portal == "staging":
+      self.__base = "https://api-staging.clarifai.com"
+    else:  #prod
+      self.__base = "https://api.clarifai.com"
+
+    # Set auth vars as env variables
+    os.environ["CLARIFAI_USER_ID"] = self.USER_ID
+    os.environ["CLARIFAI_APP_ID"] = self.APP_ID
+    os.environ["CLARIFAI_API_BASE"] = self.__base
+    os.environ["CLARIFAI_PAT"] = self.PAT
+
+    self.STUB: service_pb2_grpc.V2Stub = create_stub()
+    self.metadata: Tuple = (('authorization', 'Key ' + self.PAT),)
+    self.user_app_id = resources_pb2.UserAppIDSet(user_id=self.USER_ID, app_id=self.APP_ID)
+
+  def _upload_inputs(self, inputs):
+    """
+    Upload inputs to clarifai platform dataset.
+    Args:
+      inputs: input protos
+    """
+    upload_count = 0
+    retry_upload = []  # those that fail to upload are stored for retries
+
+    for inp_proto in inputs:
+      response = self.STUB.PostInputs(
+          service_pb2.PostInputsRequest(user_app_id=self.user_app_id, inputs=[inp_proto]),)
+
+      MESSAGE_DUPLICATE_ID = "Input has a duplicate ID."
+      if response.status.code != status_code_pb2.SUCCESS:
+        try:
+          if response.inputs[0].status.details != MESSAGE_DUPLICATE_ID:
+            retry_upload.append(inp_proto)
+          print(f"Post inputs failed, status: {response.inputs[0].status.details}\n")
+          continue
+        except:
+          print(f"Post inputs failed, status: {response.status.details}\n")
+      else:
+        upload_count += 1
+
+    return retry_upload
+
+  def upload_annotations(self, inputs):
+    """
+    Upload image annotations to clarifai detection dataset
+    """
+    upload_count = 0
+    retry_upload = []  # those that fail to upload are stored for retries
+
+    for annot_proto in inputs:
+      response = self.STUB.PostAnnotations(
+          service_pb2.PostAnnotationsRequest(
+              user_app_id=self.user_app_id, annotations=[annot_proto]),)
+
+      if response.status.code != status_code_pb2.SUCCESS:
+        try:
+          print(f"Post annotations failed, status:\n{response.annotations[0].status.details}\n")
+          continue
+        except:
+          print(f"Post annotations failed, status:\n{response.status.details}\n")
+          retry_upload.append(annot_proto)
+      else:
+        upload_count += 1
+
+    return retry_upload
+
+  def concurrent_inp_upload(self, inputs, chunks):
+    """
+    Upload images concurrently.
+    """
+    inp_threads = []
+    retry_upload = []
+
+    with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+      for inp_batch in tqdm(inputs, total=chunks + 1, desc="uploading inputs..."):
+        inp_threads.append(executor.submit(self._upload_inputs, inp_batch))
+        time.sleep(0.1)
+
+    for job in tqdm(
+        as_completed(inp_threads), total=chunks + 1, desc="retry uploading failed protos..."):
+      if job.result():
+        retry_upload.extend(job.result())
+
+    if len(
+        list(retry_upload)) > 0:  ## TODO: use api_with_retries functionality via upload_inputs()
+      _ = self._upload_inputs(retry_upload)
+
+  def concurrent_annot_upload(self, inputs, chunks):
+    """
+    Uploads annotations concurrently.
+    """
+    annot_threads = []
+    retry_annot_upload = []
+
+    with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+      for annot_batch in tqdm(inputs, total=chunks + 1, desc="uploading..."):
+        annot_threads.append(executor.submit(self.upload_annotations, annot_batch))
+        time.sleep(0.2)
+
+    for job in tqdm(
+        as_completed(annot_threads), total=chunks + 1, desc="retry uploading failed protos..."):
+      if job.result():
+        retry_annot_upload.extend(job.result())
+    if len(retry_annot_upload) > 0:
+      ## TODO: use api_with_retries functionality via upload_annotations()
+      _ = self.upload_annotations(retry_annot_upload)
+
+  def upload_to_clarifai(self):
+    """
+    Execute data upload.
+    """
+    datagen_object = None
+    if self.module_dir is None and self.zoo_dataset is None:
+      raise Exception("One of `from_module` and `from_zoo` must be \
+      specified. Both can't be None or defined at the same time.")
+    elif self.module_dir is not None and self.zoo_dataset is not None:
+      raise Exception("Use either of `from_module` or `from_zoo` \
+      but NOT both.")
+    elif self.module_dir is not None:
+      datagen_object = load_dataset(self.module_dir, self.split)
+    else:
+      datagen_object = load_zoo_dataset(self.zoo_dataset, self.split)
 
-
+    if self.task == "text_clf":
+      dataset_obj = TextClassificationDataset(datagen_object, self.dataset_id, self.split)
+      text_protos = dataset_obj._get_input_protos()
+      text_protos = dataset_obj._to_list(text_protos)
 
+      # Upload text
+      chunks = len(text_protos) // self.num_workers
+      chunked_text_protos = Chunker(text_protos, self.chunk_size).chunk()
 
-
-  """
-  Upload images concurrently for efficiency.
-  """
-  inp_threads = []
-  retry_upload = []
+      self.concurrent_inp_upload(chunked_text_protos, chunks)
 
-
-
-
-
+    elif self.task == "visual_detection":
+      dataset_obj = VisualDetectionDataset(datagen_object, self.dataset_id, self.split)
+      img_protos, annotation_protos = dataset_obj._get_input_protos()
+      img_protos = dataset_obj._to_list(img_protos)
 
-
-
-
-      retry_upload.extend(job.result())
-  if len(list(retry_upload)) > 0:  ## TODO: use api_with_retries functionality via upload_data()
-    _ = upload_data(config, retry_upload, stub)
+      # Upload images
+      chunks = len(img_protos) // self.num_workers
+      chunked_img_protos = Chunker(img_protos, self.chunk_size).chunk()
 
+      self.concurrent_inp_upload(chunked_img_protos, chunks)
 
-
-
-
-
-
-  retry_annot_upload = []
+      # Upload annotations:
+      print("Uploading annotations.......")
+      annotation_protos = dataset_obj._to_list(annotation_protos)
+      chunks_ = len(annotation_protos) // self.num_workers
+      chunked_annot_protos = Chunker(annotation_protos, self.chunk_size).chunk()
 
-
-    for annot_batch in tqdm(inputs, total=chunks + 1, desc="uploading..."):
-      annot_threads.append(executor.submit(upload_annotations, config, annot_batch, stub))
-      time.sleep(0.2)
+      self.concurrent_annot_upload(chunked_annot_protos, chunks_)
 
-
-
-
-
-
-    ## TODO: use api_with_retries functionality via upload_annotations()
-    _ = upload_annotations(config, retry_annot_upload, stub)
+    elif self.task == "visual_segmentation":
+      dataset_obj = VisualSegmentationDataset(datagen_object, self.dataset_id, self.split)
+      img_protos, mask_protos = dataset_obj._get_input_protos()
+      img_protos = dataset_obj._to_list(img_protos)
+      mask_protos = dataset_obj._to_list(mask_protos)
 
+      # Upload images
+      chunks = len(img_protos) // self.num_workers
+      chunked_img_protos = Chunker(img_protos, self.chunk_size).chunk()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  # Upload text
-  chunks = len(text_protos) // workers
-  chunked_text_protos = Chunker(text_protos, config["chunk_size"]).chunk()
-
-  concurrent_inp_upload(config, chunked_text_protos, workers, chunks, STUB)
-
-  elif task == "visual_det":
-    dataset_obj = VisualDetectionDataset(
-        config.data["visual_det_image_dir"],
-        config.data["visual_det_labels_dir"],
-        config.data["dataset_id"],
-        config["split"],
-        labels_from_text_file=False)
-    img_protos, annotation_protos = dataset_obj._get_input_protos()
-    img_protos = dataset_obj.to_list(img_protos)
-
-    # Upload images
-    chunks = len(img_protos) // workers
-    chunked_img_protos = Chunker(img_protos, config["chunk_size"]).chunk()
-
-    concurrent_inp_upload(config, chunked_img_protos, workers, chunks, STUB)
-
-    # Upload annotations:
-    print("Uploading annotations.......")
-    annotation_protos = dataset_obj.to_list(annotation_protos)
-    chunks_ = len(annotation_protos) // workers
-    chunked_annot_protos = Chunker(annotation_protos, config["chunk_size"]).chunk()
-
-    concurrent_annot_upload(config, chunked_annot_protos, workers, chunks_, STUB)
-
-  elif task == "visual_seg":
-    dataset_obj = VisualSegmentationDataset(config.data["visual_seg_image_dir"],
-                                            config.data["visual_seg_masks_dir"],
-                                            config.data["dataset_id"], config["split"])
-    img_protos, mask_protos = dataset_obj._get_input_protos()
-    img_protos = dataset_obj.to_list(img_protos)
-    mask_protos = dataset_obj.to_list(mask_protos)
-
-    # Upload images
-    chunks = len(img_protos) // workers
-    chunked_img_protos = Chunker(img_protos, config["chunk_size"]).chunk()
-
-    concurrent_inp_upload(config, chunked_img_protos, workers, chunks, STUB)
-
-    # Upload masks:
-    print("Uploading masks.......")
-    chunks_ = len(mask_protos) // workers
-    chunked_mask_protos = Chunker(mask_protos, config["chunk_size"]).chunk()
-
-    concurrent_annot_upload(config, chunked_mask_protos, workers, chunks_, STUB)
-
-  else:
-    dataset_obj = ImageClassificationDataset(config.data["clf_image_dir"],
-                                             config.data["dataset_id"], config["split"])
-    img_protos = dataset_obj._get_input_protos()
-    img_protos = dataset_obj.to_list(img_protos)
-
-    # Upload images
-    chunks = len(img_protos) // workers
-    chunked_img_protos = Chunker(img_protos, config["chunk_size"]).chunk()
-
-    concurrent_inp_upload(config, chunked_img_protos, workers, chunks, STUB)
-
-
-if __name__ == "__main__":
-  yaml_path = "./config.yaml"
-  config = OmegaConf.load(yaml_path)
-  upload_to_clarifai(config, task=config["task"])
+      #self.concurrent_inp_upload(chunked_img_protos, chunks)
+      # Upload masks:
+      print("Uploading masks.......")
+      chunks_ = len(mask_protos) // self.num_workers
+      chunked_mask_protos = Chunker(mask_protos, self.chunk_size).chunk()
+
+      self.concurrent_annot_upload(chunked_mask_protos, chunks_)
+    else:  # visual-classification & visual-captioning
+      dataset_obj = VisualClassificationDataset(datagen_object, self.dataset_id, self.split)
+      img_protos = dataset_obj._get_input_protos()
+      img_protos = dataset_obj._to_list(img_protos)
+
+      # Upload images
+      chunks = len(img_protos) // self.num_workers
+      chunked_img_protos = Chunker(img_protos, self.chunk_size).chunk()
+
+      self.concurrent_inp_upload(chunked_img_protos, chunks)
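The headline change in upload.py: the OmegaConf/config.yaml driven `upload_to_clarifai(config, task=...)` script is replaced by the `UploadConfig` class. A minimal usage sketch based on the constructor shown above; every id, the PAT, and the module path are placeholders, and the import path is assumed to follow the package layout:

  from clarifai.data_upload.upload import UploadConfig  # assumed module path

  # Placeholder credentials and ids; substitute your own.
  config = UploadConfig(
      user_id="user_id",
      app_id="app_id",
      pat="personal_access_token",
      dataset_id="dataset_id",
      task="visual_detection",  # or visual_clf / visual_segmentation / text_clf
      from_module="./my_dataset_module",  # exactly one of from_module / from_zoo
      split="train")
  config.upload_to_clarifai()

Note that the constructor exports the credentials as CLARIFAI_* environment variables before calling `create_stub()`, so no separate auth setup is needed.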
clarifai_utils/listing/installed_module_versions.py CHANGED

@@ -24,12 +24,6 @@ def installed_module_versions_generator(stub: V2Stub,
 
   imv_success_status = {status_code_pb2.SUCCESS}
 
-  # HACK(zeiler): this is the number of default installed module versions every app has.
-  # so with pagination
-  seen = {
-      "module_manager_install": False,
-  }
-
   page = 1
   while True:
     response = stub.ListInstalledModuleVersions(

@@ -39,13 +33,8 @@ def installed_module_versions_generator(stub: V2Stub,
     if response.status.code not in imv_success_status:
       raise Exception("ListInstalledModuleVersions failed with response %r" % response)
     for item in response.installed_module_versions:
-
-      if not seen[item.id]:  # yield it once.
-        seen[item.id] = True
-        yield item
-      else:
-        yield item
+      yield item
    page += 1
-    # if we don't get a full page back
-    if len(response.installed_module_versions) < page_size
+    # if we don't get a full page back we know we're done.
+    if len(response.installed_module_versions) < page_size:
      break
clarifai_utils/listing/lister.py CHANGED

@@ -6,6 +6,8 @@ from clarifai.listing.datasets import datasets_generator
 from clarifai.listing.inputs import dataset_inputs_generator, inputs_generator
 from clarifai.listing.installed_module_versions import installed_module_versions_generator
 from clarifai.listing.models import models_generator
+from clarifai.listing.module_versions import module_versions_generator
+from clarifai.listing.modules import modules_generator
 
 
 class ClarifaiResourceLister(object):

@@ -158,3 +160,41 @@ class ClarifaiResourceLister(object):
     """
     page_size = self.default_page_size if page_size is None else page_size
     return installed_module_versions_generator(self.stub, self.user_id, self.app_id, page_size)
+
+  def list_all_modules(self, page_size: int = None):
+    """
+    This lists all the modules in an app. Not recommended for large apps.
+
+    Returns:
+      modules: a list of Module protos for all the modules in the app.
+    """
+    return [item for item in self.module_generator(page_size)]
+
+  def module_generator(self, page_size: int = None):
+    """
+    This lists all the module in an app. Not recommended for large apps.
+
+    Returns:
+      gen: a generator that yields a single Module proto at a time.
+    """
+    page_size = self.default_page_size if page_size is None else page_size
+    return modules_generator(self.stub, self.user_id, self.app_id, page_size)
+
+  def list_all_module_versions(self, page_size: int = None):
+    """
+    This lists all the module_versions in an app. Not recommended for large apps.
+
+    Returns:
+      module_versions: a list of ModuleVersion protos for all the module_versions in the app.
+    """
+    return [item for item in self.module_versions_generator(page_size)]
+
+  def module_versions_generator(self, page_size: int = None):
+    """
+    This lists all the module_versions in an app. Not recommended for large apps.
+
+    Returns:
+      gen: a generator that yields a single ModuleVersion proto at a time.
+    """
+    page_size = self.default_page_size if page_size is None else page_size
+    return module_versions_generator(self.stub, self.user_id, self.app_id, page_size)
clarifai_utils/listing/module_versions.py ADDED

@@ -0,0 +1,42 @@
+from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api.status import status_code_pb2
+
+from clarifai.client import V2Stub
+
+
+def module_versions_generator(stub: V2Stub,
+                              user_id: str,
+                              app_id: str,
+                              module_id: str,
+                              page_size: int = 64):
+  """
+  Lists all the module versions in the given userAppID user_id, app_id app, module_id module.
+
+  Args:
+    stub: client stub.
+    user_id: the user to list from.
+    app_id: the app in the user_id account to list from.
+    module_id: the module in the app to list from.
+    page_size: the pagination size to use while iterating.
+
+  Returns:
+    module_versions: a list of ModuleVersion protos for all the modules in the app.
+  """
+  userDataObject = resources_pb2.UserAppIDSet(user_id=user_id, app_id=app_id)
+
+  success_status = {status_code_pb2.SUCCESS}
+
+  page = 1
+  while True:
+    response = stub.ListModuleVersions(
+        service_pb2.ListModuleVersionsRequest(
+            user_app_id=userDataObject, module_id=module_id, page=page, per_page=page_size),)
+
+    if response.status.code not in success_status:
+      raise Exception("ListModuleVersions failed with response %r" % response)
+    for item in response.module_versions:
+      yield item
+    page += 1
+    # if we don't get a full page back we know we're done.
+    if len(response.module_versions) < page_size:
+      break
clarifai_utils/listing/modules.py ADDED

@@ -0,0 +1,36 @@
+from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api.status import status_code_pb2
+
+from clarifai.client import V2Stub
+
+
+def modules_generator(stub: V2Stub, user_id: str, app_id: str, page_size: int = 64):
+  """
+  Lists all the modules in the given userAppID user_id, app_id app.
+
+  Args:
+    stub: client stub.
+    user_id: the user to list from.
+    app_id: the app in the user_id account to list from.
+    page_size: the pagination size to use while iterating.
+
+  Returns:
+    imvs: a list of Module protos for all the modules in the app.
+  """
+  userDataObject = resources_pb2.UserAppIDSet(user_id=user_id, app_id=app_id)
+
+  success_status = {status_code_pb2.SUCCESS}
+
+  page = 1
+  while True:
+    response = stub.ListModules(
+        service_pb2.ListModulesRequest(user_app_id=userDataObject, page=page, per_page=page_size),)
+
+    if response.status.code not in success_status:
+      raise Exception("ListModules failed with response %r" % response)
+    for item in response.modules:
+      yield item
+    page += 1
+    # if we don't get a full page back we know we're done.
+    if len(response.modules) < page_size:
+      break
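Both new listing helpers follow the same pull-as-you-go pagination pattern, so module versions can be fetched lazily per module. A minimal sketch chaining the two generators; the ids are placeholders and, as above, `create_stub()` is assumed to pick up credentials from the CLARIFAI_* environment variables:

  from clarifai.client import create_stub
  from clarifai.listing.module_versions import module_versions_generator
  from clarifai.listing.modules import modules_generator

  stub = create_stub()
  for module in modules_generator(stub, user_id="user_id", app_id="app_id", page_size=16):
    # each item is a Module proto; list its versions lazily as well
    for version in module_versions_generator(
        stub, user_id="user_id", app_id="app_id", module_id=module.id, page_size=16):
      print(module.id, version.id)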
clarifai_utils/modules/style.css CHANGED

@@ -80,7 +80,7 @@ body {
   color: #fff;
   background-color: #006dff;
   border-color: transparent;
-  border-radius: 0
+  border-radius: 0 7px 7px 0;
 }
 
 .stTextArea > div {

@@ -95,6 +95,12 @@ body {
   border-radius: 8px;
 }
 
+.stNumberInput>div {
+  color: #fff;
+  border-color: #e4e7ec;
+  border-radius: 8px 8px 8px 8px;
+}
+
 .stNumberInput>div>div>button {
   color: #fff;
   background-color: #006dff;

@@ -139,9 +145,6 @@ div[data-testid="stFileUploader"]>section>button:active
   background-color: #356dff;
 }
 
-.st-bt {
-  background-color: #fff;
-}
 
 .stTextInput>div>div>input {
   background-color: #fff;
{clarifai-9.0.0.dist-info → clarifai-9.3.1.dist-info}/LICENSE, WHEEL, top_level.txt: files without changes.