clarifai 10.0.0__py3-none-any.whl → 10.1.0__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- clarifai/client/base.py +8 -1
- clarifai/client/dataset.py +77 -21
- clarifai/client/input.py +6 -6
- clarifai/client/model.py +1 -1
- clarifai/client/module.py +1 -1
- clarifai/client/workflow.py +1 -1
- clarifai/datasets/upload/features.py +3 -0
- clarifai/datasets/upload/image.py +57 -26
- clarifai/datasets/upload/loaders/xview_detection.py +4 -0
- clarifai/datasets/upload/utils.py +23 -7
- clarifai/models/model_serving/README.md +113 -121
- clarifai/models/model_serving/__init__.py +2 -0
- clarifai/models/model_serving/cli/_utils.py +53 -0
- clarifai/models/model_serving/cli/base.py +14 -0
- clarifai/models/model_serving/cli/build.py +79 -0
- clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
- clarifai/models/model_serving/cli/create.py +171 -0
- clarifai/models/model_serving/cli/example_cli.py +34 -0
- clarifai/models/model_serving/cli/login.py +26 -0
- clarifai/models/model_serving/cli/upload.py +182 -0
- clarifai/models/model_serving/constants.py +20 -0
- clarifai/models/model_serving/docs/cli.md +150 -0
- clarifai/models/model_serving/docs/concepts.md +229 -0
- clarifai/models/model_serving/docs/dependencies.md +1 -1
- clarifai/models/model_serving/docs/inference_parameters.md +112 -107
- clarifai/models/model_serving/docs/model_types.md +16 -17
- clarifai/models/model_serving/model_config/__init__.py +4 -2
- clarifai/models/model_serving/model_config/base.py +369 -0
- clarifai/models/model_serving/model_config/config.py +219 -224
- clarifai/models/model_serving/model_config/inference_parameter.py +5 -0
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -24
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -28
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -18
- clarifai/models/model_serving/{models → model_config}/output.py +8 -0
- clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
- clarifai/models/model_serving/model_config/{serializer.py → triton/serializer.py} +3 -1
- clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
- clarifai/models/model_serving/{models/model_types.py → model_config/triton/wrappers.py} +4 -4
- clarifai/models/model_serving/{models → repo_build}/__init__.py +2 -0
- clarifai/models/model_serving/repo_build/build.py +198 -0
- clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
- clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
- clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
- clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
- clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
- clarifai/models/model_serving/{models/pb_model.py → repo_build/static_files/triton/model.py} +15 -14
- clarifai/models/model_serving/utils.py +21 -0
- clarifai/rag/rag.py +45 -12
- clarifai/rag/utils.py +3 -2
- clarifai/utils/logging.py +7 -0
- clarifai/versions.py +1 -1
- {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/METADATA +28 -5
- clarifai-10.1.0.dist-info/RECORD +114 -0
- clarifai-10.1.0.dist-info/entry_points.txt +2 -0
- clarifai/models/model_serving/cli/deploy_cli.py +0 -123
- clarifai/models/model_serving/cli/model_zip.py +0 -61
- clarifai/models/model_serving/cli/repository.py +0 -89
- clarifai/models/model_serving/docs/custom_config.md +0 -33
- clarifai/models/model_serving/docs/output.md +0 -28
- clarifai/models/model_serving/models/default_test.py +0 -281
- clarifai/models/model_serving/models/inference.py +0 -50
- clarifai/models/model_serving/models/test.py +0 -64
- clarifai/models/model_serving/pb_model_repository.py +0 -108
- clarifai-10.0.0.dist-info/RECORD +0 -103
- clarifai-10.0.0.dist-info/entry_points.txt +0 -4
- {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/LICENSE +0 -0
- {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/WHEEL +0 -0
- {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/top_level.txt +0 -0
clarifai/client/base.py
CHANGED

```diff
@@ -118,8 +118,15 @@ class BaseClient:
           value_s = struct_pb2.Struct()
           value_s.update(value)
           value = value_s
+        elif key == 'metrics':
+          continue
         elif key in ['metadata']:
-          continue
+          if isinstance(value, dict) and value != {}:
+            value_s = struct_pb2.Struct()
+            value_s.update(value)
+            value = value_s
+          else:
+            continue
         new_item[key] = convert_recursive(value)
       return new_item
     elif isinstance(item, list):
```
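The net effect of this hunk: `metrics` keys are now skipped during proto conversion, while non-empty `metadata` dicts are converted to a protobuf `Struct` instead of being dropped. A minimal standalone sketch of that conversion step (variable names are illustrative, not the surrounding `convert_recursive` helper itself):

```python
from google.protobuf import struct_pb2

metadata = {"filename": "cat.jpg", "split": "train"}

# Non-empty dicts become a Struct; empty ones are skipped (the `continue` branch above).
if isinstance(metadata, dict) and metadata != {}:
    value_s = struct_pb2.Struct()
    value_s.update(metadata)  # copies the plain dict into the protobuf Struct
    value = value_s
```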
clarifai/client/dataset.py
CHANGED

```diff
@@ -2,11 +2,13 @@ import os
 import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
 from multiprocessing import cpu_count
-from typing import Generator, List, Tuple, Type, TypeVar, Union
+from typing import Dict, Generator, List, Optional, Tuple, Type, TypeVar, Union
 
 import requests
 from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
@@ -25,7 +27,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import get_logger
+from clarifai.utils.logging import add_file_handler, get_logger
 from clarifai.utils.misc import BackoffIterator, Chunker
 
 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -65,7 +67,7 @@ class Dataset(Lister, BaseClient):
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
     self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)
 
@@ -194,13 +196,17 @@ class Dataset(Lister, BaseClient):
 
     return retry_annot_upload
 
-  def _delete_failed_inputs(self, batch_input_ids: List[int],
-                            dataset_obj: ClarifaiDatasetType) -> Tuple[List[int], List[int]]:
+  def _delete_failed_inputs(
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.
 
     Args:
       batch_input_ids: batch input ids
       dataset_obj: ClarifaiDataset object
+      upload_response: upload response proto
 
     Returns:
       success_inputs: upload success input ids
@@ -220,7 +226,19 @@ class Dataset(Lister, BaseClient):
     success_inputs = response_dict.get('inputs', [])
 
     success_input_ids = [input.get('id') for input in success_inputs]
-    failed_input_ids = list(set(input_ids) - set(success_input_ids))
+    failed_input_ids = list(set(input_ids) - set(success_input_ids.copy()))
+    #check duplicate input ids
+    duplicate_input_ids = [
+        input.id for input in upload_response.inputs
+        if input.status.details == 'Input has a duplicate ID.'
+    ]  #handling duplicte ID failures.
+    if duplicate_input_ids:
+      success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
+      failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      self.logger.warning(
+          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+      )
+
     #delete failed inputs
     self._grpc_request(
         self.STUB.DeleteInputs,
@@ -228,8 +246,9 @@ class Dataset(Lister, BaseClient):
     )
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]
 
-  def _upload_inputs_annotations(self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
-                                ) -> Tuple[List[int], List[resources_pb2.Annotation]]:
+  def _upload_inputs_annotations(
+      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+  ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.
 
     Args:
@@ -239,20 +258,22 @@ class Dataset(Lister, BaseClient):
     Returns:
       failed_input_ids: failed input ids
       retry_annot_protos: failed annot protos
+      response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
-    input_job_id = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
+    input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []
 
     self.input_object._wait_for_inputs(input_job_id)
-    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj)
+    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
+                                                                     _response)
 
-    if self.task in ["visual_detection", "visual_segmentation"]:
+    if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
       chunked_annotation_protos = Chunker(annotation_protos, self.batch_size).chunk()
       retry_annot_protos.extend(self._concurrent_annot_upload(chunked_annotation_protos))
 
-    return failed_input_ids, retry_annot_protos
+    return failed_input_ids, retry_annot_protos, _response
 
   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
@@ -265,7 +286,25 @@ class Dataset(Lister, BaseClient):
       dataset_obj: ClarifaiDataset object
     """
     if failed_input_ids:
-      self._upload_inputs_annotations(failed_input_ids, dataset_obj)
+      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+      #Log Retrying inputs
+      self.logger.warning(
+          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+      )
+      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+          failed_input_ids, dataset_obj)
+      #Log failed inputs
+      if failed_retrying_inputs:
+        failed_retrying_input_ids = [
+            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
+        ]
+        failed_inputs_logs = {
+            input.id: input.status.details
+            for input in retry_response.inputs if input.id in failed_retrying_input_ids
+        }
+        self.logger.warning(
+            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
+        )
     if retry_annot_protos:
       chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
       _ = self._concurrent_annot_upload(chunked_annotation_protos)
@@ -287,21 +326,27 @@ class Dataset(Lister, BaseClient):
     ]
 
     for job in as_completed(futures):
-      retry_input_ids, retry_annot_protos = job.result()
+      retry_input_ids, retry_annot_protos, _ = job.result()
       self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
       progress.update()
 
   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
-                     get_upload_status: bool = False) -> None:
+                     get_upload_status: bool = False,
+                     log_warnings: bool = False) -> None:
     """Uploads a dataset to the app.
 
     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
+      log_warnings (bool): True if you want to save log warnings in a file
     """
+    #add file handler to log warnings
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+    #set batch size and task
     self.batch_size = min(self.batch_size, batch_size)
     self.task = dataloader.task
     if self.task not in DATASET_UPLOAD_TASKS:
@@ -321,10 +366,13 @@ class Dataset(Lister, BaseClient):
     else:  # visual_classification & visual_captioning
       dataset_obj = VisualClassificationDataset(dataloader, self.id)
 
+    if get_upload_status:
+      pre_upload_stats = self.get_upload_status(pre_upload=True)
+
     self._data_upload(dataset_obj)
 
     if get_upload_status:
-      self.get_upload_status(dataloader)
+      self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)
 
   def upload_from_csv(self,
                       csv_path: str,
@@ -398,16 +446,21 @@ class Dataset(Lister, BaseClient):
         folder_path=folder_path, dataset_id=self.id, labels=labels)
     self.input_object._bulk_upload(inputs=input_protos, batch_size=batch_size)
 
-  def get_upload_status(self,
-                        dataloader: Type[ClarifaiDataLoader] = None,
-                        delete_version: bool = False,
-                        timeout: int = 600) -> None:
+  def get_upload_status(
+      self,
+      dataloader: Type[ClarifaiDataLoader] = None,
+      delete_version: bool = False,
+      timeout: int = 600,
+      pre_upload_stats: Tuple[Dict[str, int], Dict[str, int]] = None,
+      pre_upload: bool = False) -> Optional[Tuple[Dict[str, int], Dict[str, int]]]:
    """Creates a new dataset version and displays the upload status of the dataset.
 
     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       delete_version (bool): True if you want to delete the version after getting the upload status
       timeout (int): Timeout in seconds for getting the upload status. Default is 600 seconds.
+      pre_upload_stats (Tuple[Dict[str, int], Dict[str, int]]): The pre upload stats for the dataset.
+      pre_upload (bool): True if you want to get the pre upload stats for the dataset.
 
     Example:
       >>> from clarifai.client.dataset import Dataset
@@ -450,9 +503,12 @@ class Dataset(Lister, BaseClient):
           raise UserError(
              "Dataset metrics are taking too long to process. Please try again later.")
         break
+    #get pre upload stats
+    if pre_upload:
+      return DisplayUploadStatus.get_dataset_version_stats(dataset_metrics_response)
 
     dataset_info_dict = dict(user_id=self.user_id, app_id=self.app_id, dataset_id=self.id)
-    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict)
+    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict, pre_upload_stats)
 
     if delete_version:
       self.delete_version(dataset_version_id)
```
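Taken together, the dataset.py changes add duplicate-ID detection, retry logging, optional log files, and a pre-upload stats baseline. A usage sketch of the new `upload_dataset` flags; the constructor arguments and `MyDataLoader` are assumptions for illustration, not part of this diff:

```python
from clarifai.client.dataset import Dataset
from my_loaders import MyDataLoader  # hypothetical ClarifaiDataLoader subclass

dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
dataset.upload_dataset(
    dataloader=MyDataLoader(),
    batch_size=32,
    get_upload_status=True,  # snapshots version stats before upload, diffs them after
    log_warnings=True)       # warnings also go to Dataset_Upload<timestamp>.log
```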
clarifai/client/input.py
CHANGED

```diff
@@ -660,15 +660,15 @@ class Inputs(Lister, BaseClient):
         user_app_id=self.user_app_id, inputs=inputs, inputs_add_job_id=input_job_id)
     response = self._grpc_request(self.STUB.PostInputs, request)
     if response.status.code != status_code_pb2.SUCCESS:
-      …
-      self.logger.warning(response…
-      …
-      …
+      if show_log:
+        self.logger.warning(response)
+      else:
+        return input_job_id, response
     else:
       if show_log:
         self.logger.info("\nInputs Uploaded\n%s", response.status)
 
-    return input_job_id
+    return input_job_id, response
 
   def upload_annotations(self, batch_annot: List[resources_pb2.Annotation], show_log: bool = True
                         ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -705,7 +705,7 @@ class Inputs(Lister, BaseClient):
     Returns:
       input_job_id: job id for the upload request.
     """
-    input_job_id = self.upload_inputs(inputs, False)
+    input_job_id, _ = self.upload_inputs(inputs, False)
     self._wait_for_inputs(input_job_id)
     failed_inputs = self._delete_failed_inputs(inputs)
 
```
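`upload_inputs` now returns a `(job_id, response)` pair instead of a bare job id, which is what lets `Dataset._delete_failed_inputs` inspect per-input failure details. A hedged sketch of the new call shape; the tuple return and the `get_input_from_bytes` keywords come from this diff, while the surrounding setup is illustrative:

```python
from clarifai.client.input import Inputs

input_object = Inputs(user_id="user_id", app_id="app_id")
with open("cat.jpg", "rb") as f:
    proto = Inputs.get_input_from_bytes(
        input_id="demo-1", image_bytes=f.read(), dataset_id="dataset_id")

# The response proto now travels back with the job id.
input_job_id, response = input_object.upload_inputs(inputs=[proto], show_log=False)
for inp in response.inputs:
    print(inp.id, inp.status.details)  # e.g. 'Input has a duplicate ID.'
```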
clarifai/client/model.py
CHANGED

```diff
@@ -53,7 +53,7 @@ class Model(Lister, BaseClient):
     kwargs = {'user_id': user_id, 'app_id': app_id}
     self.kwargs = {**kwargs, 'id': model_id, 'model_version': model_version,}
     self.model_info = resources_pb2.Model(**self.kwargs)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     self.training_params = {}
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)
```
clarifai/client/module.py
CHANGED

```diff
@@ -40,7 +40,7 @@ class Module(Lister, BaseClient):
 
     self.kwargs = {**kwargs, 'id': module_id, 'module_version': module_version}
     self.module_info = resources_pb2.Module(**self.kwargs)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)
 
```
clarifai/client/workflow.py
CHANGED

```diff
@@ -54,7 +54,7 @@ class Workflow(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': workflow_id, 'version': workflow_version}
     self.output_config = output_config
     self.workflow_info = resources_pb2.Workflow(**self.kwargs)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)
 
```
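`Dataset`, `Model`, `Module`, and `Workflow` now pass `name=__name__` to `get_logger`, giving each client its own logger instead of one shared instance. Assuming `get_logger` registers under the module path in the standard `logging` hierarchy, verbosity can then be tuned per client:

```python
import logging

# Silence the model client while keeping dataset-upload warnings visible.
logging.getLogger("clarifai.client.model").setLevel(logging.ERROR)
logging.getLogger("clarifai.client.dataset").setLevel(logging.WARNING)
```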
clarifai/datasets/upload/features.py
CHANGED

```diff
@@ -20,6 +20,7 @@ class VisualClassificationFeatures:
   geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
+  image_bytes: Optional[bytes] = None
 
 
 @dataclass
@@ -31,6 +32,7 @@ class VisualDetectionFeatures:
   geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
+  image_bytes: Optional[bytes] = None
 
 
 @dataclass
@@ -42,3 +44,4 @@ class VisualSegmentationFeatures:
   geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
+  image_bytes: Optional[bytes] = None
```
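The new `image_bytes` field lets a dataloader pass image data straight to the uploader with no file on disk. A sketch of a custom loader that uses it; the `ClarifaiDataLoader` import path and the loader class itself are assumptions for illustration:

```python
from clarifai.datasets.upload.base import ClarifaiDataLoader  # assumed path
from clarifai.datasets.upload.features import VisualClassificationFeatures


class InMemoryLoader(ClarifaiDataLoader):
    """Hypothetical loader that yields raw bytes instead of file paths."""

    def __init__(self, records):
        self.records = records  # list of (image_bytes, label) pairs

    @property
    def task(self):
        return "visual_classification"

    def __len__(self):
        return len(self.records)

    def __getitem__(self, idx):
        image_bytes, label = self.records[idx]
        return VisualClassificationFeatures(
            image_path=None,  # no file on disk; bytes are supplied directly
            labels=label,
            image_bytes=image_bytes)
```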
clarifai/datasets/upload/image.py
CHANGED

```diff
@@ -1,4 +1,5 @@
 import os
+import uuid
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Tuple, Type
 
@@ -31,22 +32,34 @@ class VisualClassificationDataset(ClarifaiDataset):
       image_path = data_item.image_path
       labels = data_item.labels if isinstance(data_item.labels,
                                               list) else [data_item.labels]  # clarifai concept
-      input_id = f"{self.dataset_id}-{…
+      input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       geo_info = data_item.geo_info
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
-      else:
+      elif image_path is not None:
         metadata.update({"filename": os.path.basename(image_path)})
+      else:
+        metadata = None
 
       self.all_input_ids[id] = input_id
-      input_protos.append(
-          Inputs.get_input_from_file(
-              input_id=input_id,
-              image_file=image_path,
-              dataset_id=self.dataset_id,
-              labels=labels,
-              geo_info=geo_info,
-              metadata=metadata))
+      if data_item.image_bytes is not None:
+        input_protos.append(
+            Inputs.get_input_from_bytes(
+                input_id=input_id,
+                image_bytes=data_item.image_bytes,
+                dataset_id=self.dataset_id,
+                labels=labels,
+                geo_info=geo_info,
+                metadata=metadata))
+      else:
+        input_protos.append(
+            Inputs.get_input_from_file(
+                input_id=input_id,
+                image_file=image_path,
+                dataset_id=self.dataset_id,
+                labels=labels,
+                geo_info=geo_info,
+                metadata=metadata))
 
     with ThreadPoolExecutor(max_workers=4) as executor:
       futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
@@ -79,7 +92,7 @@ class VisualDetectionDataset(ClarifaiDataset):
       image = data_item.image_path
       labels = data_item.labels  # list:[l1,...,ln]
       bboxes = data_item.bboxes  # [[xmin,ymin,xmax,ymax],...,[xmin,ymin,xmax,ymax]]
-      input_id = f"{self.dataset_id}-{…
+      input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
       else:
@@ -87,13 +100,22 @@ class VisualDetectionDataset(ClarifaiDataset):
       geo_info = data_item.geo_info
 
       self.all_input_ids[id] = input_id
-      input_protos.append(
-          Inputs.get_input_from_file(
-              input_id=input_id,
-              image_file=image,
-              dataset_id=self.dataset_id,
-              geo_info=geo_info,
-              metadata=metadata))
+      if data_item.image_bytes is not None:
+        input_protos.append(
+            Inputs.get_input_from_bytes(
+                input_id=input_id,
+                image_bytes=data_item.image_bytes,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))
+      else:
+        input_protos.append(
+            Inputs.get_input_from_file(
+                input_id=input_id,
+                image_file=image,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))
       # iter over bboxes and labels
       # one id could have more than one bbox and label
       for i in range(len(bboxes)):
@@ -131,7 +153,7 @@ class VisualSegmentationDataset(ClarifaiDataset):
       image = data_item.image_path
       labels = data_item.labels
       _polygons = data_item.polygons  # list of polygons: [[[x,y],...,[x,y]],...]
-      input_id = f"{self.dataset_id}-{…
+      input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
       else:
@@ -139,13 +161,22 @@ class VisualSegmentationDataset(ClarifaiDataset):
       geo_info = data_item.geo_info
 
       self.all_input_ids[id] = input_id
-      input_protos.append(
-          Inputs.get_input_from_file(
-              input_id=input_id,
-              image_file=image,
-              dataset_id=self.dataset_id,
-              geo_info=geo_info,
-              metadata=metadata))
+      if data_item.image_bytes is not None:
+        input_protos.append(
+            Inputs.get_input_from_bytes(
+                input_id=input_id,
+                image_bytes=data_item.image_bytes,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))
+      else:
+        input_protos.append(
+            Inputs.get_input_from_file(
+                input_id=input_id,
+                image_file=image,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))
 
       ## Iterate over each masked image and create a proto for upload to clarifai
       ## The length of masks/polygons-list and labels must be equal
```
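A behavioral detail from these hunks: when a data item carries no `id`, the input id now falls back to a random 8-character hex suffix from `uuid4`, avoiding collisions on repeated uploads. The fallback in isolation:

```python
import uuid

dataset_id = "demo-dataset"
data_item_id = None  # the dataloader did not supply an id

input_id = (f"{dataset_id}-{uuid.uuid4().hex[:8]}"
            if data_item_id is None else f"{dataset_id}-{str(data_item_id)}")
print(input_id)  # e.g. demo-dataset-1f3a9c02
```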
clarifai/datasets/upload/loaders/xview_detection.py
CHANGED

```diff
@@ -47,6 +47,10 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
 
     self.load_data()
 
+  @property
+  def task(self):
+    return "visual_detection"
+
   def compress_tiff(self, img_path: str) -> None:
     """Compress tiff image"""
     img_comp_path = os.path.join(self.img_comp_dir, os.path.basename(img_path))
```
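Exposing `task` as a read-only property matters because `Dataset.upload_dataset` dispatches on `dataloader.task` (see the dataset.py hunks above). A rough sketch of that check; the loader's constructor arguments are elided:

```python
from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
from clarifai.datasets.upload.loaders.xview_detection import xviewDetectionDataLoader
from clarifai.errors import UserError

loader = xviewDetectionDataLoader(...)  # constructor args elided
if loader.task not in DATASET_UPLOAD_TASKS:  # "visual_detection" passes
    raise UserError(f"Task {loader.task} is not supported")
```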
clarifai/datasets/upload/utils.py
CHANGED

```diff
@@ -53,16 +53,19 @@ class DisplayUploadStatus:
 
   def __init__(self, dataloader: ClarifaiDataLoader,
                dataset_metrics_response: Type[MultiDatasetVersionMetricsGroupResponse],
-               dataset_info_dict: Dict[str, str]) -> None:
+               dataset_info_dict: Dict[str, str],
+               pre_upload_stats: Tuple[Dict[str, int], Dict[str, int]]) -> None:
     """Initialize the class.
     Args:
       dataloader: ClarifaiDataLoader object
       dataset_metrics_response: The dataset version metrics response from the server.
       dataset_info_dict: The dataset info dictionary.
+      pre_upload_stats: The pre upload stats for the dataset.
     """
     self.dataloader = dataloader
     self.dataset_metrics_response = dataset_metrics_response
     self.dataset_info_dict = dataset_info_dict
+    self.pre_upload_stats = pre_upload_stats
 
     self.display()
 
@@ -71,7 +74,18 @@ class DisplayUploadStatus:
     from rich.console import Console
 
     local_inputs_count, local_annotations_dict = self.get_dataloader_stats()
-    uploaded_inputs_dict, uploaded_annotations_dict = self.get_dataset_version_stats()
+    uploaded_inputs_dict, uploaded_annotations_dict = self.get_dataset_version_stats(
+        self.dataset_metrics_response)
+
+    # Subtract the pre upload stats from the uploaded stats
+    uploaded_inputs_dict = {
+        key: int(uploaded_inputs_dict[key]) - int(self.pre_upload_stats[0].get(key, 0))
+        for key in uploaded_inputs_dict
+    }
+    uploaded_annotations_dict = {
+        key: uploaded_annotations_dict[key] - self.pre_upload_stats[1].get(key, 0)
+        for key in uploaded_annotations_dict
+    }
 
     self.local_annotations_count = sum(local_annotations_dict.values())
     self.uploaded_annotations_count = sum(uploaded_annotations_dict.values())
@@ -99,9 +113,6 @@ class DisplayUploadStatus:
     """
     from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
 
-    if not isinstance(self.dataloader, ClarifaiDataLoader):
-      raise UserError("Dataloader is not an instance of ClarifaiDataLoader")
-
     task = self.dataloader.task
     if task not in DATASET_UPLOAD_TASKS:
       raise UserError(
@@ -113,8 +124,13 @@ class DisplayUploadStatus:
         local_annotations_dict[key] += len(getattr(self.dataloader[i], attr))
     return local_inputs_count, local_annotations_dict
 
-  def get_dataset_version_stats(self) -> Tuple[Dict[str, int], Dict[str, int]]:
+  @staticmethod
+  def get_dataset_version_stats(
+      dataset_metrics_response: Type[MultiDatasetVersionMetricsGroupResponse]
+  ) -> Tuple[Dict[str, int], Dict[str, int]]:
     """Parse the response from the server for the dataset version metrics groups.
+    Args:
+      dataset_metrics_response: The dataset version metrics response from the server.
 
     Returns:
       uploaded_inputs_dict (Dict[str, int]): The input statistics for the dataset.
@@ -123,7 +139,7 @@ class DisplayUploadStatus:
     dataset_statistics = []
     uploaded_inputs_dict = {}
     uploaded_annotations_dict = dict(concepts=0, bboxes=0, polygons=0)
-    dict_response = MessageToDict(self.dataset_metrics_response)
+    dict_response = MessageToDict(dataset_metrics_response)
 
     for data in dict_response["datasetVersionMetricsGroups"]:
       if isinstance(data["value"], str):
```
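The display logic now subtracts the pre-upload snapshot from the post-upload counts, so the summary reports only what the current run added. The subtraction in isolation, on plain dicts with illustrative keys:

```python
pre_inputs = {"inputs": 100}   # snapshot taken before the upload
post_inputs = {"inputs": 132}  # snapshot taken after the upload

delta = {key: int(post_inputs[key]) - int(pre_inputs.get(key, 0))
         for key in post_inputs}
print(delta)  # {'inputs': 32}
```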
|