clarifai 10.0.0__py3-none-any.whl → 10.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. clarifai/client/base.py +8 -1
  2. clarifai/client/dataset.py +77 -21
  3. clarifai/client/input.py +6 -6
  4. clarifai/client/model.py +1 -1
  5. clarifai/client/module.py +1 -1
  6. clarifai/client/workflow.py +1 -1
  7. clarifai/datasets/upload/features.py +3 -0
  8. clarifai/datasets/upload/image.py +57 -26
  9. clarifai/datasets/upload/loaders/xview_detection.py +4 -0
  10. clarifai/datasets/upload/utils.py +23 -7
  11. clarifai/models/model_serving/README.md +113 -121
  12. clarifai/models/model_serving/__init__.py +2 -0
  13. clarifai/models/model_serving/cli/_utils.py +53 -0
  14. clarifai/models/model_serving/cli/base.py +14 -0
  15. clarifai/models/model_serving/cli/build.py +79 -0
  16. clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
  17. clarifai/models/model_serving/cli/create.py +171 -0
  18. clarifai/models/model_serving/cli/example_cli.py +34 -0
  19. clarifai/models/model_serving/cli/login.py +26 -0
  20. clarifai/models/model_serving/cli/upload.py +182 -0
  21. clarifai/models/model_serving/constants.py +20 -0
  22. clarifai/models/model_serving/docs/cli.md +150 -0
  23. clarifai/models/model_serving/docs/concepts.md +229 -0
  24. clarifai/models/model_serving/docs/dependencies.md +1 -1
  25. clarifai/models/model_serving/docs/inference_parameters.md +112 -107
  26. clarifai/models/model_serving/docs/model_types.md +16 -17
  27. clarifai/models/model_serving/model_config/__init__.py +4 -2
  28. clarifai/models/model_serving/model_config/base.py +369 -0
  29. clarifai/models/model_serving/model_config/config.py +219 -224
  30. clarifai/models/model_serving/model_config/inference_parameter.py +5 -0
  31. clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -24
  32. clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -18
  33. clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -18
  34. clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -18
  35. clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -18
  36. clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -18
  37. clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -28
  38. clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -18
  39. clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -18
  40. clarifai/models/model_serving/{models → model_config}/output.py +8 -0
  41. clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
  42. clarifai/models/model_serving/model_config/{serializer.py → triton/serializer.py} +3 -1
  43. clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
  44. clarifai/models/model_serving/{models/model_types.py → model_config/triton/wrappers.py} +4 -4
  45. clarifai/models/model_serving/{models → repo_build}/__init__.py +2 -0
  46. clarifai/models/model_serving/repo_build/build.py +198 -0
  47. clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
  48. clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
  49. clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
  50. clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
  51. clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
  52. clarifai/models/model_serving/{models/pb_model.py → repo_build/static_files/triton/model.py} +15 -14
  53. clarifai/models/model_serving/utils.py +21 -0
  54. clarifai/rag/rag.py +45 -12
  55. clarifai/rag/utils.py +3 -2
  56. clarifai/utils/logging.py +7 -0
  57. clarifai/versions.py +1 -1
  58. {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/METADATA +28 -5
  59. clarifai-10.1.0.dist-info/RECORD +114 -0
  60. clarifai-10.1.0.dist-info/entry_points.txt +2 -0
  61. clarifai/models/model_serving/cli/deploy_cli.py +0 -123
  62. clarifai/models/model_serving/cli/model_zip.py +0 -61
  63. clarifai/models/model_serving/cli/repository.py +0 -89
  64. clarifai/models/model_serving/docs/custom_config.md +0 -33
  65. clarifai/models/model_serving/docs/output.md +0 -28
  66. clarifai/models/model_serving/models/default_test.py +0 -281
  67. clarifai/models/model_serving/models/inference.py +0 -50
  68. clarifai/models/model_serving/models/test.py +0 -64
  69. clarifai/models/model_serving/pb_model_repository.py +0 -108
  70. clarifai-10.0.0.dist-info/RECORD +0 -103
  71. clarifai-10.0.0.dist-info/entry_points.txt +0 -4
  72. {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/LICENSE +0 -0
  73. {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/WHEEL +0 -0
  74. {clarifai-10.0.0.dist-info → clarifai-10.1.0.dist-info}/top_level.txt +0 -0
clarifai/client/base.py CHANGED
@@ -118,8 +118,15 @@ class BaseClient:
           value_s = struct_pb2.Struct()
           value_s.update(value)
           value = value_s
+        elif key == 'metrics':
+          continue
         elif key in ['metadata']:
-          continue  # TODO Fix "app_duplication"
+          if isinstance(value, dict) and value != {}:
+            value_s = struct_pb2.Struct()
+            value_s.update(value)
+            value = value_s
+          else:
+            continue
         new_item[key] = convert_recursive(value)
       return new_item
     elif isinstance(item, list):
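For context, the new `metadata` branch above converts a non-empty dict into a protobuf Struct instead of skipping the key. A minimal sketch of that conversion, using only the google.protobuf well-known type shown in the diff; the sample dict values are illustrative:

    from google.protobuf import struct_pb2

    # Illustrative metadata dict; any non-empty dict is handled the same way.
    value = {"filename": "cat.jpg", "split": "train"}
    value_s = struct_pb2.Struct()
    value_s.update(value)  # Struct.update accepts a plain Python dict
    value = value_s        # the Struct then replaces the dict on the outgoing proto field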
clarifai/client/dataset.py CHANGED
@@ -2,11 +2,13 @@ import os
 import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
 from multiprocessing import cpu_count
-from typing import Generator, List, Tuple, Type, TypeVar, Union
+from typing import Dict, Generator, List, Optional, Tuple, Type, TypeVar, Union

 import requests
 from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
@@ -25,7 +27,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import get_logger
+from clarifai.utils.logging import add_file_handler, get_logger
 from clarifai.utils.misc import BackoffIterator, Chunker

 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -65,7 +67,7 @@ class Dataset(Lister, BaseClient):
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
     self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)

@@ -194,13 +196,17 @@ class Dataset(Lister, BaseClient):

     return retry_annot_upload

-  def _delete_failed_inputs(self, batch_input_ids: List[int],
-                            dataset_obj: ClarifaiDatasetType) -> Tuple[List[int], List[int]]:
+  def _delete_failed_inputs(
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.

     Args:
       batch_input_ids: batch input ids
       dataset_obj: ClarifaiDataset object
+      upload_response: upload response proto

     Returns:
       success_inputs: upload success input ids
@@ -220,7 +226,19 @@ class Dataset(Lister, BaseClient):
     success_inputs = response_dict.get('inputs', [])

     success_input_ids = [input.get('id') for input in success_inputs]
-    failed_input_ids = list(set(input_ids) - set(success_input_ids))
+    failed_input_ids = list(set(input_ids) - set(success_input_ids.copy()))
+    #check duplicate input ids
+    duplicate_input_ids = [
+        input.id for input in upload_response.inputs
+        if input.status.details == 'Input has a duplicate ID.'
+    ]  #handling duplicte ID failures.
+    if duplicate_input_ids:
+      success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
+      failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      self.logger.warning(
+          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+      )
+
     #delete failed inputs
     self._grpc_request(
         self.STUB.DeleteInputs,
@@ -228,8 +246,9 @@
     )
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]

-  def _upload_inputs_annotations(self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
-                                ) -> Tuple[List[int], List[resources_pb2.Annotation]]:
+  def _upload_inputs_annotations(
+      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+  ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.

     Args:
@@ -239,20 +258,22 @@
     Returns:
       failed_input_ids: failed input ids
       retry_annot_protos: failed annot protos
+      response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
-    input_job_id = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
+    input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []

     self.input_object._wait_for_inputs(input_job_id)
-    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj)
+    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
+                                                                     _response)

-    if self.task in ["visual_detection", "visual_segmentation"]:
+    if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
       chunked_annotation_protos = Chunker(annotation_protos, self.batch_size).chunk()
       retry_annot_protos.extend(self._concurrent_annot_upload(chunked_annotation_protos))

-    return failed_input_ids, retry_annot_protos
+    return failed_input_ids, retry_annot_protos, _response

   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
@@ -265,7 +286,25 @@
       dataset_obj: ClarifaiDataset object
     """
     if failed_input_ids:
-      self._upload_inputs_annotations(failed_input_ids, dataset_obj)
+      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+      #Log Retrying inputs
+      self.logger.warning(
+          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+      )
+      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+          failed_input_ids, dataset_obj)
+      #Log failed inputs
+      if failed_retrying_inputs:
+        failed_retrying_input_ids = [
+            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
+        ]
+        failed_inputs_logs = {
+            input.id: input.status.details
+            for input in retry_response.inputs if input.id in failed_retrying_input_ids
+        }
+        self.logger.warning(
+            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
+        )
     if retry_annot_protos:
       chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
       _ = self._concurrent_annot_upload(chunked_annotation_protos)
@@ -287,21 +326,27 @@
     ]

     for job in as_completed(futures):
-      retry_input_ids, retry_annot_protos = job.result()
+      retry_input_ids, retry_annot_protos, _ = job.result()
       self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
       progress.update()

   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
-                     get_upload_status: bool = False) -> None:
+                     get_upload_status: bool = False,
+                     log_warnings: bool = False) -> None:
     """Uploads a dataset to the app.

     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
+      log_warnings (bool): True if you want to save log warnings in a file
     """
+    #add file handler to log warnings
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+    #set batch size and task
     self.batch_size = min(self.batch_size, batch_size)
     self.task = dataloader.task
     if self.task not in DATASET_UPLOAD_TASKS:
@@ -321,10 +366,13 @@
     else:  # visual_classification & visual_captioning
       dataset_obj = VisualClassificationDataset(dataloader, self.id)

+    if get_upload_status:
+      pre_upload_stats = self.get_upload_status(pre_upload=True)
+
     self._data_upload(dataset_obj)

     if get_upload_status:
-      self.get_upload_status(dataloader)
+      self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)

   def upload_from_csv(self,
                       csv_path: str,
@@ -398,16 +446,21 @@
         folder_path=folder_path, dataset_id=self.id, labels=labels)
     self.input_object._bulk_upload(inputs=input_protos, batch_size=batch_size)

-  def get_upload_status(self,
-                        dataloader: Type[ClarifaiDataLoader],
-                        delete_version: bool = False,
-                        timeout: int = 600) -> None:
+  def get_upload_status(
+      self,
+      dataloader: Type[ClarifaiDataLoader] = None,
+      delete_version: bool = False,
+      timeout: int = 600,
+      pre_upload_stats: Tuple[Dict[str, int], Dict[str, int]] = None,
+      pre_upload: bool = False) -> Optional[Tuple[Dict[str, int], Dict[str, int]]]:
     """Creates a new dataset version and displays the upload status of the dataset.

     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       delete_version (bool): True if you want to delete the version after getting the upload status
       timeout (int): Timeout in seconds for getting the upload status. Default is 600 seconds.
+      pre_upload_stats (Tuple[Dict[str, int], Dict[str, int]]): The pre upload stats for the dataset.
+      pre_upload (bool): True if you want to get the pre upload stats for the dataset.

     Example:
       >>> from clarifai.client.dataset import Dataset
@@ -450,9 +503,12 @@
         raise UserError(
             "Dataset metrics are taking too long to process. Please try again later.")
         break
+    #get pre upload stats
+    if pre_upload:
+      return DisplayUploadStatus.get_dataset_version_stats(dataset_metrics_response)

     dataset_info_dict = dict(user_id=self.user_id, app_id=self.app_id, dataset_id=self.id)
-    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict)
+    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict, pre_upload_stats)

     if delete_version:
       self.delete_version(dataset_version_id)
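For orientation, a minimal sketch of how a caller might drive the new `log_warnings` flag and the pre/post upload-status flow introduced above. The user_id/app_id/dataset_id values and the `MyDataLoader` class are placeholders, not part of this release:

    from clarifai.client.dataset import Dataset

    # MyDataLoader stands in for any ClarifaiDataLoader subclass you define.
    dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
    dataset.upload_dataset(
        dataloader=MyDataLoader(),
        batch_size=32,
        get_upload_status=True,  # snapshots version stats before upload, then reports only the delta
        log_warnings=True)       # duplicate-ID and retry warnings also go to a Dataset_Upload<timestamp>.log file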
clarifai/client/input.py CHANGED
@@ -660,15 +660,15 @@ class Inputs(Lister, BaseClient):
         user_app_id=self.user_app_id, inputs=inputs, inputs_add_job_id=input_job_id)
     response = self._grpc_request(self.STUB.PostInputs, request)
     if response.status.code != status_code_pb2.SUCCESS:
-      try:
-        self.logger.warning(response.inputs[0].status)
-      except IndexError:
-        self.logger.warning(response.status)
+      if show_log:
+        self.logger.warning(response)
+      else:
+        return input_job_id, response
     else:
       if show_log:
         self.logger.info("\nInputs Uploaded\n%s", response.status)

-    return input_job_id
+    return input_job_id, response

   def upload_annotations(self, batch_annot: List[resources_pb2.Annotation], show_log: bool = True
                         ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -705,7 +705,7 @@
     Returns:
       input_job_id: job id for the upload request.
     """
-    input_job_id = self.upload_inputs(inputs, False)
+    input_job_id, _ = self.upload_inputs(inputs, False)
     self._wait_for_inputs(input_job_id)
     failed_inputs = self._delete_failed_inputs(inputs)

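A note on the API change above: `upload_inputs` now returns a `(job_id, response)` pair instead of a bare job id, so existing callers need to unpack both values. A hedged sketch; the `inputs_client` and `input_protos` names are placeholders:

    # upload_inputs now also hands back the PostInputs response proto.
    input_job_id, response = inputs_client.upload_inputs(inputs=input_protos, show_log=False)
    for inp in response.inputs:
        if inp.status.details == 'Input has a duplicate ID.':
            print(f"duplicate input id: {inp.id}")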
clarifai/client/model.py CHANGED
@@ -53,7 +53,7 @@ class Model(Lister, BaseClient):
     kwargs = {'user_id': user_id, 'app_id': app_id}
     self.kwargs = {**kwargs, 'id': model_id, 'model_version': model_version,}
     self.model_info = resources_pb2.Model(**self.kwargs)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     self.training_params = {}
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)
clarifai/client/module.py CHANGED
@@ -40,7 +40,7 @@ class Module(Lister, BaseClient):

     self.kwargs = {**kwargs, 'id': module_id, 'module_version': module_version}
     self.module_info = resources_pb2.Module(**self.kwargs)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)

clarifai/client/workflow.py CHANGED
@@ -54,7 +54,7 @@ class Workflow(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': workflow_id, 'version': workflow_version}
     self.output_config = output_config
     self.workflow_info = resources_pb2.Workflow(**self.kwargs)
-    self.logger = get_logger(logger_level="INFO")
+    self.logger = get_logger(logger_level="INFO", name=__name__)
     BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
     Lister.__init__(self)

clarifai/datasets/upload/features.py CHANGED
@@ -20,6 +20,7 @@ class VisualClassificationFeatures:
   geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
+  image_bytes: Optional[bytes] = None


 @dataclass
@@ -31,6 +32,7 @@ class VisualDetectionFeatures:
   geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
+  image_bytes: Optional[bytes] = None


 @dataclass
@@ -42,3 +44,4 @@ class VisualSegmentationFeatures:
   geo_info: Optional[List[float]] = None  #[Longitude, Latitude]
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
+  image_bytes: Optional[bytes] = None
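The new `image_bytes` field lets a data loader hand the SDK in-memory image data instead of a file path. A hedged sketch of a data item built this way; the file name, labels, and id are placeholders, and the `image_path`/`labels` field names are taken from how image.py reads these dataclasses:

    from clarifai.datasets.upload.features import VisualClassificationFeatures

    # Build a data item from raw bytes; image.py prefers image_bytes over image_path when it is set.
    with open("cat.jpg", "rb") as f:
        item = VisualClassificationFeatures(
            image_path=None,       # no file on disk for this item
            labels=["cat"],        # clarifai concept(s)
            image_bytes=f.read(),  # new in 10.1.0
            id=42)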
clarifai/datasets/upload/image.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import uuid
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Tuple, Type

@@ -31,22 +32,34 @@ class VisualClassificationDataset(ClarifaiDataset):
       image_path = data_item.image_path
       labels = data_item.labels if isinstance(data_item.labels,
                                               list) else [data_item.labels]  # clarifai concept
-      input_id = f"{self.dataset_id}-{id}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
+      input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       geo_info = data_item.geo_info
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
-      else:
+      elif image_path is not None:
         metadata.update({"filename": os.path.basename(image_path)})
+      else:
+        metadata = None

       self.all_input_ids[id] = input_id
-      input_protos.append(
-          Inputs.get_input_from_file(
-              input_id=input_id,
-              image_file=image_path,
-              dataset_id=self.dataset_id,
-              labels=labels,
-              geo_info=geo_info,
-              metadata=metadata))
+      if data_item.image_bytes is not None:
+        input_protos.append(
+            Inputs.get_input_from_bytes(
+                input_id=input_id,
+                image_bytes=data_item.image_bytes,
+                dataset_id=self.dataset_id,
+                labels=labels,
+                geo_info=geo_info,
+                metadata=metadata))
+      else:
+        input_protos.append(
+            Inputs.get_input_from_file(
+                input_id=input_id,
+                image_file=image_path,
+                dataset_id=self.dataset_id,
+                labels=labels,
+                geo_info=geo_info,
+                metadata=metadata))

     with ThreadPoolExecutor(max_workers=4) as executor:
       futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
@@ -79,7 +92,7 @@ class VisualDetectionDataset(ClarifaiDataset):
       image = data_item.image_path
       labels = data_item.labels  # list:[l1,...,ln]
       bboxes = data_item.bboxes  # [[xmin,ymin,xmax,ymax],...,[xmin,ymin,xmax,ymax]]
-      input_id = f"{self.dataset_id}-{id}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
+      input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
       else:
@@ -87,13 +100,22 @@ class VisualDetectionDataset(ClarifaiDataset):
       geo_info = data_item.geo_info

       self.all_input_ids[id] = input_id
-      input_protos.append(
-          Inputs.get_input_from_file(
-              input_id=input_id,
-              image_file=image,
-              dataset_id=self.dataset_id,
-              geo_info=geo_info,
-              metadata=metadata))
+      if data_item.image_bytes is not None:
+        input_protos.append(
+            Inputs.get_input_from_bytes(
+                input_id=input_id,
+                image_bytes=data_item.image_bytes,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))
+      else:
+        input_protos.append(
+            Inputs.get_input_from_file(
+                input_id=input_id,
+                image_file=image,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))
       # iter over bboxes and labels
       # one id could have more than one bbox and label
       for i in range(len(bboxes)):
@@ -131,7 +153,7 @@ class VisualSegmentationDataset(ClarifaiDataset):
       image = data_item.image_path
       labels = data_item.labels
       _polygons = data_item.polygons  # list of polygons: [[[x,y],...,[x,y]],...]
-      input_id = f"{self.dataset_id}-{id}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
+      input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
       else:
@@ -139,13 +161,22 @@ class VisualSegmentationDataset(ClarifaiDataset):
      geo_info = data_item.geo_info

      self.all_input_ids[id] = input_id
-      input_protos.append(
-          Inputs.get_input_from_file(
-              input_id=input_id,
-              image_file=image,
-              dataset_id=self.dataset_id,
-              geo_info=geo_info,
-              metadata=metadata))
+      if data_item.image_bytes is not None:
+        input_protos.append(
+            Inputs.get_input_from_bytes(
+                input_id=input_id,
+                image_bytes=data_item.image_bytes,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))
+      else:
+        input_protos.append(
+            Inputs.get_input_from_file(
+                input_id=input_id,
+                image_file=image,
+                dataset_id=self.dataset_id,
+                geo_info=geo_info,
+                metadata=metadata))

       ## Iterate over each masked image and create a proto for upload to clarifai
       ## The length of masks/polygons-list and labels must be equal
clarifai/datasets/upload/loaders/xview_detection.py CHANGED
@@ -47,6 +47,10 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):

     self.load_data()

+  @property
+  def task(self):
+    return "visual_detection"
+
   def compress_tiff(self, img_path: str) -> None:
     """Compress tiff image"""
     img_comp_path = os.path.join(self.img_comp_dir, os.path.basename(img_path))
clarifai/datasets/upload/utils.py CHANGED
@@ -53,16 +53,19 @@ class DisplayUploadStatus:

   def __init__(self, dataloader: ClarifaiDataLoader,
                dataset_metrics_response: Type[MultiDatasetVersionMetricsGroupResponse],
-               dataset_info_dict: Dict[str, str]) -> None:
+               dataset_info_dict: Dict[str, str],
+               pre_upload_stats: Tuple[Dict[str, int], Dict[str, int]]) -> None:
     """Initialize the class.
     Args:
       dataloader: ClarifaiDataLoader object
       dataset_metrics_response: The dataset version metrics response from the server.
       dataset_info_dict: The dataset info dictionary.
+      pre_upload_stats: The pre upload stats for the dataset.
     """
     self.dataloader = dataloader
     self.dataset_metrics_response = dataset_metrics_response
     self.dataset_info_dict = dataset_info_dict
+    self.pre_upload_stats = pre_upload_stats

     self.display()

@@ -71,7 +74,18 @@
     from rich.console import Console

     local_inputs_count, local_annotations_dict = self.get_dataloader_stats()
-    uploaded_inputs_dict, uploaded_annotations_dict = self.get_uploaded_dataset_stats()
+    uploaded_inputs_dict, uploaded_annotations_dict = self.get_dataset_version_stats(
+        self.dataset_metrics_response)
+
+    # Subtract the pre upload stats from the uploaded stats
+    uploaded_inputs_dict = {
+        key: int(uploaded_inputs_dict[key]) - int(self.pre_upload_stats[0].get(key, 0))
+        for key in uploaded_inputs_dict
+    }
+    uploaded_annotations_dict = {
+        key: uploaded_annotations_dict[key] - self.pre_upload_stats[1].get(key, 0)
+        for key in uploaded_annotations_dict
+    }

     self.local_annotations_count = sum(local_annotations_dict.values())
     self.uploaded_annotations_count = sum(uploaded_annotations_dict.values())
@@ -99,9 +113,6 @@
     """
     from clarifai.constants.dataset import DATASET_UPLOAD_TASKS

-    if not isinstance(self.dataloader, ClarifaiDataLoader):
-      raise UserError("Dataloader is not an instance of ClarifaiDataLoader")
-
     task = self.dataloader.task
     if task not in DATASET_UPLOAD_TASKS:
       raise UserError(
@@ -113,8 +124,13 @@
           local_annotations_dict[key] += len(getattr(self.dataloader[i], attr))
     return local_inputs_count, local_annotations_dict

-  def get_uploaded_dataset_stats(self) -> Tuple[Dict[str, int], Dict[str, int]]:
+  @staticmethod
+  def get_dataset_version_stats(
+      dataset_metrics_response: Type[MultiDatasetVersionMetricsGroupResponse]
+  ) -> Tuple[Dict[str, int], Dict[str, int]]:
     """Parse the response from the server for the dataset version metrics groups.
+    Args:
+      dataset_metrics_response: The dataset version metrics response from the server.

     Returns:
       uploaded_inputs_dict (Dict[str, int]): The input statistics for the dataset.
@@ -123,7 +139,7 @@
     dataset_statistics = []
     uploaded_inputs_dict = {}
     uploaded_annotations_dict = dict(concepts=0, bboxes=0, polygons=0)
-    dict_response = MessageToDict(self.dataset_metrics_response)
+    dict_response = MessageToDict(dataset_metrics_response)

     for data in dict_response["datasetVersionMetricsGroups"]:
       if isinstance(data["value"], str):
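Since `get_dataset_version_stats` is now a staticmethod, the same parser can snapshot a version's stats before and after an upload. A hedged sketch of the delta computation that `display()` performs above; `pre_response` and `post_response` stand for two dataset version metrics responses and are not names from the SDK:

    # Snapshot input/annotation stats from two metrics responses and diff them.
    pre_inputs, pre_annots = DisplayUploadStatus.get_dataset_version_stats(pre_response)
    post_inputs, post_annots = DisplayUploadStatus.get_dataset_version_stats(post_response)
    delta_inputs = {k: int(post_inputs[k]) - int(pre_inputs.get(k, 0)) for k in post_inputs}
    delta_annots = {k: post_annots[k] - pre_annots.get(k, 0) for k in post_annots}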