dtlpy 1.114.16__py3-none-any.whl → 1.115.44__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in that public registry.
Files changed (34)
  1. dtlpy/__init__.py +1 -1
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/entities/__init__.py +1 -1
  4. dtlpy/entities/analytic.py +42 -6
  5. dtlpy/entities/codebase.py +1 -5
  6. dtlpy/entities/compute.py +12 -5
  7. dtlpy/entities/dataset.py +19 -5
  8. dtlpy/entities/driver.py +14 -2
  9. dtlpy/entities/filters.py +156 -3
  10. dtlpy/entities/item.py +9 -3
  11. dtlpy/entities/prompt_item.py +7 -1
  12. dtlpy/entities/service.py +5 -0
  13. dtlpy/ml/base_model_adapter.py +407 -263
  14. dtlpy/repositories/commands.py +1 -7
  15. dtlpy/repositories/computes.py +17 -13
  16. dtlpy/repositories/datasets.py +287 -74
  17. dtlpy/repositories/downloader.py +23 -3
  18. dtlpy/repositories/drivers.py +12 -8
  19. dtlpy/repositories/executions.py +1 -3
  20. dtlpy/repositories/features.py +31 -14
  21. dtlpy/repositories/items.py +5 -2
  22. dtlpy/repositories/models.py +16 -4
  23. dtlpy/repositories/uploader.py +22 -12
  24. dtlpy/services/api_client.py +6 -3
  25. dtlpy/services/reporter.py +1 -1
  26. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/METADATA +15 -12
  27. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/RECORD +34 -34
  28. {dtlpy-1.114.16.data → dtlpy-1.115.44.data}/scripts/dlp +0 -0
  29. {dtlpy-1.114.16.data → dtlpy-1.115.44.data}/scripts/dlp.bat +0 -0
  30. {dtlpy-1.114.16.data → dtlpy-1.115.44.data}/scripts/dlp.py +0 -0
  31. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/WHEEL +0 -0
  32. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/entry_points.txt +0 -0
  33. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/licenses/LICENSE +0 -0
  34. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/top_level.txt +0 -0
@@ -113,13 +113,7 @@ class Commands:
  elapsed = time.time() - start
  sleep_time = np.min([timeout - elapsed, backoff_factor * (2 ** num_tries), MAX_SLEEP_TIME])
  num_tries += 1
- logger.debug(
- "Command {!r} is running for {:.2f}[s] and now Going to sleep {:.2f}[s]".format(
- command.id,
- elapsed,
- sleep_time
- )
- )
+ logger.debug(f"Command {command.id} is running for {elapsed:.2f}[s]. Sleeping for {sleep_time:.2f}[s]")
  if iteration_callback is not None:
  try:
  iteration_callback()
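
The loop above polls a long-running command with capped exponential backoff: the sleep grows as backoff_factor * 2**num_tries but is clamped by both the remaining timeout and MAX_SLEEP_TIME. A minimal standalone sketch of the same pattern (poll_until_done, is_done, and the cap value are illustrative names, not part of dtlpy):

    import time

    MAX_SLEEP_TIME = 30  # illustrative cap, not the SDK's actual constant

    def poll_until_done(is_done, timeout=300, backoff_factor=1.0):
        """Poll is_done() with capped exponential backoff until True or timeout."""
        start = time.time()
        num_tries = 0
        while not is_done():
            elapsed = time.time() - start
            if elapsed >= timeout:
                raise TimeoutError(f"gave up after {elapsed:.2f}[s]")
            # sleep grows exponentially but never exceeds the remaining budget or the cap
            sleep_time = min(timeout - elapsed, backoff_factor * (2 ** num_tries), MAX_SLEEP_TIME)
            num_tries += 1
            time.sleep(max(sleep_time, 0))
        return True

For example, poll_until_done(lambda: job_finished(), timeout=60) would sleep 1, 2, 4, 8... seconds between checks, never longer than the cap or the time left.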
@@ -55,7 +55,8 @@ class Computes:
  wait=True,
  status: entities.ComputeStatus = None,
  settings: entities.ComputeSettings = None,
- metadata: dict = None
+ metadata: dict = None,
+ deployment_configuration: dict = None
  ):
  """
  Create a new compute
@@ -71,6 +72,7 @@ class Computes:
  :param status: Compute status
  :param settings: Compute settings
  :param metadata: Compute metadata
+ :param deployment_configuration: Compute deployment Configuration
  :return: Compute
  :rtype: dl.entities.compute.Compute
  """
@@ -78,7 +80,8 @@ class Computes:
  metadata = {}
  shared_contexts_json = []
  for shared_context in shared_contexts:
- src_json = shared_context.to_json() if isinstance(shared_context, entities.ComputeContext) else shared_context
+ src_json = shared_context.to_json() if isinstance(shared_context,
+ entities.ComputeContext) else shared_context
  shared_contexts_json.append(src_json)
  payload = {
  'name': name,
@@ -90,7 +93,8 @@ class Computes:
  'cluster': cluster.to_json(),
  'status': status,
  "settings": settings.to_json() if isinstance(settings, entities.ComputeSettings) else settings,
- "metadata": metadata
+ "metadata": metadata,
+ "deploymentConfiguration": deployment_configuration
  }

  # request
@@ -171,9 +175,10 @@ class Computes:
  if compute_id not in self.log_cache:
  self.log_cache[compute_id] = {}
  self.log_cache[compute_id]['validation'] = validation_logs
+
  return func

- def get(self, compute_id: str, archived = False):
+ def get(self, compute_id: str, archived=False):
  """
  Get a compute

@@ -183,7 +188,7 @@ class Computes:
  :rtype: dl.entities.compute.Compute
  """
  url_path = self._base_url + '/{}'.format(compute_id)
- params_to_add = {"archived": "true" if archived else "false" }
+ params_to_add = {"archived": "true" if archived else "false"}
  parsed_url = urlparse(url_path)
  query_dict = parse_qs(parsed_url.query)
  query_dict.update(params_to_add)
@@ -234,7 +239,7 @@ class Computes:
  :param bool wait: Wait for deletion
  """
  url_path = self._base_url + '/{}'.format(compute_id)
- params_to_add = {"skipDestroy": "true" if skip_destroy else "false" }
+ params_to_add = {"skipDestroy": "true" if skip_destroy else "false"}
  parsed_url = urlparse(url_path)
  query_dict = parse_qs(parsed_url.query)
  query_dict.update(params_to_add)
@@ -315,7 +320,6 @@ class Computes:
  if not success:
  raise exceptions.PlatformException(response)

-
  return response.json()

  @staticmethod
@@ -346,7 +350,7 @@ class Computes:
  }
  )

- def setup_compute_cluster(self, config, integration, org_id, project=None):
+ def setup_compute_cluster(self, config, integration, org_id, project=None, is_global=False):
  """Set up a compute cluster using the provided configuration and integration."""
  cluster = ComputeCluster.from_setup_json(config, integration)
  project_id = None
@@ -360,11 +364,12 @@ class Computes:
  ComputeType.KUBERNETES,
  status=config['config'].get('status', None),
  settings=config['config'].get('settings', None),
- metadata=config['config'].get('metadata', None))
+ deployment_configuration=config['config'].get('deploymentConfiguration', {}),
+ metadata=config['config'].get('metadata', None), is_global=is_global)

  return compute

- def create_from_config_file(self, config_file_path, org_id, project_name: Optional[str] = None):
+ def create_from_config_file(self, config_file_path, org_id, project_name: Optional[str] = None, is_global=False):
  config = self.decode_and_parse_input(config_file_path)
  project = None
  if project_name is not None:
@@ -373,10 +378,9 @@ class Computes:
  integration_name = ('cluster_integration_test_' + datetime.datetime.now().isoformat().split('.')[0]
  .replace(':', '_'))
  integration = self.create_integration(org, integration_name, config['authentication'])
- compute = self.setup_compute_cluster(config, integration, org_id, project)
+ compute = self.setup_compute_cluster(config, integration, org_id, project, is_global=is_global)
  return compute

-
  def _list(self, filters: entities.Filters):
  url = self._base_url + '/query'
  success, response = self._client_api.gen_request(req_type='POST',
@@ -432,4 +436,4 @@ class Computes:
  page_size=filters.page_size,
  client_api=self._client_api)
  paged.get_page()
- return paged
+ return paged
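
Taken together, the compute hunks add two knobs to compute provisioning: deployment_configuration, which create() forwards as the deploymentConfiguration key of the create payload (and which setup_compute_cluster reads from config['config']['deploymentConfiguration']), and an is_global flag threaded through setup_compute_cluster and create_from_config_file. A hedged usage sketch, assuming the repository is reachable as dl.computes and using placeholder IDs and paths:

    import dtlpy as dl

    # Placeholders: a real org ID and a cluster config file in the layout that
    # decode_and_parse_input expects (with 'config' and 'authentication' sections).
    compute = dl.computes.create_from_config_file(
        config_file_path='/path/to/cluster_config.json',
        org_id='my-org-id',
        project_name='my-project',
        is_global=False,  # new flag in this release
    )
    print(compute.id)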
@@ -8,14 +8,17 @@ import time
  import copy
  import tqdm
  import logging
+ import zipfile
  import json
- from typing import Union
+ from typing import Union, Generator, Optional

  from .. import entities, repositories, miscellaneous, exceptions, services, PlatformException, _api_reference
  from ..services.api_client import ApiClient
+ from ..entities.dataset import OutputExportType, ExportType

  logger = logging.getLogger(name='dtlpy')

+ MAX_ITEMS_PER_SUBSET = 50000

  class Datasets:
  """
@@ -155,8 +158,7 @@ class Datasets:
  payload['annotations'] = {"include": include_annotations, "convertSemantic": False}

  if annotation_filters is not None:
- payload['annotationsQuery'] = annotation_filters.prepare()['filter']
- payload['annotations']['filter'] = True
+ payload['annotationsQuery'] = annotation_filters.prepare()

  if dataset_lock:
  payload['datasetLock'] = dataset_lock
@@ -166,29 +168,37 @@ class Datasets:

  if lock_timeout_sec:
  payload['lockTimeoutSec'] = lock_timeout_sec
-
+
  return payload

- def _download_exported_item(self, item_id, export_type, local_path=None):
+ def _download_exported_item(self, item_id, export_type, local_path=None, unzip=True):
+ logger.debug(f"start downloading exported item {item_id} with export_type {export_type} and local_path {local_path} and unzip {unzip}")
  export_item = repositories.Items(client_api=self._client_api).get(item_id=item_id)
- export_item_path = export_item.download(local_path=local_path)
+ export_item_path = export_item.download(local_path=local_path)

- if export_type == entities.ExportType.ZIP:
- # unzipping annotations to directory
- if isinstance(export_item_path, list) or not os.path.isfile(export_item_path):
- raise exceptions.PlatformException(
- error='404',
- message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
- export_item.id))
+ # Common validation check for both JSON and other export types
+ if isinstance(export_item_path, list) or not os.path.isfile(export_item_path):
+ raise exceptions.PlatformException(
+ error='404',
+ message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
+ export_item.id))
+
+ result = None
+ if unzip is False or export_type == entities.ExportType.JSON:
+ result = export_item_path
+ else:
  try:
  miscellaneous.Zipping.unzip_directory(zip_filename=export_item_path,
- to_directory=local_path)
+ to_directory=local_path)
+ result = local_path
  except Exception as e:
  logger.warning("Failed to extract zip file error: {}".format(e))
  finally:
- # cleanup
+ # cleanup only for zip files to avoid removing needed results
  if isinstance(export_item_path, str) and os.path.isfile(export_item_path):
  os.remove(export_item_path)
+ logger.debug(f"end downloading, result {result}")
+ return result

  @property
  def platform_url(self):
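
The reworked _download_exported_item validates the downloaded path once, returns it untouched when unzip is False or the export is JSON, otherwise extracts the ZIP into local_path and finally removes the intermediate archive. A minimal standalone sketch of that validate / extract / cleanup pattern using only the standard library (extract_export and the paths are illustrative; the SDK itself goes through miscellaneous.Zipping.unzip_directory):

    import os
    import zipfile

    def extract_export(export_path: str, to_directory: str, unzip: bool = True) -> str:
        """Return a JSON export as-is, or extract a ZIP export and clean up the archive."""
        if not os.path.isfile(export_path):
            raise FileNotFoundError(f"export file not found: {export_path}")
        if not unzip or export_path.endswith('.json'):
            return export_path
        try:
            with zipfile.ZipFile(export_path) as zf:
                zf.extractall(to_directory)
            return to_directory
        finally:
            # remove the intermediate archive, mirroring the SDK's cleanup step
            if os.path.isfile(export_path):
                os.remove(export_path)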
@@ -480,7 +490,7 @@ class Datasets:
  return dataset
  else:
  raise exceptions.PlatformException(response)
-
+
  @_api_reference.add(path='/datasets/{id}/unlock', method='patch')
  def unlock(self, dataset: entities.Dataset ) -> entities.Dataset:
  """
@@ -625,22 +635,137 @@ class Datasets:
  .format(response))
  return self.get(dataset_id=command.spec['returnedModelId'])

+ def _export_recursive(
+ self,
+ dataset: entities.Dataset = None,
+ dataset_name: str = None,
+ dataset_id: str = None,
+ local_path: str = None,
+ filters: Union[dict, entities.Filters] = None,
+ annotation_filters: entities.Filters = None,
+ feature_vector_filters: entities.Filters = None,
+ include_feature_vectors: bool = False,
+ include_annotations: bool = False,
+ timeout: int = 0,
+ dataset_lock: bool = False,
+ lock_timeout_sec: int = None,
+ export_summary: bool = False,
+ max_items_per_subset: int = MAX_ITEMS_PER_SUBSET,
+ export_type: ExportType = ExportType.JSON,
+ output_export_type: OutputExportType = OutputExportType.JSON,
+ ) -> Generator[str, None, None]:
+ """
+ Export dataset items recursively by splitting large datasets into smaller subsets.
+
+ Args:
+ dataset (entities.Dataset, optional): Dataset entity to export
+ dataset_name (str, optional): Name of the dataset to export
+ dataset_id (str, optional): ID of the dataset to export
+ local_path (str, optional): Local path to save the exported data
+ filters (Union[dict, entities.Filters], optional): Filters to apply on the items
+ annotation_filters (entities.Filters, optional): Filters to apply on the annotations
+ feature_vector_filters (entities.Filters, optional): Filters to apply on the feature vectors
+ include_feature_vectors (bool, optional): Whether to include feature vectors in export. Defaults to False
+ include_annotations (bool, optional): Whether to include annotations in export. Defaults to False
+ timeout (int, optional): Timeout in seconds for the export operation. Defaults to 0
+ dataset_lock (bool, optional): Whether to lock the dataset during export. Defaults to False
+ lock_timeout_sec (int, optional): Timeout for dataset lock in seconds. Defaults to None
+ export_summary (bool, optional): Whether to include export summary. Defaults to False
+ max_items_per_subset (int, optional): Maximum items per subset for recursive export. Defaults to MAX_ITEMS_PER_SUBSET
+ export_type (ExportType, optional): Type of export (JSON or ZIP). Defaults to ExportType.JSON
+ output_export_type (OutputExportType, optional): Output format type. Defaults to OutputExportType.JSON
+
+ Returns:
+ Generator[str, None, None]: Generator yielding export paths
+
+ Raises:
+ NotImplementedError: If ZIP export type is used with JSON output type
+ exceptions.PlatformException: If API request fails or command response is invalid
+ """
+ logger.debug(f"exporting dataset with export_type {export_type} and output_export_type {output_export_type}")
+ if export_type == ExportType.ZIP and output_export_type == OutputExportType.JSON:
+ raise NotImplementedError(
+ "Zip export type is not supported for JSON output type.\n"
+ "If Json output is required, please use the export_type = JSON"
+ )
+
+ # Get dataset entity for recursive filtering
+ dataset_entity = self.get(dataset_id=self._resolve_dataset_id(dataset, dataset_name, dataset_id))
+ if export_type != ExportType.JSON:
+ filters_list = [filters]
+ else:
+ # Generate filter subsets using recursive_get_filters
+ filters_list = entities.Filters._get_split_filters(
+ dataset=dataset_entity, filters=filters, max_items=max_items_per_subset
+ )
+ # First loop: Make all API requests without waiting
+ commands = []
+ logger.debug("start making all API requests without waiting")
+ for filter_i in filters_list:
+ # Build payload for this subset
+ payload = self._build_payload(
+ filters=filter_i,
+ include_feature_vectors=include_feature_vectors,
+ include_annotations=include_annotations,
+ export_type=export_type,
+ annotation_filters=annotation_filters,
+ feature_vector_filters=feature_vector_filters,
+ dataset_lock=dataset_lock,
+ lock_timeout_sec=lock_timeout_sec,
+ export_summary=export_summary,
+ )
+
+ # Make API request for this subset
+ success, response = self._client_api.gen_request(
+ req_type='post', path=f'/datasets/{dataset_entity.id}/export', json_req=payload
+ )
+
+ if not success:
+ logger.error(f"failed to make API request /datasets/{dataset_entity.id}/export with payload {payload} response {response}")
+ raise exceptions.PlatformException(response)
+
+ # Handle command execution
+ commands.append( entities.Command.from_json(_json=response.json(), client_api=self._client_api))
+
+ time.sleep(2) # as the command have wrong progress in the beginning
+ logger.debug("start waiting for all commands")
+ # Second loop: Wait for all commands and process results
+ for command in commands:
+ command = command.wait(timeout=timeout)
+
+ if 'outputItemId' not in command.spec:
+ raise exceptions.PlatformException(
+ error='400', message="outputItemId key is missing in command response"
+ )
+
+ item_id = command.spec['outputItemId']
+ # Download and process the exported item
+ yield self._download_exported_item(
+ item_id=item_id,
+ export_type=export_type,
+ local_path=local_path,
+ unzip=output_export_type != OutputExportType.ZIP,
+ )
+
  @_api_reference.add(path='/datasets/{id}/export', method='post')
- def export(self,
- dataset: entities.Dataset = None,
- dataset_name: str = None,
- dataset_id: str = None,
- local_path: str = None,
- filters: Union[dict, entities.Filters] = None,
- annotation_filters: entities.Filters = None,
- feature_vector_filters: entities.Filters = None,
- include_feature_vectors: bool = False,
- include_annotations: bool = False,
- export_type: entities.ExportType = entities.ExportType.JSON,
- timeout: int = 0,
- dataset_lock: bool = False,
- lock_timeout_sec: int = None,
- export_summary: bool = False):
+ def export(
+ self,
+ dataset: entities.Dataset = None,
+ dataset_name: str = None,
+ dataset_id: str = None,
+ local_path: str = None,
+ filters: Union[dict, entities.Filters] = None,
+ annotation_filters: entities.Filters = None,
+ feature_vector_filters: entities.Filters = None,
+ include_feature_vectors: bool = False,
+ include_annotations: bool = False,
+ export_type: ExportType = ExportType.JSON,
+ timeout: int = 0,
+ dataset_lock: bool = False,
+ lock_timeout_sec: int = None,
+ export_summary: bool = False,
+ output_export_type: OutputExportType = None,
+ ) -> Optional[str]:
  """
  Export dataset items and annotations.

@@ -648,12 +773,55 @@ class Datasets:

  You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.

+ **Export Behavior by Parameter Combination:**
+
+ The behavior of this method depends on the combination of `export_type` and `output_export_type`:
+
+ **When export_type = ExportType.JSON:**
+
+ - **output_export_type = OutputExportType.JSON (default when None):**
+ - Exports data in JSON format, split into subsets of max 500 items
+ - Downloads all subset JSON files and concatenates them into a single `result.json` file
+ - Returns the path to the concatenated JSON file
+ - Cleans up individual subset files after concatenation
+
+ - **output_export_type = OutputExportType.ZIP:**
+ - Same as JSON export, but zips the final `result.json` file
+ - Returns the path to the zipped file (`result.json.zip`)
+ - Cleans up the unzipped JSON file after zipping
+
+ - **output_export_type = OutputExportType.FOLDERS:**
+ - Exports data in JSON format, split into subsets of max 500 items
+ - Downloads all subset JSON files and creates individual JSON files for each item
+ - Creates a folder structure mirroring the remote dataset structure
+ - Returns the path to the base directory containing the folder structure
+ - Each item gets its own JSON file named after the original filename
+
+ **When export_type = ExportType.ZIP:**
+
+ - **output_export_type = OutputExportType.ZIP:**
+ - Exports data as a ZIP file containing the dataset
+ - Returns the downloaded ZIP item directly
+ - No additional processing or concatenation
+
+ - **output_export_type = OutputExportType.JSON:**
+ - **NOT SUPPORTED** - Raises NotImplementedError
+ - Use export_type=ExportType.JSON instead for JSON output
+
+ - **output_export_type = OutputExportType.FOLDERS:**
+ - **NOT SUPPORTED** - Raises NotImplementedError
+ - Use export_type=ExportType.JSON instead for folder output
+
+ **When output_export_type = None (legacy behavior):**
+ - Defaults to OutputExportType.JSON
+ - Maintains backward compatibility with existing code
+
  :param dtlpy.entities.dataset.Dataset dataset: Dataset object
  :param str dataset_name: The name of the dataset
  :param str dataset_id: The ID of the dataset
- :param str local_path: Local path to save the exported dataset
+ :param str local_path: Local path to save the exported dataset
  :param Union[dict, dtlpy.entities.filters.Filters] filters: Filters entity or a query dictionary
- :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
+ :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
  :param dtlpy.entities.filters.Filters feature_vector_filters: Filters entity to filter feature vectors for export
  :param bool include_feature_vectors: Include item feature vectors in the export
  :param bool include_annotations: Include item annotations in the export
@@ -661,45 +829,92 @@ class Datasets:
  :param bool export_summary: Get Summary of the dataset export
  :param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
  :param entities.ExportType export_type: Type of export ('json' or 'zip')
+ :param entities.OutputExportType output_export_type: Output format ('json', 'zip', or 'folders'). If None, defaults to 'json'
  :param int timeout: Maximum time in seconds to wait for the export to complete
- :return: Exported item
- :rtype: dtlpy.entities.item.Item
-
- **Example**:
-
- .. code-block:: python
-
- export_item = project.datasets.export(dataset_id='dataset_id',
- filters=filters,
- include_feature_vectors=True,
- include_annotations=True,
- export_type=dl.ExportType.JSON,
- dataset_lock=True,
- lock_timeout_sec=300,
- export_summary=False)
+ :return: Path to exported file/directory, or None if export result is empty
+ :rtype: Optional[str]
  """
- dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
- payload = self._build_payload(filters, include_feature_vectors, include_annotations,
- export_type, annotation_filters, feature_vector_filters,
- dataset_lock, lock_timeout_sec, export_summary)
-
- success, response = self._client_api.gen_request(req_type='post', path=f'/datasets/{dataset_id}/export',
- json_req=payload)
- if not success:
- raise exceptions.PlatformException(response)
-
- command = entities.Command.from_json(_json=response.json(),
- client_api=self._client_api)
-
- time.sleep(2) # as the command have wrong progress in the beginning
- command = command.wait(timeout=timeout)
- if 'outputItemId' not in command.spec:
- raise exceptions.PlatformException(
- error='400',
- message="outputItemId key is missing in command response: {}".format(response))
- item_id = command.spec['outputItemId']
- self._download_exported_item(item_id=item_id, export_type=export_type, local_path=local_path)
- return local_path
+ export_result = list(
+ self._export_recursive(
+ dataset=dataset,
+ dataset_name=dataset_name,
+ dataset_id=dataset_id,
+ local_path=local_path,
+ filters=filters,
+ annotation_filters=annotation_filters,
+ feature_vector_filters=feature_vector_filters,
+ include_feature_vectors=include_feature_vectors,
+ include_annotations=include_annotations,
+ timeout=timeout,
+ dataset_lock=dataset_lock,
+ lock_timeout_sec=lock_timeout_sec,
+ export_summary=export_summary,
+ export_type=export_type,
+ output_export_type=output_export_type,
+ )
+ )
+ if all(x is None for x in export_result):
+ logger.error("export result is empty")
+ return None
+
+ if export_type == ExportType.ZIP:
+ # if export type is zip, then return the _export_recursive result as it
+ return export_result[0]
+
+ # if user didn't provide output_export_type, keep the previous behavior
+ if output_export_type is None:
+ output_export_type = OutputExportType.JSON
+
+ # export type is jsos :
+ # Load all items from subset JSON files and clean them up
+ all_items = []
+ logger.debug("start loading all items from subset JSON files")
+ for json_file in export_result:
+ if json_file is None:
+ continue
+ if os.path.isfile(json_file):
+ with open(json_file, 'r') as f:
+ items = json.load(f)
+ if isinstance(items, list):
+ all_items.extend(items)
+ os.remove(json_file)
+
+ base_dir = os.path.dirname(export_result[0])
+ if output_export_type != OutputExportType.FOLDERS:
+ dataset_id=self._resolve_dataset_id(dataset, dataset_name, dataset_id)
+ result_file_name = f"{dataset_id}.json"
+ result_file = os.path.join(base_dir, result_file_name)
+ logger.debug(f"start writing all items to result file {result_file}")
+ with open(result_file, 'w') as f:
+ json.dump(all_items, f)
+ if output_export_type == OutputExportType.ZIP:
+ # Zip the result file
+ zip_filename = result_file + '.zip'
+ # Create zip file
+ logger.debug(f"start zipping result file {zip_filename}")
+ with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
+ zf.write(result_file, arcname=os.path.basename(result_file))
+
+ # Remove original json after zipping
+ os.remove(result_file)
+ result_file = zip_filename
+ return result_file
+ logger.debug("start building per-item JSON files under local_path mirroring remote structure")
+ # Build per-item JSON files under local_path mirroring remote structure
+ for item in all_items:
+ rel_json_path = os.path.splitext(item.get('filename'))[0] + '.json'
+ # Remove leading slash to make it a relative path
+ if rel_json_path.startswith('/'):
+ rel_json_path = rel_json_path[1:]
+ out_path = os.path.join(base_dir, rel_json_path)
+ os.makedirs(os.path.dirname(out_path), exist_ok=True)
+ try:
+ with open(out_path, 'w') as outf:
+ json.dump(item, outf)
+ except Exception:
+ logger.exception(f'Failed writing export item JSON to {out_path}')
+ logger.debug("end building per-item JSON files under local_path mirroring remote structure")
+ return base_dir

  @_api_reference.add(path='/datasets/merge', method='post')
  def merge(self,
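
The reworked export() wraps _export_recursive(): JSON exports are split into per-subset commands, downloaded, and then merged according to output_export_type (a single <dataset_id>.json, a zipped copy of it, or per-item JSON files mirroring the remote folder structure). A hedged usage sketch, assuming OutputExportType is exposed at the top level alongside dl.ExportType and using placeholder IDs and paths:

    import dtlpy as dl

    project = dl.projects.get(project_name='my-project')  # placeholder name

    # Concatenated JSON output (default when output_export_type is None)
    json_path = project.datasets.export(
        dataset_id='dataset_id',
        include_annotations=True,
        export_type=dl.ExportType.JSON,
        local_path='/tmp/export')

    # Per-item JSON files mirroring the remote folder structure
    folders_path = project.datasets.export(
        dataset_id='dataset_id',
        include_annotations=True,
        export_type=dl.ExportType.JSON,
        output_export_type=dl.OutputExportType.FOLDERS,  # assumed top-level alias
        local_path='/tmp/export_folders')

Per the docstring above, export_type=ExportType.ZIP only pairs with a ZIP (or unset) output type; the JSON and FOLDERS outputs require export_type=ExportType.JSON.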
@@ -1185,7 +1400,6 @@ class Datasets:
  import warnings
  warnings.warn("`readonly` flag on dataset is deprecated, doing nothing.", DeprecationWarning)

-
  @_api_reference.add(path='/datasets/{id}/split', method='post')
  def split_ml_subsets(self,
  dataset_id: str,
@@ -1201,10 +1415,10 @@ class Datasets:
  :rtype: bool
  :raises: PlatformException on failure and ValueError if percentages do not sum to 100 or invalid keys/values.
  """
- # Validate percentages
+ # Validate percentages
  if not ml_split_list:
  ml_split_list = {'train': 80, 'validation': 10, 'test': 10}
-
+
  if not items_query:
  items_query = entities.Filters()

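
As the validation above shows, split_ml_subsets falls back to an 80/10/10 train/validation/test split and an empty Filters query when those arguments are omitted, and the percentages must sum to 100. A hedged usage sketch, assuming ml_split_list and items_query are keyword parameters matching the local names shown and using a placeholder dataset ID:

    import dtlpy as dl

    project = dl.projects.get(project_name='my-project')  # placeholder name
    success = project.datasets.split_ml_subsets(
        dataset_id='dataset_id',
        ml_split_list={'train': 70, 'validation': 20, 'test': 10},  # must sum to 100
    )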
@@ -1238,7 +1452,6 @@ class Datasets:
  else:
  raise exceptions.PlatformException(response)

-
  @_api_reference.add(path='/datasets/{id}/items/bulk-update-metadata', method='post')
  def bulk_update_ml_subset(self, dataset_id: str, items_query: dict, subset: str = None, deleteTag: bool = False) -> bool:
  """
@@ -49,7 +49,8 @@ class Downloader:
  export_version=entities.ExportVersion.V1,
  dataset_lock=False,
  lock_timeout_sec=None,
- export_summary=False
+ export_summary=False,
+ raise_on_error=False
  ):
  """
  Download dataset by filters.
@@ -78,6 +79,7 @@ class Downloader:
  :param bool dataset_lock: optional - default = False
  :param bool export_summary: optional - default = False
  :param int lock_timeout_sec: optional
+ :param bool raise_on_error: raise an exception if an error occurs
  :return: Output (list)
  """

@@ -313,8 +315,24 @@ class Downloader:
  # log error
  if n_error > 0:
  log_filepath = reporter.generate_log_files()
+ # Get up to 5 error examples for the exception message
+ error_text = ""
+ error_counter = 0
+ if reporter._errors:
+ for _id, error in reporter._errors.items():
+ error_counter += 1
+ error_text += f"Item ID: {_id}, Error: {error} | "
+ if error_counter >= 5:
+ break
+ error_message = f"Errors in {n_error} files. Errors: {error_text}"
  if log_filepath is not None:
- logger.warning("Errors in {} files. See {} for full log".format(n_error, log_filepath))
+ error_message += f", see {log_filepath} for full log"
+ if raise_on_error is True:
+ raise PlatformException(
+ error="400", message=error_message
+ )
+ else:
+ logger.warning(error_message)
  if int(n_download) <= 1 and int(n_exist) <= 1:
  try:
  return next(reporter.output)
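
With raise_on_error=True the downloader now raises a PlatformException whose message carries up to five per-item error examples plus the log file path, instead of only logging a warning. A hedged usage sketch, assuming this release forwards the flag from the public items download call (placeholder IDs and paths):

    import dtlpy as dl

    dataset = dl.datasets.get(dataset_id='dataset_id')  # placeholder ID
    try:
        paths = dataset.items.download(local_path='/tmp/my_dataset', raise_on_error=True)
    except dl.exceptions.PlatformException as err:
        # message includes up to five "Item ID: ..., Error: ..." examples and the log file path
        print(f'download failed: {err}')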
@@ -428,7 +446,7 @@ class Downloader:

  if export_summary:
  payload['summary'] = export_summary
-
+
  if lock_timeout_sec:
  payload['lockTimeoutSec'] = lock_timeout_sec

@@ -753,6 +771,7 @@ class Downloader:
  if response_output != local_filepath:
  source_path = os.path.normpath(response_output)
  shutil.copyfile(source_path, local_filepath)
+ download_done = True
  else:
  try:
  temp_file_path = local_filepath + '.download'
@@ -806,6 +825,7 @@ class Downloader:
  source_file = response_output
  with open(source_file, 'wb') as f:
  data = f.read()
+ download_done = True
  else:
  try:
  for chunk in response.iter_content(chunk_size=chunk_size):