dtlpy 1.114.17__py3-none-any.whl → 1.115.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +1 -1
- dtlpy/__version__.py +1 -1
- dtlpy/entities/__init__.py +1 -1
- dtlpy/entities/analytic.py +42 -6
- dtlpy/entities/codebase.py +1 -5
- dtlpy/entities/compute.py +12 -5
- dtlpy/entities/dataset.py +19 -5
- dtlpy/entities/driver.py +14 -2
- dtlpy/entities/filters.py +156 -3
- dtlpy/entities/item.py +9 -3
- dtlpy/entities/prompt_item.py +7 -1
- dtlpy/entities/service.py +5 -0
- dtlpy/ml/base_model_adapter.py +407 -263
- dtlpy/repositories/commands.py +1 -7
- dtlpy/repositories/computes.py +17 -13
- dtlpy/repositories/datasets.py +287 -74
- dtlpy/repositories/downloader.py +23 -3
- dtlpy/repositories/drivers.py +12 -0
- dtlpy/repositories/executions.py +1 -3
- dtlpy/repositories/features.py +31 -14
- dtlpy/repositories/items.py +5 -2
- dtlpy/repositories/models.py +16 -4
- dtlpy/repositories/uploader.py +22 -12
- dtlpy/services/api_client.py +6 -3
- dtlpy/services/reporter.py +1 -1
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/METADATA +15 -12
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/RECORD +34 -34
- {dtlpy-1.114.17.data → dtlpy-1.115.44.data}/scripts/dlp +0 -0
- {dtlpy-1.114.17.data → dtlpy-1.115.44.data}/scripts/dlp.bat +0 -0
- {dtlpy-1.114.17.data → dtlpy-1.115.44.data}/scripts/dlp.py +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/WHEEL +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/licenses/LICENSE +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/top_level.txt +0 -0
dtlpy/repositories/commands.py
CHANGED
@@ -113,13 +113,7 @@ class Commands:
             elapsed = time.time() - start
             sleep_time = np.min([timeout - elapsed, backoff_factor * (2 ** num_tries), MAX_SLEEP_TIME])
             num_tries += 1
-            logger.debug(
-                "Command {!r} is running for {:.2f}[s] and now Going to sleep {:.2f}[s]".format(
-                    command.id,
-                    elapsed,
-                    sleep_time
-                )
-            )
+            logger.debug(f"Command {command.id} is running for {elapsed:.2f}[s]. Sleeping for {sleep_time:.2f}[s]")
             if iteration_callback is not None:
                 try:
                     iteration_callback()
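Note: the change above only replaces a multi-line str.format call with a single f-string; the emitted log line is equivalent apart from wording. A standalone sketch with placeholder values (command_id, elapsed and sleep_time stand in for the SDK's command.id and timing variables):

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('dtlpy')

    command_id = '64b0c0ffee'          # placeholder command id
    elapsed, sleep_time = 12.34, 4.0   # placeholder timings

    # Old style (removed): multi-line str.format call.
    logger.debug(
        "Command {!r} is running for {:.2f}[s] and now Going to sleep {:.2f}[s]".format(
            command_id, elapsed, sleep_time))

    # New style (added): single f-string.
    logger.debug(f"Command {command_id} is running for {elapsed:.2f}[s]. Sleeping for {sleep_time:.2f}[s]")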
dtlpy/repositories/computes.py
CHANGED
@@ -55,7 +55,8 @@ class Computes:
             wait=True,
             status: entities.ComputeStatus = None,
             settings: entities.ComputeSettings = None,
-            metadata: dict = None
+            metadata: dict = None,
+            deployment_configuration: dict = None
     ):
         """
         Create a new compute
@@ -71,6 +72,7 @@ class Computes:
         :param status: Compute status
         :param settings: Compute settings
         :param metadata: Compute metadata
+        :param deployment_configuration: Compute deployment Configuration
         :return: Compute
         :rtype: dl.entities.compute.Compute
         """
@@ -78,7 +80,8 @@ class Computes:
             metadata = {}
         shared_contexts_json = []
         for shared_context in shared_contexts:
-            src_json = shared_context.to_json() if isinstance(shared_context,
+            src_json = shared_context.to_json() if isinstance(shared_context,
+                                                              entities.ComputeContext) else shared_context
             shared_contexts_json.append(src_json)
         payload = {
             'name': name,
@@ -90,7 +93,8 @@ class Computes:
             'cluster': cluster.to_json(),
             'status': status,
             "settings": settings.to_json() if isinstance(settings, entities.ComputeSettings) else settings,
-            "metadata": metadata
+            "metadata": metadata,
+            "deploymentConfiguration": deployment_configuration
         }

         # request
@@ -171,9 +175,10 @@ class Computes:
             if compute_id not in self.log_cache:
                 self.log_cache[compute_id] = {}
             self.log_cache[compute_id]['validation'] = validation_logs
+
         return func

-    def get(self, compute_id: str, archived
+    def get(self, compute_id: str, archived=False):
         """
         Get a compute

@@ -183,7 +188,7 @@ class Computes:
         :rtype: dl.entities.compute.Compute
         """
         url_path = self._base_url + '/{}'.format(compute_id)
-        params_to_add = {"archived": "true" if archived else "false"
+        params_to_add = {"archived": "true" if archived else "false"}
         parsed_url = urlparse(url_path)
         query_dict = parse_qs(parsed_url.query)
         query_dict.update(params_to_add)
@@ -234,7 +239,7 @@ class Computes:
         :param bool wait: Wait for deletion
         """
         url_path = self._base_url + '/{}'.format(compute_id)
-        params_to_add = {"skipDestroy": "true" if skip_destroy else "false"
+        params_to_add = {"skipDestroy": "true" if skip_destroy else "false"}
         parsed_url = urlparse(url_path)
         query_dict = parse_qs(parsed_url.query)
         query_dict.update(params_to_add)
@@ -315,7 +320,6 @@ class Computes:
         if not success:
             raise exceptions.PlatformException(response)

-
         return response.json()

     @staticmethod
@@ -346,7 +350,7 @@ class Computes:
             }
         )

-    def setup_compute_cluster(self, config, integration, org_id, project=None):
+    def setup_compute_cluster(self, config, integration, org_id, project=None, is_global=False):
         """Set up a compute cluster using the provided configuration and integration."""
         cluster = ComputeCluster.from_setup_json(config, integration)
         project_id = None
@@ -360,11 +364,12 @@ class Computes:
             ComputeType.KUBERNETES,
             status=config['config'].get('status', None),
             settings=config['config'].get('settings', None),
-            [1 line removed; content not rendered in this diff view]
+            deployment_configuration=config['config'].get('deploymentConfiguration', {}),
+            metadata=config['config'].get('metadata', None), is_global=is_global)

         return compute

-    def create_from_config_file(self, config_file_path, org_id, project_name: Optional[str] = None):
+    def create_from_config_file(self, config_file_path, org_id, project_name: Optional[str] = None, is_global=False):
         config = self.decode_and_parse_input(config_file_path)
         project = None
         if project_name is not None:
@@ -373,10 +378,9 @@ class Computes:
         integration_name = ('cluster_integration_test_' + datetime.datetime.now().isoformat().split('.')[0]
                             .replace(':', '_'))
         integration = self.create_integration(org, integration_name, config['authentication'])
-        compute = self.setup_compute_cluster(config, integration, org_id, project)
+        compute = self.setup_compute_cluster(config, integration, org_id, project, is_global=is_global)
         return compute

-
     def _list(self, filters: entities.Filters):
         url = self._base_url + '/query'
         success, response = self._client_api.gen_request(req_type='POST',
@@ -432,4 +436,4 @@ class Computes:
             page_size=filters.page_size,
             client_api=self._client_api)
         paged.get_page()
-        return paged
+        return paged
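Usage sketch for the additions above (deployment_configuration on create, is_global on setup_compute_cluster/create_from_config_file, and the repaired archived default on get). The access path dl.computes, the config file name and the ids are assumptions for illustration only; they are not shown in this diff.

    import dtlpy as dl

    # Assumption: the Computes repository is reachable as dl.computes.
    compute = dl.computes.create_from_config_file(
        config_file_path='cluster_config.json',  # assumed local config file
        org_id='<org-id>',                       # placeholder
        project_name='my-project',               # optional
        is_global=False,                         # new optional flag in 1.115.x
    )

    # get() now has a proper default for `archived`
    compute = dl.computes.get(compute_id=compute.id, archived=False)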
dtlpy/repositories/datasets.py
CHANGED
@@ -8,14 +8,17 @@ import time
 import copy
 import tqdm
 import logging
+import zipfile
 import json
-from typing import Union
+from typing import Union, Generator, Optional

 from .. import entities, repositories, miscellaneous, exceptions, services, PlatformException, _api_reference
 from ..services.api_client import ApiClient
+from ..entities.dataset import OutputExportType, ExportType

 logger = logging.getLogger(name='dtlpy')

+MAX_ITEMS_PER_SUBSET = 50000

 class Datasets:
     """
@@ -155,8 +158,7 @@ class Datasets:
         payload['annotations'] = {"include": include_annotations, "convertSemantic": False}

         if annotation_filters is not None:
-            payload['annotationsQuery'] = annotation_filters.prepare()
-            payload['annotations']['filter'] = True
+            payload['annotationsQuery'] = annotation_filters.prepare()

         if dataset_lock:
             payload['datasetLock'] = dataset_lock
@@ -166,29 +168,37 @@ class Datasets:

         if lock_timeout_sec:
             payload['lockTimeoutSec'] = lock_timeout_sec
-
+
         return payload

-    def _download_exported_item(self, item_id, export_type, local_path=None):
+    def _download_exported_item(self, item_id, export_type, local_path=None, unzip=True):
+        logger.debug(f"start downloading exported item {item_id} with export_type {export_type} and local_path {local_path} and unzip {unzip}")
         export_item = repositories.Items(client_api=self._client_api).get(item_id=item_id)
-        export_item_path = export_item.download(local_path=local_path)
+        export_item_path = export_item.download(local_path=local_path)

-        [7 lines removed; content not rendered in this diff view]
+        # Common validation check for both JSON and other export types
+        if isinstance(export_item_path, list) or not os.path.isfile(export_item_path):
+            raise exceptions.PlatformException(
+                error='404',
+                message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
+                    export_item.id))
+
+        result = None
+        if unzip is False or export_type == entities.ExportType.JSON:
+            result = export_item_path
+        else:
             try:
                 miscellaneous.Zipping.unzip_directory(zip_filename=export_item_path,
-                [1 line removed; content not rendered in this diff view]
+                                                      to_directory=local_path)
+                result = local_path
             except Exception as e:
                 logger.warning("Failed to extract zip file error: {}".format(e))
             finally:
-                # cleanup
+                # cleanup only for zip files to avoid removing needed results
                 if isinstance(export_item_path, str) and os.path.isfile(export_item_path):
                     os.remove(export_item_path)
+        logger.debug(f"end downloading, result {result}")
+        return result

     @property
     def platform_url(self):
@@ -480,7 +490,7 @@ class Datasets:
             return dataset
         else:
             raise exceptions.PlatformException(response)
-
+
     @_api_reference.add(path='/datasets/{id}/unlock', method='patch')
     def unlock(self, dataset: entities.Dataset ) -> entities.Dataset:
         """
@@ -625,22 +635,137 @@ class Datasets:
                              .format(response))
         return self.get(dataset_id=command.spec['returnedModelId'])

+    def _export_recursive(
+            self,
+            dataset: entities.Dataset = None,
+            dataset_name: str = None,
+            dataset_id: str = None,
+            local_path: str = None,
+            filters: Union[dict, entities.Filters] = None,
+            annotation_filters: entities.Filters = None,
+            feature_vector_filters: entities.Filters = None,
+            include_feature_vectors: bool = False,
+            include_annotations: bool = False,
+            timeout: int = 0,
+            dataset_lock: bool = False,
+            lock_timeout_sec: int = None,
+            export_summary: bool = False,
+            max_items_per_subset: int = MAX_ITEMS_PER_SUBSET,
+            export_type: ExportType = ExportType.JSON,
+            output_export_type: OutputExportType = OutputExportType.JSON,
+    ) -> Generator[str, None, None]:
+        """
+        Export dataset items recursively by splitting large datasets into smaller subsets.
+
+        Args:
+            dataset (entities.Dataset, optional): Dataset entity to export
+            dataset_name (str, optional): Name of the dataset to export
+            dataset_id (str, optional): ID of the dataset to export
+            local_path (str, optional): Local path to save the exported data
+            filters (Union[dict, entities.Filters], optional): Filters to apply on the items
+            annotation_filters (entities.Filters, optional): Filters to apply on the annotations
+            feature_vector_filters (entities.Filters, optional): Filters to apply on the feature vectors
+            include_feature_vectors (bool, optional): Whether to include feature vectors in export. Defaults to False
+            include_annotations (bool, optional): Whether to include annotations in export. Defaults to False
+            timeout (int, optional): Timeout in seconds for the export operation. Defaults to 0
+            dataset_lock (bool, optional): Whether to lock the dataset during export. Defaults to False
+            lock_timeout_sec (int, optional): Timeout for dataset lock in seconds. Defaults to None
+            export_summary (bool, optional): Whether to include export summary. Defaults to False
+            max_items_per_subset (int, optional): Maximum items per subset for recursive export. Defaults to MAX_ITEMS_PER_SUBSET
+            export_type (ExportType, optional): Type of export (JSON or ZIP). Defaults to ExportType.JSON
+            output_export_type (OutputExportType, optional): Output format type. Defaults to OutputExportType.JSON
+
+        Returns:
+            Generator[str, None, None]: Generator yielding export paths
+
+        Raises:
+            NotImplementedError: If ZIP export type is used with JSON output type
+            exceptions.PlatformException: If API request fails or command response is invalid
+        """
+        logger.debug(f"exporting dataset with export_type {export_type} and output_export_type {output_export_type}")
+        if export_type == ExportType.ZIP and output_export_type == OutputExportType.JSON:
+            raise NotImplementedError(
+                "Zip export type is not supported for JSON output type.\n"
+                "If Json output is required, please use the export_type = JSON"
+            )
+
+        # Get dataset entity for recursive filtering
+        dataset_entity = self.get(dataset_id=self._resolve_dataset_id(dataset, dataset_name, dataset_id))
+        if export_type != ExportType.JSON:
+            filters_list = [filters]
+        else:
+            # Generate filter subsets using recursive_get_filters
+            filters_list = entities.Filters._get_split_filters(
+                dataset=dataset_entity, filters=filters, max_items=max_items_per_subset
+            )
+        # First loop: Make all API requests without waiting
+        commands = []
+        logger.debug("start making all API requests without waiting")
+        for filter_i in filters_list:
+            # Build payload for this subset
+            payload = self._build_payload(
+                filters=filter_i,
+                include_feature_vectors=include_feature_vectors,
+                include_annotations=include_annotations,
+                export_type=export_type,
+                annotation_filters=annotation_filters,
+                feature_vector_filters=feature_vector_filters,
+                dataset_lock=dataset_lock,
+                lock_timeout_sec=lock_timeout_sec,
+                export_summary=export_summary,
+            )
+
+            # Make API request for this subset
+            success, response = self._client_api.gen_request(
+                req_type='post', path=f'/datasets/{dataset_entity.id}/export', json_req=payload
+            )
+
+            if not success:
+                logger.error(f"failed to make API request /datasets/{dataset_entity.id}/export with payload {payload} response {response}")
+                raise exceptions.PlatformException(response)
+
+            # Handle command execution
+            commands.append(entities.Command.from_json(_json=response.json(), client_api=self._client_api))
+
+        time.sleep(2)  # as the command have wrong progress in the beginning
+        logger.debug("start waiting for all commands")
+        # Second loop: Wait for all commands and process results
+        for command in commands:
+            command = command.wait(timeout=timeout)
+
+            if 'outputItemId' not in command.spec:
+                raise exceptions.PlatformException(
+                    error='400', message="outputItemId key is missing in command response"
+                )
+
+            item_id = command.spec['outputItemId']
+            # Download and process the exported item
+            yield self._download_exported_item(
+                item_id=item_id,
+                export_type=export_type,
+                local_path=local_path,
+                unzip=output_export_type != OutputExportType.ZIP,
+            )
+
     @_api_reference.add(path='/datasets/{id}/export', method='post')
-    def export(
-    [14 lines removed; content not rendered in this diff view]
+    def export(
+            self,
+            dataset: entities.Dataset = None,
+            dataset_name: str = None,
+            dataset_id: str = None,
+            local_path: str = None,
+            filters: Union[dict, entities.Filters] = None,
+            annotation_filters: entities.Filters = None,
+            feature_vector_filters: entities.Filters = None,
+            include_feature_vectors: bool = False,
+            include_annotations: bool = False,
+            export_type: ExportType = ExportType.JSON,
+            timeout: int = 0,
+            dataset_lock: bool = False,
+            lock_timeout_sec: int = None,
+            export_summary: bool = False,
+            output_export_type: OutputExportType = None,
+    ) -> Optional[str]:
         """
         Export dataset items and annotations.

@@ -648,12 +773,55 @@ class Datasets:

         You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.

+        **Export Behavior by Parameter Combination:**
+
+        The behavior of this method depends on the combination of `export_type` and `output_export_type`:
+
+        **When export_type = ExportType.JSON:**
+
+        - **output_export_type = OutputExportType.JSON (default when None):**
+            - Exports data in JSON format, split into subsets of max 500 items
+            - Downloads all subset JSON files and concatenates them into a single `result.json` file
+            - Returns the path to the concatenated JSON file
+            - Cleans up individual subset files after concatenation
+
+        - **output_export_type = OutputExportType.ZIP:**
+            - Same as JSON export, but zips the final `result.json` file
+            - Returns the path to the zipped file (`result.json.zip`)
+            - Cleans up the unzipped JSON file after zipping
+
+        - **output_export_type = OutputExportType.FOLDERS:**
+            - Exports data in JSON format, split into subsets of max 500 items
+            - Downloads all subset JSON files and creates individual JSON files for each item
+            - Creates a folder structure mirroring the remote dataset structure
+            - Returns the path to the base directory containing the folder structure
+            - Each item gets its own JSON file named after the original filename
+
+        **When export_type = ExportType.ZIP:**
+
+        - **output_export_type = OutputExportType.ZIP:**
+            - Exports data as a ZIP file containing the dataset
+            - Returns the downloaded ZIP item directly
+            - No additional processing or concatenation
+
+        - **output_export_type = OutputExportType.JSON:**
+            - **NOT SUPPORTED** - Raises NotImplementedError
+            - Use export_type=ExportType.JSON instead for JSON output
+
+        - **output_export_type = OutputExportType.FOLDERS:**
+            - **NOT SUPPORTED** - Raises NotImplementedError
+            - Use export_type=ExportType.JSON instead for folder output
+
+        **When output_export_type = None (legacy behavior):**
+        - Defaults to OutputExportType.JSON
+        - Maintains backward compatibility with existing code
+
         :param dtlpy.entities.dataset.Dataset dataset: Dataset object
         :param str dataset_name: The name of the dataset
         :param str dataset_id: The ID of the dataset
-        :param str local_path: Local path to save the exported dataset
+        :param str local_path: Local path to save the exported dataset
         :param Union[dict, dtlpy.entities.filters.Filters] filters: Filters entity or a query dictionary
-        :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
+        :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
         :param dtlpy.entities.filters.Filters feature_vector_filters: Filters entity to filter feature vectors for export
         :param bool include_feature_vectors: Include item feature vectors in the export
         :param bool include_annotations: Include item annotations in the export
@@ -661,45 +829,92 @@ class Datasets:
         :param bool export_summary: Get Summary of the dataset export
         :param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
         :param entities.ExportType export_type: Type of export ('json' or 'zip')
+        :param entities.OutputExportType output_export_type: Output format ('json', 'zip', or 'folders'). If None, defaults to 'json'
         :param int timeout: Maximum time in seconds to wait for the export to complete
-        :return:
-        :rtype:
-
-        **Example**:
-
-        .. code-block:: python
-
-            export_item = project.datasets.export(dataset_id='dataset_id',
-                                                  filters=filters,
-                                                  include_feature_vectors=True,
-                                                  include_annotations=True,
-                                                  export_type=dl.ExportType.JSON,
-                                                  dataset_lock=True,
-                                                  lock_timeout_sec=300,
-                                                  export_summary=False)
+        :return: Path to exported file/directory, or None if export result is empty
+        :rtype: Optional[str]
         """
-        [22 lines removed; content not rendered in this diff view]
+        export_result = list(
+            self._export_recursive(
+                dataset=dataset,
+                dataset_name=dataset_name,
+                dataset_id=dataset_id,
+                local_path=local_path,
+                filters=filters,
+                annotation_filters=annotation_filters,
+                feature_vector_filters=feature_vector_filters,
+                include_feature_vectors=include_feature_vectors,
+                include_annotations=include_annotations,
+                timeout=timeout,
+                dataset_lock=dataset_lock,
+                lock_timeout_sec=lock_timeout_sec,
+                export_summary=export_summary,
+                export_type=export_type,
+                output_export_type=output_export_type,
+            )
+        )
+        if all(x is None for x in export_result):
+            logger.error("export result is empty")
+            return None
+
+        if export_type == ExportType.ZIP:
+            # if export type is zip, then return the _export_recursive result as it
+            return export_result[0]
+
+        # if user didn't provide output_export_type, keep the previous behavior
+        if output_export_type is None:
+            output_export_type = OutputExportType.JSON
+
+        # export type is jsos :
+        # Load all items from subset JSON files and clean them up
+        all_items = []
+        logger.debug("start loading all items from subset JSON files")
+        for json_file in export_result:
+            if json_file is None:
+                continue
+            if os.path.isfile(json_file):
+                with open(json_file, 'r') as f:
+                    items = json.load(f)
+                if isinstance(items, list):
+                    all_items.extend(items)
+                os.remove(json_file)
+
+        base_dir = os.path.dirname(export_result[0])
+        if output_export_type != OutputExportType.FOLDERS:
+            dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
+            result_file_name = f"{dataset_id}.json"
+            result_file = os.path.join(base_dir, result_file_name)
+            logger.debug(f"start writing all items to result file {result_file}")
+            with open(result_file, 'w') as f:
+                json.dump(all_items, f)
+            if output_export_type == OutputExportType.ZIP:
+                # Zip the result file
+                zip_filename = result_file + '.zip'
+                # Create zip file
+                logger.debug(f"start zipping result file {zip_filename}")
+                with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
+                    zf.write(result_file, arcname=os.path.basename(result_file))
+
+                # Remove original json after zipping
+                os.remove(result_file)
+                result_file = zip_filename
+            return result_file
+        logger.debug("start building per-item JSON files under local_path mirroring remote structure")
+        # Build per-item JSON files under local_path mirroring remote structure
+        for item in all_items:
+            rel_json_path = os.path.splitext(item.get('filename'))[0] + '.json'
+            # Remove leading slash to make it a relative path
+            if rel_json_path.startswith('/'):
+                rel_json_path = rel_json_path[1:]
+            out_path = os.path.join(base_dir, rel_json_path)
+            os.makedirs(os.path.dirname(out_path), exist_ok=True)
+            try:
+                with open(out_path, 'w') as outf:
+                    json.dump(item, outf)
+            except Exception:
+                logger.exception(f'Failed writing export item JSON to {out_path}')
+        logger.debug("end building per-item JSON files under local_path mirroring remote structure")
+        return base_dir

     @_api_reference.add(path='/datasets/merge', method='post')
     def merge(self,
@@ -1185,7 +1400,6 @@ class Datasets:
         import warnings
         warnings.warn("`readonly` flag on dataset is deprecated, doing nothing.", DeprecationWarning)

-
     @_api_reference.add(path='/datasets/{id}/split', method='post')
     def split_ml_subsets(self,
                          dataset_id: str,
@@ -1201,10 +1415,10 @@ class Datasets:
         :rtype: bool
         :raises: PlatformException on failure and ValueError if percentages do not sum to 100 or invalid keys/values.
         """
-
+        # Validate percentages
         if not ml_split_list:
             ml_split_list = {'train': 80, 'validation': 10, 'test': 10}
-
+
         if not items_query:
             items_query = entities.Filters()

@@ -1238,7 +1452,6 @@ class Datasets:
         else:
             raise exceptions.PlatformException(response)

-
     @_api_reference.add(path='/datasets/{id}/items/bulk-update-metadata', method='post')
     def bulk_update_ml_subset(self, dataset_id: str, items_query: dict, subset: str = None, deleteTag: bool = False) -> bool:
         """
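A hedged sketch of the new output_export_type option on Datasets.export(), adapted from the example that was removed from the docstring above. The project lookup, ids and local path are placeholders, and it assumes OutputExportType is reachable as dl.OutputExportType in the same way dl.ExportType is; if not, import it from dtlpy.entities.dataset.

    import dtlpy as dl

    project = dl.projects.get(project_name='my-project')   # placeholder project

    # Legacy behavior (output_export_type=None): subsets are concatenated into a single JSON file.
    json_path = project.datasets.export(dataset_id='dataset_id',
                                        include_annotations=True,
                                        export_type=dl.ExportType.JSON)

    # New in 1.115.x: one JSON file per item, mirroring the remote folder structure.
    folder_path = project.datasets.export(dataset_id='dataset_id',
                                          include_annotations=True,
                                          export_type=dl.ExportType.JSON,
                                          output_export_type=dl.OutputExportType.FOLDERS,
                                          local_path='/tmp/my_export')

    # export_type=ZIP combined with a JSON or FOLDERS output type raises NotImplementedError.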
dtlpy/repositories/downloader.py
CHANGED
@@ -49,7 +49,8 @@ class Downloader:
                  export_version=entities.ExportVersion.V1,
                  dataset_lock=False,
                  lock_timeout_sec=None,
-                 export_summary=False
+                 export_summary=False,
+                 raise_on_error=False
                  ):
         """
         Download dataset by filters.
@@ -78,6 +79,7 @@ class Downloader:
         :param bool dataset_lock: optional - default = False
         :param bool export_summary: optional - default = False
         :param int lock_timeout_sec: optional
+        :param bool raise_on_error: raise an exception if an error occurs
         :return: Output (list)
         """

@@ -313,8 +315,24 @@ class Downloader:
         # log error
         if n_error > 0:
             log_filepath = reporter.generate_log_files()
+            # Get up to 5 error examples for the exception message
+            error_text = ""
+            error_counter = 0
+            if reporter._errors:
+                for _id, error in reporter._errors.items():
+                    error_counter += 1
+                    error_text += f"Item ID: {_id}, Error: {error} | "
+                    if error_counter >= 5:
+                        break
+            error_message = f"Errors in {n_error} files. Errors: {error_text}"
             if log_filepath is not None:
-                [1 line removed; content not rendered in this diff view]
+                error_message += f", see {log_filepath} for full log"
+            if raise_on_error is True:
+                raise PlatformException(
+                    error="400", message=error_message
+                )
+            else:
+                logger.warning(error_message)
         if int(n_download) <= 1 and int(n_exist) <= 1:
             try:
                 return next(reporter.output)
@@ -428,7 +446,7 @@ class Downloader:

         if export_summary:
             payload['summary'] = export_summary
-
+
         if lock_timeout_sec:
             payload['lockTimeoutSec'] = lock_timeout_sec

@@ -753,6 +771,7 @@ class Downloader:
                 if response_output != local_filepath:
                     source_path = os.path.normpath(response_output)
                     shutil.copyfile(source_path, local_filepath)
+                    download_done = True
                 else:
                     try:
                         temp_file_path = local_filepath + '.download'
@@ -806,6 +825,7 @@ class Downloader:
                 source_file = response_output
                 with open(source_file, 'wb') as f:
                     data = f.read()
+                download_done = True
             else:
                 try:
                     for chunk in response.iter_content(chunk_size=chunk_size):
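The error-aggregation logic added to Downloader above is small enough to restate as a standalone sketch: collect up to five (item id, error) pairs, build a single message, then raise or warn depending on raise_on_error. The function name and the generic exception here are illustrative; inside the SDK the same pattern raises PlatformException and reads the errors from the internal reporter.

    import logging

    logger = logging.getLogger('dtlpy')

    def summarize_download_errors(errors: dict, n_error: int, log_filepath=None, raise_on_error=False):
        # Keep at most five examples so the message stays readable.
        error_text = ""
        for i, (_id, error) in enumerate(errors.items()):
            error_text += f"Item ID: {_id}, Error: {error} | "
            if i + 1 >= 5:
                break
        error_message = f"Errors in {n_error} files. Errors: {error_text}"
        if log_filepath is not None:
            error_message += f", see {log_filepath} for full log"
        if raise_on_error:
            raise RuntimeError(error_message)  # the SDK raises PlatformException here
        logger.warning(error_message)

    # Example: warn only (legacy behavior), since raise_on_error defaults to False.
    summarize_download_errors({'item-1': 'HTTP 404'}, n_error=1, raise_on_error=False)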