dtlpy 1.113.10__py3-none-any.whl → 1.114.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +488 -488
- dtlpy/__version__.py +1 -1
- dtlpy/assets/__init__.py +26 -26
- dtlpy/assets/__pycache__/__init__.cpython-38.pyc +0 -0
- dtlpy/assets/code_server/config.yaml +2 -2
- dtlpy/assets/code_server/installation.sh +24 -24
- dtlpy/assets/code_server/launch.json +13 -13
- dtlpy/assets/code_server/settings.json +2 -2
- dtlpy/assets/main.py +53 -53
- dtlpy/assets/main_partial.py +18 -18
- dtlpy/assets/mock.json +11 -11
- dtlpy/assets/model_adapter.py +83 -83
- dtlpy/assets/package.json +61 -61
- dtlpy/assets/package_catalog.json +29 -29
- dtlpy/assets/package_gitignore +307 -307
- dtlpy/assets/service_runners/__init__.py +33 -33
- dtlpy/assets/service_runners/converter.py +96 -96
- dtlpy/assets/service_runners/multi_method.py +49 -49
- dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
- dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
- dtlpy/assets/service_runners/multi_method_item.py +52 -52
- dtlpy/assets/service_runners/multi_method_json.py +52 -52
- dtlpy/assets/service_runners/single_method.py +37 -37
- dtlpy/assets/service_runners/single_method_annotation.py +43 -43
- dtlpy/assets/service_runners/single_method_dataset.py +43 -43
- dtlpy/assets/service_runners/single_method_item.py +41 -41
- dtlpy/assets/service_runners/single_method_json.py +42 -42
- dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
- dtlpy/assets/voc_annotation_template.xml +23 -23
- dtlpy/caches/base_cache.py +32 -32
- dtlpy/caches/cache.py +473 -473
- dtlpy/caches/dl_cache.py +201 -201
- dtlpy/caches/filesystem_cache.py +89 -89
- dtlpy/caches/redis_cache.py +84 -84
- dtlpy/dlp/__init__.py +20 -20
- dtlpy/dlp/cli_utilities.py +367 -367
- dtlpy/dlp/command_executor.py +764 -764
- dtlpy/dlp/dlp +1 -1
- dtlpy/dlp/dlp.bat +1 -1
- dtlpy/dlp/dlp.py +128 -128
- dtlpy/dlp/parser.py +651 -651
- dtlpy/entities/__init__.py +83 -83
- dtlpy/entities/analytic.py +311 -311
- dtlpy/entities/annotation.py +1879 -1879
- dtlpy/entities/annotation_collection.py +699 -699
- dtlpy/entities/annotation_definitions/__init__.py +20 -20
- dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
- dtlpy/entities/annotation_definitions/box.py +195 -195
- dtlpy/entities/annotation_definitions/classification.py +67 -67
- dtlpy/entities/annotation_definitions/comparison.py +72 -72
- dtlpy/entities/annotation_definitions/cube.py +204 -204
- dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
- dtlpy/entities/annotation_definitions/description.py +32 -32
- dtlpy/entities/annotation_definitions/ellipse.py +124 -124
- dtlpy/entities/annotation_definitions/free_text.py +62 -62
- dtlpy/entities/annotation_definitions/gis.py +69 -69
- dtlpy/entities/annotation_definitions/note.py +139 -139
- dtlpy/entities/annotation_definitions/point.py +117 -117
- dtlpy/entities/annotation_definitions/polygon.py +182 -182
- dtlpy/entities/annotation_definitions/polyline.py +111 -111
- dtlpy/entities/annotation_definitions/pose.py +92 -92
- dtlpy/entities/annotation_definitions/ref_image.py +86 -86
- dtlpy/entities/annotation_definitions/segmentation.py +240 -240
- dtlpy/entities/annotation_definitions/subtitle.py +34 -34
- dtlpy/entities/annotation_definitions/text.py +85 -85
- dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
- dtlpy/entities/app.py +220 -220
- dtlpy/entities/app_module.py +107 -107
- dtlpy/entities/artifact.py +174 -174
- dtlpy/entities/assignment.py +399 -399
- dtlpy/entities/base_entity.py +214 -214
- dtlpy/entities/bot.py +113 -113
- dtlpy/entities/codebase.py +296 -296
- dtlpy/entities/collection.py +38 -38
- dtlpy/entities/command.py +169 -169
- dtlpy/entities/compute.py +442 -442
- dtlpy/entities/dataset.py +1285 -1285
- dtlpy/entities/directory_tree.py +44 -44
- dtlpy/entities/dpk.py +470 -470
- dtlpy/entities/driver.py +222 -222
- dtlpy/entities/execution.py +397 -397
- dtlpy/entities/feature.py +124 -124
- dtlpy/entities/feature_set.py +145 -145
- dtlpy/entities/filters.py +641 -641
- dtlpy/entities/gis_item.py +107 -107
- dtlpy/entities/integration.py +184 -184
- dtlpy/entities/item.py +953 -953
- dtlpy/entities/label.py +123 -123
- dtlpy/entities/links.py +85 -85
- dtlpy/entities/message.py +175 -175
- dtlpy/entities/model.py +694 -691
- dtlpy/entities/node.py +1005 -1005
- dtlpy/entities/ontology.py +803 -803
- dtlpy/entities/organization.py +287 -287
- dtlpy/entities/package.py +657 -657
- dtlpy/entities/package_defaults.py +5 -5
- dtlpy/entities/package_function.py +185 -185
- dtlpy/entities/package_module.py +113 -113
- dtlpy/entities/package_slot.py +118 -118
- dtlpy/entities/paged_entities.py +290 -267
- dtlpy/entities/pipeline.py +593 -593
- dtlpy/entities/pipeline_execution.py +279 -279
- dtlpy/entities/project.py +394 -394
- dtlpy/entities/prompt_item.py +499 -499
- dtlpy/entities/recipe.py +301 -301
- dtlpy/entities/reflect_dict.py +102 -102
- dtlpy/entities/resource_execution.py +138 -138
- dtlpy/entities/service.py +958 -958
- dtlpy/entities/service_driver.py +117 -117
- dtlpy/entities/setting.py +294 -294
- dtlpy/entities/task.py +491 -491
- dtlpy/entities/time_series.py +143 -143
- dtlpy/entities/trigger.py +426 -426
- dtlpy/entities/user.py +118 -118
- dtlpy/entities/webhook.py +124 -124
- dtlpy/examples/__init__.py +19 -19
- dtlpy/examples/add_labels.py +135 -135
- dtlpy/examples/add_metadata_to_item.py +21 -21
- dtlpy/examples/annotate_items_using_model.py +65 -65
- dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
- dtlpy/examples/annotations_convert_to_voc.py +9 -9
- dtlpy/examples/annotations_convert_to_yolo.py +9 -9
- dtlpy/examples/convert_annotation_types.py +51 -51
- dtlpy/examples/converter.py +143 -143
- dtlpy/examples/copy_annotations.py +22 -22
- dtlpy/examples/copy_folder.py +31 -31
- dtlpy/examples/create_annotations.py +51 -51
- dtlpy/examples/create_video_annotations.py +83 -83
- dtlpy/examples/delete_annotations.py +26 -26
- dtlpy/examples/filters.py +113 -113
- dtlpy/examples/move_item.py +23 -23
- dtlpy/examples/play_video_annotation.py +13 -13
- dtlpy/examples/show_item_and_mask.py +53 -53
- dtlpy/examples/triggers.py +49 -49
- dtlpy/examples/upload_batch_of_items.py +20 -20
- dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
- dtlpy/examples/upload_items_with_modalities.py +43 -43
- dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
- dtlpy/examples/upload_yolo_format_annotations.py +70 -70
- dtlpy/exceptions.py +125 -125
- dtlpy/miscellaneous/__init__.py +20 -20
- dtlpy/miscellaneous/dict_differ.py +95 -95
- dtlpy/miscellaneous/git_utils.py +217 -217
- dtlpy/miscellaneous/json_utils.py +14 -14
- dtlpy/miscellaneous/list_print.py +105 -105
- dtlpy/miscellaneous/zipping.py +130 -130
- dtlpy/ml/__init__.py +20 -20
- dtlpy/ml/base_feature_extractor_adapter.py +27 -27
- dtlpy/ml/base_model_adapter.py +945 -940
- dtlpy/ml/metrics.py +461 -461
- dtlpy/ml/predictions_utils.py +274 -274
- dtlpy/ml/summary_writer.py +57 -57
- dtlpy/ml/train_utils.py +60 -60
- dtlpy/new_instance.py +252 -252
- dtlpy/repositories/__init__.py +56 -56
- dtlpy/repositories/analytics.py +85 -85
- dtlpy/repositories/annotations.py +916 -916
- dtlpy/repositories/apps.py +383 -383
- dtlpy/repositories/artifacts.py +452 -452
- dtlpy/repositories/assignments.py +599 -599
- dtlpy/repositories/bots.py +213 -213
- dtlpy/repositories/codebases.py +559 -559
- dtlpy/repositories/collections.py +332 -348
- dtlpy/repositories/commands.py +158 -158
- dtlpy/repositories/compositions.py +61 -61
- dtlpy/repositories/computes.py +434 -406
- dtlpy/repositories/datasets.py +1291 -1291
- dtlpy/repositories/downloader.py +895 -895
- dtlpy/repositories/dpks.py +433 -433
- dtlpy/repositories/drivers.py +266 -266
- dtlpy/repositories/executions.py +817 -817
- dtlpy/repositories/feature_sets.py +226 -226
- dtlpy/repositories/features.py +238 -238
- dtlpy/repositories/integrations.py +484 -484
- dtlpy/repositories/items.py +909 -915
- dtlpy/repositories/messages.py +94 -94
- dtlpy/repositories/models.py +877 -867
- dtlpy/repositories/nodes.py +80 -80
- dtlpy/repositories/ontologies.py +511 -511
- dtlpy/repositories/organizations.py +525 -525
- dtlpy/repositories/packages.py +1941 -1941
- dtlpy/repositories/pipeline_executions.py +448 -448
- dtlpy/repositories/pipelines.py +642 -642
- dtlpy/repositories/projects.py +539 -539
- dtlpy/repositories/recipes.py +399 -399
- dtlpy/repositories/resource_executions.py +137 -137
- dtlpy/repositories/schema.py +120 -120
- dtlpy/repositories/service_drivers.py +213 -213
- dtlpy/repositories/services.py +1704 -1704
- dtlpy/repositories/settings.py +339 -339
- dtlpy/repositories/tasks.py +1124 -1124
- dtlpy/repositories/times_series.py +278 -278
- dtlpy/repositories/triggers.py +536 -536
- dtlpy/repositories/upload_element.py +257 -257
- dtlpy/repositories/uploader.py +651 -651
- dtlpy/repositories/webhooks.py +249 -249
- dtlpy/services/__init__.py +22 -22
- dtlpy/services/aihttp_retry.py +131 -131
- dtlpy/services/api_client.py +1782 -1782
- dtlpy/services/api_reference.py +40 -40
- dtlpy/services/async_utils.py +133 -133
- dtlpy/services/calls_counter.py +44 -44
- dtlpy/services/check_sdk.py +68 -68
- dtlpy/services/cookie.py +115 -115
- dtlpy/services/create_logger.py +156 -156
- dtlpy/services/events.py +84 -84
- dtlpy/services/logins.py +235 -235
- dtlpy/services/reporter.py +256 -256
- dtlpy/services/service_defaults.py +91 -91
- dtlpy/utilities/__init__.py +20 -20
- dtlpy/utilities/annotations/__init__.py +16 -16
- dtlpy/utilities/annotations/annotation_converters.py +269 -269
- dtlpy/utilities/base_package_runner.py +264 -264
- dtlpy/utilities/converter.py +1650 -1650
- dtlpy/utilities/dataset_generators/__init__.py +1 -1
- dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
- dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
- dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
- dtlpy/utilities/local_development/__init__.py +1 -1
- dtlpy/utilities/local_development/local_session.py +179 -179
- dtlpy/utilities/reports/__init__.py +2 -2
- dtlpy/utilities/reports/figures.py +343 -343
- dtlpy/utilities/reports/report.py +71 -71
- dtlpy/utilities/videos/__init__.py +17 -17
- dtlpy/utilities/videos/video_player.py +598 -598
- dtlpy/utilities/videos/videos.py +470 -470
- {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp +1 -1
- dtlpy-1.114.13.data/scripts/dlp.bat +2 -0
- {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp.py +128 -128
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/LICENSE +200 -200
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/METADATA +172 -172
- dtlpy-1.114.13.dist-info/RECORD +240 -0
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/WHEEL +1 -1
- tests/features/environment.py +551 -550
- dtlpy-1.113.10.data/scripts/dlp.bat +0 -2
- dtlpy-1.113.10.dist-info/RECORD +0 -244
- tests/assets/__init__.py +0 -0
- tests/assets/models_flow/__init__.py +0 -0
- tests/assets/models_flow/failedmain.py +0 -52
- tests/assets/models_flow/main.py +0 -62
- tests/assets/models_flow/main_model.py +0 -54
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/top_level.txt +0 -0
dtlpy/repositories/downloader.py
CHANGED
|
@@ -1,895 +1,895 @@
|
|
|
1
|
-
from requests.adapters import HTTPAdapter
|
|
2
|
-
from urllib3.util import Retry
|
|
3
|
-
from PIL import Image
|
|
4
|
-
import numpy as np
|
|
5
|
-
import traceback
|
|
6
|
-
import warnings
|
|
7
|
-
import requests
|
|
8
|
-
import logging
|
|
9
|
-
import shutil
|
|
10
|
-
import json
|
|
11
|
-
import tqdm
|
|
12
|
-
import sys
|
|
13
|
-
import os
|
|
14
|
-
import io
|
|
15
|
-
|
|
16
|
-
from .. import entities, repositories, miscellaneous, PlatformException, exceptions
|
|
17
|
-
from ..services import Reporter
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger(name='dtlpy')
|
|
20
|
-
|
|
21
|
-
NUM_TRIES = 3 # try to download 3 time before fail on item
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class Downloader:
|
|
25
|
-
def __init__(self, items_repository):
|
|
26
|
-
self.items_repository = items_repository
|
|
27
|
-
|
|
28
|
-
def download(self,
|
|
29
|
-
# filter options
|
|
30
|
-
filters: entities.Filters = None,
|
|
31
|
-
items=None,
|
|
32
|
-
# download options
|
|
33
|
-
local_path=None,
|
|
34
|
-
file_types=None,
|
|
35
|
-
save_locally=True,
|
|
36
|
-
to_array=False,
|
|
37
|
-
overwrite=False,
|
|
38
|
-
annotation_filters: entities.Filters = None,
|
|
39
|
-
annotation_options: entities.ViewAnnotationOptions = None,
|
|
40
|
-
to_items_folder=True,
|
|
41
|
-
thickness=1,
|
|
42
|
-
with_text=False,
|
|
43
|
-
without_relative_path=None,
|
|
44
|
-
avoid_unnecessary_annotation_download=False,
|
|
45
|
-
include_annotations_in_output=True,
|
|
46
|
-
export_png_files=False,
|
|
47
|
-
filter_output_annotations=False,
|
|
48
|
-
alpha=1,
|
|
49
|
-
export_version=entities.ExportVersion.V1,
|
|
50
|
-
dataset_lock=False,
|
|
51
|
-
lock_timeout_sec=None,
|
|
52
|
-
export_summary=False
|
|
53
|
-
):
|
|
54
|
-
"""
|
|
55
|
-
Download dataset by filters.
|
|
56
|
-
Filtering the dataset for items and save them local
|
|
57
|
-
Optional - also download annotation, mask, instance and image mask of the item
|
|
58
|
-
|
|
59
|
-
:param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
|
|
60
|
-
:param items: download Item entity or item_id (or a list of item)
|
|
61
|
-
:param local_path: local folder or filename to save to.
|
|
62
|
-
:param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
|
|
63
|
-
:param save_locally: bool. save to disk or return a buffer
|
|
64
|
-
:param to_array: returns Ndarray when True and local_path = False
|
|
65
|
-
:param overwrite: optional - default = False
|
|
66
|
-
:param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
|
|
67
|
-
:param annotation_filters: Filters entity to filter annotations for download
|
|
68
|
-
:param to_items_folder: Create 'items' folder and download items to it
|
|
69
|
-
:param with_text: optional - add text to annotations, default = False
|
|
70
|
-
:param thickness: optional - line thickness, if -1 annotation will be filled, default =1
|
|
71
|
-
:param without_relative_path: bool - download items without the relative path from platform
|
|
72
|
-
:param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
|
|
73
|
-
:param include_annotations_in_output: default - False , if export should contain annotations
|
|
74
|
-
:param export_png_files: default - True, if semantic annotations should be exported as png files
|
|
75
|
-
:param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
|
|
76
|
-
:param alpha: opacity value [0 1], default 1
|
|
77
|
-
:param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
78
|
-
:param bool dataset_lock: optional - default = False
|
|
79
|
-
:param bool export_summary: optional - default = False
|
|
80
|
-
:param int lock_timeout_sec: optional
|
|
81
|
-
:return: Output (list)
|
|
82
|
-
"""
|
|
83
|
-
|
|
84
|
-
###################
|
|
85
|
-
# Default options #
|
|
86
|
-
###################
|
|
87
|
-
# annotation options
|
|
88
|
-
if annotation_options is None:
|
|
89
|
-
annotation_options = list()
|
|
90
|
-
elif not isinstance(annotation_options, list):
|
|
91
|
-
annotation_options = [annotation_options]
|
|
92
|
-
for ann_option in annotation_options:
|
|
93
|
-
if not isinstance(ann_option, entities.ViewAnnotationOptions):
|
|
94
|
-
if ann_option not in list(entities.ViewAnnotationOptions):
|
|
95
|
-
raise PlatformException(
|
|
96
|
-
error='400',
|
|
97
|
-
message='Unknown annotation download option: {}, please choose from: {}'.format(
|
|
98
|
-
ann_option, list(entities.ViewAnnotationOptions)))
|
|
99
|
-
# normalize items argument: treat empty list as “no items specified”
|
|
100
|
-
if isinstance(items, list) and len(items) == 0:
|
|
101
|
-
items = None
|
|
102
|
-
#####################
|
|
103
|
-
# items to download #
|
|
104
|
-
#####################
|
|
105
|
-
if items is not None:
|
|
106
|
-
# convert input to a list
|
|
107
|
-
if not isinstance(items, list):
|
|
108
|
-
items = [items]
|
|
109
|
-
# get items by id
|
|
110
|
-
if isinstance(items[0], str):
|
|
111
|
-
items = [self.items_repository.get(item_id=item_id) for item_id in items]
|
|
112
|
-
elif isinstance(items[0], entities.Item):
|
|
113
|
-
pass
|
|
114
|
-
else:
|
|
115
|
-
raise PlatformException(
|
|
116
|
-
error="400",
|
|
117
|
-
message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
|
|
118
|
-
type(items[0])
|
|
119
|
-
)
|
|
120
|
-
)
|
|
121
|
-
# create filters to download annotations
|
|
122
|
-
filters = entities.Filters(field='id',
|
|
123
|
-
values=[item.id for item in items],
|
|
124
|
-
operator=entities.FiltersOperations.IN)
|
|
125
|
-
filters._user_query = 'false'
|
|
126
|
-
|
|
127
|
-
# convert to list of list (like pages and page)
|
|
128
|
-
items_to_download = [items]
|
|
129
|
-
num_items = len(items)
|
|
130
|
-
else:
|
|
131
|
-
# filters
|
|
132
|
-
if filters is None:
|
|
133
|
-
filters = entities.Filters()
|
|
134
|
-
filters._user_query = 'false'
|
|
135
|
-
# file types
|
|
136
|
-
if file_types is not None:
|
|
137
|
-
filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
|
|
138
|
-
if annotation_filters is not None:
|
|
139
|
-
for annotation_filter_and in annotation_filters.and_filter_list:
|
|
140
|
-
filters.add_join(field=annotation_filter_and.field,
|
|
141
|
-
values=annotation_filter_and.values,
|
|
142
|
-
operator=annotation_filter_and.operator,
|
|
143
|
-
method=entities.FiltersMethod.AND)
|
|
144
|
-
for annotation_filter_or in annotation_filters.or_filter_list:
|
|
145
|
-
filters.add_join(field=annotation_filter_or.field,
|
|
146
|
-
values=annotation_filter_or.values,
|
|
147
|
-
operator=annotation_filter_or.operator,
|
|
148
|
-
method=entities.FiltersMethod.OR)
|
|
149
|
-
else:
|
|
150
|
-
annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
|
|
151
|
-
filters._user_query = 'false'
|
|
152
|
-
|
|
153
|
-
items_to_download = self.items_repository.list(filters=filters)
|
|
154
|
-
num_items = items_to_download.items_count
|
|
155
|
-
|
|
156
|
-
if num_items == 0:
|
|
157
|
-
logger.warning('No items found! Nothing was downloaded')
|
|
158
|
-
return list()
|
|
159
|
-
|
|
160
|
-
##############
|
|
161
|
-
# local path #
|
|
162
|
-
##############
|
|
163
|
-
is_folder = False
|
|
164
|
-
if local_path is None:
|
|
165
|
-
# create default local path
|
|
166
|
-
local_path = self.__default_local_path()
|
|
167
|
-
|
|
168
|
-
if os.path.isdir(local_path):
|
|
169
|
-
logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
|
|
170
|
-
local_path))
|
|
171
|
-
is_folder = True
|
|
172
|
-
else:
|
|
173
|
-
# check if filename
|
|
174
|
-
_, ext = os.path.splitext(local_path)
|
|
175
|
-
if num_items > 1:
|
|
176
|
-
is_folder = True
|
|
177
|
-
else:
|
|
178
|
-
item_to_download = items_to_download[0][0]
|
|
179
|
-
file_name = item_to_download.name
|
|
180
|
-
_, ext_download = os.path.splitext(file_name)
|
|
181
|
-
if ext_download != ext:
|
|
182
|
-
is_folder = True
|
|
183
|
-
if is_folder and save_locally:
|
|
184
|
-
path_to_create = local_path
|
|
185
|
-
if local_path.endswith('*'):
|
|
186
|
-
path_to_create = os.path.dirname(local_path)
|
|
187
|
-
logger.info("Creating new directory for download: {}".format(path_to_create))
|
|
188
|
-
os.makedirs(path_to_create, exist_ok=True)
|
|
189
|
-
|
|
190
|
-
####################
|
|
191
|
-
# annotations json #
|
|
192
|
-
####################
|
|
193
|
-
# download annotations' json files in a new thread
|
|
194
|
-
# items will start downloading and if json not exists yet - will download for each file
|
|
195
|
-
if num_items > 1 and annotation_options:
|
|
196
|
-
# a new folder named 'json' will be created under the "local_path"
|
|
197
|
-
logger.info("Downloading annotations formats: {}".format(annotation_options))
|
|
198
|
-
self.download_annotations(**{
|
|
199
|
-
"dataset": self.items_repository.dataset,
|
|
200
|
-
"filters": filters,
|
|
201
|
-
"annotation_filters": annotation_filters,
|
|
202
|
-
"local_path": local_path,
|
|
203
|
-
'overwrite': overwrite,
|
|
204
|
-
'include_annotations_in_output': include_annotations_in_output,
|
|
205
|
-
'export_png_files': export_png_files,
|
|
206
|
-
'filter_output_annotations': filter_output_annotations,
|
|
207
|
-
'export_version': export_version,
|
|
208
|
-
'dataset_lock': dataset_lock,
|
|
209
|
-
'lock_timeout_sec': lock_timeout_sec,
|
|
210
|
-
'export_summary': export_summary
|
|
211
|
-
})
|
|
212
|
-
###############
|
|
213
|
-
# downloading #
|
|
214
|
-
###############
|
|
215
|
-
# create result lists
|
|
216
|
-
client_api = self.items_repository._client_api
|
|
217
|
-
|
|
218
|
-
reporter = Reporter(num_workers=num_items,
|
|
219
|
-
resource=Reporter.ITEMS_DOWNLOAD,
|
|
220
|
-
print_error_logs=client_api.verbose.print_error_logs,
|
|
221
|
-
client_api=client_api)
|
|
222
|
-
jobs = [None for _ in range(num_items)]
|
|
223
|
-
# pool
|
|
224
|
-
pool = client_api.thread_pools(pool_name='item.download')
|
|
225
|
-
# download
|
|
226
|
-
pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
|
|
227
|
-
desc='Download Items')
|
|
228
|
-
try:
|
|
229
|
-
i_item = 0
|
|
230
|
-
for page in items_to_download:
|
|
231
|
-
for item in page:
|
|
232
|
-
if item.type == "dir":
|
|
233
|
-
continue
|
|
234
|
-
if save_locally:
|
|
235
|
-
# get local file path
|
|
236
|
-
item_local_path, item_local_filepath = self.__get_local_filepath(
|
|
237
|
-
local_path=local_path,
|
|
238
|
-
without_relative_path=without_relative_path,
|
|
239
|
-
item=item,
|
|
240
|
-
to_items_folder=to_items_folder,
|
|
241
|
-
is_folder=is_folder)
|
|
242
|
-
|
|
243
|
-
if os.path.isfile(item_local_filepath) and not overwrite:
|
|
244
|
-
logger.debug("File Exists: {}".format(item_local_filepath))
|
|
245
|
-
reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
|
|
246
|
-
pbar.update()
|
|
247
|
-
if annotation_options and item.annotated:
|
|
248
|
-
# download annotations only
|
|
249
|
-
jobs[i_item] = pool.submit(
|
|
250
|
-
self._download_img_annotations,
|
|
251
|
-
**{
|
|
252
|
-
"item": item,
|
|
253
|
-
"img_filepath": item_local_filepath,
|
|
254
|
-
"overwrite": overwrite,
|
|
255
|
-
"annotation_options": annotation_options,
|
|
256
|
-
"annotation_filters": annotation_filters,
|
|
257
|
-
"local_path": item_local_path,
|
|
258
|
-
"thickness": thickness,
|
|
259
|
-
"alpha": alpha,
|
|
260
|
-
"with_text": with_text,
|
|
261
|
-
"export_version": export_version,
|
|
262
|
-
},
|
|
263
|
-
)
|
|
264
|
-
i_item += 1
|
|
265
|
-
continue
|
|
266
|
-
else:
|
|
267
|
-
item_local_path = None
|
|
268
|
-
item_local_filepath = None
|
|
269
|
-
|
|
270
|
-
# download single item
|
|
271
|
-
jobs[i_item] = pool.submit(
|
|
272
|
-
self.__thread_download_wrapper,
|
|
273
|
-
**{
|
|
274
|
-
"i_item": i_item,
|
|
275
|
-
"item": item,
|
|
276
|
-
"item_local_path": item_local_path,
|
|
277
|
-
"item_local_filepath": item_local_filepath,
|
|
278
|
-
"save_locally": save_locally,
|
|
279
|
-
"to_array": to_array,
|
|
280
|
-
"annotation_options": annotation_options,
|
|
281
|
-
"annotation_filters": annotation_filters,
|
|
282
|
-
"reporter": reporter,
|
|
283
|
-
"pbar": pbar,
|
|
284
|
-
"overwrite": overwrite,
|
|
285
|
-
"thickness": thickness,
|
|
286
|
-
"alpha": alpha,
|
|
287
|
-
"with_text": with_text,
|
|
288
|
-
"export_version": export_version
|
|
289
|
-
},
|
|
290
|
-
)
|
|
291
|
-
i_item += 1
|
|
292
|
-
except Exception:
|
|
293
|
-
logger.exception('Error downloading:')
|
|
294
|
-
finally:
|
|
295
|
-
_ = [j.result() for j in jobs if j is not None]
|
|
296
|
-
pbar.close()
|
|
297
|
-
# reporting
|
|
298
|
-
n_download = reporter.status_count(status='download')
|
|
299
|
-
n_exist = reporter.status_count(status='exist')
|
|
300
|
-
n_error = reporter.status_count(status='error')
|
|
301
|
-
logger.info("Number of files downloaded:{}".format(n_download))
|
|
302
|
-
logger.info("Number of files exists: {}".format(n_exist))
|
|
303
|
-
logger.info("Total number of files: {}".format(n_download + n_exist))
|
|
304
|
-
|
|
305
|
-
# log error
|
|
306
|
-
if n_error > 0:
|
|
307
|
-
log_filepath = reporter.generate_log_files()
|
|
308
|
-
if log_filepath is not None:
|
|
309
|
-
logger.warning("Errors in {} files. See {} for full log".format(n_error, log_filepath))
|
|
310
|
-
if int(n_download) <= 1 and int(n_exist) <= 1:
|
|
311
|
-
try:
|
|
312
|
-
return next(reporter.output)
|
|
313
|
-
except StopIteration:
|
|
314
|
-
return None
|
|
315
|
-
return reporter.output
|
|
316
|
-
|
|
317
|
-
def __thread_download_wrapper(self, i_item,
|
|
318
|
-
# item params
|
|
319
|
-
item, item_local_path, item_local_filepath,
|
|
320
|
-
save_locally, to_array, overwrite,
|
|
321
|
-
# annotations params
|
|
322
|
-
annotation_options, annotation_filters, with_text, thickness,
|
|
323
|
-
# threading params
|
|
324
|
-
reporter, pbar, alpha, export_version):
|
|
325
|
-
|
|
326
|
-
download = None
|
|
327
|
-
err = None
|
|
328
|
-
trace = None
|
|
329
|
-
for i_try in range(NUM_TRIES):
|
|
330
|
-
try:
|
|
331
|
-
logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
|
|
332
|
-
i=i_try + 1,
|
|
333
|
-
n=NUM_TRIES))
|
|
334
|
-
download = self.__thread_download(item=item,
|
|
335
|
-
save_locally=save_locally,
|
|
336
|
-
to_array=to_array,
|
|
337
|
-
local_path=item_local_path,
|
|
338
|
-
local_filepath=item_local_filepath,
|
|
339
|
-
annotation_options=annotation_options,
|
|
340
|
-
annotation_filters=annotation_filters,
|
|
341
|
-
overwrite=overwrite,
|
|
342
|
-
thickness=thickness,
|
|
343
|
-
alpha=alpha,
|
|
344
|
-
with_text=with_text,
|
|
345
|
-
export_version=export_version)
|
|
346
|
-
logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
|
|
347
|
-
i=i_try + 1,
|
|
348
|
-
n=NUM_TRIES,
|
|
349
|
-
id=item.id))
|
|
350
|
-
if download is not None:
|
|
351
|
-
break
|
|
352
|
-
except Exception as e:
|
|
353
|
-
logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
|
|
354
|
-
i=i_try + 1,
|
|
355
|
-
n=NUM_TRIES))
|
|
356
|
-
err = e
|
|
357
|
-
trace = traceback.format_exc()
|
|
358
|
-
pbar.update()
|
|
359
|
-
if download is None:
|
|
360
|
-
if err is None:
|
|
361
|
-
err = self.items_repository._client_api.platform_exception
|
|
362
|
-
reporter.set_index(status="error", ref=item.id, success=False,
|
|
363
|
-
error="{}\n{}".format(err, trace))
|
|
364
|
-
else:
|
|
365
|
-
reporter.set_index(ref=item.id, status="download", output=download, success=True)
|
|
366
|
-
|
|
367
|
-
@staticmethod
|
|
368
|
-
def download_annotations(dataset: entities.Dataset,
|
|
369
|
-
local_path: str,
|
|
370
|
-
filters: entities.Filters = None,
|
|
371
|
-
annotation_filters: entities.Filters = None,
|
|
372
|
-
overwrite=False,
|
|
373
|
-
include_annotations_in_output=True,
|
|
374
|
-
export_png_files=False,
|
|
375
|
-
filter_output_annotations=False,
|
|
376
|
-
export_version=entities.ExportVersion.V1,
|
|
377
|
-
dataset_lock=False,
|
|
378
|
-
lock_timeout_sec=None,
|
|
379
|
-
export_summary=False
|
|
380
|
-
):
|
|
381
|
-
"""
|
|
382
|
-
Download annotations json for entire dataset
|
|
383
|
-
|
|
384
|
-
:param dataset: Dataset entity
|
|
385
|
-
:param local_path:
|
|
386
|
-
:param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
|
|
387
|
-
:param annotation_filters: dl.Filters entity to filters items' annotations
|
|
388
|
-
:param overwrite: optional - overwrite annotations if exist, default = false
|
|
389
|
-
:param include_annotations_in_output: default - True , if export should contain annotations
|
|
390
|
-
:param export_png_files: default - if True, semantic annotations should be exported as png files
|
|
391
|
-
:param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
|
|
392
|
-
:param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
393
|
-
:param bool dataset_lock: optional - default = False
|
|
394
|
-
:param bool export_summary: optional - default = False
|
|
395
|
-
:param int lock_timeout_sec: optional
|
|
396
|
-
:return:
|
|
397
|
-
"""
|
|
398
|
-
local_path = os.path.join(local_path, "json")
|
|
399
|
-
zip_filepath = None
|
|
400
|
-
# only if json folder does not exist or exist and overwrite
|
|
401
|
-
if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
|
|
402
|
-
# create local path to download and save to
|
|
403
|
-
if not os.path.isdir(local_path):
|
|
404
|
-
os.makedirs(local_path)
|
|
405
|
-
|
|
406
|
-
try:
|
|
407
|
-
payload = dict()
|
|
408
|
-
if filters is not None:
|
|
409
|
-
payload['itemsQuery'] = filters.prepare()
|
|
410
|
-
payload['annotations'] = {
|
|
411
|
-
"include": include_annotations_in_output,
|
|
412
|
-
"convertSemantic": export_png_files
|
|
413
|
-
}
|
|
414
|
-
payload['exportVersion'] = export_version
|
|
415
|
-
if annotation_filters is not None:
|
|
416
|
-
payload['annotationsQuery'] = annotation_filters.prepare()
|
|
417
|
-
payload['annotations']['filter'] = filter_output_annotations
|
|
418
|
-
if dataset_lock:
|
|
419
|
-
payload['datasetLock'] = dataset_lock
|
|
420
|
-
|
|
421
|
-
if export_summary:
|
|
422
|
-
payload['summary'] = export_summary
|
|
423
|
-
|
|
424
|
-
if lock_timeout_sec:
|
|
425
|
-
payload['lockTimeoutSec'] = lock_timeout_sec
|
|
426
|
-
|
|
427
|
-
success, response = dataset._client_api.gen_request(req_type='post',
|
|
428
|
-
path='/datasets/{}/export'.format(dataset.id),
|
|
429
|
-
json_req=payload,
|
|
430
|
-
headers={'user_query': filters._user_query})
|
|
431
|
-
if not success:
|
|
432
|
-
raise exceptions.PlatformException(response)
|
|
433
|
-
command = entities.Command.from_json(_json=response.json(),
|
|
434
|
-
client_api=dataset._client_api)
|
|
435
|
-
command = command.wait(timeout=0)
|
|
436
|
-
if 'outputItemId' not in command.spec:
|
|
437
|
-
raise exceptions.PlatformException(
|
|
438
|
-
error='400',
|
|
439
|
-
message="outputItemId key is missing in command response: {}".format(response))
|
|
440
|
-
item_id = command.spec['outputItemId']
|
|
441
|
-
annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
|
|
442
|
-
zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
|
|
443
|
-
# unzipping annotations to directory
|
|
444
|
-
if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
|
|
445
|
-
raise exceptions.PlatformException(
|
|
446
|
-
error='404',
|
|
447
|
-
message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
|
|
448
|
-
annotation_zip_item.id))
|
|
449
|
-
try:
|
|
450
|
-
miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
|
|
451
|
-
to_directory=local_path)
|
|
452
|
-
except Exception as e:
|
|
453
|
-
logger.warning("Failed to extract zip file error: {}".format(e))
|
|
454
|
-
|
|
455
|
-
finally:
|
|
456
|
-
# cleanup
|
|
457
|
-
if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
|
|
458
|
-
os.remove(zip_filepath)
|
|
459
|
-
|
|
460
|
-
@staticmethod
def _download_img_annotations(item: entities.Item,
                              img_filepath,
                              local_path,
                              overwrite,
                              annotation_options,
                              annotation_filters,
                              thickness=1,
                              with_text=False,
                              alpha=1,
                              export_version=entities.ExportVersion.V1
                              ):
    """
    Save a single item's annotations locally in every requested view format.

    Annotations are taken from an already-exported json file under
    ``<local_path>/json/...`` when one exists (and no extra filters were given);
    otherwise they are fetched from the platform.

    :param item: the item whose annotations are saved
    :param img_filepath: local path of the downloaded binary (or None); used both
        to name the annotation files and as the background for ANNOTATION_ON_IMAGE
    :param local_path: download root; a per-option subfolder is created under it
    :param overwrite: re-create mask/instance/vtt outputs even if they exist
    :param annotation_options: iterable of entities.ViewAnnotationOptions to produce
    :param annotation_filters: Filters entity used when listing from the platform
    :param thickness: line thickness for drawn options (-1 fills), default 1
    :param with_text: draw label text on image-type outputs
    :param alpha: opacity in [0, 1] for drawn outputs
    :param export_version: V1 strips the item extension from output names
    :raises PlatformException: '1002' when ANNOTATION_ON_IMAGE is requested
        without an image file; '400' on an unknown annotation option
    """
    # check if local_path is a file name
    _, ext = os.path.splitext(local_path)
    if ext:
        # take the dir of the file for the annotations save
        local_path = os.path.dirname(local_path)

    # fix local path: annotation folders live next to (not inside) the items folder
    if local_path.endswith("/items") or local_path.endswith("\\items"):
        local_path = os.path.dirname(local_path)

    # relative path of the annotation mirrors the item's remote path
    annotation_rel_path = item.filename[1:]
    if img_filepath is not None:
        # keep the downloaded file's basename (it may carry an added extension)
        dir_name = os.path.dirname(annotation_rel_path)
        base_name = os.path.basename(img_filepath)
        annotation_rel_path = os.path.join(dir_name, base_name)

    # find annotations json
    annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
    if export_version == entities.ExportVersion.V1:
        # V1 exports drop the original item extension from the json name
        name, _ = os.path.splitext(annotations_json_filepath)
    else:
        name = annotations_json_filepath
    annotations_json_filepath = name + ".json"

    if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
        # if exists take from json file
        with open(annotations_json_filepath, "r", encoding="utf8") as f:
            data = json.load(f)
        if "annotations" in data:
            data = data["annotations"]
        annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
        # no need to use the filters here because the annotations were already downloaded with annotation_filters
    else:
        # if json file doesnt exist get the annotations from platform
        annotations = item.annotations.list(filters=annotation_filters)

    # get image shape
    is_url_item = item.metadata. \
        get('system', dict()). \
        get('shebang', dict()). \
        get('linkInfo', dict()). \
        get('type', None) == 'url'

    # NOTE(review): item is always non-None here (item.metadata was read above),
    # so the else branch below is dead code
    if item is not None:
        orientation = item.system.get('exif', {}).get('Orientation', 0)
    else:
        orientation = 0
    if item.width is not None and item.height is not None:
        # EXIF orientations 5-8 are rotated 90deg - swap width/height
        if orientation in [5, 6, 7, 8]:
            img_shape = (item.width, item.height)
        else:
            img_shape = (item.height, item.width)
    elif ('image' in item.mimetype and img_filepath is not None) or \
            (is_url_item and img_filepath is not None):
        # no dimensions in metadata - read them from the local file (PIL size is (w, h))
        img_shape = Image.open(img_filepath).size[::-1]
    else:
        img_shape = (0, 0)

    # download all annotation options
    for option in annotation_options:
        # get path and create dirs
        annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
        if not os.path.isdir(os.path.dirname(annotation_filepath)):
            os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)

        if export_version == entities.ExportVersion.V1:
            temp_path, ext = os.path.splitext(annotation_filepath)
        else:
            temp_path = annotation_filepath

        if option == entities.ViewAnnotationOptions.JSON:
            if not os.path.isfile(annotations_json_filepath):
                annotations.download(
                    filepath=annotations_json_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                )
        elif option in [entities.ViewAnnotationOptions.MASK,
                        entities.ViewAnnotationOptions.INSTANCE,
                        entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
                        entities.ViewAnnotationOptions.OBJECT_ID,
                        entities.ViewAnnotationOptions.VTT]:
            # pick the output extension by option/mimetype
            if option == entities.ViewAnnotationOptions.VTT:
                annotation_filepath = temp_path + ".vtt"
            else:
                if 'video' in item.mimetype:
                    annotation_filepath = temp_path + ".mp4"
                else:
                    annotation_filepath = temp_path + ".png"
            if not os.path.isfile(annotation_filepath) or overwrite:
                # if not exists OR (exists AND overwrite)
                if not os.path.exists(os.path.dirname(annotation_filepath)):
                    # create folder if not exists
                    os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
                if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
                    raise PlatformException(
                        error="1002",
                        message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
                annotations.download(
                    filepath=annotation_filepath,
                    img_filepath=img_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                    thickness=thickness,
                    alpha=alpha,
                    with_text=with_text,
                    orientation=orientation
                )
        else:
            raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
|
|
585
|
-
|
|
586
|
-
@staticmethod
def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
    """
    Resolve the local directory and the full file path for one item download.

    :param local_path: requested destination; either a directory or a full filename
    :param item: item entity whose ``name``/``filename`` builds the relative path
    :param to_items_folder: when True, nest the file under an 'items' subfolder
    :param without_relative_path: when not None, drop the remote folder tree and
        use only the item's name
    :param is_folder: force treating ``local_path`` as a directory even when it
        carries an extension
    :return: tuple of (directory, full file path)
    """
    _, extension = os.path.splitext(local_path)
    if extension and not is_folder:
        # caller supplied an explicit filename - derive the directory from it
        target_file = local_path
        target_dir = os.path.dirname(target_file)
    else:
        # caller supplied a directory - derive the filename from the item
        target_dir = local_path
        if to_items_folder:
            target_dir = os.path.join(target_dir, "items")
        elif is_folder:
            target_dir = os.path.join(target_dir, "")
        relative_name = item.name if without_relative_path is not None else item.filename[1:]
        target_file = os.path.join(target_dir, relative_name)
    return target_dir, target_file
|
|
605
|
-
|
|
606
|
-
@staticmethod
def __get_link_source(item):
    """
    Follow link items to their real source.

    Id-type links are resolved recursively by fetching the referenced item;
    a terminal url-type link yields the external URL.

    :param item: entities.Item to resolve
    :return: tuple (resolved item, url string, is_url flag); url is '' and the
        flag is False unless the chain ends in a url-type link
    """
    assert isinstance(item, entities.Item)

    def shebang():
        # current item's shebang section (empty dict when absent)
        return item.metadata.get('system', {}).get('shebang', {})

    def is_link():
        return item.filename.endswith('.json') and shebang().get('dltype', '') == 'link'

    # not fetched or not a link item - nothing to resolve
    if not item.is_fetched or not is_link():
        return item, '', False

    # recursively get next id link item
    while is_link() and shebang().get('linkInfo', {}).get('type', '') == 'id':
        item = item.dataset.items.get(item_id=shebang()['linkInfo']['ref'])

    # terminal url link - hand back the external address
    if is_link() and shebang().get('linkInfo', {}).get('type', '') == 'url':
        return item, shebang()['linkInfo']['ref'], True
    return item, '', False
|
|
630
|
-
|
|
631
|
-
def __file_validation(self, item, downloaded_file):
    """
    Compare a downloaded payload's size with the size recorded in the item metadata.

    :param item: item entity; expected size read from item.metadata['system']['size']
    :param downloaded_file: io.BytesIO buffer or a path to the downloaded file
    :return: tuple (size matches exactly, actual size in bytes, resume allowed) -
        resuming is only sensible while the local copy is not larger than expected
    """
    if isinstance(downloaded_file, io.BytesIO):
        actual_size = downloaded_file.getbuffer().nbytes
    else:
        actual_size = os.stat(downloaded_file).st_size
    expected_size = item.metadata['system']['size']
    matches = actual_size == expected_size
    # a local copy larger than expected is corrupt - resuming cannot fix it
    can_resume = actual_size <= expected_size
    return matches, actual_size, can_resume
|
|
645
|
-
|
|
646
|
-
def __thread_download(self,
                      item,
                      save_locally,
                      local_path,
                      to_array,
                      local_filepath,
                      overwrite,
                      annotation_options,
                      annotation_filters,
                      chunk_size=8192,
                      thickness=1,
                      with_text=False,
                      alpha=1,
                      export_version=entities.ExportVersion.V1
                      ):
    """
    Get a single item's binary data.

    Calling this method returns the item body itself, e.g. an image with the
    proper mimetype. The download is chunked and resumable: after each pass the
    local size is validated against the item metadata and, on mismatch, the
    Range header restarts from the validated offset (up to 3 tries per offset).

    :param item: Item entity to download
    :param save_locally: bool. save to file or return buffer
    :param local_path: item local folder to save to
    :param to_array: returns ndarray when True and save_locally is False
    :param local_filepath: item local filepath
    :param overwrite: overwrite the file if existing
    :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
    :param annotation_filters: Filters entity to filter item's annotations
    :param chunk_size: size of chunks to download - optional. default = 8192
    :param thickness: optional - line thickness, if -1 annotation will be filled, default = 1
    :param with_text: optional - add text to annotations, default = False
    :param alpha: opacity value [0 1], default 1
    :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
    :return: local filepath (save_locally), ndarray (to_array) or an io.BytesIO buffer
    """
    # check if need to download image binary from platform
    need_to_download = True
    if save_locally and os.path.isfile(local_filepath):
        need_to_download = overwrite

    # resolve link items to their true source (may yield an external url)
    item, url, is_url = self.__get_link_source(item=item)

    # save as byte stream
    data = io.BytesIO()
    if need_to_download:
        chunk_resume = {0: 0}
        start_point = 0
        download_done = False
        # retry loop: stop when done, or after 3 failed validations at one offset
        while chunk_resume.get(start_point, '') != 3 and not download_done:
            if not is_url:
                # platform item: stream from the api, resuming at start_point
                headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
                result, response = self.items_repository._client_api.gen_request(req_type="get",
                                                                                headers=headers,
                                                                                path="/items/{}/stream".format(
                                                                                    item.id),
                                                                                stream=True,
                                                                                dataset_id=item.dataset_id)
                if not result:
                    # drop any partial temp file before surfacing the failure
                    if os.path.isfile(local_filepath + '.download'):
                        os.remove(local_filepath + '.download')
                    raise PlatformException(response)
            else:
                # url link item: keep the remote extension and stream the url directly
                _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
                if local_filepath:
                    local_filepath += ext
                response = self.get_url_stream(url=url)

            if save_locally:
                # save to file
                if not os.path.exists(os.path.dirname(local_filepath)):
                    # create folder if not exists
                    os.makedirs(os.path.dirname(local_filepath), exist_ok=True)

                # decide if create progress bar for item
                total_length = response.headers.get("content-length")
                one_file_pbar = None
                try:
                    one_file_progress_bar = total_length is not None and int(
                        total_length) > 10e6  # size larger than 10 MB
                    if one_file_progress_bar:
                        one_file_pbar = tqdm.tqdm(total=int(total_length),
                                                  unit='B',
                                                  unit_scale=True,
                                                  unit_divisor=1024,
                                                  position=1,
                                                  file=sys.stdout,
                                                  disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
                                                  desc='Download Item')
                except Exception as err:
                    one_file_progress_bar = False
                    logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))

                # start download
                if self.items_repository._client_api.sdk_cache.use_cache and \
                        self.items_repository._client_api.cache is not None:
                    # cache mode: response.content carries a quoted local file path
                    response_output = os.path.normpath(response.content)
                    if isinstance(response_output, bytes):
                        response_output = response_output.decode('utf-8')[1:-1]

                    if os.path.isfile(os.path.normpath(response_output)):
                        if response_output != local_filepath:
                            source_path = os.path.normpath(response_output)
                            shutil.copyfile(source_path, local_filepath)
                    # FIX: the cache path previously never set download_done,
                    # leaving the retry while-loop spinning forever
                    download_done = True
                else:
                    try:
                        temp_file_path = local_filepath + '.download'
                        # append mode so a resumed Range request continues the temp file
                        with open(temp_file_path, "ab") as f:
                            try:
                                for chunk in response.iter_content(chunk_size=chunk_size):
                                    if chunk:  # filter out keep-alive new chunks
                                        f.write(chunk)
                                        if one_file_progress_bar:
                                            one_file_pbar.update(len(chunk))
                            except Exception as err:
                                # best-effort: a broken stream is caught by the
                                # size validation below, which drives the resume
                                pass

                        file_validation = True
                        if not is_url:
                            file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                               download_progress=temp_file_path,
                                                                                               chunk_resume=chunk_resume)
                        if file_validation:
                            shutil.move(temp_file_path, local_filepath)
                            download_done = True
                    except Exception as err:
                        if os.path.isfile(temp_file_path):
                            os.remove(temp_file_path)
                        raise err
                if one_file_progress_bar:
                    one_file_pbar.close()
                # save to output variable
                data = local_filepath
                # if image - can download annotation mask
                if item.annotated and annotation_options:
                    self._download_img_annotations(item=item,
                                                   img_filepath=local_filepath,
                                                   annotation_options=annotation_options,
                                                   annotation_filters=annotation_filters,
                                                   local_path=local_path,
                                                   overwrite=overwrite,
                                                   thickness=thickness,
                                                   alpha=alpha,
                                                   with_text=with_text,
                                                   export_version=export_version
                                                   )
            else:
                if self.items_repository._client_api.sdk_cache.use_cache and \
                        self.items_repository._client_api.cache is not None:
                    response_output = os.path.normpath(response.content)
                    if isinstance(response_output, bytes):
                        response_output = response_output.decode('utf-8')[1:-1]

                    if os.path.isfile(response_output):
                        source_file = response_output
                        # FIX: was open(source_file, 'wb') followed by f.read() -
                        # 'wb' truncates the cached file and read() raises.
                        # Read binary and keep `data` a stream so the seek/name
                        # handling below works the same as the non-cache path.
                        with open(source_file, 'rb') as f:
                            data = io.BytesIO(f.read())
                    # FIX: the cache path previously never set download_done,
                    # leaving the retry while-loop spinning forever
                    download_done = True
                else:
                    try:
                        for chunk in response.iter_content(chunk_size=chunk_size):
                            if chunk:  # filter out keep-alive new chunks
                                data.write(chunk)

                        file_validation = True
                        if not is_url:
                            file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                               download_progress=data,
                                                                                               chunk_resume=chunk_resume)
                        if file_validation:
                            download_done = True
                        else:
                            continue
                    except Exception as err:
                        raise err
                # go back to the beginning of the stream
                data.seek(0)
                data.name = item.name
                if not save_locally and to_array:
                    if 'image' not in item.mimetype and not is_url:
                        raise PlatformException(
                            error="400",
                            message='Download element type numpy.ndarray support for image only. '
                                    'Item Id: {} is {} type'.format(item.id, item.mimetype))

                    data = np.array(Image.open(data))
    else:
        # file already exists and overwrite is off - reuse the local copy
        data = local_filepath
    return data
|
|
832
|
-
|
|
833
|
-
def __get_next_chunk(self, item, download_progress, chunk_resume):
    """
    Validate a download pass and compute the offset for the next Range request.

    Tracks failed validations per offset in ``chunk_resume``; after 3 failures
    at the same offset, or when the local copy is already too large to resume,
    the download is declared corrupted.

    :param item: item entity used for the expected-size check
    :param download_progress: io.BytesIO buffer or temp-file path downloaded so far
    :param chunk_resume: dict mapping offset -> failure count
    :return: tuple (validation passed, next start offset, updated chunk_resume)
    :raises PlatformException: error 500 when the file is deemed corrupted
    """
    is_valid, downloaded_size, can_resume = self.__file_validation(item=item,
                                                                   downloaded_file=download_progress)
    offset = downloaded_size
    if is_valid:
        # sizes match - nothing more to fetch
        return is_valid, offset, chunk_resume

    # count failures at this offset; a new offset resets the bookkeeping
    if chunk_resume.get(offset) is None:
        chunk_resume = {offset: 1}
    else:
        chunk_resume[offset] += 1
    if chunk_resume[offset] == 3 or not can_resume:
        raise PlatformException(
            error=500,
            message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
    return is_valid, offset, chunk_resume
|
|
847
|
-
|
|
848
|
-
def __default_local_path(self):
    """
    Build the default download root under the SDK binaries cache.

    Layout depends on the available context: a generic 'items' folder with no
    dataset, '<cache>/datasets/<name>_<id>' for a dataset without a project,
    or '<cache>/projects/<project>/datasets/<dataset>' for the full context.

    :return: the chosen local directory path (also logged at info level)
    """
    repo = self.items_repository
    cache_root = repo._client_api.sdk_cache.cache_path_bin
    if repo._dataset is None:
        # no dataset in context - generic items folder
        default_path = os.path.join(cache_root, "items")
    elif repo.dataset._project is None:
        # by dataset name (id appended to disambiguate same-named datasets)
        default_path = os.path.join(cache_root,
                                    "datasets",
                                    "{}_{}".format(repo.dataset.name, repo.dataset.id))
    else:
        # by dataset and project name
        default_path = os.path.join(cache_root,
                                    "projects",
                                    repo.dataset.project.name,
                                    "datasets",
                                    repo.dataset.name)
    logger.info("Downloading to: {}".format(default_path))
    return default_path
|
|
875
|
-
|
|
876
|
-
@staticmethod
def get_url_stream(url):
    """
    Open a streaming GET request to an external URL.

    Used to fetch the binary of url-type link items whose content lives
    outside the platform. Connection/read failures are retried up to 3 times
    with exponential backoff.

    :param url: source URL to stream from
    :return: a ``requests.Response`` opened with ``stream=True``
    """
    # This will download the binaries from the URL the user provided
    prepared = requests.Request(method='GET', url=url).prepare()
    with requests.Session() as session:
        retry_policy = Retry(
            total=3,
            read=3,
            connect=3,
            backoff_factor=1,
        )
        http_adapter = HTTPAdapter(max_retries=retry_policy)
        for scheme in ('http://', 'https://'):
            session.mount(scheme, http_adapter)
        # NOTE(review): the session context closes before the caller consumes
        # the stream - confirm streamed reads survive the session teardown
        response = session.send(request=prepared, stream=True)
    return response
|
|
1
|
+
from requests.adapters import HTTPAdapter
|
|
2
|
+
from urllib3.util import Retry
|
|
3
|
+
from PIL import Image
|
|
4
|
+
import numpy as np
|
|
5
|
+
import traceback
|
|
6
|
+
import warnings
|
|
7
|
+
import requests
|
|
8
|
+
import logging
|
|
9
|
+
import shutil
|
|
10
|
+
import json
|
|
11
|
+
import tqdm
|
|
12
|
+
import sys
|
|
13
|
+
import os
|
|
14
|
+
import io
|
|
15
|
+
|
|
16
|
+
from .. import entities, repositories, miscellaneous, PlatformException, exceptions
|
|
17
|
+
from ..services import Reporter
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(name='dtlpy')
|
|
20
|
+
|
|
21
|
+
NUM_TRIES = 3 # try to download 3 time before fail on item
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Downloader:
    """Downloads item binaries and annotations from the platform via an items repository."""

    def __init__(self, items_repository):
        """
        :param items_repository: items repository used for item lookup, listing
            and client-api access (requests, thread pools, cache, verbosity)
        """
        self.items_repository = items_repository
|
|
27
|
+
|
|
28
|
+
def download(self,
             # filter options
             filters: entities.Filters = None,
             items=None,
             # download options
             local_path=None,
             file_types=None,
             save_locally=True,
             to_array=False,
             overwrite=False,
             annotation_filters: entities.Filters = None,
             annotation_options: entities.ViewAnnotationOptions = None,
             to_items_folder=True,
             thickness=1,
             with_text=False,
             without_relative_path=None,
             avoid_unnecessary_annotation_download=False,
             include_annotations_in_output=True,
             export_png_files=False,
             filter_output_annotations=False,
             alpha=1,
             export_version=entities.ExportVersion.V1,
             dataset_lock=False,
             lock_timeout_sec=None,
             export_summary=False
             ):
    """
    Download dataset by filters.
    Filtering the dataset for items and save them local
    Optional - also download annotation, mask, instance and image mask of the item

    :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
    :param items: download Item entity or item_id (or a list of item)
    :param local_path: local folder or filename to save to.
    :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
    :param save_locally: bool. save to disk or return a buffer
    :param to_array: returns Ndarray when True and local_path = False
    :param overwrite: optional - default = False
    :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
    :param annotation_filters: Filters entity to filter annotations for download
    :param to_items_folder: Create 'items' folder and download items to it
    :param with_text: optional - add text to annotations, default = False
    :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
    :param without_relative_path: bool - download items without the relative path from platform
    :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
    :param include_annotations_in_output: default - False , if export should contain annotations
    :param export_png_files: default - True, if semantic annotations should be exported as png files
    :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
    :param alpha: opacity value [0 1], default 1
    :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
    :param bool dataset_lock: optional - default = False
    :param bool export_summary: optional - default = False
    :param int lock_timeout_sec: optional
    :return: Output (list)
    """
    # NOTE(review): avoid_unnecessary_annotation_download is deprecated and is
    # never referenced in this body - kept only for signature compatibility

    ###################
    # Default options #
    ###################
    # annotation options
    if annotation_options is None:
        annotation_options = list()
    elif not isinstance(annotation_options, list):
        annotation_options = [annotation_options]
    for ann_option in annotation_options:
        if not isinstance(ann_option, entities.ViewAnnotationOptions):
            # raw values (e.g. strings) are accepted if they match an enum member
            if ann_option not in list(entities.ViewAnnotationOptions):
                raise PlatformException(
                    error='400',
                    message='Unknown annotation download option: {}, please choose from: {}'.format(
                        ann_option, list(entities.ViewAnnotationOptions)))
    # normalize items argument: treat empty list as "no items specified"
    if isinstance(items, list) and len(items) == 0:
        items = None
    #####################
    # items to download #
    #####################
    if items is not None:
        # convert input to a list
        if not isinstance(items, list):
            items = [items]
        # get items by id
        if isinstance(items[0], str):
            items = [self.items_repository.get(item_id=item_id) for item_id in items]
        elif isinstance(items[0], entities.Item):
            pass
        else:
            raise PlatformException(
                error="400",
                message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
                    type(items[0])
                )
            )
        # create filters to download annotations
        filters = entities.Filters(field='id',
                                   values=[item.id for item in items],
                                   operator=entities.FiltersOperations.IN)
        filters._user_query = 'false'

        # convert to list of list (like pages and page)
        items_to_download = [items]
        num_items = len(items)
    else:
        # filters
        if filters is None:
            filters = entities.Filters()
            filters._user_query = 'false'
        # file types
        if file_types is not None:
            filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
        if annotation_filters is not None:
            # fold the annotation filters into the item query as join clauses
            for annotation_filter_and in annotation_filters.and_filter_list:
                filters.add_join(field=annotation_filter_and.field,
                                 values=annotation_filter_and.values,
                                 operator=annotation_filter_and.operator,
                                 method=entities.FiltersMethod.AND)
            for annotation_filter_or in annotation_filters.or_filter_list:
                filters.add_join(field=annotation_filter_or.field,
                                 values=annotation_filter_or.values,
                                 operator=annotation_filter_or.operator,
                                 method=entities.FiltersMethod.OR)
        else:
            annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
            # NOTE(review): this re-sets filters._user_query rather than a flag
            # on the freshly created annotation_filters - confirm intent
            filters._user_query = 'false'

        items_to_download = self.items_repository.list(filters=filters)
        num_items = items_to_download.items_count

    if num_items == 0:
        logger.warning('No items found! Nothing was downloaded')
        return list()

    ##############
    # local path #
    ##############
    is_folder = False
    if local_path is None:
        # create default local path
        local_path = self.__default_local_path()

    if os.path.isdir(local_path):
        logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
            local_path))
        is_folder = True
    else:
        # check if filename
        _, ext = os.path.splitext(local_path)
        if num_items > 1:
            is_folder = True
        else:
            # single item: local_path is a filename only if its extension
            # matches the item's; otherwise treat it as a folder
            item_to_download = items_to_download[0][0]
            file_name = item_to_download.name
            _, ext_download = os.path.splitext(file_name)
            if ext_download != ext:
                is_folder = True
    if is_folder and save_locally:
        path_to_create = local_path
        if local_path.endswith('*'):
            path_to_create = os.path.dirname(local_path)
        logger.info("Creating new directory for download: {}".format(path_to_create))
        os.makedirs(path_to_create, exist_ok=True)

    ####################
    # annotations json #
    ####################
    # download annotations' json files in a new thread
    # items will start downloading and if json not exists yet - will download for each file
    if num_items > 1 and annotation_options:
        # a new folder named 'json' will be created under the "local_path"
        logger.info("Downloading annotations formats: {}".format(annotation_options))
        self.download_annotations(**{
            "dataset": self.items_repository.dataset,
            "filters": filters,
            "annotation_filters": annotation_filters,
            "local_path": local_path,
            'overwrite': overwrite,
            'include_annotations_in_output': include_annotations_in_output,
            'export_png_files': export_png_files,
            'filter_output_annotations': filter_output_annotations,
            'export_version': export_version,
            'dataset_lock': dataset_lock,
            'lock_timeout_sec': lock_timeout_sec,
            'export_summary': export_summary
        })
    ###############
    # downloading #
    ###############
    # create result lists
    client_api = self.items_repository._client_api

    reporter = Reporter(num_workers=num_items,
                        resource=Reporter.ITEMS_DOWNLOAD,
                        print_error_logs=client_api.verbose.print_error_logs,
                        client_api=client_api)
    jobs = [None for _ in range(num_items)]
    # pool
    pool = client_api.thread_pools(pool_name='item.download')
    # download
    pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
                     desc='Download Items')
    try:
        i_item = 0
        for page in items_to_download:
            for item in page:
                # directories have no binary to download
                if item.type == "dir":
                    continue
                if save_locally:
                    # get local file path
                    item_local_path, item_local_filepath = self.__get_local_filepath(
                        local_path=local_path,
                        without_relative_path=without_relative_path,
                        item=item,
                        to_items_folder=to_items_folder,
                        is_folder=is_folder)

                    if os.path.isfile(item_local_filepath) and not overwrite:
                        # binary already on disk - report it and, at most,
                        # refresh its annotations
                        logger.debug("File Exists: {}".format(item_local_filepath))
                        reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
                        pbar.update()
                        if annotation_options and item.annotated:
                            # download annotations only
                            jobs[i_item] = pool.submit(
                                self._download_img_annotations,
                                **{
                                    "item": item,
                                    "img_filepath": item_local_filepath,
                                    "overwrite": overwrite,
                                    "annotation_options": annotation_options,
                                    "annotation_filters": annotation_filters,
                                    "local_path": item_local_path,
                                    "thickness": thickness,
                                    "alpha": alpha,
                                    "with_text": with_text,
                                    "export_version": export_version,
                                },
                            )
                        i_item += 1
                        continue
                else:
                    item_local_path = None
                    item_local_filepath = None

                # download single item
                jobs[i_item] = pool.submit(
                    self.__thread_download_wrapper,
                    **{
                        "i_item": i_item,
                        "item": item,
                        "item_local_path": item_local_path,
                        "item_local_filepath": item_local_filepath,
                        "save_locally": save_locally,
                        "to_array": to_array,
                        "annotation_options": annotation_options,
                        "annotation_filters": annotation_filters,
                        "reporter": reporter,
                        "pbar": pbar,
                        "overwrite": overwrite,
                        "thickness": thickness,
                        "alpha": alpha,
                        "with_text": with_text,
                        "export_version": export_version
                    },
                )
                i_item += 1
    except Exception:
        logger.exception('Error downloading:')
    finally:
        # wait for every submitted job before closing the progress bar
        _ = [j.result() for j in jobs if j is not None]
        pbar.close()
    # reporting
    n_download = reporter.status_count(status='download')
    n_exist = reporter.status_count(status='exist')
    n_error = reporter.status_count(status='error')
    logger.info("Number of files downloaded:{}".format(n_download))
    logger.info("Number of files exists: {}".format(n_exist))
    logger.info("Total number of files: {}".format(n_download + n_exist))

    # log error
    if n_error > 0:
        log_filepath = reporter.generate_log_files()
        if log_filepath is not None:
            logger.warning("Errors in {} files. See {} for full log".format(n_error, log_filepath))
    # single-item requests return the value itself rather than an iterator
    if int(n_download) <= 1 and int(n_exist) <= 1:
        try:
            return next(reporter.output)
        except StopIteration:
            return None
    return reporter.output
|
|
316
|
+
|
|
317
|
+
    def __thread_download_wrapper(self, i_item,
                                  # item params
                                  item, item_local_path, item_local_filepath,
                                  save_locally, to_array, overwrite,
                                  # annotations params
                                  annotation_options, annotation_filters, with_text, thickness,
                                  # threading params
                                  reporter, pbar, alpha, export_version):
        """
        Retry wrapper around a single item download, executed in a worker thread.

        Attempts the download up to NUM_TRIES times, advances the shared progress
        bar once per item, and records the final outcome (success, or error with
        the last traceback) on the shared reporter under the item's id.

        :param i_item: index of the item in the batch (job bookkeeping)
        :param item: Item entity to download
        :param item_local_path: destination folder (None when downloading to a buffer)
        :param item_local_filepath: destination file path (None when downloading to a buffer)
        :param save_locally: save to disk (True) or return a buffer (False)
        :param to_array: return an ndarray instead of a buffer (images only)
        :param overwrite: overwrite existing local files
        :param annotation_options: list(dl.ViewAnnotationOptions) to download alongside the item
        :param annotation_filters: Filters entity to filter the item's annotations
        :param with_text: draw label text on image annotations
        :param thickness: annotation line thickness
        :param reporter: shared status reporter for the whole batch
        :param pbar: shared tqdm progress bar for the whole batch
        :param alpha: annotation opacity [0, 1]
        :param export_version: annotations export version (filename convention)
        """
        download = None
        err = None
        trace = None
        for i_try in range(NUM_TRIES):
            try:
                logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
                                                                                     i=i_try + 1,
                                                                                     n=NUM_TRIES))
                download = self.__thread_download(item=item,
                                                  save_locally=save_locally,
                                                  to_array=to_array,
                                                  local_path=item_local_path,
                                                  local_filepath=item_local_filepath,
                                                  annotation_options=annotation_options,
                                                  annotation_filters=annotation_filters,
                                                  overwrite=overwrite,
                                                  thickness=thickness,
                                                  alpha=alpha,
                                                  with_text=with_text,
                                                  export_version=export_version)
                logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
                                                                                                 i=i_try + 1,
                                                                                                 n=NUM_TRIES,
                                                                                                 id=item.id))
                if download is not None:
                    break
            except Exception as e:
                logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
                                                                                i=i_try + 1,
                                                                                n=NUM_TRIES))
                # keep only the LAST error and traceback for the final report
                err = e
                trace = traceback.format_exc()
        # one tick per item regardless of how many retries were needed
        pbar.update()
        if download is None:
            if err is None:
                # nothing was downloaded but no exception surfaced - fall back to the platform error
                err = self.items_repository._client_api.platform_exception
            reporter.set_index(status="error", ref=item.id, success=False,
                               error="{}\n{}".format(err, trace))
        else:
            reporter.set_index(ref=item.id, status="download", output=download, success=True)
@staticmethod
|
|
368
|
+
def download_annotations(dataset: entities.Dataset,
|
|
369
|
+
local_path: str,
|
|
370
|
+
filters: entities.Filters = None,
|
|
371
|
+
annotation_filters: entities.Filters = None,
|
|
372
|
+
overwrite=False,
|
|
373
|
+
include_annotations_in_output=True,
|
|
374
|
+
export_png_files=False,
|
|
375
|
+
filter_output_annotations=False,
|
|
376
|
+
export_version=entities.ExportVersion.V1,
|
|
377
|
+
dataset_lock=False,
|
|
378
|
+
lock_timeout_sec=None,
|
|
379
|
+
export_summary=False
|
|
380
|
+
):
|
|
381
|
+
"""
|
|
382
|
+
Download annotations json for entire dataset
|
|
383
|
+
|
|
384
|
+
:param dataset: Dataset entity
|
|
385
|
+
:param local_path:
|
|
386
|
+
:param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
|
|
387
|
+
:param annotation_filters: dl.Filters entity to filters items' annotations
|
|
388
|
+
:param overwrite: optional - overwrite annotations if exist, default = false
|
|
389
|
+
:param include_annotations_in_output: default - True , if export should contain annotations
|
|
390
|
+
:param export_png_files: default - if True, semantic annotations should be exported as png files
|
|
391
|
+
:param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
|
|
392
|
+
:param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
393
|
+
:param bool dataset_lock: optional - default = False
|
|
394
|
+
:param bool export_summary: optional - default = False
|
|
395
|
+
:param int lock_timeout_sec: optional
|
|
396
|
+
:return:
|
|
397
|
+
"""
|
|
398
|
+
local_path = os.path.join(local_path, "json")
|
|
399
|
+
zip_filepath = None
|
|
400
|
+
# only if json folder does not exist or exist and overwrite
|
|
401
|
+
if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
|
|
402
|
+
# create local path to download and save to
|
|
403
|
+
if not os.path.isdir(local_path):
|
|
404
|
+
os.makedirs(local_path)
|
|
405
|
+
|
|
406
|
+
try:
|
|
407
|
+
payload = dict()
|
|
408
|
+
if filters is not None:
|
|
409
|
+
payload['itemsQuery'] = filters.prepare()
|
|
410
|
+
payload['annotations'] = {
|
|
411
|
+
"include": include_annotations_in_output,
|
|
412
|
+
"convertSemantic": export_png_files
|
|
413
|
+
}
|
|
414
|
+
payload['exportVersion'] = export_version
|
|
415
|
+
if annotation_filters is not None:
|
|
416
|
+
payload['annotationsQuery'] = annotation_filters.prepare()
|
|
417
|
+
payload['annotations']['filter'] = filter_output_annotations
|
|
418
|
+
if dataset_lock:
|
|
419
|
+
payload['datasetLock'] = dataset_lock
|
|
420
|
+
|
|
421
|
+
if export_summary:
|
|
422
|
+
payload['summary'] = export_summary
|
|
423
|
+
|
|
424
|
+
if lock_timeout_sec:
|
|
425
|
+
payload['lockTimeoutSec'] = lock_timeout_sec
|
|
426
|
+
|
|
427
|
+
success, response = dataset._client_api.gen_request(req_type='post',
|
|
428
|
+
path='/datasets/{}/export'.format(dataset.id),
|
|
429
|
+
json_req=payload,
|
|
430
|
+
headers={'user_query': filters._user_query})
|
|
431
|
+
if not success:
|
|
432
|
+
raise exceptions.PlatformException(response)
|
|
433
|
+
command = entities.Command.from_json(_json=response.json(),
|
|
434
|
+
client_api=dataset._client_api)
|
|
435
|
+
command = command.wait(timeout=0)
|
|
436
|
+
if 'outputItemId' not in command.spec:
|
|
437
|
+
raise exceptions.PlatformException(
|
|
438
|
+
error='400',
|
|
439
|
+
message="outputItemId key is missing in command response: {}".format(response))
|
|
440
|
+
item_id = command.spec['outputItemId']
|
|
441
|
+
annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
|
|
442
|
+
zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
|
|
443
|
+
# unzipping annotations to directory
|
|
444
|
+
if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
|
|
445
|
+
raise exceptions.PlatformException(
|
|
446
|
+
error='404',
|
|
447
|
+
message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
|
|
448
|
+
annotation_zip_item.id))
|
|
449
|
+
try:
|
|
450
|
+
miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
|
|
451
|
+
to_directory=local_path)
|
|
452
|
+
except Exception as e:
|
|
453
|
+
logger.warning("Failed to extract zip file error: {}".format(e))
|
|
454
|
+
|
|
455
|
+
finally:
|
|
456
|
+
# cleanup
|
|
457
|
+
if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
|
|
458
|
+
os.remove(zip_filepath)
|
|
459
|
+
|
|
460
|
+
    @staticmethod
    def _download_img_annotations(item: entities.Item,
                                  img_filepath,
                                  local_path,
                                  overwrite,
                                  annotation_options,
                                  annotation_filters,
                                  thickness=1,
                                  with_text=False,
                                  alpha=1,
                                  export_version=entities.ExportVersion.V1
                                  ):
        """
        Save an item's annotations locally in each requested view option.

        Loads annotations from an already-downloaded json file when possible
        (and no annotation filter is requested), otherwise lists them from the
        platform, then renders/saves one file per option under
        ``<local_path>/<option>/<relative item path>``.

        :param item: Item entity whose annotations are saved
        :param img_filepath: local path of the downloaded item binary (may be None)
        :param local_path: download root (an "/items" suffix is stripped if present)
        :param overwrite: re-render existing annotation files
        :param annotation_options: list(dl.ViewAnnotationOptions) to produce
        :param annotation_filters: Filters entity to filter the item's annotations
        :param thickness: line thickness for drawn annotations
        :param with_text: draw label text on drawn annotations
        :param alpha: opacity value [0 1]
        :param export_version: controls whether the original extension is kept in filenames
        """

        # check if local_path is a file name
        _, ext = os.path.splitext(local_path)
        if ext:
            # take the dir of the file for the annotations save
            local_path = os.path.dirname(local_path)

        # fix local path: annotations live next to the "items" folder, not inside it
        if local_path.endswith("/items") or local_path.endswith("\\items"):
            local_path = os.path.dirname(local_path)

        # remote path relative to the dataset root (drop the leading '/')
        annotation_rel_path = item.filename[1:]
        if img_filepath is not None:
            # keep the actual downloaded file name (link items may change the extension)
            dir_name = os.path.dirname(annotation_rel_path)
            base_name = os.path.basename(img_filepath)
            annotation_rel_path = os.path.join(dir_name, base_name)

        # find annotations json
        annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
        if export_version == entities.ExportVersion.V1:
            # V1 filenames drop the original extension before adding ".json"
            name, _ = os.path.splitext(annotations_json_filepath)
        else:
            name = annotations_json_filepath
        annotations_json_filepath = name + ".json"

        if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
            # if exists take from json file
            with open(annotations_json_filepath, "r", encoding="utf8") as f:
                data = json.load(f)
                if "annotations" in data:
                    data = data["annotations"]
                annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
            # no need to use the filters here because the annotations were already downloaded with annotation_filters
        else:
            # if json file doesnt exist get the annotations from platform
            annotations = item.annotations.list(filters=annotation_filters)

        # get image shape
        is_url_item = item.metadata. \
            get('system', dict()). \
            get('shebang', dict()). \
            get('linkInfo', dict()). \
            get('type', None) == 'url'

        # NOTE(review): `item` was already dereferenced above, so this None-check can never
        # take the else branch - looks like dead code; confirm before removing.
        if item is not None:
            orientation = item.system.get('exif', {}).get('Orientation', 0)
        else:
            orientation = 0
        if item.width is not None and item.height is not None:
            # EXIF orientations 5-8 rotate the image 90 degrees, swapping width/height
            if orientation in [5, 6, 7, 8]:
                img_shape = (item.width, item.height)
            else:
                img_shape = (item.height, item.width)
        elif ('image' in item.mimetype and img_filepath is not None) or \
                (is_url_item and img_filepath is not None):
            # fall back to reading the shape from the downloaded binary
            # NOTE(review): Image.open handle is not closed explicitly here
            img_shape = Image.open(img_filepath).size[::-1]
        else:
            img_shape = (0, 0)

        # download all annotation options
        for option in annotation_options:
            # get path and create dirs
            annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
            if not os.path.isdir(os.path.dirname(annotation_filepath)):
                os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)

            if export_version == entities.ExportVersion.V1:
                temp_path, ext = os.path.splitext(annotation_filepath)
            else:
                temp_path = annotation_filepath

            if option == entities.ViewAnnotationOptions.JSON:
                # json is only written when missing (already-downloaded exports are kept)
                if not os.path.isfile(annotations_json_filepath):
                    annotations.download(
                        filepath=annotations_json_filepath,
                        annotation_format=option,
                        height=img_shape[0],
                        width=img_shape[1],
                    )
            elif option in [entities.ViewAnnotationOptions.MASK,
                            entities.ViewAnnotationOptions.INSTANCE,
                            entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
                            entities.ViewAnnotationOptions.OBJECT_ID,
                            entities.ViewAnnotationOptions.VTT]:
                # pick the output extension by option / item type
                if option == entities.ViewAnnotationOptions.VTT:
                    annotation_filepath = temp_path + ".vtt"
                else:
                    if 'video' in item.mimetype:
                        annotation_filepath = temp_path + ".mp4"
                    else:
                        annotation_filepath = temp_path + ".png"
                if not os.path.isfile(annotation_filepath) or overwrite:
                    # if not exists OR (exists AND overwrite)
                    if not os.path.exists(os.path.dirname(annotation_filepath)):
                        # create folder if not exists
                        os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
                    if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
                        raise PlatformException(
                            error="1002",
                            message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
                    annotations.download(
                        filepath=annotation_filepath,
                        img_filepath=img_filepath,
                        annotation_format=option,
                        height=img_shape[0],
                        width=img_shape[1],
                        thickness=thickness,
                        alpha=alpha,
                        with_text=with_text,
                        orientation=orientation
                    )
            else:
                raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
@staticmethod
|
|
587
|
+
def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
|
|
588
|
+
# create paths
|
|
589
|
+
_, ext = os.path.splitext(local_path)
|
|
590
|
+
if ext and not is_folder:
|
|
591
|
+
# local_path is a filename
|
|
592
|
+
local_filepath = local_path
|
|
593
|
+
local_path = os.path.dirname(local_filepath)
|
|
594
|
+
else:
|
|
595
|
+
# if directory - get item's filename
|
|
596
|
+
if to_items_folder:
|
|
597
|
+
local_path = os.path.join(local_path, "items")
|
|
598
|
+
elif is_folder:
|
|
599
|
+
local_path = os.path.join(local_path, "")
|
|
600
|
+
if without_relative_path is not None:
|
|
601
|
+
local_filepath = os.path.join(local_path, item.name)
|
|
602
|
+
else:
|
|
603
|
+
local_filepath = os.path.join(local_path, item.filename[1:])
|
|
604
|
+
return local_path, local_filepath
|
|
605
|
+
|
|
606
|
+
@staticmethod
|
|
607
|
+
def __get_link_source(item):
|
|
608
|
+
assert isinstance(item, entities.Item)
|
|
609
|
+
if not item.is_fetched:
|
|
610
|
+
return item, '', False
|
|
611
|
+
|
|
612
|
+
if not item.filename.endswith('.json') or \
|
|
613
|
+
item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') != 'link':
|
|
614
|
+
return item, '', False
|
|
615
|
+
|
|
616
|
+
# recursively get next id link item
|
|
617
|
+
while item.filename.endswith('.json') and \
|
|
618
|
+
item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
|
|
619
|
+
item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'id':
|
|
620
|
+
item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])
|
|
621
|
+
|
|
622
|
+
# check if link
|
|
623
|
+
if item.filename.endswith('.json') and \
|
|
624
|
+
item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
|
|
625
|
+
item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
|
|
626
|
+
url = item.metadata['system']['shebang']['linkInfo']['ref']
|
|
627
|
+
return item, url, True
|
|
628
|
+
else:
|
|
629
|
+
return item, '', False
|
|
630
|
+
|
|
631
|
+
def __file_validation(self, item, downloaded_file):
|
|
632
|
+
res = False
|
|
633
|
+
resume = True
|
|
634
|
+
if isinstance(downloaded_file, io.BytesIO):
|
|
635
|
+
file_size = downloaded_file.getbuffer().nbytes
|
|
636
|
+
else:
|
|
637
|
+
file_size = os.stat(downloaded_file).st_size
|
|
638
|
+
expected_size = item.metadata['system']['size']
|
|
639
|
+
size_diff = file_size - expected_size
|
|
640
|
+
if size_diff == 0:
|
|
641
|
+
res = True
|
|
642
|
+
if size_diff > 0:
|
|
643
|
+
resume = False
|
|
644
|
+
return res, file_size, resume
|
|
645
|
+
|
|
646
|
+
def __thread_download(self,
|
|
647
|
+
item,
|
|
648
|
+
save_locally,
|
|
649
|
+
local_path,
|
|
650
|
+
to_array,
|
|
651
|
+
local_filepath,
|
|
652
|
+
overwrite,
|
|
653
|
+
annotation_options,
|
|
654
|
+
annotation_filters,
|
|
655
|
+
chunk_size=8192,
|
|
656
|
+
thickness=1,
|
|
657
|
+
with_text=False,
|
|
658
|
+
alpha=1,
|
|
659
|
+
export_version=entities.ExportVersion.V1
|
|
660
|
+
):
|
|
661
|
+
"""
|
|
662
|
+
Get a single item's binary data
|
|
663
|
+
Calling this method will returns the item body itself , an image for example with the proper mimetype.
|
|
664
|
+
|
|
665
|
+
:param item: Item entity to download
|
|
666
|
+
:param save_locally: bool. save to file or return buffer
|
|
667
|
+
:param local_path: item local folder to save to.
|
|
668
|
+
:param to_array: returns Ndarray when True and local_path = False
|
|
669
|
+
:param local_filepath: item local filepath
|
|
670
|
+
:param overwrite: overwrite the file is existing
|
|
671
|
+
:param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
|
|
672
|
+
:param annotation_filters: Filters entity to filter item's annotation
|
|
673
|
+
:param chunk_size: size of chunks to download - optional. default = 8192
|
|
674
|
+
:param thickness: optional - line thickness, if -1 annotation will be filled, default =1
|
|
675
|
+
:param with_text: optional - add text to annotations, default = False
|
|
676
|
+
:param alpha: opacity value [0 1], default 1
|
|
677
|
+
:param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
678
|
+
:return:
|
|
679
|
+
"""
|
|
680
|
+
# check if need to download image binary from platform
|
|
681
|
+
need_to_download = True
|
|
682
|
+
if save_locally and os.path.isfile(local_filepath):
|
|
683
|
+
need_to_download = overwrite
|
|
684
|
+
|
|
685
|
+
item, url, is_url = self.__get_link_source(item=item)
|
|
686
|
+
|
|
687
|
+
# save as byte stream
|
|
688
|
+
data = io.BytesIO()
|
|
689
|
+
if need_to_download:
|
|
690
|
+
chunk_resume = {0: 0}
|
|
691
|
+
start_point = 0
|
|
692
|
+
download_done = False
|
|
693
|
+
while chunk_resume.get(start_point, '') != 3 and not download_done:
|
|
694
|
+
if not is_url:
|
|
695
|
+
headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
|
|
696
|
+
result, response = self.items_repository._client_api.gen_request(req_type="get",
|
|
697
|
+
headers=headers,
|
|
698
|
+
path="/items/{}/stream".format(
|
|
699
|
+
item.id),
|
|
700
|
+
stream=True,
|
|
701
|
+
dataset_id=item.dataset_id)
|
|
702
|
+
if not result:
|
|
703
|
+
if os.path.isfile(local_filepath + '.download'):
|
|
704
|
+
os.remove(local_filepath + '.download')
|
|
705
|
+
raise PlatformException(response)
|
|
706
|
+
else:
|
|
707
|
+
_, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
|
|
708
|
+
if local_filepath:
|
|
709
|
+
local_filepath += ext
|
|
710
|
+
response = self.get_url_stream(url=url)
|
|
711
|
+
|
|
712
|
+
if save_locally:
|
|
713
|
+
# save to file
|
|
714
|
+
if not os.path.exists(os.path.dirname(local_filepath)):
|
|
715
|
+
# create folder if not exists
|
|
716
|
+
os.makedirs(os.path.dirname(local_filepath), exist_ok=True)
|
|
717
|
+
|
|
718
|
+
# decide if create progress bar for item
|
|
719
|
+
total_length = response.headers.get("content-length")
|
|
720
|
+
one_file_pbar = None
|
|
721
|
+
try:
|
|
722
|
+
one_file_progress_bar = total_length is not None and int(
|
|
723
|
+
total_length) > 10e6 # size larger than 10 MB
|
|
724
|
+
if one_file_progress_bar:
|
|
725
|
+
one_file_pbar = tqdm.tqdm(total=int(total_length),
|
|
726
|
+
unit='B',
|
|
727
|
+
unit_scale=True,
|
|
728
|
+
unit_divisor=1024,
|
|
729
|
+
position=1,
|
|
730
|
+
file=sys.stdout,
|
|
731
|
+
disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
|
|
732
|
+
desc='Download Item')
|
|
733
|
+
except Exception as err:
|
|
734
|
+
one_file_progress_bar = False
|
|
735
|
+
logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))
|
|
736
|
+
|
|
737
|
+
# start download
|
|
738
|
+
if self.items_repository._client_api.sdk_cache.use_cache and \
|
|
739
|
+
self.items_repository._client_api.cache is not None:
|
|
740
|
+
response_output = os.path.normpath(response.content)
|
|
741
|
+
if isinstance(response_output, bytes):
|
|
742
|
+
response_output = response_output.decode('utf-8')[1:-1]
|
|
743
|
+
|
|
744
|
+
if os.path.isfile(os.path.normpath(response_output)):
|
|
745
|
+
if response_output != local_filepath:
|
|
746
|
+
source_path = os.path.normpath(response_output)
|
|
747
|
+
shutil.copyfile(source_path, local_filepath)
|
|
748
|
+
else:
|
|
749
|
+
try:
|
|
750
|
+
temp_file_path = local_filepath + '.download'
|
|
751
|
+
with open(temp_file_path, "ab") as f:
|
|
752
|
+
try:
|
|
753
|
+
for chunk in response.iter_content(chunk_size=chunk_size):
|
|
754
|
+
if chunk: # filter out keep-alive new chunks
|
|
755
|
+
f.write(chunk)
|
|
756
|
+
if one_file_progress_bar:
|
|
757
|
+
one_file_pbar.update(len(chunk))
|
|
758
|
+
except Exception as err:
|
|
759
|
+
pass
|
|
760
|
+
|
|
761
|
+
file_validation = True
|
|
762
|
+
if not is_url:
|
|
763
|
+
file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
|
|
764
|
+
download_progress=temp_file_path,
|
|
765
|
+
chunk_resume=chunk_resume)
|
|
766
|
+
if file_validation:
|
|
767
|
+
shutil.move(temp_file_path, local_filepath)
|
|
768
|
+
download_done = True
|
|
769
|
+
except Exception as err:
|
|
770
|
+
if os.path.isfile(temp_file_path):
|
|
771
|
+
os.remove(temp_file_path)
|
|
772
|
+
raise err
|
|
773
|
+
if one_file_progress_bar:
|
|
774
|
+
one_file_pbar.close()
|
|
775
|
+
# save to output variable
|
|
776
|
+
data = local_filepath
|
|
777
|
+
# if image - can download annotation mask
|
|
778
|
+
if item.annotated and annotation_options:
|
|
779
|
+
self._download_img_annotations(item=item,
|
|
780
|
+
img_filepath=local_filepath,
|
|
781
|
+
annotation_options=annotation_options,
|
|
782
|
+
annotation_filters=annotation_filters,
|
|
783
|
+
local_path=local_path,
|
|
784
|
+
overwrite=overwrite,
|
|
785
|
+
thickness=thickness,
|
|
786
|
+
alpha=alpha,
|
|
787
|
+
with_text=with_text,
|
|
788
|
+
export_version=export_version
|
|
789
|
+
)
|
|
790
|
+
else:
|
|
791
|
+
if self.items_repository._client_api.sdk_cache.use_cache and \
|
|
792
|
+
self.items_repository._client_api.cache is not None:
|
|
793
|
+
response_output = os.path.normpath(response.content)
|
|
794
|
+
if isinstance(response_output, bytes):
|
|
795
|
+
response_output = response_output.decode('utf-8')[1:-1]
|
|
796
|
+
|
|
797
|
+
if os.path.isfile(response_output):
|
|
798
|
+
source_file = response_output
|
|
799
|
+
with open(source_file, 'wb') as f:
|
|
800
|
+
data = f.read()
|
|
801
|
+
else:
|
|
802
|
+
try:
|
|
803
|
+
for chunk in response.iter_content(chunk_size=chunk_size):
|
|
804
|
+
if chunk: # filter out keep-alive new chunks
|
|
805
|
+
data.write(chunk)
|
|
806
|
+
|
|
807
|
+
file_validation = True
|
|
808
|
+
if not is_url:
|
|
809
|
+
file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
|
|
810
|
+
download_progress=data,
|
|
811
|
+
chunk_resume=chunk_resume)
|
|
812
|
+
if file_validation:
|
|
813
|
+
download_done = True
|
|
814
|
+
else:
|
|
815
|
+
continue
|
|
816
|
+
except Exception as err:
|
|
817
|
+
raise err
|
|
818
|
+
# go back to the beginning of the stream
|
|
819
|
+
data.seek(0)
|
|
820
|
+
data.name = item.name
|
|
821
|
+
if not save_locally and to_array:
|
|
822
|
+
if 'image' not in item.mimetype and not is_url:
|
|
823
|
+
raise PlatformException(
|
|
824
|
+
error="400",
|
|
825
|
+
message='Download element type numpy.ndarray support for image only. '
|
|
826
|
+
'Item Id: {} is {} type'.format(item.id, item.mimetype))
|
|
827
|
+
|
|
828
|
+
data = np.array(Image.open(data))
|
|
829
|
+
else:
|
|
830
|
+
data = local_filepath
|
|
831
|
+
return data
|
|
832
|
+
|
|
833
|
+
def __get_next_chunk(self, item, download_progress, chunk_resume):
|
|
834
|
+
size_validation, file_size, resume = self.__file_validation(item=item,
|
|
835
|
+
downloaded_file=download_progress)
|
|
836
|
+
start_point = file_size
|
|
837
|
+
if not size_validation:
|
|
838
|
+
if chunk_resume.get(start_point, None) is None:
|
|
839
|
+
chunk_resume = {start_point: 1}
|
|
840
|
+
else:
|
|
841
|
+
chunk_resume[start_point] += 1
|
|
842
|
+
if chunk_resume[start_point] == 3 or not resume:
|
|
843
|
+
raise PlatformException(
|
|
844
|
+
error=500,
|
|
845
|
+
message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
|
|
846
|
+
return size_validation, start_point, chunk_resume
|
|
847
|
+
|
|
848
|
+
def __default_local_path(self):
|
|
849
|
+
|
|
850
|
+
# create default local path
|
|
851
|
+
if self.items_repository._dataset is None:
|
|
852
|
+
local_path = os.path.join(
|
|
853
|
+
self.items_repository._client_api.sdk_cache.cache_path_bin,
|
|
854
|
+
"items",
|
|
855
|
+
)
|
|
856
|
+
else:
|
|
857
|
+
if self.items_repository.dataset._project is None:
|
|
858
|
+
# by dataset name
|
|
859
|
+
local_path = os.path.join(
|
|
860
|
+
self.items_repository._client_api.sdk_cache.cache_path_bin,
|
|
861
|
+
"datasets",
|
|
862
|
+
"{}_{}".format(self.items_repository.dataset.name, self.items_repository.dataset.id),
|
|
863
|
+
)
|
|
864
|
+
else:
|
|
865
|
+
# by dataset and project name
|
|
866
|
+
local_path = os.path.join(
|
|
867
|
+
self.items_repository._client_api.sdk_cache.cache_path_bin,
|
|
868
|
+
"projects",
|
|
869
|
+
self.items_repository.dataset.project.name,
|
|
870
|
+
"datasets",
|
|
871
|
+
self.items_repository.dataset.name,
|
|
872
|
+
)
|
|
873
|
+
logger.info("Downloading to: {}".format(local_path))
|
|
874
|
+
return local_path
|
|
875
|
+
|
|
876
|
+
@staticmethod
|
|
877
|
+
def get_url_stream(url):
|
|
878
|
+
"""
|
|
879
|
+
:param url:
|
|
880
|
+
"""
|
|
881
|
+
# This will download the binaries from the URL user provided
|
|
882
|
+
prepared_request = requests.Request(method='GET', url=url).prepare()
|
|
883
|
+
with requests.Session() as s:
|
|
884
|
+
retry = Retry(
|
|
885
|
+
total=3,
|
|
886
|
+
read=3,
|
|
887
|
+
connect=3,
|
|
888
|
+
backoff_factor=1,
|
|
889
|
+
)
|
|
890
|
+
adapter = HTTPAdapter(max_retries=retry)
|
|
891
|
+
s.mount('http://', adapter)
|
|
892
|
+
s.mount('https://', adapter)
|
|
893
|
+
response = s.send(request=prepared_request, stream=True)
|
|
894
|
+
|
|
895
|
+
return response
|