dtlpy 1.113.10__py3-none-any.whl → 1.114.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. dtlpy/__init__.py +488 -488
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/__pycache__/__init__.cpython-38.pyc +0 -0
  5. dtlpy/assets/code_server/config.yaml +2 -2
  6. dtlpy/assets/code_server/installation.sh +24 -24
  7. dtlpy/assets/code_server/launch.json +13 -13
  8. dtlpy/assets/code_server/settings.json +2 -2
  9. dtlpy/assets/main.py +53 -53
  10. dtlpy/assets/main_partial.py +18 -18
  11. dtlpy/assets/mock.json +11 -11
  12. dtlpy/assets/model_adapter.py +83 -83
  13. dtlpy/assets/package.json +61 -61
  14. dtlpy/assets/package_catalog.json +29 -29
  15. dtlpy/assets/package_gitignore +307 -307
  16. dtlpy/assets/service_runners/__init__.py +33 -33
  17. dtlpy/assets/service_runners/converter.py +96 -96
  18. dtlpy/assets/service_runners/multi_method.py +49 -49
  19. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  20. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  21. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  22. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  23. dtlpy/assets/service_runners/single_method.py +37 -37
  24. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  25. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  26. dtlpy/assets/service_runners/single_method_item.py +41 -41
  27. dtlpy/assets/service_runners/single_method_json.py +42 -42
  28. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  29. dtlpy/assets/voc_annotation_template.xml +23 -23
  30. dtlpy/caches/base_cache.py +32 -32
  31. dtlpy/caches/cache.py +473 -473
  32. dtlpy/caches/dl_cache.py +201 -201
  33. dtlpy/caches/filesystem_cache.py +89 -89
  34. dtlpy/caches/redis_cache.py +84 -84
  35. dtlpy/dlp/__init__.py +20 -20
  36. dtlpy/dlp/cli_utilities.py +367 -367
  37. dtlpy/dlp/command_executor.py +764 -764
  38. dtlpy/dlp/dlp +1 -1
  39. dtlpy/dlp/dlp.bat +1 -1
  40. dtlpy/dlp/dlp.py +128 -128
  41. dtlpy/dlp/parser.py +651 -651
  42. dtlpy/entities/__init__.py +83 -83
  43. dtlpy/entities/analytic.py +311 -311
  44. dtlpy/entities/annotation.py +1879 -1879
  45. dtlpy/entities/annotation_collection.py +699 -699
  46. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  47. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  48. dtlpy/entities/annotation_definitions/box.py +195 -195
  49. dtlpy/entities/annotation_definitions/classification.py +67 -67
  50. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  51. dtlpy/entities/annotation_definitions/cube.py +204 -204
  52. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  53. dtlpy/entities/annotation_definitions/description.py +32 -32
  54. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  55. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  56. dtlpy/entities/annotation_definitions/gis.py +69 -69
  57. dtlpy/entities/annotation_definitions/note.py +139 -139
  58. dtlpy/entities/annotation_definitions/point.py +117 -117
  59. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  60. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  61. dtlpy/entities/annotation_definitions/pose.py +92 -92
  62. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  63. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  64. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  65. dtlpy/entities/annotation_definitions/text.py +85 -85
  66. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  67. dtlpy/entities/app.py +220 -220
  68. dtlpy/entities/app_module.py +107 -107
  69. dtlpy/entities/artifact.py +174 -174
  70. dtlpy/entities/assignment.py +399 -399
  71. dtlpy/entities/base_entity.py +214 -214
  72. dtlpy/entities/bot.py +113 -113
  73. dtlpy/entities/codebase.py +296 -296
  74. dtlpy/entities/collection.py +38 -38
  75. dtlpy/entities/command.py +169 -169
  76. dtlpy/entities/compute.py +442 -442
  77. dtlpy/entities/dataset.py +1285 -1285
  78. dtlpy/entities/directory_tree.py +44 -44
  79. dtlpy/entities/dpk.py +470 -470
  80. dtlpy/entities/driver.py +222 -222
  81. dtlpy/entities/execution.py +397 -397
  82. dtlpy/entities/feature.py +124 -124
  83. dtlpy/entities/feature_set.py +145 -145
  84. dtlpy/entities/filters.py +641 -641
  85. dtlpy/entities/gis_item.py +107 -107
  86. dtlpy/entities/integration.py +184 -184
  87. dtlpy/entities/item.py +953 -953
  88. dtlpy/entities/label.py +123 -123
  89. dtlpy/entities/links.py +85 -85
  90. dtlpy/entities/message.py +175 -175
  91. dtlpy/entities/model.py +694 -691
  92. dtlpy/entities/node.py +1005 -1005
  93. dtlpy/entities/ontology.py +803 -803
  94. dtlpy/entities/organization.py +287 -287
  95. dtlpy/entities/package.py +657 -657
  96. dtlpy/entities/package_defaults.py +5 -5
  97. dtlpy/entities/package_function.py +185 -185
  98. dtlpy/entities/package_module.py +113 -113
  99. dtlpy/entities/package_slot.py +118 -118
  100. dtlpy/entities/paged_entities.py +290 -267
  101. dtlpy/entities/pipeline.py +593 -593
  102. dtlpy/entities/pipeline_execution.py +279 -279
  103. dtlpy/entities/project.py +394 -394
  104. dtlpy/entities/prompt_item.py +499 -499
  105. dtlpy/entities/recipe.py +301 -301
  106. dtlpy/entities/reflect_dict.py +102 -102
  107. dtlpy/entities/resource_execution.py +138 -138
  108. dtlpy/entities/service.py +958 -958
  109. dtlpy/entities/service_driver.py +117 -117
  110. dtlpy/entities/setting.py +294 -294
  111. dtlpy/entities/task.py +491 -491
  112. dtlpy/entities/time_series.py +143 -143
  113. dtlpy/entities/trigger.py +426 -426
  114. dtlpy/entities/user.py +118 -118
  115. dtlpy/entities/webhook.py +124 -124
  116. dtlpy/examples/__init__.py +19 -19
  117. dtlpy/examples/add_labels.py +135 -135
  118. dtlpy/examples/add_metadata_to_item.py +21 -21
  119. dtlpy/examples/annotate_items_using_model.py +65 -65
  120. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  121. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  122. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  123. dtlpy/examples/convert_annotation_types.py +51 -51
  124. dtlpy/examples/converter.py +143 -143
  125. dtlpy/examples/copy_annotations.py +22 -22
  126. dtlpy/examples/copy_folder.py +31 -31
  127. dtlpy/examples/create_annotations.py +51 -51
  128. dtlpy/examples/create_video_annotations.py +83 -83
  129. dtlpy/examples/delete_annotations.py +26 -26
  130. dtlpy/examples/filters.py +113 -113
  131. dtlpy/examples/move_item.py +23 -23
  132. dtlpy/examples/play_video_annotation.py +13 -13
  133. dtlpy/examples/show_item_and_mask.py +53 -53
  134. dtlpy/examples/triggers.py +49 -49
  135. dtlpy/examples/upload_batch_of_items.py +20 -20
  136. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  137. dtlpy/examples/upload_items_with_modalities.py +43 -43
  138. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  139. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  140. dtlpy/exceptions.py +125 -125
  141. dtlpy/miscellaneous/__init__.py +20 -20
  142. dtlpy/miscellaneous/dict_differ.py +95 -95
  143. dtlpy/miscellaneous/git_utils.py +217 -217
  144. dtlpy/miscellaneous/json_utils.py +14 -14
  145. dtlpy/miscellaneous/list_print.py +105 -105
  146. dtlpy/miscellaneous/zipping.py +130 -130
  147. dtlpy/ml/__init__.py +20 -20
  148. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  149. dtlpy/ml/base_model_adapter.py +945 -940
  150. dtlpy/ml/metrics.py +461 -461
  151. dtlpy/ml/predictions_utils.py +274 -274
  152. dtlpy/ml/summary_writer.py +57 -57
  153. dtlpy/ml/train_utils.py +60 -60
  154. dtlpy/new_instance.py +252 -252
  155. dtlpy/repositories/__init__.py +56 -56
  156. dtlpy/repositories/analytics.py +85 -85
  157. dtlpy/repositories/annotations.py +916 -916
  158. dtlpy/repositories/apps.py +383 -383
  159. dtlpy/repositories/artifacts.py +452 -452
  160. dtlpy/repositories/assignments.py +599 -599
  161. dtlpy/repositories/bots.py +213 -213
  162. dtlpy/repositories/codebases.py +559 -559
  163. dtlpy/repositories/collections.py +332 -348
  164. dtlpy/repositories/commands.py +158 -158
  165. dtlpy/repositories/compositions.py +61 -61
  166. dtlpy/repositories/computes.py +434 -406
  167. dtlpy/repositories/datasets.py +1291 -1291
  168. dtlpy/repositories/downloader.py +895 -895
  169. dtlpy/repositories/dpks.py +433 -433
  170. dtlpy/repositories/drivers.py +266 -266
  171. dtlpy/repositories/executions.py +817 -817
  172. dtlpy/repositories/feature_sets.py +226 -226
  173. dtlpy/repositories/features.py +238 -238
  174. dtlpy/repositories/integrations.py +484 -484
  175. dtlpy/repositories/items.py +909 -915
  176. dtlpy/repositories/messages.py +94 -94
  177. dtlpy/repositories/models.py +877 -867
  178. dtlpy/repositories/nodes.py +80 -80
  179. dtlpy/repositories/ontologies.py +511 -511
  180. dtlpy/repositories/organizations.py +525 -525
  181. dtlpy/repositories/packages.py +1941 -1941
  182. dtlpy/repositories/pipeline_executions.py +448 -448
  183. dtlpy/repositories/pipelines.py +642 -642
  184. dtlpy/repositories/projects.py +539 -539
  185. dtlpy/repositories/recipes.py +399 -399
  186. dtlpy/repositories/resource_executions.py +137 -137
  187. dtlpy/repositories/schema.py +120 -120
  188. dtlpy/repositories/service_drivers.py +213 -213
  189. dtlpy/repositories/services.py +1704 -1704
  190. dtlpy/repositories/settings.py +339 -339
  191. dtlpy/repositories/tasks.py +1124 -1124
  192. dtlpy/repositories/times_series.py +278 -278
  193. dtlpy/repositories/triggers.py +536 -536
  194. dtlpy/repositories/upload_element.py +257 -257
  195. dtlpy/repositories/uploader.py +651 -651
  196. dtlpy/repositories/webhooks.py +249 -249
  197. dtlpy/services/__init__.py +22 -22
  198. dtlpy/services/aihttp_retry.py +131 -131
  199. dtlpy/services/api_client.py +1782 -1782
  200. dtlpy/services/api_reference.py +40 -40
  201. dtlpy/services/async_utils.py +133 -133
  202. dtlpy/services/calls_counter.py +44 -44
  203. dtlpy/services/check_sdk.py +68 -68
  204. dtlpy/services/cookie.py +115 -115
  205. dtlpy/services/create_logger.py +156 -156
  206. dtlpy/services/events.py +84 -84
  207. dtlpy/services/logins.py +235 -235
  208. dtlpy/services/reporter.py +256 -256
  209. dtlpy/services/service_defaults.py +91 -91
  210. dtlpy/utilities/__init__.py +20 -20
  211. dtlpy/utilities/annotations/__init__.py +16 -16
  212. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  213. dtlpy/utilities/base_package_runner.py +264 -264
  214. dtlpy/utilities/converter.py +1650 -1650
  215. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  216. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  217. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  218. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  219. dtlpy/utilities/local_development/__init__.py +1 -1
  220. dtlpy/utilities/local_development/local_session.py +179 -179
  221. dtlpy/utilities/reports/__init__.py +2 -2
  222. dtlpy/utilities/reports/figures.py +343 -343
  223. dtlpy/utilities/reports/report.py +71 -71
  224. dtlpy/utilities/videos/__init__.py +17 -17
  225. dtlpy/utilities/videos/video_player.py +598 -598
  226. dtlpy/utilities/videos/videos.py +470 -470
  227. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp +1 -1
  228. dtlpy-1.114.13.data/scripts/dlp.bat +2 -0
  229. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp.py +128 -128
  230. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/LICENSE +200 -200
  231. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/METADATA +172 -172
  232. dtlpy-1.114.13.dist-info/RECORD +240 -0
  233. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/WHEEL +1 -1
  234. tests/features/environment.py +551 -550
  235. dtlpy-1.113.10.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.113.10.dist-info/RECORD +0 -244
  237. tests/assets/__init__.py +0 -0
  238. tests/assets/models_flow/__init__.py +0 -0
  239. tests/assets/models_flow/failedmain.py +0 -52
  240. tests/assets/models_flow/main.py +0 -62
  241. tests/assets/models_flow/main_model.py +0 -54
  242. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/entry_points.txt +0 -0
  243. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/top_level.txt +0 -0
@@ -1,895 +1,895 @@
1
- from requests.adapters import HTTPAdapter
2
- from urllib3.util import Retry
3
- from PIL import Image
4
- import numpy as np
5
- import traceback
6
- import warnings
7
- import requests
8
- import logging
9
- import shutil
10
- import json
11
- import tqdm
12
- import sys
13
- import os
14
- import io
15
-
16
- from .. import entities, repositories, miscellaneous, PlatformException, exceptions
17
- from ..services import Reporter
18
-
19
- logger = logging.getLogger(name='dtlpy')
20
-
21
- NUM_TRIES = 3 # try to download 3 time before fail on item
22
-
23
-
24
- class Downloader:
25
- def __init__(self, items_repository):
26
- self.items_repository = items_repository
27
-
28
- def download(self,
29
- # filter options
30
- filters: entities.Filters = None,
31
- items=None,
32
- # download options
33
- local_path=None,
34
- file_types=None,
35
- save_locally=True,
36
- to_array=False,
37
- overwrite=False,
38
- annotation_filters: entities.Filters = None,
39
- annotation_options: entities.ViewAnnotationOptions = None,
40
- to_items_folder=True,
41
- thickness=1,
42
- with_text=False,
43
- without_relative_path=None,
44
- avoid_unnecessary_annotation_download=False,
45
- include_annotations_in_output=True,
46
- export_png_files=False,
47
- filter_output_annotations=False,
48
- alpha=1,
49
- export_version=entities.ExportVersion.V1,
50
- dataset_lock=False,
51
- lock_timeout_sec=None,
52
- export_summary=False
53
- ):
54
- """
55
- Download dataset by filters.
56
- Filtering the dataset for items and save them local
57
- Optional - also download annotation, mask, instance and image mask of the item
58
-
59
- :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
60
- :param items: download Item entity or item_id (or a list of item)
61
- :param local_path: local folder or filename to save to.
62
- :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
63
- :param save_locally: bool. save to disk or return a buffer
64
- :param to_array: returns Ndarray when True and local_path = False
65
- :param overwrite: optional - default = False
66
- :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
67
- :param annotation_filters: Filters entity to filter annotations for download
68
- :param to_items_folder: Create 'items' folder and download items to it
69
- :param with_text: optional - add text to annotations, default = False
70
- :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
71
- :param without_relative_path: bool - download items without the relative path from platform
72
- :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
73
- :param include_annotations_in_output: default - False , if export should contain annotations
74
- :param export_png_files: default - True, if semantic annotations should be exported as png files
75
- :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
76
- :param alpha: opacity value [0 1], default 1
77
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
78
- :param bool dataset_lock: optional - default = False
79
- :param bool export_summary: optional - default = False
80
- :param int lock_timeout_sec: optional
81
- :return: Output (list)
82
- """
83
-
84
- ###################
85
- # Default options #
86
- ###################
87
- # annotation options
88
- if annotation_options is None:
89
- annotation_options = list()
90
- elif not isinstance(annotation_options, list):
91
- annotation_options = [annotation_options]
92
- for ann_option in annotation_options:
93
- if not isinstance(ann_option, entities.ViewAnnotationOptions):
94
- if ann_option not in list(entities.ViewAnnotationOptions):
95
- raise PlatformException(
96
- error='400',
97
- message='Unknown annotation download option: {}, please choose from: {}'.format(
98
- ann_option, list(entities.ViewAnnotationOptions)))
99
- # normalize items argument: treat empty list as “no items specified”
100
- if isinstance(items, list) and len(items) == 0:
101
- items = None
102
- #####################
103
- # items to download #
104
- #####################
105
- if items is not None:
106
- # convert input to a list
107
- if not isinstance(items, list):
108
- items = [items]
109
- # get items by id
110
- if isinstance(items[0], str):
111
- items = [self.items_repository.get(item_id=item_id) for item_id in items]
112
- elif isinstance(items[0], entities.Item):
113
- pass
114
- else:
115
- raise PlatformException(
116
- error="400",
117
- message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
118
- type(items[0])
119
- )
120
- )
121
- # create filters to download annotations
122
- filters = entities.Filters(field='id',
123
- values=[item.id for item in items],
124
- operator=entities.FiltersOperations.IN)
125
- filters._user_query = 'false'
126
-
127
- # convert to list of list (like pages and page)
128
- items_to_download = [items]
129
- num_items = len(items)
130
- else:
131
- # filters
132
- if filters is None:
133
- filters = entities.Filters()
134
- filters._user_query = 'false'
135
- # file types
136
- if file_types is not None:
137
- filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
138
- if annotation_filters is not None:
139
- for annotation_filter_and in annotation_filters.and_filter_list:
140
- filters.add_join(field=annotation_filter_and.field,
141
- values=annotation_filter_and.values,
142
- operator=annotation_filter_and.operator,
143
- method=entities.FiltersMethod.AND)
144
- for annotation_filter_or in annotation_filters.or_filter_list:
145
- filters.add_join(field=annotation_filter_or.field,
146
- values=annotation_filter_or.values,
147
- operator=annotation_filter_or.operator,
148
- method=entities.FiltersMethod.OR)
149
- else:
150
- annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
151
- filters._user_query = 'false'
152
-
153
- items_to_download = self.items_repository.list(filters=filters)
154
- num_items = items_to_download.items_count
155
-
156
- if num_items == 0:
157
- logger.warning('No items found! Nothing was downloaded')
158
- return list()
159
-
160
- ##############
161
- # local path #
162
- ##############
163
- is_folder = False
164
- if local_path is None:
165
- # create default local path
166
- local_path = self.__default_local_path()
167
-
168
- if os.path.isdir(local_path):
169
- logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
170
- local_path))
171
- is_folder = True
172
- else:
173
- # check if filename
174
- _, ext = os.path.splitext(local_path)
175
- if num_items > 1:
176
- is_folder = True
177
- else:
178
- item_to_download = items_to_download[0][0]
179
- file_name = item_to_download.name
180
- _, ext_download = os.path.splitext(file_name)
181
- if ext_download != ext:
182
- is_folder = True
183
- if is_folder and save_locally:
184
- path_to_create = local_path
185
- if local_path.endswith('*'):
186
- path_to_create = os.path.dirname(local_path)
187
- logger.info("Creating new directory for download: {}".format(path_to_create))
188
- os.makedirs(path_to_create, exist_ok=True)
189
-
190
- ####################
191
- # annotations json #
192
- ####################
193
- # download annotations' json files in a new thread
194
- # items will start downloading and if json not exists yet - will download for each file
195
- if num_items > 1 and annotation_options:
196
- # a new folder named 'json' will be created under the "local_path"
197
- logger.info("Downloading annotations formats: {}".format(annotation_options))
198
- self.download_annotations(**{
199
- "dataset": self.items_repository.dataset,
200
- "filters": filters,
201
- "annotation_filters": annotation_filters,
202
- "local_path": local_path,
203
- 'overwrite': overwrite,
204
- 'include_annotations_in_output': include_annotations_in_output,
205
- 'export_png_files': export_png_files,
206
- 'filter_output_annotations': filter_output_annotations,
207
- 'export_version': export_version,
208
- 'dataset_lock': dataset_lock,
209
- 'lock_timeout_sec': lock_timeout_sec,
210
- 'export_summary': export_summary
211
- })
212
- ###############
213
- # downloading #
214
- ###############
215
- # create result lists
216
- client_api = self.items_repository._client_api
217
-
218
- reporter = Reporter(num_workers=num_items,
219
- resource=Reporter.ITEMS_DOWNLOAD,
220
- print_error_logs=client_api.verbose.print_error_logs,
221
- client_api=client_api)
222
- jobs = [None for _ in range(num_items)]
223
- # pool
224
- pool = client_api.thread_pools(pool_name='item.download')
225
- # download
226
- pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
227
- desc='Download Items')
228
- try:
229
- i_item = 0
230
- for page in items_to_download:
231
- for item in page:
232
- if item.type == "dir":
233
- continue
234
- if save_locally:
235
- # get local file path
236
- item_local_path, item_local_filepath = self.__get_local_filepath(
237
- local_path=local_path,
238
- without_relative_path=without_relative_path,
239
- item=item,
240
- to_items_folder=to_items_folder,
241
- is_folder=is_folder)
242
-
243
- if os.path.isfile(item_local_filepath) and not overwrite:
244
- logger.debug("File Exists: {}".format(item_local_filepath))
245
- reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
246
- pbar.update()
247
- if annotation_options and item.annotated:
248
- # download annotations only
249
- jobs[i_item] = pool.submit(
250
- self._download_img_annotations,
251
- **{
252
- "item": item,
253
- "img_filepath": item_local_filepath,
254
- "overwrite": overwrite,
255
- "annotation_options": annotation_options,
256
- "annotation_filters": annotation_filters,
257
- "local_path": item_local_path,
258
- "thickness": thickness,
259
- "alpha": alpha,
260
- "with_text": with_text,
261
- "export_version": export_version,
262
- },
263
- )
264
- i_item += 1
265
- continue
266
- else:
267
- item_local_path = None
268
- item_local_filepath = None
269
-
270
- # download single item
271
- jobs[i_item] = pool.submit(
272
- self.__thread_download_wrapper,
273
- **{
274
- "i_item": i_item,
275
- "item": item,
276
- "item_local_path": item_local_path,
277
- "item_local_filepath": item_local_filepath,
278
- "save_locally": save_locally,
279
- "to_array": to_array,
280
- "annotation_options": annotation_options,
281
- "annotation_filters": annotation_filters,
282
- "reporter": reporter,
283
- "pbar": pbar,
284
- "overwrite": overwrite,
285
- "thickness": thickness,
286
- "alpha": alpha,
287
- "with_text": with_text,
288
- "export_version": export_version
289
- },
290
- )
291
- i_item += 1
292
- except Exception:
293
- logger.exception('Error downloading:')
294
- finally:
295
- _ = [j.result() for j in jobs if j is not None]
296
- pbar.close()
297
- # reporting
298
- n_download = reporter.status_count(status='download')
299
- n_exist = reporter.status_count(status='exist')
300
- n_error = reporter.status_count(status='error')
301
- logger.info("Number of files downloaded:{}".format(n_download))
302
- logger.info("Number of files exists: {}".format(n_exist))
303
- logger.info("Total number of files: {}".format(n_download + n_exist))
304
-
305
- # log error
306
- if n_error > 0:
307
- log_filepath = reporter.generate_log_files()
308
- if log_filepath is not None:
309
- logger.warning("Errors in {} files. See {} for full log".format(n_error, log_filepath))
310
- if int(n_download) <= 1 and int(n_exist) <= 1:
311
- try:
312
- return next(reporter.output)
313
- except StopIteration:
314
- return None
315
- return reporter.output
316
-
317
- def __thread_download_wrapper(self, i_item,
318
- # item params
319
- item, item_local_path, item_local_filepath,
320
- save_locally, to_array, overwrite,
321
- # annotations params
322
- annotation_options, annotation_filters, with_text, thickness,
323
- # threading params
324
- reporter, pbar, alpha, export_version):
325
-
326
- download = None
327
- err = None
328
- trace = None
329
- for i_try in range(NUM_TRIES):
330
- try:
331
- logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
332
- i=i_try + 1,
333
- n=NUM_TRIES))
334
- download = self.__thread_download(item=item,
335
- save_locally=save_locally,
336
- to_array=to_array,
337
- local_path=item_local_path,
338
- local_filepath=item_local_filepath,
339
- annotation_options=annotation_options,
340
- annotation_filters=annotation_filters,
341
- overwrite=overwrite,
342
- thickness=thickness,
343
- alpha=alpha,
344
- with_text=with_text,
345
- export_version=export_version)
346
- logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
347
- i=i_try + 1,
348
- n=NUM_TRIES,
349
- id=item.id))
350
- if download is not None:
351
- break
352
- except Exception as e:
353
- logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
354
- i=i_try + 1,
355
- n=NUM_TRIES))
356
- err = e
357
- trace = traceback.format_exc()
358
- pbar.update()
359
- if download is None:
360
- if err is None:
361
- err = self.items_repository._client_api.platform_exception
362
- reporter.set_index(status="error", ref=item.id, success=False,
363
- error="{}\n{}".format(err, trace))
364
- else:
365
- reporter.set_index(ref=item.id, status="download", output=download, success=True)
366
-
367
- @staticmethod
368
- def download_annotations(dataset: entities.Dataset,
369
- local_path: str,
370
- filters: entities.Filters = None,
371
- annotation_filters: entities.Filters = None,
372
- overwrite=False,
373
- include_annotations_in_output=True,
374
- export_png_files=False,
375
- filter_output_annotations=False,
376
- export_version=entities.ExportVersion.V1,
377
- dataset_lock=False,
378
- lock_timeout_sec=None,
379
- export_summary=False
380
- ):
381
- """
382
- Download annotations json for entire dataset
383
-
384
- :param dataset: Dataset entity
385
- :param local_path:
386
- :param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
387
- :param annotation_filters: dl.Filters entity to filters items' annotations
388
- :param overwrite: optional - overwrite annotations if exist, default = false
389
- :param include_annotations_in_output: default - True , if export should contain annotations
390
- :param export_png_files: default - if True, semantic annotations should be exported as png files
391
- :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
392
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
393
- :param bool dataset_lock: optional - default = False
394
- :param bool export_summary: optional - default = False
395
- :param int lock_timeout_sec: optional
396
- :return:
397
- """
398
- local_path = os.path.join(local_path, "json")
399
- zip_filepath = None
400
- # only if json folder does not exist or exist and overwrite
401
- if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
402
- # create local path to download and save to
403
- if not os.path.isdir(local_path):
404
- os.makedirs(local_path)
405
-
406
- try:
407
- payload = dict()
408
- if filters is not None:
409
- payload['itemsQuery'] = filters.prepare()
410
- payload['annotations'] = {
411
- "include": include_annotations_in_output,
412
- "convertSemantic": export_png_files
413
- }
414
- payload['exportVersion'] = export_version
415
- if annotation_filters is not None:
416
- payload['annotationsQuery'] = annotation_filters.prepare()
417
- payload['annotations']['filter'] = filter_output_annotations
418
- if dataset_lock:
419
- payload['datasetLock'] = dataset_lock
420
-
421
- if export_summary:
422
- payload['summary'] = export_summary
423
-
424
- if lock_timeout_sec:
425
- payload['lockTimeoutSec'] = lock_timeout_sec
426
-
427
- success, response = dataset._client_api.gen_request(req_type='post',
428
- path='/datasets/{}/export'.format(dataset.id),
429
- json_req=payload,
430
- headers={'user_query': filters._user_query})
431
- if not success:
432
- raise exceptions.PlatformException(response)
433
- command = entities.Command.from_json(_json=response.json(),
434
- client_api=dataset._client_api)
435
- command = command.wait(timeout=0)
436
- if 'outputItemId' not in command.spec:
437
- raise exceptions.PlatformException(
438
- error='400',
439
- message="outputItemId key is missing in command response: {}".format(response))
440
- item_id = command.spec['outputItemId']
441
- annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
442
- zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
443
- # unzipping annotations to directory
444
- if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
445
- raise exceptions.PlatformException(
446
- error='404',
447
- message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
448
- annotation_zip_item.id))
449
- try:
450
- miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
451
- to_directory=local_path)
452
- except Exception as e:
453
- logger.warning("Failed to extract zip file error: {}".format(e))
454
-
455
- finally:
456
- # cleanup
457
- if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
458
- os.remove(zip_filepath)
459
-
460
- @staticmethod
461
- def _download_img_annotations(item: entities.Item,
462
- img_filepath,
463
- local_path,
464
- overwrite,
465
- annotation_options,
466
- annotation_filters,
467
- thickness=1,
468
- with_text=False,
469
- alpha=1,
470
- export_version=entities.ExportVersion.V1
471
- ):
472
-
473
- # check if local_path is a file name
474
- _, ext = os.path.splitext(local_path)
475
- if ext:
476
- # take the dir of the file for the annotations save
477
- local_path = os.path.dirname(local_path)
478
-
479
- # fix local path
480
- if local_path.endswith("/items") or local_path.endswith("\\items"):
481
- local_path = os.path.dirname(local_path)
482
-
483
- annotation_rel_path = item.filename[1:]
484
- if img_filepath is not None:
485
- dir_name = os.path.dirname(annotation_rel_path)
486
- base_name = os.path.basename(img_filepath)
487
- annotation_rel_path = os.path.join(dir_name, base_name)
488
-
489
- # find annotations json
490
- annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
491
- if export_version == entities.ExportVersion.V1:
492
- name, _ = os.path.splitext(annotations_json_filepath)
493
- else:
494
- name = annotations_json_filepath
495
- annotations_json_filepath = name + ".json"
496
-
497
- if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
498
- # if exists take from json file
499
- with open(annotations_json_filepath, "r", encoding="utf8") as f:
500
- data = json.load(f)
501
- if "annotations" in data:
502
- data = data["annotations"]
503
- annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
504
- # no need to use the filters here because the annotations were already downloaded with annotation_filters
505
- else:
506
- # if json file doesnt exist get the annotations from platform
507
- annotations = item.annotations.list(filters=annotation_filters)
508
-
509
- # get image shape
510
- is_url_item = item.metadata. \
511
- get('system', dict()). \
512
- get('shebang', dict()). \
513
- get('linkInfo', dict()). \
514
- get('type', None) == 'url'
515
-
516
- if item is not None:
517
- orientation = item.system.get('exif', {}).get('Orientation', 0)
518
- else:
519
- orientation = 0
520
- if item.width is not None and item.height is not None:
521
- if orientation in [5, 6, 7, 8]:
522
- img_shape = (item.width, item.height)
523
- else:
524
- img_shape = (item.height, item.width)
525
- elif ('image' in item.mimetype and img_filepath is not None) or \
526
- (is_url_item and img_filepath is not None):
527
- img_shape = Image.open(img_filepath).size[::-1]
528
- else:
529
- img_shape = (0, 0)
530
-
531
- # download all annotation options
532
- for option in annotation_options:
533
- # get path and create dirs
534
- annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
535
- if not os.path.isdir(os.path.dirname(annotation_filepath)):
536
- os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
537
-
538
- if export_version == entities.ExportVersion.V1:
539
- temp_path, ext = os.path.splitext(annotation_filepath)
540
- else:
541
- temp_path = annotation_filepath
542
-
543
- if option == entities.ViewAnnotationOptions.JSON:
544
- if not os.path.isfile(annotations_json_filepath):
545
- annotations.download(
546
- filepath=annotations_json_filepath,
547
- annotation_format=option,
548
- height=img_shape[0],
549
- width=img_shape[1],
550
- )
551
- elif option in [entities.ViewAnnotationOptions.MASK,
552
- entities.ViewAnnotationOptions.INSTANCE,
553
- entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
554
- entities.ViewAnnotationOptions.OBJECT_ID,
555
- entities.ViewAnnotationOptions.VTT]:
556
- if option == entities.ViewAnnotationOptions.VTT:
557
- annotation_filepath = temp_path + ".vtt"
558
- else:
559
- if 'video' in item.mimetype:
560
- annotation_filepath = temp_path + ".mp4"
561
- else:
562
- annotation_filepath = temp_path + ".png"
563
- if not os.path.isfile(annotation_filepath) or overwrite:
564
- # if not exists OR (exists AND overwrite)
565
- if not os.path.exists(os.path.dirname(annotation_filepath)):
566
- # create folder if not exists
567
- os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
568
- if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
569
- raise PlatformException(
570
- error="1002",
571
- message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
572
- annotations.download(
573
- filepath=annotation_filepath,
574
- img_filepath=img_filepath,
575
- annotation_format=option,
576
- height=img_shape[0],
577
- width=img_shape[1],
578
- thickness=thickness,
579
- alpha=alpha,
580
- with_text=with_text,
581
- orientation=orientation
582
- )
583
- else:
584
- raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
585
-
586
- @staticmethod
587
- def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
588
- # create paths
589
- _, ext = os.path.splitext(local_path)
590
- if ext and not is_folder:
591
- # local_path is a filename
592
- local_filepath = local_path
593
- local_path = os.path.dirname(local_filepath)
594
- else:
595
- # if directory - get item's filename
596
- if to_items_folder:
597
- local_path = os.path.join(local_path, "items")
598
- elif is_folder:
599
- local_path = os.path.join(local_path, "")
600
- if without_relative_path is not None:
601
- local_filepath = os.path.join(local_path, item.name)
602
- else:
603
- local_filepath = os.path.join(local_path, item.filename[1:])
604
- return local_path, local_filepath
605
-
606
- @staticmethod
607
- def __get_link_source(item):
608
- assert isinstance(item, entities.Item)
609
- if not item.is_fetched:
610
- return item, '', False
611
-
612
- if not item.filename.endswith('.json') or \
613
- item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') != 'link':
614
- return item, '', False
615
-
616
- # recursively get next id link item
617
- while item.filename.endswith('.json') and \
618
- item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
619
- item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'id':
620
- item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])
621
-
622
- # check if link
623
- if item.filename.endswith('.json') and \
624
- item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
625
- item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
626
- url = item.metadata['system']['shebang']['linkInfo']['ref']
627
- return item, url, True
628
- else:
629
- return item, '', False
630
-
631
- def __file_validation(self, item, downloaded_file):
632
- res = False
633
- resume = True
634
- if isinstance(downloaded_file, io.BytesIO):
635
- file_size = downloaded_file.getbuffer().nbytes
636
- else:
637
- file_size = os.stat(downloaded_file).st_size
638
- expected_size = item.metadata['system']['size']
639
- size_diff = file_size - expected_size
640
- if size_diff == 0:
641
- res = True
642
- if size_diff > 0:
643
- resume = False
644
- return res, file_size, resume
645
-
646
- def __thread_download(self,
647
- item,
648
- save_locally,
649
- local_path,
650
- to_array,
651
- local_filepath,
652
- overwrite,
653
- annotation_options,
654
- annotation_filters,
655
- chunk_size=8192,
656
- thickness=1,
657
- with_text=False,
658
- alpha=1,
659
- export_version=entities.ExportVersion.V1
660
- ):
661
- """
662
- Get a single item's binary data
663
- Calling this method will returns the item body itself , an image for example with the proper mimetype.
664
-
665
- :param item: Item entity to download
666
- :param save_locally: bool. save to file or return buffer
667
- :param local_path: item local folder to save to.
668
- :param to_array: returns Ndarray when True and local_path = False
669
- :param local_filepath: item local filepath
670
- :param overwrite: overwrite the file is existing
671
- :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
672
- :param annotation_filters: Filters entity to filter item's annotation
673
- :param chunk_size: size of chunks to download - optional. default = 8192
674
- :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
675
- :param with_text: optional - add text to annotations, default = False
676
- :param alpha: opacity value [0 1], default 1
677
- :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
678
- :return:
679
- """
680
- # check if need to download image binary from platform
681
- need_to_download = True
682
- if save_locally and os.path.isfile(local_filepath):
683
- need_to_download = overwrite
684
-
685
- item, url, is_url = self.__get_link_source(item=item)
686
-
687
- # save as byte stream
688
- data = io.BytesIO()
689
- if need_to_download:
690
- chunk_resume = {0: 0}
691
- start_point = 0
692
- download_done = False
693
- while chunk_resume.get(start_point, '') != 3 and not download_done:
694
- if not is_url:
695
- headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
696
- result, response = self.items_repository._client_api.gen_request(req_type="get",
697
- headers=headers,
698
- path="/items/{}/stream".format(
699
- item.id),
700
- stream=True,
701
- dataset_id=item.dataset_id)
702
- if not result:
703
- if os.path.isfile(local_filepath + '.download'):
704
- os.remove(local_filepath + '.download')
705
- raise PlatformException(response)
706
- else:
707
- _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
708
- if local_filepath:
709
- local_filepath += ext
710
- response = self.get_url_stream(url=url)
711
-
712
- if save_locally:
713
- # save to file
714
- if not os.path.exists(os.path.dirname(local_filepath)):
715
- # create folder if not exists
716
- os.makedirs(os.path.dirname(local_filepath), exist_ok=True)
717
-
718
- # decide if create progress bar for item
719
- total_length = response.headers.get("content-length")
720
- one_file_pbar = None
721
- try:
722
- one_file_progress_bar = total_length is not None and int(
723
- total_length) > 10e6 # size larger than 10 MB
724
- if one_file_progress_bar:
725
- one_file_pbar = tqdm.tqdm(total=int(total_length),
726
- unit='B',
727
- unit_scale=True,
728
- unit_divisor=1024,
729
- position=1,
730
- file=sys.stdout,
731
- disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
732
- desc='Download Item')
733
- except Exception as err:
734
- one_file_progress_bar = False
735
- logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))
736
-
737
- # start download
738
- if self.items_repository._client_api.sdk_cache.use_cache and \
739
- self.items_repository._client_api.cache is not None:
740
- response_output = os.path.normpath(response.content)
741
- if isinstance(response_output, bytes):
742
- response_output = response_output.decode('utf-8')[1:-1]
743
-
744
- if os.path.isfile(os.path.normpath(response_output)):
745
- if response_output != local_filepath:
746
- source_path = os.path.normpath(response_output)
747
- shutil.copyfile(source_path, local_filepath)
748
- else:
749
- try:
750
- temp_file_path = local_filepath + '.download'
751
- with open(temp_file_path, "ab") as f:
752
- try:
753
- for chunk in response.iter_content(chunk_size=chunk_size):
754
- if chunk: # filter out keep-alive new chunks
755
- f.write(chunk)
756
- if one_file_progress_bar:
757
- one_file_pbar.update(len(chunk))
758
- except Exception as err:
759
- pass
760
-
761
- file_validation = True
762
- if not is_url:
763
- file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
764
- download_progress=temp_file_path,
765
- chunk_resume=chunk_resume)
766
- if file_validation:
767
- shutil.move(temp_file_path, local_filepath)
768
- download_done = True
769
- except Exception as err:
770
- if os.path.isfile(temp_file_path):
771
- os.remove(temp_file_path)
772
- raise err
773
- if one_file_progress_bar:
774
- one_file_pbar.close()
775
- # save to output variable
776
- data = local_filepath
777
- # if image - can download annotation mask
778
- if item.annotated and annotation_options:
779
- self._download_img_annotations(item=item,
780
- img_filepath=local_filepath,
781
- annotation_options=annotation_options,
782
- annotation_filters=annotation_filters,
783
- local_path=local_path,
784
- overwrite=overwrite,
785
- thickness=thickness,
786
- alpha=alpha,
787
- with_text=with_text,
788
- export_version=export_version
789
- )
790
- else:
791
- if self.items_repository._client_api.sdk_cache.use_cache and \
792
- self.items_repository._client_api.cache is not None:
793
- response_output = os.path.normpath(response.content)
794
- if isinstance(response_output, bytes):
795
- response_output = response_output.decode('utf-8')[1:-1]
796
-
797
- if os.path.isfile(response_output):
798
- source_file = response_output
799
- with open(source_file, 'wb') as f:
800
- data = f.read()
801
- else:
802
- try:
803
- for chunk in response.iter_content(chunk_size=chunk_size):
804
- if chunk: # filter out keep-alive new chunks
805
- data.write(chunk)
806
-
807
- file_validation = True
808
- if not is_url:
809
- file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
810
- download_progress=data,
811
- chunk_resume=chunk_resume)
812
- if file_validation:
813
- download_done = True
814
- else:
815
- continue
816
- except Exception as err:
817
- raise err
818
- # go back to the beginning of the stream
819
- data.seek(0)
820
- data.name = item.name
821
- if not save_locally and to_array:
822
- if 'image' not in item.mimetype and not is_url:
823
- raise PlatformException(
824
- error="400",
825
- message='Download element type numpy.ndarray support for image only. '
826
- 'Item Id: {} is {} type'.format(item.id, item.mimetype))
827
-
828
- data = np.array(Image.open(data))
829
- else:
830
- data = local_filepath
831
- return data
832
-
833
- def __get_next_chunk(self, item, download_progress, chunk_resume):
834
- size_validation, file_size, resume = self.__file_validation(item=item,
835
- downloaded_file=download_progress)
836
- start_point = file_size
837
- if not size_validation:
838
- if chunk_resume.get(start_point, None) is None:
839
- chunk_resume = {start_point: 1}
840
- else:
841
- chunk_resume[start_point] += 1
842
- if chunk_resume[start_point] == 3 or not resume:
843
- raise PlatformException(
844
- error=500,
845
- message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
846
- return size_validation, start_point, chunk_resume
847
-
848
- def __default_local_path(self):
849
-
850
- # create default local path
851
- if self.items_repository._dataset is None:
852
- local_path = os.path.join(
853
- self.items_repository._client_api.sdk_cache.cache_path_bin,
854
- "items",
855
- )
856
- else:
857
- if self.items_repository.dataset._project is None:
858
- # by dataset name
859
- local_path = os.path.join(
860
- self.items_repository._client_api.sdk_cache.cache_path_bin,
861
- "datasets",
862
- "{}_{}".format(self.items_repository.dataset.name, self.items_repository.dataset.id),
863
- )
864
- else:
865
- # by dataset and project name
866
- local_path = os.path.join(
867
- self.items_repository._client_api.sdk_cache.cache_path_bin,
868
- "projects",
869
- self.items_repository.dataset.project.name,
870
- "datasets",
871
- self.items_repository.dataset.name,
872
- )
873
- logger.info("Downloading to: {}".format(local_path))
874
- return local_path
875
-
876
- @staticmethod
877
- def get_url_stream(url):
878
- """
879
- :param url:
880
- """
881
- # This will download the binaries from the URL user provided
882
- prepared_request = requests.Request(method='GET', url=url).prepare()
883
- with requests.Session() as s:
884
- retry = Retry(
885
- total=3,
886
- read=3,
887
- connect=3,
888
- backoff_factor=1,
889
- )
890
- adapter = HTTPAdapter(max_retries=retry)
891
- s.mount('http://', adapter)
892
- s.mount('https://', adapter)
893
- response = s.send(request=prepared_request, stream=True)
894
-
895
- return response
1
+ from requests.adapters import HTTPAdapter
2
+ from urllib3.util import Retry
3
+ from PIL import Image
4
+ import numpy as np
5
+ import traceback
6
+ import warnings
7
+ import requests
8
+ import logging
9
+ import shutil
10
+ import json
11
+ import tqdm
12
+ import sys
13
+ import os
14
+ import io
15
+
16
+ from .. import entities, repositories, miscellaneous, PlatformException, exceptions
17
+ from ..services import Reporter
18
+
19
+ logger = logging.getLogger(name='dtlpy')
20
+
21
+ NUM_TRIES = 3 # try to download 3 time before fail on item
22
+
23
+
24
+ class Downloader:
25
+ def __init__(self, items_repository):
26
+ self.items_repository = items_repository
27
+
28
    def download(self,
                 # filter options
                 filters: entities.Filters = None,
                 items=None,
                 # download options
                 local_path=None,
                 file_types=None,
                 save_locally=True,
                 to_array=False,
                 overwrite=False,
                 annotation_filters: entities.Filters = None,
                 annotation_options: entities.ViewAnnotationOptions = None,
                 to_items_folder=True,
                 thickness=1,
                 with_text=False,
                 without_relative_path=None,
                 avoid_unnecessary_annotation_download=False,
                 include_annotations_in_output=True,
                 export_png_files=False,
                 filter_output_annotations=False,
                 alpha=1,
                 export_version=entities.ExportVersion.V1,
                 dataset_lock=False,
                 lock_timeout_sec=None,
                 export_summary=False
                 ):
        """
        Download dataset by filters.
        Filtering the dataset for items and save them local
        Optional - also download annotation, mask, instance and image mask of the item

        :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
        :param items: download Item entity or item_id (or a list of item)
        :param local_path: local folder or filename to save to.
        :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
        :param save_locally: bool. save to disk or return a buffer
        :param to_array: returns Ndarray when True and local_path = False
        :param overwrite: optional - default = False
        :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
        :param annotation_filters: Filters entity to filter annotations for download
        :param to_items_folder: Create 'items' folder and download items to it
        :param with_text: optional - add text to annotations, default = False
        :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
        :param without_relative_path: bool - download items without the relative path from platform
        :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
        :param include_annotations_in_output: default - False , if export should contain annotations
        :param export_png_files: default - True, if semantic annotations should be exported as png files
        :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
        :param alpha: opacity value [0 1], default 1
        :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
        :param bool dataset_lock: optional - default = False
        :param bool export_summary: optional - default = False
        :param int lock_timeout_sec: optional
        :return: Output (list)
        """

        ###################
        # Default options #
        ###################
        # annotation options
        if annotation_options is None:
            annotation_options = list()
        elif not isinstance(annotation_options, list):
            annotation_options = [annotation_options]
        # validate each requested option against the known enum values
        for ann_option in annotation_options:
            if not isinstance(ann_option, entities.ViewAnnotationOptions):
                if ann_option not in list(entities.ViewAnnotationOptions):
                    raise PlatformException(
                        error='400',
                        message='Unknown annotation download option: {}, please choose from: {}'.format(
                            ann_option, list(entities.ViewAnnotationOptions)))
        # normalize items argument: treat empty list as "no items specified"
        if isinstance(items, list) and len(items) == 0:
            items = None
        #####################
        # items to download #
        #####################
        if items is not None:
            # convert input to a list
            if not isinstance(items, list):
                items = [items]
            # get items by id
            if isinstance(items[0], str):
                items = [self.items_repository.get(item_id=item_id) for item_id in items]
            elif isinstance(items[0], entities.Item):
                pass
            else:
                raise PlatformException(
                    error="400",
                    message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
                        type(items[0])
                    )
                )
            # create filters to download annotations
            filters = entities.Filters(field='id',
                                       values=[item.id for item in items],
                                       operator=entities.FiltersOperations.IN)
            filters._user_query = 'false'

            # convert to list of list (like pages and page)
            items_to_download = [items]
            num_items = len(items)
        else:
            # filters
            if filters is None:
                filters = entities.Filters()
                filters._user_query = 'false'
            # file types
            if file_types is not None:
                filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
            # fold the annotation filters into the item query as join clauses
            if annotation_filters is not None:
                for annotation_filter_and in annotation_filters.and_filter_list:
                    filters.add_join(field=annotation_filter_and.field,
                                     values=annotation_filter_and.values,
                                     operator=annotation_filter_and.operator,
                                     method=entities.FiltersMethod.AND)
                for annotation_filter_or in annotation_filters.or_filter_list:
                    filters.add_join(field=annotation_filter_or.field,
                                     values=annotation_filter_or.values,
                                     operator=annotation_filter_or.operator,
                                     method=entities.FiltersMethod.OR)
            else:
                annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
                filters._user_query = 'false'

            items_to_download = self.items_repository.list(filters=filters)
            num_items = items_to_download.items_count

        if num_items == 0:
            logger.warning('No items found! Nothing was downloaded')
            return list()

        ##############
        # local path #
        ##############
        is_folder = False
        if local_path is None:
            # create default local path
            local_path = self.__default_local_path()

        if os.path.isdir(local_path):
            logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
                local_path))
            is_folder = True
        else:
            # check if filename
            _, ext = os.path.splitext(local_path)
            if num_items > 1:
                is_folder = True
            else:
                # single item: local_path is a folder unless its extension
                # matches the item's own extension
                item_to_download = items_to_download[0][0]
                file_name = item_to_download.name
                _, ext_download = os.path.splitext(file_name)
                if ext_download != ext:
                    is_folder = True
            if is_folder and save_locally:
                path_to_create = local_path
                if local_path.endswith('*'):
                    path_to_create = os.path.dirname(local_path)
                logger.info("Creating new directory for download: {}".format(path_to_create))
                os.makedirs(path_to_create, exist_ok=True)

        ####################
        # annotations json #
        ####################
        # download annotations' json files in a new thread
        # items will start downloading and if json not exists yet - will download for each file
        if num_items > 1 and annotation_options:
            # a new folder named 'json' will be created under the "local_path"
            logger.info("Downloading annotations formats: {}".format(annotation_options))
            self.download_annotations(**{
                "dataset": self.items_repository.dataset,
                "filters": filters,
                "annotation_filters": annotation_filters,
                "local_path": local_path,
                'overwrite': overwrite,
                'include_annotations_in_output': include_annotations_in_output,
                'export_png_files': export_png_files,
                'filter_output_annotations': filter_output_annotations,
                'export_version': export_version,
                'dataset_lock': dataset_lock,
                'lock_timeout_sec': lock_timeout_sec,
                'export_summary': export_summary
            })
        ###############
        # downloading #
        ###############
        # create result lists
        client_api = self.items_repository._client_api

        # reporter aggregates per-item status (download/exist/error) across threads
        reporter = Reporter(num_workers=num_items,
                            resource=Reporter.ITEMS_DOWNLOAD,
                            print_error_logs=client_api.verbose.print_error_logs,
                            client_api=client_api)
        jobs = [None for _ in range(num_items)]
        # pool
        pool = client_api.thread_pools(pool_name='item.download')
        # download
        pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
                         desc='Download Items')
        try:
            i_item = 0
            for page in items_to_download:
                for item in page:
                    # directories have no binary - skip
                    if item.type == "dir":
                        continue
                    if save_locally:
                        # get local file path
                        item_local_path, item_local_filepath = self.__get_local_filepath(
                            local_path=local_path,
                            without_relative_path=without_relative_path,
                            item=item,
                            to_items_folder=to_items_folder,
                            is_folder=is_folder)

                        if os.path.isfile(item_local_filepath) and not overwrite:
                            # binary already on disk - report and (optionally) fetch annotations only
                            logger.debug("File Exists: {}".format(item_local_filepath))
                            reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
                            pbar.update()
                            if annotation_options and item.annotated:
                                # download annotations only
                                jobs[i_item] = pool.submit(
                                    self._download_img_annotations,
                                    **{
                                        "item": item,
                                        "img_filepath": item_local_filepath,
                                        "overwrite": overwrite,
                                        "annotation_options": annotation_options,
                                        "annotation_filters": annotation_filters,
                                        "local_path": item_local_path,
                                        "thickness": thickness,
                                        "alpha": alpha,
                                        "with_text": with_text,
                                        "export_version": export_version,
                                    },
                                )
                            i_item += 1
                            continue
                    else:
                        item_local_path = None
                        item_local_filepath = None

                    # download single item
                    jobs[i_item] = pool.submit(
                        self.__thread_download_wrapper,
                        **{
                            "i_item": i_item,
                            "item": item,
                            "item_local_path": item_local_path,
                            "item_local_filepath": item_local_filepath,
                            "save_locally": save_locally,
                            "to_array": to_array,
                            "annotation_options": annotation_options,
                            "annotation_filters": annotation_filters,
                            "reporter": reporter,
                            "pbar": pbar,
                            "overwrite": overwrite,
                            "thickness": thickness,
                            "alpha": alpha,
                            "with_text": with_text,
                            "export_version": export_version
                        },
                    )
                    i_item += 1
        except Exception:
            logger.exception('Error downloading:')
        finally:
            # wait for all submitted jobs before closing the progress bar
            _ = [j.result() for j in jobs if j is not None]
            pbar.close()
        # reporting
        n_download = reporter.status_count(status='download')
        n_exist = reporter.status_count(status='exist')
        n_error = reporter.status_count(status='error')
        logger.info("Number of files downloaded:{}".format(n_download))
        logger.info("Number of files exists: {}".format(n_exist))
        logger.info("Total number of files: {}".format(n_download + n_exist))

        # log error
        if n_error > 0:
            log_filepath = reporter.generate_log_files()
            if log_filepath is not None:
                logger.warning("Errors in {} files. See {} for full log".format(n_error, log_filepath))
        # single-item call: unwrap the generator to return the value itself
        if int(n_download) <= 1 and int(n_exist) <= 1:
            try:
                return next(reporter.output)
            except StopIteration:
                return None
        return reporter.output
316
+
317
+ def __thread_download_wrapper(self, i_item,
318
+ # item params
319
+ item, item_local_path, item_local_filepath,
320
+ save_locally, to_array, overwrite,
321
+ # annotations params
322
+ annotation_options, annotation_filters, with_text, thickness,
323
+ # threading params
324
+ reporter, pbar, alpha, export_version):
325
+
326
+ download = None
327
+ err = None
328
+ trace = None
329
+ for i_try in range(NUM_TRIES):
330
+ try:
331
+ logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
332
+ i=i_try + 1,
333
+ n=NUM_TRIES))
334
+ download = self.__thread_download(item=item,
335
+ save_locally=save_locally,
336
+ to_array=to_array,
337
+ local_path=item_local_path,
338
+ local_filepath=item_local_filepath,
339
+ annotation_options=annotation_options,
340
+ annotation_filters=annotation_filters,
341
+ overwrite=overwrite,
342
+ thickness=thickness,
343
+ alpha=alpha,
344
+ with_text=with_text,
345
+ export_version=export_version)
346
+ logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
347
+ i=i_try + 1,
348
+ n=NUM_TRIES,
349
+ id=item.id))
350
+ if download is not None:
351
+ break
352
+ except Exception as e:
353
+ logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
354
+ i=i_try + 1,
355
+ n=NUM_TRIES))
356
+ err = e
357
+ trace = traceback.format_exc()
358
+ pbar.update()
359
+ if download is None:
360
+ if err is None:
361
+ err = self.items_repository._client_api.platform_exception
362
+ reporter.set_index(status="error", ref=item.id, success=False,
363
+ error="{}\n{}".format(err, trace))
364
+ else:
365
+ reporter.set_index(ref=item.id, status="download", output=download, success=True)
366
+
367
    @staticmethod
    def download_annotations(dataset: entities.Dataset,
                             local_path: str,
                             filters: entities.Filters = None,
                             annotation_filters: entities.Filters = None,
                             overwrite=False,
                             include_annotations_in_output=True,
                             export_png_files=False,
                             filter_output_annotations=False,
                             export_version=entities.ExportVersion.V1,
                             dataset_lock=False,
                             lock_timeout_sec=None,
                             export_summary=False
                             ):
        """
        Download annotations json for entire dataset

        :param dataset: Dataset entity
        :param local_path:
        :param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
        :param annotation_filters: dl.Filters entity to filters items' annotations
        :param overwrite: optional - overwrite annotations if exist, default = false
        :param include_annotations_in_output: default - True , if export should contain annotations
        :param export_png_files: default - if True, semantic annotations should be exported as png files
        :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
        :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
        :param bool dataset_lock: optional - default = False
        :param bool export_summary: optional - default = False
        :param int lock_timeout_sec: optional
        :return:
        """
        # all annotation jsons go under "<local_path>/json"
        local_path = os.path.join(local_path, "json")
        zip_filepath = None
        # only if json folder does not exist or exist and overwrite
        # NOTE(review): local_path already ends with "json", so this checks
        # "<local_path>/json/json" and is therefore almost always True —
        # looks unintended; confirm before changing.
        if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
            # create local path to download and save to
            if not os.path.isdir(local_path):
                os.makedirs(local_path)

            try:
                # build the server-side export request payload
                payload = dict()
                if filters is not None:
                    payload['itemsQuery'] = filters.prepare()
                payload['annotations'] = {
                    "include": include_annotations_in_output,
                    "convertSemantic": export_png_files
                }
                payload['exportVersion'] = export_version
                if annotation_filters is not None:
                    payload['annotationsQuery'] = annotation_filters.prepare()
                    payload['annotations']['filter'] = filter_output_annotations
                if dataset_lock:
                    payload['datasetLock'] = dataset_lock

                if export_summary:
                    payload['summary'] = export_summary

                if lock_timeout_sec:
                    payload['lockTimeoutSec'] = lock_timeout_sec

                success, response = dataset._client_api.gen_request(req_type='post',
                                                                    path='/datasets/{}/export'.format(dataset.id),
                                                                    json_req=payload,
                                                                    headers={'user_query': filters._user_query})
                if not success:
                    raise exceptions.PlatformException(response)
                # the export runs as an async command; wait for it to finish
                command = entities.Command.from_json(_json=response.json(),
                                                     client_api=dataset._client_api)
                command = command.wait(timeout=0)
                if 'outputItemId' not in command.spec:
                    raise exceptions.PlatformException(
                        error='400',
                        message="outputItemId key is missing in command response: {}".format(response))
                # download the zip item the export produced
                item_id = command.spec['outputItemId']
                annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
                zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
                # unzipping annotations to directory
                if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
                    raise exceptions.PlatformException(
                        error='404',
                        message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
                            annotation_zip_item.id))
                try:
                    miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
                                                          to_directory=local_path)
                except Exception as e:
                    logger.warning("Failed to extract zip file error: {}".format(e))

            finally:
                # cleanup
                if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
                    os.remove(zip_filepath)
459
+
460
    @staticmethod
    def _download_img_annotations(item: entities.Item,
                                  img_filepath,
                                  local_path,
                                  overwrite,
                                  annotation_options,
                                  annotation_filters,
                                  thickness=1,
                                  with_text=False,
                                  alpha=1,
                                  export_version=entities.ExportVersion.V1
                                  ):
        """
        Save an item's annotations to disk in every requested output format.

        Annotations are read from a previously exported local json file when one
        exists and no annotation filters were given; otherwise they are fetched
        from the platform. One subfolder per option (e.g. "json", "mask") is
        created under ``local_path``.

        :param item: item whose annotations are downloaded
        :param img_filepath: local path of the item binary; used for the output
            filename and as the background for ANNOTATION_ON_IMAGE. May be None.
        :param local_path: root download directory
        :param overwrite: re-create output files that already exist
        :param annotation_options: list(dl.ViewAnnotationOptions) formats to save
        :param annotation_filters: Filters entity applied when fetching
            annotations from the platform
        :param thickness: line thickness for drawn annotations, -1 to fill
        :param with_text: draw label text on the annotations
        :param alpha: annotation opacity in [0, 1]
        :param export_version: V1 - output filenames drop the original extension;
            otherwise the extension is kept
        :raises PlatformException: "1002" when ANNOTATION_ON_IMAGE is requested
            without an image, "400" on an unknown option
        """
        # check if local_path is a file name
        _, ext = os.path.splitext(local_path)
        if ext:
            # take the dir of the file for the annotations save
            local_path = os.path.dirname(local_path)

        # fix local path: annotation folders live next to, not inside, "items"
        if local_path.endswith("/items") or local_path.endswith("\\items"):
            local_path = os.path.dirname(local_path)

        # item.filename starts with '/' - drop it to get a relative output path
        annotation_rel_path = item.filename[1:]
        if img_filepath is not None:
            # name outputs after the downloaded binary (may differ, e.g. for links)
            dir_name = os.path.dirname(annotation_rel_path)
            base_name = os.path.basename(img_filepath)
            annotation_rel_path = os.path.join(dir_name, base_name)

        # find annotations json
        annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
        if export_version == entities.ExportVersion.V1:
            # V1 filenames carry no original extension - strip it before adding .json
            name, _ = os.path.splitext(annotations_json_filepath)
        else:
            name = annotations_json_filepath
        annotations_json_filepath = name + ".json"

        if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
            # if exists take from json file
            with open(annotations_json_filepath, "r", encoding="utf8") as f:
                data = json.load(f)
                # exported files may wrap the list under an "annotations" key
                if "annotations" in data:
                    data = data["annotations"]
                annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
            # no need to use the filters here because the annotations were already downloaded with annotation_filters
        else:
            # if json file doesnt exist get the annotations from platform
            annotations = item.annotations.list(filters=annotation_filters)

        # get image shape
        is_url_item = item.metadata. \
            get('system', dict()). \
            get('shebang', dict()). \
            get('linkInfo', dict()). \
            get('type', None) == 'url'

        # NOTE(review): item is already dereferenced above, so this guard is
        # effectively always true - kept as-is
        if item is not None:
            orientation = item.system.get('exif', {}).get('Orientation', 0)
        else:
            orientation = 0
        if item.width is not None and item.height is not None:
            # EXIF orientations 5-8 mean the image is rotated 90/270 degrees -
            # swap width and height so drawn annotations match the pixels
            if orientation in [5, 6, 7, 8]:
                img_shape = (item.width, item.height)
            else:
                img_shape = (item.height, item.width)
        elif ('image' in item.mimetype and img_filepath is not None) or \
                (is_url_item and img_filepath is not None):
            # dimensions missing on the item - read them from the local binary
            img_shape = Image.open(img_filepath).size[::-1]
        else:
            img_shape = (0, 0)

        # download all annotation options
        for option in annotation_options:
            # get path and create dirs
            annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
            if not os.path.isdir(os.path.dirname(annotation_filepath)):
                os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)

            if export_version == entities.ExportVersion.V1:
                temp_path, ext = os.path.splitext(annotation_filepath)
            else:
                temp_path = annotation_filepath

            if option == entities.ViewAnnotationOptions.JSON:
                if not os.path.isfile(annotations_json_filepath):
                    annotations.download(
                        filepath=annotations_json_filepath,
                        annotation_format=option,
                        height=img_shape[0],
                        width=img_shape[1],
                    )
            elif option in [entities.ViewAnnotationOptions.MASK,
                            entities.ViewAnnotationOptions.INSTANCE,
                            entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
                            entities.ViewAnnotationOptions.OBJECT_ID,
                            entities.ViewAnnotationOptions.VTT]:
                # pick the output extension by option and item type
                if option == entities.ViewAnnotationOptions.VTT:
                    annotation_filepath = temp_path + ".vtt"
                else:
                    if 'video' in item.mimetype:
                        annotation_filepath = temp_path + ".mp4"
                    else:
                        annotation_filepath = temp_path + ".png"
                if not os.path.isfile(annotation_filepath) or overwrite:
                    # if not exists OR (exists AND overwrite)
                    if not os.path.exists(os.path.dirname(annotation_filepath)):
                        # create folder if not exists
                        os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
                    if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
                        raise PlatformException(
                            error="1002",
                            message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
                    annotations.download(
                        filepath=annotation_filepath,
                        img_filepath=img_filepath,
                        annotation_format=option,
                        height=img_shape[0],
                        width=img_shape[1],
                        thickness=thickness,
                        alpha=alpha,
                        with_text=with_text,
                        orientation=orientation
                    )
            else:
                raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
586
+ @staticmethod
587
+ def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
588
+ # create paths
589
+ _, ext = os.path.splitext(local_path)
590
+ if ext and not is_folder:
591
+ # local_path is a filename
592
+ local_filepath = local_path
593
+ local_path = os.path.dirname(local_filepath)
594
+ else:
595
+ # if directory - get item's filename
596
+ if to_items_folder:
597
+ local_path = os.path.join(local_path, "items")
598
+ elif is_folder:
599
+ local_path = os.path.join(local_path, "")
600
+ if without_relative_path is not None:
601
+ local_filepath = os.path.join(local_path, item.name)
602
+ else:
603
+ local_filepath = os.path.join(local_path, item.filename[1:])
604
+ return local_path, local_filepath
605
+
606
+ @staticmethod
607
+ def __get_link_source(item):
608
+ assert isinstance(item, entities.Item)
609
+ if not item.is_fetched:
610
+ return item, '', False
611
+
612
+ if not item.filename.endswith('.json') or \
613
+ item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') != 'link':
614
+ return item, '', False
615
+
616
+ # recursively get next id link item
617
+ while item.filename.endswith('.json') and \
618
+ item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
619
+ item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'id':
620
+ item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])
621
+
622
+ # check if link
623
+ if item.filename.endswith('.json') and \
624
+ item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
625
+ item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
626
+ url = item.metadata['system']['shebang']['linkInfo']['ref']
627
+ return item, url, True
628
+ else:
629
+ return item, '', False
630
+
631
+ def __file_validation(self, item, downloaded_file):
632
+ res = False
633
+ resume = True
634
+ if isinstance(downloaded_file, io.BytesIO):
635
+ file_size = downloaded_file.getbuffer().nbytes
636
+ else:
637
+ file_size = os.stat(downloaded_file).st_size
638
+ expected_size = item.metadata['system']['size']
639
+ size_diff = file_size - expected_size
640
+ if size_diff == 0:
641
+ res = True
642
+ if size_diff > 0:
643
+ resume = False
644
+ return res, file_size, resume
645
+
646
+ def __thread_download(self,
647
+ item,
648
+ save_locally,
649
+ local_path,
650
+ to_array,
651
+ local_filepath,
652
+ overwrite,
653
+ annotation_options,
654
+ annotation_filters,
655
+ chunk_size=8192,
656
+ thickness=1,
657
+ with_text=False,
658
+ alpha=1,
659
+ export_version=entities.ExportVersion.V1
660
+ ):
661
+ """
662
+ Get a single item's binary data
663
+ Calling this method will returns the item body itself , an image for example with the proper mimetype.
664
+
665
+ :param item: Item entity to download
666
+ :param save_locally: bool. save to file or return buffer
667
+ :param local_path: item local folder to save to.
668
+ :param to_array: returns Ndarray when True and local_path = False
669
+ :param local_filepath: item local filepath
670
+ :param overwrite: overwrite the file is existing
671
+ :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
672
+ :param annotation_filters: Filters entity to filter item's annotation
673
+ :param chunk_size: size of chunks to download - optional. default = 8192
674
+ :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
675
+ :param with_text: optional - add text to annotations, default = False
676
+ :param alpha: opacity value [0 1], default 1
677
+ :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
678
+ :return:
679
+ """
680
+ # check if need to download image binary from platform
681
+ need_to_download = True
682
+ if save_locally and os.path.isfile(local_filepath):
683
+ need_to_download = overwrite
684
+
685
+ item, url, is_url = self.__get_link_source(item=item)
686
+
687
+ # save as byte stream
688
+ data = io.BytesIO()
689
+ if need_to_download:
690
+ chunk_resume = {0: 0}
691
+ start_point = 0
692
+ download_done = False
693
+ while chunk_resume.get(start_point, '') != 3 and not download_done:
694
+ if not is_url:
695
+ headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
696
+ result, response = self.items_repository._client_api.gen_request(req_type="get",
697
+ headers=headers,
698
+ path="/items/{}/stream".format(
699
+ item.id),
700
+ stream=True,
701
+ dataset_id=item.dataset_id)
702
+ if not result:
703
+ if os.path.isfile(local_filepath + '.download'):
704
+ os.remove(local_filepath + '.download')
705
+ raise PlatformException(response)
706
+ else:
707
+ _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
708
+ if local_filepath:
709
+ local_filepath += ext
710
+ response = self.get_url_stream(url=url)
711
+
712
+ if save_locally:
713
+ # save to file
714
+ if not os.path.exists(os.path.dirname(local_filepath)):
715
+ # create folder if not exists
716
+ os.makedirs(os.path.dirname(local_filepath), exist_ok=True)
717
+
718
+ # decide if create progress bar for item
719
+ total_length = response.headers.get("content-length")
720
+ one_file_pbar = None
721
+ try:
722
+ one_file_progress_bar = total_length is not None and int(
723
+ total_length) > 10e6 # size larger than 10 MB
724
+ if one_file_progress_bar:
725
+ one_file_pbar = tqdm.tqdm(total=int(total_length),
726
+ unit='B',
727
+ unit_scale=True,
728
+ unit_divisor=1024,
729
+ position=1,
730
+ file=sys.stdout,
731
+ disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
732
+ desc='Download Item')
733
+ except Exception as err:
734
+ one_file_progress_bar = False
735
+ logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))
736
+
737
+ # start download
738
+ if self.items_repository._client_api.sdk_cache.use_cache and \
739
+ self.items_repository._client_api.cache is not None:
740
+ response_output = os.path.normpath(response.content)
741
+ if isinstance(response_output, bytes):
742
+ response_output = response_output.decode('utf-8')[1:-1]
743
+
744
+ if os.path.isfile(os.path.normpath(response_output)):
745
+ if response_output != local_filepath:
746
+ source_path = os.path.normpath(response_output)
747
+ shutil.copyfile(source_path, local_filepath)
748
+ else:
749
+ try:
750
+ temp_file_path = local_filepath + '.download'
751
+ with open(temp_file_path, "ab") as f:
752
+ try:
753
+ for chunk in response.iter_content(chunk_size=chunk_size):
754
+ if chunk: # filter out keep-alive new chunks
755
+ f.write(chunk)
756
+ if one_file_progress_bar:
757
+ one_file_pbar.update(len(chunk))
758
+ except Exception as err:
759
+ pass
760
+
761
+ file_validation = True
762
+ if not is_url:
763
+ file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
764
+ download_progress=temp_file_path,
765
+ chunk_resume=chunk_resume)
766
+ if file_validation:
767
+ shutil.move(temp_file_path, local_filepath)
768
+ download_done = True
769
+ except Exception as err:
770
+ if os.path.isfile(temp_file_path):
771
+ os.remove(temp_file_path)
772
+ raise err
773
+ if one_file_progress_bar:
774
+ one_file_pbar.close()
775
+ # save to output variable
776
+ data = local_filepath
777
+ # if image - can download annotation mask
778
+ if item.annotated and annotation_options:
779
+ self._download_img_annotations(item=item,
780
+ img_filepath=local_filepath,
781
+ annotation_options=annotation_options,
782
+ annotation_filters=annotation_filters,
783
+ local_path=local_path,
784
+ overwrite=overwrite,
785
+ thickness=thickness,
786
+ alpha=alpha,
787
+ with_text=with_text,
788
+ export_version=export_version
789
+ )
790
+ else:
791
+ if self.items_repository._client_api.sdk_cache.use_cache and \
792
+ self.items_repository._client_api.cache is not None:
793
+ response_output = os.path.normpath(response.content)
794
+ if isinstance(response_output, bytes):
795
+ response_output = response_output.decode('utf-8')[1:-1]
796
+
797
+ if os.path.isfile(response_output):
798
+ source_file = response_output
799
+ with open(source_file, 'wb') as f:
800
+ data = f.read()
801
+ else:
802
+ try:
803
+ for chunk in response.iter_content(chunk_size=chunk_size):
804
+ if chunk: # filter out keep-alive new chunks
805
+ data.write(chunk)
806
+
807
+ file_validation = True
808
+ if not is_url:
809
+ file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
810
+ download_progress=data,
811
+ chunk_resume=chunk_resume)
812
+ if file_validation:
813
+ download_done = True
814
+ else:
815
+ continue
816
+ except Exception as err:
817
+ raise err
818
+ # go back to the beginning of the stream
819
+ data.seek(0)
820
+ data.name = item.name
821
+ if not save_locally and to_array:
822
+ if 'image' not in item.mimetype and not is_url:
823
+ raise PlatformException(
824
+ error="400",
825
+ message='Download element type numpy.ndarray support for image only. '
826
+ 'Item Id: {} is {} type'.format(item.id, item.mimetype))
827
+
828
+ data = np.array(Image.open(data))
829
+ else:
830
+ data = local_filepath
831
+ return data
832
+
833
+ def __get_next_chunk(self, item, download_progress, chunk_resume):
834
+ size_validation, file_size, resume = self.__file_validation(item=item,
835
+ downloaded_file=download_progress)
836
+ start_point = file_size
837
+ if not size_validation:
838
+ if chunk_resume.get(start_point, None) is None:
839
+ chunk_resume = {start_point: 1}
840
+ else:
841
+ chunk_resume[start_point] += 1
842
+ if chunk_resume[start_point] == 3 or not resume:
843
+ raise PlatformException(
844
+ error=500,
845
+ message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
846
+ return size_validation, start_point, chunk_resume
847
+
848
+ def __default_local_path(self):
849
+
850
+ # create default local path
851
+ if self.items_repository._dataset is None:
852
+ local_path = os.path.join(
853
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
854
+ "items",
855
+ )
856
+ else:
857
+ if self.items_repository.dataset._project is None:
858
+ # by dataset name
859
+ local_path = os.path.join(
860
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
861
+ "datasets",
862
+ "{}_{}".format(self.items_repository.dataset.name, self.items_repository.dataset.id),
863
+ )
864
+ else:
865
+ # by dataset and project name
866
+ local_path = os.path.join(
867
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
868
+ "projects",
869
+ self.items_repository.dataset.project.name,
870
+ "datasets",
871
+ self.items_repository.dataset.name,
872
+ )
873
+ logger.info("Downloading to: {}".format(local_path))
874
+ return local_path
875
+
876
+ @staticmethod
877
+ def get_url_stream(url):
878
+ """
879
+ :param url:
880
+ """
881
+ # This will download the binaries from the URL user provided
882
+ prepared_request = requests.Request(method='GET', url=url).prepare()
883
+ with requests.Session() as s:
884
+ retry = Retry(
885
+ total=3,
886
+ read=3,
887
+ connect=3,
888
+ backoff_factor=1,
889
+ )
890
+ adapter = HTTPAdapter(max_retries=retry)
891
+ s.mount('http://', adapter)
892
+ s.mount('https://', adapter)
893
+ response = s.send(request=prepared_request, stream=True)
894
+
895
+ return response