dtlpy 1.114.17__py3-none-any.whl → 1.116.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. dtlpy/__init__.py +491 -491
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/code_server/config.yaml +2 -2
  5. dtlpy/assets/code_server/installation.sh +24 -24
  6. dtlpy/assets/code_server/launch.json +13 -13
  7. dtlpy/assets/code_server/settings.json +2 -2
  8. dtlpy/assets/main.py +53 -53
  9. dtlpy/assets/main_partial.py +18 -18
  10. dtlpy/assets/mock.json +11 -11
  11. dtlpy/assets/model_adapter.py +83 -83
  12. dtlpy/assets/package.json +61 -61
  13. dtlpy/assets/package_catalog.json +29 -29
  14. dtlpy/assets/package_gitignore +307 -307
  15. dtlpy/assets/service_runners/__init__.py +33 -33
  16. dtlpy/assets/service_runners/converter.py +96 -96
  17. dtlpy/assets/service_runners/multi_method.py +49 -49
  18. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  19. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  20. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  21. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  22. dtlpy/assets/service_runners/single_method.py +37 -37
  23. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  24. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  25. dtlpy/assets/service_runners/single_method_item.py +41 -41
  26. dtlpy/assets/service_runners/single_method_json.py +42 -42
  27. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  28. dtlpy/assets/voc_annotation_template.xml +23 -23
  29. dtlpy/caches/base_cache.py +32 -32
  30. dtlpy/caches/cache.py +473 -473
  31. dtlpy/caches/dl_cache.py +201 -201
  32. dtlpy/caches/filesystem_cache.py +89 -89
  33. dtlpy/caches/redis_cache.py +84 -84
  34. dtlpy/dlp/__init__.py +20 -20
  35. dtlpy/dlp/cli_utilities.py +367 -367
  36. dtlpy/dlp/command_executor.py +764 -764
  37. dtlpy/dlp/dlp +1 -1
  38. dtlpy/dlp/dlp.bat +1 -1
  39. dtlpy/dlp/dlp.py +128 -128
  40. dtlpy/dlp/parser.py +651 -651
  41. dtlpy/entities/__init__.py +83 -83
  42. dtlpy/entities/analytic.py +347 -311
  43. dtlpy/entities/annotation.py +1879 -1879
  44. dtlpy/entities/annotation_collection.py +699 -699
  45. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  46. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  47. dtlpy/entities/annotation_definitions/box.py +195 -195
  48. dtlpy/entities/annotation_definitions/classification.py +67 -67
  49. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  50. dtlpy/entities/annotation_definitions/cube.py +204 -204
  51. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  52. dtlpy/entities/annotation_definitions/description.py +32 -32
  53. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  54. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  55. dtlpy/entities/annotation_definitions/gis.py +69 -69
  56. dtlpy/entities/annotation_definitions/note.py +139 -139
  57. dtlpy/entities/annotation_definitions/point.py +117 -117
  58. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  59. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  60. dtlpy/entities/annotation_definitions/pose.py +92 -92
  61. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  62. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  63. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  64. dtlpy/entities/annotation_definitions/text.py +85 -85
  65. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  66. dtlpy/entities/app.py +220 -220
  67. dtlpy/entities/app_module.py +107 -107
  68. dtlpy/entities/artifact.py +174 -174
  69. dtlpy/entities/assignment.py +399 -399
  70. dtlpy/entities/base_entity.py +214 -214
  71. dtlpy/entities/bot.py +113 -113
  72. dtlpy/entities/codebase.py +292 -296
  73. dtlpy/entities/collection.py +38 -38
  74. dtlpy/entities/command.py +169 -169
  75. dtlpy/entities/compute.py +449 -442
  76. dtlpy/entities/dataset.py +1299 -1285
  77. dtlpy/entities/directory_tree.py +44 -44
  78. dtlpy/entities/dpk.py +470 -470
  79. dtlpy/entities/driver.py +235 -223
  80. dtlpy/entities/execution.py +397 -397
  81. dtlpy/entities/feature.py +124 -124
  82. dtlpy/entities/feature_set.py +145 -145
  83. dtlpy/entities/filters.py +798 -645
  84. dtlpy/entities/gis_item.py +107 -107
  85. dtlpy/entities/integration.py +184 -184
  86. dtlpy/entities/item.py +959 -953
  87. dtlpy/entities/label.py +123 -123
  88. dtlpy/entities/links.py +85 -85
  89. dtlpy/entities/message.py +175 -175
  90. dtlpy/entities/model.py +684 -684
  91. dtlpy/entities/node.py +1005 -1005
  92. dtlpy/entities/ontology.py +810 -803
  93. dtlpy/entities/organization.py +287 -287
  94. dtlpy/entities/package.py +657 -657
  95. dtlpy/entities/package_defaults.py +5 -5
  96. dtlpy/entities/package_function.py +185 -185
  97. dtlpy/entities/package_module.py +113 -113
  98. dtlpy/entities/package_slot.py +118 -118
  99. dtlpy/entities/paged_entities.py +299 -299
  100. dtlpy/entities/pipeline.py +624 -624
  101. dtlpy/entities/pipeline_execution.py +279 -279
  102. dtlpy/entities/project.py +394 -394
  103. dtlpy/entities/prompt_item.py +505 -499
  104. dtlpy/entities/recipe.py +301 -301
  105. dtlpy/entities/reflect_dict.py +102 -102
  106. dtlpy/entities/resource_execution.py +138 -138
  107. dtlpy/entities/service.py +963 -958
  108. dtlpy/entities/service_driver.py +117 -117
  109. dtlpy/entities/setting.py +294 -294
  110. dtlpy/entities/task.py +495 -495
  111. dtlpy/entities/time_series.py +143 -143
  112. dtlpy/entities/trigger.py +426 -426
  113. dtlpy/entities/user.py +118 -118
  114. dtlpy/entities/webhook.py +124 -124
  115. dtlpy/examples/__init__.py +19 -19
  116. dtlpy/examples/add_labels.py +135 -135
  117. dtlpy/examples/add_metadata_to_item.py +21 -21
  118. dtlpy/examples/annotate_items_using_model.py +65 -65
  119. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  120. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  121. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  122. dtlpy/examples/convert_annotation_types.py +51 -51
  123. dtlpy/examples/converter.py +143 -143
  124. dtlpy/examples/copy_annotations.py +22 -22
  125. dtlpy/examples/copy_folder.py +31 -31
  126. dtlpy/examples/create_annotations.py +51 -51
  127. dtlpy/examples/create_video_annotations.py +83 -83
  128. dtlpy/examples/delete_annotations.py +26 -26
  129. dtlpy/examples/filters.py +113 -113
  130. dtlpy/examples/move_item.py +23 -23
  131. dtlpy/examples/play_video_annotation.py +13 -13
  132. dtlpy/examples/show_item_and_mask.py +53 -53
  133. dtlpy/examples/triggers.py +49 -49
  134. dtlpy/examples/upload_batch_of_items.py +20 -20
  135. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  136. dtlpy/examples/upload_items_with_modalities.py +43 -43
  137. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  138. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  139. dtlpy/exceptions.py +125 -125
  140. dtlpy/miscellaneous/__init__.py +20 -20
  141. dtlpy/miscellaneous/dict_differ.py +95 -95
  142. dtlpy/miscellaneous/git_utils.py +217 -217
  143. dtlpy/miscellaneous/json_utils.py +14 -14
  144. dtlpy/miscellaneous/list_print.py +105 -105
  145. dtlpy/miscellaneous/zipping.py +130 -130
  146. dtlpy/ml/__init__.py +20 -20
  147. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  148. dtlpy/ml/base_model_adapter.py +1257 -1086
  149. dtlpy/ml/metrics.py +461 -461
  150. dtlpy/ml/predictions_utils.py +274 -274
  151. dtlpy/ml/summary_writer.py +57 -57
  152. dtlpy/ml/train_utils.py +60 -60
  153. dtlpy/new_instance.py +252 -252
  154. dtlpy/repositories/__init__.py +56 -56
  155. dtlpy/repositories/analytics.py +85 -85
  156. dtlpy/repositories/annotations.py +916 -916
  157. dtlpy/repositories/apps.py +383 -383
  158. dtlpy/repositories/artifacts.py +452 -452
  159. dtlpy/repositories/assignments.py +599 -599
  160. dtlpy/repositories/bots.py +213 -213
  161. dtlpy/repositories/codebases.py +559 -559
  162. dtlpy/repositories/collections.py +332 -332
  163. dtlpy/repositories/commands.py +152 -158
  164. dtlpy/repositories/compositions.py +61 -61
  165. dtlpy/repositories/computes.py +439 -435
  166. dtlpy/repositories/datasets.py +1504 -1291
  167. dtlpy/repositories/downloader.py +976 -903
  168. dtlpy/repositories/dpks.py +433 -433
  169. dtlpy/repositories/drivers.py +482 -470
  170. dtlpy/repositories/executions.py +815 -817
  171. dtlpy/repositories/feature_sets.py +226 -226
  172. dtlpy/repositories/features.py +255 -238
  173. dtlpy/repositories/integrations.py +484 -484
  174. dtlpy/repositories/items.py +912 -909
  175. dtlpy/repositories/messages.py +94 -94
  176. dtlpy/repositories/models.py +1000 -988
  177. dtlpy/repositories/nodes.py +80 -80
  178. dtlpy/repositories/ontologies.py +511 -511
  179. dtlpy/repositories/organizations.py +525 -525
  180. dtlpy/repositories/packages.py +1941 -1941
  181. dtlpy/repositories/pipeline_executions.py +451 -451
  182. dtlpy/repositories/pipelines.py +640 -640
  183. dtlpy/repositories/projects.py +539 -539
  184. dtlpy/repositories/recipes.py +419 -399
  185. dtlpy/repositories/resource_executions.py +137 -137
  186. dtlpy/repositories/schema.py +120 -120
  187. dtlpy/repositories/service_drivers.py +213 -213
  188. dtlpy/repositories/services.py +1704 -1704
  189. dtlpy/repositories/settings.py +339 -339
  190. dtlpy/repositories/tasks.py +1477 -1477
  191. dtlpy/repositories/times_series.py +278 -278
  192. dtlpy/repositories/triggers.py +536 -536
  193. dtlpy/repositories/upload_element.py +257 -257
  194. dtlpy/repositories/uploader.py +661 -651
  195. dtlpy/repositories/webhooks.py +249 -249
  196. dtlpy/services/__init__.py +22 -22
  197. dtlpy/services/aihttp_retry.py +131 -131
  198. dtlpy/services/api_client.py +1785 -1782
  199. dtlpy/services/api_reference.py +40 -40
  200. dtlpy/services/async_utils.py +133 -133
  201. dtlpy/services/calls_counter.py +44 -44
  202. dtlpy/services/check_sdk.py +68 -68
  203. dtlpy/services/cookie.py +115 -115
  204. dtlpy/services/create_logger.py +156 -156
  205. dtlpy/services/events.py +84 -84
  206. dtlpy/services/logins.py +235 -235
  207. dtlpy/services/reporter.py +256 -256
  208. dtlpy/services/service_defaults.py +91 -91
  209. dtlpy/utilities/__init__.py +20 -20
  210. dtlpy/utilities/annotations/__init__.py +16 -16
  211. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  212. dtlpy/utilities/base_package_runner.py +285 -264
  213. dtlpy/utilities/converter.py +1650 -1650
  214. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  215. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  216. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  217. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  218. dtlpy/utilities/local_development/__init__.py +1 -1
  219. dtlpy/utilities/local_development/local_session.py +179 -179
  220. dtlpy/utilities/reports/__init__.py +2 -2
  221. dtlpy/utilities/reports/figures.py +343 -343
  222. dtlpy/utilities/reports/report.py +71 -71
  223. dtlpy/utilities/videos/__init__.py +17 -17
  224. dtlpy/utilities/videos/video_player.py +598 -598
  225. dtlpy/utilities/videos/videos.py +470 -470
  226. {dtlpy-1.114.17.data → dtlpy-1.116.6.data}/scripts/dlp +1 -1
  227. dtlpy-1.116.6.data/scripts/dlp.bat +2 -0
  228. {dtlpy-1.114.17.data → dtlpy-1.116.6.data}/scripts/dlp.py +128 -128
  229. {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/METADATA +186 -183
  230. dtlpy-1.116.6.dist-info/RECORD +239 -0
  231. {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/WHEEL +1 -1
  232. {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/licenses/LICENSE +200 -200
  233. tests/features/environment.py +551 -551
  234. dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
  235. dtlpy-1.114.17.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.114.17.dist-info/RECORD +0 -240
  237. {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/entry_points.txt +0 -0
  238. {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/top_level.txt +0 -0
@@ -1,903 +1,976 @@
1
- from requests.adapters import HTTPAdapter
2
- from urllib3.util import Retry
3
- from PIL import Image
4
- import numpy as np
5
- import traceback
6
- import warnings
7
- import requests
8
- import logging
9
- import shutil
10
- import json
11
- import tqdm
12
- import sys
13
- import os
14
- import io
15
-
16
- from .. import entities, repositories, miscellaneous, PlatformException, exceptions
17
- from ..services import Reporter
18
-
19
- logger = logging.getLogger(name='dtlpy')
20
-
21
- NUM_TRIES = 3 # try to download 3 time before fail on item
22
-
23
-
24
- class Downloader:
25
- def __init__(self, items_repository):
26
- self.items_repository = items_repository
27
-
28
- def download(self,
29
- # filter options
30
- filters: entities.Filters = None,
31
- items=None,
32
- # download options
33
- local_path=None,
34
- file_types=None,
35
- save_locally=True,
36
- to_array=False,
37
- overwrite=False,
38
- annotation_filters: entities.Filters = None,
39
- annotation_options: entities.ViewAnnotationOptions = None,
40
- to_items_folder=True,
41
- thickness=1,
42
- with_text=False,
43
- without_relative_path=None,
44
- avoid_unnecessary_annotation_download=False,
45
- include_annotations_in_output=True,
46
- export_png_files=False,
47
- filter_output_annotations=False,
48
- alpha=1,
49
- export_version=entities.ExportVersion.V1,
50
- dataset_lock=False,
51
- lock_timeout_sec=None,
52
- export_summary=False
53
- ):
54
- """
55
- Download dataset by filters.
56
- Filtering the dataset for items and save them local
57
- Optional - also download annotation, mask, instance and image mask of the item
58
-
59
- :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
60
- :param items: download Item entity or item_id (or a list of item)
61
- :param local_path: local folder or filename to save to.
62
- :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
63
- :param save_locally: bool. save to disk or return a buffer
64
- :param to_array: returns Ndarray when True and local_path = False
65
- :param overwrite: optional - default = False
66
- :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
67
- :param annotation_filters: Filters entity to filter annotations for download
68
- :param to_items_folder: Create 'items' folder and download items to it
69
- :param with_text: optional - add text to annotations, default = False
70
- :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
71
- :param without_relative_path: bool - download items without the relative path from platform
72
- :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
73
- :param include_annotations_in_output: default - False , if export should contain annotations
74
- :param export_png_files: default - True, if semantic annotations should be exported as png files
75
- :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
76
- :param alpha: opacity value [0 1], default 1
77
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
78
- :param bool dataset_lock: optional - default = False
79
- :param bool export_summary: optional - default = False
80
- :param int lock_timeout_sec: optional
81
- :return: Output (list)
82
- """
83
-
84
- ###################
85
- # Default options #
86
- ###################
87
- # annotation options
88
- if annotation_options is None:
89
- annotation_options = list()
90
- elif not isinstance(annotation_options, list):
91
- annotation_options = [annotation_options]
92
- for ann_option in annotation_options:
93
- if not isinstance(ann_option, entities.ViewAnnotationOptions):
94
- if ann_option not in list(entities.ViewAnnotationOptions):
95
- raise PlatformException(
96
- error='400',
97
- message='Unknown annotation download option: {}, please choose from: {}'.format(
98
- ann_option, list(entities.ViewAnnotationOptions)))
99
- # normalize items argument: treat empty list as “no items specified”
100
- if isinstance(items, list) and len(items) == 0:
101
- items = None
102
- #####################
103
- # items to download #
104
- #####################
105
- if items is not None:
106
- # convert input to a list
107
- if not isinstance(items, list):
108
- items = [items]
109
- # get items by id
110
- if isinstance(items[0], str):
111
- items = [self.items_repository.get(item_id=item_id) for item_id in items]
112
- elif isinstance(items[0], entities.Item):
113
- pass
114
- else:
115
- raise PlatformException(
116
- error="400",
117
- message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
118
- type(items[0])
119
- )
120
- )
121
- # create filters to download annotations
122
- filters = entities.Filters(field='id',
123
- values=[item.id for item in items],
124
- operator=entities.FiltersOperations.IN)
125
- filters._user_query = 'false'
126
-
127
- # convert to list of list (like pages and page)
128
- items_to_download = [items]
129
- num_items = len(items)
130
- else:
131
- # filters
132
- if filters is None:
133
- filters = entities.Filters()
134
- filters._user_query = 'false'
135
- # file types
136
- if file_types is not None:
137
- filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
138
- if annotation_filters is not None:
139
- if len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0:
140
- for annotation_filter_and in annotation_filters.and_filter_list:
141
- filters.add_join(field=annotation_filter_and.field,
142
- values=annotation_filter_and.values,
143
- operator=annotation_filter_and.operator,
144
- method=entities.FiltersMethod.AND)
145
- for annotation_filter_or in annotation_filters.or_filter_list:
146
- filters.add_join(field=annotation_filter_or.field,
147
- values=annotation_filter_or.values,
148
- operator=annotation_filter_or.operator,
149
- method=entities.FiltersMethod.OR)
150
- elif annotation_filters.custom_filter is not None:
151
- annotation_query_dict = annotation_filters.prepare()
152
- items_query_dict = filters.prepare()
153
- items_query_dict["join"] = annotation_query_dict
154
- filters.reset()
155
- filters.custom_filter = items_query_dict
156
-
157
- else:
158
- annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
159
- filters._user_query = 'false'
160
-
161
- items_to_download = self.items_repository.list(filters=filters)
162
- num_items = items_to_download.items_count
163
-
164
- if num_items == 0:
165
- logger.warning('No items found! Nothing was downloaded')
166
- return list()
167
-
168
- ##############
169
- # local path #
170
- ##############
171
- is_folder = False
172
- if local_path is None:
173
- # create default local path
174
- local_path = self.__default_local_path()
175
-
176
- if os.path.isdir(local_path):
177
- logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
178
- local_path))
179
- is_folder = True
180
- else:
181
- # check if filename
182
- _, ext = os.path.splitext(local_path)
183
- if num_items > 1:
184
- is_folder = True
185
- else:
186
- item_to_download = items_to_download[0][0]
187
- file_name = item_to_download.name
188
- _, ext_download = os.path.splitext(file_name)
189
- if ext_download != ext:
190
- is_folder = True
191
- if is_folder and save_locally:
192
- path_to_create = local_path
193
- if local_path.endswith('*'):
194
- path_to_create = os.path.dirname(local_path)
195
- logger.info("Creating new directory for download: {}".format(path_to_create))
196
- os.makedirs(path_to_create, exist_ok=True)
197
-
198
- ####################
199
- # annotations json #
200
- ####################
201
- # download annotations' json files in a new thread
202
- # items will start downloading and if json not exists yet - will download for each file
203
- if num_items > 1 and annotation_options:
204
- # a new folder named 'json' will be created under the "local_path"
205
- logger.info("Downloading annotations formats: {}".format(annotation_options))
206
- self.download_annotations(**{
207
- "dataset": self.items_repository.dataset,
208
- "filters": filters,
209
- "annotation_filters": annotation_filters,
210
- "local_path": local_path,
211
- 'overwrite': overwrite,
212
- 'include_annotations_in_output': include_annotations_in_output,
213
- 'export_png_files': export_png_files,
214
- 'filter_output_annotations': filter_output_annotations,
215
- 'export_version': export_version,
216
- 'dataset_lock': dataset_lock,
217
- 'lock_timeout_sec': lock_timeout_sec,
218
- 'export_summary': export_summary
219
- })
220
- ###############
221
- # downloading #
222
- ###############
223
- # create result lists
224
- client_api = self.items_repository._client_api
225
-
226
- reporter = Reporter(num_workers=num_items,
227
- resource=Reporter.ITEMS_DOWNLOAD,
228
- print_error_logs=client_api.verbose.print_error_logs,
229
- client_api=client_api)
230
- jobs = [None for _ in range(num_items)]
231
- # pool
232
- pool = client_api.thread_pools(pool_name='item.download')
233
- # download
234
- pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
235
- desc='Download Items')
236
- try:
237
- i_item = 0
238
- for page in items_to_download:
239
- for item in page:
240
- if item.type == "dir":
241
- continue
242
- if save_locally:
243
- # get local file path
244
- item_local_path, item_local_filepath = self.__get_local_filepath(
245
- local_path=local_path,
246
- without_relative_path=without_relative_path,
247
- item=item,
248
- to_items_folder=to_items_folder,
249
- is_folder=is_folder)
250
-
251
- if os.path.isfile(item_local_filepath) and not overwrite:
252
- logger.debug("File Exists: {}".format(item_local_filepath))
253
- reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
254
- pbar.update()
255
- if annotation_options and item.annotated:
256
- # download annotations only
257
- jobs[i_item] = pool.submit(
258
- self._download_img_annotations,
259
- **{
260
- "item": item,
261
- "img_filepath": item_local_filepath,
262
- "overwrite": overwrite,
263
- "annotation_options": annotation_options,
264
- "annotation_filters": annotation_filters,
265
- "local_path": item_local_path,
266
- "thickness": thickness,
267
- "alpha": alpha,
268
- "with_text": with_text,
269
- "export_version": export_version,
270
- },
271
- )
272
- i_item += 1
273
- continue
274
- else:
275
- item_local_path = None
276
- item_local_filepath = None
277
-
278
- # download single item
279
- jobs[i_item] = pool.submit(
280
- self.__thread_download_wrapper,
281
- **{
282
- "i_item": i_item,
283
- "item": item,
284
- "item_local_path": item_local_path,
285
- "item_local_filepath": item_local_filepath,
286
- "save_locally": save_locally,
287
- "to_array": to_array,
288
- "annotation_options": annotation_options,
289
- "annotation_filters": annotation_filters,
290
- "reporter": reporter,
291
- "pbar": pbar,
292
- "overwrite": overwrite,
293
- "thickness": thickness,
294
- "alpha": alpha,
295
- "with_text": with_text,
296
- "export_version": export_version
297
- },
298
- )
299
- i_item += 1
300
- except Exception:
301
- logger.exception('Error downloading:')
302
- finally:
303
- _ = [j.result() for j in jobs if j is not None]
304
- pbar.close()
305
- # reporting
306
- n_download = reporter.status_count(status='download')
307
- n_exist = reporter.status_count(status='exist')
308
- n_error = reporter.status_count(status='error')
309
- logger.info("Number of files downloaded:{}".format(n_download))
310
- logger.info("Number of files exists: {}".format(n_exist))
311
- logger.info("Total number of files: {}".format(n_download + n_exist))
312
-
313
- # log error
314
- if n_error > 0:
315
- log_filepath = reporter.generate_log_files()
316
- if log_filepath is not None:
317
- logger.warning("Errors in {} files. See {} for full log".format(n_error, log_filepath))
318
- if int(n_download) <= 1 and int(n_exist) <= 1:
319
- try:
320
- return next(reporter.output)
321
- except StopIteration:
322
- return None
323
- return reporter.output
324
-
325
- def __thread_download_wrapper(self, i_item,
326
- # item params
327
- item, item_local_path, item_local_filepath,
328
- save_locally, to_array, overwrite,
329
- # annotations params
330
- annotation_options, annotation_filters, with_text, thickness,
331
- # threading params
332
- reporter, pbar, alpha, export_version):
333
-
334
- download = None
335
- err = None
336
- trace = None
337
- for i_try in range(NUM_TRIES):
338
- try:
339
- logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
340
- i=i_try + 1,
341
- n=NUM_TRIES))
342
- download = self.__thread_download(item=item,
343
- save_locally=save_locally,
344
- to_array=to_array,
345
- local_path=item_local_path,
346
- local_filepath=item_local_filepath,
347
- annotation_options=annotation_options,
348
- annotation_filters=annotation_filters,
349
- overwrite=overwrite,
350
- thickness=thickness,
351
- alpha=alpha,
352
- with_text=with_text,
353
- export_version=export_version)
354
- logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
355
- i=i_try + 1,
356
- n=NUM_TRIES,
357
- id=item.id))
358
- if download is not None:
359
- break
360
- except Exception as e:
361
- logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
362
- i=i_try + 1,
363
- n=NUM_TRIES))
364
- err = e
365
- trace = traceback.format_exc()
366
- pbar.update()
367
- if download is None:
368
- if err is None:
369
- err = self.items_repository._client_api.platform_exception
370
- reporter.set_index(status="error", ref=item.id, success=False,
371
- error="{}\n{}".format(err, trace))
372
- else:
373
- reporter.set_index(ref=item.id, status="download", output=download, success=True)
374
-
375
- @staticmethod
376
- def download_annotations(dataset: entities.Dataset,
377
- local_path: str,
378
- filters: entities.Filters = None,
379
- annotation_filters: entities.Filters = None,
380
- overwrite=False,
381
- include_annotations_in_output=True,
382
- export_png_files=False,
383
- filter_output_annotations=False,
384
- export_version=entities.ExportVersion.V1,
385
- dataset_lock=False,
386
- lock_timeout_sec=None,
387
- export_summary=False
388
- ):
389
- """
390
- Download annotations json for entire dataset
391
-
392
- :param dataset: Dataset entity
393
- :param local_path:
394
- :param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
395
- :param annotation_filters: dl.Filters entity to filters items' annotations
396
- :param overwrite: optional - overwrite annotations if exist, default = false
397
- :param include_annotations_in_output: default - True , if export should contain annotations
398
- :param export_png_files: default - if True, semantic annotations should be exported as png files
399
- :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
400
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
401
- :param bool dataset_lock: optional - default = False
402
- :param bool export_summary: optional - default = False
403
- :param int lock_timeout_sec: optional
404
- :return:
405
- """
406
- local_path = os.path.join(local_path, "json")
407
- zip_filepath = None
408
- # only if json folder does not exist or exist and overwrite
409
- if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
410
- # create local path to download and save to
411
- if not os.path.isdir(local_path):
412
- os.makedirs(local_path)
413
-
414
- try:
415
- payload = dict()
416
- if filters is not None:
417
- payload['itemsQuery'] = filters.prepare()
418
- payload['annotations'] = {
419
- "include": include_annotations_in_output,
420
- "convertSemantic": export_png_files
421
- }
422
- payload['exportVersion'] = export_version
423
- if annotation_filters is not None:
424
- payload['annotationsQuery'] = annotation_filters.prepare()
425
- payload['annotations']['filter'] = filter_output_annotations
426
- if dataset_lock:
427
- payload['datasetLock'] = dataset_lock
428
-
429
- if export_summary:
430
- payload['summary'] = export_summary
431
-
432
- if lock_timeout_sec:
433
- payload['lockTimeoutSec'] = lock_timeout_sec
434
-
435
- success, response = dataset._client_api.gen_request(req_type='post',
436
- path='/datasets/{}/export'.format(dataset.id),
437
- json_req=payload,
438
- headers={'user_query': filters._user_query})
439
- if not success:
440
- raise exceptions.PlatformException(response)
441
- command = entities.Command.from_json(_json=response.json(),
442
- client_api=dataset._client_api)
443
- command = command.wait(timeout=0)
444
- if 'outputItemId' not in command.spec:
445
- raise exceptions.PlatformException(
446
- error='400',
447
- message="outputItemId key is missing in command response: {}".format(response))
448
- item_id = command.spec['outputItemId']
449
- annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
450
- zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
451
- # unzipping annotations to directory
452
- if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
453
- raise exceptions.PlatformException(
454
- error='404',
455
- message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
456
- annotation_zip_item.id))
457
- try:
458
- miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
459
- to_directory=local_path)
460
- except Exception as e:
461
- logger.warning("Failed to extract zip file error: {}".format(e))
462
-
463
- finally:
464
- # cleanup
465
- if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
466
- os.remove(zip_filepath)
467
-
468
@staticmethod
def _download_img_annotations(item: entities.Item,
                              img_filepath,
                              local_path,
                              overwrite,
                              annotation_options,
                              annotation_filters,
                              thickness=1,
                              with_text=False,
                              alpha=1,
                              export_version=entities.ExportVersion.V1
                              ):
    """
    Save the annotations of a single item in every requested format.

    The annotations are loaded from ``<local_path>/json/<relative path>.json`` when that file
    exists and no ``annotation_filters`` were given; otherwise they are listed from the platform.

    :param item: Item entity whose annotations are exported
    :param img_filepath: local path of the item's binary, or None when it was not saved
    :param local_path: output folder; a file path or a trailing ``items`` folder is reduced to its parent
    :param overwrite: re-create mask/instance/on-image/object-id/vtt outputs that already exist
    :param annotation_options: iterable of dl.ViewAnnotationOptions to produce
    :param annotation_filters: Filters entity for the item's annotations, or None
    :param thickness: line thickness forwarded to the annotation renderer
    :param with_text: forwarded to the annotation renderer
    :param alpha: opacity forwarded to the annotation renderer
    :param export_version: with ``V1`` the item's original extension is dropped from output filenames
    :raises PlatformException: for an unknown option, or ANNOTATION_ON_IMAGE without a local image
    """
    # local_path may be a file name - annotations go next to that file's folder
    _, ext = os.path.splitext(local_path)
    if ext:
        local_path = os.path.dirname(local_path)

    # annotation folders are siblings of the "items" folder, not children of it
    if local_path.endswith("/items") or local_path.endswith("\\items"):
        local_path = os.path.dirname(local_path)

    annotation_rel_path = item.filename[1:]
    if img_filepath is not None:
        # keep the remote folder but follow the name the binary was actually saved under
        annotation_rel_path = os.path.join(os.path.dirname(annotation_rel_path),
                                           os.path.basename(img_filepath))

    # find annotations json
    annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
    if export_version == entities.ExportVersion.V1:
        name, _ = os.path.splitext(annotations_json_filepath)
    else:
        name = annotations_json_filepath
    annotations_json_filepath = name + ".json"

    if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
        # json was already exported - read it instead of calling the platform
        with open(annotations_json_filepath, "r", encoding="utf8") as f:
            data = json.load(f)
        if "annotations" in data:
            data = data["annotations"]
        annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
    else:
        # no usable json file - get the annotations from the platform
        annotations = item.annotations.list(filters=annotation_filters)

    # get image shape as (height, width)
    is_url_item = item.metadata. \
                      get('system', dict()). \
                      get('shebang', dict()). \
                      get('linkInfo', dict()). \
                      get('type', None) == 'url'
    orientation = item.system.get('exif', {}).get('Orientation', 0)
    if item.width is not None and item.height is not None:
        if orientation in [5, 6, 7, 8]:
            # these EXIF orientations swap the two axes
            img_shape = (item.width, item.height)
        else:
            img_shape = (item.height, item.width)
    elif img_filepath is not None and ('image' in item.mimetype or is_url_item):
        # close the file handle as soon as the size was read
        with Image.open(img_filepath) as img:
            img_shape = img.size[::-1]
    else:
        img_shape = (0, 0)

    rendered_options = [entities.ViewAnnotationOptions.MASK,
                        entities.ViewAnnotationOptions.INSTANCE,
                        entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
                        entities.ViewAnnotationOptions.OBJECT_ID,
                        entities.ViewAnnotationOptions.VTT]

    # download all annotation options
    for option in annotation_options:
        # get path and create dirs
        annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
        os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)

        if export_version == entities.ExportVersion.V1:
            temp_path, _ = os.path.splitext(annotation_filepath)
        else:
            temp_path = annotation_filepath

        if option == entities.ViewAnnotationOptions.JSON:
            # an existing json file is kept as is (overwrite does not apply to it)
            if not os.path.isfile(annotations_json_filepath):
                annotations.download(
                    filepath=annotations_json_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                )
        elif option in rendered_options:
            if option == entities.ViewAnnotationOptions.VTT:
                annotation_filepath = temp_path + ".vtt"
            elif 'video' in item.mimetype:
                annotation_filepath = temp_path + ".mp4"
            else:
                annotation_filepath = temp_path + ".png"
            if not os.path.isfile(annotation_filepath) or overwrite:
                # if not exists OR (exists AND overwrite)
                os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
                if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
                    raise PlatformException(
                        error="1002",
                        message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
                annotations.download(
                    filepath=annotation_filepath,
                    img_filepath=img_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                    thickness=thickness,
                    alpha=alpha,
                    with_text=with_text,
                    orientation=orientation
                )
        else:
            raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
593
-
594
@staticmethod
def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
    """
    Resolve the folder and the full file path an item is saved to.

    :param local_path: target folder, or a target file name (has an extension and is_folder is False)
    :param item: object exposing ``name`` and ``filename`` (remote path with a leading separator)
    :param to_items_folder: place the file under an ``items`` sub folder
    :param without_relative_path: any value other than None (including False) drops the remote
        folders and saves the file directly under the target folder
    :param is_folder: treat local_path as a folder even if it has an extension
    :return: (folder, filepath)
    """
    looks_like_file = bool(os.path.splitext(local_path)[1])
    if looks_like_file and not is_folder:
        # local_path is already the file name
        return os.path.dirname(local_path), local_path

    # local_path is a directory - build the file name from the item
    if to_items_folder:
        target_dir = os.path.join(local_path, "items")
    elif is_folder:
        target_dir = os.path.join(local_path, "")
    else:
        target_dir = local_path

    leaf = item.name if without_relative_path is not None else item.filename[1:]
    return target_dir, os.path.join(target_dir, leaf)
613
-
614
@staticmethod
def __get_link_source(item):
    """
    Follow a link item to the item (and url) that holds the actual binary.

    :param item: Item entity
    :return: (item, url, is_url) - ``url`` is '' and ``is_url`` is False unless the final item is a url link
    """
    assert isinstance(item, entities.Item)

    def link_type(candidate):
        # None when candidate is not a '.json' link item, otherwise its linkInfo type ('' when missing)
        if not candidate.filename.endswith('.json'):
            return None
        shebang = candidate.metadata.get('system', {}).get('shebang', {})
        if shebang.get('dltype', '') != 'link':
            return None
        return shebang.get('linkInfo', {}).get('type', '')

    if not item.is_fetched or link_type(item) is None:
        return item, '', False

    # walk through id links until reaching something that is not an id link
    while link_type(item) == 'id':
        item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])

    # the last item of the chain may be a url link
    if link_type(item) == 'url':
        return item, item.metadata['system']['shebang']['linkInfo']['ref'], True
    return item, '', False
638
-
639
def __file_validation(self, item, downloaded_file):
    """
    Compare the downloaded size with the size recorded in the item's metadata.

    :param item: object whose ``metadata['system']['size']`` holds the expected size in bytes
    :param downloaded_file: io.BytesIO buffer or a path of a file on disk
    :return: (sizes match, downloaded size, resumable) - resumable is False once more
        bytes than expected were received
    """
    if isinstance(downloaded_file, io.BytesIO):
        actual_size = downloaded_file.getbuffer().nbytes
    else:
        actual_size = os.stat(downloaded_file).st_size
    delta = actual_size - item.metadata['system']['size']
    return delta == 0, actual_size, delta <= 0
653
-
654
def __thread_download(self,
                      item,
                      save_locally,
                      local_path,
                      to_array,
                      local_filepath,
                      overwrite,
                      annotation_options,
                      annotation_filters,
                      chunk_size=8192,
                      thickness=1,
                      with_text=False,
                      alpha=1,
                      export_version=entities.ExportVersion.V1
                      ):
    """
    Download the binary of a single item, resuming interrupted streams.

    The item is streamed with a ``Range`` header starting at the number of bytes already
    received, until the received size matches the size in the item's metadata.
    Url link items are fetched from their url and are not size-validated.

    :param item: Item entity to download
    :param save_locally: bool. save to file or return buffer
    :param local_path: item local folder to save to.
    :param to_array: return a numpy array instead of a buffer (only when save_locally is False)
    :param local_filepath: item local filepath (None when save_locally is False)
    :param overwrite: download again even if the file already exists
    :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
    :param annotation_filters: Filters entity to filter item's annotation
    :param chunk_size: size of chunks to download - optional. default = 8192
    :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
    :param with_text: optional - add text to annotations, default = False
    :param alpha: opacity value [0 1], default 1
    :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
    :return: the local file path when saved locally, otherwise an io.BytesIO buffer or a numpy array
    :raises PlatformException: when the stream request fails, the download is corrupted,
        or a non-image item is requested as an array
    """
    # check if need to download image binary from platform
    need_to_download = True
    if save_locally and os.path.isfile(local_filepath):
        need_to_download = overwrite

    item, url, is_url = self.__get_link_source(item=item)

    if not need_to_download:
        return local_filepath

    client_api = self.items_repository._client_api
    # with the sdk cache enabled the response content is the path of the cached file
    use_cache = client_api.sdk_cache.use_cache and client_api.cache is not None

    # save as byte stream
    data = io.BytesIO()
    chunk_resume = {0: 0}
    start_point = 0
    download_done = False
    while chunk_resume.get(start_point, '') != 3 and not download_done:
        if not is_url:
            headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
            result, response = client_api.gen_request(req_type="get",
                                                      headers=headers,
                                                      path="/items/{}/stream".format(item.id),
                                                      stream=True,
                                                      dataset_id=item.dataset_id)
            if not result:
                # local_filepath is None for in-memory downloads - nothing to clean up then
                if local_filepath and os.path.isfile(local_filepath + '.download'):
                    os.remove(local_filepath + '.download')
                raise PlatformException(response)
        else:
            _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
            if local_filepath:
                local_filepath += ext
            response = self.get_url_stream(url=url)

        if save_locally:
            # save to file
            if not os.path.exists(os.path.dirname(local_filepath)):
                # create folder if not exists
                os.makedirs(os.path.dirname(local_filepath), exist_ok=True)

            # decide if create progress bar for item
            total_length = response.headers.get("content-length")
            one_file_pbar = None
            try:
                if total_length is not None and int(total_length) > 10e6:  # size larger than 10 MB
                    one_file_pbar = tqdm.tqdm(total=int(total_length),
                                              unit='B',
                                              unit_scale=True,
                                              unit_divisor=1024,
                                              position=1,
                                              file=sys.stdout,
                                              disable=client_api.verbose.disable_progress_bar_download_item,
                                              desc='Download Item')
            except Exception as err:
                one_file_pbar = None
                logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))

            # start download
            try:
                if use_cache:
                    response_output = os.path.normpath(response.content)
                    if isinstance(response_output, bytes):
                        response_output = response_output.decode('utf-8')[1:-1]

                    if os.path.isfile(os.path.normpath(response_output)):
                        if response_output != local_filepath:
                            shutil.copyfile(os.path.normpath(response_output), local_filepath)
                    else:
                        logger.warning('Cached file was not found: {}'.format(response_output))
                    # nothing to resume when reading from the cache - leave the loop
                    download_done = True
                else:
                    temp_file_path = local_filepath + '.download'
                    try:
                        with open(temp_file_path, "ab") as f:
                            try:
                                for chunk in response.iter_content(chunk_size=chunk_size):
                                    if chunk:  # filter out keep-alive new chunks
                                        f.write(chunk)
                                        if one_file_pbar is not None:
                                            one_file_pbar.update(len(chunk))
                            except Exception as err:
                                # a broken stream is not fatal: the size check below decides
                                # whether to resume from the bytes received so far
                                logger.debug('Stream interrupted, will try to resume: {}'.format(err))

                        # validate only after the file was closed so its size on disk is final
                        file_validation = True
                        if not is_url:
                            file_validation, start_point, chunk_resume = self.__get_next_chunk(
                                item=item,
                                download_progress=temp_file_path,
                                chunk_resume=chunk_resume)
                        if file_validation:
                            shutil.move(temp_file_path, local_filepath)
                            download_done = True
                    except Exception:
                        if os.path.isfile(temp_file_path):
                            os.remove(temp_file_path)
                        raise
            finally:
                # close the per-file bar on failures as well
                if one_file_pbar is not None:
                    one_file_pbar.close()

            # save to output variable
            data = local_filepath
            # export annotations only once the binary is complete on disk
            if download_done and item.annotated and annotation_options:
                self._download_img_annotations(item=item,
                                               img_filepath=local_filepath,
                                               annotation_options=annotation_options,
                                               annotation_filters=annotation_filters,
                                               local_path=local_path,
                                               overwrite=overwrite,
                                               thickness=thickness,
                                               alpha=alpha,
                                               with_text=with_text,
                                               export_version=export_version
                                               )
        else:
            if use_cache:
                response_output = os.path.normpath(response.content)
                if isinstance(response_output, bytes):
                    response_output = response_output.decode('utf-8')[1:-1]

                if os.path.isfile(response_output):
                    # read (not write!) the cached file and keep `data` a seekable stream
                    with open(response_output, 'rb') as f:
                        data = io.BytesIO(f.read())
                else:
                    logger.warning('Cached file was not found: {}'.format(response_output))
                # nothing to resume when reading from the cache - leave the loop
                download_done = True
            else:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # filter out keep-alive new chunks
                        data.write(chunk)

                file_validation = True
                if not is_url:
                    file_validation, start_point, chunk_resume = self.__get_next_chunk(
                        item=item,
                        download_progress=data,
                        chunk_resume=chunk_resume)
                if not file_validation:
                    # partial buffer - request the remaining bytes
                    continue
                download_done = True

            # go back to the beginning of the stream
            data.seek(0)
            data.name = item.name
            if to_array:
                if 'image' not in item.mimetype and not is_url:
                    raise PlatformException(
                        error="400",
                        message='Download element type numpy.ndarray support for image only. '
                                'Item Id: {} is {} type'.format(item.id, item.mimetype))

                data = np.array(Image.open(data))
    return data
840
-
841
def __get_next_chunk(self, item, download_progress, chunk_resume):
    """
    Validate the bytes received so far and update the resume bookkeeping.

    :param item: item being downloaded
    :param download_progress: partial download - a file path or an io.BytesIO buffer
    :param chunk_resume: dict mapping a resume offset to the number of failed attempts from it
    :return: (download is complete, offset to resume from, updated chunk_resume)
    :raises PlatformException: after the third failure at the same offset, or when more
        bytes than expected were received
    """
    is_complete, received_bytes, can_resume = self.__file_validation(item=item,
                                                                     downloaded_file=download_progress)
    if is_complete:
        return is_complete, received_bytes, chunk_resume

    if chunk_resume.get(received_bytes, None) is None:
        # progress was made - start counting attempts for the new offset
        chunk_resume = {received_bytes: 1}
    else:
        chunk_resume[received_bytes] += 1

    if chunk_resume[received_bytes] == 3 or not can_resume:
        raise PlatformException(
            error=500,
            message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
    return is_complete, received_bytes, chunk_resume
855
-
856
def __default_local_path(self):
    """
    Build the default download folder under the sdk cache "bin" path.

    The folder depends on what the items repository is bound to: no dataset, a dataset
    without a project, or a dataset with a project.

    :return: local folder path
    """
    repo = self.items_repository
    if repo._dataset is None:
        sub_folders = ("items",)
    elif repo.dataset._project is None:
        # by dataset name
        sub_folders = ("datasets", "{}_{}".format(repo.dataset.name, repo.dataset.id))
    else:
        # by dataset and project name
        sub_folders = ("projects", repo.dataset.project.name, "datasets", repo.dataset.name)

    local_path = os.path.join(repo._client_api.sdk_cache.cache_path_bin, *sub_folders)
    logger.info("Downloading to: {}".format(local_path))
    return local_path
883
-
884
@staticmethod
def get_url_stream(url):
    """
    Open a streaming GET request to a user provided url.

    Both http and https are mounted with one adapter that retries up to 3 times
    (total / read / connect) with a backoff factor of 1.

    :param url: address to download the binaries from
    :return: the streamed response
    """
    retry_policy = Retry(total=3, read=3, connect=3, backoff_factor=1)
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    get_request = requests.Request(method='GET', url=url).prepare()
    with requests.Session() as session:
        for scheme in ('http://', 'https://'):
            session.mount(scheme, retrying_adapter)
        streamed = session.send(request=get_request, stream=True)
    return streamed
1
+ from pathlib import Path
2
+ from requests.adapters import HTTPAdapter
3
+ from urllib3.util import Retry
4
+ from PIL import Image
5
+ import numpy as np
6
+ import traceback
7
+ from urllib.parse import urlparse, unquote
8
+ import requests
9
+ import logging
10
+ import shutil
11
+ import json
12
+ import tqdm
13
+ import sys
14
+ import os
15
+ import io
16
+
17
+ from .. import entities, repositories, miscellaneous, PlatformException, exceptions
18
+ from ..services import Reporter
19
+
20
logger = logging.getLogger(name='dtlpy')

NUM_TRIES = 3  # number of download attempts per item before it is reported as an error
23
+
24
+
25
+ class Downloader:
26
def __init__(self, items_repository):
    """
    :param items_repository: items repository this downloader uses to get and list items
        and to reach the client api
    """
    self.items_repository = items_repository
28
+
29
def download(self,
             # filter options
             filters: entities.Filters = None,
             items=None,
             # download options
             local_path=None,
             file_types=None,
             save_locally=True,
             to_array=False,
             overwrite=False,
             annotation_filters: entities.Filters = None,
             annotation_options: entities.ViewAnnotationOptions = None,
             to_items_folder=True,
             thickness=1,
             with_text=False,
             without_relative_path=None,
             avoid_unnecessary_annotation_download=False,
             include_annotations_in_output=True,
             export_png_files=False,
             filter_output_annotations=False,
             alpha=1,
             export_version=entities.ExportVersion.V1,
             dataset_lock=False,
             lock_timeout_sec=None,
             export_summary=False,
             raise_on_error=False
             ):
    """
    Download items (by filters or by an explicit list) and, optionally, their annotations.

    :param dtlpy.entities.filters.Filters filters: Filters entity selecting the items; ignored when `items` is given
    :param items: Item entity or item id (or a list of either) to download; an empty list is treated as None
    :param local_path: local folder or filename to save to. defaults to a folder under the sdk cache
    :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
    :param save_locally: bool. save to disk or return a buffer
    :param to_array: returns Ndarray when True and save_locally = False
    :param overwrite: optional - default = False
    :param annotation_filters: Filters entity to filter annotations for download
    :param annotation_options: download annotations options. a dl.ViewAnnotationOptions or a list of them
    :param to_items_folder: Create 'items' folder and download items to it
    :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
    :param with_text: optional - add text to annotations, default = False
    :param without_relative_path: download items without the relative path from platform
    :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
    :param include_annotations_in_output: default - True, if export should contain annotations
    :param export_png_files: default - False, if semantic annotations should be exported as png files
    :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
    :param alpha: opacity value [0 1], default 1
    :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
    :param bool dataset_lock: optional - default = False
    :param int lock_timeout_sec: optional
    :param bool export_summary: optional - default = False
    :param bool raise_on_error: raise an exception if an error occurs
    :return: an empty list when no items were found; a single output (or None) when at most one
        file was downloaded and at most one already existed; otherwise the reporter's outputs
    :raises PlatformException: for an unknown annotation option or items type, and for
        download errors when raise_on_error is True
    """

    ###################
    # Default options #
    ###################
    # annotation options
    if annotation_options is None:
        annotation_options = list()
    elif not isinstance(annotation_options, list):
        annotation_options = [annotation_options]
    for ann_option in annotation_options:
        if not isinstance(ann_option, entities.ViewAnnotationOptions) and \
                ann_option not in list(entities.ViewAnnotationOptions):
            raise PlatformException(
                error='400',
                message='Unknown annotation download option: {}, please choose from: {}'.format(
                    ann_option, list(entities.ViewAnnotationOptions)))
    # normalize items argument: treat empty list as "no items specified"
    if isinstance(items, list) and len(items) == 0:
        items = None
    #####################
    # items to download #
    #####################
    if items is not None:
        # convert input to a list
        if not isinstance(items, list):
            items = [items]
        # get items by id
        if isinstance(items[0], str):
            items = [self.items_repository.get(item_id=item_id) for item_id in items]
        elif not isinstance(items[0], entities.Item):
            raise PlatformException(
                error="400",
                message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
                    type(items[0])
                )
            )
        # create filters to download annotations
        filters = entities.Filters(field='id',
                                   values=[item.id for item in items],
                                   operator=entities.FiltersOperations.IN)
        filters._user_query = 'false'

        # convert to list of list (like pages and page)
        items_to_download = [items]
        num_items = len(items)
    else:
        # filters
        if filters is None:
            filters = entities.Filters()
            filters._user_query = 'false'
        # file types
        if file_types is not None:
            filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
        if annotation_filters is not None:
            # items are restricted to those matching the annotation filters (join)
            if len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0:
                for annotation_filter_and in annotation_filters.and_filter_list:
                    filters.add_join(field=annotation_filter_and.field,
                                     values=annotation_filter_and.values,
                                     operator=annotation_filter_and.operator,
                                     method=entities.FiltersMethod.AND)
                for annotation_filter_or in annotation_filters.or_filter_list:
                    filters.add_join(field=annotation_filter_or.field,
                                     values=annotation_filter_or.values,
                                     operator=annotation_filter_or.operator,
                                     method=entities.FiltersMethod.OR)
            elif annotation_filters.custom_filter is not None:
                annotation_query_dict = annotation_filters.prepare()
                items_query_dict = filters.prepare()
                items_query_dict["join"] = annotation_query_dict
                filters.reset()
                filters.custom_filter = items_query_dict
        else:
            annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
            filters._user_query = 'false'

        items_to_download = self.items_repository.list(filters=filters)
        num_items = items_to_download.items_count

    if num_items == 0:
        logger.warning('No items found! Nothing was downloaded')
        return list()

    ##############
    # local path #
    ##############
    is_folder = False
    if local_path is None:
        # create default local path
        local_path = self.__default_local_path()

    if os.path.isdir(local_path):
        logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
            local_path))
        is_folder = True
    else:
        # check if filename
        _, ext = os.path.splitext(local_path)
        if num_items > 1:
            is_folder = True
        else:
            # a single item: local_path is a file name only if its extension matches the item's
            item_to_download = items_to_download[0][0]
            _, ext_download = os.path.splitext(item_to_download.name)
            if ext_download != ext:
                is_folder = True
        if is_folder and save_locally:
            path_to_create = local_path
            if local_path.endswith('*'):
                path_to_create = os.path.dirname(local_path)
            logger.info("Creating new directory for download: {}".format(path_to_create))
            os.makedirs(path_to_create, exist_ok=True)

    ####################
    # annotations json #
    ####################
    # export the annotations' json files for all items in one request;
    # items whose json is still missing fall back to a per-item download
    if num_items > 1 and annotation_options:
        # a new folder named 'json' will be created under the "local_path"
        logger.info("Downloading annotations formats: {}".format(annotation_options))
        self.download_annotations(dataset=self.items_repository.dataset,
                                  filters=filters,
                                  annotation_filters=annotation_filters,
                                  local_path=local_path,
                                  overwrite=overwrite,
                                  include_annotations_in_output=include_annotations_in_output,
                                  export_png_files=export_png_files,
                                  filter_output_annotations=filter_output_annotations,
                                  export_version=export_version,
                                  dataset_lock=dataset_lock,
                                  lock_timeout_sec=lock_timeout_sec,
                                  export_summary=export_summary)
    ###############
    # downloading #
    ###############
    # create result lists
    client_api = self.items_repository._client_api

    reporter = Reporter(num_workers=num_items,
                        resource=Reporter.ITEMS_DOWNLOAD,
                        print_error_logs=client_api.verbose.print_error_logs,
                        client_api=client_api)
    jobs = [None for _ in range(num_items)]
    # pool
    pool = client_api.thread_pools(pool_name='item.download')
    # download
    pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset,
                     file=sys.stdout, desc='Download Items')
    try:
        i_item = 0
        for page in items_to_download:
            for item in page:
                if item.type == "dir":
                    continue
                if save_locally:
                    # get local file path
                    item_local_path, item_local_filepath = self.__get_local_filepath(
                        local_path=local_path,
                        without_relative_path=without_relative_path,
                        item=item,
                        to_items_folder=to_items_folder,
                        is_folder=is_folder)

                    if os.path.isfile(item_local_filepath) and not overwrite:
                        logger.debug("File Exists: {}".format(item_local_filepath))
                        reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
                        pbar.update()
                        if annotation_options and item.annotated:
                            # download annotations only
                            jobs[i_item] = pool.submit(
                                self._download_img_annotations,
                                item=item,
                                img_filepath=item_local_filepath,
                                overwrite=overwrite,
                                annotation_options=annotation_options,
                                annotation_filters=annotation_filters,
                                local_path=item_local_path,
                                thickness=thickness,
                                alpha=alpha,
                                with_text=with_text,
                                export_version=export_version,
                            )
                        i_item += 1
                        continue
                else:
                    item_local_path = None
                    item_local_filepath = None

                # download single item
                jobs[i_item] = pool.submit(
                    self.__thread_download_wrapper,
                    i_item=i_item,
                    item=item,
                    item_local_path=item_local_path,
                    item_local_filepath=item_local_filepath,
                    save_locally=save_locally,
                    to_array=to_array,
                    annotation_options=annotation_options,
                    annotation_filters=annotation_filters,
                    reporter=reporter,
                    pbar=pbar,
                    overwrite=overwrite,
                    thickness=thickness,
                    alpha=alpha,
                    with_text=with_text,
                    export_version=export_version,
                )
                i_item += 1
    except Exception:
        logger.exception('Error downloading:')
    finally:
        try:
            # wait for all submitted jobs; an exception raised by a job propagates from here
            for job in jobs:
                if job is not None:
                    job.result()
        finally:
            # always release the progress bar, even if a job raised
            pbar.close()
    # reporting
    n_download = reporter.status_count(status='download')
    n_exist = reporter.status_count(status='exist')
    n_error = reporter.status_count(status='error')
    logger.info("Number of files downloaded:{}".format(n_download))
    logger.info("Number of files exists: {}".format(n_exist))
    logger.info("Total number of files: {}".format(n_download + n_exist))

    # log error
    if n_error > 0:
        log_filepath = reporter.generate_log_files()
        # Get up to 5 error examples for the exception message
        error_text = ""
        if reporter._errors:
            for error_counter, (_id, error) in enumerate(reporter._errors.items(), start=1):
                error_text += f"Item ID: {_id}, Error: {error} | "
                if error_counter >= 5:
                    break
        error_message = f"Errors in {n_error} files. Errors: {error_text}"
        if log_filepath is not None:
            error_message += f", see {log_filepath} for full log"
        if raise_on_error is True:
            raise PlatformException(
                error="400", message=error_message
            )
        else:
            logger.warning(error_message)
    if int(n_download) <= 1 and int(n_exist) <= 1:
        # single result - return it directly instead of an iterator
        try:
            return next(reporter.output)
        except StopIteration:
            return None
    return reporter.output
343
+
344
def __thread_download_wrapper(self, i_item,
                              # item params
                              item, item_local_path, item_local_filepath,
                              save_locally, to_array, overwrite,
                              # annotations params
                              annotation_options, annotation_filters, with_text, thickness,
                              # threading params
                              reporter, pbar, alpha, export_version):
    """
    Download one item with up to NUM_TRIES attempts and report the outcome.

    The progress bar is advanced once, and the reporter gets either a "download" entry
    with the output or an "error" entry with the last exception and its traceback.
    """
    result = None
    last_error = None
    last_trace = None
    attempt = 0
    while result is None and attempt < NUM_TRIES:
        attempt += 1
        try:
            logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
                                                                                 i=attempt,
                                                                                 n=NUM_TRIES))
            result = self.__thread_download(item=item,
                                            save_locally=save_locally,
                                            to_array=to_array,
                                            local_path=item_local_path,
                                            local_filepath=item_local_filepath,
                                            annotation_options=annotation_options,
                                            annotation_filters=annotation_filters,
                                            overwrite=overwrite,
                                            thickness=thickness,
                                            alpha=alpha,
                                            with_text=with_text,
                                            export_version=export_version)
            logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
                                                                                             i=attempt,
                                                                                             n=NUM_TRIES,
                                                                                             id=item.id))
        except Exception as e:
            logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
                                                                            i=attempt,
                                                                            n=NUM_TRIES))
            last_error = e
            last_trace = traceback.format_exc()

    pbar.update()
    if result is not None:
        reporter.set_index(ref=item.id, status="download", output=result, success=True)
        return

    if last_error is None:
        # nothing was raised - fall back to the client's last platform exception
        last_error = self.items_repository._client_api.platform_exception
    reporter.set_index(status="error", ref=item.id, success=False,
                       error="{}\n{}".format(last_error, last_trace))
393
+
394
@staticmethod
def download_annotations(dataset: entities.Dataset,
                         local_path: str,
                         filters: entities.Filters = None,
                         annotation_filters: entities.Filters = None,
                         overwrite=False,
                         include_annotations_in_output=True,
                         export_png_files=False,
                         filter_output_annotations=False,
                         export_version=entities.ExportVersion.V1,
                         dataset_lock=False,
                         lock_timeout_sec=None,
                         export_summary=False
                         ):
    """
    Export the dataset annotations as a zip and extract it into ``<local_path>/json``.

    The export is requested with a POST to ``/datasets/{id}/export``, the resulting
    command is waited on, the output zip item is downloaded and unzipped, and the zip
    file is removed afterwards (also when an error is raised).

    :param dataset: Dataset entity
    :param str local_path: base folder; a ``json`` sub-folder is appended to it
    :param dtlpy.entities.filters.Filters filters: optional - dl.Filters entity to filter items
    :param annotation_filters: optional - dl.Filters entity to filter the items' annotations
    :param overwrite: optional - run the export even if the target folder check finds existing data, default = False
    :param include_annotations_in_output: default - True, sent as ``annotations.include`` in the export payload
    :param export_png_files: default - False, sent as ``annotations.convertSemantic`` in the export payload
    :param filter_output_annotations: default - False, sent as ``annotations.filter`` (only when annotation_filters is given)
    :param str export_version: sent as ``exportVersion`` and forwarded to the zip item download
    :param bool dataset_lock: optional - default = False
    :param int lock_timeout_sec: optional
    :param bool export_summary: optional - default = False
    :return: None
    :raises exceptions.PlatformException: if the export request fails, the command has no
        ``outputItemId``, or the zip file was not downloaded
    """
    local_path = os.path.join(local_path, "json")
    zip_filepath = None
    # NOTE(review): local_path already ends with "json", so this checks "<local_path>/json/json".
    # The check is kept as-is because callers may rely on the export running every time - confirm.
    if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
        # create local path to download and save to
        os.makedirs(local_path, exist_ok=True)

        try:
            payload = dict()
            if filters is not None:
                payload['itemsQuery'] = filters.prepare()
            payload['annotations'] = {
                "include": include_annotations_in_output,
                "convertSemantic": export_png_files
            }
            payload['exportVersion'] = export_version
            if annotation_filters is not None:
                payload['annotationsQuery'] = annotation_filters.prepare()
                payload['annotations']['filter'] = filter_output_annotations
            if dataset_lock:
                payload['datasetLock'] = dataset_lock

            if export_summary:
                payload['summary'] = export_summary

            if lock_timeout_sec:
                payload['lockTimeoutSec'] = lock_timeout_sec

            request_kwargs = {
                'req_type': 'post',
                'path': '/datasets/{}/export'.format(dataset.id),
                'json_req': payload,
            }
            # filters is optional: only attach the user-query header when there is a filter to read it from
            if filters is not None:
                request_kwargs['headers'] = {'user_query': filters._user_query}
            success, response = dataset._client_api.gen_request(**request_kwargs)
            if not success:
                raise exceptions.PlatformException(response)
            command = entities.Command.from_json(_json=response.json(),
                                                 client_api=dataset._client_api)
            command = command.wait(timeout=0)
            if 'outputItemId' not in command.spec:
                raise exceptions.PlatformException(
                    error='400',
                    message="outputItemId key is missing in command response: {}".format(response))
            item_id = command.spec['outputItemId']
            annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
            zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
            # unzipping annotations to directory
            if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
                raise exceptions.PlatformException(
                    error='404',
                    message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
                        annotation_zip_item.id))
            try:
                miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
                                                      to_directory=local_path)
            except Exception as e:
                # best effort: a failed extraction is reported but does not fail the call
                logger.warning("Failed to extract zip file error: {}".format(e))

        finally:
            # cleanup
            if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
                os.remove(zip_filepath)
486
+
487
@staticmethod
def _download_img_annotations(item: entities.Item,
                              img_filepath,
                              local_path,
                              overwrite,
                              annotation_options,
                              annotation_filters,
                              thickness=1,
                              with_text=False,
                              alpha=1,
                              export_version=entities.ExportVersion.V1
                              ):
    """
    Write the item's annotations to disk once per requested annotation option.

    Annotations are loaded from ``<local_path>/json/<relative path>.json`` when that file
    exists and no annotation filters were given; otherwise they are listed from the item.

    :param item: Item entity whose annotations are saved
    :param img_filepath: local path of the downloaded item binary, or None
    :param local_path: output folder (a file path or a trailing ``items`` folder is reduced to its parent)
    :param overwrite: re-create existing non-JSON annotation files
    :param annotation_options: iterable of dl.ViewAnnotationOptions
    :param annotation_filters: Filters entity to filter the item's annotations
    :param thickness: forwarded to ``annotations.download``
    :param with_text: forwarded to ``annotations.download``
    :param alpha: forwarded to ``annotations.download``
    :param export_version: with ``V1`` the original file extension is dropped from the output names
    :raises PlatformException: for an unknown option, or ANNOTATION_ON_IMAGE without an image file
    """
    # check if local_path is a file name
    _, ext = os.path.splitext(local_path)
    if ext:
        # take the dir of the file for the annotations save
        local_path = os.path.dirname(local_path)

    # fix local path
    if local_path.endswith(("/items", "\\items")):
        local_path = os.path.dirname(local_path)

    annotation_rel_path = item.filename[1:]
    if img_filepath is not None:
        # keep the item's remote folder but use the local file's base name
        dir_name = os.path.dirname(annotation_rel_path)
        base_name = os.path.basename(img_filepath)
        annotation_rel_path = os.path.join(dir_name, base_name)

    # find annotations json
    annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
    if export_version == entities.ExportVersion.V1:
        name, _ = os.path.splitext(annotations_json_filepath)
    else:
        name = annotations_json_filepath
    annotations_json_filepath = name + ".json"

    if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
        # if exists take from json file
        with open(annotations_json_filepath, "r", encoding="utf8") as f:
            data = json.load(f)
        if "annotations" in data:
            data = data["annotations"]
        annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
    else:
        # json file is missing (or filters were given): get the annotations from the platform
        annotations = item.annotations.list(filters=annotation_filters)

    # get image shape
    is_url_item = item.metadata. \
                      get('system', dict()). \
                      get('shebang', dict()). \
                      get('linkInfo', dict()). \
                      get('type', None) == 'url'

    # item is dereferenced above, so it cannot be None at this point
    orientation = item.system.get('exif', {}).get('Orientation', 0)
    if item.width is not None and item.height is not None:
        if orientation in [5, 6, 7, 8]:
            # these EXIF orientations swap width and height
            img_shape = (item.width, item.height)
        else:
            img_shape = (item.height, item.width)
    elif img_filepath is not None and ('image' in item.mimetype or is_url_item):
        # read the size from the local file and close the file handle right away
        with Image.open(img_filepath) as img:
            img_shape = img.size[::-1]
    else:
        img_shape = (0, 0)

    # download all annotation options
    for option in annotation_options:
        # get path and create dirs
        annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
        os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)

        if export_version == entities.ExportVersion.V1:
            temp_path, _ = os.path.splitext(annotation_filepath)
        else:
            temp_path = annotation_filepath

        if option == entities.ViewAnnotationOptions.JSON:
            # an existing json file is never rewritten here, regardless of `overwrite`
            if not os.path.isfile(annotations_json_filepath):
                annotations.download(
                    filepath=annotations_json_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                )
        elif option in [entities.ViewAnnotationOptions.MASK,
                        entities.ViewAnnotationOptions.INSTANCE,
                        entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
                        entities.ViewAnnotationOptions.OBJECT_ID,
                        entities.ViewAnnotationOptions.VTT]:
            if option == entities.ViewAnnotationOptions.VTT:
                annotation_filepath = temp_path + ".vtt"
            elif 'video' in item.mimetype:
                annotation_filepath = temp_path + ".mp4"
            else:
                annotation_filepath = temp_path + ".png"
            if not os.path.isfile(annotation_filepath) or overwrite:
                # if not exists OR (exists AND overwrite)
                os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
                if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
                    raise PlatformException(
                        error="1002",
                        message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
                annotations.download(
                    filepath=annotation_filepath,
                    img_filepath=img_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                    thickness=thickness,
                    alpha=alpha,
                    with_text=with_text,
                    orientation=orientation
                )
        else:
            raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
612
+
613
@staticmethod
def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
    """
    Resolve the folder and the full file path an item is saved to.

    :param local_path: target path; treated as a file name when it has an extension and is_folder is False
    :param item: object providing ``name`` and ``filename``
    :param to_items_folder: append an ``items`` sub-folder to a directory target
    :param without_relative_path: when not None, only ``item.name`` is used under the target folder
    :param is_folder: treat local_path as a directory even if it has an extension
    :return: tuple of (folder path, file path)
    """
    extension = os.path.splitext(local_path)[1]
    if extension and not is_folder:
        # local_path is already the file name
        return os.path.dirname(local_path), local_path

    # local_path is a directory: pick the target folder, then append the item's name
    if to_items_folder:
        target_dir = os.path.join(local_path, "items")
    elif is_folder:
        target_dir = os.path.join(local_path, "")
    else:
        target_dir = local_path

    leaf = item.name if without_relative_path is not None else item.filename[1:]
    return target_dir, os.path.join(target_dir, leaf)
632
+
633
@staticmethod
def __get_link_source(item):
    """
    Follow id-links from a link item and report whether the final item is a url link.

    :param item: Item entity
    :return: tuple of (resolved item, url or '', True if the resolved item is a url link)
    """
    assert isinstance(item, entities.Item)

    def shebang_of(candidate):
        # the 'system.shebang' section of the item metadata, or an empty dict
        return candidate.metadata.get('system', {}).get('shebang', {})

    def is_json_link(candidate):
        return candidate.filename.endswith('.json') and \
               shebang_of(candidate).get('dltype', '') == 'link'

    def link_type(candidate):
        return shebang_of(candidate).get('linkInfo', {}).get('type', '')

    if not item.is_fetched or not is_json_link(item):
        return item, '', False

    # keep resolving while the current item is a link that points to another item id
    while is_json_link(item) and link_type(item) == 'id':
        item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])

    # check if link
    if is_json_link(item) and link_type(item) == 'url':
        return item, item.metadata['system']['shebang']['linkInfo']['ref'], True
    return item, '', False
657
+
658
def __file_validation(self, item, downloaded_file):
    """
    Compare the downloaded size with the size recorded in the item metadata.

    :param item: object whose ``metadata['system']['size']`` holds the expected size
    :param downloaded_file: io.BytesIO buffer or a path to a local file
    :return: tuple of (sizes are equal, downloaded size, False only when more bytes than expected were downloaded)
    """
    if isinstance(downloaded_file, io.BytesIO):
        actual_size = downloaded_file.getbuffer().nbytes
    else:
        actual_size = os.stat(downloaded_file).st_size
    delta = actual_size - item.metadata['system']['size']
    # an oversized download cannot be resumed
    return delta == 0, actual_size, delta <= 0
672
+
673
def __thread_download(self,
                      item,
                      save_locally,
                      local_path,
                      to_array,
                      local_filepath,
                      overwrite,
                      annotation_options,
                      annotation_filters,
                      chunk_size=8192,
                      thickness=1,
                      with_text=False,
                      alpha=1,
                      export_version=entities.ExportVersion.V1
                      ):
    """
    Download a single item's binary, to a local file or to an in-memory buffer.

    Link items are resolved first. Platform items are streamed with a ``Range`` header
    so that an incomplete download can continue from the bytes already received.

    :param item: Item entity to download
    :param save_locally: bool. save to file or return buffer
    :param local_path: item local folder to save to.
    :param to_array: return a numpy array when True and save_locally is False
    :param local_filepath: item local filepath
    :param overwrite: download again when the local file already exists
    :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
    :param annotation_filters: Filters entity to filter item's annotation
    :param chunk_size: size of chunks to download - optional. default = 8192
    :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
    :param with_text: optional - add text to annotations, default = False
    :param alpha: opacity value [0 1], default 1
    :param ExportVersion export_version: forwarded to the annotations download
    :return: local file path when saved locally, otherwise an io.BytesIO buffer or a numpy array
    :raises PlatformException: if the stream request fails, the downloaded size cannot be
        validated, or an array is requested for a non-image item
    """
    # check if need to download image binary from platform
    need_to_download = True
    if save_locally and os.path.isfile(local_filepath):
        need_to_download = overwrite

    item, url, is_url = self.__get_link_source(item=item)
    is_local_link = isinstance(url, str) and url.startswith('file://')

    # save as byte stream
    data = io.BytesIO()
    if need_to_download:
        chunk_resume = {0: 0}
        start_point = 0
        download_done = False
        # NOTE(review): in the cache branches below nothing advances the loop state when the
        # cached path is not a file, so the request is repeated - confirm this cannot happen.
        while chunk_resume.get(start_point, '') != 3 and not download_done:
            if not is_url:
                headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
                result, response = self.items_repository._client_api.gen_request(req_type="get",
                                                                                 headers=headers,
                                                                                 path="/items/{}/stream".format(
                                                                                     item.id),
                                                                                 stream=True,
                                                                                 dataset_id=item.dataset_id)
                if not result:
                    # local_filepath may be empty when the item is not saved locally
                    if local_filepath and os.path.isfile(local_filepath + '.download'):
                        os.remove(local_filepath + '.download')
                    raise PlatformException(response)
            else:
                _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
                if local_filepath:
                    local_filepath += ext
                response = self.get_url_stream(url=url)

            if save_locally:
                # save to file
                if not os.path.exists(os.path.dirname(local_filepath)):
                    # create folder if not exists
                    os.makedirs(os.path.dirname(local_filepath), exist_ok=True)

                # decide if create progress bar for item
                if not is_local_link:
                    total_length = response.headers.get("content-length")
                else:
                    # local file stream: measure the length by seeking to the end and back
                    response.seek(0, 2)
                    total_length = response.tell()
                    response.seek(0)
                one_file_pbar = None
                try:
                    one_file_progress_bar = total_length is not None and int(
                        total_length) > 10e6  # size larger than 10 MB
                    if one_file_progress_bar:
                        one_file_pbar = tqdm.tqdm(total=int(total_length),
                                                  unit='B',
                                                  unit_scale=True,
                                                  unit_divisor=1024,
                                                  position=1,
                                                  file=sys.stdout,
                                                  disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
                                                  desc='Download Item')
                except Exception as err:
                    one_file_progress_bar = False
                    logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))

                # start download
                if self.items_repository._client_api.sdk_cache.use_cache and \
                        self.items_repository._client_api.cache is not None:
                    # with the sdk cache enabled the response content is used as a local file path
                    response_output = os.path.normpath(response.content)
                    if isinstance(response_output, bytes):
                        response_output = response_output.decode('utf-8')[1:-1]

                    if os.path.isfile(os.path.normpath(response_output)):
                        if response_output != local_filepath:
                            source_path = os.path.normpath(response_output)
                            shutil.copyfile(source_path, local_filepath)
                        download_done = True
                else:
                    try:
                        temp_file_path = local_filepath + '.download'
                        # append mode: a resumed request continues the partial file
                        with open(temp_file_path, "ab") as f:
                            try:
                                if is_local_link and isinstance(response, io.BufferedReader):
                                    generator = iter(lambda: response.read(chunk_size), b'')
                                else:
                                    generator = response.iter_content(chunk_size=chunk_size)
                                for chunk in generator:
                                    if chunk:  # filter out keep-alive new chunks
                                        f.write(chunk)
                                        if one_file_progress_bar:
                                            one_file_pbar.update(len(chunk))
                            except Exception as err:
                                # deliberate best effort: the size validation below decides whether to resume
                                logger.debug('Download stream was interrupted: {}'.format(err))
                            finally:
                                if is_local_link and isinstance(response, io.BufferedReader):
                                    try:
                                        response.close()
                                    except Exception:
                                        pass

                        file_validation = True
                        if not is_url:
                            file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                               download_progress=temp_file_path,
                                                                                               chunk_resume=chunk_resume)
                        if file_validation:
                            shutil.move(temp_file_path, local_filepath)
                            download_done = True
                    except Exception:
                        if os.path.isfile(temp_file_path):
                            os.remove(temp_file_path)
                        raise
                if one_file_progress_bar:
                    one_file_pbar.close()
                # save to output variable
                data = local_filepath
                # if image - can download annotation mask
                if item.annotated and annotation_options:
                    self._download_img_annotations(item=item,
                                                   img_filepath=local_filepath,
                                                   annotation_options=annotation_options,
                                                   annotation_filters=annotation_filters,
                                                   local_path=local_path,
                                                   overwrite=overwrite,
                                                   thickness=thickness,
                                                   alpha=alpha,
                                                   with_text=with_text,
                                                   export_version=export_version
                                                   )
            else:
                if self.items_repository._client_api.sdk_cache.use_cache and \
                        self.items_repository._client_api.cache is not None:
                    response_output = os.path.normpath(response.content)
                    if isinstance(response_output, bytes):
                        response_output = response_output.decode('utf-8')[1:-1]

                    if os.path.isfile(response_output):
                        source_file = response_output
                        # read (not write) the cached file and keep `data` a seekable buffer
                        with open(source_file, 'rb') as f:
                            data = io.BytesIO(f.read())
                        download_done = True
                else:
                    try:
                        if is_local_link and isinstance(response, io.BufferedReader):
                            generator = iter(lambda: response.read(chunk_size), b'')
                        else:
                            generator = response.iter_content(chunk_size=chunk_size)
                        for chunk in generator:
                            if chunk:  # filter out keep-alive new chunks
                                data.write(chunk)

                        file_validation = True
                        if not is_url:
                            file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                               download_progress=data,
                                                                                               chunk_resume=chunk_resume)
                        if file_validation:
                            download_done = True
                        else:
                            # incomplete: request the remaining bytes before rewinding the buffer
                            continue
                    finally:
                        if is_local_link and isinstance(response, io.BufferedReader):
                            try:
                                response.close()
                            except Exception:
                                pass
                # go back to the beginning of the stream
                data.seek(0)
                data.name = item.name
                if not save_locally and to_array:
                    if 'image' not in item.mimetype and not is_url:
                        raise PlatformException(
                            error="400",
                            message='Download element type numpy.ndarray support for image only. '
                                    'Item Id: {} is {} type'.format(item.id, item.mimetype))

                    data = np.array(Image.open(data))
    else:
        data = local_filepath
    return data
887
+
888
def __get_next_chunk(self, item, download_progress, chunk_resume):
    """
    Validate the bytes downloaded so far and update the retry counter for the resume offset.

    :param item: item whose expected size is checked
    :param download_progress: io.BytesIO buffer or path of the partially downloaded file
    :param chunk_resume: dict mapping a resume offset to the number of attempts made from it
    :return: tuple of (size is valid, next start offset, updated chunk_resume)
    :raises PlatformException: after the third attempt from the same offset, or when the
        download cannot be resumed
    """
    is_complete, downloaded_size, can_resume = self.__file_validation(item=item,
                                                                      downloaded_file=download_progress)
    if is_complete:
        return is_complete, downloaded_size, chunk_resume

    # count attempts per offset; progress to a new offset starts a fresh counter
    if chunk_resume.get(downloaded_size, None) is None:
        chunk_resume = {downloaded_size: 1}
    else:
        chunk_resume[downloaded_size] += 1
    if chunk_resume[downloaded_size] == 3 or not can_resume:
        raise PlatformException(
            error=500,
            message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
    return is_complete, downloaded_size, chunk_resume
902
+
903
def __default_local_path(self):
    """
    Build the default download folder under the sdk cache ``cache_path_bin`` folder.

    The folder is ``items`` when the repository has no dataset,
    ``datasets/<name>_<id>`` when the dataset has no project, and
    ``projects/<project name>/datasets/<dataset name>`` otherwise.

    :return: the default local path
    """
    repo = self.items_repository
    if repo._dataset is None:
        parts = (repo._client_api.sdk_cache.cache_path_bin, "items")
    elif repo.dataset._project is None:
        # by dataset name
        parts = (
            repo._client_api.sdk_cache.cache_path_bin,
            "datasets",
            "{}_{}".format(repo.dataset.name, repo.dataset.id),
        )
    else:
        # by dataset and project name
        parts = (
            repo._client_api.sdk_cache.cache_path_bin,
            "projects",
            repo.dataset.project.name,
            "datasets",
            repo.dataset.name,
        )
    local_path = os.path.join(*parts)
    logger.info("Downloading to: {}".format(local_path))
    return local_path
930
+
931
@staticmethod
def get_url_stream(url):
    """
    Open a readable stream for a url.

    ``file://`` urls are opened from the local file system; any other url is
    requested with GET in streaming mode through a session configured with retries.

    :param url: url to open
    :return: io.BufferedReader for ``file://`` urls, otherwise the streamed response
    :raises PlatformException: for a ``file://`` url whose path is missing, is not a
        file, or cannot be read because of permissions
    """
    if not url.startswith('file://'):
        request = requests.Request(method='GET', url=url).prepare()
        with requests.Session() as session:
            retry_policy = Retry(
                total=3,
                read=3,
                connect=3,
                backoff_factor=1,
            )
            http_adapter = HTTPAdapter(max_retries=retry_policy)
            for scheme in ('http://', 'https://'):
                session.mount(scheme, http_adapter)
            return session.send(request=request, stream=True)

    location = urlparse(url)
    fs_path = unquote(location.path)
    if location.netloc:
        # keep the host part of the url as the first path component
        fs_path = f"/{location.netloc}{fs_path}"
    fs_path = Path(fs_path).expanduser().resolve()

    if not fs_path.exists():
        raise PlatformException(
            error='404',
            message=f'Local file not found: {url}'
        )
    if not fs_path.is_file():
        raise PlatformException(
            error='400',
            message=f'Path is not a file: {url}'
        )

    try:
        return io.BufferedReader(io.FileIO(fs_path, 'rb'))
    except PermissionError as e:
        raise PlatformException(
            error='403',
            message=f'Permission denied accessing file: {url}'
        ) from e