dtlpy 1.115.44__py3-none-any.whl → 1.116.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. dtlpy/__init__.py +491 -491
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/code_server/config.yaml +2 -2
  5. dtlpy/assets/code_server/installation.sh +24 -24
  6. dtlpy/assets/code_server/launch.json +13 -13
  7. dtlpy/assets/code_server/settings.json +2 -2
  8. dtlpy/assets/main.py +53 -53
  9. dtlpy/assets/main_partial.py +18 -18
  10. dtlpy/assets/mock.json +11 -11
  11. dtlpy/assets/model_adapter.py +83 -83
  12. dtlpy/assets/package.json +61 -61
  13. dtlpy/assets/package_catalog.json +29 -29
  14. dtlpy/assets/package_gitignore +307 -307
  15. dtlpy/assets/service_runners/__init__.py +33 -33
  16. dtlpy/assets/service_runners/converter.py +96 -96
  17. dtlpy/assets/service_runners/multi_method.py +49 -49
  18. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  19. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  20. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  21. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  22. dtlpy/assets/service_runners/single_method.py +37 -37
  23. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  24. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  25. dtlpy/assets/service_runners/single_method_item.py +41 -41
  26. dtlpy/assets/service_runners/single_method_json.py +42 -42
  27. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  28. dtlpy/assets/voc_annotation_template.xml +23 -23
  29. dtlpy/caches/base_cache.py +32 -32
  30. dtlpy/caches/cache.py +473 -473
  31. dtlpy/caches/dl_cache.py +201 -201
  32. dtlpy/caches/filesystem_cache.py +89 -89
  33. dtlpy/caches/redis_cache.py +84 -84
  34. dtlpy/dlp/__init__.py +20 -20
  35. dtlpy/dlp/cli_utilities.py +367 -367
  36. dtlpy/dlp/command_executor.py +764 -764
  37. dtlpy/dlp/dlp +1 -1
  38. dtlpy/dlp/dlp.bat +1 -1
  39. dtlpy/dlp/dlp.py +128 -128
  40. dtlpy/dlp/parser.py +651 -651
  41. dtlpy/entities/__init__.py +83 -83
  42. dtlpy/entities/analytic.py +347 -347
  43. dtlpy/entities/annotation.py +1879 -1879
  44. dtlpy/entities/annotation_collection.py +699 -699
  45. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  46. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  47. dtlpy/entities/annotation_definitions/box.py +195 -195
  48. dtlpy/entities/annotation_definitions/classification.py +67 -67
  49. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  50. dtlpy/entities/annotation_definitions/cube.py +204 -204
  51. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  52. dtlpy/entities/annotation_definitions/description.py +32 -32
  53. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  54. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  55. dtlpy/entities/annotation_definitions/gis.py +69 -69
  56. dtlpy/entities/annotation_definitions/note.py +139 -139
  57. dtlpy/entities/annotation_definitions/point.py +117 -117
  58. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  59. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  60. dtlpy/entities/annotation_definitions/pose.py +92 -92
  61. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  62. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  63. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  64. dtlpy/entities/annotation_definitions/text.py +85 -85
  65. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  66. dtlpy/entities/app.py +220 -220
  67. dtlpy/entities/app_module.py +107 -107
  68. dtlpy/entities/artifact.py +174 -174
  69. dtlpy/entities/assignment.py +399 -399
  70. dtlpy/entities/base_entity.py +214 -214
  71. dtlpy/entities/bot.py +113 -113
  72. dtlpy/entities/codebase.py +292 -292
  73. dtlpy/entities/collection.py +38 -38
  74. dtlpy/entities/command.py +169 -169
  75. dtlpy/entities/compute.py +449 -449
  76. dtlpy/entities/dataset.py +1299 -1299
  77. dtlpy/entities/directory_tree.py +44 -44
  78. dtlpy/entities/dpk.py +470 -470
  79. dtlpy/entities/driver.py +235 -235
  80. dtlpy/entities/execution.py +397 -397
  81. dtlpy/entities/feature.py +124 -124
  82. dtlpy/entities/feature_set.py +145 -145
  83. dtlpy/entities/filters.py +798 -798
  84. dtlpy/entities/gis_item.py +107 -107
  85. dtlpy/entities/integration.py +184 -184
  86. dtlpy/entities/item.py +959 -959
  87. dtlpy/entities/label.py +123 -123
  88. dtlpy/entities/links.py +85 -85
  89. dtlpy/entities/message.py +175 -175
  90. dtlpy/entities/model.py +684 -684
  91. dtlpy/entities/node.py +1005 -1005
  92. dtlpy/entities/ontology.py +810 -803
  93. dtlpy/entities/organization.py +287 -287
  94. dtlpy/entities/package.py +657 -657
  95. dtlpy/entities/package_defaults.py +5 -5
  96. dtlpy/entities/package_function.py +185 -185
  97. dtlpy/entities/package_module.py +113 -113
  98. dtlpy/entities/package_slot.py +118 -118
  99. dtlpy/entities/paged_entities.py +299 -299
  100. dtlpy/entities/pipeline.py +624 -624
  101. dtlpy/entities/pipeline_execution.py +279 -279
  102. dtlpy/entities/project.py +394 -394
  103. dtlpy/entities/prompt_item.py +505 -505
  104. dtlpy/entities/recipe.py +301 -301
  105. dtlpy/entities/reflect_dict.py +102 -102
  106. dtlpy/entities/resource_execution.py +138 -138
  107. dtlpy/entities/service.py +963 -963
  108. dtlpy/entities/service_driver.py +117 -117
  109. dtlpy/entities/setting.py +294 -294
  110. dtlpy/entities/task.py +495 -495
  111. dtlpy/entities/time_series.py +143 -143
  112. dtlpy/entities/trigger.py +426 -426
  113. dtlpy/entities/user.py +118 -118
  114. dtlpy/entities/webhook.py +124 -124
  115. dtlpy/examples/__init__.py +19 -19
  116. dtlpy/examples/add_labels.py +135 -135
  117. dtlpy/examples/add_metadata_to_item.py +21 -21
  118. dtlpy/examples/annotate_items_using_model.py +65 -65
  119. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  120. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  121. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  122. dtlpy/examples/convert_annotation_types.py +51 -51
  123. dtlpy/examples/converter.py +143 -143
  124. dtlpy/examples/copy_annotations.py +22 -22
  125. dtlpy/examples/copy_folder.py +31 -31
  126. dtlpy/examples/create_annotations.py +51 -51
  127. dtlpy/examples/create_video_annotations.py +83 -83
  128. dtlpy/examples/delete_annotations.py +26 -26
  129. dtlpy/examples/filters.py +113 -113
  130. dtlpy/examples/move_item.py +23 -23
  131. dtlpy/examples/play_video_annotation.py +13 -13
  132. dtlpy/examples/show_item_and_mask.py +53 -53
  133. dtlpy/examples/triggers.py +49 -49
  134. dtlpy/examples/upload_batch_of_items.py +20 -20
  135. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  136. dtlpy/examples/upload_items_with_modalities.py +43 -43
  137. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  138. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  139. dtlpy/exceptions.py +125 -125
  140. dtlpy/miscellaneous/__init__.py +20 -20
  141. dtlpy/miscellaneous/dict_differ.py +95 -95
  142. dtlpy/miscellaneous/git_utils.py +217 -217
  143. dtlpy/miscellaneous/json_utils.py +14 -14
  144. dtlpy/miscellaneous/list_print.py +105 -105
  145. dtlpy/miscellaneous/zipping.py +130 -130
  146. dtlpy/ml/__init__.py +20 -20
  147. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  148. dtlpy/ml/base_model_adapter.py +1257 -1230
  149. dtlpy/ml/metrics.py +461 -461
  150. dtlpy/ml/predictions_utils.py +274 -274
  151. dtlpy/ml/summary_writer.py +57 -57
  152. dtlpy/ml/train_utils.py +60 -60
  153. dtlpy/new_instance.py +252 -252
  154. dtlpy/repositories/__init__.py +56 -56
  155. dtlpy/repositories/analytics.py +85 -85
  156. dtlpy/repositories/annotations.py +916 -916
  157. dtlpy/repositories/apps.py +383 -383
  158. dtlpy/repositories/artifacts.py +452 -452
  159. dtlpy/repositories/assignments.py +599 -599
  160. dtlpy/repositories/bots.py +213 -213
  161. dtlpy/repositories/codebases.py +559 -559
  162. dtlpy/repositories/collections.py +332 -332
  163. dtlpy/repositories/commands.py +152 -152
  164. dtlpy/repositories/compositions.py +61 -61
  165. dtlpy/repositories/computes.py +439 -439
  166. dtlpy/repositories/datasets.py +1504 -1504
  167. dtlpy/repositories/downloader.py +976 -923
  168. dtlpy/repositories/dpks.py +433 -433
  169. dtlpy/repositories/drivers.py +482 -482
  170. dtlpy/repositories/executions.py +815 -815
  171. dtlpy/repositories/feature_sets.py +226 -226
  172. dtlpy/repositories/features.py +255 -255
  173. dtlpy/repositories/integrations.py +484 -484
  174. dtlpy/repositories/items.py +912 -912
  175. dtlpy/repositories/messages.py +94 -94
  176. dtlpy/repositories/models.py +1000 -1000
  177. dtlpy/repositories/nodes.py +80 -80
  178. dtlpy/repositories/ontologies.py +511 -511
  179. dtlpy/repositories/organizations.py +525 -525
  180. dtlpy/repositories/packages.py +1941 -1941
  181. dtlpy/repositories/pipeline_executions.py +451 -451
  182. dtlpy/repositories/pipelines.py +640 -640
  183. dtlpy/repositories/projects.py +539 -539
  184. dtlpy/repositories/recipes.py +419 -399
  185. dtlpy/repositories/resource_executions.py +137 -137
  186. dtlpy/repositories/schema.py +120 -120
  187. dtlpy/repositories/service_drivers.py +213 -213
  188. dtlpy/repositories/services.py +1704 -1704
  189. dtlpy/repositories/settings.py +339 -339
  190. dtlpy/repositories/tasks.py +1477 -1477
  191. dtlpy/repositories/times_series.py +278 -278
  192. dtlpy/repositories/triggers.py +536 -536
  193. dtlpy/repositories/upload_element.py +257 -257
  194. dtlpy/repositories/uploader.py +661 -661
  195. dtlpy/repositories/webhooks.py +249 -249
  196. dtlpy/services/__init__.py +22 -22
  197. dtlpy/services/aihttp_retry.py +131 -131
  198. dtlpy/services/api_client.py +1785 -1785
  199. dtlpy/services/api_reference.py +40 -40
  200. dtlpy/services/async_utils.py +133 -133
  201. dtlpy/services/calls_counter.py +44 -44
  202. dtlpy/services/check_sdk.py +68 -68
  203. dtlpy/services/cookie.py +115 -115
  204. dtlpy/services/create_logger.py +156 -156
  205. dtlpy/services/events.py +84 -84
  206. dtlpy/services/logins.py +235 -235
  207. dtlpy/services/reporter.py +256 -256
  208. dtlpy/services/service_defaults.py +91 -91
  209. dtlpy/utilities/__init__.py +20 -20
  210. dtlpy/utilities/annotations/__init__.py +16 -16
  211. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  212. dtlpy/utilities/base_package_runner.py +285 -264
  213. dtlpy/utilities/converter.py +1650 -1650
  214. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  215. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  216. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  217. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  218. dtlpy/utilities/local_development/__init__.py +1 -1
  219. dtlpy/utilities/local_development/local_session.py +179 -179
  220. dtlpy/utilities/reports/__init__.py +2 -2
  221. dtlpy/utilities/reports/figures.py +343 -343
  222. dtlpy/utilities/reports/report.py +71 -71
  223. dtlpy/utilities/videos/__init__.py +17 -17
  224. dtlpy/utilities/videos/video_player.py +598 -598
  225. dtlpy/utilities/videos/videos.py +470 -470
  226. {dtlpy-1.115.44.data → dtlpy-1.116.6.data}/scripts/dlp +1 -1
  227. dtlpy-1.116.6.data/scripts/dlp.bat +2 -0
  228. {dtlpy-1.115.44.data → dtlpy-1.116.6.data}/scripts/dlp.py +128 -128
  229. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/METADATA +186 -186
  230. dtlpy-1.116.6.dist-info/RECORD +239 -0
  231. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/WHEEL +1 -1
  232. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/licenses/LICENSE +200 -200
  233. tests/features/environment.py +551 -551
  234. dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
  235. dtlpy-1.115.44.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.115.44.dist-info/RECORD +0 -240
  237. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/entry_points.txt +0 -0
  238. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/top_level.txt +0 -0
@@ -1,923 +1,976 @@
1
- from requests.adapters import HTTPAdapter
2
- from urllib3.util import Retry
3
- from PIL import Image
4
- import numpy as np
5
- import traceback
6
- import warnings
7
- import requests
8
- import logging
9
- import shutil
10
- import json
11
- import tqdm
12
- import sys
13
- import os
14
- import io
15
-
16
- from .. import entities, repositories, miscellaneous, PlatformException, exceptions
17
- from ..services import Reporter
18
-
19
- logger = logging.getLogger(name='dtlpy')
20
-
21
- NUM_TRIES = 3 # try to download 3 time before fail on item
22
-
23
-
24
- class Downloader:
25
- def __init__(self, items_repository):
26
- self.items_repository = items_repository
27
-
28
- def download(self,
29
- # filter options
30
- filters: entities.Filters = None,
31
- items=None,
32
- # download options
33
- local_path=None,
34
- file_types=None,
35
- save_locally=True,
36
- to_array=False,
37
- overwrite=False,
38
- annotation_filters: entities.Filters = None,
39
- annotation_options: entities.ViewAnnotationOptions = None,
40
- to_items_folder=True,
41
- thickness=1,
42
- with_text=False,
43
- without_relative_path=None,
44
- avoid_unnecessary_annotation_download=False,
45
- include_annotations_in_output=True,
46
- export_png_files=False,
47
- filter_output_annotations=False,
48
- alpha=1,
49
- export_version=entities.ExportVersion.V1,
50
- dataset_lock=False,
51
- lock_timeout_sec=None,
52
- export_summary=False,
53
- raise_on_error=False
54
- ):
55
- """
56
- Download dataset by filters.
57
- Filtering the dataset for items and save them local
58
- Optional - also download annotation, mask, instance and image mask of the item
59
-
60
- :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
61
- :param items: download Item entity or item_id (or a list of item)
62
- :param local_path: local folder or filename to save to.
63
- :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
64
- :param save_locally: bool. save to disk or return a buffer
65
- :param to_array: returns Ndarray when True and local_path = False
66
- :param overwrite: optional - default = False
67
- :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
68
- :param annotation_filters: Filters entity to filter annotations for download
69
- :param to_items_folder: Create 'items' folder and download items to it
70
- :param with_text: optional - add text to annotations, default = False
71
- :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
72
- :param without_relative_path: bool - download items without the relative path from platform
73
- :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
74
- :param include_annotations_in_output: default - False , if export should contain annotations
75
- :param export_png_files: default - True, if semantic annotations should be exported as png files
76
- :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
77
- :param alpha: opacity value [0 1], default 1
78
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
79
- :param bool dataset_lock: optional - default = False
80
- :param bool export_summary: optional - default = False
81
- :param int lock_timeout_sec: optional
82
- :param bool raise_on_error: raise an exception if an error occurs
83
- :return: Output (list)
84
- """
85
-
86
- ###################
87
- # Default options #
88
- ###################
89
- # annotation options
90
- if annotation_options is None:
91
- annotation_options = list()
92
- elif not isinstance(annotation_options, list):
93
- annotation_options = [annotation_options]
94
- for ann_option in annotation_options:
95
- if not isinstance(ann_option, entities.ViewAnnotationOptions):
96
- if ann_option not in list(entities.ViewAnnotationOptions):
97
- raise PlatformException(
98
- error='400',
99
- message='Unknown annotation download option: {}, please choose from: {}'.format(
100
- ann_option, list(entities.ViewAnnotationOptions)))
101
- # normalize items argument: treat empty list as “no items specified”
102
- if isinstance(items, list) and len(items) == 0:
103
- items = None
104
- #####################
105
- # items to download #
106
- #####################
107
- if items is not None:
108
- # convert input to a list
109
- if not isinstance(items, list):
110
- items = [items]
111
- # get items by id
112
- if isinstance(items[0], str):
113
- items = [self.items_repository.get(item_id=item_id) for item_id in items]
114
- elif isinstance(items[0], entities.Item):
115
- pass
116
- else:
117
- raise PlatformException(
118
- error="400",
119
- message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
120
- type(items[0])
121
- )
122
- )
123
- # create filters to download annotations
124
- filters = entities.Filters(field='id',
125
- values=[item.id for item in items],
126
- operator=entities.FiltersOperations.IN)
127
- filters._user_query = 'false'
128
-
129
- # convert to list of list (like pages and page)
130
- items_to_download = [items]
131
- num_items = len(items)
132
- else:
133
- # filters
134
- if filters is None:
135
- filters = entities.Filters()
136
- filters._user_query = 'false'
137
- # file types
138
- if file_types is not None:
139
- filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
140
- if annotation_filters is not None:
141
- if len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0:
142
- for annotation_filter_and in annotation_filters.and_filter_list:
143
- filters.add_join(field=annotation_filter_and.field,
144
- values=annotation_filter_and.values,
145
- operator=annotation_filter_and.operator,
146
- method=entities.FiltersMethod.AND)
147
- for annotation_filter_or in annotation_filters.or_filter_list:
148
- filters.add_join(field=annotation_filter_or.field,
149
- values=annotation_filter_or.values,
150
- operator=annotation_filter_or.operator,
151
- method=entities.FiltersMethod.OR)
152
- elif annotation_filters.custom_filter is not None:
153
- annotation_query_dict = annotation_filters.prepare()
154
- items_query_dict = filters.prepare()
155
- items_query_dict["join"] = annotation_query_dict
156
- filters.reset()
157
- filters.custom_filter = items_query_dict
158
-
159
- else:
160
- annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
161
- filters._user_query = 'false'
162
-
163
- items_to_download = self.items_repository.list(filters=filters)
164
- num_items = items_to_download.items_count
165
-
166
- if num_items == 0:
167
- logger.warning('No items found! Nothing was downloaded')
168
- return list()
169
-
170
- ##############
171
- # local path #
172
- ##############
173
- is_folder = False
174
- if local_path is None:
175
- # create default local path
176
- local_path = self.__default_local_path()
177
-
178
- if os.path.isdir(local_path):
179
- logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
180
- local_path))
181
- is_folder = True
182
- else:
183
- # check if filename
184
- _, ext = os.path.splitext(local_path)
185
- if num_items > 1:
186
- is_folder = True
187
- else:
188
- item_to_download = items_to_download[0][0]
189
- file_name = item_to_download.name
190
- _, ext_download = os.path.splitext(file_name)
191
- if ext_download != ext:
192
- is_folder = True
193
- if is_folder and save_locally:
194
- path_to_create = local_path
195
- if local_path.endswith('*'):
196
- path_to_create = os.path.dirname(local_path)
197
- logger.info("Creating new directory for download: {}".format(path_to_create))
198
- os.makedirs(path_to_create, exist_ok=True)
199
-
200
- ####################
201
- # annotations json #
202
- ####################
203
- # download annotations' json files in a new thread
204
- # items will start downloading and if json not exists yet - will download for each file
205
- if num_items > 1 and annotation_options:
206
- # a new folder named 'json' will be created under the "local_path"
207
- logger.info("Downloading annotations formats: {}".format(annotation_options))
208
- self.download_annotations(**{
209
- "dataset": self.items_repository.dataset,
210
- "filters": filters,
211
- "annotation_filters": annotation_filters,
212
- "local_path": local_path,
213
- 'overwrite': overwrite,
214
- 'include_annotations_in_output': include_annotations_in_output,
215
- 'export_png_files': export_png_files,
216
- 'filter_output_annotations': filter_output_annotations,
217
- 'export_version': export_version,
218
- 'dataset_lock': dataset_lock,
219
- 'lock_timeout_sec': lock_timeout_sec,
220
- 'export_summary': export_summary
221
- })
222
- ###############
223
- # downloading #
224
- ###############
225
- # create result lists
226
- client_api = self.items_repository._client_api
227
-
228
- reporter = Reporter(num_workers=num_items,
229
- resource=Reporter.ITEMS_DOWNLOAD,
230
- print_error_logs=client_api.verbose.print_error_logs,
231
- client_api=client_api)
232
- jobs = [None for _ in range(num_items)]
233
- # pool
234
- pool = client_api.thread_pools(pool_name='item.download')
235
- # download
236
- pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
237
- desc='Download Items')
238
- try:
239
- i_item = 0
240
- for page in items_to_download:
241
- for item in page:
242
- if item.type == "dir":
243
- continue
244
- if save_locally:
245
- # get local file path
246
- item_local_path, item_local_filepath = self.__get_local_filepath(
247
- local_path=local_path,
248
- without_relative_path=without_relative_path,
249
- item=item,
250
- to_items_folder=to_items_folder,
251
- is_folder=is_folder)
252
-
253
- if os.path.isfile(item_local_filepath) and not overwrite:
254
- logger.debug("File Exists: {}".format(item_local_filepath))
255
- reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
256
- pbar.update()
257
- if annotation_options and item.annotated:
258
- # download annotations only
259
- jobs[i_item] = pool.submit(
260
- self._download_img_annotations,
261
- **{
262
- "item": item,
263
- "img_filepath": item_local_filepath,
264
- "overwrite": overwrite,
265
- "annotation_options": annotation_options,
266
- "annotation_filters": annotation_filters,
267
- "local_path": item_local_path,
268
- "thickness": thickness,
269
- "alpha": alpha,
270
- "with_text": with_text,
271
- "export_version": export_version,
272
- },
273
- )
274
- i_item += 1
275
- continue
276
- else:
277
- item_local_path = None
278
- item_local_filepath = None
279
-
280
- # download single item
281
- jobs[i_item] = pool.submit(
282
- self.__thread_download_wrapper,
283
- **{
284
- "i_item": i_item,
285
- "item": item,
286
- "item_local_path": item_local_path,
287
- "item_local_filepath": item_local_filepath,
288
- "save_locally": save_locally,
289
- "to_array": to_array,
290
- "annotation_options": annotation_options,
291
- "annotation_filters": annotation_filters,
292
- "reporter": reporter,
293
- "pbar": pbar,
294
- "overwrite": overwrite,
295
- "thickness": thickness,
296
- "alpha": alpha,
297
- "with_text": with_text,
298
- "export_version": export_version
299
- },
300
- )
301
- i_item += 1
302
- except Exception:
303
- logger.exception('Error downloading:')
304
- finally:
305
- _ = [j.result() for j in jobs if j is not None]
306
- pbar.close()
307
- # reporting
308
- n_download = reporter.status_count(status='download')
309
- n_exist = reporter.status_count(status='exist')
310
- n_error = reporter.status_count(status='error')
311
- logger.info("Number of files downloaded:{}".format(n_download))
312
- logger.info("Number of files exists: {}".format(n_exist))
313
- logger.info("Total number of files: {}".format(n_download + n_exist))
314
-
315
- # log error
316
- if n_error > 0:
317
- log_filepath = reporter.generate_log_files()
318
- # Get up to 5 error examples for the exception message
319
- error_text = ""
320
- error_counter = 0
321
- if reporter._errors:
322
- for _id, error in reporter._errors.items():
323
- error_counter += 1
324
- error_text += f"Item ID: {_id}, Error: {error} | "
325
- if error_counter >= 5:
326
- break
327
- error_message = f"Errors in {n_error} files. Errors: {error_text}"
328
- if log_filepath is not None:
329
- error_message += f", see {log_filepath} for full log"
330
- if raise_on_error is True:
331
- raise PlatformException(
332
- error="400", message=error_message
333
- )
334
- else:
335
- logger.warning(error_message)
336
- if int(n_download) <= 1 and int(n_exist) <= 1:
337
- try:
338
- return next(reporter.output)
339
- except StopIteration:
340
- return None
341
- return reporter.output
342
-
343
- def __thread_download_wrapper(self, i_item,
344
- # item params
345
- item, item_local_path, item_local_filepath,
346
- save_locally, to_array, overwrite,
347
- # annotations params
348
- annotation_options, annotation_filters, with_text, thickness,
349
- # threading params
350
- reporter, pbar, alpha, export_version):
351
-
352
- download = None
353
- err = None
354
- trace = None
355
- for i_try in range(NUM_TRIES):
356
- try:
357
- logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
358
- i=i_try + 1,
359
- n=NUM_TRIES))
360
- download = self.__thread_download(item=item,
361
- save_locally=save_locally,
362
- to_array=to_array,
363
- local_path=item_local_path,
364
- local_filepath=item_local_filepath,
365
- annotation_options=annotation_options,
366
- annotation_filters=annotation_filters,
367
- overwrite=overwrite,
368
- thickness=thickness,
369
- alpha=alpha,
370
- with_text=with_text,
371
- export_version=export_version)
372
- logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
373
- i=i_try + 1,
374
- n=NUM_TRIES,
375
- id=item.id))
376
- if download is not None:
377
- break
378
- except Exception as e:
379
- logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
380
- i=i_try + 1,
381
- n=NUM_TRIES))
382
- err = e
383
- trace = traceback.format_exc()
384
- pbar.update()
385
- if download is None:
386
- if err is None:
387
- err = self.items_repository._client_api.platform_exception
388
- reporter.set_index(status="error", ref=item.id, success=False,
389
- error="{}\n{}".format(err, trace))
390
- else:
391
- reporter.set_index(ref=item.id, status="download", output=download, success=True)
392
-
393
- @staticmethod
394
- def download_annotations(dataset: entities.Dataset,
395
- local_path: str,
396
- filters: entities.Filters = None,
397
- annotation_filters: entities.Filters = None,
398
- overwrite=False,
399
- include_annotations_in_output=True,
400
- export_png_files=False,
401
- filter_output_annotations=False,
402
- export_version=entities.ExportVersion.V1,
403
- dataset_lock=False,
404
- lock_timeout_sec=None,
405
- export_summary=False
406
- ):
407
- """
408
- Download annotations json for entire dataset
409
-
410
- :param dataset: Dataset entity
411
- :param local_path:
412
- :param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
413
- :param annotation_filters: dl.Filters entity to filters items' annotations
414
- :param overwrite: optional - overwrite annotations if exist, default = false
415
- :param include_annotations_in_output: default - True , if export should contain annotations
416
- :param export_png_files: default - if True, semantic annotations should be exported as png files
417
- :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
418
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
419
- :param bool dataset_lock: optional - default = False
420
- :param bool export_summary: optional - default = False
421
- :param int lock_timeout_sec: optional
422
- :return:
423
- """
424
- local_path = os.path.join(local_path, "json")
425
- zip_filepath = None
426
- # only if json folder does not exist or exist and overwrite
427
- if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
428
- # create local path to download and save to
429
- if not os.path.isdir(local_path):
430
- os.makedirs(local_path)
431
-
432
- try:
433
- payload = dict()
434
- if filters is not None:
435
- payload['itemsQuery'] = filters.prepare()
436
- payload['annotations'] = {
437
- "include": include_annotations_in_output,
438
- "convertSemantic": export_png_files
439
- }
440
- payload['exportVersion'] = export_version
441
- if annotation_filters is not None:
442
- payload['annotationsQuery'] = annotation_filters.prepare()
443
- payload['annotations']['filter'] = filter_output_annotations
444
- if dataset_lock:
445
- payload['datasetLock'] = dataset_lock
446
-
447
- if export_summary:
448
- payload['summary'] = export_summary
449
-
450
- if lock_timeout_sec:
451
- payload['lockTimeoutSec'] = lock_timeout_sec
452
-
453
- success, response = dataset._client_api.gen_request(req_type='post',
454
- path='/datasets/{}/export'.format(dataset.id),
455
- json_req=payload,
456
- headers={'user_query': filters._user_query})
457
- if not success:
458
- raise exceptions.PlatformException(response)
459
- command = entities.Command.from_json(_json=response.json(),
460
- client_api=dataset._client_api)
461
- command = command.wait(timeout=0)
462
- if 'outputItemId' not in command.spec:
463
- raise exceptions.PlatformException(
464
- error='400',
465
- message="outputItemId key is missing in command response: {}".format(response))
466
- item_id = command.spec['outputItemId']
467
- annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
468
- zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
469
- # unzipping annotations to directory
470
- if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
471
- raise exceptions.PlatformException(
472
- error='404',
473
- message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
474
- annotation_zip_item.id))
475
- try:
476
- miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
477
- to_directory=local_path)
478
- except Exception as e:
479
- logger.warning("Failed to extract zip file error: {}".format(e))
480
-
481
- finally:
482
- # cleanup
483
- if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
484
- os.remove(zip_filepath)
485
-
486
- @staticmethod
487
- def _download_img_annotations(item: entities.Item,
488
- img_filepath,
489
- local_path,
490
- overwrite,
491
- annotation_options,
492
- annotation_filters,
493
- thickness=1,
494
- with_text=False,
495
- alpha=1,
496
- export_version=entities.ExportVersion.V1
497
- ):
498
-
499
- # check if local_path is a file name
500
- _, ext = os.path.splitext(local_path)
501
- if ext:
502
- # take the dir of the file for the annotations save
503
- local_path = os.path.dirname(local_path)
504
-
505
- # fix local path
506
- if local_path.endswith("/items") or local_path.endswith("\\items"):
507
- local_path = os.path.dirname(local_path)
508
-
509
- annotation_rel_path = item.filename[1:]
510
- if img_filepath is not None:
511
- dir_name = os.path.dirname(annotation_rel_path)
512
- base_name = os.path.basename(img_filepath)
513
- annotation_rel_path = os.path.join(dir_name, base_name)
514
-
515
- # find annotations json
516
- annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
517
- if export_version == entities.ExportVersion.V1:
518
- name, _ = os.path.splitext(annotations_json_filepath)
519
- else:
520
- name = annotations_json_filepath
521
- annotations_json_filepath = name + ".json"
522
-
523
- if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
524
- # if exists take from json file
525
- with open(annotations_json_filepath, "r", encoding="utf8") as f:
526
- data = json.load(f)
527
- if "annotations" in data:
528
- data = data["annotations"]
529
- annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
530
- # no need to use the filters here because the annotations were already downloaded with annotation_filters
531
- else:
532
- # if json file doesnt exist get the annotations from platform
533
- annotations = item.annotations.list(filters=annotation_filters)
534
-
535
- # get image shape
536
- is_url_item = item.metadata. \
537
- get('system', dict()). \
538
- get('shebang', dict()). \
539
- get('linkInfo', dict()). \
540
- get('type', None) == 'url'
541
-
542
- if item is not None:
543
- orientation = item.system.get('exif', {}).get('Orientation', 0)
544
- else:
545
- orientation = 0
546
- if item.width is not None and item.height is not None:
547
- if orientation in [5, 6, 7, 8]:
548
- img_shape = (item.width, item.height)
549
- else:
550
- img_shape = (item.height, item.width)
551
- elif ('image' in item.mimetype and img_filepath is not None) or \
552
- (is_url_item and img_filepath is not None):
553
- img_shape = Image.open(img_filepath).size[::-1]
554
- else:
555
- img_shape = (0, 0)
556
-
557
- # download all annotation options
558
- for option in annotation_options:
559
- # get path and create dirs
560
- annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
561
- if not os.path.isdir(os.path.dirname(annotation_filepath)):
562
- os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
563
-
564
- if export_version == entities.ExportVersion.V1:
565
- temp_path, ext = os.path.splitext(annotation_filepath)
566
- else:
567
- temp_path = annotation_filepath
568
-
569
- if option == entities.ViewAnnotationOptions.JSON:
570
- if not os.path.isfile(annotations_json_filepath):
571
- annotations.download(
572
- filepath=annotations_json_filepath,
573
- annotation_format=option,
574
- height=img_shape[0],
575
- width=img_shape[1],
576
- )
577
- elif option in [entities.ViewAnnotationOptions.MASK,
578
- entities.ViewAnnotationOptions.INSTANCE,
579
- entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
580
- entities.ViewAnnotationOptions.OBJECT_ID,
581
- entities.ViewAnnotationOptions.VTT]:
582
- if option == entities.ViewAnnotationOptions.VTT:
583
- annotation_filepath = temp_path + ".vtt"
584
- else:
585
- if 'video' in item.mimetype:
586
- annotation_filepath = temp_path + ".mp4"
587
- else:
588
- annotation_filepath = temp_path + ".png"
589
- if not os.path.isfile(annotation_filepath) or overwrite:
590
- # if not exists OR (exists AND overwrite)
591
- if not os.path.exists(os.path.dirname(annotation_filepath)):
592
- # create folder if not exists
593
- os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
594
- if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
595
- raise PlatformException(
596
- error="1002",
597
- message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
598
- annotations.download(
599
- filepath=annotation_filepath,
600
- img_filepath=img_filepath,
601
- annotation_format=option,
602
- height=img_shape[0],
603
- width=img_shape[1],
604
- thickness=thickness,
605
- alpha=alpha,
606
- with_text=with_text,
607
- orientation=orientation
608
- )
609
- else:
610
- raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
611
-
612
- @staticmethod
613
- def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
614
- # create paths
615
- _, ext = os.path.splitext(local_path)
616
- if ext and not is_folder:
617
- # local_path is a filename
618
- local_filepath = local_path
619
- local_path = os.path.dirname(local_filepath)
620
- else:
621
- # if directory - get item's filename
622
- if to_items_folder:
623
- local_path = os.path.join(local_path, "items")
624
- elif is_folder:
625
- local_path = os.path.join(local_path, "")
626
- if without_relative_path is not None:
627
- local_filepath = os.path.join(local_path, item.name)
628
- else:
629
- local_filepath = os.path.join(local_path, item.filename[1:])
630
- return local_path, local_filepath
631
-
632
- @staticmethod
633
- def __get_link_source(item):
634
- assert isinstance(item, entities.Item)
635
- if not item.is_fetched:
636
- return item, '', False
637
-
638
- if not item.filename.endswith('.json') or \
639
- item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') != 'link':
640
- return item, '', False
641
-
642
- # recursively get next id link item
643
- while item.filename.endswith('.json') and \
644
- item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
645
- item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'id':
646
- item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])
647
-
648
- # check if link
649
- if item.filename.endswith('.json') and \
650
- item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
651
- item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
652
- url = item.metadata['system']['shebang']['linkInfo']['ref']
653
- return item, url, True
654
- else:
655
- return item, '', False
656
-
657
- def __file_validation(self, item, downloaded_file):
658
- res = False
659
- resume = True
660
- if isinstance(downloaded_file, io.BytesIO):
661
- file_size = downloaded_file.getbuffer().nbytes
662
- else:
663
- file_size = os.stat(downloaded_file).st_size
664
- expected_size = item.metadata['system']['size']
665
- size_diff = file_size - expected_size
666
- if size_diff == 0:
667
- res = True
668
- if size_diff > 0:
669
- resume = False
670
- return res, file_size, resume
671
-
672
- def __thread_download(self,
673
- item,
674
- save_locally,
675
- local_path,
676
- to_array,
677
- local_filepath,
678
- overwrite,
679
- annotation_options,
680
- annotation_filters,
681
- chunk_size=8192,
682
- thickness=1,
683
- with_text=False,
684
- alpha=1,
685
- export_version=entities.ExportVersion.V1
686
- ):
687
- """
688
- Get a single item's binary data
689
- Calling this method will returns the item body itself , an image for example with the proper mimetype.
690
-
691
- :param item: Item entity to download
692
- :param save_locally: bool. save to file or return buffer
693
- :param local_path: item local folder to save to.
694
- :param to_array: returns Ndarray when True and local_path = False
695
- :param local_filepath: item local filepath
696
- :param overwrite: overwrite the file is existing
697
- :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
698
- :param annotation_filters: Filters entity to filter item's annotation
699
- :param chunk_size: size of chunks to download - optional. default = 8192
700
- :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
701
- :param with_text: optional - add text to annotations, default = False
702
- :param alpha: opacity value [0 1], default 1
703
- :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
704
- :return:
705
- """
706
- # check if need to download image binary from platform
707
- need_to_download = True
708
- if save_locally and os.path.isfile(local_filepath):
709
- need_to_download = overwrite
710
-
711
- item, url, is_url = self.__get_link_source(item=item)
712
-
713
- # save as byte stream
714
- data = io.BytesIO()
715
- if need_to_download:
716
- chunk_resume = {0: 0}
717
- start_point = 0
718
- download_done = False
719
- while chunk_resume.get(start_point, '') != 3 and not download_done:
720
- if not is_url:
721
- headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
722
- result, response = self.items_repository._client_api.gen_request(req_type="get",
723
- headers=headers,
724
- path="/items/{}/stream".format(
725
- item.id),
726
- stream=True,
727
- dataset_id=item.dataset_id)
728
- if not result:
729
- if os.path.isfile(local_filepath + '.download'):
730
- os.remove(local_filepath + '.download')
731
- raise PlatformException(response)
732
- else:
733
- _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
734
- if local_filepath:
735
- local_filepath += ext
736
- response = self.get_url_stream(url=url)
737
-
738
- if save_locally:
739
- # save to file
740
- if not os.path.exists(os.path.dirname(local_filepath)):
741
- # create folder if not exists
742
- os.makedirs(os.path.dirname(local_filepath), exist_ok=True)
743
-
744
- # decide if create progress bar for item
745
- total_length = response.headers.get("content-length")
746
- one_file_pbar = None
747
- try:
748
- one_file_progress_bar = total_length is not None and int(
749
- total_length) > 10e6 # size larger than 10 MB
750
- if one_file_progress_bar:
751
- one_file_pbar = tqdm.tqdm(total=int(total_length),
752
- unit='B',
753
- unit_scale=True,
754
- unit_divisor=1024,
755
- position=1,
756
- file=sys.stdout,
757
- disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
758
- desc='Download Item')
759
- except Exception as err:
760
- one_file_progress_bar = False
761
- logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))
762
-
763
- # start download
764
- if self.items_repository._client_api.sdk_cache.use_cache and \
765
- self.items_repository._client_api.cache is not None:
766
- response_output = os.path.normpath(response.content)
767
- if isinstance(response_output, bytes):
768
- response_output = response_output.decode('utf-8')[1:-1]
769
-
770
- if os.path.isfile(os.path.normpath(response_output)):
771
- if response_output != local_filepath:
772
- source_path = os.path.normpath(response_output)
773
- shutil.copyfile(source_path, local_filepath)
774
- download_done = True
775
- else:
776
- try:
777
- temp_file_path = local_filepath + '.download'
778
- with open(temp_file_path, "ab") as f:
779
- try:
780
- for chunk in response.iter_content(chunk_size=chunk_size):
781
- if chunk: # filter out keep-alive new chunks
782
- f.write(chunk)
783
- if one_file_progress_bar:
784
- one_file_pbar.update(len(chunk))
785
- except Exception as err:
786
- pass
787
-
788
- file_validation = True
789
- if not is_url:
790
- file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
791
- download_progress=temp_file_path,
792
- chunk_resume=chunk_resume)
793
- if file_validation:
794
- shutil.move(temp_file_path, local_filepath)
795
- download_done = True
796
- except Exception as err:
797
- if os.path.isfile(temp_file_path):
798
- os.remove(temp_file_path)
799
- raise err
800
- if one_file_progress_bar:
801
- one_file_pbar.close()
802
- # save to output variable
803
- data = local_filepath
804
- # if image - can download annotation mask
805
- if item.annotated and annotation_options:
806
- self._download_img_annotations(item=item,
807
- img_filepath=local_filepath,
808
- annotation_options=annotation_options,
809
- annotation_filters=annotation_filters,
810
- local_path=local_path,
811
- overwrite=overwrite,
812
- thickness=thickness,
813
- alpha=alpha,
814
- with_text=with_text,
815
- export_version=export_version
816
- )
817
- else:
818
- if self.items_repository._client_api.sdk_cache.use_cache and \
819
- self.items_repository._client_api.cache is not None:
820
- response_output = os.path.normpath(response.content)
821
- if isinstance(response_output, bytes):
822
- response_output = response_output.decode('utf-8')[1:-1]
823
-
824
- if os.path.isfile(response_output):
825
- source_file = response_output
826
- with open(source_file, 'wb') as f:
827
- data = f.read()
828
- download_done = True
829
- else:
830
- try:
831
- for chunk in response.iter_content(chunk_size=chunk_size):
832
- if chunk: # filter out keep-alive new chunks
833
- data.write(chunk)
834
-
835
- file_validation = True
836
- if not is_url:
837
- file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
838
- download_progress=data,
839
- chunk_resume=chunk_resume)
840
- if file_validation:
841
- download_done = True
842
- else:
843
- continue
844
- except Exception as err:
845
- raise err
846
- # go back to the beginning of the stream
847
- data.seek(0)
848
- data.name = item.name
849
- if not save_locally and to_array:
850
- if 'image' not in item.mimetype and not is_url:
851
- raise PlatformException(
852
- error="400",
853
- message='Download element type numpy.ndarray support for image only. '
854
- 'Item Id: {} is {} type'.format(item.id, item.mimetype))
855
-
856
- data = np.array(Image.open(data))
857
- else:
858
- data = local_filepath
859
- return data
860
-
861
- def __get_next_chunk(self, item, download_progress, chunk_resume):
862
- size_validation, file_size, resume = self.__file_validation(item=item,
863
- downloaded_file=download_progress)
864
- start_point = file_size
865
- if not size_validation:
866
- if chunk_resume.get(start_point, None) is None:
867
- chunk_resume = {start_point: 1}
868
- else:
869
- chunk_resume[start_point] += 1
870
- if chunk_resume[start_point] == 3 or not resume:
871
- raise PlatformException(
872
- error=500,
873
- message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
874
- return size_validation, start_point, chunk_resume
875
-
876
- def __default_local_path(self):
877
-
878
- # create default local path
879
- if self.items_repository._dataset is None:
880
- local_path = os.path.join(
881
- self.items_repository._client_api.sdk_cache.cache_path_bin,
882
- "items",
883
- )
884
- else:
885
- if self.items_repository.dataset._project is None:
886
- # by dataset name
887
- local_path = os.path.join(
888
- self.items_repository._client_api.sdk_cache.cache_path_bin,
889
- "datasets",
890
- "{}_{}".format(self.items_repository.dataset.name, self.items_repository.dataset.id),
891
- )
892
- else:
893
- # by dataset and project name
894
- local_path = os.path.join(
895
- self.items_repository._client_api.sdk_cache.cache_path_bin,
896
- "projects",
897
- self.items_repository.dataset.project.name,
898
- "datasets",
899
- self.items_repository.dataset.name,
900
- )
901
- logger.info("Downloading to: {}".format(local_path))
902
- return local_path
903
-
904
- @staticmethod
905
- def get_url_stream(url):
906
- """
907
- :param url:
908
- """
909
- # This will download the binaries from the URL user provided
910
- prepared_request = requests.Request(method='GET', url=url).prepare()
911
- with requests.Session() as s:
912
- retry = Retry(
913
- total=3,
914
- read=3,
915
- connect=3,
916
- backoff_factor=1,
917
- )
918
- adapter = HTTPAdapter(max_retries=retry)
919
- s.mount('http://', adapter)
920
- s.mount('https://', adapter)
921
- response = s.send(request=prepared_request, stream=True)
922
-
923
- return response
1
+ from pathlib import Path
2
+ from requests.adapters import HTTPAdapter
3
+ from urllib3.util import Retry
4
+ from PIL import Image
5
+ import numpy as np
6
+ import traceback
7
+ from urllib.parse import urlparse, unquote
8
+ import requests
9
+ import logging
10
+ import shutil
11
+ import json
12
+ import tqdm
13
+ import sys
14
+ import os
15
+ import io
16
+
17
+ from .. import entities, repositories, miscellaneous, PlatformException, exceptions
18
+ from ..services import Reporter
19
+
20
+ logger = logging.getLogger(name='dtlpy')
21
+
22
+ NUM_TRIES = 3 # try to download 3 time before fail on item
23
+
24
+
25
+ class Downloader:
26
+ def __init__(self, items_repository):
27
+ self.items_repository = items_repository
28
+
29
+ def download(self,
30
+ # filter options
31
+ filters: entities.Filters = None,
32
+ items=None,
33
+ # download options
34
+ local_path=None,
35
+ file_types=None,
36
+ save_locally=True,
37
+ to_array=False,
38
+ overwrite=False,
39
+ annotation_filters: entities.Filters = None,
40
+ annotation_options: entities.ViewAnnotationOptions = None,
41
+ to_items_folder=True,
42
+ thickness=1,
43
+ with_text=False,
44
+ without_relative_path=None,
45
+ avoid_unnecessary_annotation_download=False,
46
+ include_annotations_in_output=True,
47
+ export_png_files=False,
48
+ filter_output_annotations=False,
49
+ alpha=1,
50
+ export_version=entities.ExportVersion.V1,
51
+ dataset_lock=False,
52
+ lock_timeout_sec=None,
53
+ export_summary=False,
54
+ raise_on_error=False
55
+ ):
56
+ """
57
+ Download dataset by filters.
58
+ Filtering the dataset for items and save them local
59
+ Optional - also download annotation, mask, instance and image mask of the item
60
+
61
+ :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
62
+ :param items: download Item entity or item_id (or a list of item)
63
+ :param local_path: local folder or filename to save to.
64
+ :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
65
+ :param save_locally: bool. save to disk or return a buffer
66
+ :param to_array: returns Ndarray when True and local_path = False
67
+ :param overwrite: optional - default = False
68
+ :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
69
+ :param annotation_filters: Filters entity to filter annotations for download
70
+ :param to_items_folder: Create 'items' folder and download items to it
71
+ :param with_text: optional - add text to annotations, default = False
72
+ :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
73
+ :param without_relative_path: bool - download items without the relative path from platform
74
+ :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
75
+ :param include_annotations_in_output: default - False , if export should contain annotations
76
+ :param export_png_files: default - True, if semantic annotations should be exported as png files
77
+ :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
78
+ :param alpha: opacity value [0 1], default 1
79
+ :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
80
+ :param bool dataset_lock: optional - default = False
81
+ :param bool export_summary: optional - default = False
82
+ :param int lock_timeout_sec: optional
83
+ :param bool raise_on_error: raise an exception if an error occurs
84
+ :return: Output (list)
85
+ """
86
+
87
+ ###################
88
+ # Default options #
89
+ ###################
90
+ # annotation options
91
+ if annotation_options is None:
92
+ annotation_options = list()
93
+ elif not isinstance(annotation_options, list):
94
+ annotation_options = [annotation_options]
95
+ for ann_option in annotation_options:
96
+ if not isinstance(ann_option, entities.ViewAnnotationOptions):
97
+ if ann_option not in list(entities.ViewAnnotationOptions):
98
+ raise PlatformException(
99
+ error='400',
100
+ message='Unknown annotation download option: {}, please choose from: {}'.format(
101
+ ann_option, list(entities.ViewAnnotationOptions)))
102
+ # normalize items argument: treat empty list as “no items specified”
103
+ if isinstance(items, list) and len(items) == 0:
104
+ items = None
105
+ #####################
106
+ # items to download #
107
+ #####################
108
+ if items is not None:
109
+ # convert input to a list
110
+ if not isinstance(items, list):
111
+ items = [items]
112
+ # get items by id
113
+ if isinstance(items[0], str):
114
+ items = [self.items_repository.get(item_id=item_id) for item_id in items]
115
+ elif isinstance(items[0], entities.Item):
116
+ pass
117
+ else:
118
+ raise PlatformException(
119
+ error="400",
120
+ message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
121
+ type(items[0])
122
+ )
123
+ )
124
+ # create filters to download annotations
125
+ filters = entities.Filters(field='id',
126
+ values=[item.id for item in items],
127
+ operator=entities.FiltersOperations.IN)
128
+ filters._user_query = 'false'
129
+
130
+ # convert to list of list (like pages and page)
131
+ items_to_download = [items]
132
+ num_items = len(items)
133
+ else:
134
+ # filters
135
+ if filters is None:
136
+ filters = entities.Filters()
137
+ filters._user_query = 'false'
138
+ # file types
139
+ if file_types is not None:
140
+ filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
141
+ if annotation_filters is not None:
142
+ if len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0:
143
+ for annotation_filter_and in annotation_filters.and_filter_list:
144
+ filters.add_join(field=annotation_filter_and.field,
145
+ values=annotation_filter_and.values,
146
+ operator=annotation_filter_and.operator,
147
+ method=entities.FiltersMethod.AND)
148
+ for annotation_filter_or in annotation_filters.or_filter_list:
149
+ filters.add_join(field=annotation_filter_or.field,
150
+ values=annotation_filter_or.values,
151
+ operator=annotation_filter_or.operator,
152
+ method=entities.FiltersMethod.OR)
153
+ elif annotation_filters.custom_filter is not None:
154
+ annotation_query_dict = annotation_filters.prepare()
155
+ items_query_dict = filters.prepare()
156
+ items_query_dict["join"] = annotation_query_dict
157
+ filters.reset()
158
+ filters.custom_filter = items_query_dict
159
+
160
+ else:
161
+ annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
162
+ filters._user_query = 'false'
163
+
164
+ items_to_download = self.items_repository.list(filters=filters)
165
+ num_items = items_to_download.items_count
166
+
167
+ if num_items == 0:
168
+ logger.warning('No items found! Nothing was downloaded')
169
+ return list()
170
+
171
+ ##############
172
+ # local path #
173
+ ##############
174
+ is_folder = False
175
+ if local_path is None:
176
+ # create default local path
177
+ local_path = self.__default_local_path()
178
+
179
+ if os.path.isdir(local_path):
180
+ logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
181
+ local_path))
182
+ is_folder = True
183
+ else:
184
+ # check if filename
185
+ _, ext = os.path.splitext(local_path)
186
+ if num_items > 1:
187
+ is_folder = True
188
+ else:
189
+ item_to_download = items_to_download[0][0]
190
+ file_name = item_to_download.name
191
+ _, ext_download = os.path.splitext(file_name)
192
+ if ext_download != ext:
193
+ is_folder = True
194
+ if is_folder and save_locally:
195
+ path_to_create = local_path
196
+ if local_path.endswith('*'):
197
+ path_to_create = os.path.dirname(local_path)
198
+ logger.info("Creating new directory for download: {}".format(path_to_create))
199
+ os.makedirs(path_to_create, exist_ok=True)
200
+
201
+ ####################
202
+ # annotations json #
203
+ ####################
204
+ # download annotations' json files in a new thread
205
+ # items will start downloading and if json not exists yet - will download for each file
206
+ if num_items > 1 and annotation_options:
207
+ # a new folder named 'json' will be created under the "local_path"
208
+ logger.info("Downloading annotations formats: {}".format(annotation_options))
209
+ self.download_annotations(**{
210
+ "dataset": self.items_repository.dataset,
211
+ "filters": filters,
212
+ "annotation_filters": annotation_filters,
213
+ "local_path": local_path,
214
+ 'overwrite': overwrite,
215
+ 'include_annotations_in_output': include_annotations_in_output,
216
+ 'export_png_files': export_png_files,
217
+ 'filter_output_annotations': filter_output_annotations,
218
+ 'export_version': export_version,
219
+ 'dataset_lock': dataset_lock,
220
+ 'lock_timeout_sec': lock_timeout_sec,
221
+ 'export_summary': export_summary
222
+ })
223
+ ###############
224
+ # downloading #
225
+ ###############
226
+ # create result lists
227
+ client_api = self.items_repository._client_api
228
+
229
+ reporter = Reporter(num_workers=num_items,
230
+ resource=Reporter.ITEMS_DOWNLOAD,
231
+ print_error_logs=client_api.verbose.print_error_logs,
232
+ client_api=client_api)
233
+ jobs = [None for _ in range(num_items)]
234
+ # pool
235
+ pool = client_api.thread_pools(pool_name='item.download')
236
+ # download
237
+ pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
238
+ desc='Download Items')
239
+ try:
240
+ i_item = 0
241
+ for page in items_to_download:
242
+ for item in page:
243
+ if item.type == "dir":
244
+ continue
245
+ if save_locally:
246
+ # get local file path
247
+ item_local_path, item_local_filepath = self.__get_local_filepath(
248
+ local_path=local_path,
249
+ without_relative_path=without_relative_path,
250
+ item=item,
251
+ to_items_folder=to_items_folder,
252
+ is_folder=is_folder)
253
+
254
+ if os.path.isfile(item_local_filepath) and not overwrite:
255
+ logger.debug("File Exists: {}".format(item_local_filepath))
256
+ reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
257
+ pbar.update()
258
+ if annotation_options and item.annotated:
259
+ # download annotations only
260
+ jobs[i_item] = pool.submit(
261
+ self._download_img_annotations,
262
+ **{
263
+ "item": item,
264
+ "img_filepath": item_local_filepath,
265
+ "overwrite": overwrite,
266
+ "annotation_options": annotation_options,
267
+ "annotation_filters": annotation_filters,
268
+ "local_path": item_local_path,
269
+ "thickness": thickness,
270
+ "alpha": alpha,
271
+ "with_text": with_text,
272
+ "export_version": export_version,
273
+ },
274
+ )
275
+ i_item += 1
276
+ continue
277
+ else:
278
+ item_local_path = None
279
+ item_local_filepath = None
280
+
281
+ # download single item
282
+ jobs[i_item] = pool.submit(
283
+ self.__thread_download_wrapper,
284
+ **{
285
+ "i_item": i_item,
286
+ "item": item,
287
+ "item_local_path": item_local_path,
288
+ "item_local_filepath": item_local_filepath,
289
+ "save_locally": save_locally,
290
+ "to_array": to_array,
291
+ "annotation_options": annotation_options,
292
+ "annotation_filters": annotation_filters,
293
+ "reporter": reporter,
294
+ "pbar": pbar,
295
+ "overwrite": overwrite,
296
+ "thickness": thickness,
297
+ "alpha": alpha,
298
+ "with_text": with_text,
299
+ "export_version": export_version
300
+ },
301
+ )
302
+ i_item += 1
303
+ except Exception:
304
+ logger.exception('Error downloading:')
305
+ finally:
306
+ _ = [j.result() for j in jobs if j is not None]
307
+ pbar.close()
308
+ # reporting
309
+ n_download = reporter.status_count(status='download')
310
+ n_exist = reporter.status_count(status='exist')
311
+ n_error = reporter.status_count(status='error')
312
+ logger.info("Number of files downloaded:{}".format(n_download))
313
+ logger.info("Number of files exists: {}".format(n_exist))
314
+ logger.info("Total number of files: {}".format(n_download + n_exist))
315
+
316
+ # log error
317
+ if n_error > 0:
318
+ log_filepath = reporter.generate_log_files()
319
+ # Get up to 5 error examples for the exception message
320
+ error_text = ""
321
+ error_counter = 0
322
+ if reporter._errors:
323
+ for _id, error in reporter._errors.items():
324
+ error_counter += 1
325
+ error_text += f"Item ID: {_id}, Error: {error} | "
326
+ if error_counter >= 5:
327
+ break
328
+ error_message = f"Errors in {n_error} files. Errors: {error_text}"
329
+ if log_filepath is not None:
330
+ error_message += f", see {log_filepath} for full log"
331
+ if raise_on_error is True:
332
+ raise PlatformException(
333
+ error="400", message=error_message
334
+ )
335
+ else:
336
+ logger.warning(error_message)
337
+ if int(n_download) <= 1 and int(n_exist) <= 1:
338
+ try:
339
+ return next(reporter.output)
340
+ except StopIteration:
341
+ return None
342
+ return reporter.output
343
+
344
+ def __thread_download_wrapper(self, i_item,
345
+ # item params
346
+ item, item_local_path, item_local_filepath,
347
+ save_locally, to_array, overwrite,
348
+ # annotations params
349
+ annotation_options, annotation_filters, with_text, thickness,
350
+ # threading params
351
+ reporter, pbar, alpha, export_version):
352
+
353
+ download = None
354
+ err = None
355
+ trace = None
356
+ for i_try in range(NUM_TRIES):
357
+ try:
358
+ logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
359
+ i=i_try + 1,
360
+ n=NUM_TRIES))
361
+ download = self.__thread_download(item=item,
362
+ save_locally=save_locally,
363
+ to_array=to_array,
364
+ local_path=item_local_path,
365
+ local_filepath=item_local_filepath,
366
+ annotation_options=annotation_options,
367
+ annotation_filters=annotation_filters,
368
+ overwrite=overwrite,
369
+ thickness=thickness,
370
+ alpha=alpha,
371
+ with_text=with_text,
372
+ export_version=export_version)
373
+ logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
374
+ i=i_try + 1,
375
+ n=NUM_TRIES,
376
+ id=item.id))
377
+ if download is not None:
378
+ break
379
+ except Exception as e:
380
+ logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
381
+ i=i_try + 1,
382
+ n=NUM_TRIES))
383
+ err = e
384
+ trace = traceback.format_exc()
385
+ pbar.update()
386
+ if download is None:
387
+ if err is None:
388
+ err = self.items_repository._client_api.platform_exception
389
+ reporter.set_index(status="error", ref=item.id, success=False,
390
+ error="{}\n{}".format(err, trace))
391
+ else:
392
+ reporter.set_index(ref=item.id, status="download", output=download, success=True)
393
+
394
+ @staticmethod
395
+ def download_annotations(dataset: entities.Dataset,
396
+ local_path: str,
397
+ filters: entities.Filters = None,
398
+ annotation_filters: entities.Filters = None,
399
+ overwrite=False,
400
+ include_annotations_in_output=True,
401
+ export_png_files=False,
402
+ filter_output_annotations=False,
403
+ export_version=entities.ExportVersion.V1,
404
+ dataset_lock=False,
405
+ lock_timeout_sec=None,
406
+ export_summary=False
407
+ ):
408
+ """
409
+ Download annotations json for entire dataset
410
+
411
+ :param dataset: Dataset entity
412
+ :param local_path:
413
+ :param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
414
+ :param annotation_filters: dl.Filters entity to filters items' annotations
415
+ :param overwrite: optional - overwrite annotations if exist, default = false
416
+ :param include_annotations_in_output: default - True , if export should contain annotations
417
+ :param export_png_files: default - if True, semantic annotations should be exported as png files
418
+ :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
419
+ :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
420
+ :param bool dataset_lock: optional - default = False
421
+ :param bool export_summary: optional - default = False
422
+ :param int lock_timeout_sec: optional
423
+ :return:
424
+ """
425
+ local_path = os.path.join(local_path, "json")
426
+ zip_filepath = None
427
+ # only if json folder does not exist or exist and overwrite
428
+ if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
429
+ # create local path to download and save to
430
+ if not os.path.isdir(local_path):
431
+ os.makedirs(local_path)
432
+
433
+ try:
434
+ payload = dict()
435
+ if filters is not None:
436
+ payload['itemsQuery'] = filters.prepare()
437
+ payload['annotations'] = {
438
+ "include": include_annotations_in_output,
439
+ "convertSemantic": export_png_files
440
+ }
441
+ payload['exportVersion'] = export_version
442
+ if annotation_filters is not None:
443
+ payload['annotationsQuery'] = annotation_filters.prepare()
444
+ payload['annotations']['filter'] = filter_output_annotations
445
+ if dataset_lock:
446
+ payload['datasetLock'] = dataset_lock
447
+
448
+ if export_summary:
449
+ payload['summary'] = export_summary
450
+
451
+ if lock_timeout_sec:
452
+ payload['lockTimeoutSec'] = lock_timeout_sec
453
+
454
+ success, response = dataset._client_api.gen_request(req_type='post',
455
+ path='/datasets/{}/export'.format(dataset.id),
456
+ json_req=payload,
457
+ headers={'user_query': filters._user_query})
458
+ if not success:
459
+ raise exceptions.PlatformException(response)
460
+ command = entities.Command.from_json(_json=response.json(),
461
+ client_api=dataset._client_api)
462
+ command = command.wait(timeout=0)
463
+ if 'outputItemId' not in command.spec:
464
+ raise exceptions.PlatformException(
465
+ error='400',
466
+ message="outputItemId key is missing in command response: {}".format(response))
467
+ item_id = command.spec['outputItemId']
468
+ annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
469
+ zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
470
+ # unzipping annotations to directory
471
+ if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
472
+ raise exceptions.PlatformException(
473
+ error='404',
474
+ message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
475
+ annotation_zip_item.id))
476
+ try:
477
+ miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
478
+ to_directory=local_path)
479
+ except Exception as e:
480
+ logger.warning("Failed to extract zip file error: {}".format(e))
481
+
482
+ finally:
483
+ # cleanup
484
+ if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
485
+ os.remove(zip_filepath)
486
+
487
+ @staticmethod
488
+ def _download_img_annotations(item: entities.Item,
489
+ img_filepath,
490
+ local_path,
491
+ overwrite,
492
+ annotation_options,
493
+ annotation_filters,
494
+ thickness=1,
495
+ with_text=False,
496
+ alpha=1,
497
+ export_version=entities.ExportVersion.V1
498
+ ):
499
+
500
+ # check if local_path is a file name
501
+ _, ext = os.path.splitext(local_path)
502
+ if ext:
503
+ # take the dir of the file for the annotations save
504
+ local_path = os.path.dirname(local_path)
505
+
506
+ # fix local path
507
+ if local_path.endswith("/items") or local_path.endswith("\\items"):
508
+ local_path = os.path.dirname(local_path)
509
+
510
+ annotation_rel_path = item.filename[1:]
511
+ if img_filepath is not None:
512
+ dir_name = os.path.dirname(annotation_rel_path)
513
+ base_name = os.path.basename(img_filepath)
514
+ annotation_rel_path = os.path.join(dir_name, base_name)
515
+
516
+ # find annotations json
517
+ annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
518
+ if export_version == entities.ExportVersion.V1:
519
+ name, _ = os.path.splitext(annotations_json_filepath)
520
+ else:
521
+ name = annotations_json_filepath
522
+ annotations_json_filepath = name + ".json"
523
+
524
+ if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
525
+ # if exists take from json file
526
+ with open(annotations_json_filepath, "r", encoding="utf8") as f:
527
+ data = json.load(f)
528
+ if "annotations" in data:
529
+ data = data["annotations"]
530
+ annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
531
+ # no need to use the filters here because the annotations were already downloaded with annotation_filters
532
+ else:
533
+ # if json file doesnt exist get the annotations from platform
534
+ annotations = item.annotations.list(filters=annotation_filters)
535
+
536
+ # get image shape
537
+ is_url_item = item.metadata. \
538
+ get('system', dict()). \
539
+ get('shebang', dict()). \
540
+ get('linkInfo', dict()). \
541
+ get('type', None) == 'url'
542
+
543
+ if item is not None:
544
+ orientation = item.system.get('exif', {}).get('Orientation', 0)
545
+ else:
546
+ orientation = 0
547
+ if item.width is not None and item.height is not None:
548
+ if orientation in [5, 6, 7, 8]:
549
+ img_shape = (item.width, item.height)
550
+ else:
551
+ img_shape = (item.height, item.width)
552
+ elif ('image' in item.mimetype and img_filepath is not None) or \
553
+ (is_url_item and img_filepath is not None):
554
+ img_shape = Image.open(img_filepath).size[::-1]
555
+ else:
556
+ img_shape = (0, 0)
557
+
558
+ # download all annotation options
559
+ for option in annotation_options:
560
+ # get path and create dirs
561
+ annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
562
+ if not os.path.isdir(os.path.dirname(annotation_filepath)):
563
+ os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
564
+
565
+ if export_version == entities.ExportVersion.V1:
566
+ temp_path, ext = os.path.splitext(annotation_filepath)
567
+ else:
568
+ temp_path = annotation_filepath
569
+
570
+ if option == entities.ViewAnnotationOptions.JSON:
571
+ if not os.path.isfile(annotations_json_filepath):
572
+ annotations.download(
573
+ filepath=annotations_json_filepath,
574
+ annotation_format=option,
575
+ height=img_shape[0],
576
+ width=img_shape[1],
577
+ )
578
+ elif option in [entities.ViewAnnotationOptions.MASK,
579
+ entities.ViewAnnotationOptions.INSTANCE,
580
+ entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
581
+ entities.ViewAnnotationOptions.OBJECT_ID,
582
+ entities.ViewAnnotationOptions.VTT]:
583
+ if option == entities.ViewAnnotationOptions.VTT:
584
+ annotation_filepath = temp_path + ".vtt"
585
+ else:
586
+ if 'video' in item.mimetype:
587
+ annotation_filepath = temp_path + ".mp4"
588
+ else:
589
+ annotation_filepath = temp_path + ".png"
590
+ if not os.path.isfile(annotation_filepath) or overwrite:
591
+ # if not exists OR (exists AND overwrite)
592
+ if not os.path.exists(os.path.dirname(annotation_filepath)):
593
+ # create folder if not exists
594
+ os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
595
+ if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
596
+ raise PlatformException(
597
+ error="1002",
598
+ message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
599
+ annotations.download(
600
+ filepath=annotation_filepath,
601
+ img_filepath=img_filepath,
602
+ annotation_format=option,
603
+ height=img_shape[0],
604
+ width=img_shape[1],
605
+ thickness=thickness,
606
+ alpha=alpha,
607
+ with_text=with_text,
608
+ orientation=orientation
609
+ )
610
+ else:
611
+ raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
612
+
613
+ @staticmethod
614
+ def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
615
+ # create paths
616
+ _, ext = os.path.splitext(local_path)
617
+ if ext and not is_folder:
618
+ # local_path is a filename
619
+ local_filepath = local_path
620
+ local_path = os.path.dirname(local_filepath)
621
+ else:
622
+ # if directory - get item's filename
623
+ if to_items_folder:
624
+ local_path = os.path.join(local_path, "items")
625
+ elif is_folder:
626
+ local_path = os.path.join(local_path, "")
627
+ if without_relative_path is not None:
628
+ local_filepath = os.path.join(local_path, item.name)
629
+ else:
630
+ local_filepath = os.path.join(local_path, item.filename[1:])
631
+ return local_path, local_filepath
632
+
633
+ @staticmethod
634
+ def __get_link_source(item):
635
+ assert isinstance(item, entities.Item)
636
+ if not item.is_fetched:
637
+ return item, '', False
638
+
639
+ if not item.filename.endswith('.json') or \
640
+ item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') != 'link':
641
+ return item, '', False
642
+
643
+ # recursively get next id link item
644
+ while item.filename.endswith('.json') and \
645
+ item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
646
+ item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'id':
647
+ item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])
648
+
649
+ # check if link
650
+ if item.filename.endswith('.json') and \
651
+ item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
652
+ item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
653
+ url = item.metadata['system']['shebang']['linkInfo']['ref']
654
+ return item, url, True
655
+ else:
656
+ return item, '', False
657
+
658
+ def __file_validation(self, item, downloaded_file):
659
+ res = False
660
+ resume = True
661
+ if isinstance(downloaded_file, io.BytesIO):
662
+ file_size = downloaded_file.getbuffer().nbytes
663
+ else:
664
+ file_size = os.stat(downloaded_file).st_size
665
+ expected_size = item.metadata['system']['size']
666
+ size_diff = file_size - expected_size
667
+ if size_diff == 0:
668
+ res = True
669
+ if size_diff > 0:
670
+ resume = False
671
+ return res, file_size, resume
672
+
673
    def __thread_download(self,
                          item,
                          save_locally,
                          local_path,
                          to_array,
                          local_filepath,
                          overwrite,
                          annotation_options,
                          annotation_filters,
                          chunk_size=8192,
                          thickness=1,
                          with_text=False,
                          alpha=1,
                          export_version=entities.ExportVersion.V1
                          ):
        """
        Get a single item's binary data
        Calling this method will returns the item body itself , an image for example with the proper mimetype.

        Supports resumable downloads (HTTP Range requests retried up to 3 times per
        offset), link items resolved to their source (including `file://` urls read
        from local disk), and an SDK-cache fast path where the response body is a
        local file path rather than the binary itself.

        :param item: Item entity to download
        :param save_locally: bool. save to file or return buffer
        :param local_path: item local folder to save to.
        :param to_array: returns Ndarray when True and local_path = False
        :param local_filepath: item local filepath
        :param overwrite: overwrite the file is existing
        :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
        :param annotation_filters: Filters entity to filter item's annotation
        :param chunk_size: size of chunks to download - optional. default = 8192
        :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
        :param with_text: optional - add text to annotations, default = False
        :param alpha: opacity value [0 1], default 1
        :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
        :return: local file path (save_locally), io.BytesIO buffer, or np.ndarray (to_array)
        """
        # check if need to download image binary from platform
        need_to_download = True
        if save_locally and os.path.isfile(local_filepath):
            # file already exists - download again only when overwrite was requested
            need_to_download = overwrite

        # resolve link items; is_url is True when the item points at an external url
        item, url, is_url = self.__get_link_source(item=item)
        is_local_link = isinstance(url, str) and url.startswith('file://')

        # save as byte stream
        data = io.BytesIO()
        if need_to_download:
            # chunk_resume maps a resume offset -> number of failed attempts at that offset;
            # 3 failures at the same offset abort the loop
            chunk_resume = {0: 0}
            start_point = 0
            download_done = False
            while chunk_resume.get(start_point, '') != 3 and not download_done:
                if not is_url:
                    # platform item: stream via the API with a Range header to resume
                    headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
                    result, response = self.items_repository._client_api.gen_request(req_type="get",
                                                                                     headers=headers,
                                                                                     path="/items/{}/stream".format(
                                                                                         item.id),
                                                                                     stream=True,
                                                                                     dataset_id=item.dataset_id)
                    if not result:
                        # drop any partial .download file before surfacing the error
                        if os.path.isfile(local_filepath + '.download'):
                            os.remove(local_filepath + '.download')
                        raise PlatformException(response)
                else:
                    # url link: keep the source extension (query string stripped first)
                    _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
                    if local_filepath:
                        local_filepath += ext
                    response = self.get_url_stream(url=url)

                if save_locally:
                    # save to file
                    if not os.path.exists(os.path.dirname(local_filepath)):
                        # create folder if not exists
                        os.makedirs(os.path.dirname(local_filepath), exist_ok=True)

                    # decide if create progress bar for item
                    if not is_local_link:
                        total_length = response.headers.get("content-length")
                    else:
                        # local file reader: measure length by seeking to the end
                        response.seek(0, 2)
                        total_length = response.tell()
                        response.seek(0)
                    one_file_pbar = None
                    try:
                        one_file_progress_bar = total_length is not None and int(
                            total_length) > 10e6  # size larger than 10 MB
                        if one_file_progress_bar:
                            one_file_pbar = tqdm.tqdm(total=int(total_length),
                                                      unit='B',
                                                      unit_scale=True,
                                                      unit_divisor=1024,
                                                      position=1,
                                                      file=sys.stdout,
                                                      disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
                                                      desc='Download Item')
                    except Exception as err:
                        one_file_progress_bar = False
                        logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))

                    # start download
                    if self.items_repository._client_api.sdk_cache.use_cache and \
                            self.items_repository._client_api.cache is not None:
                        # cache fast path: response body is a local file path, not the binary
                        response_output = os.path.normpath(response.content)
                        if isinstance(response_output, bytes):
                            # strip the surrounding quotes from the decoded path string
                            response_output = response_output.decode('utf-8')[1:-1]

                        if os.path.isfile(os.path.normpath(response_output)):
                            if response_output != local_filepath:
                                source_path = os.path.normpath(response_output)
                                shutil.copyfile(source_path, local_filepath)
                            download_done = True
                    else:
                        try:
                            # write to a temp ".download" file; rename only after validation
                            temp_file_path = local_filepath + '.download'
                            with open(temp_file_path, "ab") as f:
                                try:
                                    if is_local_link and isinstance(response, io.BufferedReader):
                                        generator = iter(lambda: response.read(chunk_size), b'')
                                    else:
                                        generator = response.iter_content(chunk_size=chunk_size)
                                    for chunk in generator:
                                        if chunk:  # filter out keep-alive new chunks
                                            f.write(chunk)
                                            if one_file_progress_bar:
                                                one_file_pbar.update(len(chunk))
                                except Exception as err:
                                    # stream error is tolerated here; size validation below
                                    # decides whether to resume or fail
                                    pass
                                finally:
                                    if is_local_link and isinstance(response, io.BufferedReader):
                                        try:
                                            response.close()
                                        except Exception as err:
                                            pass

                            file_validation = True
                            if not is_url:
                                # validate size and compute the next Range offset if incomplete
                                file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                                   download_progress=temp_file_path,
                                                                                                   chunk_resume=chunk_resume)
                            if file_validation:
                                shutil.move(temp_file_path, local_filepath)
                                download_done = True
                        except Exception as err:
                            if os.path.isfile(temp_file_path):
                                os.remove(temp_file_path)
                            raise err
                    if one_file_progress_bar:
                        one_file_pbar.close()
                    # save to output variable
                    data = local_filepath
                    # if image - can download annotation mask
                    if item.annotated and annotation_options:
                        self._download_img_annotations(item=item,
                                                       img_filepath=local_filepath,
                                                       annotation_options=annotation_options,
                                                       annotation_filters=annotation_filters,
                                                       local_path=local_path,
                                                       overwrite=overwrite,
                                                       thickness=thickness,
                                                       alpha=alpha,
                                                       with_text=with_text,
                                                       export_version=export_version
                                                       )
                else:
                    # in-memory path: accumulate into the BytesIO buffer
                    if self.items_repository._client_api.sdk_cache.use_cache and \
                            self.items_repository._client_api.cache is not None:
                        response_output = os.path.normpath(response.content)
                        if isinstance(response_output, bytes):
                            response_output = response_output.decode('utf-8')[1:-1]

                        if os.path.isfile(response_output):
                            source_file = response_output
                            # NOTE(review): file is opened 'wb' (write) but read() is called -
                            # this looks like a bug ('rb' intended); confirm before relying on
                            # the cache + in-memory combination
                            with open(source_file, 'wb') as f:
                                data = f.read()
                            download_done = True
                    else:
                        try:
                            if is_local_link and isinstance(response, io.BufferedReader):
                                generator = iter(lambda: response.read(chunk_size), b'')
                            else:
                                generator = response.iter_content(chunk_size=chunk_size)
                            for chunk in generator:
                                if chunk:  # filter out keep-alive new chunks
                                    data.write(chunk)

                            file_validation = True
                            if not is_url:
                                file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                                   download_progress=data,
                                                                                                   chunk_resume=chunk_resume)
                            if file_validation:
                                download_done = True
                            else:
                                # incomplete - retry loop with the updated Range offset
                                continue
                        except Exception as err:
                            raise err
                        finally:
                            if is_local_link and isinstance(response, io.BufferedReader):
                                try:
                                    response.close()
                                except Exception as err:
                                    pass
                    # go back to the beginning of the stream
                    data.seek(0)
                    data.name = item.name
                    if not save_locally and to_array:
                        if 'image' not in item.mimetype and not is_url:
                            raise PlatformException(
                                error="400",
                                message='Download element type numpy.ndarray support for image only. '
                                        'Item Id: {} is {} type'.format(item.id, item.mimetype))

                        data = np.array(Image.open(data))
        else:
            # nothing to download - the existing local file is the result
            data = local_filepath
        return data
887
+
888
+ def __get_next_chunk(self, item, download_progress, chunk_resume):
889
+ size_validation, file_size, resume = self.__file_validation(item=item,
890
+ downloaded_file=download_progress)
891
+ start_point = file_size
892
+ if not size_validation:
893
+ if chunk_resume.get(start_point, None) is None:
894
+ chunk_resume = {start_point: 1}
895
+ else:
896
+ chunk_resume[start_point] += 1
897
+ if chunk_resume[start_point] == 3 or not resume:
898
+ raise PlatformException(
899
+ error=500,
900
+ message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
901
+ return size_validation, start_point, chunk_resume
902
+
903
+ def __default_local_path(self):
904
+
905
+ # create default local path
906
+ if self.items_repository._dataset is None:
907
+ local_path = os.path.join(
908
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
909
+ "items",
910
+ )
911
+ else:
912
+ if self.items_repository.dataset._project is None:
913
+ # by dataset name
914
+ local_path = os.path.join(
915
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
916
+ "datasets",
917
+ "{}_{}".format(self.items_repository.dataset.name, self.items_repository.dataset.id),
918
+ )
919
+ else:
920
+ # by dataset and project name
921
+ local_path = os.path.join(
922
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
923
+ "projects",
924
+ self.items_repository.dataset.project.name,
925
+ "datasets",
926
+ self.items_repository.dataset.name,
927
+ )
928
+ logger.info("Downloading to: {}".format(local_path))
929
+ return local_path
930
+
931
+ @staticmethod
932
+ def get_url_stream(url):
933
+ """
934
+ :param url:
935
+ """
936
+
937
+ if url.startswith('file://'):
938
+ parsed = urlparse(url)
939
+ path = unquote(parsed.path)
940
+ if parsed.netloc:
941
+ path = f"/{parsed.netloc}{path}"
942
+ path = Path(path).expanduser().resolve()
943
+
944
+ if not path.exists():
945
+ raise PlatformException(
946
+ error='404',
947
+ message=f'Local file not found: {url}'
948
+ )
949
+ if not path.is_file():
950
+ raise PlatformException(
951
+ error='400',
952
+ message=f'Path is not a file: {url}'
953
+ )
954
+
955
+ try:
956
+ return io.BufferedReader(io.FileIO(path, 'rb'))
957
+ except PermissionError as e:
958
+ raise PlatformException(
959
+ error='403',
960
+ message=f'Permission denied accessing file: {url}'
961
+ ) from e
962
+
963
+ prepared_request = requests.Request(method='GET', url=url).prepare()
964
+ with requests.Session() as s:
965
+ retry = Retry(
966
+ total=3,
967
+ read=3,
968
+ connect=3,
969
+ backoff_factor=1,
970
+ )
971
+ adapter = HTTPAdapter(max_retries=retry)
972
+ s.mount('http://', adapter)
973
+ s.mount('https://', adapter)
974
+ response = s.send(request=prepared_request, stream=True)
975
+
976
+ return response