dtlpy 1.115.44__py3-none-any.whl → 1.117.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. dtlpy/__init__.py +491 -491
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/code_server/config.yaml +2 -2
  5. dtlpy/assets/code_server/installation.sh +24 -24
  6. dtlpy/assets/code_server/launch.json +13 -13
  7. dtlpy/assets/code_server/settings.json +2 -2
  8. dtlpy/assets/main.py +53 -53
  9. dtlpy/assets/main_partial.py +18 -18
  10. dtlpy/assets/mock.json +11 -11
  11. dtlpy/assets/model_adapter.py +83 -83
  12. dtlpy/assets/package.json +61 -61
  13. dtlpy/assets/package_catalog.json +29 -29
  14. dtlpy/assets/package_gitignore +307 -307
  15. dtlpy/assets/service_runners/__init__.py +33 -33
  16. dtlpy/assets/service_runners/converter.py +96 -96
  17. dtlpy/assets/service_runners/multi_method.py +49 -49
  18. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  19. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  20. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  21. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  22. dtlpy/assets/service_runners/single_method.py +37 -37
  23. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  24. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  25. dtlpy/assets/service_runners/single_method_item.py +41 -41
  26. dtlpy/assets/service_runners/single_method_json.py +42 -42
  27. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  28. dtlpy/assets/voc_annotation_template.xml +23 -23
  29. dtlpy/caches/base_cache.py +32 -32
  30. dtlpy/caches/cache.py +473 -473
  31. dtlpy/caches/dl_cache.py +201 -201
  32. dtlpy/caches/filesystem_cache.py +89 -89
  33. dtlpy/caches/redis_cache.py +84 -84
  34. dtlpy/dlp/__init__.py +20 -20
  35. dtlpy/dlp/cli_utilities.py +367 -367
  36. dtlpy/dlp/command_executor.py +764 -764
  37. dtlpy/dlp/dlp +1 -1
  38. dtlpy/dlp/dlp.bat +1 -1
  39. dtlpy/dlp/dlp.py +128 -128
  40. dtlpy/dlp/parser.py +651 -651
  41. dtlpy/entities/__init__.py +83 -83
  42. dtlpy/entities/analytic.py +347 -347
  43. dtlpy/entities/annotation.py +1879 -1879
  44. dtlpy/entities/annotation_collection.py +699 -699
  45. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  46. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  47. dtlpy/entities/annotation_definitions/box.py +195 -195
  48. dtlpy/entities/annotation_definitions/classification.py +67 -67
  49. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  50. dtlpy/entities/annotation_definitions/cube.py +204 -204
  51. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  52. dtlpy/entities/annotation_definitions/description.py +32 -32
  53. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  54. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  55. dtlpy/entities/annotation_definitions/gis.py +69 -69
  56. dtlpy/entities/annotation_definitions/note.py +139 -139
  57. dtlpy/entities/annotation_definitions/point.py +117 -117
  58. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  59. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  60. dtlpy/entities/annotation_definitions/pose.py +92 -92
  61. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  62. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  63. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  64. dtlpy/entities/annotation_definitions/text.py +85 -85
  65. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  66. dtlpy/entities/app.py +220 -220
  67. dtlpy/entities/app_module.py +107 -107
  68. dtlpy/entities/artifact.py +174 -174
  69. dtlpy/entities/assignment.py +399 -399
  70. dtlpy/entities/base_entity.py +214 -214
  71. dtlpy/entities/bot.py +113 -113
  72. dtlpy/entities/codebase.py +292 -292
  73. dtlpy/entities/collection.py +38 -38
  74. dtlpy/entities/command.py +169 -169
  75. dtlpy/entities/compute.py +449 -449
  76. dtlpy/entities/dataset.py +1299 -1299
  77. dtlpy/entities/directory_tree.py +44 -44
  78. dtlpy/entities/dpk.py +470 -470
  79. dtlpy/entities/driver.py +235 -235
  80. dtlpy/entities/execution.py +397 -397
  81. dtlpy/entities/feature.py +124 -124
  82. dtlpy/entities/feature_set.py +152 -145
  83. dtlpy/entities/filters.py +798 -798
  84. dtlpy/entities/gis_item.py +107 -107
  85. dtlpy/entities/integration.py +184 -184
  86. dtlpy/entities/item.py +975 -959
  87. dtlpy/entities/label.py +123 -123
  88. dtlpy/entities/links.py +85 -85
  89. dtlpy/entities/message.py +175 -175
  90. dtlpy/entities/model.py +684 -684
  91. dtlpy/entities/node.py +1005 -1005
  92. dtlpy/entities/ontology.py +810 -803
  93. dtlpy/entities/organization.py +287 -287
  94. dtlpy/entities/package.py +657 -657
  95. dtlpy/entities/package_defaults.py +5 -5
  96. dtlpy/entities/package_function.py +185 -185
  97. dtlpy/entities/package_module.py +113 -113
  98. dtlpy/entities/package_slot.py +118 -118
  99. dtlpy/entities/paged_entities.py +299 -299
  100. dtlpy/entities/pipeline.py +624 -624
  101. dtlpy/entities/pipeline_execution.py +279 -279
  102. dtlpy/entities/project.py +394 -394
  103. dtlpy/entities/prompt_item.py +505 -505
  104. dtlpy/entities/recipe.py +301 -301
  105. dtlpy/entities/reflect_dict.py +102 -102
  106. dtlpy/entities/resource_execution.py +138 -138
  107. dtlpy/entities/service.py +974 -963
  108. dtlpy/entities/service_driver.py +117 -117
  109. dtlpy/entities/setting.py +294 -294
  110. dtlpy/entities/task.py +495 -495
  111. dtlpy/entities/time_series.py +143 -143
  112. dtlpy/entities/trigger.py +426 -426
  113. dtlpy/entities/user.py +118 -118
  114. dtlpy/entities/webhook.py +124 -124
  115. dtlpy/examples/__init__.py +19 -19
  116. dtlpy/examples/add_labels.py +135 -135
  117. dtlpy/examples/add_metadata_to_item.py +21 -21
  118. dtlpy/examples/annotate_items_using_model.py +65 -65
  119. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  120. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  121. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  122. dtlpy/examples/convert_annotation_types.py +51 -51
  123. dtlpy/examples/converter.py +143 -143
  124. dtlpy/examples/copy_annotations.py +22 -22
  125. dtlpy/examples/copy_folder.py +31 -31
  126. dtlpy/examples/create_annotations.py +51 -51
  127. dtlpy/examples/create_video_annotations.py +83 -83
  128. dtlpy/examples/delete_annotations.py +26 -26
  129. dtlpy/examples/filters.py +113 -113
  130. dtlpy/examples/move_item.py +23 -23
  131. dtlpy/examples/play_video_annotation.py +13 -13
  132. dtlpy/examples/show_item_and_mask.py +53 -53
  133. dtlpy/examples/triggers.py +49 -49
  134. dtlpy/examples/upload_batch_of_items.py +20 -20
  135. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  136. dtlpy/examples/upload_items_with_modalities.py +43 -43
  137. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  138. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  139. dtlpy/exceptions.py +125 -125
  140. dtlpy/miscellaneous/__init__.py +20 -20
  141. dtlpy/miscellaneous/dict_differ.py +95 -95
  142. dtlpy/miscellaneous/git_utils.py +217 -217
  143. dtlpy/miscellaneous/json_utils.py +14 -14
  144. dtlpy/miscellaneous/list_print.py +105 -105
  145. dtlpy/miscellaneous/zipping.py +130 -130
  146. dtlpy/ml/__init__.py +20 -20
  147. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  148. dtlpy/ml/base_model_adapter.py +1287 -1230
  149. dtlpy/ml/metrics.py +461 -461
  150. dtlpy/ml/predictions_utils.py +274 -274
  151. dtlpy/ml/summary_writer.py +57 -57
  152. dtlpy/ml/train_utils.py +60 -60
  153. dtlpy/new_instance.py +252 -252
  154. dtlpy/repositories/__init__.py +56 -56
  155. dtlpy/repositories/analytics.py +85 -85
  156. dtlpy/repositories/annotations.py +916 -916
  157. dtlpy/repositories/apps.py +383 -383
  158. dtlpy/repositories/artifacts.py +452 -452
  159. dtlpy/repositories/assignments.py +599 -599
  160. dtlpy/repositories/bots.py +213 -213
  161. dtlpy/repositories/codebases.py +559 -559
  162. dtlpy/repositories/collections.py +332 -332
  163. dtlpy/repositories/commands.py +152 -152
  164. dtlpy/repositories/compositions.py +61 -61
  165. dtlpy/repositories/computes.py +439 -439
  166. dtlpy/repositories/datasets.py +1585 -1504
  167. dtlpy/repositories/downloader.py +1157 -923
  168. dtlpy/repositories/dpks.py +433 -433
  169. dtlpy/repositories/drivers.py +482 -482
  170. dtlpy/repositories/executions.py +815 -815
  171. dtlpy/repositories/feature_sets.py +256 -226
  172. dtlpy/repositories/features.py +255 -255
  173. dtlpy/repositories/integrations.py +484 -484
  174. dtlpy/repositories/items.py +912 -912
  175. dtlpy/repositories/messages.py +94 -94
  176. dtlpy/repositories/models.py +1000 -1000
  177. dtlpy/repositories/nodes.py +80 -80
  178. dtlpy/repositories/ontologies.py +511 -511
  179. dtlpy/repositories/organizations.py +525 -525
  180. dtlpy/repositories/packages.py +1941 -1941
  181. dtlpy/repositories/pipeline_executions.py +451 -451
  182. dtlpy/repositories/pipelines.py +640 -640
  183. dtlpy/repositories/projects.py +539 -539
  184. dtlpy/repositories/recipes.py +429 -399
  185. dtlpy/repositories/resource_executions.py +137 -137
  186. dtlpy/repositories/schema.py +120 -120
  187. dtlpy/repositories/service_drivers.py +213 -213
  188. dtlpy/repositories/services.py +1704 -1704
  189. dtlpy/repositories/settings.py +339 -339
  190. dtlpy/repositories/tasks.py +1477 -1477
  191. dtlpy/repositories/times_series.py +278 -278
  192. dtlpy/repositories/triggers.py +536 -536
  193. dtlpy/repositories/upload_element.py +257 -257
  194. dtlpy/repositories/uploader.py +661 -661
  195. dtlpy/repositories/webhooks.py +249 -249
  196. dtlpy/services/__init__.py +22 -22
  197. dtlpy/services/aihttp_retry.py +131 -131
  198. dtlpy/services/api_client.py +1786 -1785
  199. dtlpy/services/api_reference.py +40 -40
  200. dtlpy/services/async_utils.py +133 -133
  201. dtlpy/services/calls_counter.py +44 -44
  202. dtlpy/services/check_sdk.py +68 -68
  203. dtlpy/services/cookie.py +115 -115
  204. dtlpy/services/create_logger.py +156 -156
  205. dtlpy/services/events.py +84 -84
  206. dtlpy/services/logins.py +235 -235
  207. dtlpy/services/reporter.py +256 -256
  208. dtlpy/services/service_defaults.py +91 -91
  209. dtlpy/utilities/__init__.py +20 -20
  210. dtlpy/utilities/annotations/__init__.py +16 -16
  211. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  212. dtlpy/utilities/base_package_runner.py +285 -264
  213. dtlpy/utilities/converter.py +1650 -1650
  214. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  215. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  216. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  217. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  218. dtlpy/utilities/local_development/__init__.py +1 -1
  219. dtlpy/utilities/local_development/local_session.py +179 -179
  220. dtlpy/utilities/reports/__init__.py +2 -2
  221. dtlpy/utilities/reports/figures.py +343 -343
  222. dtlpy/utilities/reports/report.py +71 -71
  223. dtlpy/utilities/videos/__init__.py +17 -17
  224. dtlpy/utilities/videos/video_player.py +598 -598
  225. dtlpy/utilities/videos/videos.py +470 -470
  226. {dtlpy-1.115.44.data → dtlpy-1.117.6.data}/scripts/dlp +1 -1
  227. dtlpy-1.117.6.data/scripts/dlp.bat +2 -0
  228. {dtlpy-1.115.44.data → dtlpy-1.117.6.data}/scripts/dlp.py +128 -128
  229. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/METADATA +186 -186
  230. dtlpy-1.117.6.dist-info/RECORD +239 -0
  231. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/WHEEL +1 -1
  232. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/licenses/LICENSE +200 -200
  233. tests/features/environment.py +551 -551
  234. dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
  235. dtlpy-1.115.44.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.115.44.dist-info/RECORD +0 -240
  237. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/entry_points.txt +0 -0
  238. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/top_level.txt +0 -0
@@ -1,923 +1,1157 @@
1
- from requests.adapters import HTTPAdapter
2
- from urllib3.util import Retry
3
- from PIL import Image
4
- import numpy as np
5
- import traceback
6
- import warnings
7
- import requests
8
- import logging
9
- import shutil
10
- import json
11
- import tqdm
12
- import sys
13
- import os
14
- import io
15
-
16
- from .. import entities, repositories, miscellaneous, PlatformException, exceptions
17
- from ..services import Reporter
18
-
19
- logger = logging.getLogger(name='dtlpy')
20
-
21
- NUM_TRIES = 3 # try to download 3 time before fail on item
22
-
23
-
24
- class Downloader:
25
    def __init__(self, items_repository):
        # Items repository this downloader operates on; used for item lookup
        # (get/list), dataset access, and the shared client API / thread pools.
        self.items_repository = items_repository
27
-
28
    def download(self,
                 # filter options
                 filters: entities.Filters = None,
                 items=None,
                 # download options
                 local_path=None,
                 file_types=None,
                 save_locally=True,
                 to_array=False,
                 overwrite=False,
                 annotation_filters: entities.Filters = None,
                 annotation_options: entities.ViewAnnotationOptions = None,
                 to_items_folder=True,
                 thickness=1,
                 with_text=False,
                 without_relative_path=None,
                 avoid_unnecessary_annotation_download=False,
                 include_annotations_in_output=True,
                 export_png_files=False,
                 filter_output_annotations=False,
                 alpha=1,
                 export_version=entities.ExportVersion.V1,
                 dataset_lock=False,
                 lock_timeout_sec=None,
                 export_summary=False,
                 raise_on_error=False
                 ):
        """
        Download dataset by filters.
        Filtering the dataset for items and save them local
        Optional - also download annotation, mask, instance and image mask of the item

        :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
        :param items: download Item entity or item_id (or a list of item)
        :param local_path: local folder or filename to save to.
        :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
        :param save_locally: bool. save to disk or return a buffer
        :param to_array: returns Ndarray when True and local_path = False
        :param overwrite: optional - default = False
        :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
        :param annotation_filters: Filters entity to filter annotations for download
        :param to_items_folder: Create 'items' folder and download items to it
        :param with_text: optional - add text to annotations, default = False
        :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
        :param without_relative_path: bool - download items without the relative path from platform
        :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
        :param include_annotations_in_output: default - True, if export should contain annotations
        :param export_png_files: default - False, if True semantic annotations are exported as png files
        :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
        :param alpha: opacity value [0 1], default 1
        :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
        :param bool dataset_lock: optional - default = False
        :param bool export_summary: optional - default = False
        :param int lock_timeout_sec: optional
        :param bool raise_on_error: raise an exception if an error occurs
        :return: Output (list); a single output (or None) when at most one file was downloaded
        """

        ###################
        # Default options #
        ###################
        # annotation options: normalize to a list and validate each entry
        if annotation_options is None:
            annotation_options = list()
        elif not isinstance(annotation_options, list):
            annotation_options = [annotation_options]
        for ann_option in annotation_options:
            if not isinstance(ann_option, entities.ViewAnnotationOptions):
                if ann_option not in list(entities.ViewAnnotationOptions):
                    raise PlatformException(
                        error='400',
                        message='Unknown annotation download option: {}, please choose from: {}'.format(
                            ann_option, list(entities.ViewAnnotationOptions)))
        # normalize items argument: treat empty list as "no items specified"
        if isinstance(items, list) and len(items) == 0:
            items = None
        #####################
        # items to download #
        #####################
        if items is not None:
            # convert input to a list
            if not isinstance(items, list):
                items = [items]
            # get items by id (list type is decided by the first element)
            if isinstance(items[0], str):
                items = [self.items_repository.get(item_id=item_id) for item_id in items]
            elif isinstance(items[0], entities.Item):
                pass
            else:
                raise PlatformException(
                    error="400",
                    message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
                        type(items[0])
                    )
                )
            # create filters to download annotations for exactly these item ids
            filters = entities.Filters(field='id',
                                       values=[item.id for item in items],
                                       operator=entities.FiltersOperations.IN)
            filters._user_query = 'false'

            # convert to list of list (like pages and page)
            items_to_download = [items]
            num_items = len(items)
        else:
            # filters
            if filters is None:
                filters = entities.Filters()
                filters._user_query = 'false'
            # file types
            if file_types is not None:
                filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
            if annotation_filters is not None:
                # fold the annotation filters into the items query as a join,
                # or merge a custom annotation filter as the items query's "join"
                if len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0:
                    for annotation_filter_and in annotation_filters.and_filter_list:
                        filters.add_join(field=annotation_filter_and.field,
                                         values=annotation_filter_and.values,
                                         operator=annotation_filter_and.operator,
                                         method=entities.FiltersMethod.AND)
                    for annotation_filter_or in annotation_filters.or_filter_list:
                        filters.add_join(field=annotation_filter_or.field,
                                         values=annotation_filter_or.values,
                                         operator=annotation_filter_or.operator,
                                         method=entities.FiltersMethod.OR)
                elif annotation_filters.custom_filter is not None:
                    annotation_query_dict = annotation_filters.prepare()
                    items_query_dict = filters.prepare()
                    items_query_dict["join"] = annotation_query_dict
                    filters.reset()
                    filters.custom_filter = items_query_dict

            else:
                # default: no annotation filtering
                annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
                filters._user_query = 'false'

            items_to_download = self.items_repository.list(filters=filters)
            num_items = items_to_download.items_count

        if num_items == 0:
            logger.warning('No items found! Nothing was downloaded')
            return list()

        ##############
        # local path #
        ##############
        # decide whether local_path is a directory or a single-file target
        is_folder = False
        if local_path is None:
            # create default local path
            local_path = self.__default_local_path()

        if os.path.isdir(local_path):
            logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
                local_path))
            is_folder = True
        else:
            # check if filename: treated as a file target only when downloading
            # a single item whose extension matches the given path's extension
            _, ext = os.path.splitext(local_path)
            if num_items > 1:
                is_folder = True
            else:
                item_to_download = items_to_download[0][0]
                file_name = item_to_download.name
                _, ext_download = os.path.splitext(file_name)
                if ext_download != ext:
                    is_folder = True
        if is_folder and save_locally:
            path_to_create = local_path
            if local_path.endswith('*'):
                path_to_create = os.path.dirname(local_path)
            logger.info("Creating new directory for download: {}".format(path_to_create))
            os.makedirs(path_to_create, exist_ok=True)

        ####################
        # annotations json #
        ####################
        # download annotations' json files in a new thread
        # items will start downloading and if json not exists yet - will download for each file
        if num_items > 1 and annotation_options:
            # a new folder named 'json' will be created under the "local_path"
            logger.info("Downloading annotations formats: {}".format(annotation_options))
            self.download_annotations(**{
                "dataset": self.items_repository.dataset,
                "filters": filters,
                "annotation_filters": annotation_filters,
                "local_path": local_path,
                'overwrite': overwrite,
                'include_annotations_in_output': include_annotations_in_output,
                'export_png_files': export_png_files,
                'filter_output_annotations': filter_output_annotations,
                'export_version': export_version,
                'dataset_lock': dataset_lock,
                'lock_timeout_sec': lock_timeout_sec,
                'export_summary': export_summary
            })
        ###############
        # downloading #
        ###############
        # create result lists
        client_api = self.items_repository._client_api

        # the reporter collects per-item status (download/exist/error) and output
        reporter = Reporter(num_workers=num_items,
                            resource=Reporter.ITEMS_DOWNLOAD,
                            print_error_logs=client_api.verbose.print_error_logs,
                            client_api=client_api)
        jobs = [None for _ in range(num_items)]
        # pool
        pool = client_api.thread_pools(pool_name='item.download')
        # download
        pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset, file=sys.stdout,
                         desc='Download Items')
        try:
            i_item = 0
            for page in items_to_download:
                for item in page:
                    # directories have no binary to download
                    if item.type == "dir":
                        continue
                    if save_locally:
                        # get local file path
                        item_local_path, item_local_filepath = self.__get_local_filepath(
                            local_path=local_path,
                            without_relative_path=without_relative_path,
                            item=item,
                            to_items_folder=to_items_folder,
                            is_folder=is_folder)

                        if os.path.isfile(item_local_filepath) and not overwrite:
                            # item binary already on disk: mark as existing and
                            # only fetch annotations (if requested and present)
                            logger.debug("File Exists: {}".format(item_local_filepath))
                            reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
                            pbar.update()
                            if annotation_options and item.annotated:
                                # download annotations only
                                jobs[i_item] = pool.submit(
                                    self._download_img_annotations,
                                    **{
                                        "item": item,
                                        "img_filepath": item_local_filepath,
                                        "overwrite": overwrite,
                                        "annotation_options": annotation_options,
                                        "annotation_filters": annotation_filters,
                                        "local_path": item_local_path,
                                        "thickness": thickness,
                                        "alpha": alpha,
                                        "with_text": with_text,
                                        "export_version": export_version,
                                    },
                                )
                            i_item += 1
                            continue
                    else:
                        item_local_path = None
                        item_local_filepath = None

                    # download single item
                    jobs[i_item] = pool.submit(
                        self.__thread_download_wrapper,
                        **{
                            "i_item": i_item,
                            "item": item,
                            "item_local_path": item_local_path,
                            "item_local_filepath": item_local_filepath,
                            "save_locally": save_locally,
                            "to_array": to_array,
                            "annotation_options": annotation_options,
                            "annotation_filters": annotation_filters,
                            "reporter": reporter,
                            "pbar": pbar,
                            "overwrite": overwrite,
                            "thickness": thickness,
                            "alpha": alpha,
                            "with_text": with_text,
                            "export_version": export_version
                        },
                    )
                    i_item += 1
        except Exception:
            logger.exception('Error downloading:')
        finally:
            # wait for all submitted jobs before closing the progress bar
            _ = [j.result() for j in jobs if j is not None]
            pbar.close()
        # reporting
        n_download = reporter.status_count(status='download')
        n_exist = reporter.status_count(status='exist')
        n_error = reporter.status_count(status='error')
        logger.info("Number of files downloaded:{}".format(n_download))
        logger.info("Number of files exists: {}".format(n_exist))
        logger.info("Total number of files: {}".format(n_download + n_exist))

        # log error
        if n_error > 0:
            log_filepath = reporter.generate_log_files()
            # Get up to 5 error examples for the exception message
            error_text = ""
            error_counter = 0
            if reporter._errors:
                for _id, error in reporter._errors.items():
                    error_counter += 1
                    error_text += f"Item ID: {_id}, Error: {error} | "
                    if error_counter >= 5:
                        break
            error_message = f"Errors in {n_error} files. Errors: {error_text}"
            if log_filepath is not None:
                error_message += f", see {log_filepath} for full log"
            if raise_on_error is True:
                raise PlatformException(
                    error="400", message=error_message
                )
            else:
                logger.warning(error_message)
        # single-item convenience: return the lone output instead of an iterator
        if int(n_download) <= 1 and int(n_exist) <= 1:
            try:
                return next(reporter.output)
            except StopIteration:
                return None
        return reporter.output
342
-
343
- def __thread_download_wrapper(self, i_item,
344
- # item params
345
- item, item_local_path, item_local_filepath,
346
- save_locally, to_array, overwrite,
347
- # annotations params
348
- annotation_options, annotation_filters, with_text, thickness,
349
- # threading params
350
- reporter, pbar, alpha, export_version):
351
-
352
- download = None
353
- err = None
354
- trace = None
355
- for i_try in range(NUM_TRIES):
356
- try:
357
- logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
358
- i=i_try + 1,
359
- n=NUM_TRIES))
360
- download = self.__thread_download(item=item,
361
- save_locally=save_locally,
362
- to_array=to_array,
363
- local_path=item_local_path,
364
- local_filepath=item_local_filepath,
365
- annotation_options=annotation_options,
366
- annotation_filters=annotation_filters,
367
- overwrite=overwrite,
368
- thickness=thickness,
369
- alpha=alpha,
370
- with_text=with_text,
371
- export_version=export_version)
372
- logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
373
- i=i_try + 1,
374
- n=NUM_TRIES,
375
- id=item.id))
376
- if download is not None:
377
- break
378
- except Exception as e:
379
- logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
380
- i=i_try + 1,
381
- n=NUM_TRIES))
382
- err = e
383
- trace = traceback.format_exc()
384
- pbar.update()
385
- if download is None:
386
- if err is None:
387
- err = self.items_repository._client_api.platform_exception
388
- reporter.set_index(status="error", ref=item.id, success=False,
389
- error="{}\n{}".format(err, trace))
390
- else:
391
- reporter.set_index(ref=item.id, status="download", output=download, success=True)
392
-
393
    @staticmethod
    def download_annotations(dataset: entities.Dataset,
                             local_path: str,
                             filters: entities.Filters = None,
                             annotation_filters: entities.Filters = None,
                             overwrite=False,
                             include_annotations_in_output=True,
                             export_png_files=False,
                             filter_output_annotations=False,
                             export_version=entities.ExportVersion.V1,
                             dataset_lock=False,
                             lock_timeout_sec=None,
                             export_summary=False
                             ):
        """
        Download annotations json for entire dataset.

        Triggers a platform export command, waits for it to finish, downloads
        the resulting zip item into ``<local_path>/json`` and unzips it there.
        The downloaded zip file is always removed afterwards.

        :param dataset: Dataset entity
        :param local_path: root folder; annotations are extracted under "<local_path>/json"
        :param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
        :param annotation_filters: dl.Filters entity to filters items' annotations
        :param overwrite: optional - overwrite annotations if exist, default = false
        :param include_annotations_in_output: default - True, if export should contain annotations
        :param export_png_files: default - False, if True semantic annotations are exported as png files
        :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
        :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
        :param bool dataset_lock: optional - default = False
        :param bool export_summary: optional - default = False
        :param int lock_timeout_sec: optional
        :return: None
        """
        local_path = os.path.join(local_path, "json")
        zip_filepath = None
        # only if json folder does not exist or exist and overwrite
        # NOTE(review): local_path was already joined with "json" above, so this
        # tests "<original local_path>/json/json" — confirm the double nesting
        # is intended and not a leftover from an earlier path layout.
        if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
            # create local path to download and save to
            if not os.path.isdir(local_path):
                os.makedirs(local_path)

            try:
                # build the export request payload
                payload = dict()
                if filters is not None:
                    payload['itemsQuery'] = filters.prepare()
                payload['annotations'] = {
                    "include": include_annotations_in_output,
                    "convertSemantic": export_png_files
                }
                payload['exportVersion'] = export_version
                if annotation_filters is not None:
                    payload['annotationsQuery'] = annotation_filters.prepare()
                    payload['annotations']['filter'] = filter_output_annotations
                if dataset_lock:
                    payload['datasetLock'] = dataset_lock

                if export_summary:
                    payload['summary'] = export_summary

                if lock_timeout_sec:
                    payload['lockTimeoutSec'] = lock_timeout_sec

                # NOTE(review): assumes filters is not None here
                # (filters._user_query is read unconditionally) — verify callers
                # always pass a prepared Filters entity.
                success, response = dataset._client_api.gen_request(req_type='post',
                                                                    path='/datasets/{}/export'.format(dataset.id),
                                                                    json_req=payload,
                                                                    headers={'user_query': filters._user_query})
                if not success:
                    raise exceptions.PlatformException(response)
                # the export runs as an async platform command; wait for completion
                command = entities.Command.from_json(_json=response.json(),
                                                     client_api=dataset._client_api)
                command = command.wait(timeout=0)
                if 'outputItemId' not in command.spec:
                    raise exceptions.PlatformException(
                        error='400',
                        message="outputItemId key is missing in command response: {}".format(response))
                item_id = command.spec['outputItemId']
                annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
                zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
                # unzipping annotations to directory
                if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
                    raise exceptions.PlatformException(
                        error='404',
                        message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
                            annotation_zip_item.id))
                try:
                    miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
                                                          to_directory=local_path)
                except Exception as e:
                    # best-effort extraction: a bad zip is logged, not raised
                    logger.warning("Failed to extract zip file error: {}".format(e))

            finally:
                # cleanup: remove the downloaded zip whether or not extraction succeeded
                if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
                    os.remove(zip_filepath)
485
-
486
@staticmethod
def _download_img_annotations(item: entities.Item,
                              img_filepath,
                              local_path,
                              overwrite,
                              annotation_options,
                              annotation_filters,
                              thickness=1,
                              with_text=False,
                              alpha=1,
                              export_version=entities.ExportVersion.V1
                              ):
    """
    Download (or render) a single item's annotations next to its binary.

    Annotation files are written under '<local_path>/<option>/<relative path>'
    for each requested option (json, mask, instance, vtt, ...).

    :param item: Item entity whose annotations are downloaded
    :param img_filepath: local path of the already-downloaded binary (or None)
    :param local_path: root download directory
    :param overwrite: overwrite existing annotation files
    :param annotation_options: list(dl.ViewAnnotationOptions) to produce
    :param annotation_filters: Filters entity to filter the item's annotations
    :param thickness: line thickness; -1 fills the annotation. default 1
    :param with_text: add label text to rendered annotations. default False
    :param alpha: opacity value [0 1]. default 1
    :param export_version: V1 strips the original extension from filenames
    """
    # check if local_path is a file name
    _, ext = os.path.splitext(local_path)
    if ext:
        # take the dir of the file for the annotations save
        local_path = os.path.dirname(local_path)

    # annotations are saved as siblings of the "items" folder
    if local_path.endswith("/items") or local_path.endswith("\\items"):
        local_path = os.path.dirname(local_path)

    # the annotation's relative path mirrors the item's remote path
    annotation_rel_path = item.filename[1:]
    if img_filepath is not None:
        dir_name = os.path.dirname(annotation_rel_path)
        base_name = os.path.basename(img_filepath)
        annotation_rel_path = os.path.join(dir_name, base_name)

    # find annotations json
    annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
    if export_version == entities.ExportVersion.V1:
        name, _ = os.path.splitext(annotations_json_filepath)
    else:
        name = annotations_json_filepath
    annotations_json_filepath = name + ".json"

    if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
        # json already on disk (downloaded earlier with annotation_filters
        # applied) - read from the file instead of calling the platform
        with open(annotations_json_filepath, "r", encoding="utf8") as f:
            data = json.load(f)
        if "annotations" in data:
            data = data["annotations"]
        annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
    else:
        # if json file doesn't exist get the annotations from platform
        annotations = item.annotations.list(filters=annotation_filters)

    # get image shape
    is_url_item = item.metadata. \
        get('system', dict()). \
        get('shebang', dict()). \
        get('linkInfo', dict()). \
        get('type', None) == 'url'

    # item is always non-None here (dereferenced above), so read EXIF directly;
    # orientations 5-8 mean a 90/270 degree rotation, so width/height swap
    orientation = item.system.get('exif', {}).get('Orientation', 0)
    if item.width is not None and item.height is not None:
        if orientation in [5, 6, 7, 8]:
            img_shape = (item.width, item.height)
        else:
            img_shape = (item.height, item.width)
    elif ('image' in item.mimetype and img_filepath is not None) or \
            (is_url_item and img_filepath is not None):
        # read the shape from the local binary; context manager closes the handle
        with Image.open(img_filepath) as img:
            img_shape = img.size[::-1]
    else:
        img_shape = (0, 0)

    # download all requested annotation options
    for option in annotation_options:
        # get path and create dirs
        annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
        if not os.path.isdir(os.path.dirname(annotation_filepath)):
            os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)

        if export_version == entities.ExportVersion.V1:
            temp_path, ext = os.path.splitext(annotation_filepath)
        else:
            temp_path = annotation_filepath

        if option == entities.ViewAnnotationOptions.JSON:
            if not os.path.isfile(annotations_json_filepath):
                annotations.download(
                    filepath=annotations_json_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                )
        elif option in [entities.ViewAnnotationOptions.MASK,
                        entities.ViewAnnotationOptions.INSTANCE,
                        entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
                        entities.ViewAnnotationOptions.OBJECT_ID,
                        entities.ViewAnnotationOptions.VTT]:
            if option == entities.ViewAnnotationOptions.VTT:
                annotation_filepath = temp_path + ".vtt"
            else:
                if 'video' in item.mimetype:
                    annotation_filepath = temp_path + ".mp4"
                else:
                    annotation_filepath = temp_path + ".png"
            if not os.path.isfile(annotation_filepath) or overwrite:
                # if not exists OR (exists AND overwrite)
                if not os.path.exists(os.path.dirname(annotation_filepath)):
                    # create folder if not exists
                    os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
                if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
                    raise PlatformException(
                        error="1002",
                        message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
                annotations.download(
                    filepath=annotation_filepath,
                    img_filepath=img_filepath,
                    annotation_format=option,
                    height=img_shape[0],
                    width=img_shape[1],
                    thickness=thickness,
                    alpha=alpha,
                    with_text=with_text,
                    orientation=orientation
                )
        else:
            raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
611
-
612
@staticmethod
def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
    """
    Resolve the directory and full file path an item should be saved to.

    :param local_path: target directory, or a concrete filename
    :param item: item entity providing name/filename
    :param to_items_folder: place the file under an 'items' sub-folder
    :param without_relative_path: when not None, ignore the item's remote path
    :param is_folder: treat local_path as a folder even if it has an extension
    :return: tuple (directory, full file path)
    """
    _, suffix = os.path.splitext(local_path)
    if suffix and not is_folder:
        # local_path already points at a concrete filename
        target_filepath = local_path
        return os.path.dirname(target_filepath), target_filepath

    # local_path is a directory - derive the filename from the item
    target_dir = local_path
    if to_items_folder:
        target_dir = os.path.join(target_dir, "items")
    elif is_folder:
        target_dir = os.path.join(target_dir, "")
    if without_relative_path is not None:
        # flat layout: just the item name
        relative = item.name
    else:
        # mirror the item's remote path (strip the leading '/')
        relative = item.filename[1:]
    return target_dir, os.path.join(target_dir, relative)
631
-
632
@staticmethod
def __get_link_source(item):
    """
    Resolve link items to their concrete source.

    Follows chained id-links until a non-link item (or a url-link) is reached.

    :param item: Item entity (possibly a '.json' link item)
    :return: tuple (resolved item, url, is_url) where url is '' unless the
             terminal item is a url-link
    """
    assert isinstance(item, entities.Item)

    def _shebang(current):
        # the link descriptor lives under metadata.system.shebang
        return current.metadata.get('system', {}).get('shebang', {})

    def _is_link(current):
        return current.filename.endswith('.json') and _shebang(current).get('dltype', '') == 'link'

    if not item.is_fetched:
        return item, '', False
    if not _is_link(item):
        return item, '', False

    # follow chained id-links until a non-id-link item is reached
    while _is_link(item) and _shebang(item).get('linkInfo', {}).get('type', '') == 'id':
        item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])

    # a terminal url-link yields the external url
    if _is_link(item) and _shebang(item).get('linkInfo', {}).get('type', '') == 'url':
        return item, item.metadata['system']['shebang']['linkInfo']['ref'], True
    return item, '', False
656
-
657
def __file_validation(self, item, downloaded_file):
    """
    Compare a downloaded file's size to the item's expected size.

    :param item: Item entity; expected size is read from metadata.system.size
    :param downloaded_file: io.BytesIO buffer or a path to a file on disk
    :return: tuple (is_valid, actual_size, can_resume); an over-sized file
             cannot be fixed by resuming
    """
    if isinstance(downloaded_file, io.BytesIO):
        actual_size = downloaded_file.getbuffer().nbytes
    else:
        actual_size = os.stat(downloaded_file).st_size
    expected_size = item.metadata['system']['size']
    is_valid = actual_size == expected_size
    # resuming only makes sense while the local file is not larger than expected
    can_resume = actual_size <= expected_size
    return is_valid, actual_size, can_resume
671
-
672
def __thread_download(self,
                      item,
                      save_locally,
                      local_path,
                      to_array,
                      local_filepath,
                      overwrite,
                      annotation_options,
                      annotation_filters,
                      chunk_size=8192,
                      thickness=1,
                      with_text=False,
                      alpha=1,
                      export_version=entities.ExportVersion.V1
                      ):
    """
    Get a single item's binary data
    Calling this method will returns the item body itself , an image for example with the proper mimetype.

    :param item: Item entity to download
    :param save_locally: bool. save to file or return buffer
    :param local_path: item local folder to save to.
    :param to_array: returns Ndarray when True and local_path = False
    :param local_filepath: item local filepath
    :param overwrite: overwrite the file is existing
    :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
    :param annotation_filters: Filters entity to filter item's annotation
    :param chunk_size: size of chunks to download - optional. default = 8192
    :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
    :param with_text: optional - add text to annotations, default = False
    :param alpha: opacity value [0 1], default 1
    :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
    :return: local filepath (save_locally), io.BytesIO buffer, or np.ndarray (to_array)
    """
    # check if need to download image binary from platform
    need_to_download = True
    if save_locally and os.path.isfile(local_filepath):
        need_to_download = overwrite

    # resolve link items to their concrete source (and external url, if any)
    item, url, is_url = self.__get_link_source(item=item)

    # save as byte stream
    data = io.BytesIO()
    if need_to_download:
        # chunk_resume maps resume-offset -> retry count; three failed retries
        # at the same offset abort the download (see __get_next_chunk)
        chunk_resume = {0: 0}
        start_point = 0
        download_done = False
        while chunk_resume.get(start_point, '') != 3 and not download_done:
            if not is_url:
                # platform stream; the Range header resumes from the last verified offset
                headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
                result, response = self.items_repository._client_api.gen_request(req_type="get",
                                                                                 headers=headers,
                                                                                 path="/items/{}/stream".format(
                                                                                     item.id),
                                                                                 stream=True,
                                                                                 dataset_id=item.dataset_id)
                if not result:
                    # drop any partial temp file before failing
                    if os.path.isfile(local_filepath + '.download'):
                        os.remove(local_filepath + '.download')
                    raise PlatformException(response)
            else:
                # external url item - keep the source extension on the local filename
                _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
                if local_filepath:
                    local_filepath += ext
                response = self.get_url_stream(url=url)

            if save_locally:
                # save to file
                if not os.path.exists(os.path.dirname(local_filepath)):
                    # create folder if not exists
                    os.makedirs(os.path.dirname(local_filepath), exist_ok=True)

                # decide if create progress bar for item (only for files > 10 MB)
                total_length = response.headers.get("content-length")
                one_file_pbar = None
                try:
                    one_file_progress_bar = total_length is not None and int(
                        total_length) > 10e6  # size larger than 10 MB
                    if one_file_progress_bar:
                        one_file_pbar = tqdm.tqdm(total=int(total_length),
                                                  unit='B',
                                                  unit_scale=True,
                                                  unit_divisor=1024,
                                                  position=1,
                                                  file=sys.stdout,
                                                  disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
                                                  desc='Download Item')
                except Exception as err:
                    one_file_progress_bar = False
                    logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))

                # start download
                if self.items_repository._client_api.sdk_cache.use_cache and \
                        self.items_repository._client_api.cache is not None:
                    # cache backend returns a local file path in the response body
                    response_output = os.path.normpath(response.content)
                    if isinstance(response_output, bytes):
                        response_output = response_output.decode('utf-8')[1:-1]

                    if os.path.isfile(os.path.normpath(response_output)):
                        if response_output != local_filepath:
                            source_path = os.path.normpath(response_output)
                            shutil.copyfile(source_path, local_filepath)
                        download_done = True
                else:
                    try:
                        # stream into a temp '.download' file; only move it into
                        # place after the size validation passes
                        temp_file_path = local_filepath + '.download'
                        with open(temp_file_path, "ab") as f:
                            try:
                                for chunk in response.iter_content(chunk_size=chunk_size):
                                    if chunk:  # filter out keep-alive new chunks
                                        f.write(chunk)
                                        if one_file_progress_bar:
                                            one_file_pbar.update(len(chunk))
                            except Exception:
                                # best effort: a broken stream is detected by the
                                # size validation below and resumed on the next pass
                                pass

                        file_validation = True
                        if not is_url:
                            file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                               download_progress=temp_file_path,
                                                                                               chunk_resume=chunk_resume)
                        if file_validation:
                            shutil.move(temp_file_path, local_filepath)
                            download_done = True
                    except Exception:
                        # cleanup the partial temp file before propagating
                        if os.path.isfile(temp_file_path):
                            os.remove(temp_file_path)
                        raise
                if one_file_progress_bar:
                    one_file_pbar.close()
                # save to output variable
                data = local_filepath
                # if image - can download annotation mask
                if item.annotated and annotation_options:
                    self._download_img_annotations(item=item,
                                                   img_filepath=local_filepath,
                                                   annotation_options=annotation_options,
                                                   annotation_filters=annotation_filters,
                                                   local_path=local_path,
                                                   overwrite=overwrite,
                                                   thickness=thickness,
                                                   alpha=alpha,
                                                   with_text=with_text,
                                                   export_version=export_version
                                                   )
            else:
                if self.items_repository._client_api.sdk_cache.use_cache and \
                        self.items_repository._client_api.cache is not None:
                    response_output = os.path.normpath(response.content)
                    if isinstance(response_output, bytes):
                        response_output = response_output.decode('utf-8')[1:-1]

                    if os.path.isfile(response_output):
                        source_file = response_output
                        # bug fix: the cache file must be opened for READING
                        # ('rb'; it was opened 'wb', which truncated the cached
                        # file and made .read() raise) and wrapped in BytesIO so
                        # the seek()/name accesses below keep working
                        with open(source_file, 'rb') as f:
                            data = io.BytesIO(f.read())
                        download_done = True
                else:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        if chunk:  # filter out keep-alive new chunks
                            data.write(chunk)

                    file_validation = True
                    if not is_url:
                        file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
                                                                                           download_progress=data,
                                                                                           chunk_resume=chunk_resume)
                    if file_validation:
                        download_done = True
                    else:
                        continue
                # go back to the beginning of the stream
                data.seek(0)
                data.name = item.name
        if not save_locally and to_array:
            if 'image' not in item.mimetype and not is_url:
                raise PlatformException(
                    error="400",
                    message='Download element type numpy.ndarray support for image only. '
                            'Item Id: {} is {} type'.format(item.id, item.mimetype))

            data = np.array(Image.open(data))
    else:
        data = local_filepath
    return data
860
-
861
def __get_next_chunk(self, item, download_progress, chunk_resume):
    """
    Validate a partial download and compute the next resume offset.

    :param item: Item entity being downloaded
    :param download_progress: temp file path or io.BytesIO with the bytes so far
    :param chunk_resume: dict mapping resume-offset -> retry count
    :return: tuple (is_valid, resume_offset, chunk_resume)
    :raises PlatformException: after 3 failed retries at the same offset, or
        when the local file is already larger than expected (cannot resume)
    """
    is_valid, current_size, can_resume = self.__file_validation(item=item,
                                                                downloaded_file=download_progress)
    resume_offset = current_size
    if not is_valid:
        if resume_offset in chunk_resume:
            chunk_resume[resume_offset] += 1
        else:
            # progress was made - restart the retry bookkeeping at the new offset
            chunk_resume = {resume_offset: 1}
        if chunk_resume[resume_offset] == 3 or not can_resume:
            raise PlatformException(
                error=500,
                message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
    return is_valid, resume_offset, chunk_resume
875
-
876
def __default_local_path(self):
    """
    Build the default download directory under the SDK binaries cache.

    Layout depends on what the repository is bound to:
    no dataset -> '<cache>/items'; dataset without project ->
    '<cache>/datasets/<name>_<id>'; otherwise
    '<cache>/projects/<project>/datasets/<dataset>'.

    :return: the resolved local path (also logged)
    """
    cache_root = self.items_repository._client_api.sdk_cache.cache_path_bin
    if self.items_repository._dataset is None:
        # no dataset bound - use a generic items folder
        local_path = os.path.join(cache_root, "items")
    elif self.items_repository.dataset._project is None:
        # by dataset name (id suffix keeps same-named datasets apart)
        local_path = os.path.join(
            cache_root,
            "datasets",
            "{}_{}".format(self.items_repository.dataset.name, self.items_repository.dataset.id),
        )
    else:
        # by dataset and project name
        local_path = os.path.join(
            cache_root,
            "projects",
            self.items_repository.dataset.project.name,
            "datasets",
            self.items_repository.dataset.name,
        )
    logger.info("Downloading to: {}".format(local_path))
    return local_path
903
-
904
@staticmethod
def get_url_stream(url):
    """
    Open a streaming GET request to an external URL (used for url-link items).

    :param url: the external URL to stream the binary from
    :return: a requests.Response opened with stream=True
    """
    # This will download the binaries from the URL user provided
    prepared_request = requests.Request(method='GET', url=url).prepare()
    with requests.Session() as s:
        # retry transient connect/read failures up to 3 times with backoff
        retry = Retry(
            total=3,
            read=3,
            connect=3,
            backoff_factor=1,
        )
        adapter = HTTPAdapter(max_retries=retry)
        s.mount('http://', adapter)
        s.mount('https://', adapter)
        response = s.send(request=prepared_request, stream=True)
    # NOTE(review): the session is closed before the caller consumes the
    # streamed response; presumably the underlying connection stays readable,
    # but this is worth confirming for large streamed bodies.
    return response
1
+ import copy
2
+ import io
3
+ import json
4
+ import logging
5
+ import multiprocessing
6
+ import os
7
+ import shutil
8
+ import sys
9
+ import tempfile
10
+ import traceback
11
+ from pathlib import Path
12
+ from urllib.parse import unquote, urlparse
13
+
14
+ import numpy as np
15
+ import requests
16
+ import tqdm
17
+ from PIL import Image
18
+ from requests.adapters import HTTPAdapter
19
+ from urllib3.util import Retry
20
+
21
+ from .. import entities, repositories, miscellaneous, PlatformException, exceptions
22
+ from ..services import Reporter
23
+
24
+ logger = logging.getLogger(name='dtlpy')
25
+
26
NUM_TRIES = 3  # number of download attempts per item before reporting failure
DOWNLOAD_MAX_ITEMS_PER_SUBSET = 1000  # max items per exported JSON subset in _download_recursive
28
+
29
+ class Downloader:
30
+ def __init__(self, items_repository):
31
+ self.items_repository = items_repository
32
+
33
def _process_download_results(self, reporter, raise_on_error=False):
    """
    Summarize a finished download run and return its output.

    :param reporter: Reporter instance containing download results
    :param raise_on_error: If True, raise exception on download errors
    :return: a single output value (or None) when at most one item was
             handled, otherwise the reporter output iterator
    """
    # reporting
    n_download = reporter.status_count(status='download')
    n_exist = reporter.status_count(status='exist')
    n_error = reporter.status_count(status='error')
    logger.info(f"Number of files downloaded:{n_download}")
    logger.info(f"Number of files exists: {n_exist}")
    logger.info(f"Total number of files: {n_download + n_exist}")

    if n_error > 0:
        log_filepath = reporter.generate_log_files()
        # collect up to 5 error examples for the exception message
        samples = []
        if reporter._errors:
            for _id, error in reporter._errors.items():
                samples.append(f"Item ID: {_id}, Error: {error} | ")
                if len(samples) >= 5:
                    break
        error_message = f"Errors in {n_error} files. Errors: {''.join(samples)}"
        if log_filepath is not None:
            error_message += f", see {log_filepath} for full log"
        if raise_on_error is True:
            raise PlatformException(
                error="400", message=error_message
            )
        logger.warning(error_message)

    # with at most a single result, unwrap the iterator to a scalar (or None)
    if int(n_download) <= 1 and int(n_exist) <= 1:
        return next(reporter.output, None)
    return reporter.output
77
+
78
def _process_item_json(self, local_path, item_json, reporter, pbar, overwrite=False):
    """
    Download a single item described by its JSON and save both binary and metadata.

    The binary is saved under '<local_path>/items/<filename>' and the raw item
    JSON under '<local_path>/json/<filename>.json'; the result is reported to
    the reporter with status 'download' or 'error'.

    :param local_path: Local root path to save files under
    :param item_json: Item JSON metadata (as returned by the export)
    :param reporter: Reporter instance for tracking progress
    :param pbar: Progress bar instance (updated once per item)
    :param overwrite: Whether to overwrite existing files
    """
    err = None
    trace = None
    downloaded_filepath = None
    item_id = item_json['id']
    filename = item_json['filename'].lstrip('/')

    for i_try in range(NUM_TRIES):
        try:
            # Download the binary
            image_path = Path(local_path) / 'items' / filename
            # Ensure the directory for the image file exists (in case filename has subdirectories)
            image_path.parent.mkdir(parents=True, exist_ok=True)
            item = entities.Item.from_json(_json=item_json, client_api=self.items_repository._client_api, is_fetched=False)
            downloaded_data = self.__thread_download(
                item=item,
                local_path=str(image_path.parent),
                local_filepath=str(image_path),
                save_locally=True,
                to_array=False,
                overwrite=overwrite,
                annotation_options=[],
                annotation_filters=None,
            )

            if downloaded_data is None:
                err = 'Failed to download image'
                trace = ''
            else:
                # Save the original item JSON next to the binary
                json_filename = Path(filename).stem + '.json'
                json_path = Path(local_path) / 'json' / Path(filename).parent / json_filename

                # Ensure the directory for the JSON file exists (in case filename has subdirectories)
                json_path.parent.mkdir(parents=True, exist_ok=True)

                # Save the original item_json directly
                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump(item_json, f, indent=2, ensure_ascii=False)

                downloaded_filepath = str(image_path)

            if downloaded_filepath is not None:
                break

        except Exception as e:
            # include the failing item id in the debug log (was a literal "(unknown)")
            logger.debug(f"Download item: {item_id}. Try {i_try + 1}/{NUM_TRIES}. Fail.")
            err = e
            trace = traceback.format_exc()

    pbar.update()
    if downloaded_filepath is None:
        if err is None:
            err = self.items_repository._client_api.platform_exception
        reporter.set_index(status="error", ref=item_id, success=False, error=f"{err}\n{trace}")
    else:
        reporter.set_index(ref=item_id, status="download", output=downloaded_filepath, success=True)
145
+
146
def _download_recursive(
        self,
        local_path=None,
        filters: entities.Filters = None,
        annotation_filters: entities.Filters = None,
        file_types=None,
        overwrite=False,
        raise_on_error=False,
        dataset_lock=False,
        lock_timeout_sec=None,
):
    """
    Download items recursively from a dataset via the recursive export.

    Item JSON subsets are exported to a temp directory, and each item in them
    is downloaded on the 'item.download' thread pool.

    :param local_path: Local path to save downloaded items
    :param filters: Filters entity to filter items
    :param annotation_filters: Filters entity to filter annotations
    :param file_types: List of file types to download
    :param overwrite: Whether to overwrite existing files
    :param raise_on_error: Raise error if download fails
    :param dataset_lock: Lock dataset during download
    :param lock_timeout_sec: Lock timeout in seconds
    :return: download outputs (see _process_download_results)
    """
    filters, annotation_filters = self._prepare_filters(filters=filters,
                                                        annotation_filters=annotation_filters,
                                                        file_types=file_types)
    # count matching items without fetching a page
    filter_copy = copy.deepcopy(filters)
    filter_copy.page_size = 0
    num_items = self.items_repository.list(filters=filter_copy).items_count
    if num_items == 0:
        return list()
    client_api = self.items_repository._client_api
    reporter = Reporter(
        num_workers=num_items,
        resource=Reporter.ITEMS_DOWNLOAD,
        print_error_logs=client_api.verbose.print_error_logs,
        client_api=client_api,
    )

    # Create the output directories once using pathlib
    local_path_obj = Path(local_path)
    (local_path_obj / 'items').mkdir(parents=True, exist_ok=True)
    (local_path_obj / 'json').mkdir(parents=True, exist_ok=True)

    jobs = [None for _ in range(num_items)]
    # currently keep the thread count at its default
    pool = client_api.thread_pools(pool_name='item.download')
    pbar = tqdm.tqdm(
        total=num_items,
        disable=client_api.verbose.disable_progress_bar_download_dataset,
        file=sys.stdout,
        desc='Download Items',
    )
    try:
        i_item = 0
        # TODO(review): the temp export directory is never removed; confirm cleanup policy
        for json_file in self.items_repository.dataset.project.datasets._export_recursive(
                dataset=self.items_repository.dataset,
                local_path=tempfile.mkdtemp(prefix='download_recursive_jsons_'),
                max_items_per_subset=DOWNLOAD_MAX_ITEMS_PER_SUBSET,
                include_annotations=True,
                filters=filters,
                annotation_filters=annotation_filters,
                dataset_lock=dataset_lock,
                lock_timeout_sec=lock_timeout_sec,
        ):
            with open(json_file, 'r') as f:
                data = json.load(f)
            for item_json in data:
                jobs[i_item] = pool.submit(
                    self._process_item_json,
                    **{
                        "local_path": local_path,
                        "item_json": item_json,
                        "reporter": reporter,
                        "pbar": pbar,
                        "overwrite": overwrite,
                    },
                )
                i_item += 1
    finally:
        # wait for every submitted job before closing the progress bar
        _ = [j.result() for j in jobs if j is not None]
        pbar.close()
    return self._process_download_results(reporter=reporter, raise_on_error=raise_on_error)
233
+
234
@staticmethod
def _prepare_filters(filters: entities.Filters = None,
                     annotation_filters: entities.Filters = None,
                     file_types=None):
    """
    Prepare item filters and merge annotation filters into them.

    :param filters: Filters entity or None (a default non-user-query filter is created)
    :param annotation_filters: Annotation filters to merge with item filters
    :param file_types: List of mimetypes to restrict the items to
    :return: tuple (filters, annotation_filters)
    """
    if filters is None:
        filters = entities.Filters()
        filters._user_query = 'false'

    if file_types is not None:
        filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)

    if annotation_filters is None:
        # nothing supplied - use an empty annotation-resource filter
        annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
        filters._user_query = 'false'
        return filters, annotation_filters

    has_list_filters = len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0
    if has_list_filters:
        # translate each annotation filter clause into a join on the item query
        for and_filter in annotation_filters.and_filter_list:
            filters.add_join(field=and_filter.field,
                             values=and_filter.values,
                             operator=and_filter.operator,
                             method=entities.FiltersMethod.AND)
        for or_filter in annotation_filters.or_filter_list:
            filters.add_join(field=or_filter.field,
                             values=or_filter.values,
                             operator=or_filter.operator,
                             method=entities.FiltersMethod.OR)
    elif annotation_filters.custom_filter is not None:
        # embed the raw annotation query as the 'join' of the item query
        annotation_query_dict = annotation_filters.prepare()
        items_query_dict = filters.prepare()
        items_query_dict["join"] = annotation_query_dict
        filters.reset()
        filters.custom_filter = items_query_dict

    return filters, annotation_filters
277
+
278
+ def download(self,
279
+ # filter options
280
+ filters: entities.Filters = None,
281
+ items=None,
282
+ # download options
283
+ local_path=None,
284
+ file_types=None,
285
+ save_locally=True,
286
+ to_array=False,
287
+ overwrite=False,
288
+ annotation_filters: entities.Filters = None,
289
+ annotation_options: entities.ViewAnnotationOptions = None,
290
+ to_items_folder=True,
291
+ thickness=1,
292
+ with_text=False,
293
+ without_relative_path=None,
294
+ avoid_unnecessary_annotation_download=False,
295
+ include_annotations_in_output=True,
296
+ export_png_files=False,
297
+ filter_output_annotations=False,
298
+ alpha=1,
299
+ export_version=entities.ExportVersion.V1,
300
+ dataset_lock=False,
301
+ lock_timeout_sec=None,
302
+ export_summary=False,
303
+ raise_on_error=False
304
+ ):
305
+ """
306
+ Download dataset by filters.
307
+ Filtering the dataset for items and save them local
308
+ Optional - also download annotation, mask, instance and image mask of the item
309
+
310
+ :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
311
+ :param items: download Item entity or item_id (or a list of item)
312
+ :param local_path: local folder or filename to save to.
313
+ :param file_types: a list of file type to download. e.g ['video/webm', 'video/mp4', 'image/jpeg', 'image/png']
314
+ :param save_locally: bool. save to disk or return a buffer
315
+ :param to_array: returns Ndarray when True and local_path = False
316
+ :param overwrite: optional - default = False
317
+ :param annotation_options: download annotations options. options: list(dl.ViewAnnotationOptions)
318
+ :param annotation_filters: Filters entity to filter annotations for download
319
+ :param to_items_folder: Create 'items' folder and download items to it
320
+ :param with_text: optional - add text to annotations, default = False
321
+ :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
322
+ :param without_relative_path: bool - download items without the relative path from platform
323
+ :param avoid_unnecessary_annotation_download: DEPRECATED only items and annotations in filters are downloaded
324
+ :param include_annotations_in_output: default - False , if export should contain annotations
325
+ :param export_png_files: default - True, if semantic annotations should be exported as png files
326
+ :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
327
+ :param alpha: opacity value [0 1], default 1
328
+ :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
329
+ :param bool dataset_lock: optional - default = False
330
+ :param bool export_summary: optional - default = False
331
+ :param int lock_timeout_sec: optional
332
+ :param bool raise_on_error: raise an exception if an error occurs
333
+ :return: Output (list)
334
+ """
335
+
336
+ ###################
337
+ # Default options #
338
+ ###################
339
+ # annotation options
340
+ if annotation_options is None:
341
+ annotation_options = list()
342
+ elif not isinstance(annotation_options, list):
343
+ annotation_options = [annotation_options]
344
+ for ann_option in annotation_options:
345
+ if not isinstance(ann_option, entities.ViewAnnotationOptions):
346
+ if ann_option not in list(entities.ViewAnnotationOptions):
347
+ raise PlatformException(
348
+ error='400',
349
+ message='Unknown annotation download option: {}, please choose from: {}'.format(
350
+ ann_option, list(entities.ViewAnnotationOptions)))
351
+ # normalize items argument: treat empty list as “no items specified”
352
+ if isinstance(items, list) and len(items) == 0:
353
+ items = None
354
+ #####################
355
+ # items to download #
356
+ #####################
357
+ if items is not None:
358
+ # convert input to a list
359
+ if not isinstance(items, list):
360
+ items = [items]
361
+ # get items by id
362
+ if isinstance(items[0], str):
363
+ items = [self.items_repository.get(item_id=item_id) for item_id in items]
364
+ elif isinstance(items[0], entities.Item):
365
+ pass
366
+ else:
367
+ raise PlatformException(
368
+ error="400",
369
+ message='Unknown items type to download. Expecting str or Item entities. Got "{}" instead'.format(
370
+ type(items[0])
371
+ )
372
+ )
373
+ # create filters to download annotations
374
+ filters = entities.Filters(field='id',
375
+ values=[item.id for item in items],
376
+ operator=entities.FiltersOperations.IN)
377
+ filters._user_query = 'false'
378
+
379
+ # convert to list of list (like pages and page)
380
+ items_to_download = [items]
381
+ num_items = len(items)
382
+ else:
383
+ # Prepare and merge filters
384
+ filters, annotation_filters = self._prepare_filters(
385
+ filters=filters,
386
+ annotation_filters=annotation_filters,
387
+ file_types=file_types
388
+ )
389
+
390
+ items_to_download = self.items_repository.list(filters=filters)
391
+ num_items = items_to_download.items_count
392
+
393
+ if num_items == 0:
394
+ logger.warning('No items found! Nothing was downloaded')
395
+ return list()
396
+
397
+ ##############
398
+ # local path #
399
+ ##############
400
+ is_folder = False
401
+ if local_path is None:
402
+ # create default local path
403
+ local_path = self.__default_local_path()
404
+
405
+ if os.path.isdir(local_path):
406
+ logger.info('Local folder already exists:{}. merge/overwrite according to "overwrite option"'.format(
407
+ local_path))
408
+ is_folder = True
409
+ else:
410
+ # check if filename
411
+ _, ext = os.path.splitext(local_path)
412
+ if num_items > 1:
413
+ is_folder = True
414
+ else:
415
+ item_to_download = items_to_download[0][0]
416
+ file_name = item_to_download.name
417
+ _, ext_download = os.path.splitext(file_name)
418
+ if ext_download != ext:
419
+ is_folder = True
420
+ if is_folder and save_locally:
421
+ path_to_create = local_path
422
+ if local_path.endswith('*'):
423
+ path_to_create = os.path.dirname(local_path)
424
+ logger.info("Creating new directory for download: {}".format(path_to_create))
425
+ os.makedirs(path_to_create, exist_ok=True)
426
+
427
+ ####################
428
+ # annotations json #
429
+ ####################
430
+ # download annotations' json files in a new thread
431
+ # items will start downloading and if json not exists yet - will download for each file
432
+ if num_items > 1 and annotation_options:
433
+ # a new folder named 'json' will be created under the "local_path"
434
+ logger.info("Downloading annotations formats: {}".format(annotation_options))
435
+ self.download_annotations(**{
436
+ "dataset": self.items_repository.dataset,
437
+ "filters": filters,
438
+ "annotation_filters": annotation_filters,
439
+ "local_path": local_path,
440
+ 'overwrite': overwrite,
441
+ 'include_annotations_in_output': include_annotations_in_output,
442
+ 'export_png_files': export_png_files,
443
+ 'filter_output_annotations': filter_output_annotations,
444
+ 'export_version': export_version,
445
+ 'dataset_lock': dataset_lock,
446
+ 'lock_timeout_sec': lock_timeout_sec,
447
+ 'export_summary': export_summary
448
+ })
449
+ ###############
450
+ # downloading #
451
+ ###############
452
+ # create result lists
453
+ client_api = self.items_repository._client_api
454
+
455
+ reporter = Reporter(num_workers=num_items,
456
+ resource=Reporter.ITEMS_DOWNLOAD,
457
+ print_error_logs=client_api.verbose.print_error_logs,
458
+ client_api=client_api)
459
+ jobs = [None for _ in range(num_items)]
460
+ # pool
461
+ pool = client_api.thread_pools(pool_name='item.download')
462
+ # download
463
+ pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset,
464
+ file=sys.stdout,
465
+ desc='Download Items')
466
+ try:
467
+ i_item = 0
468
+ for page in items_to_download:
469
+ for item in page:
470
+ if item.type == "dir":
471
+ continue
472
+ if save_locally:
473
+ # get local file path
474
+ item_local_path, item_local_filepath = self.__get_local_filepath(
475
+ local_path=local_path,
476
+ without_relative_path=without_relative_path,
477
+ item=item,
478
+ to_items_folder=to_items_folder,
479
+ is_folder=is_folder)
480
+
481
+ if os.path.isfile(item_local_filepath) and not overwrite:
482
+ logger.debug("File Exists: {}".format(item_local_filepath))
483
+ reporter.set_index(ref=item.id, status='exist', output=item_local_filepath, success=True)
484
+ pbar.update()
485
+ if annotation_options and item.annotated:
486
+ # download annotations only
487
+ jobs[i_item] = pool.submit(
488
+ self._download_img_annotations,
489
+ **{
490
+ "item": item,
491
+ "img_filepath": item_local_filepath,
492
+ "overwrite": overwrite,
493
+ "annotation_options": annotation_options,
494
+ "annotation_filters": annotation_filters,
495
+ "local_path": item_local_path,
496
+ "thickness": thickness,
497
+ "alpha": alpha,
498
+ "with_text": with_text,
499
+ "export_version": export_version,
500
+ },
501
+ )
502
+ i_item += 1
503
+ continue
504
+ else:
505
+ item_local_path = None
506
+ item_local_filepath = None
507
+
508
+ # download single item
509
+ jobs[i_item] = pool.submit(
510
+ self.__thread_download_wrapper,
511
+ **{
512
+ "i_item": i_item,
513
+ "item": item,
514
+ "item_local_path": item_local_path,
515
+ "item_local_filepath": item_local_filepath,
516
+ "save_locally": save_locally,
517
+ "to_array": to_array,
518
+ "annotation_options": annotation_options,
519
+ "annotation_filters": annotation_filters,
520
+ "reporter": reporter,
521
+ "pbar": pbar,
522
+ "overwrite": overwrite,
523
+ "thickness": thickness,
524
+ "alpha": alpha,
525
+ "with_text": with_text,
526
+ "export_version": export_version
527
+ },
528
+ )
529
+ i_item += 1
530
+ except Exception:
531
+ logger.exception('Error downloading:')
532
+ finally:
533
+ _ = [j.result() for j in jobs if j is not None]
534
+ pbar.close()
535
+
536
+ return self._process_download_results(reporter=reporter, raise_on_error=raise_on_error)
537
+
538
    def __thread_download_wrapper(self, i_item,
                                  # item params
                                  item, item_local_path, item_local_filepath,
                                  save_locally, to_array, overwrite,
                                  # annotations params
                                  annotation_options, annotation_filters, with_text, thickness,
                                  # threading params
                                  reporter, pbar, alpha, export_version):
        """
        Thread entry point that downloads one item with retries.

        Calls ``__thread_download`` up to ``NUM_TRIES`` times, then records the
        final outcome on the shared ``reporter`` (success output, or the last
        error and traceback) and advances the shared progress bar once.

        :param i_item: index of this job in the caller's jobs list
        :param item: Item entity to download
        :param item_local_path: destination folder (None when not saving locally)
        :param item_local_filepath: destination file path (None when not saving locally)
        :param save_locally: save to file if True, otherwise return a buffer
        :param to_array: return an ndarray instead of a buffer (images only)
        :param overwrite: overwrite an existing local file
        :param annotation_options: list(dl.ViewAnnotationOptions) to download as well
        :param annotation_filters: Filters entity for the item's annotations
        :param with_text: add label text to rendered annotations
        :param thickness: annotation line thickness, -1 fills
        :param reporter: shared Reporter collecting per-item status
        :param pbar: shared tqdm bar, updated exactly once per item
        :param alpha: annotation opacity [0 1]
        :param export_version: export filename convention (V1/V2)
        """

        download = None
        err = None
        trace = None
        for i_try in range(NUM_TRIES):
            try:
                logger.debug("Download item: {path}. Try {i}/{n}. Starting..".format(path=item.filename,
                                                                                     i=i_try + 1,
                                                                                     n=NUM_TRIES))
                download = self.__thread_download(item=item,
                                                  save_locally=save_locally,
                                                  to_array=to_array,
                                                  local_path=item_local_path,
                                                  local_filepath=item_local_filepath,
                                                  annotation_options=annotation_options,
                                                  annotation_filters=annotation_filters,
                                                  overwrite=overwrite,
                                                  thickness=thickness,
                                                  alpha=alpha,
                                                  with_text=with_text,
                                                  export_version=export_version)
                logger.debug("Download item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=item.filename,
                                                                                                 i=i_try + 1,
                                                                                                 n=NUM_TRIES,
                                                                                                 id=item.id))
                # a non-None result means the download succeeded - stop retrying
                if download is not None:
                    break
            except Exception as e:
                logger.debug("Download item: {path}. Try {i}/{n}. Fail.".format(path=item.filename,
                                                                                i=i_try + 1,
                                                                                n=NUM_TRIES))
                # keep the last error and traceback for the final report below
                err = e
                trace = traceback.format_exc()
        pbar.update()
        if download is None:
            if err is None:
                # no exception was captured - fall back to the client's last platform error
                err = self.items_repository._client_api.platform_exception
            reporter.set_index(status="error", ref=item.id, success=False,
                               error="{}\n{}".format(err, trace))
        else:
            reporter.set_index(ref=item.id, status="download", output=download, success=True)
+
588
    @staticmethod
    def download_annotations(dataset: entities.Dataset,
                             local_path: str,
                             filters: entities.Filters = None,
                             annotation_filters: entities.Filters = None,
                             overwrite=False,
                             include_annotations_in_output=True,
                             export_png_files=False,
                             filter_output_annotations=False,
                             export_version=entities.ExportVersion.V1,
                             dataset_lock=False,
                             lock_timeout_sec=None,
                             export_summary=False
                             ):
        """
        Download annotations json for entire dataset.

        Triggers a dataset export command on the platform, waits for it to
        complete, downloads the resulting zip item and extracts it into a
        ``json`` folder under ``local_path``. The downloaded zip is always
        removed afterwards.

        :param dataset: Dataset entity
        :param local_path: root folder; jsons are extracted to ``<local_path>/json``
        :param dtlpy.entities.filters.Filters filters: dl.Filters entity to filters items
        :param annotation_filters: dl.Filters entity to filters items' annotations
        :param overwrite: optional - overwrite annotations if exist, default = false
        :param include_annotations_in_output: default - True , if export should contain annotations
        :param export_png_files: default - if True, semantic annotations should be exported as png files
        :param filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
        :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
        :param bool dataset_lock: optional - default = False
        :param bool export_summary: optional - default = False
        :param int lock_timeout_sec: optional
        :return:
        """
        local_path = os.path.join(local_path, "json")
        zip_filepath = None
        # only if json folder does not exist or exist and overwrite
        # NOTE(review): local_path already ends with "json", so this checks
        # "<local_path>/json/json" - looks unintended; confirm before changing
        if not os.path.isdir(os.path.join(local_path, 'json')) or overwrite:
            # create local path to download and save to
            if not os.path.isdir(local_path):
                os.makedirs(local_path)

            try:
                # build the export command payload
                payload = dict()
                if filters is not None:
                    payload['itemsQuery'] = filters.prepare()
                payload['annotations'] = {
                    "include": include_annotations_in_output,
                    "convertSemantic": export_png_files
                }
                payload['exportVersion'] = export_version
                if annotation_filters is not None:
                    payload['annotationsQuery'] = annotation_filters.prepare()
                    payload['annotations']['filter'] = filter_output_annotations
                if dataset_lock:
                    payload['datasetLock'] = dataset_lock

                if export_summary:
                    payload['summary'] = export_summary

                if lock_timeout_sec:
                    payload['lockTimeoutSec'] = lock_timeout_sec

                # NOTE(review): assumes filters is not None here (callers pass a
                # prepared Filters); filters=None would raise AttributeError - confirm
                success, response = dataset._client_api.gen_request(req_type='post',
                                                                    path='/datasets/{}/export'.format(dataset.id),
                                                                    json_req=payload,
                                                                    headers={'user_query': filters._user_query})
                if not success:
                    raise exceptions.PlatformException(response)
                # the export runs as an async command - block until it finishes
                command = entities.Command.from_json(_json=response.json(),
                                                     client_api=dataset._client_api)
                command = command.wait(timeout=0)
                if 'outputItemId' not in command.spec:
                    raise exceptions.PlatformException(
                        error='400',
                        message="outputItemId key is missing in command response: {}".format(response))
                # the command output is an item holding the zipped jsons
                item_id = command.spec['outputItemId']
                annotation_zip_item = repositories.Items(client_api=dataset._client_api).get(item_id=item_id)
                zip_filepath = annotation_zip_item.download(local_path=local_path, export_version=export_version)
                # unzipping annotations to directory
                if isinstance(zip_filepath, list) or not os.path.isfile(zip_filepath):
                    raise exceptions.PlatformException(
                        error='404',
                        message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
                            annotation_zip_item.id))
                try:
                    miscellaneous.Zipping.unzip_directory(zip_filename=zip_filepath,
                                                          to_directory=local_path)
                except Exception as e:
                    # best-effort extraction: a bad zip is logged, not raised
                    logger.warning("Failed to extract zip file error: {}".format(e))

            finally:
                # cleanup - never leave the temporary zip behind
                if isinstance(zip_filepath, str) and os.path.isfile(zip_filepath):
                    os.remove(zip_filepath)
+
681
    @staticmethod
    def _download_img_annotations(item: entities.Item,
                                  img_filepath,
                                  local_path,
                                  overwrite,
                                  annotation_options,
                                  annotation_filters,
                                  thickness=1,
                                  with_text=False,
                                  alpha=1,
                                  export_version=entities.ExportVersion.V1
                                  ):
        """
        Write an item's annotations to disk in every requested format.

        Annotations are read from the already-exported json file when it exists
        (and no annotation filters were given), otherwise fetched from the
        platform. Each option in ``annotation_options`` is written under
        ``<local_path>/<option>/<item relative path>``.

        :param item: Item entity whose annotations are rendered
        :param img_filepath: local path of the downloaded binary, or None
        :param local_path: root download folder
        :param overwrite: re-render output files that already exist
        :param annotation_options: list(dl.ViewAnnotationOptions) to produce
        :param annotation_filters: Filters entity for the item's annotations
        :param thickness: line thickness, -1 fills the annotation
        :param with_text: draw label text on image outputs
        :param alpha: opacity value [0 1]
        :param export_version: export filename convention (V1/V2)
        """

        # check if local_path is a file name
        _, ext = os.path.splitext(local_path)
        if ext:
            # take the dir of the file for the annotations save
            local_path = os.path.dirname(local_path)

        # fix local path: annotation folders live next to, not inside, "items"
        if local_path.endswith("/items") or local_path.endswith("\\items"):
            local_path = os.path.dirname(local_path)

        # item.filename starts with '/' - strip it to get a relative path
        annotation_rel_path = item.filename[1:]
        if img_filepath is not None:
            # keep the (possibly extension-adjusted) downloaded file's basename
            dir_name = os.path.dirname(annotation_rel_path)
            base_name = os.path.basename(img_filepath)
            annotation_rel_path = os.path.join(dir_name, base_name)

        # find annotations json
        annotations_json_filepath = os.path.join(local_path, "json", annotation_rel_path)
        if export_version == entities.ExportVersion.V1:
            # V1 json filenames drop the item's original extension
            name, _ = os.path.splitext(annotations_json_filepath)
        else:
            name = annotations_json_filepath
        annotations_json_filepath = name + ".json"

        if os.path.isfile(annotations_json_filepath) and annotation_filters is None:
            # if exists take from json file
            with open(annotations_json_filepath, "r", encoding="utf8") as f:
                data = json.load(f)
            if "annotations" in data:
                data = data["annotations"]
            annotations = entities.AnnotationCollection.from_json(_json=data, item=item)
            # no need to use the filters here because the annotations were already downloaded with annotation_filters
        else:
            # if json file doesnt exist get the annotations from platform
            annotations = item.annotations.list(filters=annotation_filters)

        # get image shape
        is_url_item = item.metadata. \
            get('system', dict()). \
            get('shebang', dict()). \
            get('linkInfo', dict()). \
            get('type', None) == 'url'

        # NOTE(review): item was already dereferenced above, so this branch is
        # always taken - kept as-is
        if item is not None:
            orientation = item.system.get('exif', {}).get('Orientation', 0)
        else:
            orientation = 0
        if item.width is not None and item.height is not None:
            # EXIF orientations 5-8 imply a 90-degree rotation - swap the axes
            if orientation in [5, 6, 7, 8]:
                img_shape = (item.width, item.height)
            else:
                img_shape = (item.height, item.width)
        elif ('image' in item.mimetype and img_filepath is not None) or \
                (is_url_item and img_filepath is not None):
            # fall back to reading the shape from the downloaded binary
            img_shape = Image.open(img_filepath).size[::-1]
        else:
            img_shape = (0, 0)

        # download all annotation options
        for option in annotation_options:
            # get path and create dirs
            annotation_filepath = os.path.join(local_path, option, annotation_rel_path)
            if not os.path.isdir(os.path.dirname(annotation_filepath)):
                os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)

            if export_version == entities.ExportVersion.V1:
                temp_path, ext = os.path.splitext(annotation_filepath)
            else:
                temp_path = annotation_filepath

            if option == entities.ViewAnnotationOptions.JSON:
                # json is only written when the export did not already create it
                if not os.path.isfile(annotations_json_filepath):
                    annotations.download(
                        filepath=annotations_json_filepath,
                        annotation_format=option,
                        height=img_shape[0],
                        width=img_shape[1],
                    )
            elif option in [entities.ViewAnnotationOptions.MASK,
                            entities.ViewAnnotationOptions.INSTANCE,
                            entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE,
                            entities.ViewAnnotationOptions.OBJECT_ID,
                            entities.ViewAnnotationOptions.VTT]:
                # pick the output extension by option and item mimetype
                if option == entities.ViewAnnotationOptions.VTT:
                    annotation_filepath = temp_path + ".vtt"
                else:
                    if 'video' in item.mimetype:
                        annotation_filepath = temp_path + ".mp4"
                    else:
                        annotation_filepath = temp_path + ".png"
                if not os.path.isfile(annotation_filepath) or overwrite:
                    # if not exists OR (exists AND overwrite)
                    if not os.path.exists(os.path.dirname(annotation_filepath)):
                        # create folder if not exists
                        os.makedirs(os.path.dirname(annotation_filepath), exist_ok=True)
                    if option == entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE and img_filepath is None:
                        raise PlatformException(
                            error="1002",
                            message="Missing image for annotation option dl.ViewAnnotationOptions.ANNOTATION_ON_IMAGE")
                    annotations.download(
                        filepath=annotation_filepath,
                        img_filepath=img_filepath,
                        annotation_format=option,
                        height=img_shape[0],
                        width=img_shape[1],
                        thickness=thickness,
                        alpha=alpha,
                        with_text=with_text,
                        orientation=orientation
                    )
            else:
                raise PlatformException(error="400", message="Unknown annotation option: {}".format(option))
+
807
+ @staticmethod
808
+ def __get_local_filepath(local_path, item, to_items_folder, without_relative_path=None, is_folder=False):
809
+ # create paths
810
+ _, ext = os.path.splitext(local_path)
811
+ if ext and not is_folder:
812
+ # local_path is a filename
813
+ local_filepath = local_path
814
+ local_path = os.path.dirname(local_filepath)
815
+ else:
816
+ # if directory - get item's filename
817
+ if to_items_folder:
818
+ local_path = os.path.join(local_path, "items")
819
+ elif is_folder:
820
+ local_path = os.path.join(local_path, "")
821
+ if without_relative_path is not None:
822
+ local_filepath = os.path.join(local_path, item.name)
823
+ else:
824
+ local_filepath = os.path.join(local_path, item.filename[1:])
825
+ return local_path, local_filepath
826
+
827
+ @staticmethod
828
+ def __get_link_source(item):
829
+ assert isinstance(item, entities.Item)
830
+ is_url = False
831
+ url = item.resolved_stream
832
+ if item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
833
+ is_url = True
834
+
835
+ return item, url, is_url, url.startswith('file://')
836
+
837
+ def __file_validation(self, item, downloaded_file):
838
+ res = False
839
+ resume = True
840
+ if isinstance(downloaded_file, io.BytesIO):
841
+ file_size = downloaded_file.getbuffer().nbytes
842
+ else:
843
+ file_size = os.stat(downloaded_file).st_size
844
+ expected_size = item.metadata['system']['size']
845
+ size_diff = file_size - expected_size
846
+ if size_diff == 0:
847
+ res = True
848
+ if size_diff > 0:
849
+ resume = False
850
+ return res, file_size, resume
851
+
852
+ def __thread_download(self,
853
+ item,
854
+ save_locally,
855
+ local_path,
856
+ to_array,
857
+ local_filepath,
858
+ overwrite,
859
+ annotation_options,
860
+ annotation_filters,
861
+ chunk_size=8192,
862
+ thickness=1,
863
+ with_text=False,
864
+ alpha=1,
865
+ export_version=entities.ExportVersion.V1
866
+ ):
867
+ """
868
+ Get a single item's binary data
869
+ Calling this method will returns the item body itself , an image for example with the proper mimetype.
870
+
871
+ :param item: Item entity to download
872
+ :param save_locally: bool. save to file or return buffer
873
+ :param local_path: item local folder to save to.
874
+ :param to_array: returns Ndarray when True and local_path = False
875
+ :param local_filepath: item local filepath
876
+ :param overwrite: overwrite the file is existing
877
+ :param annotation_options: download annotations options: list(dl.ViewAnnotationOptions)
878
+ :param annotation_filters: Filters entity to filter item's annotation
879
+ :param chunk_size: size of chunks to download - optional. default = 8192
880
+ :param thickness: optional - line thickness, if -1 annotation will be filled, default =1
881
+ :param with_text: optional - add text to annotations, default = False
882
+ :param alpha: opacity value [0 1], default 1
883
+ :param ExportVersion export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
884
+ :return:
885
+ """
886
+ # check if need to download image binary from platform
887
+ need_to_download = True
888
+ if save_locally and os.path.isfile(local_filepath):
889
+ need_to_download = overwrite
890
+
891
+ item, url, is_url, is_local_link = self.__get_link_source(item=item)
892
+
893
+ # save as byte stream
894
+ data = io.BytesIO()
895
+ if need_to_download:
896
+ chunk_resume = {0: 0}
897
+ start_point = 0
898
+ download_done = False
899
+ while chunk_resume.get(start_point, '') != 3 and not download_done:
900
+ if not is_url:
901
+ headers = {'x-dl-sanitize': '0', 'Range': 'bytes={}-'.format(start_point)}
902
+ result, response = self.items_repository._client_api.gen_request(req_type="get",
903
+ headers=headers,
904
+ path="/items/{}/stream".format(
905
+ item.id),
906
+ stream=True,
907
+ dataset_id=item.dataset_id)
908
+ if not result:
909
+ if os.path.isfile(local_filepath + '.download'):
910
+ os.remove(local_filepath + '.download')
911
+ raise PlatformException(response)
912
+ else:
913
+ _, ext = os.path.splitext(item.metadata['system']['shebang']['linkInfo']['ref'].split('?')[0])
914
+ if local_filepath:
915
+ local_filepath += ext
916
+ response = self.get_url_stream(url=url)
917
+
918
+ if save_locally:
919
+ # save to file
920
+ if not os.path.exists(os.path.dirname(local_filepath)):
921
+ # create folder if not exists
922
+ os.makedirs(os.path.dirname(local_filepath), exist_ok=True)
923
+
924
+ # decide if create progress bar for item
925
+ if not is_local_link:
926
+ total_length = response.headers.get("content-length")
927
+ else:
928
+ response.seek(0, 2)
929
+ total_length = response.tell()
930
+ response.seek(0)
931
+ one_file_pbar = None
932
+ try:
933
+ one_file_progress_bar = total_length is not None and int(
934
+ total_length) > 10e6 # size larger than 10 MB
935
+ if one_file_progress_bar:
936
+ one_file_pbar = tqdm.tqdm(total=int(total_length),
937
+ unit='B',
938
+ unit_scale=True,
939
+ unit_divisor=1024,
940
+ position=1,
941
+ file=sys.stdout,
942
+ disable=self.items_repository._client_api.verbose.disable_progress_bar_download_item,
943
+ desc='Download Item')
944
+ except Exception as err:
945
+ one_file_progress_bar = False
946
+ logger.debug('Cant decide downloaded file length, bar will not be presented: {}'.format(err))
947
+
948
+ # start download
949
+ if self.items_repository._client_api.sdk_cache.use_cache and \
950
+ self.items_repository._client_api.cache is not None:
951
+ response_output = os.path.normpath(response.content)
952
+ if isinstance(response_output, bytes):
953
+ response_output = response_output.decode('utf-8')[1:-1]
954
+
955
+ if os.path.isfile(os.path.normpath(response_output)):
956
+ if response_output != local_filepath:
957
+ source_path = os.path.normpath(response_output)
958
+ shutil.copyfile(source_path, local_filepath)
959
+ download_done = True
960
+ else:
961
+ try:
962
+ temp_file_path = local_filepath + '.download'
963
+ with open(temp_file_path, "ab") as f:
964
+ try:
965
+ if is_local_link and isinstance(response, io.BufferedReader):
966
+ generator = iter(lambda: response.read(chunk_size), b'')
967
+ else:
968
+ generator = response.iter_content(chunk_size=chunk_size)
969
+ for chunk in generator:
970
+ if chunk: # filter out keep-alive new chunks
971
+ f.write(chunk)
972
+ if one_file_progress_bar:
973
+ one_file_pbar.update(len(chunk))
974
+ except Exception as err:
975
+ pass
976
+ finally:
977
+ if is_local_link and isinstance(response, io.BufferedReader):
978
+ try:
979
+ response.close()
980
+ except Exception as err:
981
+ pass
982
+
983
+ file_validation = True
984
+ if not is_url:
985
+ file_validation, start_point, chunk_resume = self.__get_next_chunk(
986
+ item=item,
987
+ download_progress=temp_file_path,
988
+ chunk_resume=chunk_resume
989
+ )
990
+ if file_validation:
991
+ shutil.move(temp_file_path, local_filepath)
992
+ download_done = True
993
+ except Exception as err:
994
+ if os.path.isfile(temp_file_path):
995
+ os.remove(temp_file_path)
996
+ raise err
997
+ if one_file_progress_bar:
998
+ one_file_pbar.close()
999
+ # save to output variable
1000
+ data = local_filepath
1001
+ # if image - can download annotation mask
1002
+ if item.annotated and annotation_options:
1003
+ self._download_img_annotations(item=item,
1004
+ img_filepath=local_filepath,
1005
+ annotation_options=annotation_options,
1006
+ annotation_filters=annotation_filters,
1007
+ local_path=local_path,
1008
+ overwrite=overwrite,
1009
+ thickness=thickness,
1010
+ alpha=alpha,
1011
+ with_text=with_text,
1012
+ export_version=export_version
1013
+ )
1014
+ else:
1015
+ if self.items_repository._client_api.sdk_cache.use_cache and \
1016
+ self.items_repository._client_api.cache is not None:
1017
+ response_output = os.path.normpath(response.content)
1018
+ if isinstance(response_output, bytes):
1019
+ response_output = response_output.decode('utf-8')[1:-1]
1020
+
1021
+ if os.path.isfile(response_output):
1022
+ source_file = response_output
1023
+ with open(source_file, 'wb') as f:
1024
+ data = f.read()
1025
+ download_done = True
1026
+ else:
1027
+ try:
1028
+ if is_local_link and isinstance(response, io.BufferedReader):
1029
+ generator = iter(lambda: response.read(chunk_size), b'')
1030
+ else:
1031
+ generator = response.iter_content(chunk_size=chunk_size)
1032
+ for chunk in generator:
1033
+ if chunk: # filter out keep-alive new chunks
1034
+ data.write(chunk)
1035
+
1036
+ file_validation = True
1037
+ if not is_url:
1038
+ file_validation, start_point, chunk_resume = self.__get_next_chunk(item=item,
1039
+ download_progress=data,
1040
+ chunk_resume=chunk_resume)
1041
+ if file_validation:
1042
+ download_done = True
1043
+ else:
1044
+ continue
1045
+ except Exception as err:
1046
+ raise err
1047
+ finally:
1048
+ if is_local_link and isinstance(response, io.BufferedReader):
1049
+ try:
1050
+ response.close()
1051
+ except Exception as err:
1052
+ pass
1053
+ # go back to the beginning of the stream
1054
+ data.seek(0)
1055
+ data.name = item.name
1056
+ if not save_locally and to_array:
1057
+ if 'image' not in item.mimetype and not is_url:
1058
+ raise PlatformException(
1059
+ error="400",
1060
+ message='Download element type numpy.ndarray support for image only. '
1061
+ 'Item Id: {} is {} type'.format(item.id, item.mimetype))
1062
+
1063
+ data = np.array(Image.open(data))
1064
+ else:
1065
+ data = local_filepath
1066
+ return data
1067
+
1068
    def __get_next_chunk(self, item, download_progress, chunk_resume):
        """
        Validate a partial download and compute the byte offset to resume from.

        :param item: Item entity; expected size comes from item.metadata['system']['size']
        :param download_progress: payload downloaded so far - file path or io.BytesIO
        :param chunk_resume: dict mapping resume offset -> failed attempts at that offset
        :return: tuple (size_validation, start_point, chunk_resume) - whether the
                 size matches, the offset to resume from, and the updated dict
        :raises PlatformException: after 3 failed attempts at the same offset, or
                 when the local payload is larger than expected (resume impossible)
        """
        size_validation, file_size, resume = self.__file_validation(item=item,
                                                                    downloaded_file=download_progress)
        start_point = file_size
        if not size_validation:
            if chunk_resume.get(start_point, None) is None:
                # progress was made since the last attempt - reset the counter
                # at the new offset (older offsets are intentionally dropped)
                chunk_resume = {start_point: 1}
            else:
                # no progress - count another failure at this same offset
                chunk_resume[start_point] += 1
            if chunk_resume[start_point] == 3 or not resume:
                raise PlatformException(
                    error=500,
                    message='The downloaded file is corrupted. Please try again. If the issue repeats please contact support.')
        return size_validation, start_point, chunk_resume
+
1083
+ def __default_local_path(self):
1084
+
1085
+ # create default local path
1086
+ if self.items_repository._dataset is None:
1087
+ local_path = os.path.join(
1088
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
1089
+ "items",
1090
+ )
1091
+ else:
1092
+ if self.items_repository.dataset._project is None:
1093
+ # by dataset name
1094
+ local_path = os.path.join(
1095
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
1096
+ "datasets",
1097
+ "{}_{}".format(self.items_repository.dataset.name, self.items_repository.dataset.id),
1098
+ )
1099
+ else:
1100
+ # by dataset and project name
1101
+ local_path = os.path.join(
1102
+ self.items_repository._client_api.sdk_cache.cache_path_bin,
1103
+ "projects",
1104
+ self.items_repository.dataset.project.name,
1105
+ "datasets",
1106
+ self.items_repository.dataset.name,
1107
+ )
1108
+ logger.info("Downloading to: {}".format(local_path))
1109
+ return local_path
1110
+
1111
+ @staticmethod
1112
+ def get_url_stream(url):
1113
+ """
1114
+ :param url:
1115
+ """
1116
+ response = None
1117
+
1118
+ if url.startswith('file://'):
1119
+ parsed = urlparse(url)
1120
+ path = unquote(parsed.path)
1121
+ if parsed.netloc:
1122
+ path = f"/{parsed.netloc}{path}"
1123
+ path = Path(path).expanduser().resolve()
1124
+
1125
+ if not path.exists():
1126
+ raise PlatformException(
1127
+ error='404',
1128
+ message=f'Local file not found: {url}'
1129
+ )
1130
+ if not path.is_file():
1131
+ raise PlatformException(
1132
+ error='400',
1133
+ message=f'Path is not a file: {url}'
1134
+ )
1135
+
1136
+ try:
1137
+ response = io.BufferedReader(io.FileIO(path, 'rb'))
1138
+ except PermissionError as e:
1139
+ raise PlatformException(
1140
+ error='403',
1141
+ message=f'Permission denied accessing file: {url}'
1142
+ ) from e
1143
+ else:
1144
+ prepared_request = requests.Request(method='GET', url=url).prepare()
1145
+ with requests.Session() as s:
1146
+ retry = Retry(
1147
+ total=3,
1148
+ read=3,
1149
+ connect=3,
1150
+ backoff_factor=1,
1151
+ )
1152
+ adapter = HTTPAdapter(max_retries=retry)
1153
+ s.mount('http://', adapter)
1154
+ s.mount('https://', adapter)
1155
+ response = s.send(request=prepared_request, stream=True)
1156
+
1157
+ return response