dtlpy 1.113.10__py3-none-any.whl → 1.114.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. dtlpy/__init__.py +488 -488
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/__pycache__/__init__.cpython-38.pyc +0 -0
  5. dtlpy/assets/code_server/config.yaml +2 -2
  6. dtlpy/assets/code_server/installation.sh +24 -24
  7. dtlpy/assets/code_server/launch.json +13 -13
  8. dtlpy/assets/code_server/settings.json +2 -2
  9. dtlpy/assets/main.py +53 -53
  10. dtlpy/assets/main_partial.py +18 -18
  11. dtlpy/assets/mock.json +11 -11
  12. dtlpy/assets/model_adapter.py +83 -83
  13. dtlpy/assets/package.json +61 -61
  14. dtlpy/assets/package_catalog.json +29 -29
  15. dtlpy/assets/package_gitignore +307 -307
  16. dtlpy/assets/service_runners/__init__.py +33 -33
  17. dtlpy/assets/service_runners/converter.py +96 -96
  18. dtlpy/assets/service_runners/multi_method.py +49 -49
  19. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  20. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  21. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  22. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  23. dtlpy/assets/service_runners/single_method.py +37 -37
  24. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  25. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  26. dtlpy/assets/service_runners/single_method_item.py +41 -41
  27. dtlpy/assets/service_runners/single_method_json.py +42 -42
  28. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  29. dtlpy/assets/voc_annotation_template.xml +23 -23
  30. dtlpy/caches/base_cache.py +32 -32
  31. dtlpy/caches/cache.py +473 -473
  32. dtlpy/caches/dl_cache.py +201 -201
  33. dtlpy/caches/filesystem_cache.py +89 -89
  34. dtlpy/caches/redis_cache.py +84 -84
  35. dtlpy/dlp/__init__.py +20 -20
  36. dtlpy/dlp/cli_utilities.py +367 -367
  37. dtlpy/dlp/command_executor.py +764 -764
  38. dtlpy/dlp/dlp +1 -1
  39. dtlpy/dlp/dlp.bat +1 -1
  40. dtlpy/dlp/dlp.py +128 -128
  41. dtlpy/dlp/parser.py +651 -651
  42. dtlpy/entities/__init__.py +83 -83
  43. dtlpy/entities/analytic.py +311 -311
  44. dtlpy/entities/annotation.py +1879 -1879
  45. dtlpy/entities/annotation_collection.py +699 -699
  46. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  47. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  48. dtlpy/entities/annotation_definitions/box.py +195 -195
  49. dtlpy/entities/annotation_definitions/classification.py +67 -67
  50. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  51. dtlpy/entities/annotation_definitions/cube.py +204 -204
  52. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  53. dtlpy/entities/annotation_definitions/description.py +32 -32
  54. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  55. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  56. dtlpy/entities/annotation_definitions/gis.py +69 -69
  57. dtlpy/entities/annotation_definitions/note.py +139 -139
  58. dtlpy/entities/annotation_definitions/point.py +117 -117
  59. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  60. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  61. dtlpy/entities/annotation_definitions/pose.py +92 -92
  62. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  63. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  64. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  65. dtlpy/entities/annotation_definitions/text.py +85 -85
  66. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  67. dtlpy/entities/app.py +220 -220
  68. dtlpy/entities/app_module.py +107 -107
  69. dtlpy/entities/artifact.py +174 -174
  70. dtlpy/entities/assignment.py +399 -399
  71. dtlpy/entities/base_entity.py +214 -214
  72. dtlpy/entities/bot.py +113 -113
  73. dtlpy/entities/codebase.py +296 -296
  74. dtlpy/entities/collection.py +38 -38
  75. dtlpy/entities/command.py +169 -169
  76. dtlpy/entities/compute.py +442 -442
  77. dtlpy/entities/dataset.py +1285 -1285
  78. dtlpy/entities/directory_tree.py +44 -44
  79. dtlpy/entities/dpk.py +470 -470
  80. dtlpy/entities/driver.py +222 -222
  81. dtlpy/entities/execution.py +397 -397
  82. dtlpy/entities/feature.py +124 -124
  83. dtlpy/entities/feature_set.py +145 -145
  84. dtlpy/entities/filters.py +641 -641
  85. dtlpy/entities/gis_item.py +107 -107
  86. dtlpy/entities/integration.py +184 -184
  87. dtlpy/entities/item.py +953 -953
  88. dtlpy/entities/label.py +123 -123
  89. dtlpy/entities/links.py +85 -85
  90. dtlpy/entities/message.py +175 -175
  91. dtlpy/entities/model.py +694 -691
  92. dtlpy/entities/node.py +1005 -1005
  93. dtlpy/entities/ontology.py +803 -803
  94. dtlpy/entities/organization.py +287 -287
  95. dtlpy/entities/package.py +657 -657
  96. dtlpy/entities/package_defaults.py +5 -5
  97. dtlpy/entities/package_function.py +185 -185
  98. dtlpy/entities/package_module.py +113 -113
  99. dtlpy/entities/package_slot.py +118 -118
  100. dtlpy/entities/paged_entities.py +290 -267
  101. dtlpy/entities/pipeline.py +593 -593
  102. dtlpy/entities/pipeline_execution.py +279 -279
  103. dtlpy/entities/project.py +394 -394
  104. dtlpy/entities/prompt_item.py +499 -499
  105. dtlpy/entities/recipe.py +301 -301
  106. dtlpy/entities/reflect_dict.py +102 -102
  107. dtlpy/entities/resource_execution.py +138 -138
  108. dtlpy/entities/service.py +958 -958
  109. dtlpy/entities/service_driver.py +117 -117
  110. dtlpy/entities/setting.py +294 -294
  111. dtlpy/entities/task.py +491 -491
  112. dtlpy/entities/time_series.py +143 -143
  113. dtlpy/entities/trigger.py +426 -426
  114. dtlpy/entities/user.py +118 -118
  115. dtlpy/entities/webhook.py +124 -124
  116. dtlpy/examples/__init__.py +19 -19
  117. dtlpy/examples/add_labels.py +135 -135
  118. dtlpy/examples/add_metadata_to_item.py +21 -21
  119. dtlpy/examples/annotate_items_using_model.py +65 -65
  120. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  121. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  122. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  123. dtlpy/examples/convert_annotation_types.py +51 -51
  124. dtlpy/examples/converter.py +143 -143
  125. dtlpy/examples/copy_annotations.py +22 -22
  126. dtlpy/examples/copy_folder.py +31 -31
  127. dtlpy/examples/create_annotations.py +51 -51
  128. dtlpy/examples/create_video_annotations.py +83 -83
  129. dtlpy/examples/delete_annotations.py +26 -26
  130. dtlpy/examples/filters.py +113 -113
  131. dtlpy/examples/move_item.py +23 -23
  132. dtlpy/examples/play_video_annotation.py +13 -13
  133. dtlpy/examples/show_item_and_mask.py +53 -53
  134. dtlpy/examples/triggers.py +49 -49
  135. dtlpy/examples/upload_batch_of_items.py +20 -20
  136. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  137. dtlpy/examples/upload_items_with_modalities.py +43 -43
  138. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  139. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  140. dtlpy/exceptions.py +125 -125
  141. dtlpy/miscellaneous/__init__.py +20 -20
  142. dtlpy/miscellaneous/dict_differ.py +95 -95
  143. dtlpy/miscellaneous/git_utils.py +217 -217
  144. dtlpy/miscellaneous/json_utils.py +14 -14
  145. dtlpy/miscellaneous/list_print.py +105 -105
  146. dtlpy/miscellaneous/zipping.py +130 -130
  147. dtlpy/ml/__init__.py +20 -20
  148. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  149. dtlpy/ml/base_model_adapter.py +945 -940
  150. dtlpy/ml/metrics.py +461 -461
  151. dtlpy/ml/predictions_utils.py +274 -274
  152. dtlpy/ml/summary_writer.py +57 -57
  153. dtlpy/ml/train_utils.py +60 -60
  154. dtlpy/new_instance.py +252 -252
  155. dtlpy/repositories/__init__.py +56 -56
  156. dtlpy/repositories/analytics.py +85 -85
  157. dtlpy/repositories/annotations.py +916 -916
  158. dtlpy/repositories/apps.py +383 -383
  159. dtlpy/repositories/artifacts.py +452 -452
  160. dtlpy/repositories/assignments.py +599 -599
  161. dtlpy/repositories/bots.py +213 -213
  162. dtlpy/repositories/codebases.py +559 -559
  163. dtlpy/repositories/collections.py +332 -348
  164. dtlpy/repositories/commands.py +158 -158
  165. dtlpy/repositories/compositions.py +61 -61
  166. dtlpy/repositories/computes.py +434 -406
  167. dtlpy/repositories/datasets.py +1291 -1291
  168. dtlpy/repositories/downloader.py +895 -895
  169. dtlpy/repositories/dpks.py +433 -433
  170. dtlpy/repositories/drivers.py +266 -266
  171. dtlpy/repositories/executions.py +817 -817
  172. dtlpy/repositories/feature_sets.py +226 -226
  173. dtlpy/repositories/features.py +238 -238
  174. dtlpy/repositories/integrations.py +484 -484
  175. dtlpy/repositories/items.py +909 -915
  176. dtlpy/repositories/messages.py +94 -94
  177. dtlpy/repositories/models.py +877 -867
  178. dtlpy/repositories/nodes.py +80 -80
  179. dtlpy/repositories/ontologies.py +511 -511
  180. dtlpy/repositories/organizations.py +525 -525
  181. dtlpy/repositories/packages.py +1941 -1941
  182. dtlpy/repositories/pipeline_executions.py +448 -448
  183. dtlpy/repositories/pipelines.py +642 -642
  184. dtlpy/repositories/projects.py +539 -539
  185. dtlpy/repositories/recipes.py +399 -399
  186. dtlpy/repositories/resource_executions.py +137 -137
  187. dtlpy/repositories/schema.py +120 -120
  188. dtlpy/repositories/service_drivers.py +213 -213
  189. dtlpy/repositories/services.py +1704 -1704
  190. dtlpy/repositories/settings.py +339 -339
  191. dtlpy/repositories/tasks.py +1124 -1124
  192. dtlpy/repositories/times_series.py +278 -278
  193. dtlpy/repositories/triggers.py +536 -536
  194. dtlpy/repositories/upload_element.py +257 -257
  195. dtlpy/repositories/uploader.py +651 -651
  196. dtlpy/repositories/webhooks.py +249 -249
  197. dtlpy/services/__init__.py +22 -22
  198. dtlpy/services/aihttp_retry.py +131 -131
  199. dtlpy/services/api_client.py +1782 -1782
  200. dtlpy/services/api_reference.py +40 -40
  201. dtlpy/services/async_utils.py +133 -133
  202. dtlpy/services/calls_counter.py +44 -44
  203. dtlpy/services/check_sdk.py +68 -68
  204. dtlpy/services/cookie.py +115 -115
  205. dtlpy/services/create_logger.py +156 -156
  206. dtlpy/services/events.py +84 -84
  207. dtlpy/services/logins.py +235 -235
  208. dtlpy/services/reporter.py +256 -256
  209. dtlpy/services/service_defaults.py +91 -91
  210. dtlpy/utilities/__init__.py +20 -20
  211. dtlpy/utilities/annotations/__init__.py +16 -16
  212. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  213. dtlpy/utilities/base_package_runner.py +264 -264
  214. dtlpy/utilities/converter.py +1650 -1650
  215. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  216. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  217. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  218. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  219. dtlpy/utilities/local_development/__init__.py +1 -1
  220. dtlpy/utilities/local_development/local_session.py +179 -179
  221. dtlpy/utilities/reports/__init__.py +2 -2
  222. dtlpy/utilities/reports/figures.py +343 -343
  223. dtlpy/utilities/reports/report.py +71 -71
  224. dtlpy/utilities/videos/__init__.py +17 -17
  225. dtlpy/utilities/videos/video_player.py +598 -598
  226. dtlpy/utilities/videos/videos.py +470 -470
  227. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp +1 -1
  228. dtlpy-1.114.13.data/scripts/dlp.bat +2 -0
  229. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp.py +128 -128
  230. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/LICENSE +200 -200
  231. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/METADATA +172 -172
  232. dtlpy-1.114.13.dist-info/RECORD +240 -0
  233. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/WHEEL +1 -1
  234. tests/features/environment.py +551 -550
  235. dtlpy-1.113.10.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.113.10.dist-info/RECORD +0 -244
  237. tests/assets/__init__.py +0 -0
  238. tests/assets/models_flow/__init__.py +0 -0
  239. tests/assets/models_flow/failedmain.py +0 -52
  240. tests/assets/models_flow/main.py +0 -62
  241. tests/assets/models_flow/main_model.py +0 -54
  242. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/entry_points.txt +0 -0
  243. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/top_level.txt +0 -0
@@ -1,651 +1,651 @@
1
- import sys
2
- from collections import deque
3
- import validators
4
- import traceback
5
- import tempfile
6
- import requests
7
- import asyncio
8
- import logging
9
- import pandas
10
- import shutil
11
- import json
12
- import time
13
- import tqdm
14
- import os
15
- import io
16
- import numpy as np
17
- from requests.adapters import HTTPAdapter
18
- from urllib3.util import Retry
19
- from PIL import Image
20
-
21
- from . import upload_element
22
-
23
- from .. import PlatformException, entities, repositories, exceptions
24
- from ..services import Reporter
25
-
26
- logger = logging.getLogger(name='dtlpy')
27
-
28
- NUM_TRIES = 5 # try to upload 3 time before fail on item
29
-
30
-
31
- class Uploader:
32
- def __init__(self, items_repository: repositories.Items, output_entity=entities.Item, no_output=False):
33
- assert isinstance(items_repository, repositories.Items)
34
- self.items_repository = items_repository
35
- self.remote_url = "/datasets/{}/items".format(self.items_repository.dataset.id)
36
- self.__stop_create_existence_dict = False
37
- self.mode = 'skip'
38
- self.num_files = 0
39
- self.i_item = 0
40
- self.pbar = tqdm.tqdm(total=0,
41
- disable=self.items_repository._client_api.verbose.disable_progress_bar_upload_items,
42
- file=sys.stdout, desc='Upload Items')
43
- self.reporter = Reporter(num_workers=0,
44
- resource=Reporter.ITEMS_UPLOAD,
45
- print_error_logs=items_repository._client_api.verbose.print_error_logs,
46
- output_entity=output_entity,
47
- client_api=items_repository._client_api,
48
- no_output=no_output)
49
-
50
- def upload(
51
- self,
52
- # what to upload
53
- local_path,
54
- local_annotations_path=None,
55
- # upload options
56
- remote_path=None,
57
- remote_name=None,
58
- file_types=None,
59
- overwrite=False,
60
- item_metadata=None,
61
- export_version: str = entities.ExportVersion.V1,
62
- item_description=None,
63
- raise_on_error=False,
64
- return_as_list=False
65
- ):
66
- """
67
- Upload local file to dataset.
68
- Local filesystem will remain.
69
- If `*` at the end of local_path (e.g. '/images/*') items will be uploaded without head directory
70
-
71
- :param local_path: local file or folder to upload
72
- :param local_annotations_path: path to Dataloop format annotations json files.
73
- :param remote_path: remote path to save.
74
- :param remote_name: remote base name to save.
75
- :param file_types: list of file type to upload. e.g ['.jpg', '.png']. default is all
76
- :param overwrite: optional - default = False
77
- :param item_metadata: upload the items with the metadata dictionary
78
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
79
- :param str item_description: add a string description to the uploaded item
80
- :param bool raise_on_error: raise an exception if an error occurs
81
- :param bool return_as_list: always return a list of items
82
-
83
- :return: Output (list)
84
- """
85
- ###################
86
- # Default options #
87
- ###################
88
- if overwrite:
89
- self.mode = 'overwrite'
90
- if isinstance(local_path, pandas.DataFrame):
91
- futures = self._build_elements_from_df(local_path)
92
- else:
93
- futures = self._build_elements_from_inputs(local_path=local_path,
94
- local_annotations_path=local_annotations_path,
95
- # upload options
96
- remote_path=remote_path,
97
- remote_name=remote_name,
98
- file_types=file_types,
99
- item_metadata=item_metadata,
100
- export_version=export_version,
101
- item_description=item_description)
102
- num_files = len(futures)
103
- while futures:
104
- futures.popleft().result()
105
- logger.info("Uploading {} items..".format(num_files))
106
- self.pbar.close()
107
- # summary
108
- logger.info("Number of total files: {}".format(num_files))
109
- status_list = self.reporter.status_list
110
- for action in set(status_list):
111
- n_for_action = self.reporter.status_count(status=action)
112
- logger.info("Number of files {}: {}".format(action, n_for_action))
113
-
114
- # log error
115
- errors_count = self.reporter.failure_count
116
- if errors_count > 0:
117
- log_filepath = self.reporter.generate_log_files()
118
- if log_filepath is not None:
119
- logger.warning("Errors in {n_error} files. See {log_filepath} for full log".format(
120
- n_error=errors_count, log_filepath=log_filepath))
121
- if raise_on_error is True:
122
- raise PlatformException(error="400",
123
- message=f"Errors in {errors_count} files. See above trace for more information")
124
-
125
- if return_as_list is True:
126
- # return list of items
127
- return list(self.reporter.output)
128
- if len(status_list) == 1:
129
- # if there is only one item, return it
130
- try:
131
- return next(self.reporter.output)
132
- except StopIteration:
133
- # if there is no items, return None
134
- return None
135
- # if there are multiple items, return the generator
136
- return self.reporter.output
137
-
138
- def _build_elements_from_inputs(self,
139
- local_path,
140
- local_annotations_path,
141
- # upload options
142
- remote_path,
143
- file_types,
144
- remote_name,
145
- item_metadata,
146
- export_version: str = entities.ExportVersion.V1,
147
- item_description=None):
148
- # fix remote path
149
- if remote_path is None:
150
- if isinstance(local_path, str) and local_path.startswith('external://'):
151
- remote_path = None
152
- else:
153
- remote_path = "/"
154
- if remote_path and not remote_path.startswith('/'):
155
- remote_path = f"/{remote_path}"
156
- if remote_path and not remote_path.endswith("/"):
157
- remote_path = f"{remote_path}/"
158
-
159
- if remote_name:
160
- remote_name = remote_name.lstrip('/')
161
-
162
- if file_types is not None and not isinstance(file_types, list):
163
- msg = '"file_types" should be a list of file extension. e.g [".jpg", ".png"]'
164
- raise PlatformException(error="400", message=msg)
165
- if item_metadata is not None:
166
- if not isinstance(item_metadata, dict) and not isinstance(item_metadata, entities.ExportMetadata):
167
- msg = '"item_metadata" should be a metadata dictionary. Got type: {}'.format(type(item_metadata))
168
- raise PlatformException(error="400", message=msg)
169
- if item_description is not None:
170
- if not isinstance(item_description, str):
171
- msg = '"item_description" should be a string. Got type: {}'.format(type(item_description))
172
- raise PlatformException(error="400", message=msg)
173
-
174
- ##########################
175
- # Convert inputs to list #
176
- ##########################
177
- local_annotations_path_list = None
178
- remote_name_list = None
179
- if not isinstance(local_path, list):
180
- local_path_list = [local_path]
181
- if remote_name is not None:
182
- if not isinstance(remote_name, str):
183
- raise PlatformException(error="400",
184
- message='remote_name must be a string, got: {}'.format(type(remote_name)))
185
- remote_name_list = [remote_name]
186
- if local_annotations_path is not None:
187
- if not isinstance(local_annotations_path, str):
188
- raise PlatformException(error="400",
189
- message='local_annotations_path must be a string, got: {}'.format(
190
- type(local_annotations_path)))
191
- local_annotations_path_list = [local_annotations_path]
192
- else:
193
- local_path_list = local_path
194
- if remote_name is not None:
195
- if not isinstance(remote_name, list):
196
- raise PlatformException(error="400",
197
- message='remote_name must be a list, got: {}'.format(type(remote_name)))
198
- if not len(remote_name) == len(local_path_list):
199
- raise PlatformException(error="400",
200
- message='remote_name and local_path_list must be of same length. '
201
- 'Received: remote_name: {}, '
202
- 'local_path_list: {}'.format(len(remote_name),
203
- len(local_path_list)))
204
- remote_name_list = remote_name
205
- if local_annotations_path is not None:
206
- if not len(local_annotations_path) == len(local_path_list):
207
- raise PlatformException(error="400",
208
- message='local_annotations_path and local_path_list must be of same lenght.'
209
- ' Received: local_annotations_path: {}, '
210
- 'local_path_list: {}'.format(len(local_annotations_path),
211
- len(local_path_list)))
212
- local_annotations_path_list = local_annotations_path
213
-
214
- if local_annotations_path is None:
215
- local_annotations_path_list = [None] * len(local_path_list)
216
-
217
- if remote_name is None:
218
- remote_name_list = [None] * len(local_path_list)
219
-
220
- try:
221
- driver_path = self.items_repository.dataset.project.drivers.get(
222
- driver_id=self.items_repository.dataset.driver).path
223
- except Exception:
224
- driver_path = None
225
-
226
- futures = deque()
227
- total_size = 0
228
- for upload_item_element, remote_name, upload_annotations_element in zip(local_path_list,
229
- remote_name_list,
230
- local_annotations_path_list):
231
- if isinstance(upload_item_element, np.ndarray):
232
- # convert numpy.ndarray to io.BytesI
233
- if remote_name is None:
234
- raise PlatformException(
235
- error="400",
236
- message='Upload element type was numpy.ndarray. providing param "remote_name" is mandatory')
237
- file_extension = os.path.splitext(remote_name)
238
- if file_extension[1].lower() in ['.jpg', '.jpeg']:
239
- item_format = 'JPEG'
240
- elif file_extension[1].lower() == '.png':
241
- item_format = 'PNG'
242
- else:
243
- raise PlatformException(
244
- error="400",
245
- message='"remote_name" with .jpg/.jpeg or .png extension are supported '
246
- 'when upload element of numpy.ndarray type.')
247
-
248
- buffer = io.BytesIO()
249
- Image.fromarray(upload_item_element).save(buffer, format=item_format)
250
- buffer.seek(0)
251
- buffer.name = remote_name
252
- upload_item_element = buffer
253
-
254
- all_upload_elements = {
255
- 'upload_item_element': upload_item_element,
256
- 'total_size': total_size,
257
- 'remote_name': remote_name,
258
- 'remote_path': remote_path,
259
- 'upload_annotations_element': upload_annotations_element,
260
- 'item_metadata': item_metadata,
261
- 'annotations_filepath': None,
262
- 'with_head_folder': None,
263
- 'filename': None,
264
- 'root': None,
265
- 'export_version': export_version,
266
- 'item_description': item_description,
267
- 'driver_path': driver_path
268
- }
269
- if isinstance(upload_item_element, str):
270
- with_head_folder = True
271
- if upload_item_element.endswith('*'):
272
- with_head_folder = False
273
- upload_item_element = os.path.dirname(upload_item_element)
274
- all_upload_elements['upload_item_element'] = upload_item_element
275
-
276
- if os.path.isdir(upload_item_element):
277
- for root, subdirs, files in os.walk(upload_item_element):
278
- for filename in files:
279
- all_upload_elements['with_head_folder'] = with_head_folder
280
- all_upload_elements['filename'] = filename
281
- all_upload_elements['root'] = root
282
- _, ext = os.path.splitext(filename)
283
- if file_types is None or ext in file_types:
284
- upload_elem = upload_element.DirUploadElement(all_upload_elements=all_upload_elements)
285
- futures.append(self.upload_single_element(upload_elem))
286
- continue
287
-
288
- # add single file
289
- elif os.path.isfile(upload_item_element):
290
- upload_elem = upload_element.FileUploadElement(all_upload_elements=all_upload_elements)
291
-
292
- elif upload_item_element.startswith('external://'):
293
- upload_elem = upload_element.ExternalItemUploadElement(all_upload_elements=all_upload_elements)
294
-
295
- elif self.is_url(upload_item_element):
296
- upload_elem = upload_element.UrlUploadElement(all_upload_elements=all_upload_elements)
297
-
298
- else:
299
- raise PlatformException("404", "Unknown local path: {}".format(local_path))
300
-
301
- elif isinstance(upload_item_element, entities.Item):
302
- upload_elem = upload_element.ItemLinkUploadElement(all_upload_elements=all_upload_elements)
303
-
304
- elif isinstance(upload_item_element, entities.Link):
305
- upload_elem = upload_element.LinkUploadElement(all_upload_elements=all_upload_elements)
306
-
307
- elif isinstance(upload_item_element, entities.PromptItem):
308
- upload_elem = upload_element.PromptUploadElement(all_upload_elements=all_upload_elements)
309
-
310
- elif isinstance(upload_item_element, entities.ItemGis):
311
- buffer = io.BytesIO(json.dumps(upload_item_element.to_json()).encode('utf-8'))
312
- buffer.name = upload_item_element.name
313
- all_upload_elements['upload_item_element'] = buffer
314
- upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)
315
-
316
- elif isinstance(upload_item_element, bytes) or \
317
- isinstance(upload_item_element, io.BytesIO) or \
318
- isinstance(upload_item_element, io.BufferedReader) or \
319
- isinstance(upload_item_element, io.TextIOWrapper):
320
- upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)
321
- # get size from binaries
322
- try:
323
- total_size += upload_item_element.__sizeof__()
324
- except Exception:
325
- logger.warning("Cant get binaries size")
326
-
327
- else:
328
- raise PlatformException(
329
- error="400",
330
- message=f"Unknown element type to upload ('local_path'). received type: {type(upload_item_element)}. "
331
- "known types (or list of those types): str (dir, file, url), bytes, io.BytesIO, "
332
- "numpy.ndarray, io.TextIOWrapper, Dataloop.Item, Dataloop.Link")
333
-
334
- futures.append(self.upload_single_element(upload_elem))
335
- return futures
336
-
337
- def upload_single_element(self, elem):
338
- """
339
- upload a signal element
340
- :param elem: UploadElement
341
- """
342
- self.num_files += 1
343
- self.i_item += 1
344
- self.pbar.total += 1
345
- self.reporter.upcount_num_workers()
346
- future = asyncio.run_coroutine_threadsafe(
347
- self.__upload_single_item_wrapper(element=elem,
348
- mode=self.mode,
349
- pbar=self.pbar,
350
- reporter=self.reporter),
351
- loop=self.items_repository._client_api.event_loop.loop)
352
- return future
353
-
354
- def _build_elements_from_df(self, df: pandas.DataFrame):
355
- futures = deque()
356
- for index, row in df.iterrows():
357
- # DEFAULTS
358
- elem = {'local_annotations_path': None,
359
- 'remote_path': None,
360
- 'remote_name': None,
361
- 'file_types': None,
362
- 'item_metadata': None,
363
- 'item_description': None}
364
- elem.update(row)
365
- future = self._build_elements_from_inputs(**elem)
366
- # append deque using +
367
- futures += future
368
- return futures
369
-
370
- async def __single_external_sync(self, element):
371
- storage_id = element.buffer.split('//')[1]
372
- req_json = dict()
373
- req_json['filename'] = element.remote_filepath
374
- req_json['storageId'] = storage_id
375
- success, response = self.items_repository._client_api.gen_request(req_type='post',
376
- path='/datasets/{}/imports'.format(
377
- self.items_repository.dataset.id),
378
- json_req=[req_json])
379
-
380
- if success:
381
- items = entities.Item.from_json(client_api=self.items_repository._client_api, _json=response.json()[0],
382
- project=self.items_repository._dataset._project,
383
- dataset=self.items_repository.dataset)
384
- else:
385
- raise exceptions.PlatformException(response)
386
- return items, response.headers.get('x-item-op', 'na')
387
-
388
- async def __single_async_upload(self,
389
- filepath,
390
- remote_path,
391
- uploaded_filename,
392
- last_try,
393
- mode,
394
- item_metadata,
395
- callback,
396
- item_description
397
- ):
398
- """
399
- Upload an item to dataset
400
-
401
- :param filepath: local filepath of the item
402
- :param remote_path: remote directory of filepath to upload
403
- :param uploaded_filename: optional - remote filename
404
- :param last_try: print log error only if last try
405
- :param mode: 'skip' 'overwrite'
406
- :param item_metadata: item metadata
407
- :param str item_description: add a string description to the uploaded item
408
- :param callback:
409
- :return: Item object
410
- """
411
-
412
- need_close = False
413
- if isinstance(filepath, str):
414
- # upload local file
415
- if not os.path.isfile(filepath):
416
- raise PlatformException(error="404", message="Filepath doesnt exists. file: {}".format(filepath))
417
- if uploaded_filename is None:
418
- uploaded_filename = os.path.basename(filepath)
419
- if os.path.isfile(filepath):
420
- item_type = 'file'
421
- else:
422
- item_type = 'dir'
423
- item_size = os.stat(filepath).st_size
424
- to_upload = open(filepath, 'rb')
425
- need_close = True
426
-
427
- else:
428
- # upload from buffer
429
- if isinstance(filepath, bytes):
430
- to_upload = io.BytesIO(filepath)
431
- elif isinstance(filepath, io.BytesIO):
432
- to_upload = filepath
433
- elif isinstance(filepath, io.BufferedReader):
434
- to_upload = filepath
435
- elif isinstance(filepath, io.TextIOWrapper):
436
- to_upload = filepath
437
- else:
438
- raise PlatformException("400", "Unknown input filepath type received: {}".format(type(filepath)))
439
-
440
- if uploaded_filename is None:
441
- if hasattr(filepath, "name"):
442
- uploaded_filename = filepath.name
443
- else:
444
- raise PlatformException(error="400",
445
- message="Must have filename when uploading bytes array (uploaded_filename)")
446
-
447
- item_size = to_upload.seek(0, 2)
448
- to_upload.seek(0)
449
- item_type = 'file'
450
- try:
451
- response = await self.items_repository._client_api.upload_file_async(to_upload=to_upload,
452
- item_type=item_type,
453
- item_size=item_size,
454
- item_metadata=item_metadata,
455
- remote_url=self.remote_url,
456
- uploaded_filename=uploaded_filename,
457
- remote_path=remote_path,
458
- callback=callback,
459
- mode=mode,
460
- item_description=item_description)
461
- except Exception:
462
- raise
463
- finally:
464
- if need_close:
465
- to_upload.close()
466
-
467
- if response.ok:
468
- if item_size != response.json().get('metadata', {}).get('system', {}).get('size', 0):
469
- self.items_repository.delete(item_id=response.json()['id'])
470
- raise PlatformException(500,
471
- "The uploaded file is corrupted. "
472
- "Please try again. If it happens again please contact support.")
473
- item = self.items_repository.items_entity.from_json(client_api=self.items_repository._client_api,
474
- _json=response.json(),
475
- dataset=self.items_repository.dataset)
476
- else:
477
- raise PlatformException(response)
478
- return item, response.headers.get('x-item-op', 'na')
479
-
480
- async def __upload_single_item_wrapper(self, element, pbar, reporter, mode):
481
- async with self.items_repository._client_api.event_loop.semaphore('items.upload', 5):
482
- # assert isinstance(element, UploadElement)
483
- item = False
484
- err = None
485
- trace = None
486
- saved_locally = False
487
- temp_dir = None
488
- action = 'na'
489
- remote_folder, remote_name = os.path.split(element.remote_filepath)
490
-
491
- if element.type == 'url':
492
- saved_locally, element.buffer, temp_dir = self.url_to_data(element.buffer)
493
- elif element.type == 'link':
494
- element.buffer = self.link(ref=element.buffer.ref, dataset_id=element.buffer.dataset_id,
495
- type=element.buffer.type, mimetype=element.buffer.mimetype)
496
-
497
- for i_try in range(NUM_TRIES):
498
- try:
499
- logger.debug("Upload item: {path}. Try {i}/{n}. Starting..".format(path=remote_name,
500
- i=i_try + 1,
501
- n=NUM_TRIES))
502
- if element.type == 'external_file':
503
- item, action = await self.__single_external_sync(element)
504
- else:
505
- if element.annotations_filepath is not None and \
506
- element.item_metadata == entities.ExportMetadata.FROM_JSON:
507
- element.item_metadata = {}
508
- with open(element.annotations_filepath) as ann_f:
509
- item_metadata = json.load(ann_f)
510
- if 'metadata' in item_metadata:
511
- element.item_metadata = item_metadata['metadata']
512
- item, action = await self.__single_async_upload(filepath=element.buffer,
513
- mode=mode,
514
- item_metadata=element.item_metadata,
515
- remote_path=remote_folder,
516
- uploaded_filename=remote_name,
517
- last_try=(i_try + 1) == NUM_TRIES,
518
- callback=None,
519
- item_description=element.item_description)
520
- logger.debug("Upload item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=remote_name,
521
- i=i_try + 1,
522
- n=NUM_TRIES,
523
- id=item.id))
524
- if isinstance(item, entities.Item):
525
- break
526
- time.sleep(0.3 * (2 ** i_try))
527
- except Exception as e:
528
- err = e
529
- trace = traceback.format_exc()
530
- logger.debug("Upload item: {path}. Try {i}/{n}. Fail.\n{trace}".format(path=remote_name,
531
- i=i_try + 1,
532
- n=NUM_TRIES,
533
- trace=trace))
534
-
535
- finally:
536
- if saved_locally and os.path.isdir(temp_dir):
537
- shutil.rmtree(temp_dir)
538
- if item:
539
- if action in ['overwrite', 'created'] and element.annotations_filepath is not None:
540
- try:
541
- await self.__async_upload_annotations(annotations_filepath=element.annotations_filepath,
542
- item=item)
543
- except Exception:
544
- logger.exception('Error uploading annotations to item id: {}'.format(item.id))
545
-
546
- reporter.set_index(status=action,
547
- output=item.to_json(),
548
- success=True,
549
- ref=item.id)
550
- if pbar is not None:
551
- pbar.update()
552
- self.items_repository._client_api.callbacks.run_on_event(
553
- event=self.items_repository._client_api.callbacks.CallbackEvent.ITEMS_UPLOAD,
554
- context={'item_id': item.id, 'dataset_id': item.dataset_id},
555
- progress=round(pbar.n / pbar.total * 100, 0))
556
- else:
557
- if isinstance(element.buffer, str):
558
- ref = element.buffer
559
- elif hasattr(element.buffer, "name"):
560
- ref = element.buffer.name
561
- else:
562
- ref = 'Unknown'
563
- reporter.set_index(ref=ref, status='error',
564
- success=False,
565
- error="{}\n{}".format(err, trace))
566
-
567
- async def __async_upload_annotations(self, annotations_filepath, item):
568
- with open(annotations_filepath, 'r', encoding="utf8") as f:
569
- annotations = json.load(f)
570
- # wait for coroutines on the current event loop
571
- return await item.annotations._async_upload_annotations(annotations=annotations['annotations'])
572
-
573
- @staticmethod
574
- def url_to_data(url):
575
- chunk_size = 8192
576
- max_size = 30000000
577
- temp_dir = None
578
-
579
- # This will download the binaries from the URL user provided
580
- prepared_request = requests.Request(method='GET', url=url).prepare()
581
- with requests.Session() as s:
582
- retry = Retry(
583
- total=3,
584
- read=3,
585
- connect=3,
586
- backoff_factor=1,
587
- )
588
- adapter = HTTPAdapter(max_retries=retry)
589
- s.mount('http://', adapter)
590
- s.mount('https://', adapter)
591
- response = s.send(request=prepared_request, stream=True)
592
-
593
- total_length = response.headers.get("content-length")
594
- save_locally = int(total_length) > max_size
595
-
596
- if save_locally:
597
- # save to file
598
- temp_dir = tempfile.mkdtemp()
599
- temp_path = os.path.join(temp_dir, url.split('/')[-1].split('?')[0])
600
- with open(temp_path, "wb") as f:
601
- for chunk in response.iter_content(chunk_size=chunk_size):
602
- if chunk: # filter out keep-alive new chunks
603
- f.write(chunk)
604
- # save to output variable
605
- data = temp_path
606
- else:
607
- # save as byte stream
608
- data = io.BytesIO()
609
- for chunk in response.iter_content(chunk_size=chunk_size):
610
- if chunk: # filter out keep-alive new chunks
611
- data.write(chunk)
612
- # go back to the beginning of the stream
613
- data.seek(0)
614
- data.name = url.split('/')[-1]
615
-
616
- return save_locally, data, temp_dir
617
-
618
- @staticmethod
619
- def is_url(url):
620
- try:
621
- return validators.url(url)
622
- except Exception:
623
- return False
624
-
625
- @staticmethod
626
- def link(ref, type, mimetype=None, dataset_id=None):
627
- """
628
- :param ref:
629
- :param type:
630
- :param mimetype:
631
- :param dataset_id:
632
- """
633
- link_info = {'type': type,
634
- 'ref': ref}
635
-
636
- if mimetype:
637
- link_info['mimetype'] = mimetype
638
-
639
- if dataset_id is not None:
640
- link_info['datasetId'] = dataset_id
641
-
642
- _json = {'type': 'link',
643
- 'shebang': 'dataloop',
644
- 'metadata': {'dltype': 'link',
645
- 'linkInfo': link_info}}
646
-
647
- uploaded_byte_io = io.BytesIO()
648
- uploaded_byte_io.write(json.dumps(_json).encode())
649
- uploaded_byte_io.seek(0)
650
-
651
- return uploaded_byte_io
1
+ import sys
2
+ from collections import deque
3
+ import validators
4
+ import traceback
5
+ import tempfile
6
+ import requests
7
+ import asyncio
8
+ import logging
9
+ import pandas
10
+ import shutil
11
+ import json
12
+ import time
13
+ import tqdm
14
+ import os
15
+ import io
16
+ import numpy as np
17
+ from requests.adapters import HTTPAdapter
18
+ from urllib3.util import Retry
19
+ from PIL import Image
20
+
21
+ from . import upload_element
22
+
23
+ from .. import PlatformException, entities, repositories, exceptions
24
+ from ..services import Reporter
25
+
26
logger = logging.getLogger(name='dtlpy')

# Number of upload attempts per item before it is reported as failed.
NUM_TRIES = 5
29
+
30
+
31
+ class Uploader:
32
+ def __init__(self, items_repository: repositories.Items, output_entity=entities.Item, no_output=False):
33
+ assert isinstance(items_repository, repositories.Items)
34
+ self.items_repository = items_repository
35
+ self.remote_url = "/datasets/{}/items".format(self.items_repository.dataset.id)
36
+ self.__stop_create_existence_dict = False
37
+ self.mode = 'skip'
38
+ self.num_files = 0
39
+ self.i_item = 0
40
+ self.pbar = tqdm.tqdm(total=0,
41
+ disable=self.items_repository._client_api.verbose.disable_progress_bar_upload_items,
42
+ file=sys.stdout, desc='Upload Items')
43
+ self.reporter = Reporter(num_workers=0,
44
+ resource=Reporter.ITEMS_UPLOAD,
45
+ print_error_logs=items_repository._client_api.verbose.print_error_logs,
46
+ output_entity=output_entity,
47
+ client_api=items_repository._client_api,
48
+ no_output=no_output)
49
+
50
    def upload(
            self,
            # what to upload
            local_path,
            local_annotations_path=None,
            # upload options
            remote_path=None,
            remote_name=None,
            file_types=None,
            overwrite=False,
            item_metadata=None,
            export_version: str = entities.ExportVersion.V1,
            item_description=None,
            raise_on_error=False,
            return_as_list=False
    ):
        """
        Upload local file to dataset.
        Local filesystem will remain.
        If `*` at the end of local_path (e.g. '/images/*') items will be uploaded without head directory

        :param local_path: local file or folder to upload
        :param local_annotations_path: path to Dataloop format annotations json files.
        :param remote_path: remote path to save.
        :param remote_name: remote base name to save.
        :param file_types: list of file type to upload. e.g ['.jpg', '.png']. default is all
        :param overwrite: optional - default = False
        :param item_metadata: upload the items with the metadata dictionary
        :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
        :param str item_description: add a string description to the uploaded item
        :param bool raise_on_error: raise an exception if an error occurs
        :param bool return_as_list: always return a list of items

        :return: list of items, a single item, or the reporter's output generator
        """
        ###################
        # Default options #
        ###################
        if overwrite:
            self.mode = 'overwrite'
        # a DataFrame input carries one upload spec per row; anything else goes
        # through the generic input builder
        if isinstance(local_path, pandas.DataFrame):
            futures = self._build_elements_from_df(local_path)
        else:
            futures = self._build_elements_from_inputs(local_path=local_path,
                                                       local_annotations_path=local_annotations_path,
                                                       # upload options
                                                       remote_path=remote_path,
                                                       remote_name=remote_name,
                                                       file_types=file_types,
                                                       item_metadata=item_metadata,
                                                       export_version=export_version,
                                                       item_description=item_description)
        num_files = len(futures)
        # drain the futures; each .result() blocks until that item's upload finishes
        while futures:
            futures.popleft().result()
        # NOTE(review): this log fires after the drain loop above, i.e. when
        # uploads have already completed — likely intended before the loop
        logger.info("Uploading {} items..".format(num_files))
        self.pbar.close()
        # summary
        logger.info("Number of total files: {}".format(num_files))
        status_list = self.reporter.status_list
        for action in set(status_list):
            n_for_action = self.reporter.status_count(status=action)
            logger.info("Number of files {}: {}".format(action, n_for_action))

        # log error
        errors_count = self.reporter.failure_count
        if errors_count > 0:
            log_filepath = self.reporter.generate_log_files()
            if log_filepath is not None:
                logger.warning("Errors in {n_error} files. See {log_filepath} for full log".format(
                    n_error=errors_count, log_filepath=log_filepath))
            if raise_on_error is True:
                raise PlatformException(error="400",
                                        message=f"Errors in {errors_count} files. See above trace for more information")

        if return_as_list is True:
            # return list of items
            return list(self.reporter.output)
        if len(status_list) == 1:
            # if there is only one item, return it
            try:
                return next(self.reporter.output)
            except StopIteration:
                # if there is no items, return None
                return None
        # if there are multiple items, return the generator
        return self.reporter.output
137
+
138
    def _build_elements_from_inputs(self,
                                    local_path,
                                    local_annotations_path,
                                    # upload options
                                    remote_path,
                                    file_types,
                                    remote_name,
                                    item_metadata,
                                    export_version: str = entities.ExportVersion.V1,
                                    item_description=None):
        """
        Validate inputs, wrap each one in an UploadElement and schedule its upload.

        :param local_path: path/url/buffer/entity (or list of those) to upload
        :param local_annotations_path: Dataloop-format annotations json path(s), matched 1:1 to local_path
        :param remote_path: remote directory; normalized to start and end with '/'
        :param file_types: optional list of extensions to keep when walking a directory
        :param remote_name: remote base name(s); type and length must match local_path
        :param item_metadata: metadata dict (or entities.ExportMetadata) to set on uploaded items
        :param export_version: export version flag forwarded to the elements
        :param item_description: optional item description string
        :return: deque of futures, one per scheduled upload
        :raises PlatformException: on invalid argument types/lengths or unknown element types
        """
        # fix remote path
        if remote_path is None:
            # external storage imports keep their own path; everything else goes to root
            if isinstance(local_path, str) and local_path.startswith('external://'):
                remote_path = None
            else:
                remote_path = "/"
        if remote_path and not remote_path.startswith('/'):
            remote_path = f"/{remote_path}"
        if remote_path and not remote_path.endswith("/"):
            remote_path = f"{remote_path}/"

        if remote_name:
            remote_name = remote_name.lstrip('/')

        if file_types is not None and not isinstance(file_types, list):
            msg = '"file_types" should be a list of file extension. e.g [".jpg", ".png"]'
            raise PlatformException(error="400", message=msg)
        if item_metadata is not None:
            if not isinstance(item_metadata, dict) and not isinstance(item_metadata, entities.ExportMetadata):
                msg = '"item_metadata" should be a metadata dictionary. Got type: {}'.format(type(item_metadata))
                raise PlatformException(error="400", message=msg)
        if item_description is not None:
            if not isinstance(item_description, str):
                msg = '"item_description" should be a string. Got type: {}'.format(type(item_description))
                raise PlatformException(error="400", message=msg)

        ##########################
        # Convert inputs to list #
        ##########################
        local_annotations_path_list = None
        remote_name_list = None
        if not isinstance(local_path, list):
            # single input: wrap everything in one-element lists
            local_path_list = [local_path]
            if remote_name is not None:
                if not isinstance(remote_name, str):
                    raise PlatformException(error="400",
                                            message='remote_name must be a string, got: {}'.format(type(remote_name)))
                remote_name_list = [remote_name]
            if local_annotations_path is not None:
                if not isinstance(local_annotations_path, str):
                    raise PlatformException(error="400",
                                            message='local_annotations_path must be a string, got: {}'.format(
                                                type(local_annotations_path)))
                local_annotations_path_list = [local_annotations_path]
        else:
            # list input: companion arguments must be lists of the same length
            local_path_list = local_path
            if remote_name is not None:
                if not isinstance(remote_name, list):
                    raise PlatformException(error="400",
                                            message='remote_name must be a list, got: {}'.format(type(remote_name)))
                if not len(remote_name) == len(local_path_list):
                    raise PlatformException(error="400",
                                            message='remote_name and local_path_list must be of same length. '
                                                    'Received: remote_name: {}, '
                                                    'local_path_list: {}'.format(len(remote_name),
                                                                                 len(local_path_list)))
                remote_name_list = remote_name
            if local_annotations_path is not None:
                if not len(local_annotations_path) == len(local_path_list):
                    raise PlatformException(error="400",
                                            message='local_annotations_path and local_path_list must be of same lenght.'
                                                    ' Received: local_annotations_path: {}, '
                                                    'local_path_list: {}'.format(len(local_annotations_path),
                                                                                 len(local_path_list)))
                local_annotations_path_list = local_annotations_path

        if local_annotations_path is None:
            local_annotations_path_list = [None] * len(local_path_list)

        if remote_name is None:
            remote_name_list = [None] * len(local_path_list)

        # best-effort: the storage-driver path is optional context for the elements
        try:
            driver_path = self.items_repository.dataset.project.drivers.get(
                driver_id=self.items_repository.dataset.driver).path
        except Exception:
            driver_path = None

        futures = deque()
        total_size = 0
        for upload_item_element, remote_name, upload_annotations_element in zip(local_path_list,
                                                                                remote_name_list,
                                                                                local_annotations_path_list):
            if isinstance(upload_item_element, np.ndarray):
                # convert numpy.ndarray to io.BytesI
                if remote_name is None:
                    raise PlatformException(
                        error="400",
                        message='Upload element type was numpy.ndarray. providing param "remote_name" is mandatory')
                # the remote_name extension decides the encoder format
                file_extension = os.path.splitext(remote_name)
                if file_extension[1].lower() in ['.jpg', '.jpeg']:
                    item_format = 'JPEG'
                elif file_extension[1].lower() == '.png':
                    item_format = 'PNG'
                else:
                    raise PlatformException(
                        error="400",
                        message='"remote_name" with .jpg/.jpeg or .png extension are supported '
                                'when upload element of numpy.ndarray type.')

                buffer = io.BytesIO()
                Image.fromarray(upload_item_element).save(buffer, format=item_format)
                buffer.seek(0)
                buffer.name = remote_name
                upload_item_element = buffer

            # shared context handed to every UploadElement subclass
            all_upload_elements = {
                'upload_item_element': upload_item_element,
                'total_size': total_size,
                'remote_name': remote_name,
                'remote_path': remote_path,
                'upload_annotations_element': upload_annotations_element,
                'item_metadata': item_metadata,
                'annotations_filepath': None,
                'with_head_folder': None,
                'filename': None,
                'root': None,
                'export_version': export_version,
                'item_description': item_description,
                'driver_path': driver_path
            }
            if isinstance(upload_item_element, str):
                with_head_folder = True
                if upload_item_element.endswith('*'):
                    # '/dir/*' means: upload the directory contents without the head folder
                    with_head_folder = False
                    upload_item_element = os.path.dirname(upload_item_element)
                    all_upload_elements['upload_item_element'] = upload_item_element

                if os.path.isdir(upload_item_element):
                    # directories expand to one element per file in the tree;
                    # the trailing append below is skipped via 'continue'
                    for root, subdirs, files in os.walk(upload_item_element):
                        for filename in files:
                            all_upload_elements['with_head_folder'] = with_head_folder
                            all_upload_elements['filename'] = filename
                            all_upload_elements['root'] = root
                            _, ext = os.path.splitext(filename)
                            if file_types is None or ext in file_types:
                                upload_elem = upload_element.DirUploadElement(all_upload_elements=all_upload_elements)
                                futures.append(self.upload_single_element(upload_elem))
                    continue

                # add single file
                elif os.path.isfile(upload_item_element):
                    upload_elem = upload_element.FileUploadElement(all_upload_elements=all_upload_elements)

                elif upload_item_element.startswith('external://'):
                    upload_elem = upload_element.ExternalItemUploadElement(all_upload_elements=all_upload_elements)

                elif self.is_url(upload_item_element):
                    upload_elem = upload_element.UrlUploadElement(all_upload_elements=all_upload_elements)

                else:
                    raise PlatformException("404", "Unknown local path: {}".format(local_path))

            elif isinstance(upload_item_element, entities.Item):
                upload_elem = upload_element.ItemLinkUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, entities.Link):
                upload_elem = upload_element.LinkUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, entities.PromptItem):
                upload_elem = upload_element.PromptUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, entities.ItemGis):
                # GIS items are serialized to json and uploaded as binary buffers
                buffer = io.BytesIO(json.dumps(upload_item_element.to_json()).encode('utf-8'))
                buffer.name = upload_item_element.name
                all_upload_elements['upload_item_element'] = buffer
                upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, bytes) or \
                    isinstance(upload_item_element, io.BytesIO) or \
                    isinstance(upload_item_element, io.BufferedReader) or \
                    isinstance(upload_item_element, io.TextIOWrapper):
                upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)
                # get size from binaries
                try:
                    total_size += upload_item_element.__sizeof__()
                except Exception:
                    logger.warning("Cant get binaries size")

            else:
                raise PlatformException(
                    error="400",
                    message=f"Unknown element type to upload ('local_path'). received type: {type(upload_item_element)}. "
                            "known types (or list of those types): str (dir, file, url), bytes, io.BytesIO, "
                            "numpy.ndarray, io.TextIOWrapper, Dataloop.Item, Dataloop.Link")

            futures.append(self.upload_single_element(upload_elem))
        return futures
336
+
337
+ def upload_single_element(self, elem):
338
+ """
339
+ upload a signal element
340
+ :param elem: UploadElement
341
+ """
342
+ self.num_files += 1
343
+ self.i_item += 1
344
+ self.pbar.total += 1
345
+ self.reporter.upcount_num_workers()
346
+ future = asyncio.run_coroutine_threadsafe(
347
+ self.__upload_single_item_wrapper(element=elem,
348
+ mode=self.mode,
349
+ pbar=self.pbar,
350
+ reporter=self.reporter),
351
+ loop=self.items_repository._client_api.event_loop.loop)
352
+ return future
353
+
354
+ def _build_elements_from_df(self, df: pandas.DataFrame):
355
+ futures = deque()
356
+ for index, row in df.iterrows():
357
+ # DEFAULTS
358
+ elem = {'local_annotations_path': None,
359
+ 'remote_path': None,
360
+ 'remote_name': None,
361
+ 'file_types': None,
362
+ 'item_metadata': None,
363
+ 'item_description': None}
364
+ elem.update(row)
365
+ future = self._build_elements_from_inputs(**elem)
366
+ # append deque using +
367
+ futures += future
368
+ return futures
369
+
370
    async def __single_external_sync(self, element):
        """
        Import a single item that already exists on external storage.

        :param element: UploadElement whose buffer is an 'external://<storageId>' string
                        (grounded by the split('//') below — confirm against the element builder)
        :return: (Item, action) where action is the 'x-item-op' response header
        :raises exceptions.PlatformException: when the imports request fails
        """
        storage_id = element.buffer.split('//')[1]
        req_json = dict()
        req_json['filename'] = element.remote_filepath
        req_json['storageId'] = storage_id
        # the imports endpoint accepts a list; we send a single-entry batch
        success, response = self.items_repository._client_api.gen_request(req_type='post',
                                                                          path='/datasets/{}/imports'.format(
                                                                              self.items_repository.dataset.id),
                                                                          json_req=[req_json])

        if success:
            items = entities.Item.from_json(client_api=self.items_repository._client_api, _json=response.json()[0],
                                            project=self.items_repository._dataset._project,
                                            dataset=self.items_repository.dataset)
        else:
            raise exceptions.PlatformException(response)
        return items, response.headers.get('x-item-op', 'na')
387
+
388
+ async def __single_async_upload(self,
389
+ filepath,
390
+ remote_path,
391
+ uploaded_filename,
392
+ last_try,
393
+ mode,
394
+ item_metadata,
395
+ callback,
396
+ item_description
397
+ ):
398
+ """
399
+ Upload an item to dataset
400
+
401
+ :param filepath: local filepath of the item
402
+ :param remote_path: remote directory of filepath to upload
403
+ :param uploaded_filename: optional - remote filename
404
+ :param last_try: print log error only if last try
405
+ :param mode: 'skip' 'overwrite'
406
+ :param item_metadata: item metadata
407
+ :param str item_description: add a string description to the uploaded item
408
+ :param callback:
409
+ :return: Item object
410
+ """
411
+
412
+ need_close = False
413
+ if isinstance(filepath, str):
414
+ # upload local file
415
+ if not os.path.isfile(filepath):
416
+ raise PlatformException(error="404", message="Filepath doesnt exists. file: {}".format(filepath))
417
+ if uploaded_filename is None:
418
+ uploaded_filename = os.path.basename(filepath)
419
+ if os.path.isfile(filepath):
420
+ item_type = 'file'
421
+ else:
422
+ item_type = 'dir'
423
+ item_size = os.stat(filepath).st_size
424
+ to_upload = open(filepath, 'rb')
425
+ need_close = True
426
+
427
+ else:
428
+ # upload from buffer
429
+ if isinstance(filepath, bytes):
430
+ to_upload = io.BytesIO(filepath)
431
+ elif isinstance(filepath, io.BytesIO):
432
+ to_upload = filepath
433
+ elif isinstance(filepath, io.BufferedReader):
434
+ to_upload = filepath
435
+ elif isinstance(filepath, io.TextIOWrapper):
436
+ to_upload = filepath
437
+ else:
438
+ raise PlatformException("400", "Unknown input filepath type received: {}".format(type(filepath)))
439
+
440
+ if uploaded_filename is None:
441
+ if hasattr(filepath, "name"):
442
+ uploaded_filename = filepath.name
443
+ else:
444
+ raise PlatformException(error="400",
445
+ message="Must have filename when uploading bytes array (uploaded_filename)")
446
+
447
+ item_size = to_upload.seek(0, 2)
448
+ to_upload.seek(0)
449
+ item_type = 'file'
450
+ try:
451
+ response = await self.items_repository._client_api.upload_file_async(to_upload=to_upload,
452
+ item_type=item_type,
453
+ item_size=item_size,
454
+ item_metadata=item_metadata,
455
+ remote_url=self.remote_url,
456
+ uploaded_filename=uploaded_filename,
457
+ remote_path=remote_path,
458
+ callback=callback,
459
+ mode=mode,
460
+ item_description=item_description)
461
+ except Exception:
462
+ raise
463
+ finally:
464
+ if need_close:
465
+ to_upload.close()
466
+
467
+ if response.ok:
468
+ if item_size != response.json().get('metadata', {}).get('system', {}).get('size', 0):
469
+ self.items_repository.delete(item_id=response.json()['id'])
470
+ raise PlatformException(500,
471
+ "The uploaded file is corrupted. "
472
+ "Please try again. If it happens again please contact support.")
473
+ item = self.items_repository.items_entity.from_json(client_api=self.items_repository._client_api,
474
+ _json=response.json(),
475
+ dataset=self.items_repository.dataset)
476
+ else:
477
+ raise PlatformException(response)
478
+ return item, response.headers.get('x-item-op', 'na')
479
+
480
    async def __upload_single_item_wrapper(self, element, pbar, reporter, mode):
        """
        Upload one element with retries, then report success/failure to the reporter.

        Runs under a 5-slot semaphore to bound upload concurrency. On success,
        also uploads the element's annotations file (if any) and fires the
        ITEMS_UPLOAD callback; on failure, records the last error and traceback.

        :param element: UploadElement describing what and where to upload
        :param pbar: shared tqdm progress bar (may be None)
        :param reporter: Reporter collecting per-item status
        :param mode: 'skip' or 'overwrite'
        """
        async with self.items_repository._client_api.event_loop.semaphore('items.upload', 5):
            # assert isinstance(element, UploadElement)
            item = False
            err = None
            trace = None
            saved_locally = False
            temp_dir = None
            action = 'na'
            remote_folder, remote_name = os.path.split(element.remote_filepath)

            # materialize url/link elements into uploadable buffers first
            if element.type == 'url':
                saved_locally, element.buffer, temp_dir = self.url_to_data(element.buffer)
            elif element.type == 'link':
                element.buffer = self.link(ref=element.buffer.ref, dataset_id=element.buffer.dataset_id,
                                           type=element.buffer.type, mimetype=element.buffer.mimetype)

            for i_try in range(NUM_TRIES):
                try:
                    logger.debug("Upload item: {path}. Try {i}/{n}. Starting..".format(path=remote_name,
                                                                                       i=i_try + 1,
                                                                                       n=NUM_TRIES))
                    if element.type == 'external_file':
                        item, action = await self.__single_external_sync(element)
                    else:
                        # FROM_JSON: pull the item metadata out of the annotations file
                        if element.annotations_filepath is not None and \
                                element.item_metadata == entities.ExportMetadata.FROM_JSON:
                            element.item_metadata = {}
                            with open(element.annotations_filepath) as ann_f:
                                item_metadata = json.load(ann_f)
                            if 'metadata' in item_metadata:
                                element.item_metadata = item_metadata['metadata']
                        item, action = await self.__single_async_upload(filepath=element.buffer,
                                                                        mode=mode,
                                                                        item_metadata=element.item_metadata,
                                                                        remote_path=remote_folder,
                                                                        uploaded_filename=remote_name,
                                                                        last_try=(i_try + 1) == NUM_TRIES,
                                                                        callback=None,
                                                                        item_description=element.item_description)
                    logger.debug("Upload item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=remote_name,
                                                                                                   i=i_try + 1,
                                                                                                   n=NUM_TRIES,
                                                                                                   id=item.id))
                    if isinstance(item, entities.Item):
                        break
                    # backoff before the next attempt
                    # NOTE(review): time.sleep blocks the event-loop thread here;
                    # asyncio.sleep would yield instead — confirm intent
                    time.sleep(0.3 * (2 ** i_try))
                except Exception as e:
                    # keep only the latest error/trace for the final report
                    err = e
                    trace = traceback.format_exc()
                    logger.debug("Upload item: {path}. Try {i}/{n}. Fail.\n{trace}".format(path=remote_name,
                                                                                           i=i_try + 1,
                                                                                           n=NUM_TRIES,
                                                                                           trace=trace))

                finally:
                    # remove the temp dir created by url_to_data (if any)
                    if saved_locally and os.path.isdir(temp_dir):
                        shutil.rmtree(temp_dir)
            if item:
                if action in ['overwrite', 'created'] and element.annotations_filepath is not None:
                    try:
                        await self.__async_upload_annotations(annotations_filepath=element.annotations_filepath,
                                                              item=item)
                    except Exception:
                        logger.exception('Error uploading annotations to item id: {}'.format(item.id))

                reporter.set_index(status=action,
                                   output=item.to_json(),
                                   success=True,
                                   ref=item.id)
                if pbar is not None:
                    pbar.update()
                self.items_repository._client_api.callbacks.run_on_event(
                    event=self.items_repository._client_api.callbacks.CallbackEvent.ITEMS_UPLOAD,
                    context={'item_id': item.id, 'dataset_id': item.dataset_id},
                    progress=round(pbar.n / pbar.total * 100, 0))
            else:
                # best-effort reference for the error report
                if isinstance(element.buffer, str):
                    ref = element.buffer
                elif hasattr(element.buffer, "name"):
                    ref = element.buffer.name
                else:
                    ref = 'Unknown'
                reporter.set_index(ref=ref, status='error',
                                   success=False,
                                   error="{}\n{}".format(err, trace))
566
+
567
+ async def __async_upload_annotations(self, annotations_filepath, item):
568
+ with open(annotations_filepath, 'r', encoding="utf8") as f:
569
+ annotations = json.load(f)
570
+ # wait for coroutines on the current event loop
571
+ return await item.annotations._async_upload_annotations(annotations=annotations['annotations'])
572
+
573
+ @staticmethod
574
+ def url_to_data(url):
575
+ chunk_size = 8192
576
+ max_size = 30000000
577
+ temp_dir = None
578
+
579
+ # This will download the binaries from the URL user provided
580
+ prepared_request = requests.Request(method='GET', url=url).prepare()
581
+ with requests.Session() as s:
582
+ retry = Retry(
583
+ total=3,
584
+ read=3,
585
+ connect=3,
586
+ backoff_factor=1,
587
+ )
588
+ adapter = HTTPAdapter(max_retries=retry)
589
+ s.mount('http://', adapter)
590
+ s.mount('https://', adapter)
591
+ response = s.send(request=prepared_request, stream=True)
592
+
593
+ total_length = response.headers.get("content-length")
594
+ save_locally = int(total_length) > max_size
595
+
596
+ if save_locally:
597
+ # save to file
598
+ temp_dir = tempfile.mkdtemp()
599
+ temp_path = os.path.join(temp_dir, url.split('/')[-1].split('?')[0])
600
+ with open(temp_path, "wb") as f:
601
+ for chunk in response.iter_content(chunk_size=chunk_size):
602
+ if chunk: # filter out keep-alive new chunks
603
+ f.write(chunk)
604
+ # save to output variable
605
+ data = temp_path
606
+ else:
607
+ # save as byte stream
608
+ data = io.BytesIO()
609
+ for chunk in response.iter_content(chunk_size=chunk_size):
610
+ if chunk: # filter out keep-alive new chunks
611
+ data.write(chunk)
612
+ # go back to the beginning of the stream
613
+ data.seek(0)
614
+ data.name = url.split('/')[-1]
615
+
616
+ return save_locally, data, temp_dir
617
+
618
+ @staticmethod
619
+ def is_url(url):
620
+ try:
621
+ return validators.url(url)
622
+ except Exception:
623
+ return False
624
+
625
+ @staticmethod
626
+ def link(ref, type, mimetype=None, dataset_id=None):
627
+ """
628
+ :param ref:
629
+ :param type:
630
+ :param mimetype:
631
+ :param dataset_id:
632
+ """
633
+ link_info = {'type': type,
634
+ 'ref': ref}
635
+
636
+ if mimetype:
637
+ link_info['mimetype'] = mimetype
638
+
639
+ if dataset_id is not None:
640
+ link_info['datasetId'] = dataset_id
641
+
642
+ _json = {'type': 'link',
643
+ 'shebang': 'dataloop',
644
+ 'metadata': {'dltype': 'link',
645
+ 'linkInfo': link_info}}
646
+
647
+ uploaded_byte_io = io.BytesIO()
648
+ uploaded_byte_io.write(json.dumps(_json).encode())
649
+ uploaded_byte_io.seek(0)
650
+
651
+ return uploaded_byte_io