dtlpy 1.115.44__py3-none-any.whl → 1.117.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. dtlpy/__init__.py +491 -491
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/code_server/config.yaml +2 -2
  5. dtlpy/assets/code_server/installation.sh +24 -24
  6. dtlpy/assets/code_server/launch.json +13 -13
  7. dtlpy/assets/code_server/settings.json +2 -2
  8. dtlpy/assets/main.py +53 -53
  9. dtlpy/assets/main_partial.py +18 -18
  10. dtlpy/assets/mock.json +11 -11
  11. dtlpy/assets/model_adapter.py +83 -83
  12. dtlpy/assets/package.json +61 -61
  13. dtlpy/assets/package_catalog.json +29 -29
  14. dtlpy/assets/package_gitignore +307 -307
  15. dtlpy/assets/service_runners/__init__.py +33 -33
  16. dtlpy/assets/service_runners/converter.py +96 -96
  17. dtlpy/assets/service_runners/multi_method.py +49 -49
  18. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  19. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  20. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  21. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  22. dtlpy/assets/service_runners/single_method.py +37 -37
  23. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  24. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  25. dtlpy/assets/service_runners/single_method_item.py +41 -41
  26. dtlpy/assets/service_runners/single_method_json.py +42 -42
  27. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  28. dtlpy/assets/voc_annotation_template.xml +23 -23
  29. dtlpy/caches/base_cache.py +32 -32
  30. dtlpy/caches/cache.py +473 -473
  31. dtlpy/caches/dl_cache.py +201 -201
  32. dtlpy/caches/filesystem_cache.py +89 -89
  33. dtlpy/caches/redis_cache.py +84 -84
  34. dtlpy/dlp/__init__.py +20 -20
  35. dtlpy/dlp/cli_utilities.py +367 -367
  36. dtlpy/dlp/command_executor.py +764 -764
  37. dtlpy/dlp/dlp +1 -1
  38. dtlpy/dlp/dlp.bat +1 -1
  39. dtlpy/dlp/dlp.py +128 -128
  40. dtlpy/dlp/parser.py +651 -651
  41. dtlpy/entities/__init__.py +83 -83
  42. dtlpy/entities/analytic.py +347 -347
  43. dtlpy/entities/annotation.py +1879 -1879
  44. dtlpy/entities/annotation_collection.py +699 -699
  45. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  46. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  47. dtlpy/entities/annotation_definitions/box.py +195 -195
  48. dtlpy/entities/annotation_definitions/classification.py +67 -67
  49. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  50. dtlpy/entities/annotation_definitions/cube.py +204 -204
  51. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  52. dtlpy/entities/annotation_definitions/description.py +32 -32
  53. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  54. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  55. dtlpy/entities/annotation_definitions/gis.py +69 -69
  56. dtlpy/entities/annotation_definitions/note.py +139 -139
  57. dtlpy/entities/annotation_definitions/point.py +117 -117
  58. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  59. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  60. dtlpy/entities/annotation_definitions/pose.py +92 -92
  61. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  62. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  63. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  64. dtlpy/entities/annotation_definitions/text.py +85 -85
  65. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  66. dtlpy/entities/app.py +220 -220
  67. dtlpy/entities/app_module.py +107 -107
  68. dtlpy/entities/artifact.py +174 -174
  69. dtlpy/entities/assignment.py +399 -399
  70. dtlpy/entities/base_entity.py +214 -214
  71. dtlpy/entities/bot.py +113 -113
  72. dtlpy/entities/codebase.py +292 -292
  73. dtlpy/entities/collection.py +38 -38
  74. dtlpy/entities/command.py +169 -169
  75. dtlpy/entities/compute.py +449 -449
  76. dtlpy/entities/dataset.py +1299 -1299
  77. dtlpy/entities/directory_tree.py +44 -44
  78. dtlpy/entities/dpk.py +470 -470
  79. dtlpy/entities/driver.py +235 -235
  80. dtlpy/entities/execution.py +397 -397
  81. dtlpy/entities/feature.py +124 -124
  82. dtlpy/entities/feature_set.py +152 -145
  83. dtlpy/entities/filters.py +798 -798
  84. dtlpy/entities/gis_item.py +107 -107
  85. dtlpy/entities/integration.py +184 -184
  86. dtlpy/entities/item.py +975 -959
  87. dtlpy/entities/label.py +123 -123
  88. dtlpy/entities/links.py +85 -85
  89. dtlpy/entities/message.py +175 -175
  90. dtlpy/entities/model.py +684 -684
  91. dtlpy/entities/node.py +1005 -1005
  92. dtlpy/entities/ontology.py +810 -803
  93. dtlpy/entities/organization.py +287 -287
  94. dtlpy/entities/package.py +657 -657
  95. dtlpy/entities/package_defaults.py +5 -5
  96. dtlpy/entities/package_function.py +185 -185
  97. dtlpy/entities/package_module.py +113 -113
  98. dtlpy/entities/package_slot.py +118 -118
  99. dtlpy/entities/paged_entities.py +299 -299
  100. dtlpy/entities/pipeline.py +624 -624
  101. dtlpy/entities/pipeline_execution.py +279 -279
  102. dtlpy/entities/project.py +394 -394
  103. dtlpy/entities/prompt_item.py +505 -505
  104. dtlpy/entities/recipe.py +301 -301
  105. dtlpy/entities/reflect_dict.py +102 -102
  106. dtlpy/entities/resource_execution.py +138 -138
  107. dtlpy/entities/service.py +974 -963
  108. dtlpy/entities/service_driver.py +117 -117
  109. dtlpy/entities/setting.py +294 -294
  110. dtlpy/entities/task.py +495 -495
  111. dtlpy/entities/time_series.py +143 -143
  112. dtlpy/entities/trigger.py +426 -426
  113. dtlpy/entities/user.py +118 -118
  114. dtlpy/entities/webhook.py +124 -124
  115. dtlpy/examples/__init__.py +19 -19
  116. dtlpy/examples/add_labels.py +135 -135
  117. dtlpy/examples/add_metadata_to_item.py +21 -21
  118. dtlpy/examples/annotate_items_using_model.py +65 -65
  119. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  120. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  121. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  122. dtlpy/examples/convert_annotation_types.py +51 -51
  123. dtlpy/examples/converter.py +143 -143
  124. dtlpy/examples/copy_annotations.py +22 -22
  125. dtlpy/examples/copy_folder.py +31 -31
  126. dtlpy/examples/create_annotations.py +51 -51
  127. dtlpy/examples/create_video_annotations.py +83 -83
  128. dtlpy/examples/delete_annotations.py +26 -26
  129. dtlpy/examples/filters.py +113 -113
  130. dtlpy/examples/move_item.py +23 -23
  131. dtlpy/examples/play_video_annotation.py +13 -13
  132. dtlpy/examples/show_item_and_mask.py +53 -53
  133. dtlpy/examples/triggers.py +49 -49
  134. dtlpy/examples/upload_batch_of_items.py +20 -20
  135. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  136. dtlpy/examples/upload_items_with_modalities.py +43 -43
  137. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  138. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  139. dtlpy/exceptions.py +125 -125
  140. dtlpy/miscellaneous/__init__.py +20 -20
  141. dtlpy/miscellaneous/dict_differ.py +95 -95
  142. dtlpy/miscellaneous/git_utils.py +217 -217
  143. dtlpy/miscellaneous/json_utils.py +14 -14
  144. dtlpy/miscellaneous/list_print.py +105 -105
  145. dtlpy/miscellaneous/zipping.py +130 -130
  146. dtlpy/ml/__init__.py +20 -20
  147. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  148. dtlpy/ml/base_model_adapter.py +1287 -1230
  149. dtlpy/ml/metrics.py +461 -461
  150. dtlpy/ml/predictions_utils.py +274 -274
  151. dtlpy/ml/summary_writer.py +57 -57
  152. dtlpy/ml/train_utils.py +60 -60
  153. dtlpy/new_instance.py +252 -252
  154. dtlpy/repositories/__init__.py +56 -56
  155. dtlpy/repositories/analytics.py +85 -85
  156. dtlpy/repositories/annotations.py +916 -916
  157. dtlpy/repositories/apps.py +383 -383
  158. dtlpy/repositories/artifacts.py +452 -452
  159. dtlpy/repositories/assignments.py +599 -599
  160. dtlpy/repositories/bots.py +213 -213
  161. dtlpy/repositories/codebases.py +559 -559
  162. dtlpy/repositories/collections.py +332 -332
  163. dtlpy/repositories/commands.py +152 -152
  164. dtlpy/repositories/compositions.py +61 -61
  165. dtlpy/repositories/computes.py +439 -439
  166. dtlpy/repositories/datasets.py +1585 -1504
  167. dtlpy/repositories/downloader.py +1157 -923
  168. dtlpy/repositories/dpks.py +433 -433
  169. dtlpy/repositories/drivers.py +482 -482
  170. dtlpy/repositories/executions.py +815 -815
  171. dtlpy/repositories/feature_sets.py +256 -226
  172. dtlpy/repositories/features.py +255 -255
  173. dtlpy/repositories/integrations.py +484 -484
  174. dtlpy/repositories/items.py +912 -912
  175. dtlpy/repositories/messages.py +94 -94
  176. dtlpy/repositories/models.py +1000 -1000
  177. dtlpy/repositories/nodes.py +80 -80
  178. dtlpy/repositories/ontologies.py +511 -511
  179. dtlpy/repositories/organizations.py +525 -525
  180. dtlpy/repositories/packages.py +1941 -1941
  181. dtlpy/repositories/pipeline_executions.py +451 -451
  182. dtlpy/repositories/pipelines.py +640 -640
  183. dtlpy/repositories/projects.py +539 -539
  184. dtlpy/repositories/recipes.py +429 -399
  185. dtlpy/repositories/resource_executions.py +137 -137
  186. dtlpy/repositories/schema.py +120 -120
  187. dtlpy/repositories/service_drivers.py +213 -213
  188. dtlpy/repositories/services.py +1704 -1704
  189. dtlpy/repositories/settings.py +339 -339
  190. dtlpy/repositories/tasks.py +1477 -1477
  191. dtlpy/repositories/times_series.py +278 -278
  192. dtlpy/repositories/triggers.py +536 -536
  193. dtlpy/repositories/upload_element.py +257 -257
  194. dtlpy/repositories/uploader.py +661 -661
  195. dtlpy/repositories/webhooks.py +249 -249
  196. dtlpy/services/__init__.py +22 -22
  197. dtlpy/services/aihttp_retry.py +131 -131
  198. dtlpy/services/api_client.py +1786 -1785
  199. dtlpy/services/api_reference.py +40 -40
  200. dtlpy/services/async_utils.py +133 -133
  201. dtlpy/services/calls_counter.py +44 -44
  202. dtlpy/services/check_sdk.py +68 -68
  203. dtlpy/services/cookie.py +115 -115
  204. dtlpy/services/create_logger.py +156 -156
  205. dtlpy/services/events.py +84 -84
  206. dtlpy/services/logins.py +235 -235
  207. dtlpy/services/reporter.py +256 -256
  208. dtlpy/services/service_defaults.py +91 -91
  209. dtlpy/utilities/__init__.py +20 -20
  210. dtlpy/utilities/annotations/__init__.py +16 -16
  211. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  212. dtlpy/utilities/base_package_runner.py +285 -264
  213. dtlpy/utilities/converter.py +1650 -1650
  214. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  215. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  216. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  217. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  218. dtlpy/utilities/local_development/__init__.py +1 -1
  219. dtlpy/utilities/local_development/local_session.py +179 -179
  220. dtlpy/utilities/reports/__init__.py +2 -2
  221. dtlpy/utilities/reports/figures.py +343 -343
  222. dtlpy/utilities/reports/report.py +71 -71
  223. dtlpy/utilities/videos/__init__.py +17 -17
  224. dtlpy/utilities/videos/video_player.py +598 -598
  225. dtlpy/utilities/videos/videos.py +470 -470
  226. {dtlpy-1.115.44.data → dtlpy-1.117.6.data}/scripts/dlp +1 -1
  227. dtlpy-1.117.6.data/scripts/dlp.bat +2 -0
  228. {dtlpy-1.115.44.data → dtlpy-1.117.6.data}/scripts/dlp.py +128 -128
  229. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/METADATA +186 -186
  230. dtlpy-1.117.6.dist-info/RECORD +239 -0
  231. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/WHEEL +1 -1
  232. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/licenses/LICENSE +200 -200
  233. tests/features/environment.py +551 -551
  234. dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
  235. dtlpy-1.115.44.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.115.44.dist-info/RECORD +0 -240
  237. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/entry_points.txt +0 -0
  238. {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/top_level.txt +0 -0
@@ -1,661 +1,661 @@
1
- import sys
2
- from collections import deque
3
- import validators
4
- import traceback
5
- import tempfile
6
- import requests
7
- import asyncio
8
- import logging
9
- import pandas
10
- import shutil
11
- import json
12
- import time
13
- import tqdm
14
- import os
15
- import io
16
- import numpy as np
17
- from requests.adapters import HTTPAdapter
18
- from urllib3.util import Retry
19
- from PIL import Image
20
-
21
- from . import upload_element
22
-
23
- from .. import PlatformException, entities, repositories, exceptions
24
- from ..services import Reporter
25
-
26
- logger = logging.getLogger(name='dtlpy')
27
-
28
- NUM_TRIES = 5 # try to upload 3 time before fail on item
29
-
30
-
31
- class Uploader:
32
- def __init__(self, items_repository: repositories.Items, output_entity=entities.Item, no_output=False):
33
- assert isinstance(items_repository, repositories.Items)
34
- self.items_repository = items_repository
35
- self.remote_url = "/datasets/{}/items".format(self.items_repository.dataset.id)
36
- self.__stop_create_existence_dict = False
37
- self.mode = 'skip'
38
- self.num_files = 0
39
- self.i_item = 0
40
- self.pbar = tqdm.tqdm(total=0,
41
- disable=self.items_repository._client_api.verbose.disable_progress_bar_upload_items,
42
- file=sys.stdout, desc='Upload Items')
43
- self.reporter = Reporter(num_workers=0,
44
- resource=Reporter.ITEMS_UPLOAD,
45
- print_error_logs=items_repository._client_api.verbose.print_error_logs,
46
- output_entity=output_entity,
47
- client_api=items_repository._client_api,
48
- no_output=no_output)
49
-
50
- def upload(
51
- self,
52
- # what to upload
53
- local_path,
54
- local_annotations_path=None,
55
- # upload options
56
- remote_path=None,
57
- remote_name=None,
58
- file_types=None,
59
- overwrite=False,
60
- item_metadata=None,
61
- export_version: str = entities.ExportVersion.V1,
62
- item_description=None,
63
- raise_on_error=False,
64
- return_as_list=False
65
- ):
66
- """
67
- Upload local file to dataset.
68
- Local filesystem will remain.
69
- If `*` at the end of local_path (e.g. '/images/*') items will be uploaded without head directory
70
-
71
- :param local_path: local file or folder to upload
72
- :param local_annotations_path: path to Dataloop format annotations json files.
73
- :param remote_path: remote path to save.
74
- :param remote_name: remote base name to save.
75
- :param file_types: list of file type to upload. e.g ['.jpg', '.png']. default is all
76
- :param overwrite: optional - default = False
77
- :param item_metadata: upload the items with the metadata dictionary
78
- :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
79
- :param str item_description: add a string description to the uploaded item
80
- :param bool raise_on_error: raise an exception if an error occurs
81
- :param bool return_as_list: always return a list of items
82
-
83
- :return: Output (list)
84
- """
85
- ###################
86
- # Default options #
87
- ###################
88
- if overwrite:
89
- self.mode = 'overwrite'
90
- if isinstance(local_path, pandas.DataFrame):
91
- futures = self._build_elements_from_df(local_path)
92
- else:
93
- start_time = time.time()
94
- logger.debug(f"Building elements from inputs started: start time: {start_time}")
95
- futures = self._build_elements_from_inputs(local_path=local_path,
96
- local_annotations_path=local_annotations_path,
97
- # upload options
98
- remote_path=remote_path,
99
- remote_name=remote_name,
100
- file_types=file_types,
101
- item_metadata=item_metadata,
102
- export_version=export_version,
103
- item_description=item_description)
104
- logger.debug(f"Building elements from inputs completed: time taken: {time.time() - start_time}")
105
- num_files = len(futures)
106
- while futures:
107
- futures.popleft().result()
108
- logger.info("Uploading {} items..".format(num_files))
109
- self.pbar.close()
110
- # summary
111
- logger.info("Number of total files: {}".format(num_files))
112
- status_list = self.reporter.status_list
113
- for action in set(status_list):
114
- n_for_action = self.reporter.status_count(status=action)
115
- logger.info("Number of files {}: {}".format(action, n_for_action))
116
-
117
- # log error
118
- errors_count = self.reporter.failure_count
119
- if errors_count > 0:
120
- error_text = ""
121
- log_filepath = self.reporter.generate_log_files()
122
- # Get up to 5 error examples for the exception message
123
- if self.reporter._errors:
124
- error_examples = list(self.reporter._errors.values())[:5]
125
- error_text = " | ".join(error_examples)
126
- error_message = f"Errors in {errors_count} files. Errors: {error_text}"
127
- if log_filepath is not None:
128
- error_message += f", see {log_filepath} for full log"
129
- if raise_on_error is True:
130
- raise PlatformException(
131
- error="400", message=error_message
132
- )
133
- else:
134
- logger.warning(error_message)
135
-
136
- if return_as_list is True:
137
- # return list of items
138
- return list(self.reporter.output)
139
- if len(status_list) == 1:
140
- # if there is only one item, return it
141
- try:
142
- return next(self.reporter.output)
143
- except StopIteration:
144
- # if there is no items, return None
145
- return None
146
- # if there are multiple items, return the generator
147
- return self.reporter.output
148
-
149
- def _build_elements_from_inputs(self,
150
- local_path,
151
- local_annotations_path,
152
- # upload options
153
- remote_path,
154
- file_types,
155
- remote_name,
156
- item_metadata,
157
- export_version: str = entities.ExportVersion.V1,
158
- item_description=None):
159
- # fix remote path
160
- if remote_path is None:
161
- if isinstance(local_path, str) and local_path.startswith('external://'):
162
- remote_path = None
163
- else:
164
- remote_path = "/"
165
- if remote_path and not remote_path.startswith('/'):
166
- remote_path = f"/{remote_path}"
167
- if remote_path and not remote_path.endswith("/"):
168
- remote_path = f"{remote_path}/"
169
-
170
- if remote_name:
171
- remote_name = remote_name.lstrip('/')
172
-
173
- if file_types is not None and not isinstance(file_types, list):
174
- msg = '"file_types" should be a list of file extension. e.g [".jpg", ".png"]'
175
- raise PlatformException(error="400", message=msg)
176
- if item_metadata is not None:
177
- if not isinstance(item_metadata, dict) and not isinstance(item_metadata, entities.ExportMetadata):
178
- msg = '"item_metadata" should be a metadata dictionary. Got type: {}'.format(type(item_metadata))
179
- raise PlatformException(error="400", message=msg)
180
- if item_description is not None:
181
- if not isinstance(item_description, str):
182
- msg = '"item_description" should be a string. Got type: {}'.format(type(item_description))
183
- raise PlatformException(error="400", message=msg)
184
-
185
- ##########################
186
- # Convert inputs to list #
187
- ##########################
188
- local_annotations_path_list = None
189
- remote_name_list = None
190
- if not isinstance(local_path, list):
191
- local_path_list = [local_path]
192
- if remote_name is not None:
193
- if not isinstance(remote_name, str):
194
- raise PlatformException(error="400",
195
- message='remote_name must be a string, got: {}'.format(type(remote_name)))
196
- remote_name_list = [remote_name]
197
- if local_annotations_path is not None:
198
- if not isinstance(local_annotations_path, str):
199
- raise PlatformException(error="400",
200
- message='local_annotations_path must be a string, got: {}'.format(
201
- type(local_annotations_path)))
202
- local_annotations_path_list = [local_annotations_path]
203
- else:
204
- local_path_list = local_path
205
- if remote_name is not None:
206
- if not isinstance(remote_name, list):
207
- raise PlatformException(error="400",
208
- message='remote_name must be a list, got: {}'.format(type(remote_name)))
209
- if not len(remote_name) == len(local_path_list):
210
- raise PlatformException(error="400",
211
- message='remote_name and local_path_list must be of same length. '
212
- 'Received: remote_name: {}, '
213
- 'local_path_list: {}'.format(len(remote_name),
214
- len(local_path_list)))
215
- remote_name_list = remote_name
216
- if local_annotations_path is not None:
217
- if not len(local_annotations_path) == len(local_path_list):
218
- raise PlatformException(error="400",
219
- message='local_annotations_path and local_path_list must be of same lenght.'
220
- ' Received: local_annotations_path: {}, '
221
- 'local_path_list: {}'.format(len(local_annotations_path),
222
- len(local_path_list)))
223
- local_annotations_path_list = local_annotations_path
224
-
225
- if local_annotations_path is None:
226
- local_annotations_path_list = [None] * len(local_path_list)
227
-
228
- if remote_name is None:
229
- remote_name_list = [None] * len(local_path_list)
230
-
231
- futures = deque()
232
- total_size = 0
233
- for upload_item_element, remote_name, upload_annotations_element in zip(local_path_list,
234
- remote_name_list,
235
- local_annotations_path_list):
236
- if isinstance(upload_item_element, np.ndarray):
237
- # convert numpy.ndarray to io.BytesI
238
- if remote_name is None:
239
- raise PlatformException(
240
- error="400",
241
- message='Upload element type was numpy.ndarray. providing param "remote_name" is mandatory')
242
- file_extension = os.path.splitext(remote_name)
243
- if file_extension[1].lower() in ['.jpg', '.jpeg']:
244
- item_format = 'JPEG'
245
- elif file_extension[1].lower() == '.png':
246
- item_format = 'PNG'
247
- else:
248
- raise PlatformException(
249
- error="400",
250
- message='"remote_name" with .jpg/.jpeg or .png extension are supported '
251
- 'when upload element of numpy.ndarray type.')
252
-
253
- buffer = io.BytesIO()
254
- Image.fromarray(upload_item_element).save(buffer, format=item_format)
255
- buffer.seek(0)
256
- buffer.name = remote_name
257
- upload_item_element = buffer
258
-
259
- all_upload_elements = {
260
- 'upload_item_element': upload_item_element,
261
- 'total_size': total_size,
262
- 'remote_name': remote_name,
263
- 'remote_path': remote_path,
264
- 'upload_annotations_element': upload_annotations_element,
265
- 'item_metadata': item_metadata,
266
- 'annotations_filepath': None,
267
- 'with_head_folder': None,
268
- 'filename': None,
269
- 'root': None,
270
- 'export_version': export_version,
271
- 'item_description': item_description,
272
- 'driver_path': None
273
- }
274
- if isinstance(upload_item_element, str):
275
- with_head_folder = True
276
- if upload_item_element.endswith('*'):
277
- with_head_folder = False
278
- upload_item_element = os.path.dirname(upload_item_element)
279
- all_upload_elements['upload_item_element'] = upload_item_element
280
-
281
- if os.path.isdir(upload_item_element):
282
- for root, subdirs, files in os.walk(upload_item_element):
283
- for filename in files:
284
- all_upload_elements['with_head_folder'] = with_head_folder
285
- all_upload_elements['filename'] = filename
286
- all_upload_elements['root'] = root
287
- _, ext = os.path.splitext(filename)
288
- if file_types is None or ext in file_types:
289
- upload_elem = upload_element.DirUploadElement(all_upload_elements=all_upload_elements)
290
- futures.append(self.upload_single_element(upload_elem))
291
- continue
292
-
293
- # add single file
294
- elif os.path.isfile(upload_item_element):
295
- upload_elem = upload_element.FileUploadElement(all_upload_elements=all_upload_elements)
296
-
297
- elif upload_item_element.startswith('external://'):
298
- try:
299
- driver_path = repositories.Drivers.get(driver_id=self.items_repository.dataset.driver).path
300
- all_upload_elements['driver_path'] = driver_path
301
- except Exception:
302
- logger.error("Attempting to upload external item without driver path. This may cause issues.")
303
- upload_elem = upload_element.ExternalItemUploadElement(all_upload_elements=all_upload_elements)
304
-
305
- elif self.is_url(upload_item_element):
306
- upload_elem = upload_element.UrlUploadElement(all_upload_elements=all_upload_elements)
307
-
308
- else:
309
- raise PlatformException("404", "Unknown local path: {}".format(local_path))
310
-
311
- elif isinstance(upload_item_element, entities.Item):
312
- upload_elem = upload_element.ItemLinkUploadElement(all_upload_elements=all_upload_elements)
313
-
314
- elif isinstance(upload_item_element, entities.Link):
315
- upload_elem = upload_element.LinkUploadElement(all_upload_elements=all_upload_elements)
316
-
317
- elif isinstance(upload_item_element, entities.PromptItem):
318
- upload_elem = upload_element.PromptUploadElement(all_upload_elements=all_upload_elements)
319
-
320
- elif isinstance(upload_item_element, entities.ItemGis):
321
- buffer = io.BytesIO(json.dumps(upload_item_element.to_json()).encode('utf-8'))
322
- buffer.name = upload_item_element.name
323
- all_upload_elements['upload_item_element'] = buffer
324
- upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)
325
-
326
- elif isinstance(upload_item_element, bytes) or \
327
- isinstance(upload_item_element, io.BytesIO) or \
328
- isinstance(upload_item_element, io.BufferedReader) or \
329
- isinstance(upload_item_element, io.TextIOWrapper):
330
- upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)
331
- # get size from binaries
332
- try:
333
- total_size += upload_item_element.__sizeof__()
334
- except Exception:
335
- logger.warning("Cant get binaries size")
336
-
337
- else:
338
- raise PlatformException(
339
- error="400",
340
- message=f"Unknown element type to upload ('local_path'). received type: {type(upload_item_element)}. "
341
- "known types (or list of those types): str (dir, file, url), bytes, io.BytesIO, "
342
- "numpy.ndarray, io.TextIOWrapper, Dataloop.Item, Dataloop.Link")
343
-
344
- futures.append(self.upload_single_element(upload_elem))
345
- return futures
346
-
347
- def upload_single_element(self, elem):
348
- """
349
- upload a signal element
350
- :param elem: UploadElement
351
- """
352
- self.num_files += 1
353
- self.i_item += 1
354
- self.pbar.total += 1
355
- self.reporter.upcount_num_workers()
356
- future = asyncio.run_coroutine_threadsafe(
357
- self.__upload_single_item_wrapper(element=elem,
358
- mode=self.mode,
359
- pbar=self.pbar,
360
- reporter=self.reporter),
361
- loop=self.items_repository._client_api.event_loop.loop)
362
- return future
363
-
364
- def _build_elements_from_df(self, df: pandas.DataFrame):
365
- futures = deque()
366
- for index, row in df.iterrows():
367
- # DEFAULTS
368
- elem = {'local_annotations_path': None,
369
- 'remote_path': None,
370
- 'remote_name': None,
371
- 'file_types': None,
372
- 'item_metadata': None,
373
- 'item_description': None}
374
- elem.update(row)
375
- future = self._build_elements_from_inputs(**elem)
376
- # append deque using +
377
- futures += future
378
- return futures
379
-
380
- async def __single_external_sync(self, element):
381
- storage_id = element.buffer.split('//')[1]
382
- req_json = dict()
383
- req_json['filename'] = element.remote_filepath
384
- req_json['storageId'] = storage_id
385
- success, response = self.items_repository._client_api.gen_request(req_type='post',
386
- path='/datasets/{}/imports'.format(
387
- self.items_repository.dataset.id),
388
- json_req=[req_json])
389
-
390
- if success:
391
- items = entities.Item.from_json(client_api=self.items_repository._client_api, _json=response.json()[0],
392
- project=self.items_repository._dataset._project,
393
- dataset=self.items_repository.dataset)
394
- else:
395
- raise exceptions.PlatformException(response)
396
- return items, response.headers.get('x-item-op', 'na')
397
-
398
- async def __single_async_upload(self,
399
- filepath,
400
- remote_path,
401
- uploaded_filename,
402
- last_try,
403
- mode,
404
- item_metadata,
405
- callback,
406
- item_description
407
- ):
408
- """
409
- Upload an item to dataset
410
-
411
- :param filepath: local filepath of the item
412
- :param remote_path: remote directory of filepath to upload
413
- :param uploaded_filename: optional - remote filename
414
- :param last_try: print log error only if last try
415
- :param mode: 'skip' 'overwrite'
416
- :param item_metadata: item metadata
417
- :param str item_description: add a string description to the uploaded item
418
- :param callback:
419
- :return: Item object
420
- """
421
-
422
- need_close = False
423
- if isinstance(filepath, str):
424
- # upload local file
425
- if not os.path.isfile(filepath):
426
- raise PlatformException(error="404", message="Filepath doesnt exists. file: {}".format(filepath))
427
- if uploaded_filename is None:
428
- uploaded_filename = os.path.basename(filepath)
429
- if os.path.isfile(filepath):
430
- item_type = 'file'
431
- else:
432
- item_type = 'dir'
433
- item_size = os.stat(filepath).st_size
434
- to_upload = open(filepath, 'rb')
435
- need_close = True
436
-
437
- else:
438
- # upload from buffer
439
- if isinstance(filepath, bytes):
440
- to_upload = io.BytesIO(filepath)
441
- elif isinstance(filepath, io.BytesIO):
442
- to_upload = filepath
443
- elif isinstance(filepath, io.BufferedReader):
444
- to_upload = filepath
445
- elif isinstance(filepath, io.TextIOWrapper):
446
- to_upload = filepath
447
- else:
448
- raise PlatformException("400", "Unknown input filepath type received: {}".format(type(filepath)))
449
-
450
- if uploaded_filename is None:
451
- if hasattr(filepath, "name"):
452
- uploaded_filename = filepath.name
453
- else:
454
- raise PlatformException(error="400",
455
- message="Must have filename when uploading bytes array (uploaded_filename)")
456
-
457
- item_size = to_upload.seek(0, 2)
458
- to_upload.seek(0)
459
- item_type = 'file'
460
- try:
461
- response = await self.items_repository._client_api.upload_file_async(to_upload=to_upload,
462
- item_type=item_type,
463
- item_size=item_size,
464
- item_metadata=item_metadata,
465
- remote_url=self.remote_url,
466
- uploaded_filename=uploaded_filename,
467
- remote_path=remote_path,
468
- callback=callback,
469
- mode=mode,
470
- item_description=item_description)
471
- except Exception:
472
- raise
473
- finally:
474
- if need_close:
475
- to_upload.close()
476
-
477
- if response.ok:
478
- if item_size != response.json().get('metadata', {}).get('system', {}).get('size', 0):
479
- self.items_repository.delete(item_id=response.json()['id'])
480
- raise PlatformException(500,
481
- "The uploaded file is corrupted. "
482
- "Please try again. If it happens again please contact support.")
483
- item = self.items_repository.items_entity.from_json(client_api=self.items_repository._client_api,
484
- _json=response.json(),
485
- dataset=self.items_repository.dataset)
486
- else:
487
- raise PlatformException(response)
488
- return item, response.headers.get('x-item-op', 'na')
489
-
490
async def __upload_single_item_wrapper(self, element, pbar, reporter, mode):
    """
    Upload one element with retries and record the outcome in the reporter.

    Runs under the client's 'items.upload' semaphore. URL elements are downloaded first and link
    elements are converted to a link json buffer; the upload is then attempted up to NUM_TRIES times.
    On success the annotations file (if any) is uploaded for 'overwrite'/'created' actions and the
    reporter, progress bar and ITEMS_UPLOAD callback are updated. On failure the last error and its
    traceback are written to the reporter.

    :param element: UploadElement describing what to upload and where
    :param pbar: progress bar to advance on success (may be None)
    :param reporter: reporter collecting the per-item status
    :param mode: 'skip' 'overwrite'
    """
    async with self.items_repository._client_api.event_loop.semaphore('items.upload', 5):
        # assert isinstance(element, UploadElement)
        item = False
        err = None
        trace = None
        saved_locally = False
        temp_dir = None
        action = 'na'
        remote_folder, remote_name = os.path.split(element.remote_filepath)

        if element.type == 'url':
            saved_locally, element.buffer, temp_dir = self.url_to_data(element.buffer)
        elif element.type == 'link':
            element.buffer = self.link(ref=element.buffer.ref, dataset_id=element.buffer.dataset_id,
                                       type=element.buffer.type, mimetype=element.buffer.mimetype)

        try:
            for i_try in range(NUM_TRIES):
                try:
                    logger.debug("Upload item: {path}. Try {i}/{n}. Starting..".format(path=remote_name,
                                                                                        i=i_try + 1,
                                                                                        n=NUM_TRIES))
                    if element.type == 'external_file':
                        item, action = await self.__single_external_sync(element)
                    else:
                        if element.annotations_filepath is not None and \
                                element.item_metadata == entities.ExportMetadata.FROM_JSON:
                            # metadata is taken from the annotations json instead of the caller
                            element.item_metadata = {}
                            with open(element.annotations_filepath) as ann_f:
                                item_metadata = json.load(ann_f)
                            if 'metadata' in item_metadata:
                                element.item_metadata = item_metadata['metadata']
                        item, action = await self.__single_async_upload(filepath=element.buffer,
                                                                        mode=mode,
                                                                        item_metadata=element.item_metadata,
                                                                        remote_path=remote_folder,
                                                                        uploaded_filename=remote_name,
                                                                        last_try=(i_try + 1) == NUM_TRIES,
                                                                        callback=None,
                                                                        item_description=element.item_description)
                    logger.debug("Upload item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=remote_name,
                                                                                                    i=i_try + 1,
                                                                                                    n=NUM_TRIES,
                                                                                                    id=item.id))
                    if isinstance(item, entities.Item):
                        break
                    # non-blocking backoff: time.sleep here would stall every upload on this event loop
                    await asyncio.sleep(0.3 * (2 ** i_try))
                except Exception as e:
                    err = e
                    trace = traceback.format_exc()
                    logger.debug("Upload item: {path}. Try {i}/{n}. Fail.\n{trace}".format(path=remote_name,
                                                                                          i=i_try + 1,
                                                                                          n=NUM_TRIES,
                                                                                          trace=trace))
        finally:
            # remove the downloaded copy only once all attempts are over; deleting it after each
            # attempt would make every retry fail on a missing file
            if saved_locally and os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)

        if item:
            if action in ['overwrite', 'created'] and element.annotations_filepath is not None:
                try:
                    await self.__async_upload_annotations(annotations_filepath=element.annotations_filepath,
                                                          item=item)
                except Exception:
                    # the item itself was uploaded; an annotations failure is only logged
                    logger.exception('Error uploading annotations to item id: {}'.format(item.id))

            reporter.set_index(status=action,
                               output=item.to_json(),
                               success=True,
                               ref=item.id)
            if pbar is not None:
                pbar.update()
                self.items_repository._client_api.callbacks.run_on_event(
                    event=self.items_repository._client_api.callbacks.CallbackEvent.ITEMS_UPLOAD,
                    context={'item_id': item.id, 'dataset_id': item.dataset_id},
                    progress=round(pbar.n / pbar.total * 100, 0))
        else:
            # pick the most descriptive reference available for the failed element
            if isinstance(element.buffer, str):
                ref = element.buffer
            elif hasattr(element.buffer, "name"):
                ref = element.buffer.name
            else:
                ref = 'Unknown'
            reporter.set_index(ref=ref, status='error',
                               success=False,
                               error="{}\n{}".format(err, trace))
576
-
577
async def __async_upload_annotations(self, annotations_filepath, item):
    """
    Read a Dataloop annotations json file and upload its annotations to an item.

    :param annotations_filepath: path of the json file; its 'annotations' entry is uploaded
    :param item: item whose annotations repository performs the upload
    :return: whatever the item's async annotations upload returns
    """
    with open(annotations_filepath, 'r', encoding="utf8") as annotations_file:
        payload = json.load(annotations_file)
    # awaited here so the upload runs on the current event loop
    uploaded = await item.annotations._async_upload_annotations(annotations=payload['annotations'])
    return uploaded
582
-
583
@staticmethod
def url_to_data(url):
    """
    Download the content behind a URL so it can be uploaded as an item.

    Responses whose declared Content-Length exceeds ``max_size`` bytes are written to a file in a
    fresh temporary directory; smaller ones are kept in an in-memory buffer. When the server sends
    no usable Content-Length the size is unknown, so the content is written to disk as well.

    :param url: URL to download
    :return: tuple ``(save_locally, data, temp_dir)`` - ``data`` is the temp file path when
        ``save_locally`` is True, otherwise an ``io.BytesIO`` positioned at 0; ``temp_dir`` is the
        directory the caller must remove (None for in-memory data)
    """
    chunk_size = 8192
    max_size = 30000000
    temp_dir = None

    # This will download the binaries from the URL user provided
    prepared_request = requests.Request(method='GET', url=url).prepare()
    with requests.Session() as s:
        retry = Retry(
            total=3,
            read=3,
            connect=3,
            backoff_factor=1,
        )
        adapter = HTTPAdapter(max_retries=retry)
        s.mount('http://', adapter)
        s.mount('https://', adapter)
        response = s.send(request=prepared_request, stream=True)

    total_length = response.headers.get("content-length")
    try:
        save_locally = int(total_length) > max_size
    except (TypeError, ValueError):
        # header missing or malformed (e.g. chunked transfer): size is unknown, so stream to
        # disk rather than risk buffering an arbitrarily large body in memory
        save_locally = True

    if save_locally:
        # save to file
        temp_dir = tempfile.mkdtemp()
        temp_path = os.path.join(temp_dir, url.split('/')[-1].split('?')[0])
        with open(temp_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
        # save to output variable
        data = temp_path
    else:
        # save as byte stream
        data = io.BytesIO()
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:  # filter out keep-alive new chunks
                data.write(chunk)
        # go back to the beginning of the stream
        data.seek(0)
        data.name = url.split('/')[-1]

    return save_locally, data, temp_dir
627
-
628
@staticmethod
def is_url(url):
    """
    Check a value with ``validators.url``.

    :param url: value to check
    :return: the result of ``validators.url(url)``, or False if the validator raised
    """
    try:
        verdict = validators.url(url)
    except Exception:
        verdict = False
    return verdict
634
-
635
- @staticmethod
636
- def link(ref, type, mimetype=None, dataset_id=None):
637
- """
638
- :param ref:
639
- :param type:
640
- :param mimetype:
641
- :param dataset_id:
642
- """
643
- link_info = {'type': type,
644
- 'ref': ref}
645
-
646
- if mimetype:
647
- link_info['mimetype'] = mimetype
648
-
649
- if dataset_id is not None:
650
- link_info['datasetId'] = dataset_id
651
-
652
- _json = {'type': 'link',
653
- 'shebang': 'dataloop',
654
- 'metadata': {'dltype': 'link',
655
- 'linkInfo': link_info}}
656
-
657
- uploaded_byte_io = io.BytesIO()
658
- uploaded_byte_io.write(json.dumps(_json).encode())
659
- uploaded_byte_io.seek(0)
660
-
661
- return uploaded_byte_io
1
+ import sys
2
+ from collections import deque
3
+ import validators
4
+ import traceback
5
+ import tempfile
6
+ import requests
7
+ import asyncio
8
+ import logging
9
+ import pandas
10
+ import shutil
11
+ import json
12
+ import time
13
+ import tqdm
14
+ import os
15
+ import io
16
+ import numpy as np
17
+ from requests.adapters import HTTPAdapter
18
+ from urllib3.util import Retry
19
+ from PIL import Image
20
+
21
+ from . import upload_element
22
+
23
+ from .. import PlatformException, entities, repositories, exceptions
24
+ from ..services import Reporter
25
+
26
+ logger = logging.getLogger(name='dtlpy')
27
+
28
+ NUM_TRIES = 5  # try to upload 5 times before failing an item
29
+
30
+
31
+ class Uploader:
32
def __init__(self, items_repository: repositories.Items, output_entity=entities.Item, no_output=False):
    """
    Bind an uploader to one items repository and set up its progress bar and reporter.

    :param items_repository: repository whose dataset receives the uploaded items
    :param output_entity: entity class passed to the reporter as its output entity
    :param bool no_output: passed to the reporter unchanged
    """
    assert isinstance(items_repository, repositories.Items)
    client_api = items_repository._client_api
    verbosity = client_api.verbose

    self.items_repository = items_repository
    self.remote_url = "/datasets/{}/items".format(items_repository.dataset.id)
    self.__stop_create_existence_dict = False
    # stays 'skip' unless upload() is called with overwrite=True
    self.mode = 'skip'
    self.num_files = 0
    self.i_item = 0
    # total starts at 0 and grows as elements are scheduled
    self.pbar = tqdm.tqdm(total=0,
                          disable=verbosity.disable_progress_bar_upload_items,
                          file=sys.stdout,
                          desc='Upload Items')
    self.reporter = Reporter(num_workers=0,
                             resource=Reporter.ITEMS_UPLOAD,
                             print_error_logs=verbosity.print_error_logs,
                             output_entity=output_entity,
                             client_api=client_api,
                             no_output=no_output)
49
+
50
def upload(
        self,
        # what to upload
        local_path,
        local_annotations_path=None,
        # upload options
        remote_path=None,
        remote_name=None,
        file_types=None,
        overwrite=False,
        item_metadata=None,
        export_version: str = entities.ExportVersion.V1,
        item_description=None,
        raise_on_error=False,
        return_as_list=False
):
    """
    Upload local file to dataset.
    Local filesystem will remain.
    If `*` at the end of local_path (e.g. '/images/*') items will be uploaded without head directory

    :param local_path: local file or folder to upload; a pandas.DataFrame is handled row by row,
        each row supplying the upload arguments
    :param local_annotations_path: path to Dataloop format annotations json files.
    :param remote_path: remote path to save.
    :param remote_name: remote base name to save.
    :param file_types: list of file type to upload. e.g ['.jpg', '.png']. default is all
    :param overwrite: optional - default = False
    :param item_metadata: upload the items with the metadata dictionary
    :param str export_version: export version of the uploaded items; with `V1` there is no original
        extension in filenames
    :param str item_description: add a string description to the uploaded item
    :param bool raise_on_error: raise an exception if an error occurs
    :param bool return_as_list: always return a list of items

    :return: list of items when ``return_as_list`` is True; otherwise a single item (or None) when
        exactly one status was reported, else the reporter's output generator
    :raises PlatformException: when ``raise_on_error`` is True and at least one file failed
    """
    ###################
    # Default options #
    ###################
    if overwrite:
        self.mode = 'overwrite'
    if isinstance(local_path, pandas.DataFrame):
        futures = self._build_elements_from_df(local_path)
    else:
        start_time = time.time()
        logger.debug(f"Building elements from inputs started: start time: {start_time}")
        futures = self._build_elements_from_inputs(local_path=local_path,
                                                   local_annotations_path=local_annotations_path,
                                                   # upload options
                                                   remote_path=remote_path,
                                                   remote_name=remote_name,
                                                   file_types=file_types,
                                                   item_metadata=item_metadata,
                                                   export_version=export_version,
                                                   item_description=item_description)
        logger.debug(f"Building elements from inputs completed: time taken: {time.time() - start_time}")
    num_files = len(futures)
    # announce the batch before blocking on it, not after everything has already finished
    logger.info("Uploading {} items..".format(num_files))
    while futures:
        # pop as we go so finished futures are released instead of kept until the end
        futures.popleft().result()
    self.pbar.close()
    # summary
    logger.info("Number of total files: {}".format(num_files))
    status_list = self.reporter.status_list
    for action in set(status_list):
        n_for_action = self.reporter.status_count(status=action)
        logger.info("Number of files {}: {}".format(action, n_for_action))

    # log error
    errors_count = self.reporter.failure_count
    if errors_count > 0:
        error_text = ""
        log_filepath = self.reporter.generate_log_files()
        # Get up to 5 error examples for the exception message
        if self.reporter._errors:
            error_examples = list(self.reporter._errors.values())[:5]
            # str() keeps the summary from failing if an error value is not a string
            error_text = " | ".join(str(example) for example in error_examples)
        error_message = f"Errors in {errors_count} files. Errors: {error_text}"
        if log_filepath is not None:
            error_message += f", see {log_filepath} for full log"
        if raise_on_error is True:
            raise PlatformException(
                error="400", message=error_message
            )
        else:
            logger.warning(error_message)

    if return_as_list is True:
        # return list of items
        return list(self.reporter.output)
    if len(status_list) == 1:
        # if there is only one item, return it
        try:
            return next(self.reporter.output)
        except StopIteration:
            # if there is no items, return None
            return None
    # if there are multiple items, return the generator
    return self.reporter.output
148
+
149
def _build_elements_from_inputs(self,
                                local_path,
                                local_annotations_path,
                                # upload options
                                remote_path,
                                file_types,
                                remote_name,
                                item_metadata,
                                export_version: str = entities.ExportVersion.V1,
                                item_description=None):
    """
    Validate the upload inputs, wrap every source in an upload element and schedule its upload.

    :param local_path: one source or a list of sources. Accepted sources: str (directory, file,
        'external://' path or URL), numpy.ndarray, bytes, io.BytesIO, io.BufferedReader,
        io.TextIOWrapper, Item, Link, PromptItem, ItemGis
    :param local_annotations_path: annotations json path (str) for a single source, or a list with
        one entry per source
    :param remote_path: remote directory; normalised to start and end with '/'
    :param file_types: list of extensions to keep when walking a directory, None keeps all
    :param remote_name: remote file name (str) for a single source, or a list with one per source
    :param item_metadata: dict or entities.ExportMetadata
    :param str export_version: forwarded to the upload elements
    :param str item_description: description forwarded to the upload elements
    :return: deque of futures, one per scheduled element
    :raises PlatformException: on invalid argument types, mismatching list lengths or an
        unsupported source
    """
    # fix remote path
    if remote_path is None:
        if isinstance(local_path, str) and local_path.startswith('external://'):
            remote_path = None
        else:
            remote_path = "/"
    if remote_path and not remote_path.startswith('/'):
        remote_path = f"/{remote_path}"
    if remote_path and not remote_path.endswith("/"):
        remote_path = f"{remote_path}/"

    # strip leading slashes from the name(s); remote_name may be a list when local_path is a list,
    # and calling str.lstrip on the list itself would raise AttributeError
    if isinstance(remote_name, str):
        remote_name = remote_name.lstrip('/')
    elif isinstance(remote_name, list):
        remote_name = [name.lstrip('/') if isinstance(name, str) else name for name in remote_name]

    if file_types is not None and not isinstance(file_types, list):
        msg = '"file_types" should be a list of file extension. e.g [".jpg", ".png"]'
        raise PlatformException(error="400", message=msg)
    if item_metadata is not None:
        if not isinstance(item_metadata, dict) and not isinstance(item_metadata, entities.ExportMetadata):
            msg = '"item_metadata" should be a metadata dictionary. Got type: {}'.format(type(item_metadata))
            raise PlatformException(error="400", message=msg)
    if item_description is not None:
        if not isinstance(item_description, str):
            msg = '"item_description" should be a string. Got type: {}'.format(type(item_description))
            raise PlatformException(error="400", message=msg)

    ##########################
    # Convert inputs to list #
    ##########################
    local_annotations_path_list = None
    remote_name_list = None
    if not isinstance(local_path, list):
        local_path_list = [local_path]
        if remote_name is not None:
            if not isinstance(remote_name, str):
                raise PlatformException(error="400",
                                        message='remote_name must be a string, got: {}'.format(type(remote_name)))
            remote_name_list = [remote_name]
        if local_annotations_path is not None:
            if not isinstance(local_annotations_path, str):
                raise PlatformException(error="400",
                                        message='local_annotations_path must be a string, got: {}'.format(
                                            type(local_annotations_path)))
            local_annotations_path_list = [local_annotations_path]
    else:
        local_path_list = local_path
        if remote_name is not None:
            if not isinstance(remote_name, list):
                raise PlatformException(error="400",
                                        message='remote_name must be a list, got: {}'.format(type(remote_name)))
            if not len(remote_name) == len(local_path_list):
                raise PlatformException(error="400",
                                        message='remote_name and local_path_list must be of same length. '
                                                'Received: remote_name: {}, '
                                                'local_path_list: {}'.format(len(remote_name),
                                                                             len(local_path_list)))
            remote_name_list = remote_name
        if local_annotations_path is not None:
            if isinstance(local_annotations_path, str):
                # a plain string would be compared by its character count and then zipped
                # character by character, so reject it explicitly
                raise PlatformException(error="400",
                                        message='local_annotations_path must be a list, got: {}'.format(
                                            type(local_annotations_path)))
            if not len(local_annotations_path) == len(local_path_list):
                raise PlatformException(error="400",
                                        message='local_annotations_path and local_path_list must be of same lenght.'
                                                ' Received: local_annotations_path: {}, '
                                                'local_path_list: {}'.format(len(local_annotations_path),
                                                                             len(local_path_list)))
            local_annotations_path_list = local_annotations_path

    if local_annotations_path is None:
        local_annotations_path_list = [None] * len(local_path_list)

    if remote_name is None:
        remote_name_list = [None] * len(local_path_list)

    futures = deque()
    total_size = 0
    for upload_item_element, remote_name, upload_annotations_element in zip(local_path_list,
                                                                            remote_name_list,
                                                                            local_annotations_path_list):
        if isinstance(upload_item_element, np.ndarray):
            # convert numpy.ndarray to io.BytesIO
            if remote_name is None:
                raise PlatformException(
                    error="400",
                    message='Upload element type was numpy.ndarray. providing param "remote_name" is mandatory')
            file_extension = os.path.splitext(remote_name)
            if file_extension[1].lower() in ['.jpg', '.jpeg']:
                item_format = 'JPEG'
            elif file_extension[1].lower() == '.png':
                item_format = 'PNG'
            else:
                raise PlatformException(
                    error="400",
                    message='"remote_name" with .jpg/.jpeg or .png extension are supported '
                            'when upload element of numpy.ndarray type.')

            buffer = io.BytesIO()
            Image.fromarray(upload_item_element).save(buffer, format=item_format)
            buffer.seek(0)
            buffer.name = remote_name
            upload_item_element = buffer

        all_upload_elements = {
            'upload_item_element': upload_item_element,
            'total_size': total_size,
            'remote_name': remote_name,
            'remote_path': remote_path,
            'upload_annotations_element': upload_annotations_element,
            'item_metadata': item_metadata,
            'annotations_filepath': None,
            'with_head_folder': None,
            'filename': None,
            'root': None,
            'export_version': export_version,
            'item_description': item_description,
            'driver_path': None
        }
        if isinstance(upload_item_element, str):
            with_head_folder = True
            if upload_item_element.endswith('*'):
                # trailing '*' means: upload the directory content without the head directory
                with_head_folder = False
                upload_item_element = os.path.dirname(upload_item_element)
                all_upload_elements['upload_item_element'] = upload_item_element

            if os.path.isdir(upload_item_element):
                for root, subdirs, files in os.walk(upload_item_element):
                    for filename in files:
                        all_upload_elements['with_head_folder'] = with_head_folder
                        all_upload_elements['filename'] = filename
                        all_upload_elements['root'] = root
                        _, ext = os.path.splitext(filename)
                        if file_types is None or ext in file_types:
                            upload_elem = upload_element.DirUploadElement(all_upload_elements=all_upload_elements)
                            futures.append(self.upload_single_element(upload_elem))
                # every file of the directory was scheduled above; skip the single-element append
                continue

            # add single file
            elif os.path.isfile(upload_item_element):
                upload_elem = upload_element.FileUploadElement(all_upload_elements=all_upload_elements)

            elif upload_item_element.startswith('external://'):
                try:
                    # NOTE(review): Drivers.get is called on the class, not on an instance - confirm
                    # this can succeed; any failure only logs and continues without a driver path
                    driver_path = repositories.Drivers.get(driver_id=self.items_repository.dataset.driver).path
                    all_upload_elements['driver_path'] = driver_path
                except Exception:
                    logger.error("Attempting to upload external item without driver path. This may cause issues.")
                upload_elem = upload_element.ExternalItemUploadElement(all_upload_elements=all_upload_elements)

            elif self.is_url(upload_item_element):
                upload_elem = upload_element.UrlUploadElement(all_upload_elements=all_upload_elements)

            else:
                # report the offending element, not the whole (possibly list) input
                raise PlatformException("404", "Unknown local path: {}".format(upload_item_element))

        elif isinstance(upload_item_element, entities.Item):
            upload_elem = upload_element.ItemLinkUploadElement(all_upload_elements=all_upload_elements)

        elif isinstance(upload_item_element, entities.Link):
            upload_elem = upload_element.LinkUploadElement(all_upload_elements=all_upload_elements)

        elif isinstance(upload_item_element, entities.PromptItem):
            upload_elem = upload_element.PromptUploadElement(all_upload_elements=all_upload_elements)

        elif isinstance(upload_item_element, entities.ItemGis):
            # GIS items are uploaded as their json representation
            buffer = io.BytesIO(json.dumps(upload_item_element.to_json()).encode('utf-8'))
            buffer.name = upload_item_element.name
            all_upload_elements['upload_item_element'] = buffer
            upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)

        elif isinstance(upload_item_element, bytes) or \
                isinstance(upload_item_element, io.BytesIO) or \
                isinstance(upload_item_element, io.BufferedReader) or \
                isinstance(upload_item_element, io.TextIOWrapper):
            upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)
            # get size from binaries
            try:
                total_size += upload_item_element.__sizeof__()
            except Exception:
                logger.warning("Cant get binaries size")

        else:
            raise PlatformException(
                error="400",
                message=f"Unknown element type to upload ('local_path'). received type: {type(upload_item_element)}. "
                        "known types (or list of those types): str (dir, file, url), bytes, io.BytesIO, "
                        "numpy.ndarray, io.TextIOWrapper, Dataloop.Item, Dataloop.Link")

        futures.append(self.upload_single_element(upload_elem))
    return futures
346
+
347
def upload_single_element(self, elem):
    """
    Schedule the upload of a single element on the client's event loop.

    :param elem: UploadElement
    :return: the future returned by asyncio.run_coroutine_threadsafe for this element's upload
    """
    # account for the element before it starts: counters, progress bar total and reporter workers
    self.num_files += 1
    self.i_item += 1
    self.pbar.total += 1
    self.reporter.upcount_num_workers()

    upload_coroutine = self.__upload_single_item_wrapper(element=elem,
                                                         mode=self.mode,
                                                         pbar=self.pbar,
                                                         reporter=self.reporter)
    target_loop = self.items_repository._client_api.event_loop.loop
    return asyncio.run_coroutine_threadsafe(upload_coroutine, loop=target_loop)
363
+
364
+ def _build_elements_from_df(self, df: pandas.DataFrame):
365
+ futures = deque()
366
+ for index, row in df.iterrows():
367
+ # DEFAULTS
368
+ elem = {'local_annotations_path': None,
369
+ 'remote_path': None,
370
+ 'remote_name': None,
371
+ 'file_types': None,
372
+ 'item_metadata': None,
373
+ 'item_description': None}
374
+ elem.update(row)
375
+ future = self._build_elements_from_inputs(**elem)
376
+ # append deque using +
377
+ futures += future
378
+ return futures
379
+
380
async def __single_external_sync(self, element):
    """
    Create an item for an external-storage file through the dataset 'imports' endpoint.

    :param element: upload element whose buffer is the 'external://...' path and whose
        remote_filepath is the item filename to register
    :return: tuple of the created Item and the value of the 'x-item-op' response header
        ('na' when the header is absent)
    :raises exceptions.PlatformException: when the request is not successful
    """
    # split only on the first '//' (the scheme separator) so a storage path that itself
    # contains '//' is not truncated
    storage_id = element.buffer.split('//', 1)[1]
    req_json = dict()
    req_json['filename'] = element.remote_filepath
    req_json['storageId'] = storage_id
    success, response = self.items_repository._client_api.gen_request(req_type='post',
                                                                      path='/datasets/{}/imports'.format(
                                                                          self.items_repository.dataset.id),
                                                                      json_req=[req_json])

    if success:
        # the endpoint takes a list of files; a single one was sent, so take the first result
        items = entities.Item.from_json(client_api=self.items_repository._client_api, _json=response.json()[0],
                                        project=self.items_repository._dataset._project,
                                        dataset=self.items_repository.dataset)
    else:
        raise exceptions.PlatformException(response)
    return items, response.headers.get('x-item-op', 'na')
397
+
398
async def __single_async_upload(self,
                                filepath,
                                remote_path,
                                uploaded_filename,
                                last_try,
                                mode,
                                item_metadata,
                                callback,
                                item_description
                                ):
    """
    Upload an item to dataset

    :param filepath: local filepath of the item, or its content as bytes / io.BytesIO /
        io.BufferedReader / io.TextIOWrapper
    :param remote_path: remote directory of filepath to upload
    :param uploaded_filename: optional - remote filename; required for buffers without a `name`
    :param last_try: not used by this method
    :param mode: 'skip' 'overwrite'
    :param item_metadata: item metadata
    :param str item_description: add a string description to the uploaded item
    :param callback:
    :return: tuple of the Item object and the 'x-item-op' response header ('na' when absent)
    :raises PlatformException: for a missing file, an unsupported input type, a missing filename,
        an unsuccessful response or a size mismatch between the sent and the stored file
    """

    need_close = False
    if isinstance(filepath, str):
        # upload local file
        if not os.path.isfile(filepath):
            raise PlatformException(error="404", message="Filepath doesnt exists. file: {}".format(filepath))
        if uploaded_filename is None:
            uploaded_filename = os.path.basename(filepath)
        # the guard above only lets regular files through
        item_type = 'file'
        item_size = os.stat(filepath).st_size
        to_upload = open(filepath, 'rb')
        need_close = True

    else:
        # upload from buffer
        if isinstance(filepath, bytes):
            to_upload = io.BytesIO(filepath)
        elif isinstance(filepath, io.BytesIO):
            to_upload = filepath
        elif isinstance(filepath, io.BufferedReader):
            to_upload = filepath
        elif isinstance(filepath, io.TextIOWrapper):
            to_upload = filepath
        else:
            raise PlatformException("400", "Unknown input filepath type received: {}".format(type(filepath)))

        if uploaded_filename is None:
            if hasattr(filepath, "name"):
                uploaded_filename = filepath.name
            else:
                raise PlatformException(error="400",
                                        message="Must have filename when uploading bytes array (uploaded_filename)")

        # seeking to the end returns the stream size; rewind afterwards so the upload starts at 0
        item_size = to_upload.seek(0, 2)
        to_upload.seek(0)
        item_type = 'file'
    try:
        response = await self.items_repository._client_api.upload_file_async(to_upload=to_upload,
                                                                             item_type=item_type,
                                                                             item_size=item_size,
                                                                             item_metadata=item_metadata,
                                                                             remote_url=self.remote_url,
                                                                             uploaded_filename=uploaded_filename,
                                                                             remote_path=remote_path,
                                                                             callback=callback,
                                                                             mode=mode,
                                                                             item_description=item_description)
    finally:
        # only close handles opened here; caller-owned buffers stay open
        if need_close:
            to_upload.close()

    if not response.ok:
        raise PlatformException(response)

    item_json = response.json()
    if item_size != item_json.get('metadata', {}).get('system', {}).get('size', 0):
        # the platform stored a different number of bytes than were sent: drop the broken item
        self.items_repository.delete(item_id=item_json['id'])
        raise PlatformException(500,
                                "The uploaded file is corrupted. "
                                "Please try again. If it happens again please contact support.")
    item = self.items_repository.items_entity.from_json(client_api=self.items_repository._client_api,
                                                        _json=item_json,
                                                        dataset=self.items_repository.dataset)
    return item, response.headers.get('x-item-op', 'na')
489
+
490
async def __upload_single_item_wrapper(self, element, pbar, reporter, mode):
    """
    Upload one element with retries and record the outcome in the reporter.

    Runs under the client's 'items.upload' semaphore. URL elements are downloaded first and link
    elements are converted to a link json buffer; the upload is then attempted up to NUM_TRIES times.
    On success the annotations file (if any) is uploaded for 'overwrite'/'created' actions and the
    reporter, progress bar and ITEMS_UPLOAD callback are updated. On failure the last error and its
    traceback are written to the reporter.

    :param element: UploadElement describing what to upload and where
    :param pbar: progress bar to advance on success (may be None)
    :param reporter: reporter collecting the per-item status
    :param mode: 'skip' 'overwrite'
    """
    async with self.items_repository._client_api.event_loop.semaphore('items.upload', 5):
        # assert isinstance(element, UploadElement)
        item = False
        err = None
        trace = None
        saved_locally = False
        temp_dir = None
        action = 'na'
        remote_folder, remote_name = os.path.split(element.remote_filepath)

        if element.type == 'url':
            saved_locally, element.buffer, temp_dir = self.url_to_data(element.buffer)
        elif element.type == 'link':
            element.buffer = self.link(ref=element.buffer.ref, dataset_id=element.buffer.dataset_id,
                                       type=element.buffer.type, mimetype=element.buffer.mimetype)

        try:
            for i_try in range(NUM_TRIES):
                try:
                    logger.debug("Upload item: {path}. Try {i}/{n}. Starting..".format(path=remote_name,
                                                                                        i=i_try + 1,
                                                                                        n=NUM_TRIES))
                    if element.type == 'external_file':
                        item, action = await self.__single_external_sync(element)
                    else:
                        if element.annotations_filepath is not None and \
                                element.item_metadata == entities.ExportMetadata.FROM_JSON:
                            # metadata is taken from the annotations json instead of the caller
                            element.item_metadata = {}
                            with open(element.annotations_filepath) as ann_f:
                                item_metadata = json.load(ann_f)
                            if 'metadata' in item_metadata:
                                element.item_metadata = item_metadata['metadata']
                        item, action = await self.__single_async_upload(filepath=element.buffer,
                                                                        mode=mode,
                                                                        item_metadata=element.item_metadata,
                                                                        remote_path=remote_folder,
                                                                        uploaded_filename=remote_name,
                                                                        last_try=(i_try + 1) == NUM_TRIES,
                                                                        callback=None,
                                                                        item_description=element.item_description)
                    logger.debug("Upload item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=remote_name,
                                                                                                    i=i_try + 1,
                                                                                                    n=NUM_TRIES,
                                                                                                    id=item.id))
                    if isinstance(item, entities.Item):
                        break
                    # non-blocking backoff: time.sleep here would stall every upload on this event loop
                    await asyncio.sleep(0.3 * (2 ** i_try))
                except Exception as e:
                    err = e
                    trace = traceback.format_exc()
                    logger.debug("Upload item: {path}. Try {i}/{n}. Fail.\n{trace}".format(path=remote_name,
                                                                                          i=i_try + 1,
                                                                                          n=NUM_TRIES,
                                                                                          trace=trace))
        finally:
            # remove the downloaded copy only once all attempts are over; deleting it after each
            # attempt would make every retry fail on a missing file
            if saved_locally and os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)

        if item:
            if action in ['overwrite', 'created'] and element.annotations_filepath is not None:
                try:
                    await self.__async_upload_annotations(annotations_filepath=element.annotations_filepath,
                                                          item=item)
                except Exception:
                    # the item itself was uploaded; an annotations failure is only logged
                    logger.exception('Error uploading annotations to item id: {}'.format(item.id))

            reporter.set_index(status=action,
                               output=item.to_json(),
                               success=True,
                               ref=item.id)
            if pbar is not None:
                pbar.update()
                self.items_repository._client_api.callbacks.run_on_event(
                    event=self.items_repository._client_api.callbacks.CallbackEvent.ITEMS_UPLOAD,
                    context={'item_id': item.id, 'dataset_id': item.dataset_id},
                    progress=round(pbar.n / pbar.total * 100, 0))
        else:
            # pick the most descriptive reference available for the failed element
            if isinstance(element.buffer, str):
                ref = element.buffer
            elif hasattr(element.buffer, "name"):
                ref = element.buffer.name
            else:
                ref = 'Unknown'
            reporter.set_index(ref=ref, status='error',
                               success=False,
                               error="{}\n{}".format(err, trace))
576
+
577
async def __async_upload_annotations(self, annotations_filepath, item):
    """
    Read a Dataloop annotations json file and upload its annotations to an item.

    :param annotations_filepath: path of the json file; its 'annotations' entry is uploaded
    :param item: item whose annotations repository performs the upload
    :return: whatever the item's async annotations upload returns
    """
    with open(annotations_filepath, 'r', encoding="utf8") as annotations_file:
        payload = json.load(annotations_file)
    # awaited here so the upload runs on the current event loop
    uploaded = await item.annotations._async_upload_annotations(annotations=payload['annotations'])
    return uploaded
582
+
583
@staticmethod
def url_to_data(url):
    """
    Download the content behind a URL so it can be uploaded as an item.

    Responses whose declared Content-Length exceeds ``max_size`` bytes are written to a file in a
    fresh temporary directory; smaller ones are kept in an in-memory buffer. When the server sends
    no usable Content-Length the size is unknown, so the content is written to disk as well.

    :param url: URL to download
    :return: tuple ``(save_locally, data, temp_dir)`` - ``data`` is the temp file path when
        ``save_locally`` is True, otherwise an ``io.BytesIO`` positioned at 0; ``temp_dir`` is the
        directory the caller must remove (None for in-memory data)
    """
    chunk_size = 8192
    max_size = 30000000
    temp_dir = None

    # This will download the binaries from the URL user provided
    prepared_request = requests.Request(method='GET', url=url).prepare()
    with requests.Session() as s:
        retry = Retry(
            total=3,
            read=3,
            connect=3,
            backoff_factor=1,
        )
        adapter = HTTPAdapter(max_retries=retry)
        s.mount('http://', adapter)
        s.mount('https://', adapter)
        response = s.send(request=prepared_request, stream=True)

    total_length = response.headers.get("content-length")
    try:
        save_locally = int(total_length) > max_size
    except (TypeError, ValueError):
        # header missing or malformed (e.g. chunked transfer): size is unknown, so stream to
        # disk rather than risk buffering an arbitrarily large body in memory
        save_locally = True

    if save_locally:
        # save to file
        temp_dir = tempfile.mkdtemp()
        temp_path = os.path.join(temp_dir, url.split('/')[-1].split('?')[0])
        with open(temp_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
        # save to output variable
        data = temp_path
    else:
        # save as byte stream
        data = io.BytesIO()
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:  # filter out keep-alive new chunks
                data.write(chunk)
        # go back to the beginning of the stream
        data.seek(0)
        data.name = url.split('/')[-1]

    return save_locally, data, temp_dir
627
+
628
@staticmethod
def is_url(url):
    """
    Check a value with ``validators.url``.

    :param url: value to check
    :return: the result of ``validators.url(url)``, or False if the validator raised
    """
    try:
        verdict = validators.url(url)
    except Exception:
        verdict = False
    return verdict
634
+
635
+ @staticmethod
636
+ def link(ref, type, mimetype=None, dataset_id=None):
637
+ """
638
+ :param ref:
639
+ :param type:
640
+ :param mimetype:
641
+ :param dataset_id:
642
+ """
643
+ link_info = {'type': type,
644
+ 'ref': ref}
645
+
646
+ if mimetype:
647
+ link_info['mimetype'] = mimetype
648
+
649
+ if dataset_id is not None:
650
+ link_info['datasetId'] = dataset_id
651
+
652
+ _json = {'type': 'link',
653
+ 'shebang': 'dataloop',
654
+ 'metadata': {'dltype': 'link',
655
+ 'linkInfo': link_info}}
656
+
657
+ uploaded_byte_io = io.BytesIO()
658
+ uploaded_byte_io.write(json.dumps(_json).encode())
659
+ uploaded_byte_io.seek(0)
660
+
661
+ return uploaded_byte_io