dtlpy 1.114.17__py3-none-any.whl → 1.116.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +491 -491
- dtlpy/__version__.py +1 -1
- dtlpy/assets/__init__.py +26 -26
- dtlpy/assets/code_server/config.yaml +2 -2
- dtlpy/assets/code_server/installation.sh +24 -24
- dtlpy/assets/code_server/launch.json +13 -13
- dtlpy/assets/code_server/settings.json +2 -2
- dtlpy/assets/main.py +53 -53
- dtlpy/assets/main_partial.py +18 -18
- dtlpy/assets/mock.json +11 -11
- dtlpy/assets/model_adapter.py +83 -83
- dtlpy/assets/package.json +61 -61
- dtlpy/assets/package_catalog.json +29 -29
- dtlpy/assets/package_gitignore +307 -307
- dtlpy/assets/service_runners/__init__.py +33 -33
- dtlpy/assets/service_runners/converter.py +96 -96
- dtlpy/assets/service_runners/multi_method.py +49 -49
- dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
- dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
- dtlpy/assets/service_runners/multi_method_item.py +52 -52
- dtlpy/assets/service_runners/multi_method_json.py +52 -52
- dtlpy/assets/service_runners/single_method.py +37 -37
- dtlpy/assets/service_runners/single_method_annotation.py +43 -43
- dtlpy/assets/service_runners/single_method_dataset.py +43 -43
- dtlpy/assets/service_runners/single_method_item.py +41 -41
- dtlpy/assets/service_runners/single_method_json.py +42 -42
- dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
- dtlpy/assets/voc_annotation_template.xml +23 -23
- dtlpy/caches/base_cache.py +32 -32
- dtlpy/caches/cache.py +473 -473
- dtlpy/caches/dl_cache.py +201 -201
- dtlpy/caches/filesystem_cache.py +89 -89
- dtlpy/caches/redis_cache.py +84 -84
- dtlpy/dlp/__init__.py +20 -20
- dtlpy/dlp/cli_utilities.py +367 -367
- dtlpy/dlp/command_executor.py +764 -764
- dtlpy/dlp/dlp +1 -1
- dtlpy/dlp/dlp.bat +1 -1
- dtlpy/dlp/dlp.py +128 -128
- dtlpy/dlp/parser.py +651 -651
- dtlpy/entities/__init__.py +83 -83
- dtlpy/entities/analytic.py +347 -311
- dtlpy/entities/annotation.py +1879 -1879
- dtlpy/entities/annotation_collection.py +699 -699
- dtlpy/entities/annotation_definitions/__init__.py +20 -20
- dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
- dtlpy/entities/annotation_definitions/box.py +195 -195
- dtlpy/entities/annotation_definitions/classification.py +67 -67
- dtlpy/entities/annotation_definitions/comparison.py +72 -72
- dtlpy/entities/annotation_definitions/cube.py +204 -204
- dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
- dtlpy/entities/annotation_definitions/description.py +32 -32
- dtlpy/entities/annotation_definitions/ellipse.py +124 -124
- dtlpy/entities/annotation_definitions/free_text.py +62 -62
- dtlpy/entities/annotation_definitions/gis.py +69 -69
- dtlpy/entities/annotation_definitions/note.py +139 -139
- dtlpy/entities/annotation_definitions/point.py +117 -117
- dtlpy/entities/annotation_definitions/polygon.py +182 -182
- dtlpy/entities/annotation_definitions/polyline.py +111 -111
- dtlpy/entities/annotation_definitions/pose.py +92 -92
- dtlpy/entities/annotation_definitions/ref_image.py +86 -86
- dtlpy/entities/annotation_definitions/segmentation.py +240 -240
- dtlpy/entities/annotation_definitions/subtitle.py +34 -34
- dtlpy/entities/annotation_definitions/text.py +85 -85
- dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
- dtlpy/entities/app.py +220 -220
- dtlpy/entities/app_module.py +107 -107
- dtlpy/entities/artifact.py +174 -174
- dtlpy/entities/assignment.py +399 -399
- dtlpy/entities/base_entity.py +214 -214
- dtlpy/entities/bot.py +113 -113
- dtlpy/entities/codebase.py +292 -296
- dtlpy/entities/collection.py +38 -38
- dtlpy/entities/command.py +169 -169
- dtlpy/entities/compute.py +449 -442
- dtlpy/entities/dataset.py +1299 -1285
- dtlpy/entities/directory_tree.py +44 -44
- dtlpy/entities/dpk.py +470 -470
- dtlpy/entities/driver.py +235 -223
- dtlpy/entities/execution.py +397 -397
- dtlpy/entities/feature.py +124 -124
- dtlpy/entities/feature_set.py +145 -145
- dtlpy/entities/filters.py +798 -645
- dtlpy/entities/gis_item.py +107 -107
- dtlpy/entities/integration.py +184 -184
- dtlpy/entities/item.py +959 -953
- dtlpy/entities/label.py +123 -123
- dtlpy/entities/links.py +85 -85
- dtlpy/entities/message.py +175 -175
- dtlpy/entities/model.py +684 -684
- dtlpy/entities/node.py +1005 -1005
- dtlpy/entities/ontology.py +810 -803
- dtlpy/entities/organization.py +287 -287
- dtlpy/entities/package.py +657 -657
- dtlpy/entities/package_defaults.py +5 -5
- dtlpy/entities/package_function.py +185 -185
- dtlpy/entities/package_module.py +113 -113
- dtlpy/entities/package_slot.py +118 -118
- dtlpy/entities/paged_entities.py +299 -299
- dtlpy/entities/pipeline.py +624 -624
- dtlpy/entities/pipeline_execution.py +279 -279
- dtlpy/entities/project.py +394 -394
- dtlpy/entities/prompt_item.py +505 -499
- dtlpy/entities/recipe.py +301 -301
- dtlpy/entities/reflect_dict.py +102 -102
- dtlpy/entities/resource_execution.py +138 -138
- dtlpy/entities/service.py +963 -958
- dtlpy/entities/service_driver.py +117 -117
- dtlpy/entities/setting.py +294 -294
- dtlpy/entities/task.py +495 -495
- dtlpy/entities/time_series.py +143 -143
- dtlpy/entities/trigger.py +426 -426
- dtlpy/entities/user.py +118 -118
- dtlpy/entities/webhook.py +124 -124
- dtlpy/examples/__init__.py +19 -19
- dtlpy/examples/add_labels.py +135 -135
- dtlpy/examples/add_metadata_to_item.py +21 -21
- dtlpy/examples/annotate_items_using_model.py +65 -65
- dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
- dtlpy/examples/annotations_convert_to_voc.py +9 -9
- dtlpy/examples/annotations_convert_to_yolo.py +9 -9
- dtlpy/examples/convert_annotation_types.py +51 -51
- dtlpy/examples/converter.py +143 -143
- dtlpy/examples/copy_annotations.py +22 -22
- dtlpy/examples/copy_folder.py +31 -31
- dtlpy/examples/create_annotations.py +51 -51
- dtlpy/examples/create_video_annotations.py +83 -83
- dtlpy/examples/delete_annotations.py +26 -26
- dtlpy/examples/filters.py +113 -113
- dtlpy/examples/move_item.py +23 -23
- dtlpy/examples/play_video_annotation.py +13 -13
- dtlpy/examples/show_item_and_mask.py +53 -53
- dtlpy/examples/triggers.py +49 -49
- dtlpy/examples/upload_batch_of_items.py +20 -20
- dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
- dtlpy/examples/upload_items_with_modalities.py +43 -43
- dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
- dtlpy/examples/upload_yolo_format_annotations.py +70 -70
- dtlpy/exceptions.py +125 -125
- dtlpy/miscellaneous/__init__.py +20 -20
- dtlpy/miscellaneous/dict_differ.py +95 -95
- dtlpy/miscellaneous/git_utils.py +217 -217
- dtlpy/miscellaneous/json_utils.py +14 -14
- dtlpy/miscellaneous/list_print.py +105 -105
- dtlpy/miscellaneous/zipping.py +130 -130
- dtlpy/ml/__init__.py +20 -20
- dtlpy/ml/base_feature_extractor_adapter.py +27 -27
- dtlpy/ml/base_model_adapter.py +1257 -1086
- dtlpy/ml/metrics.py +461 -461
- dtlpy/ml/predictions_utils.py +274 -274
- dtlpy/ml/summary_writer.py +57 -57
- dtlpy/ml/train_utils.py +60 -60
- dtlpy/new_instance.py +252 -252
- dtlpy/repositories/__init__.py +56 -56
- dtlpy/repositories/analytics.py +85 -85
- dtlpy/repositories/annotations.py +916 -916
- dtlpy/repositories/apps.py +383 -383
- dtlpy/repositories/artifacts.py +452 -452
- dtlpy/repositories/assignments.py +599 -599
- dtlpy/repositories/bots.py +213 -213
- dtlpy/repositories/codebases.py +559 -559
- dtlpy/repositories/collections.py +332 -332
- dtlpy/repositories/commands.py +152 -158
- dtlpy/repositories/compositions.py +61 -61
- dtlpy/repositories/computes.py +439 -435
- dtlpy/repositories/datasets.py +1504 -1291
- dtlpy/repositories/downloader.py +976 -903
- dtlpy/repositories/dpks.py +433 -433
- dtlpy/repositories/drivers.py +482 -470
- dtlpy/repositories/executions.py +815 -817
- dtlpy/repositories/feature_sets.py +226 -226
- dtlpy/repositories/features.py +255 -238
- dtlpy/repositories/integrations.py +484 -484
- dtlpy/repositories/items.py +912 -909
- dtlpy/repositories/messages.py +94 -94
- dtlpy/repositories/models.py +1000 -988
- dtlpy/repositories/nodes.py +80 -80
- dtlpy/repositories/ontologies.py +511 -511
- dtlpy/repositories/organizations.py +525 -525
- dtlpy/repositories/packages.py +1941 -1941
- dtlpy/repositories/pipeline_executions.py +451 -451
- dtlpy/repositories/pipelines.py +640 -640
- dtlpy/repositories/projects.py +539 -539
- dtlpy/repositories/recipes.py +419 -399
- dtlpy/repositories/resource_executions.py +137 -137
- dtlpy/repositories/schema.py +120 -120
- dtlpy/repositories/service_drivers.py +213 -213
- dtlpy/repositories/services.py +1704 -1704
- dtlpy/repositories/settings.py +339 -339
- dtlpy/repositories/tasks.py +1477 -1477
- dtlpy/repositories/times_series.py +278 -278
- dtlpy/repositories/triggers.py +536 -536
- dtlpy/repositories/upload_element.py +257 -257
- dtlpy/repositories/uploader.py +661 -651
- dtlpy/repositories/webhooks.py +249 -249
- dtlpy/services/__init__.py +22 -22
- dtlpy/services/aihttp_retry.py +131 -131
- dtlpy/services/api_client.py +1785 -1782
- dtlpy/services/api_reference.py +40 -40
- dtlpy/services/async_utils.py +133 -133
- dtlpy/services/calls_counter.py +44 -44
- dtlpy/services/check_sdk.py +68 -68
- dtlpy/services/cookie.py +115 -115
- dtlpy/services/create_logger.py +156 -156
- dtlpy/services/events.py +84 -84
- dtlpy/services/logins.py +235 -235
- dtlpy/services/reporter.py +256 -256
- dtlpy/services/service_defaults.py +91 -91
- dtlpy/utilities/__init__.py +20 -20
- dtlpy/utilities/annotations/__init__.py +16 -16
- dtlpy/utilities/annotations/annotation_converters.py +269 -269
- dtlpy/utilities/base_package_runner.py +285 -264
- dtlpy/utilities/converter.py +1650 -1650
- dtlpy/utilities/dataset_generators/__init__.py +1 -1
- dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
- dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
- dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
- dtlpy/utilities/local_development/__init__.py +1 -1
- dtlpy/utilities/local_development/local_session.py +179 -179
- dtlpy/utilities/reports/__init__.py +2 -2
- dtlpy/utilities/reports/figures.py +343 -343
- dtlpy/utilities/reports/report.py +71 -71
- dtlpy/utilities/videos/__init__.py +17 -17
- dtlpy/utilities/videos/video_player.py +598 -598
- dtlpy/utilities/videos/videos.py +470 -470
- {dtlpy-1.114.17.data → dtlpy-1.116.6.data}/scripts/dlp +1 -1
- dtlpy-1.116.6.data/scripts/dlp.bat +2 -0
- {dtlpy-1.114.17.data → dtlpy-1.116.6.data}/scripts/dlp.py +128 -128
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/METADATA +186 -183
- dtlpy-1.116.6.dist-info/RECORD +239 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/WHEEL +1 -1
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/licenses/LICENSE +200 -200
- tests/features/environment.py +551 -551
- dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
- dtlpy-1.114.17.data/scripts/dlp.bat +0 -2
- dtlpy-1.114.17.dist-info/RECORD +0 -240
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.116.6.dist-info}/top_level.txt +0 -0
dtlpy/repositories/uploader.py
CHANGED
|
@@ -1,651 +1,661 @@
|
|
|
1
|
-
import sys
|
|
2
|
-
from collections import deque
|
|
3
|
-
import validators
|
|
4
|
-
import traceback
|
|
5
|
-
import tempfile
|
|
6
|
-
import requests
|
|
7
|
-
import asyncio
|
|
8
|
-
import logging
|
|
9
|
-
import pandas
|
|
10
|
-
import shutil
|
|
11
|
-
import json
|
|
12
|
-
import time
|
|
13
|
-
import tqdm
|
|
14
|
-
import os
|
|
15
|
-
import io
|
|
16
|
-
import numpy as np
|
|
17
|
-
from requests.adapters import HTTPAdapter
|
|
18
|
-
from urllib3.util import Retry
|
|
19
|
-
from PIL import Image
|
|
20
|
-
|
|
21
|
-
from . import upload_element
|
|
22
|
-
|
|
23
|
-
from .. import PlatformException, entities, repositories, exceptions
|
|
24
|
-
from ..services import Reporter
|
|
25
|
-
|
|
26
|
-
logger = logging.getLogger(name='dtlpy')
|
|
27
|
-
|
|
28
|
-
NUM_TRIES = 5 # try to upload 3 time before fail on item
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class Uploader:
|
|
32
|
-
def __init__(self, items_repository: repositories.Items, output_entity=entities.Item, no_output=False):
|
|
33
|
-
assert isinstance(items_repository, repositories.Items)
|
|
34
|
-
self.items_repository = items_repository
|
|
35
|
-
self.remote_url = "/datasets/{}/items".format(self.items_repository.dataset.id)
|
|
36
|
-
self.__stop_create_existence_dict = False
|
|
37
|
-
self.mode = 'skip'
|
|
38
|
-
self.num_files = 0
|
|
39
|
-
self.i_item = 0
|
|
40
|
-
self.pbar = tqdm.tqdm(total=0,
|
|
41
|
-
disable=self.items_repository._client_api.verbose.disable_progress_bar_upload_items,
|
|
42
|
-
file=sys.stdout, desc='Upload Items')
|
|
43
|
-
self.reporter = Reporter(num_workers=0,
|
|
44
|
-
resource=Reporter.ITEMS_UPLOAD,
|
|
45
|
-
print_error_logs=items_repository._client_api.verbose.print_error_logs,
|
|
46
|
-
output_entity=output_entity,
|
|
47
|
-
client_api=items_repository._client_api,
|
|
48
|
-
no_output=no_output)
|
|
49
|
-
|
|
50
|
-
def upload(
|
|
51
|
-
self,
|
|
52
|
-
# what to upload
|
|
53
|
-
local_path,
|
|
54
|
-
local_annotations_path=None,
|
|
55
|
-
# upload options
|
|
56
|
-
remote_path=None,
|
|
57
|
-
remote_name=None,
|
|
58
|
-
file_types=None,
|
|
59
|
-
overwrite=False,
|
|
60
|
-
item_metadata=None,
|
|
61
|
-
export_version: str = entities.ExportVersion.V1,
|
|
62
|
-
item_description=None,
|
|
63
|
-
raise_on_error=False,
|
|
64
|
-
return_as_list=False
|
|
65
|
-
):
|
|
66
|
-
"""
|
|
67
|
-
Upload local file to dataset.
|
|
68
|
-
Local filesystem will remain.
|
|
69
|
-
If `*` at the end of local_path (e.g. '/images/*') items will be uploaded without head directory
|
|
70
|
-
|
|
71
|
-
:param local_path: local file or folder to upload
|
|
72
|
-
:param local_annotations_path: path to Dataloop format annotations json files.
|
|
73
|
-
:param remote_path: remote path to save.
|
|
74
|
-
:param remote_name: remote base name to save.
|
|
75
|
-
:param file_types: list of file type to upload. e.g ['.jpg', '.png']. default is all
|
|
76
|
-
:param overwrite: optional - default = False
|
|
77
|
-
:param item_metadata: upload the items with the metadata dictionary
|
|
78
|
-
:param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
79
|
-
:param str item_description: add a string description to the uploaded item
|
|
80
|
-
:param bool raise_on_error: raise an exception if an error occurs
|
|
81
|
-
:param bool return_as_list: always return a list of items
|
|
82
|
-
|
|
83
|
-
:return: Output (list)
|
|
84
|
-
"""
|
|
85
|
-
###################
|
|
86
|
-
# Default options #
|
|
87
|
-
###################
|
|
88
|
-
if overwrite:
|
|
89
|
-
self.mode = 'overwrite'
|
|
90
|
-
if isinstance(local_path, pandas.DataFrame):
|
|
91
|
-
futures = self._build_elements_from_df(local_path)
|
|
92
|
-
else:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
logger.info("
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
if
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
if not
|
|
199
|
-
raise PlatformException(error="400",
|
|
200
|
-
message='
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
if
|
|
206
|
-
if not
|
|
207
|
-
raise PlatformException(error="400",
|
|
208
|
-
message='
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
if
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
'
|
|
261
|
-
'
|
|
262
|
-
'
|
|
263
|
-
'
|
|
264
|
-
'
|
|
265
|
-
'
|
|
266
|
-
'
|
|
267
|
-
'
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
if
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
if
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
item =
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
logger.debug("Upload item: {path}. Try {i}/{n}.
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
# save
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
:
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
1
|
+
import sys
|
|
2
|
+
from collections import deque
|
|
3
|
+
import validators
|
|
4
|
+
import traceback
|
|
5
|
+
import tempfile
|
|
6
|
+
import requests
|
|
7
|
+
import asyncio
|
|
8
|
+
import logging
|
|
9
|
+
import pandas
|
|
10
|
+
import shutil
|
|
11
|
+
import json
|
|
12
|
+
import time
|
|
13
|
+
import tqdm
|
|
14
|
+
import os
|
|
15
|
+
import io
|
|
16
|
+
import numpy as np
|
|
17
|
+
from requests.adapters import HTTPAdapter
|
|
18
|
+
from urllib3.util import Retry
|
|
19
|
+
from PIL import Image
|
|
20
|
+
|
|
21
|
+
from . import upload_element
|
|
22
|
+
|
|
23
|
+
from .. import PlatformException, entities, repositories, exceptions
|
|
24
|
+
from ..services import Reporter
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(name='dtlpy')
|
|
27
|
+
|
|
28
|
+
NUM_TRIES = 5 # try to upload 3 time before fail on item
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Uploader:
|
|
32
|
+
def __init__(self, items_repository: repositories.Items, output_entity=entities.Item, no_output=False):
|
|
33
|
+
assert isinstance(items_repository, repositories.Items)
|
|
34
|
+
self.items_repository = items_repository
|
|
35
|
+
self.remote_url = "/datasets/{}/items".format(self.items_repository.dataset.id)
|
|
36
|
+
self.__stop_create_existence_dict = False
|
|
37
|
+
self.mode = 'skip'
|
|
38
|
+
self.num_files = 0
|
|
39
|
+
self.i_item = 0
|
|
40
|
+
self.pbar = tqdm.tqdm(total=0,
|
|
41
|
+
disable=self.items_repository._client_api.verbose.disable_progress_bar_upload_items,
|
|
42
|
+
file=sys.stdout, desc='Upload Items')
|
|
43
|
+
self.reporter = Reporter(num_workers=0,
|
|
44
|
+
resource=Reporter.ITEMS_UPLOAD,
|
|
45
|
+
print_error_logs=items_repository._client_api.verbose.print_error_logs,
|
|
46
|
+
output_entity=output_entity,
|
|
47
|
+
client_api=items_repository._client_api,
|
|
48
|
+
no_output=no_output)
|
|
49
|
+
|
|
50
|
+
def upload(
|
|
51
|
+
self,
|
|
52
|
+
# what to upload
|
|
53
|
+
local_path,
|
|
54
|
+
local_annotations_path=None,
|
|
55
|
+
# upload options
|
|
56
|
+
remote_path=None,
|
|
57
|
+
remote_name=None,
|
|
58
|
+
file_types=None,
|
|
59
|
+
overwrite=False,
|
|
60
|
+
item_metadata=None,
|
|
61
|
+
export_version: str = entities.ExportVersion.V1,
|
|
62
|
+
item_description=None,
|
|
63
|
+
raise_on_error=False,
|
|
64
|
+
return_as_list=False
|
|
65
|
+
):
|
|
66
|
+
"""
|
|
67
|
+
Upload local file to dataset.
|
|
68
|
+
Local filesystem will remain.
|
|
69
|
+
If `*` at the end of local_path (e.g. '/images/*') items will be uploaded without head directory
|
|
70
|
+
|
|
71
|
+
:param local_path: local file or folder to upload
|
|
72
|
+
:param local_annotations_path: path to Dataloop format annotations json files.
|
|
73
|
+
:param remote_path: remote path to save.
|
|
74
|
+
:param remote_name: remote base name to save.
|
|
75
|
+
:param file_types: list of file type to upload. e.g ['.jpg', '.png']. default is all
|
|
76
|
+
:param overwrite: optional - default = False
|
|
77
|
+
:param item_metadata: upload the items with the metadata dictionary
|
|
78
|
+
:param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
79
|
+
:param str item_description: add a string description to the uploaded item
|
|
80
|
+
:param bool raise_on_error: raise an exception if an error occurs
|
|
81
|
+
:param bool return_as_list: always return a list of items
|
|
82
|
+
|
|
83
|
+
:return: Output (list)
|
|
84
|
+
"""
|
|
85
|
+
###################
|
|
86
|
+
# Default options #
|
|
87
|
+
###################
|
|
88
|
+
if overwrite:
|
|
89
|
+
self.mode = 'overwrite'
|
|
90
|
+
if isinstance(local_path, pandas.DataFrame):
|
|
91
|
+
futures = self._build_elements_from_df(local_path)
|
|
92
|
+
else:
|
|
93
|
+
start_time = time.time()
|
|
94
|
+
logger.debug(f"Building elements from inputs started: start time: {start_time}")
|
|
95
|
+
futures = self._build_elements_from_inputs(local_path=local_path,
|
|
96
|
+
local_annotations_path=local_annotations_path,
|
|
97
|
+
# upload options
|
|
98
|
+
remote_path=remote_path,
|
|
99
|
+
remote_name=remote_name,
|
|
100
|
+
file_types=file_types,
|
|
101
|
+
item_metadata=item_metadata,
|
|
102
|
+
export_version=export_version,
|
|
103
|
+
item_description=item_description)
|
|
104
|
+
logger.debug(f"Building elements from inputs completed: time taken: {time.time() - start_time}")
|
|
105
|
+
num_files = len(futures)
|
|
106
|
+
while futures:
|
|
107
|
+
futures.popleft().result()
|
|
108
|
+
logger.info("Uploading {} items..".format(num_files))
|
|
109
|
+
self.pbar.close()
|
|
110
|
+
# summary
|
|
111
|
+
logger.info("Number of total files: {}".format(num_files))
|
|
112
|
+
status_list = self.reporter.status_list
|
|
113
|
+
for action in set(status_list):
|
|
114
|
+
n_for_action = self.reporter.status_count(status=action)
|
|
115
|
+
logger.info("Number of files {}: {}".format(action, n_for_action))
|
|
116
|
+
|
|
117
|
+
# log error
|
|
118
|
+
errors_count = self.reporter.failure_count
|
|
119
|
+
if errors_count > 0:
|
|
120
|
+
error_text = ""
|
|
121
|
+
log_filepath = self.reporter.generate_log_files()
|
|
122
|
+
# Get up to 5 error examples for the exception message
|
|
123
|
+
if self.reporter._errors:
|
|
124
|
+
error_examples = list(self.reporter._errors.values())[:5]
|
|
125
|
+
error_text = " | ".join(error_examples)
|
|
126
|
+
error_message = f"Errors in {errors_count} files. Errors: {error_text}"
|
|
127
|
+
if log_filepath is not None:
|
|
128
|
+
error_message += f", see {log_filepath} for full log"
|
|
129
|
+
if raise_on_error is True:
|
|
130
|
+
raise PlatformException(
|
|
131
|
+
error="400", message=error_message
|
|
132
|
+
)
|
|
133
|
+
else:
|
|
134
|
+
logger.warning(error_message)
|
|
135
|
+
|
|
136
|
+
if return_as_list is True:
|
|
137
|
+
# return list of items
|
|
138
|
+
return list(self.reporter.output)
|
|
139
|
+
if len(status_list) == 1:
|
|
140
|
+
# if there is only one item, return it
|
|
141
|
+
try:
|
|
142
|
+
return next(self.reporter.output)
|
|
143
|
+
except StopIteration:
|
|
144
|
+
# if there is no items, return None
|
|
145
|
+
return None
|
|
146
|
+
# if there are multiple items, return the generator
|
|
147
|
+
return self.reporter.output
|
|
148
|
+
|
|
149
|
+
    def _build_elements_from_inputs(self,
                                    local_path,
                                    local_annotations_path,
                                    # upload options
                                    remote_path,
                                    file_types,
                                    remote_name,
                                    item_metadata,
                                    export_version: str = entities.ExportVersion.V1,
                                    item_description=None):
        """
        Normalize upload inputs and schedule one upload per resolved element.

        ``local_path`` may be a single value or a list of: directory path
        (optionally ending with '*' to drop the head folder), file path,
        'external://' storage reference, URL, numpy.ndarray, bytes / binary
        stream, or a Dataloop Item / Link / PromptItem / ItemGis entity.

        :param local_path: item source(s) to upload (see above)
        :param local_annotations_path: annotation file path(s), aligned one-to-one with local_path
        :param remote_path: remote directory; normalized to a '/.../' form (kept None for 'external://' sources)
        :param file_types: optional list of file extensions to include when walking a directory, e.g. [".jpg"]
        :param remote_name: remote file name(s), aligned one-to-one with local_path
        :param item_metadata: metadata dict (or entities.ExportMetadata mode) applied to uploaded items
        :param export_version: annotation export format version
        :param item_description: optional description string set on uploaded items
        :return: deque of futures, one per scheduled upload (from upload_single_element)
        :raises PlatformException: on invalid argument types, mismatched list lengths, or unknown element type
        """
        # fix remote path: 'external://' sources keep remote_path=None,
        # anything else is normalized to start and end with '/'
        if remote_path is None:
            if isinstance(local_path, str) and local_path.startswith('external://'):
                remote_path = None
            else:
                remote_path = "/"
        if remote_path and not remote_path.startswith('/'):
            remote_path = f"/{remote_path}"
        if remote_path and not remote_path.endswith("/"):
            remote_path = f"{remote_path}/"

        # remote name is joined onto remote_path later; a leading '/' would break that
        if remote_name:
            remote_name = remote_name.lstrip('/')

        # argument type validation (fail fast before any scheduling happens)
        if file_types is not None and not isinstance(file_types, list):
            msg = '"file_types" should be a list of file extension. e.g [".jpg", ".png"]'
            raise PlatformException(error="400", message=msg)
        if item_metadata is not None:
            if not isinstance(item_metadata, dict) and not isinstance(item_metadata, entities.ExportMetadata):
                msg = '"item_metadata" should be a metadata dictionary. Got type: {}'.format(type(item_metadata))
                raise PlatformException(error="400", message=msg)
        if item_description is not None:
            if not isinstance(item_description, str):
                msg = '"item_description" should be a string. Got type: {}'.format(type(item_description))
                raise PlatformException(error="400", message=msg)

        ##########################
        # Convert inputs to list #
        ##########################
        # single inputs are wrapped in one-element lists; list inputs must be
        # aligned in length with local_path
        local_annotations_path_list = None
        remote_name_list = None
        if not isinstance(local_path, list):
            local_path_list = [local_path]
            if remote_name is not None:
                if not isinstance(remote_name, str):
                    raise PlatformException(error="400",
                                            message='remote_name must be a string, got: {}'.format(type(remote_name)))
                remote_name_list = [remote_name]
            if local_annotations_path is not None:
                if not isinstance(local_annotations_path, str):
                    raise PlatformException(error="400",
                                            message='local_annotations_path must be a string, got: {}'.format(
                                                type(local_annotations_path)))
                local_annotations_path_list = [local_annotations_path]
        else:
            local_path_list = local_path
            if remote_name is not None:
                if not isinstance(remote_name, list):
                    raise PlatformException(error="400",
                                            message='remote_name must be a list, got: {}'.format(type(remote_name)))
                if not len(remote_name) == len(local_path_list):
                    raise PlatformException(error="400",
                                            message='remote_name and local_path_list must be of same length. '
                                                    'Received: remote_name: {}, '
                                                    'local_path_list: {}'.format(len(remote_name),
                                                                                 len(local_path_list)))
                remote_name_list = remote_name
            if local_annotations_path is not None:
                if not len(local_annotations_path) == len(local_path_list):
                    raise PlatformException(error="400",
                                            message='local_annotations_path and local_path_list must be of same lenght.'
                                                    ' Received: local_annotations_path: {}, '
                                                    'local_path_list: {}'.format(len(local_annotations_path),
                                                                                 len(local_path_list)))
                local_annotations_path_list = local_annotations_path

        # pad missing optional inputs with None so zip() below stays aligned
        if local_annotations_path is None:
            local_annotations_path_list = [None] * len(local_path_list)

        if remote_name is None:
            remote_name_list = [None] * len(local_path_list)

        futures = deque()
        total_size = 0
        for upload_item_element, remote_name, upload_annotations_element in zip(local_path_list,
                                                                                remote_name_list,
                                                                                local_annotations_path_list):
            if isinstance(upload_item_element, np.ndarray):
                # convert numpy.ndarray to io.BytesIO (PNG/JPEG encoded)
                if remote_name is None:
                    raise PlatformException(
                        error="400",
                        message='Upload element type was numpy.ndarray. providing param "remote_name" is mandatory')
                file_extension = os.path.splitext(remote_name)
                if file_extension[1].lower() in ['.jpg', '.jpeg']:
                    item_format = 'JPEG'
                elif file_extension[1].lower() == '.png':
                    item_format = 'PNG'
                else:
                    raise PlatformException(
                        error="400",
                        message='"remote_name" with .jpg/.jpeg or .png extension are supported '
                                'when upload element of numpy.ndarray type.')

                buffer = io.BytesIO()
                Image.fromarray(upload_item_element).save(buffer, format=item_format)
                buffer.seek(0)
                buffer.name = remote_name
                upload_item_element = buffer

            # shared kwargs bundle consumed by the upload_element.*UploadElement
            # constructors below
            all_upload_elements = {
                'upload_item_element': upload_item_element,
                'total_size': total_size,
                'remote_name': remote_name,
                'remote_path': remote_path,
                'upload_annotations_element': upload_annotations_element,
                'item_metadata': item_metadata,
                'annotations_filepath': None,
                'with_head_folder': None,
                'filename': None,
                'root': None,
                'export_version': export_version,
                'item_description': item_description,
                'driver_path': None
            }
            if isinstance(upload_item_element, str):
                # a trailing '*' means "upload the folder contents without the head folder"
                with_head_folder = True
                if upload_item_element.endswith('*'):
                    with_head_folder = False
                    upload_item_element = os.path.dirname(upload_item_element)
                    all_upload_elements['upload_item_element'] = upload_item_element

                if os.path.isdir(upload_item_element):
                    # walk the directory and schedule every matching file
                    # NOTE(review): the same dict is mutated per file; assumes
                    # DirUploadElement copies the values in __init__ — confirm
                    for root, subdirs, files in os.walk(upload_item_element):
                        for filename in files:
                            all_upload_elements['with_head_folder'] = with_head_folder
                            all_upload_elements['filename'] = filename
                            all_upload_elements['root'] = root
                            _, ext = os.path.splitext(filename)
                            if file_types is None or ext in file_types:
                                upload_elem = upload_element.DirUploadElement(all_upload_elements=all_upload_elements)
                                futures.append(self.upload_single_element(upload_elem))
                    continue

                # add single file
                elif os.path.isfile(upload_item_element):
                    upload_elem = upload_element.FileUploadElement(all_upload_elements=all_upload_elements)

                elif upload_item_element.startswith('external://'):
                    # best-effort: resolve the dataset's storage-driver path
                    try:
                        driver_path = repositories.Drivers.get(driver_id=self.items_repository.dataset.driver).path
                        all_upload_elements['driver_path'] = driver_path
                    except Exception:
                        logger.error("Attempting to upload external item without driver path. This may cause issues.")
                    upload_elem = upload_element.ExternalItemUploadElement(all_upload_elements=all_upload_elements)

                elif self.is_url(upload_item_element):
                    upload_elem = upload_element.UrlUploadElement(all_upload_elements=all_upload_elements)

                else:
                    raise PlatformException("404", "Unknown local path: {}".format(local_path))

            elif isinstance(upload_item_element, entities.Item):
                upload_elem = upload_element.ItemLinkUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, entities.Link):
                upload_elem = upload_element.LinkUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, entities.PromptItem):
                upload_elem = upload_element.PromptUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, entities.ItemGis):
                # serialize the GIS entity to a JSON buffer and upload it as binary
                buffer = io.BytesIO(json.dumps(upload_item_element.to_json()).encode('utf-8'))
                buffer.name = upload_item_element.name
                all_upload_elements['upload_item_element'] = buffer
                upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)

            elif isinstance(upload_item_element, bytes) or \
                    isinstance(upload_item_element, io.BytesIO) or \
                    isinstance(upload_item_element, io.BufferedReader) or \
                    isinstance(upload_item_element, io.TextIOWrapper):
                upload_elem = upload_element.BinaryUploadElement(all_upload_elements=all_upload_elements)
                # get size from binaries (best-effort; __sizeof__ is only approximate)
                try:
                    total_size += upload_item_element.__sizeof__()
                except Exception:
                    logger.warning("Cant get binaries size")

            else:
                raise PlatformException(
                    error="400",
                    message=f"Unknown element type to upload ('local_path'). received type: {type(upload_item_element)}. "
                            "known types (or list of those types): str (dir, file, url), bytes, io.BytesIO, "
                            "numpy.ndarray, io.TextIOWrapper, Dataloop.Item, Dataloop.Link")

            futures.append(self.upload_single_element(upload_elem))
        return futures
|
|
346
|
+
|
|
347
|
+
def upload_single_element(self, elem):
|
|
348
|
+
"""
|
|
349
|
+
upload a signal element
|
|
350
|
+
:param elem: UploadElement
|
|
351
|
+
"""
|
|
352
|
+
self.num_files += 1
|
|
353
|
+
self.i_item += 1
|
|
354
|
+
self.pbar.total += 1
|
|
355
|
+
self.reporter.upcount_num_workers()
|
|
356
|
+
future = asyncio.run_coroutine_threadsafe(
|
|
357
|
+
self.__upload_single_item_wrapper(element=elem,
|
|
358
|
+
mode=self.mode,
|
|
359
|
+
pbar=self.pbar,
|
|
360
|
+
reporter=self.reporter),
|
|
361
|
+
loop=self.items_repository._client_api.event_loop.loop)
|
|
362
|
+
return future
|
|
363
|
+
|
|
364
|
+
def _build_elements_from_df(self, df: pandas.DataFrame):
|
|
365
|
+
futures = deque()
|
|
366
|
+
for index, row in df.iterrows():
|
|
367
|
+
# DEFAULTS
|
|
368
|
+
elem = {'local_annotations_path': None,
|
|
369
|
+
'remote_path': None,
|
|
370
|
+
'remote_name': None,
|
|
371
|
+
'file_types': None,
|
|
372
|
+
'item_metadata': None,
|
|
373
|
+
'item_description': None}
|
|
374
|
+
elem.update(row)
|
|
375
|
+
future = self._build_elements_from_inputs(**elem)
|
|
376
|
+
# append deque using +
|
|
377
|
+
futures += future
|
|
378
|
+
return futures
|
|
379
|
+
|
|
380
|
+
    async def __single_external_sync(self, element):
        """
        Register an externally-stored file ('external://<storageId>') as a dataset item.

        Posts an import request to the platform instead of uploading binaries;
        the dataset's storage driver resolves the actual file.

        :param element: UploadElement whose buffer is an 'external://...' reference
        :return: (Item, action) where action comes from the 'x-item-op' response header
        :raises exceptions.PlatformException: when the import request fails
        """
        # 'external://<storage-id>' -> keep only the storage id part
        storage_id = element.buffer.split('//')[1]
        req_json = dict()
        req_json['filename'] = element.remote_filepath
        req_json['storageId'] = storage_id
        success, response = self.items_repository._client_api.gen_request(req_type='post',
                                                                          path='/datasets/{}/imports'.format(
                                                                              self.items_repository.dataset.id),
                                                                          json_req=[req_json])

        if success:
            # the imports endpoint returns a list; a single request yields one item
            items = entities.Item.from_json(client_api=self.items_repository._client_api, _json=response.json()[0],
                                            project=self.items_repository._dataset._project,
                                            dataset=self.items_repository.dataset)
        else:
            raise exceptions.PlatformException(response)
        return items, response.headers.get('x-item-op', 'na')
|
|
397
|
+
|
|
398
|
+
async def __single_async_upload(self,
|
|
399
|
+
filepath,
|
|
400
|
+
remote_path,
|
|
401
|
+
uploaded_filename,
|
|
402
|
+
last_try,
|
|
403
|
+
mode,
|
|
404
|
+
item_metadata,
|
|
405
|
+
callback,
|
|
406
|
+
item_description
|
|
407
|
+
):
|
|
408
|
+
"""
|
|
409
|
+
Upload an item to dataset
|
|
410
|
+
|
|
411
|
+
:param filepath: local filepath of the item
|
|
412
|
+
:param remote_path: remote directory of filepath to upload
|
|
413
|
+
:param uploaded_filename: optional - remote filename
|
|
414
|
+
:param last_try: print log error only if last try
|
|
415
|
+
:param mode: 'skip' 'overwrite'
|
|
416
|
+
:param item_metadata: item metadata
|
|
417
|
+
:param str item_description: add a string description to the uploaded item
|
|
418
|
+
:param callback:
|
|
419
|
+
:return: Item object
|
|
420
|
+
"""
|
|
421
|
+
|
|
422
|
+
need_close = False
|
|
423
|
+
if isinstance(filepath, str):
|
|
424
|
+
# upload local file
|
|
425
|
+
if not os.path.isfile(filepath):
|
|
426
|
+
raise PlatformException(error="404", message="Filepath doesnt exists. file: {}".format(filepath))
|
|
427
|
+
if uploaded_filename is None:
|
|
428
|
+
uploaded_filename = os.path.basename(filepath)
|
|
429
|
+
if os.path.isfile(filepath):
|
|
430
|
+
item_type = 'file'
|
|
431
|
+
else:
|
|
432
|
+
item_type = 'dir'
|
|
433
|
+
item_size = os.stat(filepath).st_size
|
|
434
|
+
to_upload = open(filepath, 'rb')
|
|
435
|
+
need_close = True
|
|
436
|
+
|
|
437
|
+
else:
|
|
438
|
+
# upload from buffer
|
|
439
|
+
if isinstance(filepath, bytes):
|
|
440
|
+
to_upload = io.BytesIO(filepath)
|
|
441
|
+
elif isinstance(filepath, io.BytesIO):
|
|
442
|
+
to_upload = filepath
|
|
443
|
+
elif isinstance(filepath, io.BufferedReader):
|
|
444
|
+
to_upload = filepath
|
|
445
|
+
elif isinstance(filepath, io.TextIOWrapper):
|
|
446
|
+
to_upload = filepath
|
|
447
|
+
else:
|
|
448
|
+
raise PlatformException("400", "Unknown input filepath type received: {}".format(type(filepath)))
|
|
449
|
+
|
|
450
|
+
if uploaded_filename is None:
|
|
451
|
+
if hasattr(filepath, "name"):
|
|
452
|
+
uploaded_filename = filepath.name
|
|
453
|
+
else:
|
|
454
|
+
raise PlatformException(error="400",
|
|
455
|
+
message="Must have filename when uploading bytes array (uploaded_filename)")
|
|
456
|
+
|
|
457
|
+
item_size = to_upload.seek(0, 2)
|
|
458
|
+
to_upload.seek(0)
|
|
459
|
+
item_type = 'file'
|
|
460
|
+
try:
|
|
461
|
+
response = await self.items_repository._client_api.upload_file_async(to_upload=to_upload,
|
|
462
|
+
item_type=item_type,
|
|
463
|
+
item_size=item_size,
|
|
464
|
+
item_metadata=item_metadata,
|
|
465
|
+
remote_url=self.remote_url,
|
|
466
|
+
uploaded_filename=uploaded_filename,
|
|
467
|
+
remote_path=remote_path,
|
|
468
|
+
callback=callback,
|
|
469
|
+
mode=mode,
|
|
470
|
+
item_description=item_description)
|
|
471
|
+
except Exception:
|
|
472
|
+
raise
|
|
473
|
+
finally:
|
|
474
|
+
if need_close:
|
|
475
|
+
to_upload.close()
|
|
476
|
+
|
|
477
|
+
if response.ok:
|
|
478
|
+
if item_size != response.json().get('metadata', {}).get('system', {}).get('size', 0):
|
|
479
|
+
self.items_repository.delete(item_id=response.json()['id'])
|
|
480
|
+
raise PlatformException(500,
|
|
481
|
+
"The uploaded file is corrupted. "
|
|
482
|
+
"Please try again. If it happens again please contact support.")
|
|
483
|
+
item = self.items_repository.items_entity.from_json(client_api=self.items_repository._client_api,
|
|
484
|
+
_json=response.json(),
|
|
485
|
+
dataset=self.items_repository.dataset)
|
|
486
|
+
else:
|
|
487
|
+
raise PlatformException(response)
|
|
488
|
+
return item, response.headers.get('x-item-op', 'na')
|
|
489
|
+
|
|
490
|
+
async def __upload_single_item_wrapper(self, element, pbar, reporter, mode):
|
|
491
|
+
async with self.items_repository._client_api.event_loop.semaphore('items.upload', 5):
|
|
492
|
+
# assert isinstance(element, UploadElement)
|
|
493
|
+
item = False
|
|
494
|
+
err = None
|
|
495
|
+
trace = None
|
|
496
|
+
saved_locally = False
|
|
497
|
+
temp_dir = None
|
|
498
|
+
action = 'na'
|
|
499
|
+
remote_folder, remote_name = os.path.split(element.remote_filepath)
|
|
500
|
+
|
|
501
|
+
if element.type == 'url':
|
|
502
|
+
saved_locally, element.buffer, temp_dir = self.url_to_data(element.buffer)
|
|
503
|
+
elif element.type == 'link':
|
|
504
|
+
element.buffer = self.link(ref=element.buffer.ref, dataset_id=element.buffer.dataset_id,
|
|
505
|
+
type=element.buffer.type, mimetype=element.buffer.mimetype)
|
|
506
|
+
|
|
507
|
+
for i_try in range(NUM_TRIES):
|
|
508
|
+
try:
|
|
509
|
+
logger.debug("Upload item: {path}. Try {i}/{n}. Starting..".format(path=remote_name,
|
|
510
|
+
i=i_try + 1,
|
|
511
|
+
n=NUM_TRIES))
|
|
512
|
+
if element.type == 'external_file':
|
|
513
|
+
item, action = await self.__single_external_sync(element)
|
|
514
|
+
else:
|
|
515
|
+
if element.annotations_filepath is not None and \
|
|
516
|
+
element.item_metadata == entities.ExportMetadata.FROM_JSON:
|
|
517
|
+
element.item_metadata = {}
|
|
518
|
+
with open(element.annotations_filepath) as ann_f:
|
|
519
|
+
item_metadata = json.load(ann_f)
|
|
520
|
+
if 'metadata' in item_metadata:
|
|
521
|
+
element.item_metadata = item_metadata['metadata']
|
|
522
|
+
item, action = await self.__single_async_upload(filepath=element.buffer,
|
|
523
|
+
mode=mode,
|
|
524
|
+
item_metadata=element.item_metadata,
|
|
525
|
+
remote_path=remote_folder,
|
|
526
|
+
uploaded_filename=remote_name,
|
|
527
|
+
last_try=(i_try + 1) == NUM_TRIES,
|
|
528
|
+
callback=None,
|
|
529
|
+
item_description=element.item_description)
|
|
530
|
+
logger.debug("Upload item: {path}. Try {i}/{n}. Success. Item id: {id}".format(path=remote_name,
|
|
531
|
+
i=i_try + 1,
|
|
532
|
+
n=NUM_TRIES,
|
|
533
|
+
id=item.id))
|
|
534
|
+
if isinstance(item, entities.Item):
|
|
535
|
+
break
|
|
536
|
+
time.sleep(0.3 * (2 ** i_try))
|
|
537
|
+
except Exception as e:
|
|
538
|
+
err = e
|
|
539
|
+
trace = traceback.format_exc()
|
|
540
|
+
logger.debug("Upload item: {path}. Try {i}/{n}. Fail.\n{trace}".format(path=remote_name,
|
|
541
|
+
i=i_try + 1,
|
|
542
|
+
n=NUM_TRIES,
|
|
543
|
+
trace=trace))
|
|
544
|
+
|
|
545
|
+
finally:
|
|
546
|
+
if saved_locally and os.path.isdir(temp_dir):
|
|
547
|
+
shutil.rmtree(temp_dir)
|
|
548
|
+
if item:
|
|
549
|
+
if action in ['overwrite', 'created'] and element.annotations_filepath is not None:
|
|
550
|
+
try:
|
|
551
|
+
await self.__async_upload_annotations(annotations_filepath=element.annotations_filepath,
|
|
552
|
+
item=item)
|
|
553
|
+
except Exception:
|
|
554
|
+
logger.exception('Error uploading annotations to item id: {}'.format(item.id))
|
|
555
|
+
|
|
556
|
+
reporter.set_index(status=action,
|
|
557
|
+
output=item.to_json(),
|
|
558
|
+
success=True,
|
|
559
|
+
ref=item.id)
|
|
560
|
+
if pbar is not None:
|
|
561
|
+
pbar.update()
|
|
562
|
+
self.items_repository._client_api.callbacks.run_on_event(
|
|
563
|
+
event=self.items_repository._client_api.callbacks.CallbackEvent.ITEMS_UPLOAD,
|
|
564
|
+
context={'item_id': item.id, 'dataset_id': item.dataset_id},
|
|
565
|
+
progress=round(pbar.n / pbar.total * 100, 0))
|
|
566
|
+
else:
|
|
567
|
+
if isinstance(element.buffer, str):
|
|
568
|
+
ref = element.buffer
|
|
569
|
+
elif hasattr(element.buffer, "name"):
|
|
570
|
+
ref = element.buffer.name
|
|
571
|
+
else:
|
|
572
|
+
ref = 'Unknown'
|
|
573
|
+
reporter.set_index(ref=ref, status='error',
|
|
574
|
+
success=False,
|
|
575
|
+
error="{}\n{}".format(err, trace))
|
|
576
|
+
|
|
577
|
+
async def __async_upload_annotations(self, annotations_filepath, item):
|
|
578
|
+
with open(annotations_filepath, 'r', encoding="utf8") as f:
|
|
579
|
+
annotations = json.load(f)
|
|
580
|
+
# wait for coroutines on the current event loop
|
|
581
|
+
return await item.annotations._async_upload_annotations(annotations=annotations['annotations'])
|
|
582
|
+
|
|
583
|
+
@staticmethod
|
|
584
|
+
def url_to_data(url):
|
|
585
|
+
chunk_size = 8192
|
|
586
|
+
max_size = 30000000
|
|
587
|
+
temp_dir = None
|
|
588
|
+
|
|
589
|
+
# This will download the binaries from the URL user provided
|
|
590
|
+
prepared_request = requests.Request(method='GET', url=url).prepare()
|
|
591
|
+
with requests.Session() as s:
|
|
592
|
+
retry = Retry(
|
|
593
|
+
total=3,
|
|
594
|
+
read=3,
|
|
595
|
+
connect=3,
|
|
596
|
+
backoff_factor=1,
|
|
597
|
+
)
|
|
598
|
+
adapter = HTTPAdapter(max_retries=retry)
|
|
599
|
+
s.mount('http://', adapter)
|
|
600
|
+
s.mount('https://', adapter)
|
|
601
|
+
response = s.send(request=prepared_request, stream=True)
|
|
602
|
+
|
|
603
|
+
total_length = response.headers.get("content-length")
|
|
604
|
+
save_locally = int(total_length) > max_size
|
|
605
|
+
|
|
606
|
+
if save_locally:
|
|
607
|
+
# save to file
|
|
608
|
+
temp_dir = tempfile.mkdtemp()
|
|
609
|
+
temp_path = os.path.join(temp_dir, url.split('/')[-1].split('?')[0])
|
|
610
|
+
with open(temp_path, "wb") as f:
|
|
611
|
+
for chunk in response.iter_content(chunk_size=chunk_size):
|
|
612
|
+
if chunk: # filter out keep-alive new chunks
|
|
613
|
+
f.write(chunk)
|
|
614
|
+
# save to output variable
|
|
615
|
+
data = temp_path
|
|
616
|
+
else:
|
|
617
|
+
# save as byte stream
|
|
618
|
+
data = io.BytesIO()
|
|
619
|
+
for chunk in response.iter_content(chunk_size=chunk_size):
|
|
620
|
+
if chunk: # filter out keep-alive new chunks
|
|
621
|
+
data.write(chunk)
|
|
622
|
+
# go back to the beginning of the stream
|
|
623
|
+
data.seek(0)
|
|
624
|
+
data.name = url.split('/')[-1]
|
|
625
|
+
|
|
626
|
+
return save_locally, data, temp_dir
|
|
627
|
+
|
|
628
|
+
@staticmethod
|
|
629
|
+
def is_url(url):
|
|
630
|
+
try:
|
|
631
|
+
return validators.url(url)
|
|
632
|
+
except Exception:
|
|
633
|
+
return False
|
|
634
|
+
|
|
635
|
+
@staticmethod
|
|
636
|
+
def link(ref, type, mimetype=None, dataset_id=None):
|
|
637
|
+
"""
|
|
638
|
+
:param ref:
|
|
639
|
+
:param type:
|
|
640
|
+
:param mimetype:
|
|
641
|
+
:param dataset_id:
|
|
642
|
+
"""
|
|
643
|
+
link_info = {'type': type,
|
|
644
|
+
'ref': ref}
|
|
645
|
+
|
|
646
|
+
if mimetype:
|
|
647
|
+
link_info['mimetype'] = mimetype
|
|
648
|
+
|
|
649
|
+
if dataset_id is not None:
|
|
650
|
+
link_info['datasetId'] = dataset_id
|
|
651
|
+
|
|
652
|
+
_json = {'type': 'link',
|
|
653
|
+
'shebang': 'dataloop',
|
|
654
|
+
'metadata': {'dltype': 'link',
|
|
655
|
+
'linkInfo': link_info}}
|
|
656
|
+
|
|
657
|
+
uploaded_byte_io = io.BytesIO()
|
|
658
|
+
uploaded_byte_io.write(json.dumps(_json).encode())
|
|
659
|
+
uploaded_byte_io.seek(0)
|
|
660
|
+
|
|
661
|
+
return uploaded_byte_io
|