dtlpy 1.116.6__py3-none-any.whl → 1.118.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +1 -1
- dtlpy/__version__.py +1 -1
- dtlpy/entities/__init__.py +1 -1
- dtlpy/entities/annotation.py +1 -1
- dtlpy/entities/app.py +1 -1
- dtlpy/entities/compute.py +1 -0
- dtlpy/entities/dataset.py +17 -2
- dtlpy/entities/feature_set.py +7 -0
- dtlpy/entities/item.py +16 -0
- dtlpy/entities/model.py +1 -1
- dtlpy/entities/ontology.py +1 -1
- dtlpy/entities/paged_entities.py +7 -3
- dtlpy/entities/service.py +11 -0
- dtlpy/ml/base_model_adapter.py +68 -37
- dtlpy/repositories/apps.py +12 -13
- dtlpy/repositories/datasets.py +165 -84
- dtlpy/repositories/downloader.py +299 -118
- dtlpy/repositories/feature_sets.py +159 -70
- dtlpy/repositories/recipes.py +15 -5
- dtlpy/services/api_client.py +5 -4
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/METADATA +14 -15
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/RECORD +29 -31
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/WHEEL +1 -1
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/top_level.txt +0 -1
- tests/features/__init__.py +0 -0
- tests/features/environment.py +0 -551
- {dtlpy-1.116.6.data → dtlpy-1.118.12.data}/scripts/dlp +0 -0
- {dtlpy-1.116.6.data → dtlpy-1.118.12.data}/scripts/dlp.bat +0 -0
- {dtlpy-1.116.6.data → dtlpy-1.118.12.data}/scripts/dlp.py +0 -0
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/licenses/LICENSE +0 -0
dtlpy/repositories/downloader.py
CHANGED
```diff
@@ -1,18 +1,22 @@
+import copy
+import io
+import json
+import logging
+import multiprocessing
+import os
+import shutil
+import sys
+import tempfile
+import traceback
 from pathlib import Path
-from
-
-from PIL import Image
+from urllib.parse import unquote, urlparse
+
 import numpy as np
-import traceback
-from urllib.parse import urlparse, unquote
 import requests
-import logging
-import shutil
-import json
 import tqdm
-import
-import
-import
+from PIL import Image
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
 
 from .. import entities, repositories, miscellaneous, PlatformException, exceptions
 from ..services import Reporter
```
```diff
@@ -20,12 +24,257 @@ from ..services import Reporter
 logger = logging.getLogger(name='dtlpy')
 
 NUM_TRIES = 3  # try to download 3 time before fail on item
-
+DOWNLOAD_MAX_ITEMS_PER_SUBSET = 1000
 
 class Downloader:
     def __init__(self, items_repository):
         self.items_repository = items_repository
 
+    def _process_download_results(self, reporter, raise_on_error=False):
+        """
+        Process download results and generate summary report.
+
+        :param reporter: Reporter instance containing download results
+        :param raise_on_error: If True, raise exception on download errors
+        :return: Output from reporter
+        """
+        # reporting
+        n_download = reporter.status_count(status='download')
+        n_exist = reporter.status_count(status='exist')
+        n_error = reporter.status_count(status='error')
+        logger.info(f"Number of files downloaded:{n_download}")
+        logger.info(f"Number of files exists: {n_exist}")
+        logger.info(f"Total number of files: {n_download + n_exist}")
+
+        # log error
+        if n_error > 0:
+            log_filepath = reporter.generate_log_files()
+            # Get up to 5 error examples for the exception message
+            error_text = ""
+            error_counter = 0
+            if reporter._errors:
+                for _id, error in reporter._errors.items():
+                    error_counter += 1
+                    error_text += f"Item ID: {_id}, Error: {error} | "
+                    if error_counter >= 5:
+                        break
+            error_message = f"Errors in {n_error} files. Errors: {error_text}"
+            if log_filepath is not None:
+                error_message += f", see {log_filepath} for full log"
+            if raise_on_error is True:
+                raise PlatformException(
+                    error="400", message=error_message
+                )
+            else:
+                logger.warning(error_message)
+
+        if int(n_download) <= 1 and int(n_exist) <= 1:
+            try:
+                return next(reporter.output)
+            except StopIteration:
+                return None
+        return reporter.output
+
```
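Worth noting in the hunk above: `_process_download_results` returns a single value (or `None`) when the reporter holds at most one result, and the reporter's output generator otherwise. A hedged sketch of how that convention surfaces to SDK callers; the dataset id and paths are placeholders, not from this diff:

```python
# Hypothetical caller; '<dataset-id>' and /tmp/out are placeholders.
import dtlpy as dl

dataset = dl.datasets.get(dataset_id='<dataset-id>')

# Many matching items -> a generator of local filepaths.
for filepath in dataset.items.download(local_path='/tmp/out'):
    print(filepath)

# A single item -> one filepath string (or None if the download failed).
item = dataset.items.get(filepath='/dog.jpg')
filepath = item.download(local_path='/tmp/out')
```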
```diff
+    def _process_item_json(self, local_path, item_json, reporter, pbar, overwrite=False):
+        """
+        Process a single item JSON for download, saving both the item file and metadata.
+
+        :param local_path: Local path to save files
+        :param item_json: Item JSON metadata
+        :param reporter: Reporter instance for tracking progress
+        :param pbar: Progress bar instance
+        :param overwrite: Whether to overwrite existing files
+        :return: Error message, traceback, and downloaded filepath
+        """
+        err = None
+        trace = None
+        downloaded_filepath = None
+        item_id = item_json['id']
+        filename = item_json['filename'].lstrip('/')
+
+        for i_try in range(NUM_TRIES):
+            try:
+                # Download the image
+                image_path = Path(local_path) / 'items' / filename
+                # Ensure the directory for the image file exists (in case filename has subdirectories)
+                image_path.parent.mkdir(parents=True, exist_ok=True)
+                item = entities.Item.from_json(_json = item_json, client_api=self.items_repository._client_api, is_fetched=False)
+                downloaded_data = self.__thread_download(
+                    item=item,
+                    local_path=str(image_path.parent),
+                    local_filepath=str(image_path),
+                    save_locally=True,
+                    to_array=False,
+                    overwrite=overwrite,
+                    annotation_options=[],
+                    annotation_filters=None,
+                )
+
+                if downloaded_data is None:
+                    err = 'Failed to download image'
+                    trace = ''
+                else:
+                    # Save the item JSON directly
+                    json_filename = Path(filename).stem + '.json'
+                    json_path = Path(local_path) / 'json' / Path(filename).parent / json_filename
+
+                    # Ensure the directory for the JSON file exists (in case filename has subdirectories)
+                    json_path.parent.mkdir(parents=True, exist_ok=True)
+
+                    # Save the original item_json directly
+                    with open(json_path, 'w', encoding='utf-8') as f:
+                        json.dump(item_json, f, indent=2, ensure_ascii=False)
+
+                    downloaded_filepath = str(image_path)
+
+                if downloaded_filepath is not None:
+                    break
+
+            except Exception as e:
+                logger.debug(f"Download item: {filename}. Try {i_try + 1}/{NUM_TRIES}. Fail.")
+                err = e
+                trace = traceback.format_exc()
+
+        pbar.update()
+        if downloaded_filepath is None:
+            if err is None:
+                err = self.items_repository._client_api.platform_exception
+            reporter.set_index(status="error", ref=item_id, success=False, error=f"{err}\n{trace}")
+        else:
+            reporter.set_index(ref=item_id, status="download", output=downloaded_filepath, success=True)
+
```
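`_process_item_json` writes each binary under `items/` and the raw item JSON as a sidecar under `json/`, mirroring the remote folder tree. A minimal sketch, assuming that layout (`root` is a placeholder for `local_path`), that pairs each downloaded file with its metadata sidecar:

```python
# Minimal sketch of walking the items/ + json/ layout produced above.
import json
from pathlib import Path

root = Path('/tmp/out')  # placeholder for local_path
for item_file in (root / 'items').rglob('*'):
    if not item_file.is_file():
        continue
    rel = item_file.relative_to(root / 'items')
    sidecar = (root / 'json' / rel).with_suffix('.json')  # same stem, .json suffix
    if sidecar.exists():
        metadata = json.loads(sidecar.read_text(encoding='utf-8'))
        print(item_file, metadata.get('id'))
```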
146
|
+
def _download_recursive(
|
|
147
|
+
self,
|
|
148
|
+
local_path=None,
|
|
149
|
+
filters: entities.Filters = None,
|
|
150
|
+
annotation_filters: entities.Filters = None,
|
|
151
|
+
file_types=None,
|
|
152
|
+
overwrite=False,
|
|
153
|
+
raise_on_error=False,
|
|
154
|
+
dataset_lock=False,
|
|
155
|
+
lock_timeout_sec=None,
|
|
156
|
+
):
|
|
157
|
+
"""
|
|
158
|
+
Download items recursively from a dataset.
|
|
159
|
+
|
|
160
|
+
:param local_path: Local path to save downloaded items
|
|
161
|
+
:param filters: Filters entity to filter items
|
|
162
|
+
:param annotation_filters: Filters entity to filter annotations
|
|
163
|
+
:param file_types: List of file types to download
|
|
164
|
+
:param overwrite: Whether to overwrite existing files
|
|
165
|
+
:param raise_on_error: Raise error if download fails
|
|
166
|
+
:param dataset_lock: Lock dataset during download
|
|
167
|
+
:param lock_timeout_sec: Lock timeout in seconds
|
|
168
|
+
"""
|
|
169
|
+
filters, annotation_filters = self._prepare_filters(filters=filters,annotation_filters=annotation_filters,file_types=file_types)
|
|
170
|
+
filter_copy = copy.deepcopy(filters)
|
|
171
|
+
filter_copy.page_size = 0
|
|
172
|
+
num_items = self.items_repository.list(filters=filter_copy).items_count
|
|
173
|
+
if num_items == 0:
|
|
174
|
+
return list()
|
|
175
|
+
client_api = self.items_repository._client_api
|
|
176
|
+
reporter = Reporter(
|
|
177
|
+
num_workers=num_items,
|
|
178
|
+
resource=Reporter.ITEMS_DOWNLOAD,
|
|
179
|
+
print_error_logs=client_api.verbose.print_error_logs,
|
|
180
|
+
client_api=client_api,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Create directories once using pathlib
|
|
184
|
+
local_path_obj = Path(local_path)
|
|
185
|
+
items_dir = local_path_obj / 'items'
|
|
186
|
+
jsons_dir = local_path_obj / 'json'
|
|
187
|
+
items_dir.mkdir(parents=True, exist_ok=True)
|
|
188
|
+
jsons_dir.mkdir(parents=True, exist_ok=True)
|
|
189
|
+
|
|
190
|
+
jobs = [None for _ in range(num_items)]
|
|
191
|
+
# crrently keep the thread count to default.
|
|
192
|
+
# client_api._thread_pools_names['item.download'] = 5 * multiprocessing.cpu_count()
|
|
193
|
+
pool = client_api.thread_pools(pool_name='item.download')
|
|
194
|
+
pbar = tqdm.tqdm(
|
|
195
|
+
total=num_items,
|
|
196
|
+
disable=client_api.verbose.disable_progress_bar_download_dataset,
|
|
197
|
+
file=sys.stdout,
|
|
198
|
+
desc='Download Items',
|
|
199
|
+
)
|
|
200
|
+
try:
|
|
201
|
+
i_item = 0
|
|
202
|
+
import time
|
|
203
|
+
start_time = time.time()
|
|
204
|
+
for json_file in self.items_repository.dataset.project.datasets._export_recursive(
|
|
205
|
+
dataset=self.items_repository.dataset,
|
|
206
|
+
local_path=tempfile.mkdtemp(prefix='download_recursive_jsons_'),
|
|
207
|
+
max_items_per_subset=DOWNLOAD_MAX_ITEMS_PER_SUBSET,
|
|
208
|
+
include_annotations=True,
|
|
209
|
+
filters=filters,
|
|
210
|
+
annotation_filters=annotation_filters,
|
|
211
|
+
dataset_lock=dataset_lock,
|
|
212
|
+
lock_timeout_sec=lock_timeout_sec,
|
|
213
|
+
):
|
|
214
|
+
end_time = time.time()
|
|
215
|
+
with open(json_file, 'r') as f:
|
|
216
|
+
data = json.load(f)
|
|
217
|
+
for item_json in data:
|
|
218
|
+
jobs[i_item] = pool.submit(
|
|
219
|
+
self._process_item_json,
|
|
220
|
+
**{
|
|
221
|
+
"local_path": local_path,
|
|
222
|
+
"item_json": item_json,
|
|
223
|
+
"reporter": reporter,
|
|
224
|
+
"pbar": pbar,
|
|
225
|
+
"overwrite": overwrite,
|
|
226
|
+
},
|
|
227
|
+
)
|
|
228
|
+
i_item += 1
|
|
229
|
+
finally:
|
|
230
|
+
_ = [j.result() for j in jobs if j is not None]
|
|
231
|
+
pbar.close()
|
|
232
|
+
return self._process_download_results(reporter=reporter, raise_on_error=raise_on_error)
|
|
233
|
+
|
|
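`_download_recursive` streams export subsets of up to `DOWNLOAD_MAX_ITEMS_PER_SUBSET` items and fans each item out to a thread pool, so downloads begin before the full export finishes. A simplified sketch of that shape, using `concurrent.futures` as a stand-in for the SDK's internal pool; the subset file names and worker body are placeholders:

```python
# Simplified fan-out sketch; not the SDK's internal thread pool.
import json
from concurrent.futures import ThreadPoolExecutor

def process_item(item_json):
    ...  # download the binary and write its JSON sidecar, as above

jobs = []
with ThreadPoolExecutor(max_workers=8) as pool:
    for json_file in ('subset_0.json', 'subset_1.json'):  # placeholder subsets
        with open(json_file) as f:
            for item_json in json.load(f):
                jobs.append(pool.submit(process_item, item_json))
_ = [j.result() for j in jobs]  # surface worker exceptions
```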
```diff
+    @staticmethod
+    def _prepare_filters(filters: entities.Filters = None,
+                         annotation_filters: entities.Filters = None,
+                         file_types=None):
+        """
+        Prepare and merge filters with annotation filters.
+
+        :param filters: Filters entity or None
+        :param annotation_filters: Annotation filters to merge with item filters
+        :param file_types: List of file types to filter
+        :return: Prepared filters entity
+        """
+        # filters
+        if filters is None:
+            filters = entities.Filters()
+            filters._user_query = 'false'
+        # file types
+        if file_types is not None:
+            filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
+        if annotation_filters is not None:
+            if len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0:
+                for annotation_filter_and in annotation_filters.and_filter_list:
+                    filters.add_join(field=annotation_filter_and.field,
+                                     values=annotation_filter_and.values,
+                                     operator=annotation_filter_and.operator,
+                                     method=entities.FiltersMethod.AND)
+                for annotation_filter_or in annotation_filters.or_filter_list:
+                    filters.add_join(field=annotation_filter_or.field,
+                                     values=annotation_filter_or.values,
+                                     operator=annotation_filter_or.operator,
+                                     method=entities.FiltersMethod.OR)
+            elif annotation_filters.custom_filter is not None:
+                annotation_query_dict = annotation_filters.prepare()
+                items_query_dict = filters.prepare()
+                items_query_dict["join"] = annotation_query_dict
+                filters.reset()
+                filters.custom_filter = items_query_dict
+
+        else:
+            annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
+            filters._user_query = 'false'
+
+        return filters, annotation_filters
+
     def download(self,
                  # filter options
                  filters: entities.Filters = None,
```
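`_prepare_filters` folds annotation filters into the item query as a `join`, so a single items query does both levels of filtering. An illustrative input, with a placeholder label value:

```python
# Illustrative filters; 'car' is a placeholder label.
import dtlpy as dl

filters = dl.Filters()  # item-level query
annotation_filters = dl.Filters(resource=dl.FiltersResource.ANNOTATION)
annotation_filters.add(field='label', values='car')

# The merge performed above is roughly equivalent to:
merged = dl.Filters()
merged.add_join(field='label', values='car', method=dl.FiltersMethod.AND)
```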
```diff
@@ -131,35 +380,12 @@ class Downloader:
             items_to_download = [items]
             num_items = len(items)
         else:
-            # filters
-
-            filters
-
-
-
-            filters.add(field='metadata.system.mimetype', values=file_types, operator=entities.FiltersOperations.IN)
-            if annotation_filters is not None:
-                if len(annotation_filters.and_filter_list) > 0 or len(annotation_filters.or_filter_list) > 0:
-                    for annotation_filter_and in annotation_filters.and_filter_list:
-                        filters.add_join(field=annotation_filter_and.field,
-                                         values=annotation_filter_and.values,
-                                         operator=annotation_filter_and.operator,
-                                         method=entities.FiltersMethod.AND)
-                    for annotation_filter_or in annotation_filters.or_filter_list:
-                        filters.add_join(field=annotation_filter_or.field,
-                                         values=annotation_filter_or.values,
-                                         operator=annotation_filter_or.operator,
-                                         method=entities.FiltersMethod.OR)
-                elif annotation_filters.custom_filter is not None:
-                    annotation_query_dict = annotation_filters.prepare()
-                    items_query_dict = filters.prepare()
-                    items_query_dict["join"] = annotation_query_dict
-                    filters.reset()
-                    filters.custom_filter = items_query_dict
-
-            else:
-                annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
-                filters._user_query = 'false'
+            # Prepare and merge filters
+            filters, annotation_filters = self._prepare_filters(
+                filters=filters,
+                annotation_filters=annotation_filters,
+                file_types=file_types
+            )
 
             items_to_download = self.items_repository.list(filters=filters)
             num_items = items_to_download.items_count
```
```diff
@@ -234,7 +460,8 @@ class Downloader:
         # pool
         pool = client_api.thread_pools(pool_name='item.download')
         # download
-        pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset,
+        pbar = tqdm.tqdm(total=num_items, disable=client_api.verbose.disable_progress_bar_download_dataset,
+                         file=sys.stdout,
                          desc='Download Items')
         try:
             i_item = 0
```
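The only substantive change in this hunk is routing tqdm to stdout. tqdm writes to stderr by default, so progress bars vanish when stderr is captured or discarded; a minimal illustration:

```python
# tqdm defaults to stderr; file=sys.stdout keeps progress visible
# in environments that capture or silence stderr.
import sys
import tqdm

for _ in tqdm.tqdm(range(3), desc='Download Items', file=sys.stdout):
    pass
```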
```diff
@@ -305,41 +532,8 @@ class Downloader:
         finally:
             _ = [j.result() for j in jobs if j is not None]
             pbar.close()
-        # reporting
-        n_download = reporter.status_count(status='download')
-        n_exist = reporter.status_count(status='exist')
-        n_error = reporter.status_count(status='error')
-        logger.info("Number of files downloaded:{}".format(n_download))
-        logger.info("Number of files exists: {}".format(n_exist))
-        logger.info("Total number of files: {}".format(n_download + n_exist))
 
-
-        if n_error > 0:
-            log_filepath = reporter.generate_log_files()
-            # Get up to 5 error examples for the exception message
-            error_text = ""
-            error_counter = 0
-            if reporter._errors:
-                for _id, error in reporter._errors.items():
-                    error_counter += 1
-                    error_text += f"Item ID: {_id}, Error: {error} | "
-                    if error_counter >= 5:
-                        break
-            error_message = f"Errors in {n_error} files. Errors: {error_text}"
-            if log_filepath is not None:
-                error_message += f", see {log_filepath} for full log"
-            if raise_on_error is True:
-                raise PlatformException(
-                    error="400", message=error_message
-                )
-            else:
-                logger.warning(error_message)
-        if int(n_download) <= 1 and int(n_exist) <= 1:
-            try:
-                return next(reporter.output)
-            except StopIteration:
-                return None
-        return reporter.output
+        return self._process_download_results(reporter=reporter, raise_on_error=raise_on_error)
 
     def __thread_download_wrapper(self, i_item,
                                   # item params
```
```diff
@@ -403,7 +597,7 @@ class Downloader:
                              export_version=entities.ExportVersion.V1,
                              dataset_lock=False,
                              lock_timeout_sec=None,
-                             export_summary=False
+                             export_summary=False
                              ):
         """
         Download annotations json for entire dataset
```
```diff
@@ -633,27 +827,12 @@ class Downloader:
     @staticmethod
     def __get_link_source(item):
         assert isinstance(item, entities.Item)
-
-
-
-
-
-
-
-        # recursively get next id link item
-        while item.filename.endswith('.json') and \
-                item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
-                item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'id':
-            item = item.dataset.items.get(item_id=item.metadata['system']['shebang']['linkInfo']['ref'])
-
-        # check if link
-        if item.filename.endswith('.json') and \
-                item.metadata.get('system', {}).get('shebang', {}).get('dltype', '') == 'link' and \
-                item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
-            url = item.metadata['system']['shebang']['linkInfo']['ref']
-            return item, url, True
-        else:
-            return item, '', False
+        is_url = False
+        url = item.resolved_stream
+        if item.metadata.get('system', {}).get('shebang', {}).get('linkInfo', {}).get('type', '') == 'url':
+            is_url = True
+
+        return item, url, is_url, url.startswith('file://')
 
     def __file_validation(self, item, downloaded_file):
         res = False
```
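The rewritten `__get_link_source` delegates `id`-link chasing to `item.resolved_stream` and returns the `file://` check as a fourth element instead of leaving it to the caller. For reference, a sketch of the `shebang` metadata it inspects; only the key paths are taken from the code above, the values are illustrative:

```python
# Illustrative link-item metadata; key paths match the code above.
link_item_json = {
    'filename': '/pointer.json',
    'metadata': {
        'system': {
            'shebang': {
                'dltype': 'link',
                'linkInfo': {'type': 'url', 'ref': 'https://example.com/cat.jpg'},
            }
        }
    },
}
link_type = (link_item_json['metadata'].get('system', {})
             .get('shebang', {})
             .get('linkInfo', {})
             .get('type', ''))
is_url = link_type == 'url'  # True for this example
```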
```diff
@@ -688,7 +867,7 @@ class Downloader:
         """
         Get a single item's binary data
         Calling this method will returns the item body itself , an image for example with the proper mimetype.
-
+
         :param item: Item entity to download
         :param save_locally: bool. save to file or return buffer
         :param local_path: item local folder to save to.
```
```diff
@@ -709,8 +888,7 @@ class Downloader:
         if save_locally and os.path.isfile(local_filepath):
             need_to_download = overwrite
 
-        item, url, is_url = self.__get_link_source(item=item)
-        is_local_link = isinstance(url, str) and url.startswith('file://')
+        item, url, is_url, is_local_link = self.__get_link_source(item=item)
 
         # save as byte stream
         data = io.BytesIO()
```
```diff
@@ -804,9 +982,11 @@ class Downloader:
 
             file_validation = True
             if not is_url:
-                file_validation, start_point, chunk_resume = self.__get_next_chunk(
-
-
+                file_validation, start_point, chunk_resume = self.__get_next_chunk(
+                    item=item,
+                    download_progress=temp_file_path,
+                    chunk_resume=chunk_resume
+                )
             if file_validation:
                 shutil.move(temp_file_path, local_filepath)
                 download_done = True
```
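`__get_next_chunk` now receives the temp-file path as its resume point; its body is not part of this diff. The usual mechanism behind this kind of chunked resume is an HTTP Range request, sketched below as an assumption about the general technique rather than the SDK's actual implementation:

```python
# Generic resume-from-offset sketch; NOT the SDK's __get_next_chunk.
import os
import requests

def resume_download(url, temp_file_path, chunk_size=8192):
    # Resume from however many bytes the temp file already holds.
    start = os.path.getsize(temp_file_path) if os.path.exists(temp_file_path) else 0
    headers = {'Range': f'bytes={start}-'} if start else {}
    with requests.get(url, headers=headers, stream=True) as r:
        r.raise_for_status()
        # 206 means the server honored the range; otherwise restart from zero.
        mode = 'ab' if r.status_code == 206 else 'wb'
        with open(temp_file_path, mode) as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
```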
```diff
@@ -933,6 +1113,7 @@ class Downloader:
         """
         :param url:
         """
+        response = None
 
         if url.startswith('file://'):
             parsed = urlparse(url)
```
```diff
@@ -953,24 +1134,24 @@ class Downloader:
            )
 
            try:
-
+                response = io.BufferedReader(io.FileIO(path, 'rb'))
            except PermissionError as e:
                raise PlatformException(
                    error='403',
                    message=f'Permission denied accessing file: {url}'
                ) from e
-
-
-
-
-
-
-
-
-
-
-
-
-
+        else:
+            prepared_request = requests.Request(method='GET', url=url).prepare()
+            with requests.Session() as s:
+                retry = Retry(
+                    total=3,
+                    read=3,
+                    connect=3,
+                    backoff_factor=1,
+                )
+                adapter = HTTPAdapter(max_retries=retry)
+                s.mount('http://', adapter)
+                s.mount('https://', adapter)
+                response = s.send(request=prepared_request, stream=True)
 
         return response
```