ECOv003-L2T-STARS 1.0.0-py3-none-any.whl → 1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ECOv003_L2T_STARS/BRDF/BRDF.py +57 -0
- ECOv003_L2T_STARS/BRDF/SZA.py +65 -0
- ECOv003_L2T_STARS/BRDF/__init__.py +1 -0
- ECOv003_L2T_STARS/BRDF/statistical_radiative_transport.txt +90 -0
- ECOv003_L2T_STARS/BRDF/version.txt +1 -0
- ECOv003_L2T_STARS/ECOv003_DL.py +527 -0
- ECOv003_L2T_STARS/ECOv003_DL.xml +47 -0
- ECOv003_L2T_STARS/ECOv003_L2T_STARS.py +162 -0
- ECOv003_L2T_STARS/ECOv003_L2T_STARS.xml +47 -0
- ECOv003_L2T_STARS/L2TSTARSConfig.py +188 -0
- ECOv003_L2T_STARS/L2T_STARS.py +489 -0
- ECOv003_L2T_STARS/LPDAAC/LPDAACDataPool.py +444 -0
- ECOv003_L2T_STARS/LPDAAC/__init__.py +9 -0
- ECOv003_L2T_STARS/LPDAAC/version.txt +1 -0
- ECOv003_L2T_STARS/Manifest.toml +2332 -0
- ECOv003_L2T_STARS/Project.toml +14 -0
- ECOv003_L2T_STARS/VIIRS/VIIRSDataPool.py +294 -0
- ECOv003_L2T_STARS/VIIRS/VIIRSDownloader.py +26 -0
- ECOv003_L2T_STARS/VIIRS/VIIRS_CMR_LOGIN.py +36 -0
- ECOv003_L2T_STARS/VIIRS/VNP09GA.py +1277 -0
- ECOv003_L2T_STARS/VIIRS/VNP43IA4.py +288 -0
- ECOv003_L2T_STARS/VIIRS/VNP43MA3.py +323 -0
- ECOv003_L2T_STARS/VIIRS/__init__.py +9 -0
- ECOv003_L2T_STARS/VIIRS/version.txt +1 -0
- ECOv003_L2T_STARS/VNP43NRT/VNP43NRT.py +863 -0
- ECOv003_L2T_STARS/VNP43NRT/__init__.py +1 -0
- ECOv003_L2T_STARS/VNP43NRT/process_VNP43NRT.jl +169 -0
- ECOv003_L2T_STARS/VNP43NRT/version.txt +1 -0
- ECOv003_L2T_STARS/VNP43NRT_jl/Manifest.toml +995 -0
- ECOv003_L2T_STARS/VNP43NRT_jl/Project.toml +15 -0
- ECOv003_L2T_STARS/VNP43NRT_jl/__init__.py +0 -0
- ECOv003_L2T_STARS/VNP43NRT_jl/instantiate.jl +25 -0
- ECOv003_L2T_STARS/VNP43NRT_jl/instantiate.py +13 -0
- ECOv003_L2T_STARS/VNP43NRT_jl/src/VNP43NRT.jl +411 -0
- ECOv003_L2T_STARS/VNP43NRT_jl/src/__init__.py +0 -0
- ECOv003_L2T_STARS/__init__.py +3 -0
- ECOv003_L2T_STARS/calibrate_fine_to_coarse.py +60 -0
- ECOv003_L2T_STARS/constants.py +38 -0
- ECOv003_L2T_STARS/daterange/__init__.py +1 -0
- ECOv003_L2T_STARS/daterange/daterange.py +35 -0
- ECOv003_L2T_STARS/generate_L2T_STARS_runconfig.py +249 -0
- ECOv003_L2T_STARS/generate_NDVI_coarse_directory.py +21 -0
- ECOv003_L2T_STARS/generate_NDVI_coarse_image.py +30 -0
- ECOv003_L2T_STARS/generate_NDVI_fine_directory.py +14 -0
- ECOv003_L2T_STARS/generate_NDVI_fine_image.py +28 -0
- ECOv003_L2T_STARS/generate_STARS_inputs.py +231 -0
- ECOv003_L2T_STARS/generate_albedo_coarse_directory.py +18 -0
- ECOv003_L2T_STARS/generate_albedo_coarse_image.py +30 -0
- ECOv003_L2T_STARS/generate_albedo_fine_directory.py +17 -0
- ECOv003_L2T_STARS/generate_albedo_fine_image.py +30 -0
- ECOv003_L2T_STARS/generate_filename.py +37 -0
- ECOv003_L2T_STARS/generate_input_staging_directory.py +23 -0
- ECOv003_L2T_STARS/generate_model_state_tile_date_directory.py +28 -0
- ECOv003_L2T_STARS/generate_output_directory.py +28 -0
- ECOv003_L2T_STARS/install_STARS_jl.py +43 -0
- ECOv003_L2T_STARS/instantiate_STARS_jl.py +38 -0
- ECOv003_L2T_STARS/load_prior.py +248 -0
- ECOv003_L2T_STARS/prior.py +56 -0
- ECOv003_L2T_STARS/process_ECOSTRESS_data_fusion_distributed_bias.jl +420 -0
- ECOv003_L2T_STARS/process_STARS_product.py +507 -0
- ECOv003_L2T_STARS/process_julia_data_fusion.py +110 -0
- ECOv003_L2T_STARS/retrieve_STARS_sources.py +101 -0
- ECOv003_L2T_STARS/runconfig.py +70 -0
- ECOv003_L2T_STARS/timer/__init__.py +1 -0
- ECOv003_L2T_STARS/timer/timer.py +77 -0
- ECOv003_L2T_STARS/version.py +8 -0
- ECOv003_L2T_STARS/version.txt +1 -0
- {ECOv003_L2T_STARS-1.0.0.dist-info → ecov003_l2t_stars-1.1.0.dist-info}/METADATA +30 -23
- ecov003_l2t_stars-1.1.0.dist-info/RECORD +73 -0
- {ECOv003_L2T_STARS-1.0.0.dist-info → ecov003_l2t_stars-1.1.0.dist-info}/WHEEL +1 -1
- ecov003_l2t_stars-1.1.0.dist-info/entry_points.txt +3 -0
- ecov003_l2t_stars-1.1.0.dist-info/top_level.txt +1 -0
- ECOv003_L2T_STARS-1.0.0.dist-info/RECORD +0 -5
- ECOv003_L2T_STARS-1.0.0.dist-info/top_level.txt +0 -1
- {ECOv003_L2T_STARS-1.0.0.dist-info → ecov003_l2t_stars-1.1.0.dist-info/licenses}/LICENSE +0 -0
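
The remainder of this page shows the added file contents hunk by hunk. For reference, a comparison like this one can be reproduced locally from the two wheel files. The sketch below uses only the Python standard library; it assumes both wheels have already been fetched (for example with `pip download ECOv003-L2T-STARS==1.0.0 --no-deps`), so the local filenames are assumptions.

```python
# Minimal sketch of diffing two wheels locally (a wheel is a zip archive).
# The two filenames are assumptions for wheels downloaded beforehand.
import difflib
import zipfile

OLD_WHEEL = "ECOv003_L2T_STARS-1.0.0-py3-none-any.whl"  # assumed local filename
NEW_WHEEL = "ecov003_l2t_stars-1.1.0-py3-none-any.whl"  # assumed local filename


def read_members(wheel_path: str) -> dict:
    """Read every member of a wheel archive as lines of text."""
    with zipfile.ZipFile(wheel_path) as archive:
        return {
            name: archive.read(name).decode("utf-8", errors="replace").splitlines()
            for name in archive.namelist()
        }


old_files = read_members(OLD_WHEEL)
new_files = read_members(NEW_WHEEL)

# Emit a unified diff for each file present in either wheel, like the listing above.
for name in sorted(set(old_files) | set(new_files)):
    for line in difflib.unified_diff(
            old_files.get(name, []),
            new_files.get(name, []),
            fromfile=f"1.0.0/{name}",
            tofile=f"1.1.0/{name}",
            lineterm=""):
        print(line)
```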
ECOv003_L2T_STARS/LPDAAC/LPDAACDataPool.py

```diff
@@ -0,0 +1,444 @@
+import base64
+import hashlib
+import json
+import logging
+import os
+import posixpath
+import re
+import shutil
+import urllib
+from datetime import date
+from fnmatch import fnmatch
+from http.cookiejar import CookieJar
+from os import makedirs, remove
+from os.path import abspath
+from os.path import dirname
+from os.path import exists
+from os.path import getsize
+from os.path import isdir
+from os.path import join
+from time import sleep
+from typing import List, OrderedDict
+import netrc
+import requests
+import xmltodict
+from bs4 import BeautifulSoup
+from dateutil import parser
+from pycksum import cksum
+
+import colored_logging as cl
+
+from ECOv003_exit_codes import *
+
+CONNECTION_CLOSE = {
+    "Connection": "close",
+}
+
+DEFAULT_REMOTE = "https://e4ftl01.cr.usgs.gov"
+RETRIES = 6
+WAIT_SECONDS = 360
+XML_RETRIES = RETRIES
+XML_TIMEOUT = WAIT_SECONDS
+DOWNLOAD_RETRIES = RETRIES
+DOWNLOAD_WAIT_SECONDS = WAIT_SECONDS
+
+__author__ = "Gregory Halverson"
+
+logger = logging.getLogger(__name__)
+
+
+class LPDAACServerUnreachable(ConnectionError):
+    pass
+
+
+class LPDAACDataPool:
+    logger = logging.getLogger(__name__)
+    DEFAULT_CHUNK_SIZE = 2 ** 20
+    DATE_REGEX = re.compile('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$')
+    DEFAULT_REMOTE = DEFAULT_REMOTE
+
+    def __init__(self, username: str = None, password: str = None, remote: str = None, offline_ok: bool = False):
+        if remote is None:
+            remote = DEFAULT_REMOTE
+
+        if username is None or password is None:
+            try:
+                netrc_file = netrc.netrc()
+                username, _, password = netrc_file.authenticators("urs.earthdata.nasa.gov")
+            except Exception as e:
+                logger.exception(e)
+                logger.warning("netrc credentials not found for urs.earthdata.nasa.gov")
+
+        if username is None or password is None:
+            if not "LPDAAC_USERNAME" in os.environ or not "LPDAAC_PASSWORD" in os.environ:
+                raise RuntimeError("Missing environment variable 'LPDAAC_USERNAME' or 'LPDAAC_PASSWORD'")
+
+            username = os.environ["LPDAAC_USERNAME"]
+            password = os.environ["LPDAAC_PASSWORD"]
+
+        self._remote = remote
+        self._username = username
+        self._password = password
+        self.offline_ok = offline_ok
+
+        # if self.offline_ok:
+        #     logger.warning("going into offline mode")
+
+        self._listings = {}
+
+        try:
+            self._authenticate()
+            self._check_remote()
+        except Exception as e:
+            if self.offline_ok:
+                logger.warning("unable to connect to LP-DAAC data pool")
+            else:
+                raise e
+
+    def _authenticate(self):
+        try:
+            # https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
+
+            password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+
+            password_manager.add_password(
+                realm=None,
+                uri="https://urs.earthdata.nasa.gov",
+                user=self._username,
+                passwd=self._password
+            )
+
+            cookie_jar = CookieJar()
+
+            # Install all the handlers.
+
+            opener = urllib.request.build_opener(
+                urllib.request.HTTPBasicAuthHandler(password_manager),
+                # urllib2.HTTPHandler(debuglevel=1),  # Uncomment these two lines to see
+                # urllib2.HTTPSHandler(debuglevel=1),  # details of the requests/responses
+                urllib.request.HTTPCookieProcessor(cookie_jar)
+            )
+
+            urllib.request.install_opener(opener)
+        except Exception as e:
+            message = "unable to authenticate with LP-DAAC data pool"
+            if self.offline_ok:
+                logger.warning(message)
+            else:
+                raise ConnectionError(message)
+
+    def _check_remote(self):
+        logger.info(f"checking URL: {cl.URL(self.remote)}")
+
+        try:
+            response = requests.head(self.remote, headers=CONNECTION_CLOSE)
+            status = response.status_code
+            duration = response.elapsed.total_seconds()
+        except Exception as e:
+            logger.exception(e)
+            message = f"unable to connect to URL: {self.remote}"
+
+            if self.offline_ok:
+                logger.warning(message)
+                return
+            else:
+                raise LPDAACServerUnreachable(message)
+
+        if status == 200:
+            logger.info(
+                "remote verified with status " + cl.val(200) +
+                " in " + cl.time(f"{duration:0.2f}") +
+                " seconds: " + cl.URL(self.remote))
+        else:
+            message = f"status: {status} URL: {self.remote}"
+
+            if self.offline_ok:
+                logger.warning(message)
+            else:
+                raise ConnectionError(message)
+
+    @property
+    def remote(self):
+        return self._remote
+
+    def get_HTTP_text(self, URL: str) -> str:
+        try:
+            request = urllib.request.Request(URL)
+            response = urllib.request.urlopen(request)
+            body = response.read().decode()
+        except Exception as e:
+            logger.exception(e)
+            raise ConnectionError(f"cannot connect to URL: {URL}")
+
+        return body
+
+    def get_HTTP_listing(self, URL: str, pattern: str = None) -> List[str]:
+        if URL in self._listings:
+            listing = self._listings[URL]
+        else:
+            text = self.get_HTTP_text(URL)
+            soup = BeautifulSoup(text, 'html.parser')
+            links = list(soup.find_all('a', href=True))
+
+            # get directory names from links on http site
+            listing = sorted([link['href'].replace('/', '') for link in links])
+            self._listings[URL] = listing
+
+        if pattern is not None:
+            listing = sorted([
+                item
+                for item
+                in listing
+                if fnmatch(item, pattern)
+            ])
+
+        return listing
+
+    def get_HTTP_date_listing(self, URL: str) -> List[date]:
+        return sorted([
+            parser.parse(item).date()
+            for item
+            in self.get_HTTP_listing(URL)
+            if self.DATE_REGEX.match(item)
+        ])
+
+    def read_HTTP_XML(self, URL: str) -> OrderedDict:
+        return xmltodict.parse(self.get_HTTP_text(URL))
+
+    def generate_XML_URL(self, URL: str) -> str:
+        return f"{URL}.xml"
+
+    def get_metadata(self, data_URL: str) -> OrderedDict:
+        metadata_URL = f"{data_URL}.xml"
+        logger.info(f"checking metadata: {cl.URL(metadata_URL)}")
+        request = urllib.request.Request(metadata_URL)
+        response = urllib.request.urlopen(request)
+        duration = response.elapsed.total_seconds()
+        body = response.read().decode()
+        metadata = xmltodict.parse(body)
+        logger.info(f"metadata retrieved in {cl.val(f'{duration:0.2f}')} seconds: {cl.URL(metadata_URL)}")
+
+        return metadata
+
+    def get_remote_checksum(self, URL: str) -> int:
+        return int(self.get_metadata(URL)["GranuleMetaDataFile"]["GranuleURMetaData"]["DataFiles"]["DataFileContainer"][
+                       "Checksum"])
+
+    def get_remote_filesize(self, URL: str) -> int:
+        return int(self.get_metadata(URL)["GranuleMetaDataFile"]["GranuleURMetaData"]["DataFiles"]["DataFileContainer"][
+                       "FileSize"])
+
+    def get_local_checksum(self, filename: str, checksum_type: str = "CKSUM") -> str:
+        with open(filename, "rb") as file:
+            if checksum_type == "CKSUM":
+                return str(int(cksum(file)))
+            elif checksum_type == "MD5":
+                return str(hashlib.md5(file.read()).hexdigest())
+
+    def get_local_filesize(self, filename: str) -> int:
+        return getsize(filename)
+
+    def product_directory(self, platform: str, product: str, build: str = None) -> str:
+        if build is None:
+            build = "001"
+        elif isinstance(build, float):
+            build = f"{int(build * 10)}:03d"
+        elif isinstance(build, int):
+            build = f"{build:03d}"
+
+        URL = posixpath.join(self._remote, platform, f"{product}.{build}")
+
+        return URL
+
+    def dates(self, platform: str, product: str, build: str = None) -> List[date]:
+        return self.get_HTTP_date_listing(self.product_directory(platform, product, build))
+
+    def date_URL(
+            self,
+            platform: str,
+            product: str,
+            acquisition_date: date or str,
+            build: str = None) -> str:
+        if isinstance(acquisition_date, str):
+            acquisition_date = parser.parse(acquisition_date).date()
+
+        URL = posixpath.join(
+            self.product_directory(platform, product, build),
+            f"{acquisition_date:%Y.%m.%d}"
+        )
+
+        return URL
+
+    def files(
+            self,
+            platform: str,
+            product: str,
+            acquisition_date: date or str,
+            build: str = None,
+            pattern: str = None) -> List[str]:
+        URL = self.date_URL(platform, product, acquisition_date, build)
+        listing = self.get_HTTP_listing(URL, pattern)
+
+        return listing
+
+    def download_URL(
+            self,
+            URL: str,
+            download_location: str = None,
+            XML_retries: int = None,
+            XML_timeout_seconds: int = None,
+            download_retries: int = None,
+            download_wait_seconds: int = None) -> str:
+        if isdir(download_location):
+            filename = join(download_location, posixpath.basename(URL))
+        else:
+            filename = download_location
+
+        if exists(filename):
+            logger.info(f"file already retrieved: {cl.file(filename)}")
+            return filename
+
+        # metadata = self.get_metadata(URL)
+        metadata_URL = f"{URL}.xml"
+        logger.info(f"checking metadata: {cl.URL(metadata_URL)}")
+
+        if isdir(download_location):
+            metadata_filename = join(download_location, posixpath.basename(metadata_URL))
+        else:
+            metadata_filename = f"{download_location}.xml"
+
+        makedirs(dirname(metadata_filename), exist_ok=True)
+
+        if XML_retries is None:
+            XML_retries = XML_RETRIES
+
+        if XML_timeout_seconds is None:
+            XML_timeout_seconds = XML_TIMEOUT
+
+        if download_retries is None:
+            download_retries = DOWNLOAD_RETRIES
+
+        if download_wait_seconds is None:
+            download_wait_seconds = DOWNLOAD_WAIT_SECONDS
+
+        metadata = None
+
+        while XML_retries > 0:
+            XML_retries -= 1
+            command = f"wget -nc -c --user {self._username} --password {self._password} -O {metadata_filename} {metadata_URL}"
+            logger.info(command)
+            os.system(command)
+
+            if not exists(metadata_filename):
+                logger.warning(f"download not found for metadata URL: {metadata_URL}")
+                logger.warning(f"waiting {XML_timeout_seconds} for retry")
+                sleep(XML_timeout_seconds)
+                continue
+
+            XML_metadata_filesize = self.get_local_filesize(metadata_filename)
+
+            if XML_metadata_filesize == 0 and exists(metadata_filename):
+                logger.warning(f"removing corrupted zero-size metadata file: {metadata_filename}")
+
+                try:
+                    os.remove(metadata_filename)
+                except:
+                    logger.warning(f"unable to remove zero-size metadata file: {metadata_filename}")
+
+                logger.warning(f"waiting {XML_timeout_seconds} for retry")
+                sleep(XML_timeout_seconds)
+                continue
+
+            try:
+                with open(metadata_filename, "r") as file:
+                    metadata = xmltodict.parse(file.read())
+            except Exception as e:
+                logger.warning(e)
+                logger.warning(f"unable to parse metadata file: {metadata_filename}")
+                os.remove(metadata_filename)
+                logger.warning(f"waiting {XML_timeout_seconds} for retry")
+                sleep(XML_timeout_seconds)
+                continue
+
+        if metadata is None:
+            raise DownloadFailed(f"unable to retrieve metadata URL: {metadata_URL}")  # exit code 16
+
+        remote_checksum = str(
+            metadata["GranuleMetaDataFile"]["GranuleURMetaData"]["DataFiles"]["DataFileContainer"]["Checksum"])
+        checksum_type = str(
+            metadata["GranuleMetaDataFile"]["GranuleURMetaData"]["DataFiles"]["DataFileContainer"]["ChecksumType"])
+        remote_filesize = int(
+            metadata["GranuleMetaDataFile"]["GranuleURMetaData"]["DataFiles"]["DataFileContainer"]["FileSize"])
+
+        logger.info(
+            f"metadata retrieved {checksum_type} checksum: {cl.val(remote_checksum)} size: {cl.val(remote_filesize)} URL: {cl.URL(metadata_URL)}")
+        makedirs(dirname(filename), exist_ok=True)
+        logger.info(f"downloading {cl.URL(URL)} -> {cl.file(filename)}")
+
+        # Use a temporary file for downloading
+        temporary_filename = f"{filename}.download"
+
+        while download_retries > 0:
+            download_retries -= 1
+
+            try:
+                if exists(temporary_filename):
+                    temporary_filesize = self.get_local_filesize(temporary_filename)
+
+                    if temporary_filesize > remote_filesize:
+                        logger.warning(
+                            f"removing corrupted file with size {temporary_filesize} greater than remote size {remote_filesize}: {temporary_filename}")
+                        remove(temporary_filename)
+
+                    elif temporary_filesize == remote_filesize:
+                        local_checksum = self.get_local_checksum(temporary_filename, checksum_type=checksum_type)
+
+                        if local_checksum == remote_checksum:
+                            try:
+                                shutil.move(temporary_filename, filename)
+                            except Exception as e:
+                                if exists(filename):
+                                    logger.warning(f"unable to move temporary file: {temporary_filename}")
+                                    return filename
+
+                                logger.exception(e)
+                                raise DownloadFailed(f"unable to move temporary file: {temporary_filename}")
+
+                            return filename
+                        else:
+                            logger.warning(
+                                f"removing corrupted file with local checksum {local_checksum} and remote checksum {remote_checksum}: {temporary_filename}")
+                            remove(temporary_filename)
+                    else:
+                        logger.info(f"resuming incomplete download: {cl.file(temporary_filename)}")
+
+                command = f"wget -nc -c --user {self._username} --password {self._password} -O {temporary_filename} {URL}"
+                logger.info(command)
+                os.system(command)
+
+                if not exists(temporary_filename):
+                    raise ConnectionError(f"unable to download URL: {URL}")
+
+                local_filesize = self.get_local_filesize(temporary_filename)
+                local_checksum = self.get_local_checksum(temporary_filename, checksum_type=checksum_type)
+
+                if local_filesize != remote_filesize or local_checksum != remote_checksum:
+                    os.remove(temporary_filename)
+                    raise ConnectionError(
+                        f"removing corrupted file with local filesize {local_filesize} remote filesize {remote_filesize} local checksum {local_checksum} remote checksum {remote_checksum}: {temporary_filename}")
+
+                # Download successful, rename the temporary file to its proper name
+                shutil.move(temporary_filename, filename)
+
+                logger.info(
+                    f"successful download with filesize {cl.val(local_filesize)} checksum {cl.val(local_checksum)}: {cl.file(filename)}")
+
+                return filename
+            except Exception as e:
+                if download_retries == 0:
+                    raise e
+                else:
+                    logger.warning(e)
+                    logger.warning(f"waiting {download_wait_seconds} seconds to retry download")
+                    sleep(download_wait_seconds)
+                    continue
```
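
To illustrate how the class added above fits together, here is a hypothetical usage sketch. The credential lookup (a `~/.netrc` entry for `urs.earthdata.nasa.gov`, falling back to the `LPDAAC_USERNAME` and `LPDAAC_PASSWORD` environment variables) mirrors the `__init__` shown in the diff; the platform, product, build, and tile values are assumptions chosen to match the LP-DAAC data pool's directory layout, not calls taken from this package.

```python
# Hypothetical usage of the LPDAACDataPool class added in this diff.
# Credentials: __init__ first tries ~/.netrc, e.g.
#
#   machine urs.earthdata.nasa.gov
#       login YOUR_EARTHDATA_USERNAME
#       password YOUR_EARTHDATA_PASSWORD
#
# then falls back to the LPDAAC_USERNAME / LPDAAC_PASSWORD environment variables.
import posixpath
from os import makedirs

from ECOv003_L2T_STARS.LPDAAC.LPDAACDataPool import LPDAACDataPool

pool = LPDAACDataPool()

# "VIIRS" / "VNP09GA" / build 2 are assumed values that product_directory maps to
# https://e4ftl01.cr.usgs.gov/VIIRS/VNP09GA.002
dates = pool.dates(platform="VIIRS", product="VNP09GA", build=2)

# List granules for the most recent date, filtered by an assumed tile pattern.
granules = pool.files(
    platform="VIIRS",
    product="VNP09GA",
    acquisition_date=dates[-1],
    build=2,
    pattern="*h11v05*.h5"
)

if granules:
    # download_URL verifies file size and CKSUM/MD5 checksum against the granule's
    # .xml sidecar, retrying through a temporary .download file.
    URL = posixpath.join(pool.date_URL("VIIRS", "VNP09GA", dates[-1], build=2), granules[0])
    makedirs("VNP09GA_download", exist_ok=True)  # create it so download_location is treated as a directory
    filename = pool.download_URL(URL, download_location="VNP09GA_download")
```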
```diff
@@ -0,0 +1 @@
+0.1.3
```
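
The download path above trusts a granule only once its size and checksum match the `.xml` sidecar that LP-DAAC publishes alongside each file. Below is a standalone sketch of that verification using the same metadata fields read by `download_URL`; the granule path is an assumption for illustration.

```python
# Sketch: verify a downloaded LP-DAAC granule against its .xml sidecar, using the
# same GranuleMetaDataFile fields that download_URL checks above.
import hashlib
from os.path import getsize

import xmltodict
from pycksum import cksum  # POSIX cksum, as used by get_local_checksum

granule_path = "VNP09GA_download/granule.h5"  # assumed path for illustration

with open(f"{granule_path}.xml") as file:
    container = xmltodict.parse(file.read())[
        "GranuleMetaDataFile"]["GranuleURMetaData"]["DataFiles"]["DataFileContainer"]

with open(granule_path, "rb") as file:
    if container["ChecksumType"] == "MD5":
        local_checksum = hashlib.md5(file.read()).hexdigest()
    else:  # "CKSUM"
        local_checksum = str(int(cksum(file)))

size_ok = getsize(granule_path) == int(container["FileSize"])
checksum_ok = str(local_checksum) == str(container["Checksum"])
print(f"size match: {size_ok}, checksum match: {checksum_ok}")
```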