smashbox-1.0-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smashbox/.spyproject/config/backups/codestyle.ini.bak +8 -0
- smashbox/.spyproject/config/backups/encoding.ini.bak +6 -0
- smashbox/.spyproject/config/backups/vcs.ini.bak +7 -0
- smashbox/.spyproject/config/backups/workspace.ini.bak +12 -0
- smashbox/.spyproject/config/codestyle.ini +8 -0
- smashbox/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini +5 -0
- smashbox/.spyproject/config/defaults/defaults-encoding-0.2.0.ini +3 -0
- smashbox/.spyproject/config/defaults/defaults-vcs-0.2.0.ini +4 -0
- smashbox/.spyproject/config/defaults/defaults-workspace-0.2.0.ini +6 -0
- smashbox/.spyproject/config/encoding.ini +6 -0
- smashbox/.spyproject/config/vcs.ini +7 -0
- smashbox/.spyproject/config/workspace.ini +12 -0
- smashbox/__init__.py +8 -0
- smashbox/asset/flwdir/flowdir_fr_1000m.tif +0 -0
- smashbox/asset/outlets/.Rhistory +0 -0
- smashbox/asset/outlets/db_bnbv_fr.csv +142704 -0
- smashbox/asset/outlets/db_bnbv_light.csv +42084 -0
- smashbox/asset/outlets/db_sites.csv +8700 -0
- smashbox/asset/outlets/db_stations.csv +2916 -0
- smashbox/asset/outlets/db_stations_example.csv +19 -0
- smashbox/asset/outlets/edit_database.py +185 -0
- smashbox/asset/outlets/readme.txt +5 -0
- smashbox/asset/params/ci.tif +0 -0
- smashbox/asset/params/cp.tif +0 -0
- smashbox/asset/params/ct.tif +0 -0
- smashbox/asset/params/kexc.tif +0 -0
- smashbox/asset/params/kmlt.tif +0 -0
- smashbox/asset/params/llr.tif +0 -0
- smashbox/asset/setup/setup_rhax_gr4_dt3600.yaml +15 -0
- smashbox/asset/setup/setup_rhax_gr4_dt900.yaml +15 -0
- smashbox/asset/setup/setup_rhax_gr5_dt3600.yaml +15 -0
- smashbox/asset/setup/setup_rhax_gr5_dt900.yaml +15 -0
- smashbox/init/README.md +3 -0
- smashbox/init/__init__.py +3 -0
- smashbox/init/multimodel_statistics.py +405 -0
- smashbox/init/param.py +799 -0
- smashbox/init/smashbox.py +186 -0
- smashbox/model/__init__.py +1 -0
- smashbox/model/atmos_data_connector.py +518 -0
- smashbox/model/mesh.py +185 -0
- smashbox/model/model.py +829 -0
- smashbox/model/setup.py +109 -0
- smashbox/plot/__init__.py +1 -0
- smashbox/plot/myplot.py +1133 -0
- smashbox/plot/plot.py +1662 -0
- smashbox/read_inputdata/__init__.py +1 -0
- smashbox/read_inputdata/read_data.py +1229 -0
- smashbox/read_inputdata/smashmodel.py +395 -0
- smashbox/stats/__init__.py +1 -0
- smashbox/stats/mystats.py +1632 -0
- smashbox/stats/stats.py +2022 -0
- smashbox/test.py +532 -0
- smashbox/test_average_stats.py +122 -0
- smashbox/test_mesh.r +8 -0
- smashbox/test_mesh_from_graffas.py +69 -0
- smashbox/tools/__init__.py +1 -0
- smashbox/tools/geo_toolbox.py +1028 -0
- smashbox/tools/tools.py +461 -0
- smashbox/tutorial_R.r +182 -0
- smashbox/tutorial_R_graffas.r +88 -0
- smashbox/tutorial_R_graffas_local.r +33 -0
- smashbox/tutorial_python.py +102 -0
- smashbox/tutorial_readme.py +261 -0
- smashbox/tutorial_report.py +58 -0
- smashbox/tutorials/Python_tutorial.md +124 -0
- smashbox/tutorials/R_Graffas_tutorial.md +153 -0
- smashbox/tutorials/R_tutorial.md +121 -0
- smashbox/tutorials/__init__.py +6 -0
- smashbox/tutorials/generate_doc.md +7 -0
- smashbox-1.0.dist-info/METADATA +998 -0
- smashbox-1.0.dist-info/RECORD +73 -0
- smashbox-1.0.dist-info/WHEEL +5 -0
- smashbox-1.0.dist-info/licenses/LICENSE +100 -0
smashbox/read_inputdata/read_data.py
@@ -0,0 +1,1229 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 25 11:11:03 2024

@author: maxime
"""
import glob
import os
import re
import warnings
import numpy as np
import pandas as pd
import rasterio
from smash.fcore._mwd_mesh import MeshDT
import shutil
import tqdm
import smash
import pytz
from pathlib import Path
import time
from smashbox.tools import geo_toolbox
from smashbox.tools import tools

RATIO_PET_HOURLY = np.array(
    [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0.035,
        0.062,
        0.079,
        0.097,
        0.11,
        0.117,
        0.117,
        0.11,
        0.097,
        0.079,
        0.062,
        0.035,
        0,
        0,
        0,
        0,
        0,
    ],
    dtype=np.float32,
)
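

# NOTE (editorial illustration, not part of the released read_data.py): the 24 hourly
# weights above sum to 1.0, so multiplying a daily PET grid by RATIO_PET_HOURLY[h]
# yields the fraction of the daily total assigned to hour h. The helper below is a
# hypothetical self-check added only to document that property.
def _example_ratio_pet_hourly_is_normalized():
    # 0.035 + 0.062 + 0.079 + 0.097 + 0.11 + 0.117 = 0.5 for each half of the day
    assert np.isclose(RATIO_PET_HOURLY.sum(), 1.0)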

# Compute the time lag between 2 timezones


def tz_diff(date, tz1, tz2):
    """
    Returns the difference in hours between timezone1 and timezone2
    for a given date.
    """
    date = pd.to_datetime(date)
    return (tz1.localize(date) - tz2.localize(date).astimezone(tz1)).seconds / 3600
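

# NOTE (editorial illustration, not part of the released read_data.py): tz_diff returns
# the offset in hours of tz1 relative to tz2 at the given date. The timezone names below
# are only an assumption for the demo.
def _example_tz_diff():
    # UTC vs Europe/Paris on a winter date (UTC+1) is expected to give 1.0
    return tz_diff("2020-01-01", pytz.timezone("UTC"), pytz.timezone("Europe/Paris"))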


# Must be done char by char to take into account various formats. Must be documented.
# Can't support all date formatters.
# Advantage: can work even if files are not sorted by date.


def _build_date_regex_pattern(date_pattern) -> str:
    # Supported date formatters
    datefmt = {"%Y": 4, "%m": 2, "%d": 2, "%H": 2, "%M": 2, "%S": 2}

    regex = ""
    i = 0
    while i < len(date_pattern):
        if date_pattern[i : i + 2] in datefmt:
            regex += r"\d{" + str(datefmt[date_pattern[i : i + 2]]) + "}"
            i = i + 2
        else:
            regex += r"\\" + date_pattern[i]
            i = i + 1

    return regex
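

# NOTE (editorial illustration, not part of the released read_data.py): a minimal sketch
# of how a strftime-style pattern is expanded into the regex used to locate dates in
# filenames. The helper name is hypothetical.
def _example_build_date_regex_pattern():
    regex = _build_date_regex_pattern("%Y%m%d%H")  # -> r"\d{4}\d{2}\d{2}\d{2}"
    assert re.fullmatch(regex, "2024062511") is not None
    return regex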


# Function which splits the date_pattern into 2 parts:
# 1) the date pattern
# 2) the position of the occurrence of the date pattern when searching for it in a text
# Example: date_pattern="%Y%m%d%H%1" returns "%Y%m%d%H" and 1


def _split_date_occurence(date_pattern) -> str:
    re_match = re.search(r"\%[0-9]+$", date_pattern)
    if re_match is not None:
        pattern = date_pattern[0 : int(re_match.span()[0])]
        occurence = int(re_match.group()[1])
        return pattern, occurence
    else:
        return date_pattern, 0
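

# NOTE (editorial illustration, not part of the released read_data.py): a trailing "%<n>"
# selects which occurrence of the date to use when a filename contains several date-like
# substrings; without it, the first occurrence (index 0) is used.
def _example_split_date_occurence():
    assert _split_date_occurence("%Y%m%d%H%1") == ("%Y%m%d%H", 1)
    assert _split_date_occurence("%Y%m%d") == ("%Y%m%d", 0)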


# We suppose that the atmos files are sorted in ascending order by date
# (this is a drawback of the method)! Must be documented!
# Only used for daily interannual pet


def _check_files_containing_date(
    files: str, date: pd.Timestamp, date_pattern: str
) -> int:
    date_string = date.strftime(date_pattern)
    ind = -1
    for i, f in enumerate(files):
        # print(i,date_string,os.path.basename(f))
        if date_string in os.path.basename(f):
            ind = i

    return ind
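

# NOTE (editorial illustration, not part of the released read_data.py): the last file
# whose basename contains the formatted date wins; -1 flags a missing date. The paths
# below are made up for the demo.
def _example_check_files_containing_date():
    files = ["/data/pet_0101.tif", "/data/pet_0102.tif"]
    assert _check_files_containing_date(files, pd.Timestamp("2020-01-02"), "%m%d") == 1
    assert _check_files_containing_date(files, pd.Timestamp("2020-01-03"), "%m%d") == -1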


# Test if files are sorted according to the date
# Test if the model time step fits the data
# Create n samples of niter contiguous dates
# return True or False


def _sample_delta_time_in_file(files, date_pattern, dt):
    d_pattern, occurence = _split_date_occurence(date_pattern)
    regex_date = _build_date_regex_pattern(d_pattern)

    if len(files) <= 2:
        return False

    nsample = 5
    niter = 100
    list_delta_time = []
    list_date_block = []

    for i in range(0, len(files), max(int(len(files) / nsample), niter)):

        for j in range(min(niter, len(files) - i - 1)):

            re_match_0 = re.findall(regex_date, os.path.basename(files[i + j]))
            re_match_1 = re.findall(regex_date, os.path.basename(files[i + j + 1]))

            if len(re_match_0) > 0 and len(re_match_1) > 0:
                date_match_0 = pd.to_datetime(re_match_0[occurence], format=d_pattern)
                date_match_1 = pd.to_datetime(re_match_1[occurence], format=d_pattern)
                delta_time = date_match_1 - date_match_0
                list_delta_time.append(delta_time.total_seconds())

                if j == 0:
                    list_date_block.append(date_match_0)

                if delta_time.total_seconds() < 0:
                    return False

    # Test if all sampled date blocks are in ascending order
    if len(list_date_block) > 1:
        for i in range(len(list_date_block) - 1):
            delta_time = list_date_block[i + 1] - list_date_block[i]

            if delta_time.total_seconds() < 0.0:
                return False

    if len(list_delta_time) > 0:
        if np.min(np.array(list_delta_time)) != dt:
            # sorted but not at the right time-step
            raise ValueError(
                "Precipitation files are sorted with the date pattern but not at the good time-step:"
                f" prcp time-step={list_delta_time[0]}, model time-step={dt}"
            )

        # Finally test if all dates are in ascending order
        if np.all(np.array(list_delta_time) > 0):
            return True
        else:
            return False

    else:
        raise ValueError(
            "Date Pattern was not found in the list of the precipitation files"
            f" while searching pattern={d_pattern} with corresponding regex={regex_date}"
            f" In filename {files[0]},"
        )
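

# NOTE (editorial illustration, not part of the released read_data.py): with hourly files
# named after their date and a model time-step dt=3600 s, the sampled deltas all equal dt
# and the list is reported as sorted. The file names are made up for the demo.
def _example_sample_delta_time_in_file():
    files = [f"/data/prcp_20200101{h:02d}.tif" for h in range(4)]
    return _sample_delta_time_in_file(files, "%Y%m%d%H", 3600)  # expected True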


# Search algorithm
# Search for a pattern in a sorted list of files
# return the first position after or equal to date in files


def _fast_index_search_for_date(files, date, date_pattern):
    # print("search for",date)
    d_pattern, occurence = _split_date_occurence(date_pattern)
    regex_date = _build_date_regex_pattern(d_pattern)

    re_match = re.findall(regex_date, os.path.basename(files[-1]))
    if len(re_match) > 0:
        date_match = pd.to_datetime(re_match[occurence], format=d_pattern)

        # case 1
        if date > date_match:
            return len(files)

    else:
        raise ValueError(
            f"Date formatter {d_pattern} corresponding to regex {regex_date}"
            " not found in filename {os.path.basename(files[i])}"
        )

    re_match = re.findall(regex_date, os.path.basename(files[0]))
    if len(re_match) > 0:
        date_match = pd.to_datetime(re_match[occurence], format=d_pattern)

        # case 2
        if date < date_match:
            return 0

    else:
        raise ValueError(
            f"Date formatter {d_pattern} corresponding to regex {regex_date}"
            " not found in filename {os.path.basename(files[i])}"
        )

    # case 3
    pos = 0
    final_pos = 0
    move = 1
    previous_move = 0
    step = len(files) - 1
    nb_iter = 0
    while nb_iter < 100:
        re_match = re.findall(regex_date, os.path.basename(files[pos]))
        date_match = pd.to_datetime(re_match[occurence], format=d_pattern)

        # print(
        #     "At iteration "
        #     + str(nb_iter)
        #     +f" searching {date},"
        #     + "and match "
        #     + date_match.strftime("%Y-%m-%d %H:%M:%S")
        #     + f" at pos {pos}"
        # )

        if date_match < date:
            pos = min(pos + step, len(files) - 1)
            move = 1
        elif date_match > date:
            pos = max(pos - step, 0)
            move = -1
        elif date_match == date:
            final_pos = pos
            break

        if step == 0:
            if date_match < date:
                final_pos = pos + 1
                break
            elif date_match > date:
                final_pos = pos
                break

        # print(f"-> Move to pos {pos} with step {step}")

        # reduce step only if we change search direction
        if previous_move != move:
            step = int(step / 2)

        nb_iter = nb_iter + 1
        previous_move = move

    # print(
    #     "At last iteration "
    #     + str(nb_iter)
    #     +f" searching {date},"
    #     + "and match "
    #     + date_match.strftime("%Y-%m-%d %H:%M:%S")
    #     + f" at pos {pos}"
    # )

    # iter>maxlimit:
    if date_match < date:
        final_pos = pos
    elif date_match > date:
        # back to previous pos
        final_pos = pos - 2 * step

    return final_pos
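

# NOTE (editorial illustration, not part of the released read_data.py): on a date-sorted
# list the search jumps by `step`, halving the step only when it overshoots, and returns
# the first index whose date is >= the requested one. File names are made up for the demo.
def _example_fast_index_search_for_date():
    files = [f"/data/prcp_20200101{h:02d}.tif" for h in range(24)]
    return _fast_index_search_for_date(
        files, pd.Timestamp("2020-01-01 05:00"), "%Y%m%d%H"
    )  # expected 5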


# Getting only files for the specified daterange
# works even if files are not sorted according to the date pattern
def _get_files_list_for_date_range(files, date_pattern, date_range):
    d_pattern, occurence = _split_date_occurence(date_pattern)
    regex_date = _build_date_regex_pattern(d_pattern)

    vec_date = []
    if len(files) > 0:
        for i, f in enumerate(files):
            re_match = re.findall(regex_date, os.path.basename(f))

            if len(re_match) > 0:
                vec_date.append(re_match[occurence])

    vec_date = pd.to_datetime(vec_date, format=d_pattern)
    vec_date = vec_date.strftime("%Y%m%d%H%M%S")

    # convert to numpy array
    np_lst_date = np.array(vec_date)
    # sort but keep indexes
    sorted_indices = np_lst_date.argsort()
    # sort according to previous indexes
    np_lst_date_sorted = np_lst_date[sorted_indices]

    np_date_range = np.array(date_range.strftime("%Y%m%d%H%M%S").to_list())

    # build the list of indexes only for the daterange
    index_list_for_daterange = []
    for i in range(len(np_date_range)):
        pos = np.where(np_lst_date_sorted == np_date_range[i])
        if len(pos[0]) > 0:
            index_list_for_daterange.append(pos[0][0])
        else:
            index_list_for_daterange.append(-1)

    # find the final list of files for daterange (sorted)
    final_list_files = []
    for index in index_list_for_daterange:
        if index >= 0:
            final_list_files.append(files[sorted_indices[index]])
        else:
            final_list_files.append("miss")

    return final_list_files
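

# NOTE (editorial illustration, not part of the released read_data.py): each requested
# timestamp is mapped to exactly one file, and dates with no matching file are flagged
# with the string "miss". File names are made up for the demo.
def _example_get_files_list_for_date_range():
    files = ["/data/prcp_2020010102.tif", "/data/prcp_2020010101.tif"]  # deliberately unsorted
    dates = pd.date_range("2020-01-01 01:00", periods=3, freq="1h")
    return _get_files_list_for_date_range(files, "%Y%m%d%H", dates)
    # expected: ['/data/prcp_2020010101.tif', '/data/prcp_2020010102.tif', 'miss']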


# Getting only files for the specified daterange
# works even if files are not sorted according to the date pattern
# Duplicate of _get_files_list_for_date_range but with a hack for leap years...
def _get_files_list_for_date_range_interannuel(files, date_pattern, date_range):
    d_pattern, occurence = _split_date_occurence(date_pattern)
    regex_date = _build_date_regex_pattern(d_pattern)

    vec_date = []
    if len(files) > 0:
        for i, f in enumerate(files):
            re_match = re.findall(regex_date, os.path.basename(f))
            if len(re_match) > 0:
                vec_date.append("1904" + re_match[occurence])  # leap year
    vec_date = pd.to_datetime(vec_date, format="%Y%m%d")  # match 19040402
    vec_date = vec_date.strftime("%Y%m%d%H%M%S")

    # convert to numpy array
    np_lst_date = np.array(vec_date)
    # sort but keep indexes
    sorted_indices = np_lst_date.argsort()
    # sort according to previous indexes
    np_lst_date_sorted = np_lst_date[sorted_indices]

    np_date_range = np.array(date_range.strftime("%Y%m%d%H%M%S").to_list())

    # build the list of indexes only for the daterange
    index_list_for_daterange = []
    for i in range(len(np_date_range)):
        pos = np.where(np_lst_date_sorted == np_date_range[i])

        if len(pos[0]) > 0:
            index_list_for_daterange.append(pos[0][0])
        else:
            index_list_for_daterange.append(-1)

    # find the final list of files for daterange (sorted)
    final_list_files = []
    for index in index_list_for_daterange:
        if index >= 0:
            final_list_files.append(files[sorted_indices[index]])
        else:
            final_list_files.append("miss")

    return final_list_files


# Get the list of files to read for a specific date_range


def _get_atmos_files(
    dir: str,
    fmt: str,
    access: str,
    dt: float,
    date_range: pd.DatetimeIndex,
    date_pattern: str,
    daily_interannual: bool = False,
) -> list[str]:
    # Set to drop duplicates after strftime

    date_range_strftime_access = set(date_range.strftime(access)) if access else {""}
    files = []

    acces_try = 0
    res = False
    while res == False:

        res = os.access(f"{dir}", os.R_OK)
        if res:
            for date_strftime_access in date_range_strftime_access:
                prcp_path = Path(f"{dir}/{date_strftime_access}/")
                files.extend(sorted(prcp_path.glob(f"**/*{fmt}")))
        else:
            time.sleep(0.1)
            acces_try = acces_try + 1
            print(
                f"</> WARNING: {dir} is unavailable ... waiting 2s max and trying again {acces_try}/20..."
            )
            if acces_try > 20:
                print(
                    f"</> WARNING !! atmos files directory {dir} is inaccessible after 20 tries (2 seconds) => atmos files are missing !"
                )
                break

    files = sorted(files)

    # Return a list of files in date_range with potential missing files (366 files max)
    if daily_interannual:

        # list of daily dates
        date_range_interannual = pd.date_range(
            start=date_range[0].strftime("%Y%m%d"),
            end=date_range[-1].strftime("%Y%m%d"),
            freq="1d",
        )

        # conversion to interannual format + use year 1904 to manage leap years
        date_range_interannual = pd.to_datetime(
            "1904" + date_range_interannual.strftime("%m%d"), format="%Y%m%d"
        )

        # we build the file list according to the date_range (sorted): we have 1 date <=> 1 file
        final_list_files = _get_files_list_for_date_range_interannuel(
            files, "%m%d", date_range_interannual
        )

        return final_list_files

    # Return a list of files in date_range with potential missing files
    else:

        # Only here with model time-step
        final_list_files = []
        if len(files) == 0:
            for i in range(len(date_range)):
                final_list_files.append("miss")
            return final_list_files

        # Check if files are sorted with the date pattern (for long lists of files)
        if len(files) >= 100:
            # ensure files are alphabetically sorted (glob does not sort the list; it depends on the file system)
            # this operation can be very long ?
            # files.sort()
            test = _sample_delta_time_in_file(files, date_pattern, dt)

            # if sorted we quickly search the index of date_start and date_end
            # if not sorted we parse all files and keep the dates
            if test:
                # files seem sorted
                pos_0 = _fast_index_search_for_date(files, date_range[0], date_pattern)
                pos_1 = _fast_index_search_for_date(files, date_range[-1], date_pattern)

                files = files[pos_0 : pos_1 + 1]
            else:
                print(
                    "Warning: atmospheric filenames are not sorted by date."
                    " Reading atmospheric data may take more time."
                )

    # we build the file list according to the date_range (sorted): we have 1 date <=> 1 file
    final_list_files = _get_files_list_for_date_range(files, date_pattern, date_range)

    return final_list_files


# This function seems to work better but requires more tests
def _read_windowed_raster_new(
    path,
    mesh: dict | MeshDT | None = None,
    transform: dict = None,
    order: int = 0,
    cval=-99.0,
    grid_mode=True,
) -> tuple[np.ndarray, dict[str, int]]:
    """

    Description
    -----------

    Read a part of a raster and resample it if required.

    Parameters
    ----------

    path: str()
        Input geotif path

    mesh: dict() or MeshDT
        A smash mesh

    transform: dict()
        transform dictionary from the rasterio.get_transform() method (xmin, ymax, xres, yres). May be used to replace the mesh


    resampling_method: str()
        scipy-zoom or numpy; use the scipy zoom function or numpy array functions

    order: int()
        order of the resampling cubic interpolation

    cval: float() | np.nan
        fill value for the extended boundaries

    grid_mode: bool()
        True | False. If True, coordinates start from the edge of the cell; if False, from the center of the cell.

    Return
    ------

    numpy.array()
        Cropped and resampled array according to bbox_out and res_out

    """

    if transform is not None:
        mesh_dict = transform
    else:
        if not isinstance(mesh, dict):
            mesh_dict = tools.read_object_as_dict(mesh)
        else:
            mesh_dict = mesh

    ds_in = rasterio.open(path)
    matrix = ds_in.read()

    bbox_in = ds_in.bounds._asdict()
    res_in = {"dx": ds_in.res[0], "dy": ds_in.res[1]}

    bbox_out = geo_toolbox.get_bbox_from_smash_mesh(mesh_dict)
    res_out = {"dx": mesh_dict["xres"], "dy": mesh_dict["yres"]}

    cropped_array = geo_toolbox.crop_array(
        matrix[0, :, :], bbox_in, res_in, bbox_out, res_out, order=order
    )

    warning = {"res": 0, "overlap": 0, "outofbound": 0}

    if res_in["dx"] != res_out["dx"] or res_in["dy"] != res_out["dy"]:
        warning["res"] = 1

    return cropped_array, warning


def _rasterio_read_windowed_raster(path="", mesh: dict = None):
    """
    Description
    -----------

    Read a part of a raster with rasterio and resample it if required.

    Parameters
    ----------

    path: str()
        Input geotif path

    mesh: dict() or MeshDT
        A smash mesh

    Return
    ------

    numpy.array()
        Cropped and resampled array according to bbox_out and res_out

    """

    ds_in = rasterio.open(path)

    col_off = max(mesh["xmin"] - ds_in.bounds.left, 0) / ds_in.res[0]
    row_off = max(ds_in.bounds.top - mesh["ymax"], 0) / ds_in.res[1]
    ncols = (mesh["ncol"] * mesh["xres"]) / ds_in.res[0]
    nrows = (mesh["nrow"] * mesh["yres"]) / ds_in.res[1]

    window = rasterio.windows.Window(
        round(col_off), round(row_off), int(ncols), int(nrows)
    )

    matrix = ds_in.read(
        window=window,
        out_shape=(
            ds_in.count,
            mesh["nrow"],
            mesh["ncol"],
        ),
        resampling=rasterio.enums.Resampling.nearest,
    )

    return matrix


# this function has a bug. It shifts the grid and fills the borders with the nodata value if shifted.
def _read_windowed_raster(
    path, mesh: MeshDT | None = None, transform: dict = None
) -> tuple[np.ndarray, dict[str, int]]:
    warning = {"res": 0, "overlap": 0, "outofbound": 0}

    if transform is not None:
        mesh = transform

    # % Get raster information
    with rasterio.open(path) as ds:
        transform = ds.get_transform()
        xmin = transform[0]
        ymax = transform[3]
        xres = transform[1]
        yres = -transform[5]

        # % Manage absolute tolerance wrt xres or yres value
        atol = 1e-2
        xatol = atol * 10 ** min(0, np.floor(np.log10(np.abs(xres))))
        yatol = atol * 10 ** min(0, np.floor(np.log10(np.abs(yres))))

        # % Resolution mismatch
        if not np.isclose(mesh.xres, xres, atol=xatol) or not np.isclose(
            mesh.yres, yres, atol=yatol
        ):
            warning["res"] = 1

        # % Overlap mismatch
        dxmin = mesh.xmin - xmin
        dymax = ymax - mesh.ymax
        xol_match = np.abs(dxmin / xres - np.round(dxmin / xres))
        yol_match = np.abs(dymax / yres - np.round(dymax / yres))
        if not np.isclose(xol_match, 0, atol=xatol) or not np.isclose(
            yol_match, 0, atol=yatol
        ):
            warning["overlap"] = 1

        # # % Allocate buffer
        arr = np.zeros(shape=(mesh.nrow, mesh.ncol), dtype=np.float32)
        arr.fill(np.float32(-99))

        # % Pad offset to the nearest integer
        xoff = np.rint((mesh.xmin - xmin) / xres)
        yoff = np.rint((ymax - mesh.ymax) / yres)

        # % Resolution ratio
        xres_ratio = mesh.xres / xres
        yres_ratio = mesh.yres / yres

        # Reading window
        win_xsize = np.rint(mesh.ncol * xres_ratio)
        win_ysize = np.rint(mesh.nrow * yres_ratio)

        # % Totally out of bound
        # % Return the buffer with no data
        if (
            xoff >= ds.width
            or yoff >= ds.height
            or xoff + win_xsize <= 0
            or yoff + win_ysize <= 0
        ):
            warning["outofbound"] = 2
            return arr, warning

        # % Partially out of bound
        if (
            xoff < 0
            or yoff < 0
            or xoff + win_xsize > ds.width
            or yoff + win_ysize > ds.height
        ):
            warning["outofbound"] = 1

        # % Readjust offset
        pxoff = max(0, xoff)
        pyoff = max(0, yoff)

        # % Readjust reading window
        pwin_xsize = min(win_xsize + min(0, xoff), ds.width - pxoff)
        pwin_ysize = min(win_ysize + min(0, yoff), ds.height - pyoff)

        # % Buffer slices
        xs = pxoff - xoff
        ys = pyoff - yoff
        xe = xs + pwin_xsize
        ye = ys + pwin_ysize
        arr_xslice = slice(int(xs * 1 / xres_ratio), int(xe * 1 / xres_ratio))
        arr_yslice = slice(int(ys * 1 / yres_ratio), int(ye * 1 / yres_ratio))

        # % Reading and writing into buffer
        ds.read(
            1,
            window=rasterio.windows.Window(pxoff, pyoff, pwin_xsize, pwin_ysize),
            out=arr[arr_yslice, arr_xslice],
        )

        # % Manage NoData
        nodata = ds.nodata
        if nodata is not None:
            arr = np.where(arr == nodata, -99.0, arr)

    return arr, warning


def _get_reading_warning_message(reading_warning: dict[str, int | list[str]]) -> str:
    msg = []

    if reading_warning["got"]:
        if len(reading_warning["got"]) > 1:
            msg.append(
                f"Reading success: read {len(reading_warning['got'])} file(s): "
                f"{reading_warning['got'][0]}, ..., {reading_warning['got'][0-1]}"
            )
        else:
            msg.append(
                f"Reading success: read {len(reading_warning['got'])} file(s): "
                f"{reading_warning['got'][0]}"
            )

    if reading_warning["miss"]:
        msg.append(
            f"Missing warning: missing {len(reading_warning['miss'])} file(s): "
            f"{reading_warning['miss']}"
        )

    if reading_warning["res"]:
        msg.append(
            "Resolution warning: resolution mismatch between mesh and at least one raster file. Nearest "
            "neighbour resampling algorithm has been used to match mesh resolution"
        )

    if reading_warning["overlap"]:
        msg.append(
            "Overlap warning: overlap mismatch between mesh and at least one raster file. Cropping domain "
            "has been updated to the nearest overlapping cell"
        )

    if reading_warning["outofbound"]:
        kind = "partially" if reading_warning["outofbound"] == 1 else "totally"
        msg.append(
            f"Out of bound warning: mesh is {kind} out of bound for at least one raster file. Out of bound "
            f"domain has been filled in with no data"
        )

    return "\n".join(msg)


# TODO: Unique function to read prcp, snow and temp
def _read_atmos_data(model, atmos_data, directories):

    date_range = pd.date_range(
        start=model.setup.start_time,
        end=model.setup.end_time,
        freq=f"{int(model.setup.dt)}s",
    )[1:]

    reading_warning = {"got": [], "miss": [], "res": 0, "overlap": 0, "outofbound": 0}

    # atmos_data_directory = getattr(model.setup, f"{atmos_data}_directory")
    atmos_data_format = getattr(model.setup, f"{atmos_data}_format")
    atmos_date_pattern = getattr(model.setup, f"{atmos_data}_date_pattern")
    prcp_access = getattr(model.setup, "prcp_access")
    prcp_conversion_factor = getattr(model.setup, "prcp_conversion_factor")
    sparse_storage = getattr(model.setup, "sparse_storage")
    dt = getattr(model.setup, "dt")

    sparse_matrix = None
    std_storage = None
    if sparse_storage:
        sparse_matrix = getattr(model._input_data.atmos_data, f"sparse_{atmos_data}")
    else:
        std_storage = getattr(model._input_data.atmos_data, f"{atmos_data}")

    if atmos_data_format == "tif":

        file_list = []
        keys = sorted(directories.keys())
        for key in keys:

            # atmos_data_directory = directories[i]
            atmos_data_directory = directories[key]
            files = _get_atmos_files(
                atmos_data_directory,
                atmos_data_format,
                prcp_access,
                dt,
                date_range,
                atmos_date_pattern,
            )

            if len(file_list) == 0:
                file_list = files

                lacuna = []
                for k, f in enumerate(files):
                    if f == "miss":
                        lacuna.append(k)
            else:

                for k in lacuna:
                    file_list[k] = files[k]

                lacuna = []
                for k, f in enumerate(files):
                    if f == "miss":
                        lacuna.append(k)

            if len(lacuna) == 0:
                break

        for i, date in enumerate(tqdm.tqdm(date_range, desc=f"</> Reading {atmos_data}")):
            atmos_file = file_list[i]
            if atmos_file == "miss":
                reading_warning["miss"].append(
                    date.strftime("%Y-%m-%d %H:%M")
                    + f", (matching pattern {date.strftime(atmos_date_pattern)})"
                )
                if sparse_storage:
                    matrix = np.zeros(
                        shape=(model.mesh.nrow, model.mesh.ncol),
                        dtype=np.float32,
                        order="F",
                    )
                    matrix.fill(np.float32(-99))
                    smash.fcore._mwd_sparse_matrix_manipulation.matrix_to_sparse_matrix(
                        model.mesh,
                        matrix,
                        np.float32(-99),
                        sparse_matrix[i],
                    )

                else:
                    std_storage[..., i] = np.float32(-99)

            else:
                reading_warning["got"].append(
                    date.strftime("%Y-%m-%d %H:%M") + f" ({os.path.basename(atmos_file)})"
                )

                matrix, warning = _read_windowed_raster_new(atmos_file, model.mesh)
                matrix *= prcp_conversion_factor
                reading_warning.update(
                    {k: v for k, v in warning.items() if not reading_warning[k]}
                )

                if sparse_storage:
                    smash.fcore._mwd_sparse_matrix_manipulation.matrix_to_sparse_matrix(
                        model.mesh,
                        matrix,
                        np.float32(0),
                        sparse_matrix[i],
                    )

                else:
                    std_storage[..., i] = matrix

        # files = files[1:]

        if sparse_matrix:
            setattr(
                model._input_data.atmos_data, f"sparse_{atmos_data}", sparse_matrix
            )
        else:
            setattr(model._input_data.atmos_data, f"{atmos_data}", std_storage)

    # % WIP
    elif atmos_data_format == "nc":
        raise NotImplementedError("NetCDF format not implemented yet")

    msg = _get_reading_warning_message(reading_warning)

    if msg:
        warnings.warn(f"Warning(s) linked to {atmos_data} reading.\n{msg}", stacklevel=2)


def _read_interannual_pet(model, directories, tz="UTC"):
    date_range = pd.date_range(
        start=model.setup.start_time,
        end=model.setup.end_time,
        freq=f"{int(model.setup.dt)}s",
    )[1:]

    reading_warning = {"got": [], "miss": [], "res": 0, "overlap": 0, "outofbound": 0}

    if model.setup.pet_format == "tif":

        file_list = []
        keys = sorted(directories.keys())
        for key in keys:

            atmos_data_directory = directories[key]

            files = _get_atmos_files(
                atmos_data_directory,
                model.setup.pet_format,
                model.setup.pet_access,
                model.setup.dt,
                date_range,
                model.setup.pet_date_pattern,
                model.setup.daily_interannual_pet,
            )

            if len(file_list) == 0:
                file_list = files

                lacuna = []
                for k, f in enumerate(files):
                    if f == "miss":
                        lacuna.append(k)
            else:

                for k in lacuna:
                    file_list[k] = files[k]

                lacuna = []
                for k, f in enumerate(files):
                    if f == "miss":
                        lacuna.append(k)

            if len(lacuna) == 0:
                break

        if model.setup.daily_interannual_pet:
            leap_year_days = pd.date_range(
                start="202001010000", end="202012310000", freq="1D"
            )
            step_offset = int(
                (date_range[0] - date_range[0].floor("D")).total_seconds()
                / model.setup.dt
            )
            nstep_per_day = int(86_400 / model.setup.dt)
            hourly_ratio = 3_600 / model.setup.dt

            ratio_shift = tz_diff(
                "2020/01/01 12:00", pytz.timezone("UTC"), pytz.timezone(tz)
            )
            RATIO_PET_HOURLY_SHIFTED = np.roll(RATIO_PET_HOURLY, int(-1 * ratio_shift))

            if hourly_ratio >= 1:
                ratio = np.repeat(RATIO_PET_HOURLY_SHIFTED, hourly_ratio) / hourly_ratio

            else:
                ratio = np.sum(
                    RATIO_PET_HOURLY_SHIFTED.reshape(-1, int(1 / hourly_ratio)), axis=1
                )

            matrix_dip = np.zeros(
                shape=(model.mesh.nrow, model.mesh.ncol, len(leap_year_days)),
                dtype=np.float32,
            )
            missing_day = np.empty(shape=0, dtype=np.int32)

            list_date_in_daterange = date_range.strftime("%m%d")
            list_date_in_leap_year_days = leap_year_days.strftime("%m%d")

            for i, date in enumerate(
                tqdm.tqdm(leap_year_days, desc="</> Reading daily interannual pet")
            ):
                if list_date_in_leap_year_days[i] in list_date_in_daterange:
                    ind = _check_files_containing_date(file_list, date, "%m%d")

                    ind_date = list(list_date_in_daterange).index(
                        list_date_in_leap_year_days[i]
                    )
                    true_date = date_range[ind_date]

                    if ind == -1:
                        reading_warning["miss"].append(
                            date_range[ind_date].strftime("%Y-%m-%d")
                            + f", (matching pattern {true_date.strftime('%m%d')})"
                        )
                        missing_day = np.append(date, true_date)
                    else:
                        reading_warning["got"].append(
                            true_date.strftime("%Y-%m-%d")
                            + f" ({os.path.basename(file_list[ind])})"
                        )
                        matrix_dip[..., i], warning = _read_windowed_raster_new(
                            file_list[ind], model.mesh
                        )
                        matrix_dip[..., i] *= model.setup.pet_conversion_factor
                        reading_warning.update(
                            {k: v for k, v in warning.items() if not reading_warning[k]}
                        )

            for i, date in enumerate(
                tqdm.tqdm(date_range, desc="</> Disaggregating daily interannual pet")
            ):
                ratio_ind = (i + step_offset) % nstep_per_day

                if date in missing_day:
                    if model.setup.sparse_storage:
                        matrix = np.zeros(
                            shape=(model.mesh.nrow, model.mesh.ncol),
                            dtype=np.float32,
                            order="F",
                        )
                        matrix.fill(np.float32(-99))
                        smash.fcore._mwd_sparse_matrix_manipulation.matrix_to_sparse_matrix(
                            model.mesh,
                            matrix,
                            np.float32(-99),
                            model._input_data.atmos_data.sparse_pet[i],
                        )
                    else:
                        model._input_data.atmos_data.pet[..., i] = np.float32(-99)

                else:
                    index = list_date_in_leap_year_days.get_loc(list_date_in_daterange[i])
                    matrix = matrix_dip[..., index] * ratio[ratio_ind]
                    if model.setup.sparse_storage:
                        smash.fcore._mwd_sparse_matrix_manipulation.matrix_to_sparse_matrix(
                            model.mesh,
                            matrix,
                            np.float32(0),
                            model._input_data.atmos_data.sparse_pet[i],
                        )
                    else:
                        model._input_data.atmos_data.pet[..., i] = matrix
    # % WIP
    elif model.setup.pet_format == "nc":
        raise NotImplementedError("NetCDF format not implemented yet")

    msg = _get_reading_warning_message(reading_warning)

    if msg:
        warnings.warn(
            f"Warning(s) linked to potential evapotranspiration reading.\n{msg}",
            stacklevel=2,
        )
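

# NOTE (editorial illustration, not part of the released read_data.py): a sketch of the
# sub-daily disaggregation used above. For dt=900 s each hourly weight is repeated 4 times
# and divided by 4, so the 96 sub-hourly weights still sum to 1.
def _example_subhourly_pet_ratio(dt=900):
    hourly_ratio = 3_600 / dt
    ratio = np.repeat(RATIO_PET_HOURLY, int(hourly_ratio)) / hourly_ratio
    assert ratio.size == int(86_400 / dt) and np.isclose(ratio.sum(), 1.0)
    return ratio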


def _read_qobs(model):
    start_time = pd.Timestamp(model.setup.start_time)
    end_time = pd.Timestamp(model.setup.end_time)
    miss = []

    for i, c in enumerate(model.mesh.code):
        f = glob.glob(f"{model.setup.qobs_directory}/**/*{c}*.csv", recursive=True)

        if f:
            dat = pd.read_csv(f[0])
            try:
                file_start_time = pd.Timestamp(dat.columns[0])
            except Exception:
                raise ValueError(
                    f"Column header '{dat.columns[0]}' in the observed discharge file for catchment '{c}' "
                    f"is not a valid date"
                ) from None

            file_end_time = file_start_time + pd.Timedelta(
                seconds=model.setup.dt * (len(dat) - 1)
            )
            start_diff = (
                int((start_time - file_start_time).total_seconds() / model.setup.dt) + 1
            )
            end_diff = (
                int((end_time - file_start_time).total_seconds() / model.setup.dt) + 1
            )

            # % Check if observed discharge file contains data for the corresponding simulation period
            if start_diff > dat.index.max() or end_diff < 0:
                warnings.warn(
                    f"The provided observed discharge file for catchment '{c}' does not contain data for the "
                    f"selected simulation period ['{start_time}', '{end_time}']. The file covers the period "
                    f"['{file_start_time}', '{file_end_time}']",
                    stacklevel=2,
                )
            else:
                ind_start_dat = max(0, start_diff)
                ind_end_dat = min(dat.index.max(), end_diff)
                ind_start_arr = max(0, -start_diff)
                ind_end_arr = ind_start_arr + ind_end_dat - ind_start_dat

                model._input_data.response_data.q[i, ind_start_arr:ind_end_arr] = (
                    dat.iloc[ind_start_dat:ind_end_dat, 0]
                )
        else:
            miss.append(c)

    if miss:
        warnings.warn(
            f"Missing {len(miss)} observed discharge file(s): {miss}", stacklevel=2
        )


def serialize_setup_directories(directories):

    if isinstance(directories, list):

        if isinstance(directories[0], dict):
            dir_data = directories[0]
        else:
            raise Exception(
                f"Bad syntax for {directories} in setup. It should be a dictionary"
            )

    elif isinstance(directories, dict):
        dir_data = directories

    return dir_data
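

# NOTE (editorial illustration, not part of the released read_data.py): both accepted
# setup layouts normalize to a plain {priority: directory} dictionary; the paths are
# made up for the demo.
def _example_serialize_setup_directories():
    assert serialize_setup_directories({1: "/data/prcp"}) == {1: "/data/prcp"}
    assert serialize_setup_directories([{1: "/data/prcp", 2: "/backup/prcp"}]) == {
        1: "/data/prcp",
        2: "/backup/prcp",
    }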


def read_atmos_data(model, setup_options={}):

    # update attributes
    for key, value in setup_options.items():
        if hasattr(model.setup, key):
            setattr(model.setup, key, value)

    if model.setup.read_qobs:

        _read_qobs(model)

    # with rasterio.Env():
    if model.setup.read_prcp:

        if len(model.setup.prcp_directories) > 0:
            dir_data = serialize_setup_directories(model.setup.prcp_directories)
        else:
            dir_data = {1: model.setup.prcp_directory}

        _read_atmos_data(model, atmos_data="prcp", directories=dir_data)

    if model.setup.read_pet:

        if len(model.setup.pet_directories) > 0:
            dir_data = serialize_setup_directories(model.setup.pet_directories)
        else:
            dir_data = {1: model.setup.pet_directory}

        if model.setup.daily_interannual_pet:
            _read_interannual_pet(model, directories=dir_data, tz=model.setup.timezone)
        else:
            _read_atmos_data(model, atmos_data="pet", directories=dir_data)

    if model.setup.read_snow:

        if len(model.setup.snow_directories) > 0:
            dir_data = serialize_setup_directories(model.setup.snow_directories)
        else:
            dir_data = {1: model.setup.snow_directory}

        _read_atmos_data(model, atmos_data="snow", directories=dir_data)

    if model.setup.read_temp:

        if len(model.setup.temp_directories) > 0:
            dir_data = serialize_setup_directories(model.setup.temp_directories)
        else:
            dir_data = {1: model.setup.temp_directory}

        _read_atmos_data(model, atmos_data="temp", directories=dir_data)

    if model.setup.prcp_partitioning:

        smash.fcore._mw_atmos_statistic.compute_prcp_partitioning(
            model.setup, model.mesh, model._input_data
        )

    if model.setup.compute_mean_atmos:

        print("</> Computing mean atmospheric data")
        smash.fcore._mw_atmos_statistic.compute_mean_atmos(
            model.setup, model.mesh, model._input_data
        )


def write_atmos_data_aliases(path_to_alias, setup, input_data):

    for atmos_data, directories in input_data.items():

        atmos_data_format = getattr(setup, f"{atmos_data}_format")
        atmos_date_pattern = getattr(setup, f"{atmos_data}_date_pattern")
        prcp_access = getattr(setup, f"{atmos_data}_access")
        dt = getattr(setup, "dt")

        date_range = pd.date_range(
            start=setup.start_time,
            end=setup.end_time,
            freq=f"{int(setup.dt)}s",
        )[1:]

        array_file = None
        for atmos_data_directory in directories:

            files = _get_atmos_files(
                atmos_data_directory,
                atmos_data_format,
                prcp_access,
                dt,
                date_range,
                atmos_date_pattern,
            )

            if array_file is None:
                array_file = np.array(files, dtype=str)
                lacuna = np.where(array_file == "-1")
            else:
                array_file[lacuna] = np.array(files)[lacuna]
                lacuna = np.where(array_file == "-1")

            if len(lacuna) == 0:
                break

        if os.path.exists(os.path.join(path_to_alias, f"tmp_{atmos_data}")):
            shutil.rmtree(os.path.join(path_to_alias, f"tmp_{atmos_data}"))

        if not os.path.exists(os.path.join(path_to_alias, f"tmp_{atmos_data}")):
            os.mkdir(os.path.join(path_to_alias, f"tmp_{atmos_data}"))

        for i in range(len(array_file)):
            if array_file[i] != "-1":
                mytarget = array_file[i]
                mylink = os.path.join(
                    path_to_alias, f"tmp_{atmos_data}", os.path.basename(array_file[i])
                )
                os.symlink(mytarget, mylink)