ngiab_data_preprocess-4.2.2-py3-none-any.whl → ngiab_data_preprocess-4.4.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to their public registries, and is provided for informational purposes only.
- data_processing/create_realization.py +39 -22
- data_processing/dask_utils.py +92 -0
- data_processing/dataset_utils.py +161 -44
- data_processing/datasets.py +18 -29
- data_processing/file_paths.py +7 -7
- data_processing/forcings.py +40 -38
- data_processing/gpkg_utils.py +13 -13
- data_processing/graph_utils.py +4 -6
- data_processing/s3fs_utils.py +1 -1
- data_processing/subset.py +39 -8
- data_sources/ngen-routing-template.yaml +1 -1
- data_sources/source_validation.py +72 -34
- map_app/__main__.py +3 -2
- map_app/static/css/main.css +14 -3
- map_app/static/js/data_processing.js +31 -55
- map_app/static/js/main.js +224 -106
- map_app/templates/index.html +10 -1
- map_app/views.py +17 -3
- ngiab_data_cli/__main__.py +32 -29
- ngiab_data_cli/arguments.py +0 -1
- ngiab_data_cli/forcing_cli.py +10 -19
- ngiab_data_preprocess-4.4.0.dist-info/METADATA +308 -0
- ngiab_data_preprocess-4.4.0.dist-info/RECORD +43 -0
- {ngiab_data_preprocess-4.2.2.dist-info → ngiab_data_preprocess-4.4.0.dist-info}/WHEEL +1 -1
- ngiab_data_preprocess-4.2.2.dist-info/METADATA +0 -258
- ngiab_data_preprocess-4.2.2.dist-info/RECORD +0 -42
- {ngiab_data_preprocess-4.2.2.dist-info → ngiab_data_preprocess-4.4.0.dist-info}/entry_points.txt +0 -0
- {ngiab_data_preprocess-4.2.2.dist-info → ngiab_data_preprocess-4.4.0.dist-info}/licenses/LICENSE +0 -0
- {ngiab_data_preprocess-4.2.2.dist-info → ngiab_data_preprocess-4.4.0.dist-info}/top_level.txt +0 -0
data_sources/source_validation.py
CHANGED

@@ -1,19 +1,29 @@
 import gzip
+import json
 import os
 import tarfile
 import warnings
-import json
-import requests
-from data_processing.file_paths import file_paths
-from tqdm import TqdmExperimentalWarning
 from time import sleep
+
 import boto3
-
+import psutil
+import requests
 from boto3.s3.transfer import TransferConfig
+from botocore.exceptions import ClientError
+import botocore
+from data_processing.file_paths import file_paths
 from rich.console import Console
+from rich.progress import (Progress,
+                           SpinnerColumn,
+                           TextColumn,
+                           TimeElapsedColumn,
+                           BarColumn,
+                           DownloadColumn,
+                           TransferSpeedColumn)
 from rich.prompt import Prompt
-from …
-import …
+from tqdm import TqdmExperimentalWarning
+from data_processing.gpkg_utils import verify_indices
+import sqlite3

 warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)

@@ -23,25 +33,22 @@ S3_KEY = "hydrofabrics/community/conus_nextgen.tar.gz"
 S3_REGION = "us-east-1"
 hydrofabric_url = f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com/{S3_KEY}"

+
 def decompress_gzip_tar(file_path, output_dir):
-    # use rich to display "decompressing" message with a progress bar that just counts down from 30s
-    # actually measuring this is hard and it usually takes ~20s to decompress
     console.print("Decompressing Hydrofabric...", style="bold green")
     progress = Progress(
         SpinnerColumn(),
         TextColumn("[progress.description]{task.description}"),
-        TimeElapsedColumn(),
+        TimeElapsedColumn(),
     )
     task = progress.add_task("Decompressing", total=1)
-    progress…
-
-
-
-
-
-
-    progress.update(task, completed=1)
-    progress.stop()
+    with progress:
+        with gzip.open(file_path, "rb") as f_in:
+            with tarfile.open(fileobj=f_in) as tar:
+                # Extract all contents
+                for member in tar:
+                    tar.extract(member, path=output_dir)
+                    progress.update(task, advance=1 / len(tar.getmembers()))


 def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
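The rewrite above replaces the old fixed-duration spinner with real per-member extraction progress. As a minimal standalone sketch of the same pattern (the name `extract_with_progress` is ours, not the package's): calling `tar.getmembers()` once before the loop gives the bar an integer total, instead of advancing by a fraction computed from `len(tar.getmembers())` inside the loop.

```python
import gzip
import tarfile

from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn


def extract_with_progress(archive_path: str, output_dir: str) -> None:
    # Same column layout the new decompress_gzip_tar configures.
    progress = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        TimeElapsedColumn(),
    )
    with progress, gzip.open(archive_path, "rb") as f_in, tarfile.open(fileobj=f_in) as tar:
        # getmembers() scans the archive once and caches the member list,
        # so each extracted file advances an integer-total task by 1.
        members = tar.getmembers()
        task = progress.add_task("Decompressing", total=len(members))
        for member in members:
            tar.extract(member, path=output_dir)
            progress.update(task, advance=1)
```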
@@ -53,10 +60,13 @@ def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
     if os.path.exists(save_path):
         console.print(f"File already exists: {save_path}", style="bold yellow")
         os.remove(save_path)
-
+
+    client_config = botocore.config.Config(
+        max_pool_connections=75
+    )
     # Initialize S3 client
     s3_client = boto3.client(
-        "s3", aws_access_key_id="", aws_secret_access_key="", region_name=region
+        "s3", aws_access_key_id="", aws_secret_access_key="", region_name=region, config=client_config
     )
     # Disable request signing for public buckets
     s3_client._request_signer.sign = lambda *args, **kwargs: None
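Disabling signing by patching the private `_request_signer` works, but botocore also exposes a documented route to anonymous access. A hedged alternative sketch (the config values mirror the diff; everything else is an assumption):

```python
import boto3
from botocore import UNSIGNED
from botocore.config import Config

# Anonymous client for a public bucket: signature_version=UNSIGNED is the
# documented equivalent of nulling out the request signer, and
# max_pool_connections matches the value the diff adds.
s3_client = boto3.client(
    "s3",
    region_name="us-east-1",
    config=Config(signature_version=UNSIGNED, max_pool_connections=75),
)
```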
@@ -92,19 +102,15 @@ def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
         use_threads=True,
     )

-    console.print(f"Downloading {key} to {save_path}...", style="bold green")
-    console.print(
-        f"The file downloads faster with no progress indicator, this should take around 30s",
-        style="bold yellow",
-    )
-    console.print(
-        f"Please use network monitoring on your computer if you wish to track the download",
-        style="green",
-    )

     try:
+        dl_progress = Progress(BarColumn(), DownloadColumn(), TransferSpeedColumn())
         # Download file using optimized transfer config
-
+        with dl_progress:
+            task = dl_progress.add_task("Downloading...", total=total_size)
+            s3_client.download_file(Bucket=bucket, Key=key, Filename=save_path, Config=config,
+                                    Callback=lambda bytes_downloaded: dl_progress.update(
+                                        task, advance=bytes_downloaded))
         return True
     except Exception as e:
         console.print(f"Error downloading file: {e}", style="bold red")
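This works because boto3 invokes `Callback` with the byte count of each transferred chunk, so `advance=bytes_downloaded` accumulates to the file size. A self-contained sketch under that contract; `download_with_progress` is a hypothetical name, and sizing the bar via `head_object` is our assumption about how `total_size` is computed earlier in the function:

```python
import boto3
from boto3.s3.transfer import TransferConfig
from rich.progress import Progress, BarColumn, DownloadColumn, TransferSpeedColumn


def download_with_progress(bucket: str, key: str, dest: str) -> None:
    s3 = boto3.client("s3")
    # HEAD the object so the bar has a byte total to count toward.
    total = s3.head_object(Bucket=bucket, Key=key)["ContentLength"]
    progress = Progress(BarColumn(), DownloadColumn(), TransferSpeedColumn())
    with progress:
        task = progress.add_task("Downloading...", total=total)
        # Each Callback invocation reports the delta for that chunk.
        s3.download_file(
            Bucket=bucket,
            Key=key,
            Filename=dest,
            Config=TransferConfig(use_threads=True),
            Callback=lambda n: progress.update(task, advance=n),
        )
```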
@@ -122,11 +128,33 @@ def get_headers():


 def download_and_update_hf():
+
+    if file_paths.conus_hydrofabric.is_file():
+        console.print(
+            f"Hydrofabric already exists at {file_paths.conus_hydrofabric}, removing it to download the latest version.",
+            style="bold yellow",
+        )
+        file_paths.conus_hydrofabric.unlink()
+
     download_from_s3(
         file_paths.conus_hydrofabric.with_suffix(".tar.gz"),
         bucket="communityhydrofabric",
         key="hydrofabrics/community/conus_nextgen.tar.gz",
     )
+
+    if file_paths.hydrofabric_graph.is_file():
+        console.print(
+            f"Hydrofabric graph already exists at {file_paths.hydrofabric_graph}, removing it to download the latest version.",
+            style="bold yellow",
+        )
+        file_paths.hydrofabric_graph.unlink()
+
+    download_from_s3(
+        file_paths.hydrofabric_graph,
+        bucket="communityhydrofabric",
+        key="hydrofabrics/community/conus_igraph_network.gpickle"
+    )
+
     status, headers = get_headers()

     if status == 200:
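Besides the geopackage, the function now also fetches a prebuilt `conus_igraph_network.gpickle`, presumably so `graph_utils` can skip rebuilding the CONUS flow network from the geopackage. The file format is not shown in this diff; assuming a standard pickled graph object, loading it would look roughly like:

```python
import pickle
from pathlib import Path


def load_hydrofabric_graph(path: Path):
    # Assumption: the .gpickle is a plain pickle of the network object;
    # unpickling the cached file avoids an expensive graph rebuild.
    with open(path, "rb") as f:
        return pickle.load(f)
```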
@@ -139,11 +167,10 @@ def download_and_update_hf():
         file_paths.conus_hydrofabric.parent,
     )

-
 def validate_hydrofabric():
     if not file_paths.conus_hydrofabric.is_file():
         response = Prompt.ask(
-            "Hydrofabric …
+            "Hydrofabric files are missing. Would you like to download them now?",
             default="y",
             choices=["y", "n"],
         )
@@ -207,6 +234,17 @@ def validate_hydrofabric():
         )
         sleep(2)
         return
+
+    # moved this from gpkg_utils to here to avoid potential nested rich live displays
+    if file_paths.conus_hydrofabric.is_file():
+        valid_hf = False
+        while not valid_hf:
+            try:
+                verify_indices()
+                valid_hf = True
+            except sqlite3.DatabaseError:
+                console.print(f"Hydrofabric {file_paths.conus_hydrofabric} is corrupted. Redownloading...", style="red")
+                download_and_update_hf()


 def validate_output_dir():
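The loop above treats `sqlite3.DatabaseError` raised by `verify_indices` as evidence of a corrupted download and re-fetches the hydrofabric. For intuition, a sketch of how a corrupted GeoPackage (a SQLite file) surfaces that exception; `probe_gpkg` is illustrative, not the package's code:

```python
import sqlite3


def probe_gpkg(path: str) -> bool:
    # A truncated or corrupted GeoPackage raises sqlite3.DatabaseError as
    # soon as SQLite tries to read its pages; an intact file returns "ok".
    con = sqlite3.connect(path)
    try:
        (result,) = con.execute("PRAGMA integrity_check").fetchone()
        return result == "ok"
    finally:
        con.close()
```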
@@ -220,7 +258,7 @@ def validate_output_dir():
         response = Prompt.ask("Enter the path to the working directory")
         if response == "" or response.lower() == "y":
             response = "~/ngiab_preprocess_output/"
-    file_paths.set_working_dir(response)
+    file_paths.set_working_dir(response)  # type: ignore


 def validate_all():
map_app/__main__.py
CHANGED

@@ -39,12 +39,13 @@ def main():
         Timer(2, set_logs_to_warning).start()
         with open("app.log", "a") as f:
             f.write("Running in debug mode\n")
-        app.run(debug=True, host="0.0.0.0", port="8080")
+        app.run(debug=True, host="0.0.0.0", port="8080")  # type: ignore
     else:
         Timer(1, open_browser).start()
         with open("app.log", "a") as f:
             f.write("Running in production mode\n")
-        app.run(host="0.0.0.0", port="0")
+        app.run(host="0.0.0.0", port="0")  # type: ignore
+

 if __name__ == "__main__":
     main()
map_app/static/css/main.css
CHANGED

@@ -28,10 +28,9 @@ main {

 .maplibregl-popup-content {
     background: var(--surface-color) !important;
-
 }

-#toggle-button {
+#toggle-button-gages, #toggle-button-camels, #toggle-button-nwm, #toggle-button-aorc {
     position: relative;
     top: 20px;
     left: 20px;
@@ -46,11 +45,23 @@ main {
     z-index: 1;
 }

-#toggle-button:hover {
+#toggle-button-gages:hover, #toggle-button-camels:hover, #toggle-button-nwm:hover, #toggle-button-aorc:hover {
     scale: 1.1;
     box-shadow: var(--shadow-md);
 }

+#toggle-button-camels {
+    left: 30px;
+}
+
+#toggle-button-nwm {
+    left: 40px;
+}
+
+#toggle-button-aorc {
+    left: 50px;
+}
+
 body {
     font-family: 'Inter', system-ui, -apple-system, sans-serif;
     margin: 0;
map_app/static/js/data_processing.js
CHANGED

@@ -4,67 +4,44 @@ async function subset() {
        alert('Please select at least one basin in the map before subsetting');
        return;
    }
-
-    document.getElementById('subset-button').disabled = true;
-    document.getElementById('subset-loading').style.visibility = "visible";
-    const startTime = performance.now(); // Start the timer
-    document.getElementById('output-path').innerHTML = "Subsetting...";
-    fetch('/subset', {
+    fetch('/subset_check', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify([cat_id]),
    })
-    .then(response => response.text())
-    .then(filename => {
-        console.log(filename);
-        const endTime = performance.now(); // Stop the timer
-        const duration = endTime - startTime; // Calculate the duration in milliseconds
-        console.log('Request took ' + duration / 1000 + ' milliseconds');
-        document.getElementById('output-path').innerHTML = "Done in " + duration / 1000 + "s, subset to <a href='file://" + filename + "'>" + filename + "</a>";
-    })
-    .catch(error => {
-        console.error('Error:', error);
-    }).finally(() => {
-        document.getElementById('subset-button').disabled = false;
-        document.getElementById('subset-loading').style.visibility = "hidden";
-    });
+    .then((response) => {
+        // 409 if that subset gpkg path already exists
+        if (response.status == 409) {
+            console.log("check response")
+            if (!confirm('A geopackage already exists with that catchment name. Overwrite?')) {
+                alert("Subset canceled.");
+                return;
+            }
+        }
+        const startTime = performance.now(); // Start the timer
+        fetch('/subset', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify([cat_id]),
+        })
+        .then(response => response.text())
+        .then(filename => {
+            console.log(filename);
+            const endTime = performance.now(); // Stop the timer
+            const duration = endTime - startTime; // Calculate the duration in milliseconds
+            console.log('Request took ' + duration / 1000 + ' milliseconds');
+            document.getElementById('output-path').innerHTML = "Done in " + (duration / 1000).toFixed(2) + "s, subset to <a href='file://" + filename + "'>" + filename + "</a>";
+        })
+        .catch(error => {
+            console.error('Error:', error);
+        }).finally(() => {
+            document.getElementById('subset-button').disabled = false;
+            document.getElementById('subset-loading').style.visibility = "hidden";
+        });
+    });
 }

-
-// async function subset_to_file() {
-//     if (Object.keys(cat_id_dict).length === 0) {
-//         alert('Please select at least one basin in the map before subsetting');
-//         return;
-//     }
-//     console.log('subsetting to file');
-//     document.getElementById('subset-to-file-button').disabled = true;
-//     document.getElementById('subset-to-file-loading').style.visibility = "visible";
-//     const startTime = performance.now(); // Start the timer
-//     document.getElementById('output-path').innerHTML = "Subsetting...";
-//     fetch('/subset_to_file', {
-//         method: 'POST',
-//         headers: { 'Content-Type': 'application/json' },
-//         body: JSON.stringify(cat_id_dict),
-//     })
-//     .then(response => response.text())
-//     .then(filename => {
-//         console.log(filename);
-//         const endTime = performance.now(); // Stop the timer
-//         const duration = endTime - startTime; // Calculate the duration in milliseconds
-//         console.log('Request took ' + duration / 1000 + ' milliseconds');
-//         document.getElementById('output-path').innerHTML = "Done in " + duration / 1000 + "s, subset to <a href='file://" + filename + "'>" + filename + "</a>";
-//     })
-//     .catch(error => {
-//         console.error('Error:', error);
-//     }).finally(() => {
-//         document.getElementById('subset-to-file-button').disabled = false;
-//         document.getElementById('subset-to-file-loading').style.visibility = "hidden";
-//     });
-// }
-
 async function forcings() {
-
-
    if (document.getElementById('output-path').textContent === '') {
        alert('Please subset the data before getting forcings');
        return;
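The front end now pre-flights the subset: it POSTs to `/subset_check`, treats HTTP 409 as "output geopackage already exists", and only calls `/subset` once the user confirms the overwrite. The server side of this handshake is not in the JavaScript diff; a hypothetical Flask sketch of what the route in `map_app/views.py` might look like (the path helper and response bodies are assumptions):

```python
from pathlib import Path

from flask import Flask, request

app = Flask(__name__)


def output_gpkg_for(cat_id: str) -> Path:
    # Hypothetical helper: wherever the app writes the subset geopackage.
    return Path.home() / "ngiab_preprocess_output" / cat_id / f"{cat_id}_subset.gpkg"


@app.route("/subset_check", methods=["POST"])
def subset_check():
    cat_id = request.get_json()[0]  # client sends JSON.stringify([cat_id])
    if output_gpkg_for(cat_id).exists():
        # 409 Conflict -> front end asks before overwriting
        return "exists", 409
    return "ok", 200
```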
@@ -139,6 +116,5 @@ async function realization() {

 // These functions are exported by data_processing.js
 document.getElementById('subset-button').addEventListener('click', subset);
-// document.getElementById('subset-to-file-button').addEventListener('click', subset_to_file);
 document.getElementById('forcings-button').addEventListener('click', forcings);
 document.getElementById('realization-button').addEventListener('click', realization);