ngiab-data-preprocess 4.2.2-py3-none-any.whl → 4.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,29 @@
 import gzip
+import json
 import os
 import tarfile
 import warnings
-import json
-import requests
-from data_processing.file_paths import file_paths
-from tqdm import TqdmExperimentalWarning
 from time import sleep
+
 import boto3
-from botocore.exceptions import ClientError
+import psutil
+import requests
 from boto3.s3.transfer import TransferConfig
+from botocore.exceptions import ClientError
+import botocore
+from data_processing.file_paths import file_paths
 from rich.console import Console
+from rich.progress import (Progress,
+                           SpinnerColumn,
+                           TextColumn,
+                           TimeElapsedColumn,
+                           BarColumn,
+                           DownloadColumn,
+                           TransferSpeedColumn)
 from rich.prompt import Prompt
-from rich.progress import Progress, TextColumn, TimeElapsedColumn, SpinnerColumn
-import psutil
+from tqdm import TqdmExperimentalWarning
+from data_processing.gpkg_utils import verify_indices
+import sqlite3
 
 warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
 
@@ -23,25 +33,22 @@ S3_KEY = "hydrofabrics/community/conus_nextgen.tar.gz"
 S3_REGION = "us-east-1"
 hydrofabric_url = f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com/{S3_KEY}"
 
+
 def decompress_gzip_tar(file_path, output_dir):
-    # use rich to display "decompressing" message with a progress bar that just counts down from 30s
-    # actually measuring this is hard and it usually takes ~20s to decompress
     console.print("Decompressing Hydrofabric...", style="bold green")
     progress = Progress(
         SpinnerColumn(),
         TextColumn("[progress.description]{task.description}"),
-        TimeElapsedColumn(),
+        TimeElapsedColumn(),
     )
     task = progress.add_task("Decompressing", total=1)
-    progress.start()
-    with gzip.open(file_path, "rb") as f_in:
-        with tarfile.open(fileobj=f_in) as tar:
-            # Extract all contents
-            for member in tar:
-                tar.extract(member, path=output_dir)
-    # Update the progress bar
-    progress.update(task, completed=1)
-    progress.stop()
+    with progress:
+        with gzip.open(file_path, "rb") as f_in:
+            with tarfile.open(fileobj=f_in) as tar:
+                # Extract all contents
+                for member in tar:
+                    tar.extract(member, path=output_dir)
+                    progress.update(task, advance=1 / len(tar.getmembers()))
 
 
 def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
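The hunk above swaps the manual progress.start()/progress.stop() calls for rich's Progress context manager and advances the task by an equal fraction per extracted archive member instead of jumping straight to completed=1. A minimal standalone sketch of that same pattern (the archive path and output directory below are placeholders, not the package's values):

import gzip
import tarfile

from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn


def extract_with_progress(archive_path: str, output_dir: str) -> None:
    progress = Progress(SpinnerColumn(), TextColumn("{task.description}"), TimeElapsedColumn())
    task = progress.add_task("Decompressing", total=1)
    # "with progress:" starts the live display and guarantees it is stopped,
    # even if extraction raises, which replaces explicit start()/stop() calls.
    with progress:
        with gzip.open(archive_path, "rb") as f_in:
            with tarfile.open(fileobj=f_in) as tar:
                members = tar.getmembers()  # scan the member list once
                for member in members:
                    tar.extract(member, path=output_dir)
                    # advance the single task by an equal fraction per member
                    progress.update(task, advance=1 / len(members))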
@@ -53,10 +60,13 @@ def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
     if os.path.exists(save_path):
         console.print(f"File already exists: {save_path}", style="bold yellow")
         os.remove(save_path)
-
+
+    client_config = botocore.config.Config(
+        max_pool_connections=75
+    )
     # Initialize S3 client
     s3_client = boto3.client(
-        "s3", aws_access_key_id="", aws_secret_access_key="", region_name=region
+        "s3", aws_access_key_id="", aws_secret_access_key="", region_name=region, config=client_config
     )
     # Disable request signing for public buckets
     s3_client._request_signer.sign = lambda *args, **kwargs: None
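This hunk keeps the existing trick of disabling request signing by overriding the private _request_signer attribute and adds a botocore client Config with max_pool_connections=75 so the threaded transfer is not throttled by the default connection pool of 10. For reference, a brief sketch of the documented way to build an anonymous client for a public bucket, which avoids touching private attributes (not the package's code, just the standard botocore alternative):

import boto3
from botocore import UNSIGNED
from botocore.config import Config

# signature_version=UNSIGNED produces an anonymous client for public buckets;
# max_pool_connections is raised so a multi-threaded TransferConfig can keep
# many simultaneous range requests in flight.
s3_client = boto3.client(
    "s3",
    region_name="us-east-1",
    config=Config(signature_version=UNSIGNED, max_pool_connections=75),
)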
@@ -92,19 +102,15 @@ def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
         use_threads=True,
     )
 
-    console.print(f"Downloading {key} to {save_path}...", style="bold green")
-    console.print(
-        f"The file downloads faster with no progress indicator, this should take around 30s",
-        style="bold yellow",
-    )
-    console.print(
-        f"Please use network monitoring on your computer if you wish to track the download",
-        style="green",
-    )
 
     try:
+        dl_progress = Progress(BarColumn(), DownloadColumn(), TransferSpeedColumn())
         # Download file using optimized transfer config
-        s3_client.download_file(Bucket=bucket, Key=key, Filename=save_path, Config=config)
+        with dl_progress:
+            task = dl_progress.add_task("Downloading...", total=total_size)
+            s3_client.download_file(Bucket=bucket, Key=key, Filename=save_path, Config=config,
+                                    Callback=lambda bytes_downloaded: dl_progress.update(
+                                        task, advance=bytes_downloaded))
         return True
     except Exception as e:
         console.print(f"Error downloading file: {e}", style="bold red")
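The new progress bar depends on two names defined elsewhere in this function and not shown in the hunk: config (the TransferConfig) and total_size (the object's size in bytes). boto3 invokes the Callback with the number of bytes transferred since the previous call, so passing that value straight to advance= accumulates correctly even when the transfer uses multiple threads. As an illustration only (the helper name is hypothetical), the object size is typically obtained from a HeadObject request:

import boto3


def object_size(s3_client, bucket: str, key: str) -> int:
    # HeadObject reports the object's size in bytes as ContentLength;
    # this is the value a progress task's total= would be set to.
    return s3_client.head_object(Bucket=bucket, Key=key)["ContentLength"]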
@@ -122,11 +128,33 @@ def get_headers():
 
 
 def download_and_update_hf():
+
+    if file_paths.conus_hydrofabric.is_file():
+        console.print(
+            f"Hydrofabric already exists at {file_paths.conus_hydrofabric}, removing it to download the latest version.",
+            style="bold yellow",
+        )
+        file_paths.conus_hydrofabric.unlink()
+
     download_from_s3(
         file_paths.conus_hydrofabric.with_suffix(".tar.gz"),
         bucket="communityhydrofabric",
         key="hydrofabrics/community/conus_nextgen.tar.gz",
     )
+
+    if file_paths.hydrofabric_graph.is_file():
+        console.print(
+            f"Hydrofabric graph already exists at {file_paths.hydrofabric_graph}, removing it to download the latest version.",
+            style="bold yellow",
+        )
+        file_paths.hydrofabric_graph.unlink()
+
+    download_from_s3(
+        file_paths.hydrofabric_graph,
+        bucket="communityhydrofabric",
+        key="hydrofabrics/community/conus_igraph_network.gpickle"
+    )
+
     status, headers = get_headers()
 
     if status == 200:
@@ -139,11 +167,10 @@ def download_and_update_hf():
         file_paths.conus_hydrofabric.parent,
     )
 
-
 def validate_hydrofabric():
     if not file_paths.conus_hydrofabric.is_file():
         response = Prompt.ask(
-            "Hydrofabric is missing. Would you like to download it now?",
+            "Hydrofabric files are missing. Would you like to download them now?",
             default="y",
             choices=["y", "n"],
         )
@@ -207,6 +234,17 @@ def validate_hydrofabric():
         )
         sleep(2)
         return
+
+    # moved this from gpkg_utils to here to avoid potential nested rich live displays
+    if file_paths.conus_hydrofabric.is_file():
+        valid_hf = False
+        while not valid_hf:
+            try:
+                verify_indices()
+                valid_hf = True
+            except sqlite3.DatabaseError:
+                console.print(f"Hydrofabric {file_paths.conus_hydrofabric} is corrupted. Redownloading...", style="red")
+                download_and_update_hf()
 
 
 def validate_output_dir():
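verify_indices() is defined in data_processing.gpkg_utils and is not part of this diff; the loop above only relies on it raising sqlite3.DatabaseError when the downloaded GeoPackage is unreadable, re-downloading until the check passes. A minimal sketch of the same retry-until-valid idea using a plain SQLite integrity check as a stand-in for verify_indices (this is not the package's implementation):

import sqlite3


def check_sqlite_readable(db_path: str) -> None:
    # A truncated or corrupted download typically fails here with
    # sqlite3.DatabaseError ("file is not a database" or similar).
    with sqlite3.connect(db_path) as conn:
        conn.execute("PRAGMA integrity_check;").fetchone()


def ensure_valid(db_path: str, redownload) -> None:
    while True:
        try:
            check_sqlite_readable(db_path)
            return
        except sqlite3.DatabaseError:
            redownload()  # e.g. download_and_update_hf() in the code above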
@@ -220,7 +258,7 @@ def validate_output_dir():
         response = Prompt.ask("Enter the path to the working directory")
         if response == "" or response.lower() == "y":
             response = "~/ngiab_preprocess_output/"
-    file_paths.set_working_dir(response)
+    file_paths.set_working_dir(response)  # type: ignore
 
 
 def validate_all():
map_app/__main__.py CHANGED
@@ -39,12 +39,13 @@ def main():
         Timer(2, set_logs_to_warning).start()
         with open("app.log", "a") as f:
             f.write("Running in debug mode\n")
-        app.run(debug=True, host="0.0.0.0", port="8080")
+        app.run(debug=True, host="0.0.0.0", port="8080")  # type: ignore
     else:
         Timer(1, open_browser).start()
         with open("app.log", "a") as f:
             f.write("Running in production mode\n")
-        app.run(host="0.0.0.0", port="0")
+        app.run(host="0.0.0.0", port="0")  # type: ignore
+
 
 if __name__ == "__main__":
     main()
@@ -28,10 +28,9 @@ main {
 
 .maplibregl-popup-content {
     background: var(--surface-color) !important;
-
 }
 
-#toggle-button {
+#toggle-button-gages, #toggle-button-camels, #toggle-button-nwm, #toggle-button-aorc {
     position: relative;
     top: 20px;
     left: 20px;
@@ -46,11 +45,23 @@ main {
     z-index: 1;
 }
 
-#toggle-button:hover {
+#toggle-button-gages:hover, #toggle-button-camels:hover, #toggle-button-nwm:hover, #toggle-button-aorc:hover {
     scale: 1.1;
     box-shadow: var(--shadow-md);
 }
 
+#toggle-button-camels {
+    left: 30px;
+}
+
+#toggle-button-nwm {
+    left: 40px;
+}
+
+#toggle-button-aorc {
+    left: 50px;
+}
+
 body {
     font-family: 'Inter', system-ui, -apple-system, sans-serif;
     margin: 0;
@@ -4,67 +4,44 @@ async function subset() {
         alert('Please select at least one basin in the map before subsetting');
         return;
     }
-    console.log('subsetting');
-    document.getElementById('subset-button').disabled = true;
-    document.getElementById('subset-loading').style.visibility = "visible";
-    const startTime = performance.now(); // Start the timer
-    document.getElementById('output-path').innerHTML = "Subsetting...";
-    fetch('/subset', {
+    fetch('/subset_check', {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify([cat_id]),
     })
-        .then(response => response.text())
-        .then(filename => {
-            console.log(filename);
-            const endTime = performance.now(); // Stop the timer
-            const duration = endTime - startTime; // Calculate the duration in milliseconds
-            console.log('Request took ' + duration / 1000 + ' milliseconds');
-            document.getElementById('output-path').innerHTML = "Done in " + duration / 1000 + "s, subset to <a href='file://" + filename + "'>" + filename + "</a>";
-        })
-        .catch(error => {
-            console.error('Error:', error);
-        }).finally(() => {
-            document.getElementById('subset-button').disabled = false;
-            document.getElementById('subset-loading').style.visibility = "hidden";
-        });
+        .then((response) => {
+            // 409 if that subset gpkg path already exists
+            if (response.status == 409) {
+                console.log("check response")
+                if (!confirm('A geopackage already exists with that catchment name. Overwrite?')) {
+                    alert("Subset canceled.");
+                    return;
+                }
+            }
+            const startTime = performance.now(); // Start the timer
+            fetch('/subset', {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify([cat_id]),
+            })
+                .then(response => response.text())
+                .then(filename => {
+                    console.log(filename);
+                    const endTime = performance.now(); // Stop the timer
+                    const duration = endTime - startTime; // Calculate the duration in milliseconds
+                    console.log('Request took ' + duration / 1000 + ' milliseconds');
+                    document.getElementById('output-path').innerHTML = "Done in " + (duration / 1000).toFixed(2) + "s, subset to <a href='file://" + filename + "'>" + filename + "</a>";
+                })
+                .catch(error => {
+                    console.error('Error:', error);
+                }).finally(() => {
+                    document.getElementById('subset-button').disabled = false;
+                    document.getElementById('subset-loading').style.visibility = "hidden";
+                });
+        });
 }
 
-
-// async function subset_to_file() {
-//     if (Object.keys(cat_id_dict).length === 0) {
-//         alert('Please select at least one basin in the map before subsetting');
-//         return;
-//     }
-//     console.log('subsetting to file');
-//     document.getElementById('subset-to-file-button').disabled = true;
-//     document.getElementById('subset-to-file-loading').style.visibility = "visible";
-//     const startTime = performance.now(); // Start the timer
-//     document.getElementById('output-path').innerHTML = "Subsetting...";
-//     fetch('/subset_to_file', {
-//         method: 'POST',
-//         headers: { 'Content-Type': 'application/json' },
-//         body: JSON.stringify(cat_id_dict),
-//     })
-//         .then(response => response.text())
-//         .then(filename => {
-//             console.log(filename);
-//             const endTime = performance.now(); // Stop the timer
-//             const duration = endTime - startTime; // Calculate the duration in milliseconds
-//             console.log('Request took ' + duration / 1000 + ' milliseconds');
-//             document.getElementById('output-path').innerHTML = "Done in " + duration / 1000 + "s, subset to <a href='file://" + filename + "'>" + filename + "</a>";
-//         })
-//         .catch(error => {
-//             console.error('Error:', error);
-//         }).finally(() => {
-//             document.getElementById('subset-to-file-button').disabled = false;
-//             document.getElementById('subset-to-file-loading').style.visibility = "hidden";
-//         });
-// }
-
 async function forcings() {
-
-
     if (document.getElementById('output-path').textContent === '') {
         alert('Please subset the data before getting forcings');
         return;
@@ -139,6 +116,5 @@ async function realization() {
 
 // These functions are exported by data_processing.js
 document.getElementById('subset-button').addEventListener('click', subset);
-// document.getElementById('subset-to-file-button').addEventListener('click', subset_to_file);
 document.getElementById('forcings-button').addEventListener('click', forcings);
 document.getElementById('realization-button').addEventListener('click', realization);
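The reworked subset() flow above first POSTs the selected catchment to /subset_check and treats a 409 response as "a GeoPackage with that name already exists", asking for confirmation before calling /subset. The server side of that check is not included in this diff; a hypothetical Flask sketch of the contract the JavaScript relies on (the route name and 409 status come from the frontend code, the output layout and everything else below are illustrative assumptions):

from pathlib import Path

from flask import Flask, jsonify, request

app = Flask(__name__)
OUTPUT_DIR = Path("~/ngiab_preprocess_output/").expanduser()  # illustrative location


@app.route("/subset_check", methods=["POST"])
def subset_check():
    # The frontend sends a JSON list of catchment ids, e.g. ["cat-123"].
    cat_ids = request.get_json()
    gpkg = OUTPUT_DIR / cat_ids[0] / f"{cat_ids[0]}.gpkg"  # hypothetical layout
    if gpkg.exists():
        # 409 tells the page a subset with this name already exists,
        # which triggers the overwrite confirmation dialog.
        return jsonify({"exists": True}), 409
    return jsonify({"exists": False}), 200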