ngiab-data-preprocess 4.2.1__py3-none-any.whl → 4.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,11 +2,12 @@ import logging
2
2
  import sqlite3
3
3
  import struct
4
4
  from pathlib import Path
5
- from typing import List, Tuple, Union
5
+ from typing import List, Tuple, Dict
6
6
 
7
7
  import pyproj
8
8
  from data_processing.file_paths import file_paths
9
- from shapely.geometry import Point, Polygon
9
+ from shapely.geometry import Point
10
+ from shapely.geometry.base import BaseGeometry
10
11
  from shapely.ops import transform
11
12
  from shapely.wkb import loads
12
13
 
@@ -27,7 +28,7 @@ class GeoPackage:
27
28
  self.conn.close()
28
29
 
29
30
 
30
- def verify_indices(gpkg: str = file_paths.conus_hydrofabric) -> None:
31
+ def verify_indices(gpkg: Path = file_paths.conus_hydrofabric) -> None:
31
32
  """
32
33
  Verify that the indices in the specified geopackage are correct.
33
34
  If they are not, create the correct indices.
@@ -92,12 +93,9 @@ def add_triggers_to_gpkg(gpkg: Path) -> None:
92
93
  logger.debug(f"Added triggers to subset gpkg {gpkg}")
93
94
 
94
95
 
95
- # whenever this is imported, check if the indices are correct
96
- if file_paths.conus_hydrofabric.is_file():
97
- verify_indices()
98
96
 
99
97
 
100
- def blob_to_geometry(blob: bytes) -> Union[Point, Polygon]:
98
+ def blob_to_geometry(blob: bytes) -> BaseGeometry | None:
101
99
  """
102
100
  Convert a blob to a geometry.
103
101
  from http://www.geopackage.org/spec/#gpb_format
@@ -120,7 +118,7 @@ def blob_to_geometry(blob: bytes) -> Union[Point, Polygon]:
120
118
  return geometry
121
119
 
122
120
 
123
- def blob_to_centre_point(blob: bytes) -> Point:
121
+ def blob_to_centre_point(blob: bytes) -> Point | None:
124
122
  """
125
123
  Convert a blob to a geometry.
126
124
  from http://www.geopackage.org/spec/#gpb_format
@@ -151,7 +149,7 @@ def blob_to_centre_point(blob: bytes) -> Point:
151
149
  return Point(x, y)
152
150
 
153
151
 
154
- def convert_to_5070(shapely_geometry):
152
+ def convert_to_5070(shapely_geometry: Point) -> Point:
155
153
  # convert to web mercator
156
154
  if shapely_geometry.is_empty:
157
155
  return shapely_geometry
@@ -164,7 +162,7 @@ def convert_to_5070(shapely_geometry):
164
162
  return new_geometry
165
163
 
166
164
 
167
- def get_catid_from_point(coords):
165
+ def get_catid_from_point(coords: Dict[str, float]) -> str:
168
166
  """
169
167
  Retrieves the watershed boundary ID (catid) of the watershed that contains the given point.
170
168
 
@@ -196,6 +194,8 @@ def get_catid_from_point(coords):
196
194
  # check the geometries to see which one contains the point
197
195
  for result in results:
198
196
  geom = blob_to_geometry(result[1])
197
+ if geom is None:
198
+ continue
199
199
  if geom.contains(point):
200
200
  return result[0]
201
201
  return results[0][0]
@@ -321,16 +321,16 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
321
321
  if table == "network":
322
322
  # Look for the network entry that has a toid not in the flowpath or nexus tables
323
323
  network_toids = [x[2] for x in contents]
324
- print(f"Network toids: {len(network_toids)}")
324
+ logger.debug(f"Network toids: {len(network_toids)}")
325
325
  sql = "SELECT id FROM flowpaths"
326
326
  flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
327
- print(f"Flowpath ids: {len(flowpath_ids)}")
327
+ logger.debug(f"Flowpath ids: {len(flowpath_ids)}")
328
328
  sql = "SELECT id FROM nexus"
329
329
  nexus_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
330
- print(f"Nexus ids: {len(nexus_ids)}")
330
+ logger.debug(f"Nexus ids: {len(nexus_ids)}")
331
331
  bad_ids = set(network_toids) - set(flowpath_ids + nexus_ids)
332
- print(bad_ids)
333
- print(f"Removing {len(bad_ids)} network entries that are not in flowpaths or nexuses")
332
+ logger.debug(bad_ids)
333
+ logger.info(f"Removing {len(bad_ids)} network entries that are not in flowpaths or nexuses")
334
334
  # id column is second after fid
335
335
  contents = [x for x in contents if x[1] not in bad_ids]
336
336
 
@@ -398,7 +398,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
398
398
  dest_db.close()
399
399
 
400
400
 
401
- def get_table_crs_short(gpkg: str, table: str) -> str:
401
+ def get_table_crs_short(gpkg: str | Path, table: str) -> str:
402
402
  """
403
403
  Gets the CRS of the specified table in the specified geopackage as a short string. e.g. EPSG:5070
404
404
 
@@ -518,7 +518,7 @@ def get_available_tables(gpkg: Path) -> List[str]:
518
518
  return tables
519
519
 
520
520
 
521
- def get_cat_to_nhd_feature_id(gpkg: Path = file_paths.conus_hydrofabric) -> dict:
521
+ def get_cat_to_nhd_feature_id(gpkg: Path = file_paths.conus_hydrofabric) -> Dict[str, int]:
522
522
  available_tables = get_available_tables(gpkg)
523
523
  possible_tables = ["flowpath_edge_list", "network"]
524
524
 
@@ -535,7 +535,7 @@ def get_cat_to_nhd_feature_id(gpkg: Path = file_paths.conus_hydrofabric) -> dict
535
535
  sql_query = f"SELECT divide_id, hf_id FROM {table_name} WHERE divide_id IS NOT NULL AND hf_id IS NOT NULL"
536
536
 
537
537
  with sqlite3.connect(gpkg) as conn:
538
- result = conn.execute(sql_query).fetchall()
538
+ result: List[Tuple[str, str]] = conn.execute(sql_query).fetchall()
539
539
 
540
540
  mapping = {}
541
541
  for cat, feature in result:
@@ -2,7 +2,7 @@ import logging
2
2
  import sqlite3
3
3
  from functools import cache
4
4
  from pathlib import Path
5
- from typing import List, Set, Union
5
+ from typing import List, Optional, Set, Union
6
6
 
7
7
  import igraph as ig
8
8
  from data_processing.file_paths import file_paths
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
11
11
 
12
12
 
13
13
  def get_from_to_id_pairs(
14
- hydrofabric: Path = file_paths.conus_hydrofabric, ids: Set = None
14
+ hydrofabric: Path = file_paths.conus_hydrofabric, ids: Optional[Set | List] = None
15
15
  ) -> List[tuple]:
16
16
  """
17
17
  Retrieves the from and to IDs from the specified hydrofabric.
@@ -112,7 +112,7 @@ def get_graph() -> ig.Graph:
112
112
  return network_graph
113
113
 
114
114
 
115
- def get_outlet_id(wb_or_cat_id: str) -> str:
115
+ def get_outlet_id(wb_or_cat_id: str) -> str | None:
116
116
  """
117
117
  Retrieves the ID of the node downstream of the given node in the hydrological network.
118
118
 
@@ -209,7 +209,7 @@ def get_upstream_ids(names: Union[str, List[str]], include_outlet: bool = True)
209
209
  if name in parent_ids:
210
210
  continue
211
211
  try:
212
- if "cat" in name:
212
+ if "cat" in name: # type: ignore # If name is None, this will raise an error, which is handled below
213
213
  node_index = graph.vs.find(cat=name).index
214
214
  else:
215
215
  node_index = graph.vs.find(name=name).index
@@ -32,7 +32,7 @@ class S3ParallelFileSystem(S3FileSystem):
32
32
  "head_object", Bucket=bucket, Key=key, **version_kw, **self.req_kw
33
33
  )
34
34
  )["ContentLength"]
35
- except Exception as e:
35
+ except Exception:
36
36
  # Fall back to single request if HEAD fails
37
37
  return await self._download_chunk(bucket, key, {}, version_kw)
38
38
 
data_processing/subset.py CHANGED
@@ -64,12 +64,11 @@ def subset_vpu(
64
64
 
65
65
 
66
66
  def subset(
67
- cat_ids: List[str],
67
+ cat_ids: str | List[str],
68
68
  hydrofabric: Path = file_paths.conus_hydrofabric,
69
69
  output_gpkg_path: Path = Path(),
70
70
  include_outlet: bool = True,
71
71
  ):
72
- print(cat_ids)
73
72
  upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))
74
73
 
75
74
  if not output_gpkg_path:
@@ -1,19 +1,29 @@
1
1
  import gzip
2
+ import json
2
3
  import os
3
4
  import tarfile
4
5
  import warnings
5
- import json
6
- import requests
7
- from data_processing.file_paths import file_paths
8
- from tqdm import TqdmExperimentalWarning
9
6
  from time import sleep
7
+
10
8
  import boto3
11
- from botocore.exceptions import ClientError
9
+ import psutil
10
+ import requests
12
11
  from boto3.s3.transfer import TransferConfig
12
+ from botocore.exceptions import ClientError
13
+ import botocore
14
+ from data_processing.file_paths import file_paths
13
15
  from rich.console import Console
16
+ from rich.progress import (Progress,
17
+ SpinnerColumn,
18
+ TextColumn,
19
+ TimeElapsedColumn,
20
+ BarColumn,
21
+ DownloadColumn,
22
+ TransferSpeedColumn)
14
23
  from rich.prompt import Prompt
15
- from rich.progress import Progress, TextColumn, TimeElapsedColumn, SpinnerColumn
16
- import psutil
24
+ from tqdm import TqdmExperimentalWarning
25
+ from data_processing.gpkg_utils import verify_indices
26
+ import sqlite3
17
27
 
18
28
  warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
19
29
 
@@ -23,25 +33,22 @@ S3_KEY = "hydrofabrics/community/conus_nextgen.tar.gz"
23
33
  S3_REGION = "us-east-1"
24
34
  hydrofabric_url = f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com/{S3_KEY}"
25
35
 
36
+
26
37
  def decompress_gzip_tar(file_path, output_dir):
27
- # use rich to display "decompressing" message with a progress bar that just counts down from 30s
28
- # actually measuring this is hard and it usually takes ~20s to decompress
29
38
  console.print("Decompressing Hydrofabric...", style="bold green")
30
39
  progress = Progress(
31
40
  SpinnerColumn(),
32
41
  TextColumn("[progress.description]{task.description}"),
33
- TimeElapsedColumn(),
42
+ TimeElapsedColumn(),
34
43
  )
35
44
  task = progress.add_task("Decompressing", total=1)
36
- progress.start()
37
- with gzip.open(file_path, "rb") as f_in:
38
- with tarfile.open(fileobj=f_in) as tar:
39
- # Extract all contents
40
- for member in tar:
41
- tar.extract(member, path=output_dir)
42
- # Update the progress bar
43
- progress.update(task, completed=1)
44
- progress.stop()
45
+ with progress:
46
+ with gzip.open(file_path, "rb") as f_in:
47
+ with tarfile.open(fileobj=f_in) as tar:
48
+ # Extract all contents
49
+ for member in tar:
50
+ tar.extract(member, path=output_dir)
51
+ progress.update(task, advance=1 / len(tar.getmembers()))
45
52
 
46
53
 
47
54
  def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
@@ -53,10 +60,13 @@ def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
53
60
  if os.path.exists(save_path):
54
61
  console.print(f"File already exists: {save_path}", style="bold yellow")
55
62
  os.remove(save_path)
56
-
63
+
64
+ client_config = botocore.config.Config(
65
+ max_pool_connections=75
66
+ )
57
67
  # Initialize S3 client
58
68
  s3_client = boto3.client(
59
- "s3", aws_access_key_id="", aws_secret_access_key="", region_name=region
69
+ "s3", aws_access_key_id="", aws_secret_access_key="", region_name=region, config=client_config
60
70
  )
61
71
  # Disable request signing for public buckets
62
72
  s3_client._request_signer.sign = lambda *args, **kwargs: None
@@ -92,19 +102,15 @@ def download_from_s3(save_path, bucket=S3_BUCKET, key=S3_KEY, region=S3_REGION):
92
102
  use_threads=True,
93
103
  )
94
104
 
95
- console.print(f"Downloading {key} to {save_path}...", style="bold green")
96
- console.print(
97
- f"The file downloads faster with no progress indicator, this should take around 30s",
98
- style="bold yellow",
99
- )
100
- console.print(
101
- f"Please use network monitoring on your computer if you wish to track the download",
102
- style="green",
103
- )
104
105
 
105
106
  try:
107
+ dl_progress = Progress(BarColumn(), DownloadColumn(), TransferSpeedColumn())
106
108
  # Download file using optimized transfer config
107
- s3_client.download_file(Bucket=bucket, Key=key, Filename=save_path, Config=config)
109
+ with dl_progress:
110
+ task = dl_progress.add_task("Downloading...", total=total_size)
111
+ s3_client.download_file(Bucket=bucket, Key=key, Filename=save_path, Config=config,
112
+ Callback=lambda bytes_downloaded: dl_progress.update(
113
+ task, advance=bytes_downloaded))
108
114
  return True
109
115
  except Exception as e:
110
116
  console.print(f"Error downloading file: {e}", style="bold red")
@@ -122,6 +128,14 @@ def get_headers():
122
128
 
123
129
 
124
130
  def download_and_update_hf():
131
+
132
+ if file_paths.conus_hydrofabric.is_file():
133
+ console.print(
134
+ f"Hydrofabric already exists at {file_paths.conus_hydrofabric}, removing it to download the latest version.",
135
+ style="bold yellow",
136
+ )
137
+ file_paths.conus_hydrofabric.unlink()
138
+
125
139
  download_from_s3(
126
140
  file_paths.conus_hydrofabric.with_suffix(".tar.gz"),
127
141
  bucket="communityhydrofabric",
@@ -207,6 +221,17 @@ def validate_hydrofabric():
207
221
  )
208
222
  sleep(2)
209
223
  return
224
+
225
+ # moved this from gpkg_utils to here to avoid potential nested rich live displays
226
+ if file_paths.conus_hydrofabric.is_file():
227
+ valid_hf = False
228
+ while not valid_hf:
229
+ try:
230
+ verify_indices()
231
+ valid_hf = True
232
+ except sqlite3.DatabaseError:
233
+ console.print(f"Hydrofabric {file_paths.conus_hydrofabric} is corrupted. Redownloading...", style="red")
234
+ download_and_update_hf()
210
235
 
211
236
 
212
237
  def validate_output_dir():
@@ -220,7 +245,7 @@ def validate_output_dir():
220
245
  response = Prompt.ask("Enter the path to the working directory")
221
246
  if response == "" or response.lower() == "y":
222
247
  response = "~/ngiab_preprocess_output/"
223
- file_paths.set_working_dir(response)
248
+ file_paths.set_working_dir(response) # type: ignore
224
249
 
225
250
 
226
251
  def validate_all():
map_app/__main__.py CHANGED
@@ -39,12 +39,13 @@ def main():
39
39
  Timer(2, set_logs_to_warning).start()
40
40
  with open("app.log", "a") as f:
41
41
  f.write("Running in debug mode\n")
42
- app.run(debug=True, host="0.0.0.0", port="8080")
42
+ app.run(debug=True, host="0.0.0.0", port="8080") # type: ignore
43
43
  else:
44
44
  Timer(1, open_browser).start()
45
45
  with open("app.log", "a") as f:
46
46
  f.write("Running in production mode\n")
47
- app.run(host="0.0.0.0", port="0")
47
+ app.run(host="0.0.0.0", port="0") # type: ignore
48
+
48
49
 
49
50
  if __name__ == "__main__":
50
51
  main()
@@ -28,10 +28,9 @@ main {
28
28
 
29
29
  .maplibregl-popup-content {
30
30
  background: var(--surface-color) !important;
31
-
32
31
  }
33
32
 
34
- #toggle-button {
33
+ #toggle-button-gages, #toggle-button-camels, #toggle-button-nwm, #toggle-button-aorc {
35
34
  position: relative;
36
35
  top: 20px;
37
36
  left: 20px;
@@ -46,11 +45,23 @@ main {
46
45
  z-index: 1;
47
46
  }
48
47
 
49
- #toggle-button:hover {
48
+ #toggle-button-gages:hover, #toggle-button-camels:hover, #toggle-button-nwm:hover, #toggle-button-aorc:hover {
50
49
  scale: 1.1;
51
50
  box-shadow: var(--shadow-md);
52
51
  }
53
52
 
53
+ #toggle-button-camels {
54
+ left: 30px;
55
+ }
56
+
57
+ #toggle-button-nwm {
58
+ left: 40px;
59
+ }
60
+
61
+ #toggle-button-aorc {
62
+ left: 50px;
63
+ }
64
+
54
65
  body {
55
66
  font-family: 'Inter', system-ui, -apple-system, sans-serif;
56
67
  margin: 0;
@@ -109,7 +120,7 @@ h2 {
109
120
  }
110
121
 
111
122
  #selected-basins,
112
- #cli-command {
123
+ #cli-command,#cli-prefix {
113
124
  background: var(--code-bg);
114
125
  padding: 16px;
115
126
  border-radius: var(--border-radius);
@@ -119,6 +130,7 @@ h2 {
119
130
  color: var(--text-color);
120
131
  }
121
132
 
133
+
122
134
  button {
123
135
  background-color: var(--primary-color);
124
136
  color: light-dark(white, #f1f5f9);
@@ -204,6 +216,11 @@ input[type="datetime-local"] {
204
216
  display: inline-block;
205
217
  }
206
218
 
219
+ #command-builder{
220
+ display: inline-block;
221
+ padding:16px ;
222
+ }
223
+
207
224
  .command-container {
208
225
  background: var(--surface-color);
209
226
  border: 1px solid var(--border-color);
@@ -222,7 +239,7 @@ input[type="datetime-local"] {
222
239
  border-top-right-radius: var(--border-radius);
223
240
  }
224
241
 
225
- .command-header span {
242
+ .command-header>span {
226
243
  font-size: 0.875rem;
227
244
  color: var(--secondary-text);
228
245
  font-weight: 500;
@@ -254,7 +271,8 @@ input[type="datetime-local"] {
254
271
  }
255
272
 
256
273
  .command-content {
257
- padding: 16px;
274
+ display:inline;
275
+ padding: 0px !important;
258
276
  background: var(--code-bg);
259
277
  font-family: 'Monaco', 'Consolas', monospace;
260
278
  font-size: 0.875rem;
@@ -263,6 +281,11 @@ input[type="datetime-local"] {
263
281
  border-bottom-left-radius: var(--border-radius);
264
282
  border-bottom-right-radius: var(--border-radius);
265
283
  color: var(--text-color);
284
+
285
+ }
286
+
287
+ #cli-prefix{
288
+ opacity: 0;
266
289
  }
267
290
 
268
291
  .copy-button.copied {
@@ -281,17 +304,17 @@ input[type="datetime-local"] {
281
304
  body {
282
305
  padding: 16px;
283
306
  }
284
-
307
+
285
308
  main {
286
309
  width: 90vw;
287
310
  }
288
-
311
+
289
312
  .time-input {
290
313
  flex-direction: column;
291
314
  align-items: flex-start;
292
315
  }
293
-
316
+
294
317
  input[type="datetime-local"] {
295
318
  width: 100%;
296
319
  }
297
- }
320
+ }
@@ -34,24 +34,27 @@
34
34
  background-color: white;
35
35
  border-radius: 18px;
36
36
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
37
- transition: transform 0.3s ease, box-shadow 0.3s ease;
37
+ transition:
38
+ transform 0.3s ease,
39
+ box-shadow 0.3s ease;
38
40
  display: flex;
39
41
  justify-content: center;
40
42
  align-items: center;
41
43
  font-size: 14px;
42
44
  font-weight: bold;
43
- color: #4CAF50; /* Default color for the selected text */
45
+ color: #4caf50; /* Default color for the selected text */
44
46
  }
45
47
 
46
48
  /* Toggle Text (NWM and AORC labels) */
47
49
  .toggle-text {
48
50
  position: absolute;
49
51
  top: 50%;
52
+ min-width: 40px;
53
+ text-align: center;
50
54
  transform: translateY(-50%);
51
55
  font-size: 14px;
52
56
  font-weight: bold;
53
57
  color: #888; /* Grey color for non-selected text */
54
- transition: color 0.3s ease;
55
58
  }
56
59
 
57
60
  .toggle-text-left {
@@ -70,7 +73,7 @@
70
73
  .toggle-input:checked + .toggle-label .toggle-handle {
71
74
  transform: translateX(56px);
72
75
  box-shadow: 0 0 10px rgba(0, 123, 255, 0.8); /* Blue glow effect */
73
- color: #007BFF; /* Blue color for the selected text */
76
+ color: #007bff; /* Blue color for the selected text */
74
77
  }
75
78
 
76
79
  .toggle-input:checked + .toggle-label .toggle-text-left {
@@ -79,4 +82,4 @@
79
82
 
80
83
  .toggle-input:checked + .toggle-label .toggle-text-right {
81
84
  color: #888; /* Grey color for non-selected text */
82
- }
85
+ }