geo-explorer 0.9.8__py3-none-any.whl → 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,12 +5,14 @@ import json
5
5
  import logging
6
6
  import math
7
7
  import os
8
+ import pickle
9
+ import random
8
10
  import re
9
11
  import signal
10
12
  import sys
11
13
  import time
14
+ import traceback
12
15
  from collections.abc import Callable
13
- from collections.abc import Sequence
14
16
  from concurrent.futures import ThreadPoolExecutor
15
17
  from functools import partial
16
18
  from functools import wraps
@@ -21,15 +23,15 @@ from pathlib import PurePath
21
23
  from time import perf_counter
22
24
  from typing import Any
23
25
  from typing import ClassVar
24
- import random
25
26
 
26
- import pickle
27
- import geopandas as gpd
28
27
  import dash
29
28
  import dash_bootstrap_components as dbc
30
29
  import dash_leaflet as dl
30
+ import folium
31
+ import geopandas as gpd
31
32
  import joblib
32
33
  import matplotlib
34
+ import matplotlib.colors
33
35
  import matplotlib.colors as mcolors
34
36
  import msgspec
35
37
  import numpy as np
@@ -37,6 +39,7 @@ import pandas as pd
37
39
  import polars as pl
38
40
  import pyarrow
39
41
  import pyarrow.parquet as pq
42
+ import rasterio
40
43
  import sgis as sg
41
44
  import shapely
42
45
  from dash import Dash
@@ -54,24 +57,44 @@ from geopandas import GeoDataFrame
54
57
  from geopandas import GeoSeries
55
58
  from geopandas.array import GeometryArray
56
59
  from jenkspy import jenks_breaks
57
- from sgis.io.dapla_functions import _get_geo_metadata
58
- from sgis.io.dapla_functions import _read_pyarrow
60
+ from sgis import get_common_crs
59
61
  from sgis.io.dapla_functions import _get_bounds_parquet
60
62
  from sgis.io.dapla_functions import _get_bounds_parquet_from_open_file
63
+ from sgis.io.dapla_functions import _get_geo_metadata
64
+ from sgis.io.dapla_functions import _read_pyarrow
61
65
  from sgis.maps.wms import WmsLoader
62
66
  from shapely import Geometry
63
67
  from shapely.errors import GEOSException
64
68
  from shapely.geometry import Point
65
- from sgis import get_common_crs
69
+ from shapely.geometry import Polygon
70
+
71
+ try:
72
+ from xarray import DataArray
73
+ from xarray import Dataset
74
+ except ImportError:
75
+
76
+ class Dataset:
77
+ """Placeholder."""
78
+
79
+ class DataArray:
80
+ """Placeholder."""
81
+
66
82
 
67
83
  from .file_browser import FileBrowser
68
84
  from .fs import LocalFileSystem
85
+ from .nc import GeoTIFFConfig
86
+ from .nc import NetCDFConfig
87
+ from .nc import _run_code_block
88
+ from .utils import _PROFILE_DICT
89
+ from .utils import DEBUG
69
90
  from .utils import _clicked_button_style
70
91
  from .utils import _standardize_path
71
92
  from .utils import _unclicked_button_style
93
+ from .utils import debug_print
72
94
  from .utils import get_button_with_tooltip
73
- from .utils import time_method_call
95
+ from .utils import get_xarray_bounds
74
96
  from .utils import time_function_call
97
+ from .utils import time_method_call
75
98
 
76
99
  OFFWHITE: str = "#ebebeb"
77
100
  FILE_CHECKED_COLOR: str = "#3e82ff"
@@ -91,45 +114,6 @@ HIDDEN_ADDED_COLUMNS = {
91
114
  ADDED_COLUMNS = HIDDEN_ADDED_COLUMNS | {"area"}
92
115
  ns = Namespace("onEachFeatureToggleHighlight", "default")
93
116
 
94
- DEBUG: bool = False
95
-
96
- _PROFILE_DICT = {}
97
-
98
- if DEBUG:
99
-
100
- def debug_print(*args):
101
- print(
102
- *(
103
- f"{type(arg).__name__}: {arg}" if isinstance(arg, Exception) else arg
104
- for arg in args
105
- )
106
- )
107
-
108
- else:
109
-
110
- def debug_print(*args):
111
- pass
112
-
113
- def time_method_call(_) -> Callable:
114
- def decorator(method):
115
- @wraps(method)
116
- def wrapper(self, *args, **kwargs):
117
- return method(self, *args, **kwargs)
118
-
119
- return wrapper
120
-
121
- return decorator
122
-
123
- def time_function_call(_):
124
- def decorator(func):
125
- @wraps(func)
126
- def wrapper(*args, **kwargs):
127
- return func(*args, **kwargs)
128
-
129
- return wrapper
130
-
131
- return decorator
132
-
133
117
 
134
118
  def _get_default_sql_query(df: pl.LazyFrame | pl.DataFrame, columns: list[str]) -> str:
135
119
  if isinstance(df, pl.LazyFrame):
@@ -190,9 +174,27 @@ def _get_sql_query_with_col(
190
174
 
191
175
  @time_function_call(_PROFILE_DICT)
192
176
  def read_file(
193
- path: str, file_system: AbstractFileSystem, **kwargs
177
+ i: int, path: str, file_system: AbstractFileSystem, **kwargs
194
178
  ) -> tuple[pl.LazyFrame, dict[str, pl.DataType]]:
195
179
 
180
+ if is_raster_file(path):
181
+ import xarray as xr
182
+
183
+ if path.endswith(".ncml"):
184
+ # need to sleep some seconds when multiple ncml files from url
185
+ time.sleep(i * 5)
186
+
187
+ try:
188
+ ds = xr.open_dataarray(path)
189
+ except Exception:
190
+ ds = xr.open_dataset(path)
191
+ try:
192
+ ds = ds.sortby("time")
193
+ except Exception:
194
+ pass
195
+
196
+ return ds, {}
197
+
196
198
  if not path.endswith(".parquet") and FILE_SPLITTER_TXT not in path:
197
199
  try:
198
200
  df = gpd.read_file(path, filesystem=file_system, **kwargs)
@@ -335,25 +337,6 @@ def _get_colorpicker_container(color_dict: dict[str, str]) -> html.Div:
335
337
  dbc.Label([column_value]),
336
338
  width="auto",
337
339
  ),
338
- dbc.Col(
339
- get_button_with_tooltip(
340
- "❌",
341
- id={
342
- "type": "delete-cat-btn",
343
- "index": column_value,
344
- },
345
- n_clicks=0,
346
- style={
347
- "color": "red",
348
- "border": "none",
349
- "background": "none",
350
- "cursor": "pointer",
351
- "marginLeft": "auto",
352
- },
353
- tooltip_text="Remove all data in this category",
354
- ),
355
- width="auto",
356
- ),
357
340
  ],
358
341
  style={
359
342
  "display": "flex",
@@ -379,7 +362,7 @@ def _add_data_one_path(
379
362
  concatted_data,
380
363
  nan_color,
381
364
  nan_label,
382
- alpha,
365
+ opacity,
383
366
  n_rows_per_path,
384
367
  columns: dict[str, set[str]],
385
368
  current_columns: set[str],
@@ -389,7 +372,7 @@ def _add_data_one_path(
389
372
  for key, cols in columns.items()
390
373
  for col in cols
391
374
  if path in key and col in current_columns
392
- } | {"split_index"}
375
+ } | {"split_index", "_color"}
393
376
 
394
377
  df = concatted_data.filter(
395
378
  (pl.col("__file_path") == path)
@@ -412,7 +395,7 @@ def _add_data_one_path(
412
395
  if column is not None and column in columns:
413
396
  df = _fix_colors(df, column, bins, is_numeric, color_dict, nan_color, nan_label)
414
397
 
415
- if column and column not in columns:
398
+ if column and column not in columns and column != "_color":
416
399
  return rows_are_hidden, [
417
400
  _get_leaflet_overlay(
418
401
  data=_cheap_geo_interface(df.collect()),
@@ -420,8 +403,8 @@ def _add_data_one_path(
420
403
  style={
421
404
  "color": nan_color,
422
405
  "fillColor": nan_color,
423
- "weight": 2,
424
- "fillOpacity": alpha,
406
+ "weight": 1,
407
+ "fillOpacity": opacity,
425
408
  },
426
409
  onEachFeature=ns("yellowIfHighlighted"),
427
410
  pointToLayer=ns("pointToLayerCircle"),
@@ -437,7 +420,7 @@ def _add_data_one_path(
437
420
  path,
438
421
  column,
439
422
  nan_color,
440
- alpha,
423
+ opacity,
441
424
  onEachFeature=ns("yellowIfHighlighted"),
442
425
  pointToLayer=ns("pointToLayerCircle"),
443
426
  hideout=dict(
@@ -458,8 +441,8 @@ def _add_data_one_path(
458
441
  style={
459
442
  "color": color,
460
443
  "fillColor": color,
461
- "weight": 2,
462
- "fillOpacity": alpha,
444
+ "weight": 1,
445
+ "fillOpacity": opacity,
463
446
  },
464
447
  onEachFeature=ns("yellowIfHighlighted"),
465
448
  pointToLayer=ns("pointToLayerCircle"),
@@ -472,6 +455,8 @@ def _add_data_one_path(
472
455
 
473
456
  @time_function_call(_PROFILE_DICT)
474
457
  def _fix_colors(df, column, bins, is_numeric, color_dict, nan_color, nan_label):
458
+ if column == "_color":
459
+ return df
475
460
  if not is_numeric:
476
461
  return df.with_columns(
477
462
  _color=pl.col(column).replace(
@@ -508,7 +493,7 @@ def _fix_colors(df, column, bins, is_numeric, color_dict, nan_color, nan_label):
508
493
  bin_index_expr = pl.when(conditions[0]).then(pl.lit(color_dict[0]))
509
494
  for i, cond in enumerate(conditions[1:], start=1):
510
495
  if i not in color_dict:
511
- raise KeyError(f"{i} not in {color_dict}")
496
+ continue
512
497
  bin_index_expr = bin_index_expr.when(cond).then(pl.lit(color_dict[i]))
513
498
  notnas = notnas.with_columns(bin_index_expr.alias("_color"))
514
499
 
@@ -697,15 +682,18 @@ def _get_unique_id(i: float) -> pl.Expr:
697
682
  def _read_files(explorer, paths: list[str], mask=None, **kwargs) -> None:
698
683
  if not paths:
699
684
  return
700
- bounds_set = set(explorer._bounds_series.index)
685
+ bbox_set = set(explorer._bbox_series.index)
701
686
 
702
687
  paths = [
703
688
  path
704
689
  for path in paths
705
690
  if mask is None
706
691
  or (
707
- path in bounds_set
708
- and shapely.intersects(mask, explorer._bounds_series[path])
692
+ path in bbox_set
693
+ and (
694
+ pd.isna(explorer._bbox_series[path])
695
+ or shapely.intersects(mask, explorer._bbox_series[path])
696
+ )
709
697
  )
710
698
  ]
711
699
  if not paths:
@@ -716,18 +704,33 @@ def _read_files(explorer, paths: list[str], mask=None, **kwargs) -> None:
716
704
  with joblib.Parallel(len(paths), backend=backend) as parallel:
717
705
  more_data = parallel(
718
706
  joblib.delayed(explorer.__class__.read_func)(
719
- path=path, file_system=file_system, **kwargs
707
+ i=i, path=path, file_system=file_system, **kwargs
720
708
  )
721
- for path in paths
709
+ for i, path in enumerate(paths)
722
710
  )
723
- for path, (df, dtypes) in zip(paths, more_data, strict=True):
724
- if df is None:
725
- continue
726
- explorer._loaded_data[path] = df.with_columns(
727
- _unique_id=_get_unique_id(explorer._max_unique_id_int)
728
- )
729
- explorer._dtypes[path] = dtypes | {"area": pl.Float64()}
730
- explorer._max_unique_id_int += 1
711
+ for selected_path in explorer.selected_files:
712
+ for path, (df, dtypes) in zip(paths, more_data, strict=True):
713
+ if selected_path not in path or df is None:
714
+ continue
715
+ if isinstance(df, (Dataset | DataArray)):
716
+ explorer._loaded_data[path] = df
717
+ img_bbox = GeoSeries(
718
+ [shapely.box(*get_xarray_bounds(df))],
719
+ crs=explorer._nc[selected_path].get_crs(df, path),
720
+ ).to_crs(4326)
721
+ explorer._bbox_series = pd.concat(
722
+ [
723
+ GeoSeries({path: next(iter(img_bbox))}),
724
+ explorer._bbox_series[lambda x: x.index != path],
725
+ ]
726
+ )
727
+
728
+ continue
729
+ explorer._loaded_data[path] = df.with_columns(
730
+ _unique_id=_get_unique_id(explorer._max_unique_id_int)
731
+ ).drop("id", strict=False)
732
+ explorer._dtypes[path] = dtypes | {"area": pl.Float64()}
733
+ explorer._max_unique_id_int += 1
731
734
 
732
735
 
733
736
  def _random_color(min_diff: int = 50) -> str:
@@ -753,7 +756,7 @@ def _get_stem_from_parent(path):
753
756
  return f"{parent_name}/{name}"
754
757
 
755
758
 
756
- def _try_to_get_bounds_else_none(
759
+ def _try_to_get_bbox_else_none(
757
760
  path, file_system
758
761
  ) -> tuple[tuple[float] | None, str | None]:
759
762
  try:
@@ -765,15 +768,15 @@ def _try_to_get_bounds_else_none(
765
768
  return None, None
766
769
 
767
770
 
768
- def _get_bounds_series_as_4326(paths, file_system):
769
- # bounds_series = sg.get_bounds_series(paths, file_system=file_system)
770
- # return bounds_series.to_crs(4326)
771
+ def _get_bbox_series_as_4326(paths, file_system):
772
+ # bbox_series = sg.get_bbox_series(paths, file_system=file_system)
773
+ # return bbox_series.to_crs(4326)
771
774
 
772
- func = partial(_try_to_get_bounds_else_none, file_system=file_system)
775
+ func = partial(_try_to_get_bbox_else_none, file_system=file_system)
773
776
  with ThreadPoolExecutor() as executor:
774
- bounds_and_crs = list(executor.map(func, paths))
777
+ bbox_and_crs = list(executor.map(func, paths))
775
778
 
776
- crss = {json.dumps(x[1]) for x in bounds_and_crs}
779
+ crss = {json.dumps(x[1]) for x in bbox_and_crs}
777
780
  crss = {
778
781
  crs
779
782
  for crs in crss
@@ -785,7 +788,7 @@ def _get_bounds_series_as_4326(paths, file_system):
785
788
  return GeoSeries(
786
789
  [
787
790
  shapely.box(*bbox[0]) if bbox[0] is not None else None
788
- for bbox in bounds_and_crs
791
+ for bbox in bbox_and_crs
789
792
  ],
790
793
  index=paths,
791
794
  crs=crs,
@@ -1016,9 +1019,11 @@ def _is_likely_geopandas_func(df, txt: Any):
1016
1019
  return any(x in txt and len(x) > 2 and x not in cols for x in geopandas_methods)
1017
1020
 
1018
1021
 
1019
- def _unformat_query(query: str) -> str:
1022
+ def _unformat_query(query: str | None) -> str:
1020
1023
  """Remove newlines and multiple whitespaces from SQL query."""
1021
- query = query.replace("\n", " ").strip()
1024
+ if query is None:
1025
+ return None
1026
+ query = query.replace("\n", "; ").strip().strip(";").strip()
1022
1027
  while " " in query:
1023
1028
  query = query.replace(" ", " ")
1024
1029
  return query
@@ -1058,7 +1063,7 @@ def _get_leaflet_overlay(data, path, **kwargs):
1058
1063
 
1059
1064
 
1060
1065
  @time_function_call(_PROFILE_DICT)
1061
- def _get_multiple_leaflet_overlay(df, path, column, nan_color, alpha, **kwargs):
1066
+ def _get_multiple_leaflet_overlay(df, path, column, nan_color, opacity, **kwargs):
1062
1067
  values = df.select("_color").unique().collect()["_color"]
1063
1068
  return dl.Overlay(
1064
1069
  dl.LayerGroup(
@@ -1070,8 +1075,8 @@ def _get_multiple_leaflet_overlay(df, path, column, nan_color, alpha, **kwargs):
1070
1075
  style={
1071
1076
  "color": color_,
1072
1077
  "fillColor": color_,
1073
- "weight": 2,
1074
- "fillOpacity": alpha,
1078
+ "weight": 1,
1079
+ "fillOpacity": opacity,
1075
1080
  },
1076
1081
  id={
1077
1082
  "type": "geojson",
@@ -1092,8 +1097,8 @@ def _get_multiple_leaflet_overlay(df, path, column, nan_color, alpha, **kwargs):
1092
1097
  style={
1093
1098
  "color": nan_color,
1094
1099
  "fillColor": nan_color,
1095
- "weight": 2,
1096
- "fillOpacity": alpha,
1100
+ "weight": 1,
1101
+ "fillOpacity": opacity,
1097
1102
  },
1098
1103
  id={
1099
1104
  "type": "geojson",
@@ -1149,18 +1154,18 @@ class GeoExplorer:
1149
1154
  max_rows: Max number of rows to sample per dataset if number of feature in bounds excedes.
1150
1155
  Note that rendering more than the default (10,000) might crash the server, especially for
1151
1156
  polygon features.
1152
- selected_features: list of indices of features (rows) to show in attribute table
1153
- at init. Fetch this list with the "Export as code" button.
1157
+ # selected_features: list of indices of features (rows) to show in attribute table
1158
+ # at init. Fetch this list with the "Export as code" button.
1154
1159
  hard_click: If True, clicking on a geometry triggers all overlapping geometries to be marked.
1155
1160
  splitted: If True, all rows will have a separate label and color.
1156
- alpha: Opacity/transparency of the geometries.
1161
+ opacity: Opacity/transparency of the geometries.
1157
1162
  nan_color: Color for missing values. Defaults to a shade of gray.
1158
1163
  nan_label: Defaults to "Missing".
1159
1164
  max_read_size_per_callback: Defaults to 1e9 bytes (1 GB). Meaning max 1 GB is read at once, then the read
1160
1165
  function is cycled until all data is read. This is because long callbacks time out.
1161
1166
  **kwargs: Additional keyword arguments passed to dash_leaflet.Map: https://www.dash-leaflet.com/components/map_container.
1162
1167
 
1163
- A "clean" GeoExplorer can be initialized like this:
1168
+ An empty GeoExplorer can be initialized like this:
1164
1169
 
1165
1170
  >>> from geo_explorer import GeoExplorer
1166
1171
  >>> from geo_explorer import LocalFileSystem
@@ -1251,7 +1256,9 @@ class GeoExplorer:
1251
1256
  favorites: list[str] | None = None,
1252
1257
  port: int = 8050,
1253
1258
  file_system: AbstractFileSystem | None = None,
1254
- data: dict[str, str | GeoDataFrame] | list[str | dict] | None = None,
1259
+ data: (
1260
+ dict[str, str | GeoDataFrame | NetCDFConfig] | list[str | dict] | None
1261
+ ) = None,
1255
1262
  column: str | None = None,
1256
1263
  color_dict: dict | None = None,
1257
1264
  center: tuple[float, float] | None = None,
@@ -1259,10 +1266,10 @@ class GeoExplorer:
1259
1266
  wms: dict[str, WmsLoader] | None = None,
1260
1267
  wms_layers_checked: dict[str, list[str]] | None = None,
1261
1268
  max_rows: int = 10_000,
1262
- selected_features: list[str] | None = None,
1269
+ # selected_features: list[str] | None = None,
1263
1270
  hard_click: bool = False,
1264
1271
  splitted: bool = False,
1265
- alpha: float = 0.6,
1272
+ opacity: float = 0.6,
1266
1273
  nan_color: str = "#969696",
1267
1274
  nan_label: str = "Missing",
1268
1275
  max_read_size_per_callback: int = 1e9,
@@ -1295,16 +1302,16 @@ class GeoExplorer:
1295
1302
  self.splitted = splitted
1296
1303
  self.hard_click = hard_click
1297
1304
  self.max_rows = max_rows
1298
- self.alpha = alpha
1299
- self._bounds_series = GeoSeries()
1305
+ self.opacity = opacity
1306
+ self._bbox_series = GeoSeries()
1300
1307
  self.selected_files: dict[str, int] = {}
1301
1308
  self._loaded_data: dict[str, pl.LazyFrame] = {}
1309
+ self._images: dict[str, Polygon] = {}
1302
1310
  self._dtypes: dict[str, dict[str, pl.DataType]] = {}
1303
1311
  self._max_unique_id_int: int = 0
1304
1312
  self._loaded_data_sizes: dict[str, int] = {}
1305
1313
  self._concatted_data: pl.DataFrame | None = None
1306
- self._deleted_categories = set()
1307
- self.selected_features = {}
1314
+ self._selected_features = {}
1308
1315
  self._file_browser = FileBrowser(
1309
1316
  start_dir, file_system=file_system, favorites=favorites
1310
1317
  )
@@ -1312,6 +1319,8 @@ class GeoExplorer:
1312
1319
  self.max_read_size_per_callback = max_read_size_per_callback
1313
1320
  self._force_categorical = False
1314
1321
  self._is_recursing = False
1322
+ self._nc = {}
1323
+ self._px_plot_conf = {}
1315
1324
 
1316
1325
  if is_jupyter():
1317
1326
  service_prefix = os.environ["JUPYTERHUB_SERVICE_PREFIX"].strip("/")
@@ -1344,7 +1353,7 @@ class GeoExplorer:
1344
1353
  return dbc.Container(
1345
1354
  [
1346
1355
  dcc.Location(id="url"),
1347
- dbc.Row(html.Div(id="alert")),
1356
+ dbc.Row(html.Div(id="add-data-alert")),
1348
1357
  dbc.Row(html.Div(id="alert3")),
1349
1358
  dbc.Row(html.Div(id="alert4")),
1350
1359
  dbc.Row(html.Div(id="new-file-added")),
@@ -1403,7 +1412,7 @@ class GeoExplorer:
1403
1412
  tooltip_text="Get code to reproduce current view",
1404
1413
  ),
1405
1414
  dcc.Dropdown(
1406
- value=self.alpha,
1415
+ value=self.opacity,
1407
1416
  options=[
1408
1417
  {
1409
1418
  "label": f"opacity={round(x, 1)}",
@@ -1415,7 +1424,7 @@ class GeoExplorer:
1415
1424
  0.1, 1.1, 0.1
1416
1425
  )
1417
1426
  ],
1418
- id="alpha",
1427
+ id="opacity",
1419
1428
  clearable=False,
1420
1429
  ),
1421
1430
  dbc.Modal(
@@ -1523,6 +1532,36 @@ class GeoExplorer:
1523
1532
  id="numeric-options",
1524
1533
  style={"display": "none"},
1525
1534
  ),
1535
+ dbc.Row(
1536
+ [
1537
+ dbc.Col(
1538
+ [
1539
+ dbc.Row(
1540
+ [
1541
+ dbc.Col(width=10),
1542
+ dbc.Col(
1543
+ get_button_with_tooltip(
1544
+ "✖",
1545
+ id="close-image-btn",
1546
+ tooltip_text="Remove plot",
1547
+ style={
1548
+ "display": "none"
1549
+ },
1550
+ ),
1551
+ width=2,
1552
+ ),
1553
+ ],
1554
+ ),
1555
+ dbc.Row(
1556
+ html.Div(
1557
+ id="image-plot",
1558
+ style={"display": "none"},
1559
+ )
1560
+ ),
1561
+ ]
1562
+ ),
1563
+ ]
1564
+ ),
1526
1565
  dbc.Row(
1527
1566
  [
1528
1567
  dbc.Row(
@@ -1594,14 +1633,6 @@ class GeoExplorer:
1594
1633
  "margin-right": "0px",
1595
1634
  },
1596
1635
  ),
1597
- dbc.Row(
1598
- get_button_with_tooltip(
1599
- "Reload categories",
1600
- id="reload-categories",
1601
- n_clicks=0,
1602
- tooltip_text="Get back categories that have been X-ed out",
1603
- ),
1604
- ),
1605
1636
  dbc.Row(
1606
1637
  id="colorpicker-container",
1607
1638
  ),
@@ -1674,7 +1705,7 @@ class GeoExplorer:
1674
1705
  )
1675
1706
 
1676
1707
  error_mess = "'data' must be a list of file paths or a dict of GeoDataFrames."
1677
- bounds_series_dict = {}
1708
+ bbox_series_dict = {}
1678
1709
  if isinstance(data, dict):
1679
1710
  data = [data]
1680
1711
 
@@ -1687,6 +1718,12 @@ class GeoExplorer:
1687
1718
  raise ValueError(error_mess)
1688
1719
  for key, value in x.items():
1689
1720
  key = _standardize_path(key)
1721
+ if isinstance(value, NetCDFConfig):
1722
+ # setting nc files as unchecked because they might be very large
1723
+ self.selected_files[key] = False
1724
+ self._queries[key] = value.code_block
1725
+ self._nc[key] = value
1726
+ continue
1690
1727
  if value is not None and not isinstance(value, (GeoDataFrame | str)):
1691
1728
  raise ValueError(error_mess)
1692
1729
  elif not isinstance(value, GeoDataFrame):
@@ -1694,7 +1731,7 @@ class GeoExplorer:
1694
1731
  self._queries[key] = value
1695
1732
  continue
1696
1733
  value, dtypes = _geopandas_to_polars(value, key)
1697
- bounds_series_dict[key] = shapely.box(
1734
+ bbox_series_dict[key] = shapely.box(
1698
1735
  float(value["minx"].min()),
1699
1736
  float(value["miny"].min()),
1700
1737
  float(value["maxx"].max()),
@@ -1705,11 +1742,11 @@ class GeoExplorer:
1705
1742
  self.selected_files[key] = True
1706
1743
 
1707
1744
  self.selected_files = dict(reversed(self.selected_files.items()))
1708
- self._bounds_series = GeoSeries(bounds_series_dict)
1745
+ self._bbox_series = GeoSeries(bbox_series_dict)
1709
1746
 
1710
1747
  # storing bounds here before file paths are loaded. To avoid setting center as the entire map bounds if large data
1711
- if len(self._bounds_series):
1712
- minx, miny, maxx, maxy = self._bounds_series.total_bounds
1748
+ if len(self._bbox_series):
1749
+ minx, miny, maxx, maxy = self._bbox_series.total_bounds
1713
1750
  else:
1714
1751
  minx, miny, maxx, maxy = None, None, None, None
1715
1752
 
@@ -1720,7 +1757,7 @@ class GeoExplorer:
1720
1757
  self._register_callbacks()
1721
1758
  return
1722
1759
 
1723
- self._append_to_bounds_series(
1760
+ self._append_to_bbox_series(
1724
1761
  [x for x in self.selected_files if x not in self._loaded_data]
1725
1762
  )
1726
1763
 
@@ -1743,16 +1780,18 @@ class GeoExplorer:
1743
1780
  if key not in self._loaded_data:
1744
1781
  continue
1745
1782
  df = self._loaded_data[key]
1783
+ self._max_unique_id_int += 1
1784
+ if isinstance(df, (Dataset | DataArray)):
1785
+ continue
1746
1786
  loaded_data_sorted[key] = df.with_columns(
1747
1787
  _unique_id=_get_unique_id(self._max_unique_id_int)
1748
- )
1749
- self._max_unique_id_int += 1
1788
+ ).drop("id", errors="ignore")
1750
1789
  else:
1751
1790
  x = _standardize_path(x)
1752
1791
  df = self._loaded_data[x]
1753
1792
  loaded_data_sorted[x] = df.with_columns(
1754
1793
  _unique_id=_get_unique_id(self._max_unique_id_int)
1755
- )
1794
+ ).drop("id", errors="ignore")
1756
1795
  self._max_unique_id_int += 1
1757
1796
 
1758
1797
  self._loaded_data = loaded_data_sorted
@@ -1773,11 +1812,11 @@ class GeoExplorer:
1773
1812
 
1774
1813
  self.app.layout = get_layout
1775
1814
 
1776
- for unique_id in selected_features if selected_features is not None else []:
1777
- i = int(float(unique_id))
1778
- path = list(self._loaded_data)[i]
1779
- properties, _ = self._get_selected_feature(unique_id, path, bounds=None)
1780
- self.selected_features[unique_id] = properties
1815
+ # for unique_id in selected_features if selected_features is not None else []:
1816
+ # i = int(float(unique_id))
1817
+ # path = list(self._loaded_data)[i]
1818
+ # properties, _ = self._get_selected_feature(unique_id, path, bounds=None)
1819
+ # self._selected_features[unique_id] = properties
1781
1820
 
1782
1821
  self._register_callbacks()
1783
1822
 
@@ -1810,7 +1849,7 @@ class GeoExplorer:
1810
1849
  os.kill(os.getpid(), signal.SIGTERM)
1811
1850
  finally:
1812
1851
  print("\nExiting with configs:")
1813
- print(self._get_self_as_string_without_defaults())
1852
+ print(self._get_self_as_string_except_defaults())
1814
1853
 
1815
1854
  def _register_callbacks(self) -> None:
1816
1855
 
@@ -1842,12 +1881,12 @@ class GeoExplorer:
1842
1881
  ):
1843
1882
  print(k, v)
1844
1883
 
1845
- txt = self._get_self_as_string_without_defaults()
1884
+ txt = self._get_self_as_string_except_defaults()
1846
1885
  return html.Div(f"{txt}.run()"), True
1847
1886
 
1848
1887
  @callback(
1849
1888
  Output("buffer-tip", "children"),
1850
- Input("alert", "children"),
1889
+ Input("add-data-alert", "children"),
1851
1890
  State("map", "zoom"),
1852
1891
  )
1853
1892
  def maybe_tip_about_buffer(_, zoom):
@@ -1916,7 +1955,7 @@ class GeoExplorer:
1916
1955
  if selected_path in self.selected_files or not n_clicks:
1917
1956
  return dash.no_update
1918
1957
  try:
1919
- self._append_to_bounds_series([selected_path])
1958
+ self._append_to_bbox_series([selected_path])
1920
1959
  except Exception as e:
1921
1960
  if DEBUG:
1922
1961
  raise e
@@ -1926,6 +1965,20 @@ class GeoExplorer:
1926
1965
  dismissable=True,
1927
1966
  )
1928
1967
  self.selected_files[selected_path] = True
1968
+ if is_netcdf(selected_path) or all(
1969
+ is_netcdf(x)
1970
+ for x in self._bbox_series[
1971
+ self._bbox_series.index.str.contains(selected_path)
1972
+ ].index
1973
+ ):
1974
+ self._nc[selected_path] = NetCDFConfig()
1975
+ elif is_raster_file(selected_path) or all(
1976
+ is_raster_file(x)
1977
+ for x in self._bbox_series[
1978
+ self._bbox_series.index.str.contains(selected_path)
1979
+ ].index
1980
+ ):
1981
+ self._nc[selected_path] = GeoTIFFConfig()
1929
1982
  return None
1930
1983
 
1931
1984
  @callback(
@@ -1948,6 +2001,8 @@ class GeoExplorer:
1948
2001
  checked_clicks,
1949
2002
  checked_ids,
1950
2003
  ):
2004
+ if not len(self._bbox_series):
2005
+ return dash.no_update, dash.no_update, dash.no_update
1951
2006
  t = perf_counter()
1952
2007
 
1953
2008
  triggered = dash.callback_context.triggered_id
@@ -1964,9 +2019,9 @@ class GeoExplorer:
1964
2019
 
1965
2020
  if triggered != "missing":
1966
2021
  box = shapely.box(*self._nested_bounds_to_bounds(bounds))
1967
- files_in_bounds = set(sg.sfilter(self._bounds_series, box).index)
2022
+ files_in_bounds = set(sg.sfilter(self._bbox_series, box).index)
1968
2023
  non_geodata = set(
1969
- self._bounds_series[lambda x: (x.isna()) | (x.is_empty)].index
2024
+ self._bbox_series[lambda x: (x.isna()) | (x.is_empty)].index
1970
2025
  )
1971
2026
  files_in_bounds |= non_geodata
1972
2027
 
@@ -1990,6 +2045,26 @@ class GeoExplorer:
1990
2045
  disabled = True
1991
2046
  return new_data_read, missing, disabled
1992
2047
 
2048
+ for selected_path in self.selected_files:
2049
+ for path in list(missing):
2050
+ if selected_path not in path or not any(
2051
+ path.lower().endswith(txt) for txt in [".tif", ".tiff"]
2052
+ ):
2053
+ continue
2054
+ self._bbox_series = pd.concat(
2055
+ [
2056
+ GeoSeries(
2057
+ {
2058
+ path: shapely.box(
2059
+ *self._nc[selected_path].get_bounds(None, path)
2060
+ )
2061
+ }
2062
+ ),
2063
+ self._bbox_series[lambda x: x.index != path],
2064
+ ]
2065
+ )
2066
+ missing.pop(missing.index(path))
2067
+
1993
2068
  if len(missing) > 10:
1994
2069
  to_read = 0
1995
2070
  cumsum = 0
@@ -2035,7 +2110,6 @@ class GeoExplorer:
2035
2110
  self.splitted = not self.splitted
2036
2111
  self.column = None if not self.splitted else self.column
2037
2112
  if self.splitted:
2038
- self._deleted_categories = set()
2039
2113
  return self.splitted, "split_index"
2040
2114
  return self.splitted, self.column
2041
2115
 
@@ -2401,30 +2475,6 @@ class GeoExplorer:
2401
2475
  True,
2402
2476
  )
2403
2477
 
2404
- @callback(
2405
- Output("file-deleted", "children", allow_duplicate=True),
2406
- Output("alert3", "children", allow_duplicate=True),
2407
- Output("update-table", "data", allow_duplicate=True),
2408
- Output("color-container", "children", allow_duplicate=True),
2409
- Input({"type": "delete-cat-btn", "index": dash.ALL}, "n_clicks"),
2410
- State({"type": "delete-cat-btn", "index": dash.ALL}, "id"),
2411
- prevent_initial_call=True,
2412
- )
2413
- @time_method_call(_PROFILE_DICT)
2414
- def delete_category(n_clicks_list, delete_ids):
2415
- path_to_delete = get_index_if_clicks(n_clicks_list, delete_ids)
2416
- if path_to_delete is None:
2417
- return dash.no_update, dash.no_update, dash.no_update, dash.no_update
2418
- if not self.column:
2419
- return (
2420
- *self._delete_file(n_clicks_list, delete_ids, delete_category=True),
2421
- True,
2422
- dash.no_update,
2423
- )
2424
- else:
2425
- self._deleted_categories.add(path_to_delete)
2426
- return None, None, True, dash.no_update
2427
-
2428
2478
  @callback(
2429
2479
  Output({"type": "query-view", "index": dash.MATCH}, "children"),
2430
2480
  Output({"type": "query-view", "index": dash.MATCH}, "is_open"),
@@ -2602,17 +2652,6 @@ class GeoExplorer:
2602
2652
  True,
2603
2653
  )
2604
2654
 
2605
- @callback(
2606
- Output("file-deleted", "children", allow_duplicate=True),
2607
- Input("reload-categories", "n_clicks"),
2608
- prevent_initial_call=True,
2609
- )
2610
- def reload_categories(n_clicks):
2611
- if not n_clicks:
2612
- return dash.no_update
2613
- self._deleted_categories = set()
2614
- return None
2615
-
2616
2655
  @callback(
2617
2656
  Output("splitter", "style"),
2618
2657
  Input("is_splitted", "data"),
@@ -2623,7 +2662,6 @@ class GeoExplorer:
2623
2662
  if column is None:
2624
2663
  self.column = None
2625
2664
  self.splitted = False
2626
- self._deleted_categories = set()
2627
2665
  if self.splitted and column == "split_index":
2628
2666
  return _clicked_button_style()
2629
2667
  else:
@@ -2788,11 +2826,9 @@ class GeoExplorer:
2788
2826
  if not self.selected_files:
2789
2827
  self.column = None
2790
2828
  self.color_dict = {}
2791
- self._deleted_categories = set()
2792
2829
  return html.Div(), None, False, None, 1
2793
2830
  elif column != self.column or triggered in ["force-categorical"]:
2794
2831
  self.color_dict = {}
2795
- self._deleted_categories = set()
2796
2832
  elif not column and triggered is None:
2797
2833
  column = self.column
2798
2834
  elif self._concatted_data is None:
@@ -2885,29 +2921,9 @@ class GeoExplorer:
2885
2921
  )
2886
2922
 
2887
2923
  if is_numeric and len(values_no_nans):
2888
- if len(values_no_nans_unique) <= k:
2889
- bins = list(values_no_nans_unique)
2890
- else:
2891
- bins = jenks_breaks(values_no_nans.to_numpy(), n_classes=k)
2892
-
2893
- cmap_ = matplotlib.colormaps.get_cmap(cmap)
2894
- colors_ = [
2895
- matplotlib.colors.to_hex(cmap_(int(i)))
2896
- for i in np.linspace(0, 255, num=k + 1)
2897
- ]
2898
- rounded_bins = [round(x, 1) for x in bins]
2899
- color_dict = {
2900
- f"{round(min(values_no_nans), 1)} - {rounded_bins[0]}": colors_[0],
2901
- **{
2902
- f"{start} - {stop}": colors_[i + 1]
2903
- for i, (start, stop) in enumerate(
2904
- itertools.pairwise(rounded_bins[1:])
2905
- )
2906
- },
2907
- f"{rounded_bins[-1]} - {round(max(values_no_nans), 1)}": colors_[
2908
- -1
2909
- ],
2910
- }
2924
+ color_dict, bins = get_numeric_colors(
2925
+ values_no_nans_unique, values_no_nans, cmap, k
2926
+ )
2911
2927
  else:
2912
2928
  new_values = [
2913
2929
  value
@@ -2961,7 +2977,7 @@ class GeoExplorer:
2961
2977
 
2962
2978
  @callback(
2963
2979
  Output("loading", "children", allow_duplicate=True),
2964
- Input("alert", "children"),
2980
+ Input("add-data-alert", "children"),
2965
2981
  prevent_initial_call=True,
2966
2982
  )
2967
2983
  def update_loading(_):
@@ -2971,7 +2987,7 @@ class GeoExplorer:
2971
2987
 
2972
2988
  @callback(
2973
2989
  Output("lc", "children"),
2974
- Output("alert", "children"),
2990
+ Output("add-data-alert", "children"),
2975
2991
  Output("max_rows", "children"),
2976
2992
  Output({"type": "wms-list", "index": dash.ALL}, "children"),
2977
2993
  Input("colors-are-updated", "data"),
@@ -2981,14 +2997,14 @@ class GeoExplorer:
2981
2997
  Input("wms-added", "data"),
2982
2998
  Input("max_rows_value", "value"),
2983
2999
  Input("file-control-panel", "children"),
2984
- Input("alpha", "value"),
3000
+ Input("opacity", "value"),
2985
3001
  Input({"type": "checked-btn", "index": dash.ALL}, "style"),
2986
3002
  Input(
2987
3003
  {"type": "checked-btn-wms", "wms_name": dash.ALL, "tile": dash.ALL},
2988
3004
  "style",
2989
3005
  ),
2990
3006
  State("debounced_bounds", "value"),
2991
- State("column-dropdown", "value"),
3007
+ # State("column-dropdown", "value"),
2992
3008
  State("bins", "data"),
2993
3009
  )
2994
3010
  @time_method_call(_PROFILE_DICT)
@@ -3002,21 +3018,22 @@ class GeoExplorer:
3002
3018
  max_rows_value,
3003
3019
  # data_was_changed,
3004
3020
  order_was_changed,
3005
- alpha,
3021
+ opacity,
3006
3022
  checked_clicks,
3007
3023
  checked_wms_clicks,
3008
3024
  bounds,
3009
- column,
3025
+ # column,
3010
3026
  bins,
3011
3027
  ):
3012
3028
  triggered = dash.callback_context.triggered_id
3013
- debug_print(
3029
+ print(
3014
3030
  "\nadd_data",
3015
3031
  dash.callback_context.triggered_id,
3016
3032
  len(self._loaded_data),
3017
3033
  f"{self.column=}",
3018
3034
  )
3019
3035
  t = perf_counter()
3036
+ alerts = []
3020
3037
 
3021
3038
  if max_rows_value is not None:
3022
3039
  self.max_rows = max_rows_value
@@ -3047,11 +3064,11 @@ class GeoExplorer:
3047
3064
  concatted_data=self._concatted_data,
3048
3065
  nan_color=self.nan_color,
3049
3066
  nan_label=self.nan_label,
3050
- column=column,
3067
+ column=self.column,
3051
3068
  is_numeric=is_numeric,
3052
3069
  color_dict=color_dict,
3053
3070
  bins=bins,
3054
- alpha=alpha,
3071
+ opacity=opacity,
3055
3072
  n_rows_per_path=n_rows_per_path,
3056
3073
  columns=self._columns,
3057
3074
  current_columns=current_columns,
@@ -3071,19 +3088,101 @@ class GeoExplorer:
3071
3088
  else:
3072
3089
  max_rows_component = _get_max_rows_displayed_component(self.max_rows)
3073
3090
 
3091
+ bbox = shapely.box(*bounds)
3092
+ images = {}
3093
+ self._current_xarrays = {}
3094
+ for selected_path, is_checked in self.selected_files.items():
3095
+ if not is_checked:
3096
+ continue
3097
+ for img_path in set(self._loaded_data_sizes).union(
3098
+ set(self._loaded_data)
3099
+ ):
3100
+ if selected_path not in img_path or not is_raster_file(img_path):
3101
+ continue
3102
+ if img_path not in set(self._bbox_series.index) or pd.notna(
3103
+ self._bbox_series.loc[img_path]
3104
+ ):
3105
+ _read_files(self, [img_path], mask=bbox)
3106
+ img_bbox = self._bbox_series.loc[img_path]
3107
+ clipped_bounds = img_bbox.intersection(bbox)
3108
+ if clipped_bounds.is_empty:
3109
+ continue
3110
+ try:
3111
+ ds = self._open_img_path_as_xarray(
3112
+ img_path, selected_path, clipped_bounds
3113
+ )
3114
+ except Exception as e:
3115
+ traceback.print_exc()
3116
+ alerts.append(
3117
+ dbc.Alert(
3118
+ f"{type(e).__name__}: {e}. (Traceback printed in terminal)",
3119
+ color="warning",
3120
+ )
3121
+ )
3122
+ continue
3123
+ if ds is None:
3124
+ continue
3125
+ self._current_xarrays[img_path] = ds
3126
+ arr = self._nc[selected_path].to_numpy(ds)
3127
+ arr = fix_numpy_img_shape(arr)
3128
+ if np.isnan(arr).any() and not np.all(np.isnan(arr)):
3129
+ arr[np.isnan(arr)] = np.min(arr[~np.isnan(arr)])
3130
+
3131
+ images[img_path] = (arr, clipped_bounds)
3132
+
3133
+ if images:
3134
+ # make sure all single-band images are normalized by same extremities
3135
+ vmin = np.min([np.min(x[0]) for x in images.values()])
3136
+ vmax = np.min([np.max(x[0]) for x in images.values()])
3137
+
3138
+ image_overlays = []
3139
+ for img_path, (arr, bounds) in images.items():
3140
+ # skip normalization for rgb images
3141
+ if len(arr.shape) == 2:
3142
+ arr = (arr - vmin) / (vmax - vmin)
3143
+
3144
+ minx, miny, maxx, maxy = bounds.bounds
3145
+
3146
+ # hack: using folium because dash_leaflet doesn't accept np.array
3147
+ image_overlay = folium.raster_layers.ImageOverlay(
3148
+ arr,
3149
+ bounds=[[miny, minx], [maxy, maxx]],
3150
+ vmin=vmin,
3151
+ vmax=vmax,
3152
+ )
3153
+ img_name = Path(img_path).stem
3154
+ image_overlay = dl.ImageOverlay(
3155
+ url=image_overlay.url,
3156
+ bounds=[[miny, minx], [maxy, maxx]],
3157
+ opacity=self.opacity,
3158
+ interactive=True,
3159
+ id={"type": "image", "index": img_path},
3160
+ )
3161
+ image_overlay = dl.Overlay(
3162
+ image_overlay,
3163
+ name=img_name,
3164
+ checked=True,
3165
+ )
3166
+ image_overlays.append(image_overlay)
3167
+
3074
3168
  return (
3075
- dl.LayersControl(list(self._base_layers.values()) + wms_layers + data),
3076
- None,
3169
+ dl.LayersControl(
3170
+ list(self._base_layers.values())
3171
+ + wms_layers
3172
+ + data
3173
+ + image_overlays
3174
+ ),
3175
+ alerts,
3077
3176
  max_rows_component,
3078
3177
  all_tiles_lists,
3079
3178
  )
3080
3179
 
3081
3180
  @callback(
3082
- Input("alpha", "value"),
3181
+ Input("opacity", "value"),
3083
3182
  prevent_initial_call=True,
3084
3183
  )
3085
- def update_alpha(alpha):
3086
- self.alpha = alpha
3184
+ def update_opacity(opacity):
3185
+ self.opacity = opacity
3087
3186
 
3088
3187
  @callback(
3089
3188
  Output("clicked-features-title", "children"),
@@ -3108,6 +3207,69 @@ class GeoExplorer:
3108
3207
  " (note that for partitioned files, only partitions in bounds are loaded)",
3109
3208
  )
3110
3209
 
3210
        @callback(
            Output("image-plot", "children", allow_duplicate=True),
            Output("image-plot", "style", allow_duplicate=True),
            Output("close-image-btn", "style", allow_duplicate=True),
            Input("add-data-alert", "children"),
            Input({"type": "image", "index": dash.ALL}, "clickData"),
            State({"type": "image", "index": dash.ALL}, "n_clicks"),
            State({"type": "image", "index": dash.ALL}, "id"),
            State("image-plot", "style"),
            prevent_initial_call=True,
        )
        def display_pixel_plot(data_added, latlng, n_clicks, ids, plot_style):
            """Show a per-band value plot for the pixel clicked on an image overlay.

            Triggered either by a click on an image overlay (fresh click with
            latlng) or by new data being added while a plot is already open
            (re-uses the cached click in ``self._px_plot_conf``).
            """
            img_path = get_index_if_clicks(n_clicks, ids)
            # "query updated": plot is visible, no fresh click, but a cached
            # click configuration exists -> refresh the existing plot
            query_updated: bool = bool(
                (not plot_style)
                and (not latlng or not any(latlng))
                and self._px_plot_conf
            )
            if (not img_path or not latlng or not any(latlng)) and not query_updated:
                # nothing actionable: neither a fresh click nor a refresh
                return dash.no_update, dash.no_update, dash.no_update
            elif query_updated:
                # reuse the last clicked point/path stored by a previous call
                selected_path = self._px_plot_conf["selected_path"]
                img_path = self._px_plot_conf["img_path"]
                point_correct_crs = self._px_plot_conf["point_correct_crs"]
            else:
                # fresh click: resolve which selected file the image belongs to
                selected_path = next(
                    iter(x for x in self.selected_files if x in img_path)
                )
                i = [x["index"] for x in ids].index(img_path)
                latlng2 = latlng[i]["latlng"]
                lat, lng = latlng2["lat"], latlng2["lng"]
                point = Point(float(lng), float(lat))
                # NOTE(review): assumes every raster path has an entry in
                # self._nc here — confirm for plain GeoTIFFs
                crs = self._nc[selected_path].get_crs(
                    self._loaded_data[img_path], img_path
                )
                # reproject the clicked lon/lat into the image's own crs
                point_correct_crs = GeoSeries([point], crs=4326).to_crs(crs).union_all()
                # cache so the plot can be refreshed when data changes
                self._px_plot_conf["selected_path"] = selected_path
                self._px_plot_conf["img_path"] = img_path
                self._px_plot_conf["point_correct_crs"] = point_correct_crs
            x, y = point_correct_crs.x, point_correct_crs.y
            try:
                # nearest-pixel selection on the currently displayed xarray
                ds = (
                    self._current_xarrays[img_path]
                    .copy()
                    .sel(x=x, y=y, method="nearest")
                )
            except KeyError:
                # dataset is probably unchecked
                return None, {"display": "none"}, {"display": "none"}
            return dcc.Graph(figure=pixel_value_scatter(ds)), None, None
3260
+
3261
+ @callback(
3262
+ Output("image-plot", "children", allow_duplicate=True),
3263
+ Output("image-plot", "style", allow_duplicate=True),
3264
+ Output("close-image-btn", "style", allow_duplicate=True),
3265
+ Input("close-image-btn", "n_clicks"),
3266
+ prevent_initial_call=True,
3267
+ )
3268
+ def remove_pixel_plot(n_clicks):
3269
+ if n_clicks:
3270
+ return None, {"display": "none"}, {"display": "none"}
3271
+ return dash.no_update, dash.no_update, dash.no_update
3272
+
3111
3273
  @callback(
3112
3274
  Output("clicked-features", "data"),
3113
3275
  Output("clicked-ids", "data"),
@@ -3135,18 +3297,18 @@ class GeoExplorer:
3135
3297
  triggered = dash.callback_context.triggered_id
3136
3298
  debug_print("display_clicked_feature_attributes", triggered)
3137
3299
  if triggered == "clear-table-clicked":
3138
- self.selected_features = {}
3300
+ self._selected_features = {}
3139
3301
  return [], [], None
3140
3302
  if (
3141
3303
  triggered is None
3142
3304
  or triggered == "update-table"
3143
3305
  or (
3144
- (self.selected_features and not features)
3306
+ (self._selected_features and not features)
3145
3307
  or all(x is None for x in features)
3146
3308
  )
3147
3309
  ):
3148
- clicked_ids = list(self.selected_features)
3149
- clicked_features = list(self.selected_features.values())
3310
+ clicked_ids = list(self._selected_features)
3311
+ clicked_features = list(self._selected_features.values())
3150
3312
  return clicked_features, clicked_ids, None
3151
3313
 
3152
3314
  if not features or all(x is None for x in features):
@@ -3230,7 +3392,7 @@ class GeoExplorer:
3230
3392
  if props["id"] not in clicked_ids:
3231
3393
  clicked_features.append(props)
3232
3394
  clicked_ids = [x["id"] for x in clicked_features]
3233
- self.selected_features = dict(
3395
+ self._selected_features = dict(
3234
3396
  zip(clicked_ids, clicked_features, strict=True)
3235
3397
  )
3236
3398
  return clicked_features, clicked_ids, None
@@ -3299,7 +3461,7 @@ class GeoExplorer:
3299
3461
  self,
3300
3462
  [
3301
3463
  x
3302
- for x in self._bounds_series[
3464
+ for x in self._bbox_series[
3303
3465
  lambda x: x.index.str.contains(clicked_path)
3304
3466
  ].index
3305
3467
  if x not in self._loaded_data
@@ -3705,33 +3867,32 @@ class GeoExplorer:
3705
3867
  parts = Path(path2).parts
3706
3868
  if not all(part in parts for part in Path(path).parts):
3707
3869
  continue
3708
- for idx in list(self.selected_features):
3870
+ for idx in list(self._selected_features):
3709
3871
  if int(float(idx)) == i:
3710
- self.selected_features.pop(idx)
3872
+ self._selected_features.pop(idx)
3711
3873
  del self._loaded_data[path2]
3712
3874
  deleted_files2.add(path2)
3713
3875
 
3714
3876
  debug_print(f"{deleted_files2=}")
3715
- self._bounds_series = self._bounds_series[
3716
- lambda x: ~x.index.isin(deleted_files2)
3717
- ]
3718
-
3719
- self._max_unique_id_int = -1
3720
- for path, df in self._loaded_data.items():
3721
- self._max_unique_id_int += 1
3722
- id_prev = df.select(pl.col("_unique_id").first()).collect().item()
3723
- self._loaded_data[path] = df.with_columns(
3724
- _unique_id=_get_unique_id(self._max_unique_id_int)
3725
- )
3726
- for idx in list(self.selected_features):
3727
- if idx[0] != id_prev[0]:
3728
- continue
3729
-
3730
- # rounding values to avoid floating point precicion problems
3731
- new_idx = f"{self._max_unique_id_int}.{idx[2:]}"
3732
- feature = self.selected_features.pop(idx)
3733
- feature["id"] = new_idx
3734
- self.selected_features[new_idx] = feature
3877
+ self._bbox_series = self._bbox_series[lambda x: ~x.index.isin(deleted_files2)]
3878
+
3879
+ self._reset()
3880
+ # self._max_unique_id_int = -1
3881
+ # for path, df in self._loaded_data.items():
3882
+ # self._max_unique_id_int += 1
3883
+ # id_prev = df.select(pl.col("_unique_id").first()).collect().item()
3884
+ # self._loaded_data[path] = df.with_columns(
3885
+ # _unique_id=_get_unique_id(self._max_unique_id_int)
3886
+ # )
3887
+ # for idx in list(self._selected_features):
3888
+ # if idx[0] != id_prev[0]:
3889
+ # continue
3890
+
3891
+ # # rounding values to avoid floating point precicion problems
3892
+ # new_idx = f"{self._max_unique_id_int}.{idx[2:]}"
3893
+ # feature = self._selected_features.pop(idx)
3894
+ # feature["id"] = new_idx
3895
+ # self._selected_features[new_idx] = feature
3735
3896
 
3736
3897
  return None, None
3737
3898
 
@@ -3744,7 +3905,7 @@ class GeoExplorer:
3744
3905
  return (
3745
3906
  sg.to_gdf(reversed(self.center), 4326)
3746
3907
  .to_crs(3035)
3747
- .buffer(165_000 / (self.zoom**1.25))
3908
+ .buffer(100_000 / (self.zoom**1.5))
3748
3909
  .to_crs(4326)
3749
3910
  .total_bounds
3750
3911
  )
@@ -3862,9 +4023,23 @@ class GeoExplorer:
3862
4023
  name = _get_stem_from_parent(path)
3863
4024
  return name
3864
4025
 
4026
    def _open_img_path_as_xarray(self, img_path, selected_path, clipped_bounds):
        """Open ``img_path`` as an xarray object clipped to ``clipped_bounds``.

        NetCDF files go through the file's registered config in ``self._nc``;
        any other raster format is read via rasterio. In both branches the
        code block registered for ``selected_path`` (if any) is applied.
        """
        if is_netcdf(img_path):
            return self._nc[selected_path].filter_ds(
                ds=self._loaded_data[img_path],
                bounds=clipped_bounds.bounds,
                code_block=self._queries.get(selected_path),
            )
        else:
            return rasterio_to_xarray(
                img_path, clipped_bounds, code_block=self._queries.get(selected_path)
            )
4037
+
3865
4038
    @property
    def _columns(self) -> dict[str, set[str]]:
        # Column names available per loaded path. Raster entries registered in
        # self._nc have no tabular schema, so they are exposed with the single
        # pseudo-column "value".
        return {path: set(dtypes) for path, dtypes in self._dtypes.items()} | {
            path: {"value"} for path in self._nc
        }
3868
4043
 
3869
4044
  def _has_column(self, path: str, column: str) -> bool:
3870
4045
  return bool(
@@ -3955,56 +4130,16 @@ class GeoExplorer:
3955
4130
  for path in self.selected_files:
3956
4131
  path_parts = Path(path).parts
3957
4132
  for key in self._loaded_data:
3958
- if paths and (path not in paths and key not in paths) or key in dfs:
4133
+ if (paths and (path not in paths and key not in paths)) or key in dfs:
3959
4134
  continue
3960
4135
  key_parts = Path(key).parts
3961
4136
  if not all(part in key_parts for part in path_parts):
3962
4137
  continue
3963
4138
  df = self._loaded_data[key]
4139
+ if isinstance(df, (Dataset | DataArray)):
4140
+ continue
3964
4141
  if bounds is not None:
3965
4142
  df = filter_by_bounds(df, bounds)
3966
- if (
3967
- self._deleted_categories
3968
- and self.column
3969
- and not self._force_categorical
3970
- and self._has_column(key, self.column)
3971
- and self._get_dtype(key, self.column).is_numeric()
3972
- ):
3973
- try:
3974
- error_mess = "Cannot remove categories from numeric columns. Use an SQL query instead"
3975
- # make sure we only give one warning
3976
- assert not any(
3977
- x.children == error_mess for x in alerts if x is not None
3978
- )
3979
- alerts.add(
3980
- dbc.Alert(
3981
- error_mess,
3982
- color="warning",
3983
- dismissable=True,
3984
- duration=5_000,
3985
- )
3986
- )
3987
- except AssertionError:
3988
- pass
3989
- elif self._deleted_categories and self.column in df:
3990
- try:
3991
- expression = (
3992
- pl.col(self.column).is_in(list(self._deleted_categories))
3993
- == False
3994
- )
3995
- except Exception as e:
3996
- raise type(e)(
3997
- f"{e}. {self.column=}, {self._deleted_categories=}"
3998
- )
3999
- if self.nan_label in self._deleted_categories:
4000
- expression &= pl.col(self.column).is_not_null()
4001
- df = df.filter(expression)
4002
- elif (
4003
- self.nan_label in self._deleted_categories and self.column not in df
4004
- ):
4005
- if self.splitted:
4006
- df = get_split_index(df)
4007
- continue
4008
4143
  if _filter and self._queries.get(path, None) is not None:
4009
4144
  df, alert = self._filter_data(df, self._queries[path], key)
4010
4145
  alerts.add(alert)
@@ -4189,7 +4324,7 @@ class GeoExplorer:
4189
4324
  if isinstance(called, GeoDataFrame):
4190
4325
  called, _ = _geopandas_to_polars(called, path)
4191
4326
  called = called.with_columns(
4192
- _unique_id=_get_unique_id(list(self._loaded_data).index(path))
4327
+ _unique_id=_get_unique_id(list(self._loaded_data).index(path) + 999)
4193
4328
  ).lazy()
4194
4329
  return called
4195
4330
  if isinstance(called, GeoSeries):
@@ -4201,7 +4336,7 @@ class GeoExplorer:
4201
4336
  bounds,
4202
4337
  path,
4203
4338
  ).with_columns(
4204
- _unique_id=_get_unique_id(list(self._loaded_data).index(path))
4339
+ _unique_id=_get_unique_id(list(self._loaded_data).index(path) + 999)
4205
4340
  )
4206
4341
  return called
4207
4342
  if isinstance(called, pd.DataFrame):
@@ -4425,14 +4560,14 @@ class GeoExplorer:
4425
4560
 
4426
4561
  self.wms[wms_name] = constructor(**(current_kwargs | kwargs))
4427
4562
 
4428
- def _append_to_bounds_series(self, paths, recurse: bool = True) -> None:
4563
+ def _append_to_bbox_series(self, paths, recurse: bool = True) -> None:
4429
4564
  try:
4430
4565
  child_paths = self._get_child_paths(paths)
4431
4566
  self._loaded_data_sizes |= child_paths
4432
4567
  paths_with_meta, paths_without_meta = (
4433
4568
  self._get_paths_with_and_without_metadata(list(child_paths))
4434
4569
  )
4435
- more_bounds = _get_bounds_series_as_4326(
4570
+ more_bounds = _get_bbox_series_as_4326(
4436
4571
  paths_with_meta,
4437
4572
  file_system=self.file_system,
4438
4573
  )
@@ -4441,10 +4576,10 @@ class GeoExplorer:
4441
4576
  raise e
4442
4577
  # reload file system to avoid cached reading of files that don't exist any more
4443
4578
  self.file_system = self.file_system.__class__()
4444
- return self._append_to_bounds_series(paths, recurse=False)
4445
- self._bounds_series = pd.concat(
4579
+ return self._append_to_bbox_series(paths, recurse=False)
4580
+ self._bbox_series = pd.concat(
4446
4581
  [
4447
- self._bounds_series,
4582
+ self._bbox_series,
4448
4583
  more_bounds,
4449
4584
  pd.Series(
4450
4585
  [None for _ in range(len(paths_without_meta))],
@@ -4461,6 +4596,8 @@ class GeoExplorer:
4461
4596
  return child_paths
4462
4597
 
4463
4598
  def _get_paths_and_sizes(self, path):
4599
+ if path.startswith("http://") or path.startswith("https://"):
4600
+ return {_standardize_path(path): 0}
4464
4601
  suffix = Path(path).suffix
4465
4602
  child_pattern = f"**/*{suffix}" if suffix else "**/*.*"
4466
4603
  child_paths = {
@@ -4482,12 +4619,12 @@ class GeoExplorer:
4482
4619
  for i in range(n):
4483
4620
  new_path = path + f"{FILE_SPLITTER_TXT}{rows_to_read}-{i}"
4484
4621
  out_paths[new_path] = size / n
4485
- more_bounds.append(GeoSeries({new_path: self._bounds_series.loc[path]}))
4622
+ more_bounds.append(GeoSeries({new_path: self._bbox_series.loc[path]}))
4486
4623
  new_path = path + f"{FILE_SPLITTER_TXT}{rows_to_read}-{n}"
4487
4624
  out_paths[new_path] = size / n
4488
- more_bounds.append(GeoSeries({new_path: self._bounds_series.loc[path]}))
4625
+ more_bounds.append(GeoSeries({new_path: self._bbox_series.loc[path]}))
4489
4626
 
4490
- self._bounds_series = pd.concat([self._bounds_series] + more_bounds)
4627
+ self._bbox_series = pd.concat([self._bbox_series] + more_bounds)
4491
4628
 
4492
4629
  if len(out_paths) > 1:
4493
4630
  out_paths.pop(_standardize_path(path), None)
@@ -4523,10 +4660,19 @@ class GeoExplorer:
4523
4660
  data.pop("wms_layers_checked")
4524
4661
 
4525
4662
  if self.selected_files:
4663
+
4664
+ def as_nc_config_if_nc(path, query):
4665
+ if self._nc.get(path):
4666
+ return self._nc[path].__class__(query)
4667
+ return query
4668
+
4526
4669
  data = {
4527
4670
  "data": {
4528
- key: _unformat_query(self._queries.get(key, "")) or None
4529
- for key in reversed(data.pop("selected_files", []))
4671
+ path: as_nc_config_if_nc(
4672
+ path, _unformat_query(self._queries.get(path, ""))
4673
+ )
4674
+ or None
4675
+ for path in reversed(data.pop("selected_files", []))
4530
4676
  },
4531
4677
  **data,
4532
4678
  }
@@ -4539,7 +4685,7 @@ class GeoExplorer:
4539
4685
  data["file_system"] = data["file_system"].__class__.__name__ + "()"
4540
4686
 
4541
4687
  if "selected_features" in data:
4542
- data["selected_features"] = list(self.selected_features)
4688
+ data["selected_features"] = list(self._selected_features)
4543
4689
  return data
4544
4690
 
4545
4691
  def _get_self_as_string(self, data: dict[str, Any]) -> str:
@@ -4551,7 +4697,7 @@ class GeoExplorer:
4551
4697
  txt = ", ".join(f"{k}={maybe_to_string(k, v)}" for k, v in data.items())
4552
4698
  return f"{self.__class__.__name__}({txt})"
4553
4699
 
4554
- def _get_self_as_string_without_defaults(self):
4700
+ def _get_self_as_string_except_defaults(self):
4555
4701
  data = self._get_self_as_dict()
4556
4702
  defaults = inspect.getfullargspec(self.__class__).kwonlydefaults
4557
4703
  data = {
@@ -4560,14 +4706,23 @@ class GeoExplorer:
4560
4706
  return self._get_self_as_string(data)
4561
4707
 
4562
4708
    def _reset(self):
        """Renumber per-file ``_unique_id`` prefixes after files change.

        Every path in ``self._loaded_data`` gets a fresh integer prefix equal
        to its position; ids of currently selected features are rewritten to
        match so the clicked-features table stays consistent.
        """
        self._max_unique_id_int = -1
        for path, df in self._loaded_data.items():
            # advance for every path (including None/raster entries) so the
            # prefix always mirrors the file's position in the dict
            self._max_unique_id_int += 1
            if df is None or isinstance(df, (Dataset | DataArray)):
                # rasters carry no feature rows to renumber
                continue
            self._loaded_data[path] = df.with_columns(
                _unique_id=_get_unique_id(self._max_unique_id_int)
            )

            for idx, feature in dict(self._selected_features).items():
                if feature["__file_path"] != path:
                    continue

                # keep the row part of the id, swap in the new file prefix
                new_idx = f"{self._max_unique_id_int}.{idx.split(".")[-1]}"
                self._selected_features.pop(idx)
                feature["id"] = new_idx
                self._selected_features[new_idx] = feature
4571
4726
 
4572
4727
  def __str__(self) -> str:
4573
4728
  """String representation."""
@@ -4580,3 +4735,166 @@ class GeoExplorer:
4580
4735
  pickle.dumps(value)
4581
4736
  except pickle.PicklingError:
4582
4737
  print(f"{variable_name} with value {value} is not pickable")
4738
+
4739
+
4740
def get_numeric_colors(values_no_nans_unique, values_no_nans, cmap, k):
    """Build a legend color mapping and class bins for a numeric column.

    Uses the distinct values themselves as bins when there are at most ``k``
    of them; otherwise computes ``k`` jenks natural-breaks classes. Returns a
    dict of ``"start - stop"`` labels to hex colors, plus the raw bin values.
    """
    if len(values_no_nans_unique) <= k:
        # few distinct values: each one becomes its own class
        bins = list(values_no_nans_unique)
    else:
        bins = jenks_breaks(values_no_nans.to_numpy(), n_classes=k)

    cmap_ = matplotlib.colormaps.get_cmap(cmap)
    # k + 1 colors evenly sampled across the 256-entry colormap
    colors_ = [
        matplotlib.colors.to_hex(cmap_(int(i))) for i in np.linspace(0, 255, num=k + 1)
    ]
    # rounding to 1 decimal to avoid floating point noise in the labels
    rounded_bins = [round(x, 1) for x in bins]
    color_dict = {
        # first class runs from the overall minimum to the first break
        f"{round(min(values_no_nans), 1)} - {rounded_bins[0]}": colors_[0],
        **{
            f"{start} - {stop}": colors_[i + 1]
            for i, (start, stop) in enumerate(itertools.pairwise(rounded_bins[1:]))
        },
        # last class runs from the final break to the overall maximum
        f"{rounded_bins[-1]} - {round(max(values_no_nans), 1)}": colors_[-1],
    }
    return color_dict, bins
4760
+
4761
+
4762
def rasterio_to_numpy(
    img_path, bbox, return_attrs: list[str] | None = None
) -> np.ndarray | tuple[Any]:
    """Read the window of a raster file intersecting ``bbox`` (EPSG:4326).

    When ``return_attrs`` is given, the named dataset attributes (e.g.
    ``"crs"``, ``"descriptions"``) are appended after the array in a tuple.
    """
    with rasterio.open(img_path) as src:
        # bbox arrives in lon/lat; reproject into the raster's own crs first
        bounds_in_img_crs = GeoSeries([bbox], crs=4326).to_crs(src.crs).total_bounds
        window = rasterio.windows.from_bounds(
            *bounds_in_img_crs, transform=src.transform
        )
        arr = src.read(window=window, boundless=False, masked=False)
        if not return_attrs:
            return arr
        return (arr, *[getattr(src, attr) for attr in return_attrs])
4774
+
4775
+
4776
def rasterio_to_xarray(img_path, bbox, code_block):
    """Read a raster window as an xarray DataArray (single band) or Dataset.

    ``bbox`` is in EPSG:4326; pixels are returned in the raster's own crs.
    ``code_block`` (optional user query) is applied to the result through
    ``_run_code_block``.
    """
    import xarray as xr
    from rioxarray.rioxarray import _generate_spatial_coords

    arr, crs, descriptions = rasterio_to_numpy(
        img_path, bbox, return_attrs=["crs", "descriptions"]
    )
    bounds_in_img_crs = GeoSeries([bbox], crs=4326).to_crs(crs).total_bounds

    if not all(arr.shape):
        # empty window: return a coordinate-less array so callers can skip it
        # NOTE(review): arr from rasterio is band-first; dims=["y", "x"]
        # presumably only matches 2-d empty reads — confirm for multi-band
        return xr.DataArray(
            arr,
            dims=["y", "x"],
            attrs={"crs": crs},
        )

    if len(arr.shape) == 2:
        height, width = arr.shape
    elif len(arr.shape) == 3 and arr.shape[0] == 1:
        # single-band file read as (1, y, x): drop the band axis
        arr = arr[0]
        height, width = arr.shape
    elif len(arr.shape) == 3:
        height, width = arr.shape[1:]
    else:
        raise ValueError(arr.shape)

    # rebuild the affine transform for the clipped window, then the x/y coords
    transform = rasterio.transform.from_bounds(*bounds_in_img_crs, width, height)
    coords = _generate_spatial_coords(transform, width, height)

    if len(arr.shape) == 2:
        ds = xr.DataArray(
            arr,
            coords=coords,
            dims=["y", "x"],
            attrs={"crs": crs},
        )
    else:
        # one variable per band, named by the band descriptions when present;
        # fall back to positional indices if the description count mismatches
        if len(descriptions) != arr.shape[0]:
            descriptions = range(arr.shape[0])
        ds = xr.Dataset(
            {
                desc: xr.DataArray(
                    arr[i],
                    coords=coords,
                    dims=["y", "x"],
                    attrs={"crs": crs},
                    name=desc,
                )
                for i, desc in enumerate(descriptions)
            }
        )
    return _run_code_block(ds, code_block)
4828
+
4829
+
4830
def as_sized_array(arr: np.ndarray) -> np.ndarray:
    """Return ``arr`` unchanged when it has a length; wrap scalars in a 1-element array."""
    try:
        len(arr)
    except TypeError:
        # 0-d arrays and plain scalars have no length
        return np.array([arr])
    return arr
4836
+
4837
+
4838
def pixel_value_scatter(ds: Dataset | DataArray):
    """Scatter plot of pixel values per band/variable, over time when available."""
    import plotly.express as px

    # use the time coordinate as x axis when the dataset has a usable one
    try:
        xs = as_sized_array(ds["time"].values)
        assert isinstance(xs, np.ndarray)
        x = "time"
    except Exception:
        x = "index"

    def get_df(ds, var):
        # one long-format frame per variable: value column, x axis, legend color
        if isinstance(ds, DataArray):
            values = ds.values
        else:
            values = ds[var].values
        values = as_sized_array(values)
        if x == "index":
            these_xs = np.arange(len(values))
        else:
            these_xs = xs
        data = pd.DataFrame({"value": values, x: these_xs})
        data["color"] = var
        return data

    if isinstance(ds, Dataset):
        data = []
        for var in ds.data_vars:
            # best-effort: variables that cannot be framed are simply skipped
            try:
                data.append(get_df(ds, var))
            except Exception:
                pass
        data = pd.concat(data)
    elif isinstance(ds, DataArray):
        data = get_df(ds, ds.name)
    # with exactly one named variable, use its name as the y-axis label
    if len(data["color"].unique()) == 1 and data["color"].notna().all():
        colname = next(iter(data["color"]))
        data = data.rename(columns={"value": colname}, errors="raise")
    else:
        colname = "value"
    fig = px.scatter(data, y=colname, x=x, color="color")
    fig.update_traces(marker_size=10)
    fig.update_layout(margin=dict(l=0, r=0, t=30, b=20))
    return fig
4881
+
4882
+
4883
def fix_numpy_img_shape(arr: np.ndarray) -> np.ndarray:
    """Normalize a raster array to (y, x) for single band or (y, x, 3) for RGB."""
    if arr.ndim == 2:
        return arr
    if arr.ndim == 3:
        n_bands = arr.shape[0]
        if n_bands == 1:
            # single band read as (1, y, x): drop the band axis
            return arr[0]
        if n_bands == 3:
            # to 3d array in shape (x, y, 3)
            return arr.transpose(1, 2, 0)
    raise ValueError("Only single band or 3-band (RGB) are supported")
4893
+
4894
+
4895
def is_raster_file(path: str) -> bool:
    """True for raster suffixes the explorer can display (.tif*, .nc*)."""
    suffix = Path(path).suffix.lower()
    return suffix.startswith((".tif", ".nc"))
4897
+
4898
+
4899
def is_netcdf(path: str) -> bool:
    """True when ``path`` carries a NetCDF-style suffix (.nc, .nc4, ...)."""
    suffix = Path(path).suffix
    return suffix.lower().startswith(".nc")