geo-explorer 0.9.8__py3-none-any.whl → 0.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geo_explorer/file_browser.py +134 -112
- geo_explorer/geo_explorer.py +630 -312
- geo_explorer/nc.py +199 -0
- geo_explorer/utils.py +82 -33
- {geo_explorer-0.9.8.dist-info → geo_explorer-0.9.9.dist-info}/METADATA +2 -2
- geo_explorer-0.9.9.dist-info/RECORD +14 -0
- geo_explorer-0.9.8.dist-info/RECORD +0 -13
- {geo_explorer-0.9.8.dist-info → geo_explorer-0.9.9.dist-info}/LICENSE +0 -0
- {geo_explorer-0.9.8.dist-info → geo_explorer-0.9.9.dist-info}/LICENSE.md +0 -0
- {geo_explorer-0.9.8.dist-info → geo_explorer-0.9.9.dist-info}/WHEEL +0 -0
geo_explorer/geo_explorer.py
CHANGED
|
@@ -5,12 +5,14 @@ import json
|
|
|
5
5
|
import logging
|
|
6
6
|
import math
|
|
7
7
|
import os
|
|
8
|
+
import pickle
|
|
9
|
+
import random
|
|
8
10
|
import re
|
|
9
11
|
import signal
|
|
10
12
|
import sys
|
|
11
13
|
import time
|
|
14
|
+
import traceback
|
|
12
15
|
from collections.abc import Callable
|
|
13
|
-
from collections.abc import Sequence
|
|
14
16
|
from concurrent.futures import ThreadPoolExecutor
|
|
15
17
|
from functools import partial
|
|
16
18
|
from functools import wraps
|
|
@@ -21,15 +23,15 @@ from pathlib import PurePath
|
|
|
21
23
|
from time import perf_counter
|
|
22
24
|
from typing import Any
|
|
23
25
|
from typing import ClassVar
|
|
24
|
-
import random
|
|
25
26
|
|
|
26
|
-
import pickle
|
|
27
|
-
import geopandas as gpd
|
|
28
27
|
import dash
|
|
29
28
|
import dash_bootstrap_components as dbc
|
|
30
29
|
import dash_leaflet as dl
|
|
30
|
+
import folium
|
|
31
|
+
import geopandas as gpd
|
|
31
32
|
import joblib
|
|
32
33
|
import matplotlib
|
|
34
|
+
import matplotlib.colors
|
|
33
35
|
import matplotlib.colors as mcolors
|
|
34
36
|
import msgspec
|
|
35
37
|
import numpy as np
|
|
@@ -37,6 +39,7 @@ import pandas as pd
|
|
|
37
39
|
import polars as pl
|
|
38
40
|
import pyarrow
|
|
39
41
|
import pyarrow.parquet as pq
|
|
42
|
+
import rasterio
|
|
40
43
|
import sgis as sg
|
|
41
44
|
import shapely
|
|
42
45
|
from dash import Dash
|
|
@@ -54,24 +57,44 @@ from geopandas import GeoDataFrame
|
|
|
54
57
|
from geopandas import GeoSeries
|
|
55
58
|
from geopandas.array import GeometryArray
|
|
56
59
|
from jenkspy import jenks_breaks
|
|
57
|
-
from sgis
|
|
58
|
-
from sgis.io.dapla_functions import _read_pyarrow
|
|
60
|
+
from sgis import get_common_crs
|
|
59
61
|
from sgis.io.dapla_functions import _get_bounds_parquet
|
|
60
62
|
from sgis.io.dapla_functions import _get_bounds_parquet_from_open_file
|
|
63
|
+
from sgis.io.dapla_functions import _get_geo_metadata
|
|
64
|
+
from sgis.io.dapla_functions import _read_pyarrow
|
|
61
65
|
from sgis.maps.wms import WmsLoader
|
|
62
66
|
from shapely import Geometry
|
|
63
67
|
from shapely.errors import GEOSException
|
|
64
68
|
from shapely.geometry import Point
|
|
65
|
-
from
|
|
69
|
+
from shapely.geometry import Polygon
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
from xarray import DataArray
|
|
73
|
+
from xarray import Dataset
|
|
74
|
+
except ImportError:
|
|
75
|
+
|
|
76
|
+
class Dataset:
|
|
77
|
+
"""Placeholder."""
|
|
78
|
+
|
|
79
|
+
class DataArray:
|
|
80
|
+
"""Placeholder."""
|
|
81
|
+
|
|
66
82
|
|
|
67
83
|
from .file_browser import FileBrowser
|
|
68
84
|
from .fs import LocalFileSystem
|
|
85
|
+
from .nc import GeoTIFFConfig
|
|
86
|
+
from .nc import NetCDFConfig
|
|
87
|
+
from .nc import _run_code_block
|
|
88
|
+
from .utils import _PROFILE_DICT
|
|
89
|
+
from .utils import DEBUG
|
|
69
90
|
from .utils import _clicked_button_style
|
|
70
91
|
from .utils import _standardize_path
|
|
71
92
|
from .utils import _unclicked_button_style
|
|
93
|
+
from .utils import debug_print
|
|
72
94
|
from .utils import get_button_with_tooltip
|
|
73
|
-
from .utils import
|
|
95
|
+
from .utils import get_xarray_bounds
|
|
74
96
|
from .utils import time_function_call
|
|
97
|
+
from .utils import time_method_call
|
|
75
98
|
|
|
76
99
|
OFFWHITE: str = "#ebebeb"
|
|
77
100
|
FILE_CHECKED_COLOR: str = "#3e82ff"
|
|
@@ -91,45 +114,6 @@ HIDDEN_ADDED_COLUMNS = {
|
|
|
91
114
|
ADDED_COLUMNS = HIDDEN_ADDED_COLUMNS | {"area"}
|
|
92
115
|
ns = Namespace("onEachFeatureToggleHighlight", "default")
|
|
93
116
|
|
|
94
|
-
DEBUG: bool = False
|
|
95
|
-
|
|
96
|
-
_PROFILE_DICT = {}
|
|
97
|
-
|
|
98
|
-
if DEBUG:
|
|
99
|
-
|
|
100
|
-
def debug_print(*args):
|
|
101
|
-
print(
|
|
102
|
-
*(
|
|
103
|
-
f"{type(arg).__name__}: {arg}" if isinstance(arg, Exception) else arg
|
|
104
|
-
for arg in args
|
|
105
|
-
)
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
else:
|
|
109
|
-
|
|
110
|
-
def debug_print(*args):
|
|
111
|
-
pass
|
|
112
|
-
|
|
113
|
-
def time_method_call(_) -> Callable:
|
|
114
|
-
def decorator(method):
|
|
115
|
-
@wraps(method)
|
|
116
|
-
def wrapper(self, *args, **kwargs):
|
|
117
|
-
return method(self, *args, **kwargs)
|
|
118
|
-
|
|
119
|
-
return wrapper
|
|
120
|
-
|
|
121
|
-
return decorator
|
|
122
|
-
|
|
123
|
-
def time_function_call(_):
|
|
124
|
-
def decorator(func):
|
|
125
|
-
@wraps(func)
|
|
126
|
-
def wrapper(*args, **kwargs):
|
|
127
|
-
return func(*args, **kwargs)
|
|
128
|
-
|
|
129
|
-
return wrapper
|
|
130
|
-
|
|
131
|
-
return decorator
|
|
132
|
-
|
|
133
117
|
|
|
134
118
|
def _get_default_sql_query(df: pl.LazyFrame | pl.DataFrame, columns: list[str]) -> str:
|
|
135
119
|
if isinstance(df, pl.LazyFrame):
|
|
@@ -190,9 +174,27 @@ def _get_sql_query_with_col(
|
|
|
190
174
|
|
|
191
175
|
@time_function_call(_PROFILE_DICT)
|
|
192
176
|
def read_file(
|
|
193
|
-
path: str, file_system: AbstractFileSystem, **kwargs
|
|
177
|
+
i: int, path: str, file_system: AbstractFileSystem, **kwargs
|
|
194
178
|
) -> tuple[pl.LazyFrame, dict[str, pl.DataType]]:
|
|
195
179
|
|
|
180
|
+
if is_raster_file(path):
|
|
181
|
+
import xarray as xr
|
|
182
|
+
|
|
183
|
+
if path.endswith(".ncml"):
|
|
184
|
+
# need to sleep some seconds when multiple ncml files from url
|
|
185
|
+
time.sleep(i * 5)
|
|
186
|
+
|
|
187
|
+
try:
|
|
188
|
+
ds = xr.open_dataarray(path)
|
|
189
|
+
except Exception:
|
|
190
|
+
ds = xr.open_dataset(path)
|
|
191
|
+
try:
|
|
192
|
+
ds = ds.sortby("time")
|
|
193
|
+
except Exception:
|
|
194
|
+
pass
|
|
195
|
+
|
|
196
|
+
return ds, {}
|
|
197
|
+
|
|
196
198
|
if not path.endswith(".parquet") and FILE_SPLITTER_TXT not in path:
|
|
197
199
|
try:
|
|
198
200
|
df = gpd.read_file(path, filesystem=file_system, **kwargs)
|
|
@@ -335,25 +337,6 @@ def _get_colorpicker_container(color_dict: dict[str, str]) -> html.Div:
|
|
|
335
337
|
dbc.Label([column_value]),
|
|
336
338
|
width="auto",
|
|
337
339
|
),
|
|
338
|
-
dbc.Col(
|
|
339
|
-
get_button_with_tooltip(
|
|
340
|
-
"❌",
|
|
341
|
-
id={
|
|
342
|
-
"type": "delete-cat-btn",
|
|
343
|
-
"index": column_value,
|
|
344
|
-
},
|
|
345
|
-
n_clicks=0,
|
|
346
|
-
style={
|
|
347
|
-
"color": "red",
|
|
348
|
-
"border": "none",
|
|
349
|
-
"background": "none",
|
|
350
|
-
"cursor": "pointer",
|
|
351
|
-
"marginLeft": "auto",
|
|
352
|
-
},
|
|
353
|
-
tooltip_text="Remove all data in this category",
|
|
354
|
-
),
|
|
355
|
-
width="auto",
|
|
356
|
-
),
|
|
357
340
|
],
|
|
358
341
|
style={
|
|
359
342
|
"display": "flex",
|
|
@@ -379,7 +362,7 @@ def _add_data_one_path(
|
|
|
379
362
|
concatted_data,
|
|
380
363
|
nan_color,
|
|
381
364
|
nan_label,
|
|
382
|
-
|
|
365
|
+
opacity,
|
|
383
366
|
n_rows_per_path,
|
|
384
367
|
columns: dict[str, set[str]],
|
|
385
368
|
current_columns: set[str],
|
|
@@ -389,7 +372,7 @@ def _add_data_one_path(
|
|
|
389
372
|
for key, cols in columns.items()
|
|
390
373
|
for col in cols
|
|
391
374
|
if path in key and col in current_columns
|
|
392
|
-
} | {"split_index"}
|
|
375
|
+
} | {"split_index", "_color"}
|
|
393
376
|
|
|
394
377
|
df = concatted_data.filter(
|
|
395
378
|
(pl.col("__file_path") == path)
|
|
@@ -412,7 +395,7 @@ def _add_data_one_path(
|
|
|
412
395
|
if column is not None and column in columns:
|
|
413
396
|
df = _fix_colors(df, column, bins, is_numeric, color_dict, nan_color, nan_label)
|
|
414
397
|
|
|
415
|
-
if column and column not in columns:
|
|
398
|
+
if column and column not in columns and column != "_color":
|
|
416
399
|
return rows_are_hidden, [
|
|
417
400
|
_get_leaflet_overlay(
|
|
418
401
|
data=_cheap_geo_interface(df.collect()),
|
|
@@ -420,8 +403,8 @@ def _add_data_one_path(
|
|
|
420
403
|
style={
|
|
421
404
|
"color": nan_color,
|
|
422
405
|
"fillColor": nan_color,
|
|
423
|
-
"weight":
|
|
424
|
-
"fillOpacity":
|
|
406
|
+
"weight": 1,
|
|
407
|
+
"fillOpacity": opacity,
|
|
425
408
|
},
|
|
426
409
|
onEachFeature=ns("yellowIfHighlighted"),
|
|
427
410
|
pointToLayer=ns("pointToLayerCircle"),
|
|
@@ -437,7 +420,7 @@ def _add_data_one_path(
|
|
|
437
420
|
path,
|
|
438
421
|
column,
|
|
439
422
|
nan_color,
|
|
440
|
-
|
|
423
|
+
opacity,
|
|
441
424
|
onEachFeature=ns("yellowIfHighlighted"),
|
|
442
425
|
pointToLayer=ns("pointToLayerCircle"),
|
|
443
426
|
hideout=dict(
|
|
@@ -458,8 +441,8 @@ def _add_data_one_path(
|
|
|
458
441
|
style={
|
|
459
442
|
"color": color,
|
|
460
443
|
"fillColor": color,
|
|
461
|
-
"weight":
|
|
462
|
-
"fillOpacity":
|
|
444
|
+
"weight": 1,
|
|
445
|
+
"fillOpacity": opacity,
|
|
463
446
|
},
|
|
464
447
|
onEachFeature=ns("yellowIfHighlighted"),
|
|
465
448
|
pointToLayer=ns("pointToLayerCircle"),
|
|
@@ -472,6 +455,8 @@ def _add_data_one_path(
|
|
|
472
455
|
|
|
473
456
|
@time_function_call(_PROFILE_DICT)
|
|
474
457
|
def _fix_colors(df, column, bins, is_numeric, color_dict, nan_color, nan_label):
|
|
458
|
+
if column == "_color":
|
|
459
|
+
return df
|
|
475
460
|
if not is_numeric:
|
|
476
461
|
return df.with_columns(
|
|
477
462
|
_color=pl.col(column).replace(
|
|
@@ -508,7 +493,7 @@ def _fix_colors(df, column, bins, is_numeric, color_dict, nan_color, nan_label):
|
|
|
508
493
|
bin_index_expr = pl.when(conditions[0]).then(pl.lit(color_dict[0]))
|
|
509
494
|
for i, cond in enumerate(conditions[1:], start=1):
|
|
510
495
|
if i not in color_dict:
|
|
511
|
-
|
|
496
|
+
continue
|
|
512
497
|
bin_index_expr = bin_index_expr.when(cond).then(pl.lit(color_dict[i]))
|
|
513
498
|
notnas = notnas.with_columns(bin_index_expr.alias("_color"))
|
|
514
499
|
|
|
@@ -697,15 +682,18 @@ def _get_unique_id(i: float) -> pl.Expr:
|
|
|
697
682
|
def _read_files(explorer, paths: list[str], mask=None, **kwargs) -> None:
|
|
698
683
|
if not paths:
|
|
699
684
|
return
|
|
700
|
-
|
|
685
|
+
bbox_set = set(explorer._bbox_series.index)
|
|
701
686
|
|
|
702
687
|
paths = [
|
|
703
688
|
path
|
|
704
689
|
for path in paths
|
|
705
690
|
if mask is None
|
|
706
691
|
or (
|
|
707
|
-
path in
|
|
708
|
-
and
|
|
692
|
+
path in bbox_set
|
|
693
|
+
and (
|
|
694
|
+
pd.isna(explorer._bbox_series[path])
|
|
695
|
+
or shapely.intersects(mask, explorer._bbox_series[path])
|
|
696
|
+
)
|
|
709
697
|
)
|
|
710
698
|
]
|
|
711
699
|
if not paths:
|
|
@@ -716,18 +704,33 @@ def _read_files(explorer, paths: list[str], mask=None, **kwargs) -> None:
|
|
|
716
704
|
with joblib.Parallel(len(paths), backend=backend) as parallel:
|
|
717
705
|
more_data = parallel(
|
|
718
706
|
joblib.delayed(explorer.__class__.read_func)(
|
|
719
|
-
path=path, file_system=file_system, **kwargs
|
|
707
|
+
i=i, path=path, file_system=file_system, **kwargs
|
|
720
708
|
)
|
|
721
|
-
for path in paths
|
|
709
|
+
for i, path in enumerate(paths)
|
|
722
710
|
)
|
|
723
|
-
for
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
711
|
+
for selected_path in explorer.selected_files:
|
|
712
|
+
for path, (df, dtypes) in zip(paths, more_data, strict=True):
|
|
713
|
+
if selected_path not in path or df is None:
|
|
714
|
+
continue
|
|
715
|
+
if isinstance(df, (Dataset | DataArray)):
|
|
716
|
+
explorer._loaded_data[path] = df
|
|
717
|
+
img_bbox = GeoSeries(
|
|
718
|
+
[shapely.box(*get_xarray_bounds(df))],
|
|
719
|
+
crs=explorer._nc[selected_path].get_crs(df, path),
|
|
720
|
+
).to_crs(4326)
|
|
721
|
+
explorer._bbox_series = pd.concat(
|
|
722
|
+
[
|
|
723
|
+
GeoSeries({path: next(iter(img_bbox))}),
|
|
724
|
+
explorer._bbox_series[lambda x: x.index != path],
|
|
725
|
+
]
|
|
726
|
+
)
|
|
727
|
+
|
|
728
|
+
continue
|
|
729
|
+
explorer._loaded_data[path] = df.with_columns(
|
|
730
|
+
_unique_id=_get_unique_id(explorer._max_unique_id_int)
|
|
731
|
+
).drop("id", strict=False)
|
|
732
|
+
explorer._dtypes[path] = dtypes | {"area": pl.Float64()}
|
|
733
|
+
explorer._max_unique_id_int += 1
|
|
731
734
|
|
|
732
735
|
|
|
733
736
|
def _random_color(min_diff: int = 50) -> str:
|
|
@@ -753,7 +756,7 @@ def _get_stem_from_parent(path):
|
|
|
753
756
|
return f"{parent_name}/{name}"
|
|
754
757
|
|
|
755
758
|
|
|
756
|
-
def _try_to_get_bounds_else_none(
|
|
759
|
+
def _try_to_get_bbox_else_none(
|
|
757
760
|
path, file_system
|
|
758
761
|
) -> tuple[tuple[float] | None, str | None]:
|
|
759
762
|
try:
|
|
@@ -765,15 +768,15 @@ def _try_to_get_bounds_else_none(
|
|
|
765
768
|
return None, None
|
|
766
769
|
|
|
767
770
|
|
|
768
|
-
def _get_bounds_series_as_4326(paths, file_system):
|
|
769
|
-
#
|
|
770
|
-
# return
|
|
771
|
+
def _get_bbox_series_as_4326(paths, file_system):
|
|
772
|
+
# bbox_series = sg.get_bbox_series(paths, file_system=file_system)
|
|
773
|
+
# return bbox_series.to_crs(4326)
|
|
771
774
|
|
|
772
|
-
func = partial(
|
|
775
|
+
func = partial(_try_to_get_bbox_else_none, file_system=file_system)
|
|
773
776
|
with ThreadPoolExecutor() as executor:
|
|
774
|
-
|
|
777
|
+
bbox_and_crs = list(executor.map(func, paths))
|
|
775
778
|
|
|
776
|
-
crss = {json.dumps(x[1]) for x in
|
|
779
|
+
crss = {json.dumps(x[1]) for x in bbox_and_crs}
|
|
777
780
|
crss = {
|
|
778
781
|
crs
|
|
779
782
|
for crs in crss
|
|
@@ -785,7 +788,7 @@ def _get_bounds_series_as_4326(paths, file_system):
|
|
|
785
788
|
return GeoSeries(
|
|
786
789
|
[
|
|
787
790
|
shapely.box(*bbox[0]) if bbox[0] is not None else None
|
|
788
|
-
for bbox in
|
|
791
|
+
for bbox in bbox_and_crs
|
|
789
792
|
],
|
|
790
793
|
index=paths,
|
|
791
794
|
crs=crs,
|
|
@@ -1016,9 +1019,11 @@ def _is_likely_geopandas_func(df, txt: Any):
|
|
|
1016
1019
|
return any(x in txt and len(x) > 2 and x not in cols for x in geopandas_methods)
|
|
1017
1020
|
|
|
1018
1021
|
|
|
1019
|
-
def _unformat_query(query: str) -> str:
|
|
1022
|
+
def _unformat_query(query: str | None) -> str:
|
|
1020
1023
|
"""Remove newlines and multiple whitespaces from SQL query."""
|
|
1021
|
-
|
|
1024
|
+
if query is None:
|
|
1025
|
+
return None
|
|
1026
|
+
query = query.replace("\n", "; ").strip().strip(";").strip()
|
|
1022
1027
|
while " " in query:
|
|
1023
1028
|
query = query.replace(" ", " ")
|
|
1024
1029
|
return query
|
|
@@ -1058,7 +1063,7 @@ def _get_leaflet_overlay(data, path, **kwargs):
|
|
|
1058
1063
|
|
|
1059
1064
|
|
|
1060
1065
|
@time_function_call(_PROFILE_DICT)
|
|
1061
|
-
def _get_multiple_leaflet_overlay(df, path, column, nan_color, alpha, **kwargs):
|
|
1066
|
+
def _get_multiple_leaflet_overlay(df, path, column, nan_color, opacity, **kwargs):
|
|
1062
1067
|
values = df.select("_color").unique().collect()["_color"]
|
|
1063
1068
|
return dl.Overlay(
|
|
1064
1069
|
dl.LayerGroup(
|
|
@@ -1070,8 +1075,8 @@ def _get_multiple_leaflet_overlay(df, path, column, nan_color, alpha, **kwargs):
|
|
|
1070
1075
|
style={
|
|
1071
1076
|
"color": color_,
|
|
1072
1077
|
"fillColor": color_,
|
|
1073
|
-
"weight":
|
|
1074
|
-
"fillOpacity":
|
|
1078
|
+
"weight": 1,
|
|
1079
|
+
"fillOpacity": opacity,
|
|
1075
1080
|
},
|
|
1076
1081
|
id={
|
|
1077
1082
|
"type": "geojson",
|
|
@@ -1092,8 +1097,8 @@ def _get_multiple_leaflet_overlay(df, path, column, nan_color, alpha, **kwargs):
|
|
|
1092
1097
|
style={
|
|
1093
1098
|
"color": nan_color,
|
|
1094
1099
|
"fillColor": nan_color,
|
|
1095
|
-
"weight":
|
|
1096
|
-
"fillOpacity":
|
|
1100
|
+
"weight": 1,
|
|
1101
|
+
"fillOpacity": opacity,
|
|
1097
1102
|
},
|
|
1098
1103
|
id={
|
|
1099
1104
|
"type": "geojson",
|
|
@@ -1149,18 +1154,18 @@ class GeoExplorer:
|
|
|
1149
1154
|
max_rows: Max number of rows to sample per dataset if number of feature in bounds excedes.
|
|
1150
1155
|
Note that rendering more than the default (10,000) might crash the server, especially for
|
|
1151
1156
|
polygon features.
|
|
1152
|
-
selected_features: list of indices of features (rows) to show in attribute table
|
|
1153
|
-
|
|
1157
|
+
# selected_features: list of indices of features (rows) to show in attribute table
|
|
1158
|
+
# at init. Fetch this list with the "Export as code" button.
|
|
1154
1159
|
hard_click: If True, clicking on a geometry triggers all overlapping geometries to be marked.
|
|
1155
1160
|
splitted: If True, all rows will have a separate label and color.
|
|
1156
|
-
|
|
1161
|
+
opacity: Opacity/transparency of the geometries.
|
|
1157
1162
|
nan_color: Color for missing values. Defaults to a shade of gray.
|
|
1158
1163
|
nan_label: Defaults to "Missing".
|
|
1159
1164
|
max_read_size_per_callback: Defaults to 1e9 bytes (1 GB). Meaning max 1 GB is read at once, then the read
|
|
1160
1165
|
function is cycled until all data is read. This is because long callbacks time out.
|
|
1161
1166
|
**kwargs: Additional keyword arguments passed to dash_leaflet.Map: https://www.dash-leaflet.com/components/map_container.
|
|
1162
1167
|
|
|
1163
|
-
|
|
1168
|
+
An empty GeoExplorer can be initialized like this:
|
|
1164
1169
|
|
|
1165
1170
|
>>> from geo_explorer import GeoExplorer
|
|
1166
1171
|
>>> from geo_explorer import LocalFileSystem
|
|
@@ -1251,7 +1256,9 @@ class GeoExplorer:
|
|
|
1251
1256
|
favorites: list[str] | None = None,
|
|
1252
1257
|
port: int = 8050,
|
|
1253
1258
|
file_system: AbstractFileSystem | None = None,
|
|
1254
|
-
data:
|
|
1259
|
+
data: (
|
|
1260
|
+
dict[str, str | GeoDataFrame | NetCDFConfig] | list[str | dict] | None
|
|
1261
|
+
) = None,
|
|
1255
1262
|
column: str | None = None,
|
|
1256
1263
|
color_dict: dict | None = None,
|
|
1257
1264
|
center: tuple[float, float] | None = None,
|
|
@@ -1259,10 +1266,10 @@ class GeoExplorer:
|
|
|
1259
1266
|
wms: dict[str, WmsLoader] | None = None,
|
|
1260
1267
|
wms_layers_checked: dict[str, list[str]] | None = None,
|
|
1261
1268
|
max_rows: int = 10_000,
|
|
1262
|
-
selected_features: list[str] | None = None,
|
|
1269
|
+
# selected_features: list[str] | None = None,
|
|
1263
1270
|
hard_click: bool = False,
|
|
1264
1271
|
splitted: bool = False,
|
|
1265
|
-
|
|
1272
|
+
opacity: float = 0.6,
|
|
1266
1273
|
nan_color: str = "#969696",
|
|
1267
1274
|
nan_label: str = "Missing",
|
|
1268
1275
|
max_read_size_per_callback: int = 1e9,
|
|
@@ -1295,16 +1302,16 @@ class GeoExplorer:
|
|
|
1295
1302
|
self.splitted = splitted
|
|
1296
1303
|
self.hard_click = hard_click
|
|
1297
1304
|
self.max_rows = max_rows
|
|
1298
|
-
self.
|
|
1299
|
-
self.
|
|
1305
|
+
self.opacity = opacity
|
|
1306
|
+
self._bbox_series = GeoSeries()
|
|
1300
1307
|
self.selected_files: dict[str, int] = {}
|
|
1301
1308
|
self._loaded_data: dict[str, pl.LazyFrame] = {}
|
|
1309
|
+
self._images: dict[str, Polygon] = {}
|
|
1302
1310
|
self._dtypes: dict[str, dict[str, pl.DataType]] = {}
|
|
1303
1311
|
self._max_unique_id_int: int = 0
|
|
1304
1312
|
self._loaded_data_sizes: dict[str, int] = {}
|
|
1305
1313
|
self._concatted_data: pl.DataFrame | None = None
|
|
1306
|
-
self.
|
|
1307
|
-
self.selected_features = {}
|
|
1314
|
+
self._selected_features = {}
|
|
1308
1315
|
self._file_browser = FileBrowser(
|
|
1309
1316
|
start_dir, file_system=file_system, favorites=favorites
|
|
1310
1317
|
)
|
|
@@ -1312,6 +1319,8 @@ class GeoExplorer:
|
|
|
1312
1319
|
self.max_read_size_per_callback = max_read_size_per_callback
|
|
1313
1320
|
self._force_categorical = False
|
|
1314
1321
|
self._is_recursing = False
|
|
1322
|
+
self._nc = {}
|
|
1323
|
+
self._px_plot_conf = {}
|
|
1315
1324
|
|
|
1316
1325
|
if is_jupyter():
|
|
1317
1326
|
service_prefix = os.environ["JUPYTERHUB_SERVICE_PREFIX"].strip("/")
|
|
@@ -1344,7 +1353,7 @@ class GeoExplorer:
|
|
|
1344
1353
|
return dbc.Container(
|
|
1345
1354
|
[
|
|
1346
1355
|
dcc.Location(id="url"),
|
|
1347
|
-
dbc.Row(html.Div(id="alert")),
|
|
1356
|
+
dbc.Row(html.Div(id="add-data-alert")),
|
|
1348
1357
|
dbc.Row(html.Div(id="alert3")),
|
|
1349
1358
|
dbc.Row(html.Div(id="alert4")),
|
|
1350
1359
|
dbc.Row(html.Div(id="new-file-added")),
|
|
@@ -1403,7 +1412,7 @@ class GeoExplorer:
|
|
|
1403
1412
|
tooltip_text="Get code to reproduce current view",
|
|
1404
1413
|
),
|
|
1405
1414
|
dcc.Dropdown(
|
|
1406
|
-
value=self.
|
|
1415
|
+
value=self.opacity,
|
|
1407
1416
|
options=[
|
|
1408
1417
|
{
|
|
1409
1418
|
"label": f"opacity={round(x, 1)}",
|
|
@@ -1415,7 +1424,7 @@ class GeoExplorer:
|
|
|
1415
1424
|
0.1, 1.1, 0.1
|
|
1416
1425
|
)
|
|
1417
1426
|
],
|
|
1418
|
-
id="
|
|
1427
|
+
id="opacity",
|
|
1419
1428
|
clearable=False,
|
|
1420
1429
|
),
|
|
1421
1430
|
dbc.Modal(
|
|
@@ -1523,6 +1532,36 @@ class GeoExplorer:
|
|
|
1523
1532
|
id="numeric-options",
|
|
1524
1533
|
style={"display": "none"},
|
|
1525
1534
|
),
|
|
1535
|
+
dbc.Row(
|
|
1536
|
+
[
|
|
1537
|
+
dbc.Col(
|
|
1538
|
+
[
|
|
1539
|
+
dbc.Row(
|
|
1540
|
+
[
|
|
1541
|
+
dbc.Col(width=10),
|
|
1542
|
+
dbc.Col(
|
|
1543
|
+
get_button_with_tooltip(
|
|
1544
|
+
"✖",
|
|
1545
|
+
id="close-image-btn",
|
|
1546
|
+
tooltip_text="Remove plot",
|
|
1547
|
+
style={
|
|
1548
|
+
"display": "none"
|
|
1549
|
+
},
|
|
1550
|
+
),
|
|
1551
|
+
width=2,
|
|
1552
|
+
),
|
|
1553
|
+
],
|
|
1554
|
+
),
|
|
1555
|
+
dbc.Row(
|
|
1556
|
+
html.Div(
|
|
1557
|
+
id="image-plot",
|
|
1558
|
+
style={"display": "none"},
|
|
1559
|
+
)
|
|
1560
|
+
),
|
|
1561
|
+
]
|
|
1562
|
+
),
|
|
1563
|
+
]
|
|
1564
|
+
),
|
|
1526
1565
|
dbc.Row(
|
|
1527
1566
|
[
|
|
1528
1567
|
dbc.Row(
|
|
@@ -1594,14 +1633,6 @@ class GeoExplorer:
|
|
|
1594
1633
|
"margin-right": "0px",
|
|
1595
1634
|
},
|
|
1596
1635
|
),
|
|
1597
|
-
dbc.Row(
|
|
1598
|
-
get_button_with_tooltip(
|
|
1599
|
-
"Reload categories",
|
|
1600
|
-
id="reload-categories",
|
|
1601
|
-
n_clicks=0,
|
|
1602
|
-
tooltip_text="Get back categories that have been X-ed out",
|
|
1603
|
-
),
|
|
1604
|
-
),
|
|
1605
1636
|
dbc.Row(
|
|
1606
1637
|
id="colorpicker-container",
|
|
1607
1638
|
),
|
|
@@ -1674,7 +1705,7 @@ class GeoExplorer:
|
|
|
1674
1705
|
)
|
|
1675
1706
|
|
|
1676
1707
|
error_mess = "'data' must be a list of file paths or a dict of GeoDataFrames."
|
|
1677
|
-
|
|
1708
|
+
bbox_series_dict = {}
|
|
1678
1709
|
if isinstance(data, dict):
|
|
1679
1710
|
data = [data]
|
|
1680
1711
|
|
|
@@ -1687,6 +1718,12 @@ class GeoExplorer:
|
|
|
1687
1718
|
raise ValueError(error_mess)
|
|
1688
1719
|
for key, value in x.items():
|
|
1689
1720
|
key = _standardize_path(key)
|
|
1721
|
+
if isinstance(value, NetCDFConfig):
|
|
1722
|
+
# setting nc files as unchecked because they might be very large
|
|
1723
|
+
self.selected_files[key] = False
|
|
1724
|
+
self._queries[key] = value.code_block
|
|
1725
|
+
self._nc[key] = value
|
|
1726
|
+
continue
|
|
1690
1727
|
if value is not None and not isinstance(value, (GeoDataFrame | str)):
|
|
1691
1728
|
raise ValueError(error_mess)
|
|
1692
1729
|
elif not isinstance(value, GeoDataFrame):
|
|
@@ -1694,7 +1731,7 @@ class GeoExplorer:
|
|
|
1694
1731
|
self._queries[key] = value
|
|
1695
1732
|
continue
|
|
1696
1733
|
value, dtypes = _geopandas_to_polars(value, key)
|
|
1697
|
-
|
|
1734
|
+
bbox_series_dict[key] = shapely.box(
|
|
1698
1735
|
float(value["minx"].min()),
|
|
1699
1736
|
float(value["miny"].min()),
|
|
1700
1737
|
float(value["maxx"].max()),
|
|
@@ -1705,11 +1742,11 @@ class GeoExplorer:
|
|
|
1705
1742
|
self.selected_files[key] = True
|
|
1706
1743
|
|
|
1707
1744
|
self.selected_files = dict(reversed(self.selected_files.items()))
|
|
1708
|
-
self.
|
|
1745
|
+
self._bbox_series = GeoSeries(bbox_series_dict)
|
|
1709
1746
|
|
|
1710
1747
|
# storing bounds here before file paths are loaded. To avoid setting center as the entire map bounds if large data
|
|
1711
|
-
if len(self.
|
|
1712
|
-
minx, miny, maxx, maxy = self.
|
|
1748
|
+
if len(self._bbox_series):
|
|
1749
|
+
minx, miny, maxx, maxy = self._bbox_series.total_bounds
|
|
1713
1750
|
else:
|
|
1714
1751
|
minx, miny, maxx, maxy = None, None, None, None
|
|
1715
1752
|
|
|
@@ -1720,7 +1757,7 @@ class GeoExplorer:
|
|
|
1720
1757
|
self._register_callbacks()
|
|
1721
1758
|
return
|
|
1722
1759
|
|
|
1723
|
-
self.
|
|
1760
|
+
self._append_to_bbox_series(
|
|
1724
1761
|
[x for x in self.selected_files if x not in self._loaded_data]
|
|
1725
1762
|
)
|
|
1726
1763
|
|
|
@@ -1743,16 +1780,18 @@ class GeoExplorer:
|
|
|
1743
1780
|
if key not in self._loaded_data:
|
|
1744
1781
|
continue
|
|
1745
1782
|
df = self._loaded_data[key]
|
|
1783
|
+
self._max_unique_id_int += 1
|
|
1784
|
+
if isinstance(df, (Dataset | DataArray)):
|
|
1785
|
+
continue
|
|
1746
1786
|
loaded_data_sorted[key] = df.with_columns(
|
|
1747
1787
|
_unique_id=_get_unique_id(self._max_unique_id_int)
|
|
1748
|
-
)
|
|
1749
|
-
self._max_unique_id_int += 1
|
|
1788
|
+
).drop("id", errors="ignore")
|
|
1750
1789
|
else:
|
|
1751
1790
|
x = _standardize_path(x)
|
|
1752
1791
|
df = self._loaded_data[x]
|
|
1753
1792
|
loaded_data_sorted[x] = df.with_columns(
|
|
1754
1793
|
_unique_id=_get_unique_id(self._max_unique_id_int)
|
|
1755
|
-
)
|
|
1794
|
+
).drop("id", errors="ignore")
|
|
1756
1795
|
self._max_unique_id_int += 1
|
|
1757
1796
|
|
|
1758
1797
|
self._loaded_data = loaded_data_sorted
|
|
@@ -1773,11 +1812,11 @@ class GeoExplorer:
|
|
|
1773
1812
|
|
|
1774
1813
|
self.app.layout = get_layout
|
|
1775
1814
|
|
|
1776
|
-
for unique_id in selected_features if selected_features is not None else []:
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1815
|
+
# for unique_id in selected_features if selected_features is not None else []:
|
|
1816
|
+
# i = int(float(unique_id))
|
|
1817
|
+
# path = list(self._loaded_data)[i]
|
|
1818
|
+
# properties, _ = self._get_selected_feature(unique_id, path, bounds=None)
|
|
1819
|
+
# self._selected_features[unique_id] = properties
|
|
1781
1820
|
|
|
1782
1821
|
self._register_callbacks()
|
|
1783
1822
|
|
|
@@ -1810,7 +1849,7 @@ class GeoExplorer:
|
|
|
1810
1849
|
os.kill(os.getpid(), signal.SIGTERM)
|
|
1811
1850
|
finally:
|
|
1812
1851
|
print("\nExiting with configs:")
|
|
1813
|
-
print(self.
|
|
1852
|
+
print(self._get_self_as_string_except_defaults())
|
|
1814
1853
|
|
|
1815
1854
|
def _register_callbacks(self) -> None:
|
|
1816
1855
|
|
|
@@ -1842,12 +1881,12 @@ class GeoExplorer:
|
|
|
1842
1881
|
):
|
|
1843
1882
|
print(k, v)
|
|
1844
1883
|
|
|
1845
|
-
txt = self.
|
|
1884
|
+
txt = self._get_self_as_string_except_defaults()
|
|
1846
1885
|
return html.Div(f"{txt}.run()"), True
|
|
1847
1886
|
|
|
1848
1887
|
@callback(
|
|
1849
1888
|
Output("buffer-tip", "children"),
|
|
1850
|
-
Input("alert", "children"),
|
|
1889
|
+
Input("add-data-alert", "children"),
|
|
1851
1890
|
State("map", "zoom"),
|
|
1852
1891
|
)
|
|
1853
1892
|
def maybe_tip_about_buffer(_, zoom):
|
|
@@ -1916,7 +1955,7 @@ class GeoExplorer:
|
|
|
1916
1955
|
if selected_path in self.selected_files or not n_clicks:
|
|
1917
1956
|
return dash.no_update
|
|
1918
1957
|
try:
|
|
1919
|
-
self.
|
|
1958
|
+
self._append_to_bbox_series([selected_path])
|
|
1920
1959
|
except Exception as e:
|
|
1921
1960
|
if DEBUG:
|
|
1922
1961
|
raise e
|
|
@@ -1926,6 +1965,20 @@ class GeoExplorer:
|
|
|
1926
1965
|
dismissable=True,
|
|
1927
1966
|
)
|
|
1928
1967
|
self.selected_files[selected_path] = True
|
|
1968
|
+
if is_netcdf(selected_path) or all(
|
|
1969
|
+
is_netcdf(x)
|
|
1970
|
+
for x in self._bbox_series[
|
|
1971
|
+
self._bbox_series.index.str.contains(selected_path)
|
|
1972
|
+
].index
|
|
1973
|
+
):
|
|
1974
|
+
self._nc[selected_path] = NetCDFConfig()
|
|
1975
|
+
elif is_raster_file(selected_path) or all(
|
|
1976
|
+
is_raster_file(x)
|
|
1977
|
+
for x in self._bbox_series[
|
|
1978
|
+
self._bbox_series.index.str.contains(selected_path)
|
|
1979
|
+
].index
|
|
1980
|
+
):
|
|
1981
|
+
self._nc[selected_path] = GeoTIFFConfig()
|
|
1929
1982
|
return None
|
|
1930
1983
|
|
|
1931
1984
|
@callback(
|
|
@@ -1948,6 +2001,8 @@ class GeoExplorer:
|
|
|
1948
2001
|
checked_clicks,
|
|
1949
2002
|
checked_ids,
|
|
1950
2003
|
):
|
|
2004
|
+
if not len(self._bbox_series):
|
|
2005
|
+
return dash.no_update, dash.no_update, dash.no_update
|
|
1951
2006
|
t = perf_counter()
|
|
1952
2007
|
|
|
1953
2008
|
triggered = dash.callback_context.triggered_id
|
|
@@ -1964,9 +2019,9 @@ class GeoExplorer:
|
|
|
1964
2019
|
|
|
1965
2020
|
if triggered != "missing":
|
|
1966
2021
|
box = shapely.box(*self._nested_bounds_to_bounds(bounds))
|
|
1967
|
-
files_in_bounds = set(sg.sfilter(self.
|
|
2022
|
+
files_in_bounds = set(sg.sfilter(self._bbox_series, box).index)
|
|
1968
2023
|
non_geodata = set(
|
|
1969
|
-
self.
|
|
2024
|
+
self._bbox_series[lambda x: (x.isna()) | (x.is_empty)].index
|
|
1970
2025
|
)
|
|
1971
2026
|
files_in_bounds |= non_geodata
|
|
1972
2027
|
|
|
@@ -1990,6 +2045,26 @@ class GeoExplorer:
|
|
|
1990
2045
|
disabled = True
|
|
1991
2046
|
return new_data_read, missing, disabled
|
|
1992
2047
|
|
|
2048
|
+
for selected_path in self.selected_files:
|
|
2049
|
+
for path in list(missing):
|
|
2050
|
+
if selected_path not in path or not any(
|
|
2051
|
+
path.lower().endswith(txt) for txt in [".tif", ".tiff"]
|
|
2052
|
+
):
|
|
2053
|
+
continue
|
|
2054
|
+
self._bbox_series = pd.concat(
|
|
2055
|
+
[
|
|
2056
|
+
GeoSeries(
|
|
2057
|
+
{
|
|
2058
|
+
path: shapely.box(
|
|
2059
|
+
*self._nc[selected_path].get_bounds(None, path)
|
|
2060
|
+
)
|
|
2061
|
+
}
|
|
2062
|
+
),
|
|
2063
|
+
self._bbox_series[lambda x: x.index != path],
|
|
2064
|
+
]
|
|
2065
|
+
)
|
|
2066
|
+
missing.pop(missing.index(path))
|
|
2067
|
+
|
|
1993
2068
|
if len(missing) > 10:
|
|
1994
2069
|
to_read = 0
|
|
1995
2070
|
cumsum = 0
|
|
@@ -2035,7 +2110,6 @@ class GeoExplorer:
|
|
|
2035
2110
|
self.splitted = not self.splitted
|
|
2036
2111
|
self.column = None if not self.splitted else self.column
|
|
2037
2112
|
if self.splitted:
|
|
2038
|
-
self._deleted_categories = set()
|
|
2039
2113
|
return self.splitted, "split_index"
|
|
2040
2114
|
return self.splitted, self.column
|
|
2041
2115
|
|
|
@@ -2401,30 +2475,6 @@ class GeoExplorer:
|
|
|
2401
2475
|
True,
|
|
2402
2476
|
)
|
|
2403
2477
|
|
|
2404
|
-
@callback(
|
|
2405
|
-
Output("file-deleted", "children", allow_duplicate=True),
|
|
2406
|
-
Output("alert3", "children", allow_duplicate=True),
|
|
2407
|
-
Output("update-table", "data", allow_duplicate=True),
|
|
2408
|
-
Output("color-container", "children", allow_duplicate=True),
|
|
2409
|
-
Input({"type": "delete-cat-btn", "index": dash.ALL}, "n_clicks"),
|
|
2410
|
-
State({"type": "delete-cat-btn", "index": dash.ALL}, "id"),
|
|
2411
|
-
prevent_initial_call=True,
|
|
2412
|
-
)
|
|
2413
|
-
@time_method_call(_PROFILE_DICT)
|
|
2414
|
-
def delete_category(n_clicks_list, delete_ids):
|
|
2415
|
-
path_to_delete = get_index_if_clicks(n_clicks_list, delete_ids)
|
|
2416
|
-
if path_to_delete is None:
|
|
2417
|
-
return dash.no_update, dash.no_update, dash.no_update, dash.no_update
|
|
2418
|
-
if not self.column:
|
|
2419
|
-
return (
|
|
2420
|
-
*self._delete_file(n_clicks_list, delete_ids, delete_category=True),
|
|
2421
|
-
True,
|
|
2422
|
-
dash.no_update,
|
|
2423
|
-
)
|
|
2424
|
-
else:
|
|
2425
|
-
self._deleted_categories.add(path_to_delete)
|
|
2426
|
-
return None, None, True, dash.no_update
|
|
2427
|
-
|
|
2428
2478
|
@callback(
|
|
2429
2479
|
Output({"type": "query-view", "index": dash.MATCH}, "children"),
|
|
2430
2480
|
Output({"type": "query-view", "index": dash.MATCH}, "is_open"),
|
|
@@ -2602,17 +2652,6 @@ class GeoExplorer:
|
|
|
2602
2652
|
True,
|
|
2603
2653
|
)
|
|
2604
2654
|
|
|
2605
|
-
@callback(
|
|
2606
|
-
Output("file-deleted", "children", allow_duplicate=True),
|
|
2607
|
-
Input("reload-categories", "n_clicks"),
|
|
2608
|
-
prevent_initial_call=True,
|
|
2609
|
-
)
|
|
2610
|
-
def reload_categories(n_clicks):
|
|
2611
|
-
if not n_clicks:
|
|
2612
|
-
return dash.no_update
|
|
2613
|
-
self._deleted_categories = set()
|
|
2614
|
-
return None
|
|
2615
|
-
|
|
2616
2655
|
@callback(
|
|
2617
2656
|
Output("splitter", "style"),
|
|
2618
2657
|
Input("is_splitted", "data"),
|
|
@@ -2623,7 +2662,6 @@ class GeoExplorer:
|
|
|
2623
2662
|
if column is None:
|
|
2624
2663
|
self.column = None
|
|
2625
2664
|
self.splitted = False
|
|
2626
|
-
self._deleted_categories = set()
|
|
2627
2665
|
if self.splitted and column == "split_index":
|
|
2628
2666
|
return _clicked_button_style()
|
|
2629
2667
|
else:
|
|
@@ -2788,11 +2826,9 @@ class GeoExplorer:
|
|
|
2788
2826
|
if not self.selected_files:
|
|
2789
2827
|
self.column = None
|
|
2790
2828
|
self.color_dict = {}
|
|
2791
|
-
self._deleted_categories = set()
|
|
2792
2829
|
return html.Div(), None, False, None, 1
|
|
2793
2830
|
elif column != self.column or triggered in ["force-categorical"]:
|
|
2794
2831
|
self.color_dict = {}
|
|
2795
|
-
self._deleted_categories = set()
|
|
2796
2832
|
elif not column and triggered is None:
|
|
2797
2833
|
column = self.column
|
|
2798
2834
|
elif self._concatted_data is None:
|
|
@@ -2885,29 +2921,9 @@ class GeoExplorer:
|
|
|
2885
2921
|
)
|
|
2886
2922
|
|
|
2887
2923
|
if is_numeric and len(values_no_nans):
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
bins = jenks_breaks(values_no_nans.to_numpy(), n_classes=k)
|
|
2892
|
-
|
|
2893
|
-
cmap_ = matplotlib.colormaps.get_cmap(cmap)
|
|
2894
|
-
colors_ = [
|
|
2895
|
-
matplotlib.colors.to_hex(cmap_(int(i)))
|
|
2896
|
-
for i in np.linspace(0, 255, num=k + 1)
|
|
2897
|
-
]
|
|
2898
|
-
rounded_bins = [round(x, 1) for x in bins]
|
|
2899
|
-
color_dict = {
|
|
2900
|
-
f"{round(min(values_no_nans), 1)} - {rounded_bins[0]}": colors_[0],
|
|
2901
|
-
**{
|
|
2902
|
-
f"{start} - {stop}": colors_[i + 1]
|
|
2903
|
-
for i, (start, stop) in enumerate(
|
|
2904
|
-
itertools.pairwise(rounded_bins[1:])
|
|
2905
|
-
)
|
|
2906
|
-
},
|
|
2907
|
-
f"{rounded_bins[-1]} - {round(max(values_no_nans), 1)}": colors_[
|
|
2908
|
-
-1
|
|
2909
|
-
],
|
|
2910
|
-
}
|
|
2924
|
+
color_dict, bins = get_numeric_colors(
|
|
2925
|
+
values_no_nans_unique, values_no_nans, cmap, k
|
|
2926
|
+
)
|
|
2911
2927
|
else:
|
|
2912
2928
|
new_values = [
|
|
2913
2929
|
value
|
|
@@ -2961,7 +2977,7 @@ class GeoExplorer:
|
|
|
2961
2977
|
|
|
2962
2978
|
@callback(
|
|
2963
2979
|
Output("loading", "children", allow_duplicate=True),
|
|
2964
|
-
Input("alert", "children"),
|
|
2980
|
+
Input("add-data-alert", "children"),
|
|
2965
2981
|
prevent_initial_call=True,
|
|
2966
2982
|
)
|
|
2967
2983
|
def update_loading(_):
|
|
@@ -2971,7 +2987,7 @@ class GeoExplorer:
|
|
|
2971
2987
|
|
|
2972
2988
|
@callback(
|
|
2973
2989
|
Output("lc", "children"),
|
|
2974
|
-
Output("alert", "children"),
|
|
2990
|
+
Output("add-data-alert", "children"),
|
|
2975
2991
|
Output("max_rows", "children"),
|
|
2976
2992
|
Output({"type": "wms-list", "index": dash.ALL}, "children"),
|
|
2977
2993
|
Input("colors-are-updated", "data"),
|
|
@@ -2981,14 +2997,14 @@ class GeoExplorer:
|
|
|
2981
2997
|
Input("wms-added", "data"),
|
|
2982
2998
|
Input("max_rows_value", "value"),
|
|
2983
2999
|
Input("file-control-panel", "children"),
|
|
2984
|
-
Input("
|
|
3000
|
+
Input("opacity", "value"),
|
|
2985
3001
|
Input({"type": "checked-btn", "index": dash.ALL}, "style"),
|
|
2986
3002
|
Input(
|
|
2987
3003
|
{"type": "checked-btn-wms", "wms_name": dash.ALL, "tile": dash.ALL},
|
|
2988
3004
|
"style",
|
|
2989
3005
|
),
|
|
2990
3006
|
State("debounced_bounds", "value"),
|
|
2991
|
-
State("column-dropdown", "value"),
|
|
3007
|
+
# State("column-dropdown", "value"),
|
|
2992
3008
|
State("bins", "data"),
|
|
2993
3009
|
)
|
|
2994
3010
|
@time_method_call(_PROFILE_DICT)
|
|
@@ -3002,21 +3018,22 @@ class GeoExplorer:
|
|
|
3002
3018
|
max_rows_value,
|
|
3003
3019
|
# data_was_changed,
|
|
3004
3020
|
order_was_changed,
|
|
3005
|
-
|
|
3021
|
+
opacity,
|
|
3006
3022
|
checked_clicks,
|
|
3007
3023
|
checked_wms_clicks,
|
|
3008
3024
|
bounds,
|
|
3009
|
-
column,
|
|
3025
|
+
# column,
|
|
3010
3026
|
bins,
|
|
3011
3027
|
):
|
|
3012
3028
|
triggered = dash.callback_context.triggered_id
|
|
3013
|
-
|
|
3029
|
+
print(
|
|
3014
3030
|
"\nadd_data",
|
|
3015
3031
|
dash.callback_context.triggered_id,
|
|
3016
3032
|
len(self._loaded_data),
|
|
3017
3033
|
f"{self.column=}",
|
|
3018
3034
|
)
|
|
3019
3035
|
t = perf_counter()
|
|
3036
|
+
alerts = []
|
|
3020
3037
|
|
|
3021
3038
|
if max_rows_value is not None:
|
|
3022
3039
|
self.max_rows = max_rows_value
|
|
@@ -3047,11 +3064,11 @@ class GeoExplorer:
|
|
|
3047
3064
|
concatted_data=self._concatted_data,
|
|
3048
3065
|
nan_color=self.nan_color,
|
|
3049
3066
|
nan_label=self.nan_label,
|
|
3050
|
-
column=column,
|
|
3067
|
+
column=self.column,
|
|
3051
3068
|
is_numeric=is_numeric,
|
|
3052
3069
|
color_dict=color_dict,
|
|
3053
3070
|
bins=bins,
|
|
3054
|
-
|
|
3071
|
+
opacity=opacity,
|
|
3055
3072
|
n_rows_per_path=n_rows_per_path,
|
|
3056
3073
|
columns=self._columns,
|
|
3057
3074
|
current_columns=current_columns,
|
|
@@ -3071,19 +3088,101 @@ class GeoExplorer:
|
|
|
3071
3088
|
else:
|
|
3072
3089
|
max_rows_component = _get_max_rows_displayed_component(self.max_rows)
|
|
3073
3090
|
|
|
3091
|
+
bbox = shapely.box(*bounds)
|
|
3092
|
+
images = {}
|
|
3093
|
+
self._current_xarrays = {}
|
|
3094
|
+
for selected_path, is_checked in self.selected_files.items():
|
|
3095
|
+
if not is_checked:
|
|
3096
|
+
continue
|
|
3097
|
+
for img_path in set(self._loaded_data_sizes).union(
|
|
3098
|
+
set(self._loaded_data)
|
|
3099
|
+
):
|
|
3100
|
+
if selected_path not in img_path or not is_raster_file(img_path):
|
|
3101
|
+
continue
|
|
3102
|
+
if img_path not in set(self._bbox_series.index) or pd.notna(
|
|
3103
|
+
self._bbox_series.loc[img_path]
|
|
3104
|
+
):
|
|
3105
|
+
_read_files(self, [img_path], mask=bbox)
|
|
3106
|
+
img_bbox = self._bbox_series.loc[img_path]
|
|
3107
|
+
clipped_bounds = img_bbox.intersection(bbox)
|
|
3108
|
+
if clipped_bounds.is_empty:
|
|
3109
|
+
continue
|
|
3110
|
+
try:
|
|
3111
|
+
ds = self._open_img_path_as_xarray(
|
|
3112
|
+
img_path, selected_path, clipped_bounds
|
|
3113
|
+
)
|
|
3114
|
+
except Exception as e:
|
|
3115
|
+
traceback.print_exc()
|
|
3116
|
+
alerts.append(
|
|
3117
|
+
dbc.Alert(
|
|
3118
|
+
f"{type(e).__name__}: {e}. (Traceback printed in terminal)",
|
|
3119
|
+
color="warning",
|
|
3120
|
+
)
|
|
3121
|
+
)
|
|
3122
|
+
continue
|
|
3123
|
+
if ds is None:
|
|
3124
|
+
continue
|
|
3125
|
+
self._current_xarrays[img_path] = ds
|
|
3126
|
+
arr = self._nc[selected_path].to_numpy(ds)
|
|
3127
|
+
arr = fix_numpy_img_shape(arr)
|
|
3128
|
+
if np.isnan(arr).any() and not np.all(np.isnan(arr)):
|
|
3129
|
+
arr[np.isnan(arr)] = np.min(arr[~np.isnan(arr)])
|
|
3130
|
+
|
|
3131
|
+
images[img_path] = (arr, clipped_bounds)
|
|
3132
|
+
|
|
3133
|
+
if images:
|
|
3134
|
+
# make sure all single-band images are normalized by same extremities
|
|
3135
|
+
vmin = np.min([np.min(x[0]) for x in images.values()])
|
|
3136
|
+
vmax = np.min([np.max(x[0]) for x in images.values()])
|
|
3137
|
+
|
|
3138
|
+
image_overlays = []
|
|
3139
|
+
for img_path, (arr, bounds) in images.items():
|
|
3140
|
+
# skip normalization for rgb images
|
|
3141
|
+
if len(arr.shape) == 2:
|
|
3142
|
+
arr = (arr - vmin) / (vmax - vmin)
|
|
3143
|
+
|
|
3144
|
+
minx, miny, maxx, maxy = bounds.bounds
|
|
3145
|
+
|
|
3146
|
+
# hack: using folium because dash_leaflet doesn't accept np.array
|
|
3147
|
+
image_overlay = folium.raster_layers.ImageOverlay(
|
|
3148
|
+
arr,
|
|
3149
|
+
bounds=[[miny, minx], [maxy, maxx]],
|
|
3150
|
+
vmin=vmin,
|
|
3151
|
+
vmax=vmax,
|
|
3152
|
+
)
|
|
3153
|
+
img_name = Path(img_path).stem
|
|
3154
|
+
image_overlay = dl.ImageOverlay(
|
|
3155
|
+
url=image_overlay.url,
|
|
3156
|
+
bounds=[[miny, minx], [maxy, maxx]],
|
|
3157
|
+
opacity=self.opacity,
|
|
3158
|
+
interactive=True,
|
|
3159
|
+
id={"type": "image", "index": img_path},
|
|
3160
|
+
)
|
|
3161
|
+
image_overlay = dl.Overlay(
|
|
3162
|
+
image_overlay,
|
|
3163
|
+
name=img_name,
|
|
3164
|
+
checked=True,
|
|
3165
|
+
)
|
|
3166
|
+
image_overlays.append(image_overlay)
|
|
3167
|
+
|
|
3074
3168
|
return (
|
|
3075
|
-
dl.LayersControl(
|
|
3076
|
-
|
|
3169
|
+
dl.LayersControl(
|
|
3170
|
+
list(self._base_layers.values())
|
|
3171
|
+
+ wms_layers
|
|
3172
|
+
+ data
|
|
3173
|
+
+ image_overlays
|
|
3174
|
+
),
|
|
3175
|
+
alerts,
|
|
3077
3176
|
max_rows_component,
|
|
3078
3177
|
all_tiles_lists,
|
|
3079
3178
|
)
|
|
3080
3179
|
|
|
3081
3180
|
@callback(
|
|
3082
|
-
Input("
|
|
3181
|
+
Input("opacity", "value"),
|
|
3083
3182
|
prevent_initial_call=True,
|
|
3084
3183
|
)
|
|
3085
|
-
def
|
|
3086
|
-
self.
|
|
3184
|
+
def update_opacity(opacity):
|
|
3185
|
+
self.opacity = opacity
|
|
3087
3186
|
|
|
3088
3187
|
@callback(
|
|
3089
3188
|
Output("clicked-features-title", "children"),
|
|
@@ -3108,6 +3207,69 @@ class GeoExplorer:
|
|
|
3108
3207
|
" (note that for partitioned files, only partitions in bounds are loaded)",
|
|
3109
3208
|
)
|
|
3110
3209
|
|
|
3210
|
+
@callback(
    Output("image-plot", "children", allow_duplicate=True),
    Output("image-plot", "style", allow_duplicate=True),
    Output("close-image-btn", "style", allow_duplicate=True),
    Input("add-data-alert", "children"),
    Input({"type": "image", "index": dash.ALL}, "clickData"),
    State({"type": "image", "index": dash.ALL}, "n_clicks"),
    State({"type": "image", "index": dash.ALL}, "id"),
    State("image-plot", "style"),
    prevent_initial_call=True,
)
def display_pixel_plot(data_added, latlng, n_clicks, ids, plot_style):
    """Show a scatter plot of the values at a clicked image location.

    Fires when the "add-data-alert" children change or when an image
    overlay reports click data. Returns the children and style of
    "image-plot" plus the style of "close-image-btn".

    When there is no click position, the plot style is falsy and a
    previous click is stored in ``self._px_plot_conf``, the stored image
    path and point are reused so the plot is redrawn from the current
    contents of ``self._current_xarrays``.
    """

    def no_click_position():
        # evaluated lazily, exactly where the conditions below need it
        return not latlng or not any(latlng)

    img_path = get_index_if_clicks(n_clicks, ids)
    query_updated: bool = bool(
        (not plot_style) and no_click_position() and self._px_plot_conf
    )

    if query_updated:
        # redraw for the most recently clicked pixel
        selected_path = self._px_plot_conf["selected_path"]
        img_path = self._px_plot_conf["img_path"]
        point_correct_crs = self._px_plot_conf["point_correct_crs"]
    elif not img_path or no_click_position():
        return dash.no_update, dash.no_update, dash.no_update
    else:
        selected_path = next(
            candidate for candidate in self.selected_files if candidate in img_path
        )
        clicked_position = [component_id["index"] for component_id in ids].index(
            img_path
        )
        coordinates = latlng[clicked_position]["latlng"]
        lat = coordinates["lat"]
        lng = coordinates["lng"]
        clicked_point = Point(float(lng), float(lat))
        crs = self._nc[selected_path].get_crs(self._loaded_data[img_path], img_path)
        # the click arrives as lat/lng (EPSG:4326); convert to the image's crs
        point_correct_crs = (
            GeoSeries([clicked_point], crs=4326).to_crs(crs).union_all()
        )
        # remember the click so the plot can be redrawn later
        for key, value in (
            ("selected_path", selected_path),
            ("img_path", img_path),
            ("point_correct_crs", point_correct_crs),
        ):
            self._px_plot_conf[key] = value

    x = point_correct_crs.x
    y = point_correct_crs.y
    try:
        current = self._current_xarrays[img_path].copy()
        ds = current.sel(x=x, y=y, method="nearest")
    except KeyError:
        # dataset is probably unchecked
        return None, {"display": "none"}, {"display": "none"}
    return dcc.Graph(figure=pixel_value_scatter(ds)), None, None
|
|
3260
|
+
|
|
3261
|
+
@callback(
    Output("image-plot", "children", allow_duplicate=True),
    Output("image-plot", "style", allow_duplicate=True),
    Output("close-image-btn", "style", allow_duplicate=True),
    Input("close-image-btn", "n_clicks"),
    prevent_initial_call=True,
)
def remove_pixel_plot(n_clicks):
    """Clear the pixel plot and hide it and its close button on click.

    Leaves all three outputs untouched when ``n_clicks`` is falsy.
    """
    if not n_clicks:
        return dash.no_update, dash.no_update, dash.no_update
    return None, {"display": "none"}, {"display": "none"}
|
|
3272
|
+
|
|
3111
3273
|
@callback(
|
|
3112
3274
|
Output("clicked-features", "data"),
|
|
3113
3275
|
Output("clicked-ids", "data"),
|
|
@@ -3135,18 +3297,18 @@ class GeoExplorer:
|
|
|
3135
3297
|
triggered = dash.callback_context.triggered_id
|
|
3136
3298
|
debug_print("display_clicked_feature_attributes", triggered)
|
|
3137
3299
|
if triggered == "clear-table-clicked":
|
|
3138
|
-
self.
|
|
3300
|
+
self._selected_features = {}
|
|
3139
3301
|
return [], [], None
|
|
3140
3302
|
if (
|
|
3141
3303
|
triggered is None
|
|
3142
3304
|
or triggered == "update-table"
|
|
3143
3305
|
or (
|
|
3144
|
-
(self.
|
|
3306
|
+
(self._selected_features and not features)
|
|
3145
3307
|
or all(x is None for x in features)
|
|
3146
3308
|
)
|
|
3147
3309
|
):
|
|
3148
|
-
clicked_ids = list(self.
|
|
3149
|
-
clicked_features = list(self.
|
|
3310
|
+
clicked_ids = list(self._selected_features)
|
|
3311
|
+
clicked_features = list(self._selected_features.values())
|
|
3150
3312
|
return clicked_features, clicked_ids, None
|
|
3151
3313
|
|
|
3152
3314
|
if not features or all(x is None for x in features):
|
|
@@ -3230,7 +3392,7 @@ class GeoExplorer:
|
|
|
3230
3392
|
if props["id"] not in clicked_ids:
|
|
3231
3393
|
clicked_features.append(props)
|
|
3232
3394
|
clicked_ids = [x["id"] for x in clicked_features]
|
|
3233
|
-
self.
|
|
3395
|
+
self._selected_features = dict(
|
|
3234
3396
|
zip(clicked_ids, clicked_features, strict=True)
|
|
3235
3397
|
)
|
|
3236
3398
|
return clicked_features, clicked_ids, None
|
|
@@ -3299,7 +3461,7 @@ class GeoExplorer:
|
|
|
3299
3461
|
self,
|
|
3300
3462
|
[
|
|
3301
3463
|
x
|
|
3302
|
-
for x in self.
|
|
3464
|
+
for x in self._bbox_series[
|
|
3303
3465
|
lambda x: x.index.str.contains(clicked_path)
|
|
3304
3466
|
].index
|
|
3305
3467
|
if x not in self._loaded_data
|
|
@@ -3705,33 +3867,32 @@ class GeoExplorer:
|
|
|
3705
3867
|
parts = Path(path2).parts
|
|
3706
3868
|
if not all(part in parts for part in Path(path).parts):
|
|
3707
3869
|
continue
|
|
3708
|
-
for idx in list(self.
|
|
3870
|
+
for idx in list(self._selected_features):
|
|
3709
3871
|
if int(float(idx)) == i:
|
|
3710
|
-
self.
|
|
3872
|
+
self._selected_features.pop(idx)
|
|
3711
3873
|
del self._loaded_data[path2]
|
|
3712
3874
|
deleted_files2.add(path2)
|
|
3713
3875
|
|
|
3714
3876
|
debug_print(f"{deleted_files2=}")
|
|
3715
|
-
self.
|
|
3716
|
-
|
|
3717
|
-
|
|
3718
|
-
|
|
3719
|
-
self.
|
|
3720
|
-
|
|
3721
|
-
|
|
3722
|
-
|
|
3723
|
-
|
|
3724
|
-
|
|
3725
|
-
|
|
3726
|
-
|
|
3727
|
-
|
|
3728
|
-
|
|
3729
|
-
|
|
3730
|
-
|
|
3731
|
-
|
|
3732
|
-
|
|
3733
|
-
|
|
3734
|
-
self.selected_features[new_idx] = feature
|
|
3877
|
+
self._bbox_series = self._bbox_series[lambda x: ~x.index.isin(deleted_files2)]
|
|
3878
|
+
|
|
3879
|
+
self._reset()
|
|
3880
|
+
# self._max_unique_id_int = -1
|
|
3881
|
+
# for path, df in self._loaded_data.items():
|
|
3882
|
+
# self._max_unique_id_int += 1
|
|
3883
|
+
# id_prev = df.select(pl.col("_unique_id").first()).collect().item()
|
|
3884
|
+
# self._loaded_data[path] = df.with_columns(
|
|
3885
|
+
# _unique_id=_get_unique_id(self._max_unique_id_int)
|
|
3886
|
+
# )
|
|
3887
|
+
# for idx in list(self._selected_features):
|
|
3888
|
+
# if idx[0] != id_prev[0]:
|
|
3889
|
+
# continue
|
|
3890
|
+
|
|
3891
|
+
# # rounding values to avoid floating point precision problems
|
|
3892
|
+
# new_idx = f"{self._max_unique_id_int}.{idx[2:]}"
|
|
3893
|
+
# feature = self._selected_features.pop(idx)
|
|
3894
|
+
# feature["id"] = new_idx
|
|
3895
|
+
# self._selected_features[new_idx] = feature
|
|
3735
3896
|
|
|
3736
3897
|
return None, None
|
|
3737
3898
|
|
|
@@ -3744,7 +3905,7 @@ class GeoExplorer:
|
|
|
3744
3905
|
return (
|
|
3745
3906
|
sg.to_gdf(reversed(self.center), 4326)
|
|
3746
3907
|
.to_crs(3035)
|
|
3747
|
-
.buffer(
|
|
3908
|
+
.buffer(100_000 / (self.zoom**1.5))
|
|
3748
3909
|
.to_crs(4326)
|
|
3749
3910
|
.total_bounds
|
|
3750
3911
|
)
|
|
@@ -3862,9 +4023,23 @@ class GeoExplorer:
|
|
|
3862
4023
|
name = _get_stem_from_parent(path)
|
|
3863
4024
|
return name
|
|
3864
4025
|
|
|
4026
|
+
def _open_img_path_as_xarray(self, img_path, selected_path, clipped_bounds):
    """Open the part of an image inside ``clipped_bounds`` as an xarray object.

    NetCDF paths (per ``is_netcdf``) are filtered from the already loaded
    dataset through the config stored in ``self._nc[selected_path]``.
    Every other path is read with ``rasterio_to_xarray``. In both cases
    the query stored for ``selected_path`` (if any) is passed on as
    ``code_block``.
    """
    if not is_netcdf(img_path):
        return rasterio_to_xarray(
            img_path, clipped_bounds, code_block=self._queries.get(selected_path)
        )

    nc_config = self._nc[selected_path]
    return nc_config.filter_ds(
        ds=self._loaded_data[img_path],
        bounds=clipped_bounds.bounds,
        code_block=self._queries.get(selected_path),
    )
|
|
4037
|
+
|
|
3865
4038
|
@property
def _columns(self) -> dict[str, set[str]]:
    """Column names per path.

    Paths in ``self._dtypes`` get the names of their dtypes mapping.
    Paths in ``self._nc`` get the single column ``"value"`` and take
    precedence if a path occurs in both.
    """
    table_columns = {path: set(dtypes) for path, dtypes in self._dtypes.items()}
    nc_columns = {path: {"value"} for path in self._nc}
    return table_columns | nc_columns
|
|
3868
4043
|
|
|
3869
4044
|
def _has_column(self, path: str, column: str) -> bool:
|
|
3870
4045
|
return bool(
|
|
@@ -3955,56 +4130,16 @@ class GeoExplorer:
|
|
|
3955
4130
|
for path in self.selected_files:
|
|
3956
4131
|
path_parts = Path(path).parts
|
|
3957
4132
|
for key in self._loaded_data:
|
|
3958
|
-
if paths and (path not in paths and key not in paths) or key in dfs:
|
|
4133
|
+
if (paths and (path not in paths and key not in paths)) or key in dfs:
|
|
3959
4134
|
continue
|
|
3960
4135
|
key_parts = Path(key).parts
|
|
3961
4136
|
if not all(part in key_parts for part in path_parts):
|
|
3962
4137
|
continue
|
|
3963
4138
|
df = self._loaded_data[key]
|
|
4139
|
+
if isinstance(df, (Dataset | DataArray)):
|
|
4140
|
+
continue
|
|
3964
4141
|
if bounds is not None:
|
|
3965
4142
|
df = filter_by_bounds(df, bounds)
|
|
3966
|
-
if (
|
|
3967
|
-
self._deleted_categories
|
|
3968
|
-
and self.column
|
|
3969
|
-
and not self._force_categorical
|
|
3970
|
-
and self._has_column(key, self.column)
|
|
3971
|
-
and self._get_dtype(key, self.column).is_numeric()
|
|
3972
|
-
):
|
|
3973
|
-
try:
|
|
3974
|
-
error_mess = "Cannot remove categories from numeric columns. Use an SQL query instead"
|
|
3975
|
-
# make sure we only give one warning
|
|
3976
|
-
assert not any(
|
|
3977
|
-
x.children == error_mess for x in alerts if x is not None
|
|
3978
|
-
)
|
|
3979
|
-
alerts.add(
|
|
3980
|
-
dbc.Alert(
|
|
3981
|
-
error_mess,
|
|
3982
|
-
color="warning",
|
|
3983
|
-
dismissable=True,
|
|
3984
|
-
duration=5_000,
|
|
3985
|
-
)
|
|
3986
|
-
)
|
|
3987
|
-
except AssertionError:
|
|
3988
|
-
pass
|
|
3989
|
-
elif self._deleted_categories and self.column in df:
|
|
3990
|
-
try:
|
|
3991
|
-
expression = (
|
|
3992
|
-
pl.col(self.column).is_in(list(self._deleted_categories))
|
|
3993
|
-
== False
|
|
3994
|
-
)
|
|
3995
|
-
except Exception as e:
|
|
3996
|
-
raise type(e)(
|
|
3997
|
-
f"{e}. {self.column=}, {self._deleted_categories=}"
|
|
3998
|
-
)
|
|
3999
|
-
if self.nan_label in self._deleted_categories:
|
|
4000
|
-
expression &= pl.col(self.column).is_not_null()
|
|
4001
|
-
df = df.filter(expression)
|
|
4002
|
-
elif (
|
|
4003
|
-
self.nan_label in self._deleted_categories and self.column not in df
|
|
4004
|
-
):
|
|
4005
|
-
if self.splitted:
|
|
4006
|
-
df = get_split_index(df)
|
|
4007
|
-
continue
|
|
4008
4143
|
if _filter and self._queries.get(path, None) is not None:
|
|
4009
4144
|
df, alert = self._filter_data(df, self._queries[path], key)
|
|
4010
4145
|
alerts.add(alert)
|
|
@@ -4189,7 +4324,7 @@ class GeoExplorer:
|
|
|
4189
4324
|
if isinstance(called, GeoDataFrame):
|
|
4190
4325
|
called, _ = _geopandas_to_polars(called, path)
|
|
4191
4326
|
called = called.with_columns(
|
|
4192
|
-
_unique_id=_get_unique_id(list(self._loaded_data).index(path))
|
|
4327
|
+
_unique_id=_get_unique_id(list(self._loaded_data).index(path) + 999)
|
|
4193
4328
|
).lazy()
|
|
4194
4329
|
return called
|
|
4195
4330
|
if isinstance(called, GeoSeries):
|
|
@@ -4201,7 +4336,7 @@ class GeoExplorer:
|
|
|
4201
4336
|
bounds,
|
|
4202
4337
|
path,
|
|
4203
4338
|
).with_columns(
|
|
4204
|
-
_unique_id=_get_unique_id(list(self._loaded_data).index(path))
|
|
4339
|
+
_unique_id=_get_unique_id(list(self._loaded_data).index(path) + 999)
|
|
4205
4340
|
)
|
|
4206
4341
|
return called
|
|
4207
4342
|
if isinstance(called, pd.DataFrame):
|
|
@@ -4425,14 +4560,14 @@ class GeoExplorer:
|
|
|
4425
4560
|
|
|
4426
4561
|
self.wms[wms_name] = constructor(**(current_kwargs | kwargs))
|
|
4427
4562
|
|
|
4428
|
-
def
|
|
4563
|
+
def _append_to_bbox_series(self, paths, recurse: bool = True) -> None:
|
|
4429
4564
|
try:
|
|
4430
4565
|
child_paths = self._get_child_paths(paths)
|
|
4431
4566
|
self._loaded_data_sizes |= child_paths
|
|
4432
4567
|
paths_with_meta, paths_without_meta = (
|
|
4433
4568
|
self._get_paths_with_and_without_metadata(list(child_paths))
|
|
4434
4569
|
)
|
|
4435
|
-
more_bounds =
|
|
4570
|
+
more_bounds = _get_bbox_series_as_4326(
|
|
4436
4571
|
paths_with_meta,
|
|
4437
4572
|
file_system=self.file_system,
|
|
4438
4573
|
)
|
|
@@ -4441,10 +4576,10 @@ class GeoExplorer:
|
|
|
4441
4576
|
raise e
|
|
4442
4577
|
# reload file system to avoid cached reading of files that don't exist any more
|
|
4443
4578
|
self.file_system = self.file_system.__class__()
|
|
4444
|
-
return self.
|
|
4445
|
-
self.
|
|
4579
|
+
return self._append_to_bbox_series(paths, recurse=False)
|
|
4580
|
+
self._bbox_series = pd.concat(
|
|
4446
4581
|
[
|
|
4447
|
-
self.
|
|
4582
|
+
self._bbox_series,
|
|
4448
4583
|
more_bounds,
|
|
4449
4584
|
pd.Series(
|
|
4450
4585
|
[None for _ in range(len(paths_without_meta))],
|
|
@@ -4461,6 +4596,8 @@ class GeoExplorer:
|
|
|
4461
4596
|
return child_paths
|
|
4462
4597
|
|
|
4463
4598
|
def _get_paths_and_sizes(self, path):
|
|
4599
|
+
if path.startswith("http://") or path.startswith("https://"):
|
|
4600
|
+
return {_standardize_path(path): 0}
|
|
4464
4601
|
suffix = Path(path).suffix
|
|
4465
4602
|
child_pattern = f"**/*{suffix}" if suffix else "**/*.*"
|
|
4466
4603
|
child_paths = {
|
|
@@ -4482,12 +4619,12 @@ class GeoExplorer:
|
|
|
4482
4619
|
for i in range(n):
|
|
4483
4620
|
new_path = path + f"{FILE_SPLITTER_TXT}{rows_to_read}-{i}"
|
|
4484
4621
|
out_paths[new_path] = size / n
|
|
4485
|
-
more_bounds.append(GeoSeries({new_path: self.
|
|
4622
|
+
more_bounds.append(GeoSeries({new_path: self._bbox_series.loc[path]}))
|
|
4486
4623
|
new_path = path + f"{FILE_SPLITTER_TXT}{rows_to_read}-{n}"
|
|
4487
4624
|
out_paths[new_path] = size / n
|
|
4488
|
-
more_bounds.append(GeoSeries({new_path: self.
|
|
4625
|
+
more_bounds.append(GeoSeries({new_path: self._bbox_series.loc[path]}))
|
|
4489
4626
|
|
|
4490
|
-
self.
|
|
4627
|
+
self._bbox_series = pd.concat([self._bbox_series] + more_bounds)
|
|
4491
4628
|
|
|
4492
4629
|
if len(out_paths) > 1:
|
|
4493
4630
|
out_paths.pop(_standardize_path(path), None)
|
|
@@ -4523,10 +4660,19 @@ class GeoExplorer:
|
|
|
4523
4660
|
data.pop("wms_layers_checked")
|
|
4524
4661
|
|
|
4525
4662
|
if self.selected_files:
|
|
4663
|
+
|
|
4664
|
+
def as_nc_config_if_nc(path, query):
    """Wrap ``query`` in the path's nc config class, if the path has one.

    Paths without a truthy entry in ``self._nc`` get the query back unchanged.
    """
    if not self._nc.get(path):
        return query
    config_class = self._nc[path].__class__
    return config_class(query)
|
|
4668
|
+
|
|
4526
4669
|
data = {
|
|
4527
4670
|
"data": {
|
|
4528
|
-
|
|
4529
|
-
|
|
4671
|
+
path: as_nc_config_if_nc(
|
|
4672
|
+
path, _unformat_query(self._queries.get(path, ""))
|
|
4673
|
+
)
|
|
4674
|
+
or None
|
|
4675
|
+
for path in reversed(data.pop("selected_files", []))
|
|
4530
4676
|
},
|
|
4531
4677
|
**data,
|
|
4532
4678
|
}
|
|
@@ -4539,7 +4685,7 @@ class GeoExplorer:
|
|
|
4539
4685
|
data["file_system"] = data["file_system"].__class__.__name__ + "()"
|
|
4540
4686
|
|
|
4541
4687
|
if "selected_features" in data:
|
|
4542
|
-
data["selected_features"] = list(self.
|
|
4688
|
+
data["selected_features"] = list(self._selected_features)
|
|
4543
4689
|
return data
|
|
4544
4690
|
|
|
4545
4691
|
def _get_self_as_string(self, data: dict[str, Any]) -> str:
|
|
@@ -4551,7 +4697,7 @@ class GeoExplorer:
|
|
|
4551
4697
|
txt = ", ".join(f"{k}={maybe_to_string(k, v)}" for k, v in data.items())
|
|
4552
4698
|
return f"{self.__class__.__name__}({txt})"
|
|
4553
4699
|
|
|
4554
|
-
def
|
|
4700
|
+
def _get_self_as_string_except_defaults(self):
|
|
4555
4701
|
data = self._get_self_as_dict()
|
|
4556
4702
|
defaults = inspect.getfullargspec(self.__class__).kwonlydefaults
|
|
4557
4703
|
data = {
|
|
@@ -4560,14 +4706,23 @@ class GeoExplorer:
|
|
|
4560
4706
|
return self._get_self_as_string(data)
|
|
4561
4707
|
|
|
4562
4708
|
def _reset(self):
    """Renumber the ``_unique_id`` of loaded data and re-key selected features.

    A counter is restarted at -1 and incremented once per entry in
    ``self._loaded_data``, in iteration order. Entries that are ``None``
    or xarray objects consume a number but are otherwise left alone.
    Every other entry gets its ``_unique_id`` column rebuilt from the
    counter. Selected features whose ``"__file_path"`` equals the entry's
    path are moved to a new key made of the counter and the part of the
    old key after its last dot, and their ``"id"`` is updated to match.
    """
    self._max_unique_id_int = -1
    for path, df in self._loaded_data.items():
        self._max_unique_id_int += 1
        if df is None or isinstance(df, (Dataset | DataArray)):
            continue
        self._loaded_data[path] = df.with_columns(
            _unique_id=_get_unique_id(self._max_unique_id_int)
        )

        # iterate over a snapshot because the mapping is re-keyed in the loop
        for idx, feature in dict(self._selected_features).items():
            if feature["__file_path"] != path:
                continue

            # Built outside the f-string: reusing the enclosing quote character
            # inside a replacement field is a SyntaxError before Python 3.12.
            suffix = idx.split(".")[-1]
            new_idx = f"{self._max_unique_id_int}.{suffix}"
            self._selected_features.pop(idx)
            feature["id"] = new_idx
            self._selected_features[new_idx] = feature
|
|
4571
4726
|
|
|
4572
4727
|
def __str__(self) -> str:
|
|
4573
4728
|
"""String representation."""
|
|
@@ -4580,3 +4735,166 @@ class GeoExplorer:
|
|
|
4580
4735
|
pickle.dumps(value)
|
|
4581
4736
|
except pickle.PicklingError:
|
|
4582
4737
|
print(f"{variable_name} with value {value} is not pickable")
|
|
4738
|
+
|
|
4739
|
+
|
|
4740
|
+
def get_numeric_colors(values_no_nans_unique, values_no_nans, cmap, k):
    """Build a legend of value intervals and hex colors for a numeric column.

    Args:
        values_no_nans_unique: The distinct values; used directly as bins
            when there are at most ``k`` of them.
        values_no_nans: All values. Passed to ``jenks_breaks`` (as a numpy
            array) when there are more than ``k`` distinct values, and used
            for the overall minimum and maximum.
        cmap: Name of the matplotlib colormap to sample.
        k: Number of classes; ``k + 1`` colors are sampled.

    Returns:
        A tuple of the dict mapping ``"start - stop"`` labels (rounded to one
        decimal) to hex colors, and the unrounded bins.
    """
    if len(values_no_nans_unique) > k:
        bins = jenks_breaks(values_no_nans.to_numpy(), n_classes=k)
    else:
        bins = list(values_no_nans_unique)

    colormap = matplotlib.colormaps.get_cmap(cmap)
    hex_colors = [
        matplotlib.colors.to_hex(colormap(int(position)))
        for position in np.linspace(0, 255, num=k + 1)
    ]
    rounded_bins = [round(x, 1) for x in bins]

    # first entry: from the overall minimum up to the first bin
    lowest = round(min(values_no_nans), 1)
    color_dict = {f"{lowest} - {rounded_bins[0]}": hex_colors[0]}

    # middle entries: consecutive pairs of the bins after the first one
    for i, (start, stop) in enumerate(itertools.pairwise(rounded_bins[1:])):
        color_dict[f"{start} - {stop}"] = hex_colors[i + 1]

    # last entry: from the last bin up to the overall maximum
    highest = round(max(values_no_nans), 1)
    color_dict[f"{rounded_bins[-1]} - {highest}"] = hex_colors[-1]
    return color_dict, bins
|
|
4760
|
+
|
|
4761
|
+
|
|
4762
|
+
def rasterio_to_numpy(
    img_path, bbox, return_attrs: list[str] | None = None
) -> np.ndarray | tuple[Any]:
    """Read the part of a raster file that lies within ``bbox``.

    Args:
        img_path: Path passed to ``rasterio.open``.
        bbox: Geometry in EPSG:4326; its total bounds, converted to the
            raster's crs, define the window that is read.
        return_attrs: Optional names of attributes of the opened dataset
            to return together with the array.

    Returns:
        The array alone when ``return_attrs`` is empty or None, otherwise
        a tuple of the array followed by the requested attribute values.
    """
    with rasterio.open(img_path) as src:
        bbox_in_img_crs = GeoSeries([bbox], crs=4326).to_crs(src.crs)
        read_window = rasterio.windows.from_bounds(
            *bbox_in_img_crs.total_bounds, transform=src.transform
        )
        arr = src.read(window=read_window, boundless=False, masked=False)
        if return_attrs:
            attr_values = [getattr(src, attr) for attr in return_attrs]
            return (arr, *attr_values)
        return arr
|
|
4774
|
+
|
|
4775
|
+
|
|
4776
|
+
def rasterio_to_xarray(img_path, bbox, code_block):
    """Read a raster file within ``bbox`` into an xarray object with x/y coords.

    A 2D array, or a 3D array with a single band, becomes a ``DataArray``.
    A 3D array with several bands becomes a ``Dataset`` with one variable
    per band, named by the band descriptions when there is one per band
    and by band index otherwise. The result is passed through
    ``_run_code_block`` together with ``code_block``.

    If any dimension of the array has length zero, the array is wrapped
    in a ``DataArray`` without coords and returned at once, without
    running the code block.

    Raises:
        ValueError: If the array is neither 2D nor 3D.
    """
    import xarray as xr
    from rioxarray.rioxarray import _generate_spatial_coords

    arr, crs, descriptions = rasterio_to_numpy(
        img_path, bbox, return_attrs=["crs", "descriptions"]
    )
    bounds_in_img_crs = GeoSeries([bbox], crs=4326).to_crs(crs).total_bounds

    def to_data_array(values, **kwargs):
        # every array produced here shares dims and the crs attribute
        return xr.DataArray(values, dims=["y", "x"], attrs={"crs": crs}, **kwargs)

    if not all(arr.shape):
        return to_data_array(arr)

    n_dims = len(arr.shape)
    if n_dims == 3 and arr.shape[0] == 1:
        # treat a single band as a plain 2D image
        arr = arr[0]
        n_dims = 2
    elif n_dims not in (2, 3):
        raise ValueError(arr.shape)
    height, width = arr.shape[-2:]

    transform = rasterio.transform.from_bounds(*bounds_in_img_crs, width, height)
    coords = _generate_spatial_coords(transform, width, height)

    if n_dims == 2:
        ds = to_data_array(arr, coords=coords)
    else:
        if len(descriptions) != arr.shape[0]:
            # fall back to band indices as variable names
            descriptions = range(arr.shape[0])
        bands = {}
        for i, desc in enumerate(descriptions):
            bands[desc] = to_data_array(arr[i], coords=coords, name=desc)
        ds = xr.Dataset(bands)
    return _run_code_block(ds, code_block)
|
|
4828
|
+
|
|
4829
|
+
|
|
4830
|
+
def as_sized_array(arr: np.ndarray) -> np.ndarray:
    """Return ``arr`` unchanged if it supports ``len``, else wrap it in a 1-element array."""
    try:
        len(arr)
    except TypeError:
        # unsized input, e.g. a 0-dimensional array or a scalar
        return np.array([arr])
    return arr
|
|
4836
|
+
|
|
4837
|
+
|
|
4838
|
+
def pixel_value_scatter(ds: Dataset | DataArray):
    """Create a plotly scatter figure of the values in ``ds``.

    The x axis is the "time" values of ``ds`` when they can be read as a
    numpy array, and a running index otherwise. A ``Dataset`` contributes
    one color per data variable; variables that fail to convert are
    skipped. When all rows share a single non-null name, the y column is
    renamed from "value" to that name.
    """
    import plotly.express as px

    try:
        xs = as_sized_array(ds["time"].values)
        assert isinstance(xs, np.ndarray)
    except Exception:
        x = "index"
    else:
        x = "time"

    def get_df(ds, var):
        raw_values = ds.values if isinstance(ds, DataArray) else ds[var].values
        values = as_sized_array(raw_values)
        these_xs = np.arange(len(values)) if x == "index" else xs
        frame = pd.DataFrame({"value": values, x: these_xs})
        frame["color"] = var
        return frame

    if isinstance(ds, Dataset):
        frames = []
        for var in ds.data_vars:
            try:
                frame = get_df(ds, var)
            except Exception:
                continue
            frames.append(frame)
        data = pd.concat(frames)
    elif isinstance(ds, DataArray):
        data = get_df(ds, ds.name)

    colname = "value"
    if len(data["color"].unique()) == 1 and data["color"].notna().all():
        colname = next(iter(data["color"]))
        data = data.rename(columns={"value": colname}, errors="raise")

    fig = px.scatter(data, y=colname, x=x, color="color")
    fig.update_traces(marker_size=10)
    fig.update_layout(margin=dict(l=0, r=0, t=30, b=20))
    return fig
|
|
4881
|
+
|
|
4882
|
+
|
|
4883
|
+
def fix_numpy_img_shape(arr: np.ndarray) -> np.ndarray:
    """Bring an image array into a 2D or band-last 3D layout.

    2D arrays are returned as they are. A 3D array with one leading band
    is reduced to 2D, and a 3D array with three leading bands has its band
    axis moved last, giving shape (rows, columns, 3).

    Raises:
        ValueError: For any other shape.
    """
    shape = arr.shape
    n_dims = len(shape)
    if n_dims == 2:
        return arr
    if n_dims == 3:
        n_bands = shape[0]
        if n_bands == 1:
            return arr[0]
        if n_bands == 3:
            # band axis last
            return np.transpose(arr, (1, 2, 0))
    raise ValueError("Only single band or 3-band (RGB) are supported")
|
|
4893
|
+
|
|
4894
|
+
|
|
4895
|
+
def is_raster_file(path: str) -> bool:
    """Return True if the lowercased file suffix starts with ".tif" or ".nc"."""
    suffix = Path(path).suffix.lower()
    return suffix.startswith((".tif", ".nc"))
|
|
4897
|
+
|
|
4898
|
+
|
|
4899
|
+
def is_netcdf(path: str) -> bool:
    """Return True if the lowercased file suffix starts with ".nc"."""
    suffix = Path(path).suffix
    return suffix.lower().startswith(".nc")
|