geo-explorer 0.9.8__tar.gz → 0.9.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/PKG-INFO +2 -2
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/pyproject.toml +3 -2
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/file_browser.py +134 -112
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/geo_explorer.py +630 -312
- geo_explorer-0.9.9/src/geo_explorer/nc.py +199 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/utils.py +82 -33
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/LICENSE +0 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/LICENSE.md +0 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/README.md +0 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/__init__.py +0 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/assets/chroma.min.js +0 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/assets/on_each_feature.js +0 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/assets/stylesheet.css +0 -0
- {geo_explorer-0.9.8 → geo_explorer-0.9.9}/src/geo_explorer/fs.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: geo-explorer
|
|
3
|
-
Version: 0.9.8
|
|
3
|
+
Version: 0.9.9
|
|
4
4
|
Summary: Explore geodata interactively.
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Morten Letnes
|
|
@@ -20,7 +20,7 @@ Requires-Dist: fsspec (>=2024.10.1)
|
|
|
20
20
|
Requires-Dist: geopandas (>=0.14.0)
|
|
21
21
|
Requires-Dist: jenkspy (>=0.3.2)
|
|
22
22
|
Requires-Dist: matplotlib (>=3.7.0)
|
|
23
|
-
Requires-Dist: msgspec (>=0.19.0
|
|
23
|
+
Requires-Dist: msgspec (>=0.19.0)
|
|
24
24
|
Requires-Dist: numpy (>=1.26.4)
|
|
25
25
|
Requires-Dist: pandas (>=2.2.1)
|
|
26
26
|
Requires-Dist: polars (>=1.32.0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "geo-explorer"
|
|
3
|
-
version = "0.9.8"
|
|
3
|
+
version = "0.9.9"
|
|
4
4
|
description = "Explore geodata interactively."
|
|
5
5
|
authors = ["Morten Letnes <morten.letnes@ssb.no>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -36,7 +36,8 @@ fsspec = ">=2024.10.1"
|
|
|
36
36
|
polars = ">=1.32.0"
|
|
37
37
|
dash = ">=2.0.0"
|
|
38
38
|
tzlocal = ">=5.3.1"
|
|
39
|
-
msgspec = "
|
|
39
|
+
msgspec = ">=0.19.0"
|
|
40
|
+
xarray = { version = ">=2024.3.0", optional = true }
|
|
40
41
|
|
|
41
42
|
[tool.poetry.group.dev.dependencies]
|
|
42
43
|
pygments = ">=2.10.0"
|
|
@@ -2,6 +2,7 @@ import datetime
|
|
|
2
2
|
import time
|
|
3
3
|
from concurrent.futures import ThreadPoolExecutor
|
|
4
4
|
from pathlib import Path
|
|
5
|
+
from typing import ClassVar
|
|
5
6
|
|
|
6
7
|
import dash
|
|
7
8
|
import dash_bootstrap_components as dbc
|
|
@@ -21,6 +22,8 @@ from .utils import get_button_with_tooltip
|
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class FileBrowser:
|
|
25
|
+
file_formats: ClassVar[list[str]] = [".parquet", ".tif", ".tiff", ".nc"]
|
|
26
|
+
|
|
24
27
|
def __init__(
|
|
25
28
|
self,
|
|
26
29
|
start_dir: str,
|
|
@@ -313,7 +316,7 @@ class FileBrowser:
|
|
|
313
316
|
]
|
|
314
317
|
alert = None
|
|
315
318
|
else:
|
|
316
|
-
file_data_dict, file_list, alert = _list_dir(
|
|
319
|
+
file_data_dict, file_list, alert = self._list_dir(
|
|
317
320
|
path, search_word, case_sensitive, recursive, self.file_system
|
|
318
321
|
)
|
|
319
322
|
if sum(sort_by_clicks):
|
|
@@ -347,143 +350,162 @@ class FileBrowser:
|
|
|
347
350
|
|
|
348
351
|
return (file_data_dict, file_list, alert, sort_by_clicks, self._history[1:])
|
|
349
352
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
353
|
+
def _list_dir(
|
|
354
|
+
self,
|
|
355
|
+
path: str,
|
|
356
|
+
containing: str,
|
|
357
|
+
case_sensitive: bool,
|
|
358
|
+
recursive: bool,
|
|
359
|
+
file_system,
|
|
360
|
+
):
|
|
361
|
+
containing = containing or ""
|
|
362
|
+
containing = [txt.strip() for txt in containing.split(",") if txt.strip()]
|
|
363
|
+
if (case_sensitive or 0) % 2 == 0:
|
|
364
|
+
|
|
365
|
+
def _contains(path):
|
|
366
|
+
if not containing:
|
|
367
|
+
return True
|
|
368
|
+
return all(
|
|
369
|
+
any(
|
|
370
|
+
txt.strip().lower() in path.lower()
|
|
371
|
+
for txt in x.split("|")
|
|
372
|
+
if txt.strip()
|
|
373
|
+
)
|
|
374
|
+
for x in containing
|
|
366
375
|
)
|
|
367
|
-
for x in containing
|
|
368
|
-
)
|
|
369
376
|
|
|
370
|
-
|
|
377
|
+
else:
|
|
371
378
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
+
def _contains(path):
|
|
380
|
+
if not containing:
|
|
381
|
+
return True
|
|
382
|
+
return all(
|
|
383
|
+
any(txt.strip() in path for txt in x.split("|") if txt.strip())
|
|
384
|
+
for x in containing
|
|
385
|
+
)
|
|
379
386
|
|
|
380
|
-
|
|
387
|
+
if (recursive or 0) % 2 == 0:
|
|
381
388
|
|
|
382
|
-
|
|
383
|
-
|
|
389
|
+
def _ls(path):
|
|
390
|
+
return file_system.ls(path, detail=True)
|
|
384
391
|
|
|
385
|
-
|
|
392
|
+
else:
|
|
386
393
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
394
|
+
def _ls(path):
|
|
395
|
+
path = str(Path(path) / "**")
|
|
396
|
+
return _try_glob(path, file_system)
|
|
390
397
|
|
|
391
|
-
try:
|
|
392
|
-
paths = _ls(path)
|
|
393
|
-
except Exception as e:
|
|
394
398
|
try:
|
|
395
|
-
paths =
|
|
396
|
-
except Exception:
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
399
|
+
paths = _ls(path)
|
|
400
|
+
except Exception as e:
|
|
401
|
+
try:
|
|
402
|
+
paths = _try_glob(path, file_system)
|
|
403
|
+
except Exception:
|
|
404
|
+
return (
|
|
405
|
+
[],
|
|
406
|
+
[],
|
|
407
|
+
dbc.Alert(
|
|
408
|
+
f"Couldn't list files in {path}. {type(e)}: {e}",
|
|
409
|
+
color="warning",
|
|
410
|
+
dismissable=True,
|
|
411
|
+
),
|
|
412
|
+
)
|
|
409
413
|
|
|
410
|
-
|
|
411
|
-
|
|
414
|
+
if not paths:
|
|
415
|
+
paths = _try_glob(path, file_system)
|
|
412
416
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
x["name"].endswith(txt) for txt in [".parquet"]
|
|
416
|
-
)
|
|
417
|
+
if isinstance(paths, dict):
|
|
418
|
+
paths = list(paths.values())
|
|
417
419
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
and _contains(x["name"])
|
|
423
|
-
and is_dir_or_is_partitioned_parquet(x)
|
|
424
|
-
and Path(path).parts != Path(x["name"]).parts
|
|
425
|
-
]
|
|
426
|
-
|
|
427
|
-
paths.sort(key=lambda x: x["name"])
|
|
428
|
-
isdir_list = [x["type"] == "directory" for x in paths]
|
|
429
|
-
|
|
430
|
-
partitioned = {
|
|
431
|
-
i: x
|
|
432
|
-
for i, x in enumerate(paths)
|
|
433
|
-
if x["type"] == "directory"
|
|
434
|
-
and any(
|
|
435
|
-
str(x).endswith(".parquet") for x in (x["name"], *Path(x["name"]).parents)
|
|
436
|
-
)
|
|
437
|
-
}
|
|
420
|
+
def is_dir_or_is_partitioned_parquet(x) -> bool:
|
|
421
|
+
return x["type"] == "directory" or any(
|
|
422
|
+
x["name"].endswith(txt) for txt in self.file_formats
|
|
423
|
+
)
|
|
438
424
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
425
|
+
paths = [
|
|
426
|
+
x
|
|
427
|
+
for x in paths
|
|
428
|
+
if isinstance(x, dict)
|
|
429
|
+
and _contains(x["name"])
|
|
430
|
+
and is_dir_or_is_partitioned_parquet(x)
|
|
431
|
+
and Path(path).parts != Path(x["name"]).parts
|
|
432
|
+
]
|
|
443
433
|
|
|
444
|
-
|
|
445
|
-
|
|
434
|
+
paths.sort(key=lambda x: x["name"])
|
|
435
|
+
isdir_list = [x["type"] == "directory" for x in paths]
|
|
446
436
|
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
437
|
+
partitioned = {
|
|
438
|
+
i: x
|
|
439
|
+
for i, x in enumerate(paths)
|
|
440
|
+
if x["type"] == "directory"
|
|
441
|
+
and any(
|
|
442
|
+
str(x).endswith(".parquet")
|
|
443
|
+
for x in (x["name"], *Path(x["name"]).parents)
|
|
444
|
+
)
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
def get_summed_size_and_latest_timestamp_in_subdirs(
|
|
448
|
+
x,
|
|
449
|
+
) -> tuple[float, datetime.datetime]:
|
|
450
|
+
file_info = _try_glob(str(Path(x["name"]) / "**/*.parquet"), file_system)
|
|
451
|
+
|
|
452
|
+
if isinstance(file_info, dict):
|
|
453
|
+
file_info = list(file_info.values())
|
|
454
|
+
|
|
455
|
+
file_info = [
|
|
456
|
+
x for x in file_info if isinstance(x, dict) and x["type"] != "directory"
|
|
457
|
+
]
|
|
458
|
+
if not file_info:
|
|
459
|
+
return 0, str(datetime.datetime.fromtimestamp(0))
|
|
460
|
+
return sum(x["size"] for x in file_info), max(
|
|
461
|
+
x["updated"] for x in file_info
|
|
458
462
|
)
|
|
459
|
-
)
|
|
460
|
-
for i, (size, timestamp) in zip(partitioned, summed_size_ant_time, strict=True):
|
|
461
|
-
paths[i]["size"] = size
|
|
462
|
-
paths[i]["updated"] = timestamp
|
|
463
463
|
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
464
|
+
with ThreadPoolExecutor() as executor:
|
|
465
|
+
summed_size_ant_time = list(
|
|
466
|
+
executor.map(
|
|
467
|
+
get_summed_size_and_latest_timestamp_in_subdirs,
|
|
468
|
+
partitioned.values(),
|
|
469
|
+
)
|
|
469
470
|
)
|
|
470
|
-
for
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
471
|
+
for i, (size, timestamp) in zip(
|
|
472
|
+
partitioned, summed_size_ant_time, strict=True
|
|
473
|
+
):
|
|
474
|
+
paths[i]["size"] = size
|
|
475
|
+
paths[i]["updated"] = timestamp
|
|
476
|
+
|
|
477
|
+
return (
|
|
478
|
+
paths,
|
|
479
|
+
[
|
|
480
|
+
_get_file_list_row(
|
|
481
|
+
x["name"],
|
|
482
|
+
x.get("updated", None),
|
|
483
|
+
x["size"],
|
|
484
|
+
isdir,
|
|
485
|
+
path,
|
|
486
|
+
self.file_formats,
|
|
487
|
+
file_system,
|
|
488
|
+
)
|
|
489
|
+
for x, isdir in zip(paths, isdir_list, strict=True)
|
|
490
|
+
if isinstance(x, dict)
|
|
491
|
+
],
|
|
492
|
+
None,
|
|
493
|
+
)
|
|
475
494
|
|
|
476
495
|
|
|
477
|
-
def _get_file_list_row(
|
|
496
|
+
def _get_file_list_row(
|
|
497
|
+
path, timestamp, size, isdir: bool, current_path, file_formats, file_system
|
|
498
|
+
):
|
|
478
499
|
path = _standardize_path(path)
|
|
479
500
|
timestamp = str(timestamp)[:19]
|
|
480
501
|
mb = str(round(size / 1_000_000, 2))
|
|
481
|
-
is_loadable = not isdir or (
|
|
482
|
-
path.endswith(
|
|
502
|
+
is_loadable = not isdir or any(
|
|
503
|
+
path.endswith(file_format)
|
|
483
504
|
or all(
|
|
484
|
-
x.endswith(
|
|
505
|
+
x.endswith(file_format) or _standardize_path(x) == path
|
|
485
506
|
for x in file_system.ls(path)
|
|
486
507
|
)
|
|
508
|
+
for file_format in file_formats
|
|
487
509
|
)
|
|
488
510
|
if is_loadable:
|
|
489
511
|
button = html.Button(
|