geo-explorer 0.9.8__tar.gz → 0.9.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: geo-explorer
3
- Version: 0.9.8
3
+ Version: 0.9.10
4
4
  Summary: Explore geodata interactively.
5
5
  License: MIT
6
6
  Author: Morten Letnes
@@ -20,7 +20,7 @@ Requires-Dist: fsspec (>=2024.10.1)
20
20
  Requires-Dist: geopandas (>=0.14.0)
21
21
  Requires-Dist: jenkspy (>=0.3.2)
22
22
  Requires-Dist: matplotlib (>=3.7.0)
23
- Requires-Dist: msgspec (>=0.19.0,<0.20.0)
23
+ Requires-Dist: msgspec (>=0.19.0)
24
24
  Requires-Dist: numpy (>=1.26.4)
25
25
  Requires-Dist: pandas (>=2.2.1)
26
26
  Requires-Dist: polars (>=1.32.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "geo-explorer"
3
- version = "0.9.8"
3
+ version = "0.9.10"
4
4
  description = "Explore geodata interactively."
5
5
  authors = ["Morten Letnes <morten.letnes@ssb.no>"]
6
6
  license = "MIT"
@@ -36,7 +36,8 @@ fsspec = ">=2024.10.1"
36
36
  polars = ">=1.32.0"
37
37
  dash = ">=2.0.0"
38
38
  tzlocal = ">=5.3.1"
39
- msgspec = "^0.19.0"
39
+ msgspec = ">=0.19.0"
40
+ xarray = { version = ">=2024.3.0", optional = true }
40
41
 
41
42
  [tool.poetry.group.dev.dependencies]
42
43
  pygments = ">=2.10.0"
@@ -2,6 +2,7 @@ import datetime
2
2
  import time
3
3
  from concurrent.futures import ThreadPoolExecutor
4
4
  from pathlib import Path
5
+ from typing import ClassVar
5
6
 
6
7
  import dash
7
8
  import dash_bootstrap_components as dbc
@@ -21,6 +22,8 @@ from .utils import get_button_with_tooltip
21
22
 
22
23
 
23
24
  class FileBrowser:
25
+ file_formats: ClassVar[list[str]] = [".parquet", ".tif", ".tiff", ".nc"]
26
+
24
27
  def __init__(
25
28
  self,
26
29
  start_dir: str,
@@ -313,7 +316,7 @@ class FileBrowser:
313
316
  ]
314
317
  alert = None
315
318
  else:
316
- file_data_dict, file_list, alert = _list_dir(
319
+ file_data_dict, file_list, alert = self._list_dir(
317
320
  path, search_word, case_sensitive, recursive, self.file_system
318
321
  )
319
322
  if sum(sort_by_clicks):
@@ -347,143 +350,162 @@ class FileBrowser:
347
350
 
348
351
  return (file_data_dict, file_list, alert, sort_by_clicks, self._history[1:])
349
352
 
350
-
351
- def _list_dir(
352
- path: str, containing: str, case_sensitive: bool, recursive: bool, file_system
353
- ):
354
- containing = containing or ""
355
- containing = [txt.strip() for txt in containing.split(",") if txt.strip()]
356
- if (case_sensitive or 0) % 2 == 0:
357
-
358
- def _contains(path):
359
- if not containing:
360
- return True
361
- return all(
362
- any(
363
- txt.strip().lower() in path.lower()
364
- for txt in x.split("|")
365
- if txt.strip()
353
+ def _list_dir(
354
+ self,
355
+ path: str,
356
+ containing: str,
357
+ case_sensitive: bool,
358
+ recursive: bool,
359
+ file_system,
360
+ ):
361
+ containing = containing or ""
362
+ containing = [txt.strip() for txt in containing.split(",") if txt.strip()]
363
+ if (case_sensitive or 0) % 2 == 0:
364
+
365
+ def _contains(path):
366
+ if not containing:
367
+ return True
368
+ return all(
369
+ any(
370
+ txt.strip().lower() in path.lower()
371
+ for txt in x.split("|")
372
+ if txt.strip()
373
+ )
374
+ for x in containing
366
375
  )
367
- for x in containing
368
- )
369
376
 
370
- else:
377
+ else:
371
378
 
372
- def _contains(path):
373
- if not containing:
374
- return True
375
- return all(
376
- any(txt.strip() in path for txt in x.split("|") if txt.strip())
377
- for x in containing
378
- )
379
+ def _contains(path):
380
+ if not containing:
381
+ return True
382
+ return all(
383
+ any(txt.strip() in path for txt in x.split("|") if txt.strip())
384
+ for x in containing
385
+ )
379
386
 
380
- if (recursive or 0) % 2 == 0:
387
+ if (recursive or 0) % 2 == 0:
381
388
 
382
- def _ls(path):
383
- return file_system.ls(path, detail=True)
389
+ def _ls(path):
390
+ return file_system.ls(path, detail=True)
384
391
 
385
- else:
392
+ else:
386
393
 
387
- def _ls(path):
388
- path = str(Path(path) / "**")
389
- return _try_glob(path, file_system)
394
+ def _ls(path):
395
+ path = str(Path(path) / "**")
396
+ return _try_glob(path, file_system)
390
397
 
391
- try:
392
- paths = _ls(path)
393
- except Exception as e:
394
398
  try:
395
- paths = _try_glob(path, file_system)
396
- except Exception:
397
- return (
398
- [],
399
- [],
400
- dbc.Alert(
401
- f"Couldn't list files in {path}. {type(e)}: {e}",
402
- color="warning",
403
- dismissable=True,
404
- ),
405
- )
406
-
407
- if not paths:
408
- paths = _try_glob(path, file_system)
399
+ paths = _ls(path)
400
+ except Exception as e:
401
+ try:
402
+ paths = _try_glob(path, file_system)
403
+ except Exception:
404
+ return (
405
+ [],
406
+ [],
407
+ dbc.Alert(
408
+ f"Couldn't list files in {path}. {type(e)}: {e}",
409
+ color="warning",
410
+ dismissable=True,
411
+ ),
412
+ )
409
413
 
410
- if isinstance(paths, dict):
411
- paths = list(paths.values())
414
+ if not paths:
415
+ paths = _try_glob(path, file_system)
412
416
 
413
- def is_dir_or_is_partitioned_parquet(x) -> bool:
414
- return x["type"] == "directory" or any(
415
- x["name"].endswith(txt) for txt in [".parquet"]
416
- )
417
+ if isinstance(paths, dict):
418
+ paths = list(paths.values())
417
419
 
418
- paths = [
419
- x
420
- for x in paths
421
- if isinstance(x, dict)
422
- and _contains(x["name"])
423
- and is_dir_or_is_partitioned_parquet(x)
424
- and Path(path).parts != Path(x["name"]).parts
425
- ]
426
-
427
- paths.sort(key=lambda x: x["name"])
428
- isdir_list = [x["type"] == "directory" for x in paths]
429
-
430
- partitioned = {
431
- i: x
432
- for i, x in enumerate(paths)
433
- if x["type"] == "directory"
434
- and any(
435
- str(x).endswith(".parquet") for x in (x["name"], *Path(x["name"]).parents)
436
- )
437
- }
420
+ def is_dir_or_is_partitioned_parquet(x) -> bool:
421
+ return x["type"] == "directory" or any(
422
+ x["name"].endswith(txt) for txt in self.file_formats
423
+ )
438
424
 
439
- def get_summed_size_and_latest_timestamp_in_subdirs(
440
- x,
441
- ) -> tuple[float, datetime.datetime]:
442
- file_info = _try_glob(str(Path(x["name"]) / "**/*.parquet"), file_system)
425
+ paths = [
426
+ x
427
+ for x in paths
428
+ if isinstance(x, dict)
429
+ and _contains(x["name"])
430
+ and is_dir_or_is_partitioned_parquet(x)
431
+ and Path(path).parts != Path(x["name"]).parts
432
+ ]
443
433
 
444
- if isinstance(file_info, dict):
445
- file_info = list(file_info.values())
434
+ paths.sort(key=lambda x: x["name"])
435
+ isdir_list = [x["type"] == "directory" for x in paths]
446
436
 
447
- file_info = [
448
- x for x in file_info if isinstance(x, dict) and x["type"] != "directory"
449
- ]
450
- if not file_info:
451
- return 0, str(datetime.datetime.fromtimestamp(0))
452
- return sum(x["size"] for x in file_info), max(x["updated"] for x in file_info)
453
-
454
- with ThreadPoolExecutor() as executor:
455
- summed_size_ant_time = list(
456
- executor.map(
457
- get_summed_size_and_latest_timestamp_in_subdirs, partitioned.values()
437
+ partitioned = {
438
+ i: x
439
+ for i, x in enumerate(paths)
440
+ if x["type"] == "directory"
441
+ and any(
442
+ str(x).endswith(".parquet")
443
+ for x in (x["name"], *Path(x["name"]).parents)
444
+ )
445
+ }
446
+
447
+ def get_summed_size_and_latest_timestamp_in_subdirs(
448
+ x,
449
+ ) -> tuple[float, datetime.datetime]:
450
+ file_info = _try_glob(str(Path(x["name"]) / "**/*.parquet"), file_system)
451
+
452
+ if isinstance(file_info, dict):
453
+ file_info = list(file_info.values())
454
+
455
+ file_info = [
456
+ x for x in file_info if isinstance(x, dict) and x["type"] != "directory"
457
+ ]
458
+ if not file_info:
459
+ return 0, str(datetime.datetime.fromtimestamp(0))
460
+ return sum(x["size"] for x in file_info), max(
461
+ x["updated"] for x in file_info
458
462
  )
459
- )
460
- for i, (size, timestamp) in zip(partitioned, summed_size_ant_time, strict=True):
461
- paths[i]["size"] = size
462
- paths[i]["updated"] = timestamp
463
463
 
464
- return (
465
- paths,
466
- [
467
- _get_file_list_row(
468
- x["name"], x.get("updated", None), x["size"], isdir, path, file_system
464
+ with ThreadPoolExecutor() as executor:
465
+ summed_size_ant_time = list(
466
+ executor.map(
467
+ get_summed_size_and_latest_timestamp_in_subdirs,
468
+ partitioned.values(),
469
+ )
469
470
  )
470
- for x, isdir in zip(paths, isdir_list, strict=True)
471
- if isinstance(x, dict)
472
- ],
473
- None,
474
- )
471
+ for i, (size, timestamp) in zip(
472
+ partitioned, summed_size_ant_time, strict=True
473
+ ):
474
+ paths[i]["size"] = size
475
+ paths[i]["updated"] = timestamp
476
+
477
+ return (
478
+ paths,
479
+ [
480
+ _get_file_list_row(
481
+ x["name"],
482
+ x.get("updated", None),
483
+ x["size"],
484
+ isdir,
485
+ path,
486
+ self.file_formats,
487
+ file_system,
488
+ )
489
+ for x, isdir in zip(paths, isdir_list, strict=True)
490
+ if isinstance(x, dict)
491
+ ],
492
+ None,
493
+ )
475
494
 
476
495
 
477
- def _get_file_list_row(path, timestamp, size, isdir: bool, current_path, file_system):
496
+ def _get_file_list_row(
497
+ path, timestamp, size, isdir: bool, current_path, file_formats, file_system
498
+ ):
478
499
  path = _standardize_path(path)
479
500
  timestamp = str(timestamp)[:19]
480
501
  mb = str(round(size / 1_000_000, 2))
481
- is_loadable = not isdir or (
482
- path.endswith(".parquet")
502
+ is_loadable = not isdir or any(
503
+ path.endswith(file_format)
483
504
  or all(
484
- x.endswith(".parquet") or _standardize_path(x) == path
505
+ x.endswith(file_format) or _standardize_path(x) == path
485
506
  for x in file_system.ls(path)
486
507
  )
508
+ for file_format in file_formats
487
509
  )
488
510
  if is_loadable:
489
511
  button = html.Button(