agi-page-geospatial-map 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: agi-page-geospatial-map
3
+ Version: 0.1.0
4
+ Summary: AGILAB page bundle for 2D geospatial map exploration.
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: agi-gui<2027.0,>=2026.05.12.post3
8
+ Requires-Dist: agi-node<2027.0,>=2026.05.12.post3
9
+ Requires-Dist: plotly>=6.3.0
10
+ Requires-Dist: sqlalchemy>=2.0.43
11
+
12
+ AGILAB page bundle for 2D geospatial map exploration.
@@ -0,0 +1,20 @@
1
+ [project]
2
+ name = "agi-page-geospatial-map"
3
+ version = "0.1.0"
4
+ description = "AGILAB page bundle for 2D geospatial map exploration."
5
+ readme = { text = "AGILAB page bundle for 2D geospatial map exploration.", content-type = "text/markdown" }
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "agi-gui>=2026.05.12.post3,<2027.0",
9
+ "agi-node>=2026.05.12.post3,<2027.0",
10
+ "plotly>=6.3.0",
11
+ "sqlalchemy>=2.0.43",
12
+ ]
13
+
14
+ [project.entry-points."agilab.pages"]
15
+ view_maps = "view_maps:bundle_root"
16
+
17
+ [tool.uv.sources]
18
+ agi-gui = { path = "../../lib/agi-gui", editable = true }
19
+ agi-env = { path = "../../core/agi-env", editable = true }
20
+ agi-node = { path = "../../core/agi-node", editable = true }
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: agi-page-geospatial-map
3
+ Version: 0.1.0
4
+ Summary: AGILAB page bundle for 2D geospatial map exploration.
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: agi-gui<2027.0,>=2026.05.12.post3
8
+ Requires-Dist: agi-node<2027.0,>=2026.05.12.post3
9
+ Requires-Dist: plotly>=6.3.0
10
+ Requires-Dist: sqlalchemy>=2.0.43
11
+
12
+ AGILAB page bundle for 2D geospatial map exploration.
@@ -0,0 +1,10 @@
1
+ pyproject.toml
2
+ src/agi_page_geospatial_map.egg-info/PKG-INFO
3
+ src/agi_page_geospatial_map.egg-info/SOURCES.txt
4
+ src/agi_page_geospatial_map.egg-info/dependency_links.txt
5
+ src/agi_page_geospatial_map.egg-info/entry_points.txt
6
+ src/agi_page_geospatial_map.egg-info/requires.txt
7
+ src/agi_page_geospatial_map.egg-info/top_level.txt
8
+ src/view_maps/__init__.py
9
+ src/view_maps/maps.py
10
+ src/view_maps/view_maps.py
@@ -0,0 +1,2 @@
1
+ [agilab.pages]
2
+ view_maps = view_maps:bundle_root
@@ -0,0 +1,4 @@
1
+ agi-gui<2027.0,>=2026.05.12.post3
2
+ agi-node<2027.0,>=2026.05.12.post3
3
+ plotly>=6.3.0
4
+ sqlalchemy>=2.0.43
@@ -0,0 +1,7 @@
1
+ from pathlib import Path
2
+
3
+
4
def bundle_root() -> Path:
    """Return the directory that contains this installed page-bundle package."""
    module_file = Path(__file__)
    # Resolve first so the parent is taken from the absolute, symlink-free path.
    return module_file.resolve().parent
@@ -0,0 +1,14 @@
1
+ """Support module for the cartography Streamlit page."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
try:
    # Preferred path: re-export the page implementation via a relative import.
    from .view_maps import *  # type: ignore # noqa: F401,F403
except ImportError:  # pragma: no cover
    # Fallback when the relative import fails (for example when this file is
    # not loaded as part of a package): expose this directory on sys.path and
    # import the sibling module by its bare name instead.
    _HERE = Path(__file__).resolve().parent
    if sys.path.count(str(_HERE)) == 0:
        sys.path.insert(0, str(_HERE))
    from view_maps import *  # type: ignore # noqa: F401,F403
@@ -0,0 +1,935 @@
1
+ # BSD 3-Clause License
2
+ #
3
+ # Copyright (c) 2025, Jean-Pierre Morard, THALES SIX GTS France SAS
4
+ # All rights reserved.
5
+ #
6
+ # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10
+ # 3. Neither the name of Jean-Pierre Morard nor the names of its contributors, or THALES SIX GTS France SAS, may be used to endorse or promote products derived from this software without specific prior written permission.
11
+ #
12
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13
+
14
+ import argparse
15
+ import math
16
+ import os
17
+ from pathlib import Path
18
+ import re
19
+ import sys
20
+
21
+ import pandas as pd
22
+ from pandas.api.types import is_integer_dtype, is_numeric_dtype
23
+ import plotly.express as px
24
+ import plotly.graph_objects as go
25
+ import streamlit as st
26
+ import tomllib as _toml
27
+ from agi_env.app_settings_support import prepare_app_settings_for_write
28
+
29
+ try:
30
+ import tomli_w as _toml_writer # type: ignore[import-not-found]
31
+
32
+ def _dump_toml_payload(data: dict, handle) -> None:
33
+ _toml_writer.dump(data, handle)
34
+
35
+ except ModuleNotFoundError: # pragma: no cover - fallback for lightweight envs
36
+ try:
37
+ from tomlkit import dumps as _tomlkit_dumps
38
+
39
+ def _dump_toml_payload(data: dict, handle) -> None:
40
+ handle.write(_tomlkit_dumps(data).encode("utf-8"))
41
+
42
+ except ImportError as _toml_exc:
43
+
44
+ def _dump_toml_payload(data: dict, handle) -> None:
45
+ raise RuntimeError(
46
+ "Writing settings requires the 'tomli-w' or 'tomlkit' package"
47
+ ) from _toml_exc
48
+
49
+
50
+ def _ensure_repo_on_path() -> None:
51
+ here = Path(__file__).resolve()
52
+ for parent in here.parents:
53
+ candidate = parent / "agilab"
54
+ if candidate.is_dir():
55
+ src_root = candidate.parent
56
+ repo_root = src_root.parent
57
+ for entry in (str(src_root), str(repo_root)):
58
+ if entry not in sys.path:
59
+ sys.path.insert(0, entry)
60
+ break
61
+
62
+
63
+ _ensure_repo_on_path()
64
+
65
+ def _default_app() -> Path | None:
66
+ apps_path = Path(__file__).resolve().parents[4] / "apps"
67
+ if not apps_path.exists():
68
+ return None
69
+ for candidate in sorted(apps_path.iterdir()):
70
+ if (
71
+ candidate.is_dir()
72
+ and candidate.name.endswith("_project")
73
+ and not candidate.name.startswith(".")
74
+ ):
75
+ return candidate
76
+ return None
77
+
78
+
79
+ from agi_env import AgiEnv
80
+ from agi_gui.pagelib import find_files, load_df, update_datadir, initialize_csv_files
81
+
82
# Names used as selectbox labels / session-state keys by the page; the last
# two also act as regex filters to find latitude and longitude columns.
var = ["discrete", "continuous", "lat", "long"]
# Default option index for the "discrete" and "continuous" selectboxes.
var_default = [0, None]
# File suffixes recognised as datasets.
DATASET_EXTENSIONS = (".csv", ".parquet", ".json")
# Choices of the "File type" selector: each suffix without its dot, plus "all".
FILE_TYPE_OPTIONS = tuple(ext.lstrip(".") for ext in DATASET_EXTENSIONS) + ("all",)
# Choices of the "Dataset selection" radio.
DF_SELECTION_MODES = ("Single file", "Multi-select", "Regex (multi)")
# Namespace prefix for this page's widget keys.
PAGE_KEY_PREFIX = "view_maps"
88
+
89
+
90
def _vm_key(name: str) -> str:
    """Return ``name`` namespaced with the page prefix, as ``<prefix>:<name>``."""
    return "{}:{}".format(PAGE_KEY_PREFIX, name)
92
+
93
+
94
def _discover_dataset_files(datadir: Path, ext_choice: str) -> list[Path]:
    """Collect dataset files found under ``datadir`` for the chosen file type.

    ``ext_choice`` is either ``"all"`` (every suffix in ``DATASET_EXTENSIONS``)
    or a bare extension such as ``"csv"``. Results are concatenated in suffix
    order, one ``find_files`` call per suffix.
    """
    if ext_choice == "all":
        suffixes = DATASET_EXTENSIONS
    else:
        suffixes = (f".{ext_choice}",)
    return [
        found
        for suffix in suffixes
        for found in find_files(datadir, ext=suffix)
    ]
100
+
101
+
102
+ def _visible_dataset_files(datadir: Path, files: list[Path]) -> list[Path]:
103
+ visible_files: list[Path] = []
104
+ for file_path in files:
105
+ try:
106
+ parts = file_path.relative_to(datadir).parts
107
+ except (OSError, RuntimeError, ValueError):
108
+ parts = file_path.parts
109
+ if any(part.startswith(".") for part in parts):
110
+ continue
111
+ visible_files.append(file_path)
112
+ return visible_files
113
+
114
+ st.title(":world_map: Cartography Visualization")
115
+
116
+
117
def continuous():
    """Record 'continuous' as the active colouring column type in session state."""
    st.session_state.coltype = "continuous"
120
+
121
+
122
def discrete():
    """Record 'discrete' as the active colouring column type in session state."""
    st.session_state.coltype = "discrete"
125
+
126
+ # Default to 'discrete'
127
+
128
+
129
def downsample_df_deterministic(df: pd.DataFrame, ratio: int) -> pd.DataFrame:
    """Keep one row out of every ``ratio`` rows, starting with the first.

    Args:
        df: Frame to thin out. It is not modified.
        ratio: Positive step between kept rows (``ratio=20`` keeps the rows at
            positions 0, 20, 40, ...).

    Returns:
        A new frame holding the kept rows, with a fresh 0-based index.

    Raises:
        ValueError: If ``ratio`` is zero or negative.
    """
    if ratio <= 0:
        raise ValueError("`ratio` must be a positive integer.")
    # Drop the original index so the selection is purely positional.
    renumbered = df.reset_index(drop=True)
    kept = renumbered.iloc[slice(None, None, ratio)].copy()
    return kept.reset_index(drop=True)
153
+
154
+
155
+ def _compute_zoom_from_span(span_deg: float) -> float:
156
+ """Approximate a map zoom level based on the largest lat/lon span."""
157
+ thresholds = [
158
+ (160, 1),
159
+ (80, 2),
160
+ (40, 3),
161
+ (20, 4),
162
+ (10, 5),
163
+ (5, 6),
164
+ (2.5, 7),
165
+ (1.2, 8),
166
+ (0.6, 9),
167
+ (0.3, 10),
168
+ (0.15, 11),
169
+ (0.075, 12),
170
+ (0.035, 13),
171
+ (0.018, 14),
172
+ ]
173
+ for threshold, zoom in thresholds:
174
+ if span_deg > threshold:
175
+ return zoom
176
+ return 15
177
+
178
+
179
def _compute_viewport(df: pd.DataFrame, lat_col: str, lon_col: str) -> dict[str, float] | None:
    """Derive a map centre and zoom that frame the points in ``df``.

    Both columns are coerced to numbers and non-numeric entries are dropped
    (independently per column). The centre is the midpoint of each column's
    min/max; the zoom comes from the larger of the two spans.

    Returns:
        A dict with ``center_lat``, ``center_lon`` and ``default_zoom``, or
        ``None`` when a column cannot be read or has no numeric value.
    """
    try:
        lat_values = pd.to_numeric(df[lat_col], errors="coerce").dropna()
        lon_values = pd.to_numeric(df[lon_col], errors="coerce").dropna()
    except (KeyError, RuntimeError, ValueError):
        return None
    if latitudes_missing := lat_values.empty:
        return None
    if lon_values.empty:
        return None

    south, north = lat_values.min(), lat_values.max()
    west, east = lon_values.min(), lon_values.max()
    widest = max(abs(north - south), abs(east - west))
    # A zero span (single point) is replaced by a tiny span to pick a zoom.
    zoom_level = _compute_zoom_from_span(widest if widest > 0 else 0.01)
    return {
        "center_lat": float((south + north) / 2),
        "center_lon": float((west + east) / 2),
        "default_zoom": zoom_level,
    }
197
+
198
+
199
+ def _load_map_defaults(env: AgiEnv) -> dict[str, float]:
200
+ """Read custom map settings from app_settings.toml when available."""
201
+
202
+ try:
203
+ with open(env.app_settings_file, "rb") as fh:
204
+ data = _toml.load(fh)
205
+ except FileNotFoundError:
206
+ data = {}
207
+ map_cfg = data.get("ui", {}).get(
208
+ "map",
209
+ {"center_lat": 0.0, "center_lon": 0.0, "default_zoom": 2.5},
210
+ )
211
+ return {
212
+ "center_lat": float(map_cfg.get("center_lat", 0.0)),
213
+ "center_lon": float(map_cfg.get("center_lon", 0.0)),
214
+ "default_zoom": float(map_cfg.get("default_zoom", 2.5)),
215
+ }
216
+
217
+
218
+ def _load_view_maps_settings(env: AgiEnv) -> tuple[dict, dict]:
219
+ """Return the full TOML payload and the view_maps subsection."""
220
+ try:
221
+ with open(env.app_settings_file, "rb") as fh:
222
+ data = _toml.load(fh)
223
+ except FileNotFoundError:
224
+ data = {}
225
+ except (OSError, _toml.TOMLDecodeError):
226
+ data = {}
227
+ view_section = data.get("view_maps")
228
+ if not isinstance(view_section, dict):
229
+ view_section = {}
230
+ return data, view_section
231
+
232
+
233
def _persist_view_maps_settings(env: AgiEnv, base_settings: dict, view_settings: dict) -> dict:
    """Write the settings back to disk with an updated ``view_maps`` section.

    Args:
        env: Object exposing ``app_settings_file``, the path of the TOML file.
        base_settings: Full settings document; a shallow copy is used. Anything
            that is not a dict is treated as an empty document.
        view_settings: Value stored under the ``view_maps`` key.

    Returns:
        The merged payload, whether or not it could be written.
    """
    import io

    payload = dict(base_settings) if isinstance(base_settings, dict) else {}
    payload["view_maps"] = view_settings
    try:
        # Serialize in memory first: opening the file in "wb" truncates it, so
        # a failure while dumping would otherwise wipe the existing settings.
        buffer = io.BytesIO()
        _dump_toml_payload(prepare_app_settings_for_write(payload), buffer)
        with open(env.app_settings_file, "wb") as fh:
            fh.write(buffer.getvalue())
    except (OSError, RuntimeError):
        # Persistence is best effort: the page keeps working from the
        # in-memory payload when the file cannot be written.
        pass
    return payload
243
+
244
+
245
+ def page(env):
246
+ """
247
+ Page function for displaying and interacting with data in a Streamlit app.
248
+
249
+ This function sets up the page layout and functionality for displaying and interacting with data in a Streamlit app.
250
+
251
+ It handles the following key tasks:
252
+ - Setting up default values for session state variables related to the project, help path, and available projects.
253
+ - Checking and validating the data directory path, and displaying appropriate messages if it is invalid or not found.
254
+ - Loading and displaying the selected data file in a DataFrame.
255
+ - Allowing users to select columns for visualizations and customization options like color sequence and scale.
256
+ - Generating and displaying interactive scatter maps based on selected columns for latitude, longitude, and coloring.
257
+
258
+ No specific Args are passed to this function as it directly interacts with and manipulates the page layout and user inputs in a Streamlit app.
259
+
260
+ Returns:
261
+ None
262
+
263
+ Raises:
264
+ None
265
+ """
266
+
267
+ if "project" not in st.session_state:
268
+ st.session_state["project"] = env.target
269
+
270
+ if "projects" not in st.session_state:
271
+ st.session_state["projects"] = env.projects
272
+
273
+ full_settings, view_settings = _load_view_maps_settings(env)
274
+ for k in (
275
+ "df_files_selected",
276
+ "df_select_mode",
277
+ "df_file_regex",
278
+ "file_ext_choice",
279
+ "discrete",
280
+ "continuous",
281
+ "lat",
282
+ "long",
283
+ "coltype",
284
+ ):
285
+ if k in view_settings and k not in st.session_state:
286
+ st.session_state[k] = view_settings[k]
287
+
288
+ map_defaults_key = f"_view_maps_map_defaults_{env.app}"
289
+
290
+ # Resolve the data directory for the currently selected app
291
+ default_datadir = Path(env.AGILAB_EXPORT_ABS) / env.target
292
+ last_target_key = "_view_maps_last_target"
293
+ last_target = st.session_state.get(last_target_key)
294
+
295
+ current = st.session_state.get("datadir")
296
+ if (
297
+ last_target != env.target
298
+ or current is None
299
+ or str(current).strip() == ""
300
+ ):
301
+ current = str(view_settings.get("datadir") or default_datadir)
302
+ st.session_state["datadir"] = str(current)
303
+
304
+ st.session_state["datadir_str"] = st.session_state["datadir"]
305
+ st.session_state[last_target_key] = env.target
306
+ if (
307
+ map_defaults_key not in st.session_state
308
+ or last_target != env.target
309
+ ):
310
+ st.session_state[map_defaults_key] = _load_map_defaults(env)
311
+ datadir = Path(st.session_state["datadir"])
312
+ datadir_changed = st.session_state.get("_view_maps_last_datadir") != str(datadir)
313
+ st.session_state["_view_maps_last_datadir"] = str(datadir)
314
+ if view_settings.get("datadir") != st.session_state["datadir"]:
315
+ view_settings["datadir"] = st.session_state["datadir"]
316
+ full_settings = _persist_view_maps_settings(env, full_settings, view_settings)
317
+ datadir_widget_key = _vm_key("input_datadir")
318
+ if st.session_state.get(datadir_widget_key) != st.session_state["datadir"]:
319
+ st.session_state[datadir_widget_key] = st.session_state["datadir"]
320
+ # Data directory input
321
+ st.sidebar.text_input(
322
+ "Data Directory",
323
+ key=datadir_widget_key,
324
+ on_change=update_datadir,
325
+ args=("datadir", datadir_widget_key),
326
+ )
327
+
328
+ if not datadir.exists() or not datadir.is_dir():
329
+ st.sidebar.error("Directory not found.")
330
+ st.warning("A valid data directory is required to proceed.")
331
+ return # Stop further processing
332
+
333
+ file_ext_key = _vm_key("file_ext_choice")
334
+ ext_default = str(view_settings.get("file_ext_choice", "all")).lower()
335
+ if ext_default not in FILE_TYPE_OPTIONS:
336
+ ext_default = "all"
337
+ if st.session_state.get(file_ext_key) not in FILE_TYPE_OPTIONS:
338
+ st.session_state[file_ext_key] = ext_default
339
+ ext_choice = st.sidebar.selectbox(
340
+ "File type",
341
+ FILE_TYPE_OPTIONS,
342
+ key=file_ext_key,
343
+ )
344
+ st.session_state["file_ext_choice"] = ext_choice
345
+
346
+ # Find dataset files in the data directory
347
+ dataset_key = "dataset_files"
348
+ legacy_key = "csv_files"
349
+ if dataset_key not in st.session_state and legacy_key in st.session_state:
350
+ st.session_state[dataset_key] = st.session_state.pop(legacy_key)
351
+
352
+ try:
353
+ dataset_files = _discover_dataset_files(datadir, ext_choice=ext_choice)
354
+ except NotADirectoryError as exc:
355
+ st.warning(str(exc))
356
+ dataset_files = []
357
+ dataset_files = _visible_dataset_files(datadir, dataset_files)
358
+
359
+ st.session_state[dataset_key] = dataset_files
360
+ if not st.session_state[dataset_key]:
361
+ st.warning(
362
+ f"No dataset found in {datadir} (filter: {ext_choice}). "
363
+ "Use the EXECUTE → EXPORT workflow to materialize CSV/Parquet/JSON outputs first."
364
+ )
365
+ st.stop() # Stop further processing
366
+
367
+ # Prepare list of dataset files relative to the data directory
368
+ dataset_files_rel = sorted(
369
+ {
370
+ Path(file).relative_to(datadir).as_posix()
371
+ for file in st.session_state[dataset_key]
372
+ }
373
+ )
374
+
375
+ # Prefer the consolidated export file when present.
376
+ priority_files = [
377
+ candidate
378
+ for candidate in dataset_files_rel
379
+ if Path(candidate).name.lower() in {"export.csv", "export.parquet", "export.json"}
380
+ ]
381
+ settings_files = view_settings.get("df_files_selected") or []
382
+ if not settings_files:
383
+ legacy_setting = view_settings.get("df_file")
384
+ settings_files = [legacy_setting] if legacy_setting else []
385
+ if settings_files and all(item in dataset_files_rel for item in settings_files):
386
+ default_selection = settings_files
387
+ else:
388
+ default_selection = [priority_files[0]] if priority_files else (dataset_files_rel[:1] if dataset_files_rel else [])
389
+
390
+ selection_mode_key = _vm_key("df_select_mode")
391
+ mode_default = str(view_settings.get("df_select_mode", "Multi-select"))
392
+ if mode_default not in DF_SELECTION_MODES:
393
+ mode_default = "Multi-select"
394
+ if st.session_state.get(selection_mode_key) not in DF_SELECTION_MODES:
395
+ st.session_state[selection_mode_key] = mode_default
396
+ df_mode = st.sidebar.radio(
397
+ "Dataset selection",
398
+ options=DF_SELECTION_MODES,
399
+ key=selection_mode_key,
400
+ )
401
+ st.session_state["df_select_mode"] = df_mode
402
+
403
+ selection_key = _vm_key("df_files_selected")
404
+ if selection_key not in st.session_state:
405
+ legacy_selection = st.session_state.get("df_files_selected")
406
+ if isinstance(legacy_selection, list):
407
+ st.session_state[selection_key] = [item for item in legacy_selection if item in dataset_files_rel]
408
+ else:
409
+ st.session_state[selection_key] = []
410
+ current_selection = st.session_state.get(selection_key)
411
+ if not isinstance(current_selection, list):
412
+ current_selection = []
413
+ current_selection = [item for item in current_selection if item in dataset_files_rel]
414
+ if datadir_changed or (not current_selection and default_selection):
415
+ current_selection = default_selection
416
+ st.session_state[selection_key] = current_selection
417
+
418
+ single_file_key = _vm_key("df_file")
419
+ single_default = (
420
+ current_selection[0]
421
+ if current_selection
422
+ else (default_selection[0] if default_selection else "")
423
+ )
424
+ if st.session_state.get(single_file_key) not in dataset_files_rel:
425
+ st.session_state[single_file_key] = single_default
426
+
427
+ regex_key = _vm_key("df_file_regex")
428
+ if regex_key not in st.session_state:
429
+ st.session_state[regex_key] = str(view_settings.get("df_file_regex", ""))
430
+
431
+ selected_files: list[str] = []
432
+ if df_mode == "Single file":
433
+ st.sidebar.selectbox(
434
+ label="DataFrame",
435
+ options=dataset_files_rel,
436
+ key=single_file_key,
437
+ )
438
+ selected_single = st.session_state.get(single_file_key)
439
+ if selected_single:
440
+ selected_files = [selected_single]
441
+ elif df_mode == "Regex (multi)":
442
+ regex_raw = st.sidebar.text_input(
443
+ "DataFrame filename regex",
444
+ key=regex_key,
445
+ help="Python regex applied to the relative file path. Leave empty to match all files.",
446
+ ).strip()
447
+ regex_ok = True
448
+ pattern = None
449
+ if regex_raw:
450
+ try:
451
+ pattern = re.compile(regex_raw)
452
+ except re.error as exc:
453
+ regex_ok = False
454
+ st.sidebar.error(f"Invalid regex: {exc}")
455
+ matching = (
456
+ [item for item in dataset_files_rel if pattern.search(item)]
457
+ if (regex_ok and pattern is not None)
458
+ else (dataset_files_rel if not regex_raw else [])
459
+ )
460
+ st.sidebar.caption(f"{len(matching)} / {len(dataset_files_rel)} files match")
461
+ if st.sidebar.button(
462
+ f"Select all matching ({len(matching)})",
463
+ disabled=not matching,
464
+ key=_vm_key("df_regex_select_all"),
465
+ ):
466
+ st.session_state[selection_key] = matching
467
+ seeded = st.session_state.get(selection_key)
468
+ if not isinstance(seeded, list):
469
+ seeded = []
470
+ seeded = [item for item in seeded if item in dataset_files_rel]
471
+ if not seeded:
472
+ seeded = default_selection
473
+ st.session_state[selection_key] = seeded
474
+ st.sidebar.multiselect(
475
+ label="DataFrames",
476
+ options=dataset_files_rel,
477
+ key=selection_key,
478
+ help="Select one or more CSV/Parquet/JSON files (including split part files).",
479
+ )
480
+ selected_files = [item for item in st.session_state.get(selection_key, []) if item in dataset_files_rel]
481
+ else:
482
+ st.sidebar.multiselect(
483
+ label="DataFrames",
484
+ options=dataset_files_rel,
485
+ key=selection_key,
486
+ help="Select one or more CSV/Parquet/JSON files (including split part files).",
487
+ )
488
+ selected_files = [item for item in st.session_state.get(selection_key, []) if item in dataset_files_rel]
489
+
490
+ st.sidebar.caption(f"{len(selected_files)} selected")
491
+ if selected_files:
492
+ st.session_state[single_file_key] = selected_files[0]
493
+ st.session_state["df_files_selected"] = selected_files
494
+ st.session_state["df_file"] = selected_files[0] if selected_files else ""
495
+ st.session_state["df_file_regex"] = st.session_state.get(regex_key, "")
496
+ if not selected_files:
497
+ st.warning("Please select at least one dataset to proceed.")
498
+ return
499
+
500
+ # Load and concatenate selected DataFrames
501
+ dataframes: list[pd.DataFrame] = []
502
+ load_errors: list[str] = []
503
+ for rel_path in selected_files:
504
+ df_file_abs = datadir / rel_path
505
+ cache_buster = None
506
+ try:
507
+ cache_buster = df_file_abs.stat().st_mtime_ns
508
+ except FileNotFoundError:
509
+ cache_buster = None
510
+ try:
511
+ df_loaded = load_df(df_file_abs, with_index=True, cache_buster=cache_buster)
512
+ except Exception as exc:
513
+ load_errors.append(f"{rel_path}: {exc}")
514
+ continue
515
+ if not isinstance(df_loaded, pd.DataFrame):
516
+ load_errors.append(f"{rel_path}: unexpected type {type(df_loaded)}")
517
+ continue
518
+ df_loaded = df_loaded.copy()
519
+ df_loaded["__dataset__"] = rel_path
520
+ dataframes.append(df_loaded)
521
+
522
+ if load_errors:
523
+ st.sidebar.warning("Some selected files failed to load; continuing with the rest.")
524
+ with st.sidebar.expander("Load errors", expanded=False):
525
+ for err in load_errors[:50]:
526
+ st.write(err)
527
+ if len(load_errors) > 50:
528
+ st.write(f"... ({len(load_errors) - 50} more)")
529
+
530
+ if not dataframes:
531
+ st.error("No selected dataframes could be loaded.")
532
+ return
533
+
534
+ try:
535
+ combined_df = pd.concat(dataframes, ignore_index=True)
536
+ except Exception as e:
537
+ st.error(f"Error concatenating datasets: {e}")
538
+ return
539
+
540
+ st.session_state["loaded_df"] = combined_df
541
+
542
+ # Check if data is loaded and valid
543
+ if (
544
+ "loaded_df" not in st.session_state
545
+ or not isinstance(st.session_state.loaded_df, pd.DataFrame)
546
+ or not st.session_state.loaded_df.shape[1] > 0
547
+ ):
548
+ st.warning("The dataset is empty or could not be loaded. Please select a valid data file.")
549
+ return # Stop further processing
550
+
551
+ # data filter to speed-up
552
+ c = st.columns(5)
553
+ sampling_key = _vm_key("sampling_ratio")
554
+ if sampling_key not in st.session_state:
555
+ st.session_state[sampling_key] = max(1, int(st.session_state.GUI_SAMPLING))
556
+ sampling_ratio = c[4].number_input(
557
+ "Sampling ratio",
558
+ min_value=1,
559
+ step=1,
560
+ key=sampling_key,
561
+ )
562
+ st.session_state.GUI_SAMPLING = int(sampling_ratio)
563
+ st.session_state.loaded_df = downsample_df_deterministic(st.session_state.loaded_df, sampling_ratio)
564
+ nrows = st.session_state.loaded_df.shape[0]
565
+ if nrows == 0:
566
+ st.warning("No points remain after sampling. Reduce the sampling ratio or choose another dataset.")
567
+ return
568
+ min_lines = 1 if nrows < 5 else 5
569
+
570
+ line_limit_key = _vm_key("table_max_rows")
571
+ try:
572
+ table_max_rows = int(st.session_state.TABLE_MAX_ROWS)
573
+ except Exception:
574
+ table_max_rows = nrows
575
+ default_line_limit = min(max(min_lines, table_max_rows), nrows)
576
+ if st.session_state.get(line_limit_key) is None:
577
+ st.session_state[line_limit_key] = default_line_limit
578
+ else:
579
+ try:
580
+ current_limit = int(st.session_state[line_limit_key])
581
+ except Exception:
582
+ current_limit = default_line_limit
583
+ st.session_state[line_limit_key] = min(max(min_lines, current_limit), nrows)
584
+ if nrows <= min_lines:
585
+ lines = nrows
586
+ st.session_state[line_limit_key] = nrows
587
+ st.caption(f"Showing all {nrows} available point{'s' if nrows != 1 else ''}.")
588
+ else:
589
+ lines = st.slider(
590
+ "Select the desired number of points:",
591
+ min_value=min_lines,
592
+ max_value=nrows,
593
+ key=line_limit_key,
594
+ step=1,
595
+ )
596
+ st.session_state.TABLE_MAX_ROWS = int(lines)
597
+ if lines >= 0:
598
+ st.session_state.loaded_df = st.session_state.loaded_df.iloc[:lines, :]
599
+
600
+ df = st.session_state.loaded_df
601
+
602
+ if "beam" in df.columns:
603
+ available_beams = sorted({str(val) for val in df["beam"].dropna().unique()})
604
+ selected_beams = st.sidebar.multiselect(
605
+ "Filter beams",
606
+ available_beams,
607
+ key=f"view_maps_beam_filter_{env.app}",
608
+ )
609
+ if selected_beams:
610
+ df = df[df["beam"].astype(str).isin(selected_beams)].copy()
611
+ st.session_state.loaded_df = df
612
+ beam_summary_cols = {"points": ("beam", "size")}
613
+ if "alt_m" in df.columns:
614
+ beam_summary_cols["mean_alt_m"] = ("alt_m", "mean")
615
+ if "sat" in df.columns:
616
+ beam_summary_cols["dominant_sat"] = (
617
+ "sat",
618
+ lambda series: series.mode().iat[0] if not series.mode().empty else None,
619
+ )
620
+ with st.expander("Beam coverage", expanded=False):
621
+ summary_df = (
622
+ df.groupby("beam")
623
+ .agg(**beam_summary_cols)
624
+ .reset_index()
625
+ .rename(columns={"beam": "beam_id"})
626
+ .sort_values(by="beam_id")
627
+ )
628
+ st.dataframe(summary_df, width="stretch")
629
+ else:
630
+ st.sidebar.write("")
631
+
632
+ sat_default = bool(view_settings.get("show_sat_overlay", True))
633
+ show_sat_overlay = st.sidebar.checkbox(
634
+ "Show satellite overlay",
635
+ value=sat_default,
636
+ key=f"view_maps_sat_overlay_{env.app}",
637
+ )
638
+ if view_settings.get("show_sat_overlay", True) != show_sat_overlay:
639
+ view_settings["show_sat_overlay"] = show_sat_overlay
640
+ full_settings = _persist_view_maps_settings(env, full_settings, view_settings)
641
+
642
+ # Select numeric columns
643
+ numeric_cols = st.session_state.loaded_df.select_dtypes(include=["number"]).columns.tolist()
644
+
645
+ # Define lists to store continuous and discrete numeric variables
646
+ continuous_cols = []
647
+ discrete_numeric_cols = []
648
+
649
+ # Define a threshold: if a numeric column has fewer unique values than this threshold,
650
+ # treat it as discrete. Adjust this value based on your needs.
651
+ # Threshold to classify numeric columns as discrete vs continuous
652
+ unique_default = int(view_settings.get("unique_threshold", 10))
653
+ unique_threshold = st.sidebar.number_input(
654
+ "Discrete threshold (unique values <)",
655
+ min_value=2,
656
+ max_value=100,
657
+ value=unique_default,
658
+ step=1,
659
+ )
660
+ if view_settings.get("unique_threshold", 10) != unique_threshold:
661
+ view_settings["unique_threshold"] = int(unique_threshold)
662
+ full_settings = _persist_view_maps_settings(env, full_settings, view_settings)
663
+
664
+ range_default = int(view_settings.get("range_threshold", 200))
665
+ range_threshold = st.sidebar.number_input(
666
+ "Integer discrete range (max-min <=)",
667
+ min_value=1,
668
+ max_value=10000,
669
+ value=range_default,
670
+ step=1,
671
+ )
672
+ if view_settings.get("range_threshold", 200) != range_threshold:
673
+ view_settings["range_threshold"] = int(range_threshold)
674
+ full_settings = _persist_view_maps_settings(env, full_settings, view_settings)
675
+
676
+ # Loop through numeric columns and classify them based on the unique value count.
677
+ for col in numeric_cols:
678
+ if df[col].nunique() < unique_threshold:
679
+ discrete_numeric_cols.append(col)
680
+ else:
681
+ continuous_cols.append(col)
682
+
683
+ # Get discrete variables from object type
684
+ discrete_object_cols = df.select_dtypes(include=["object"]).columns.tolist()
685
+
686
+ # Combine numeric discrete and object discrete variables
687
+ discrete_cols = discrete_numeric_cols + discrete_object_cols
688
+
689
+ # Re-classify integer columns with limited range as discrete to avoid sliders
690
+ for col in numeric_cols:
691
+ if not is_integer_dtype(df[col]):
692
+ continue
693
+ value_range = df[col].max() - df[col].min()
694
+ if pd.isna(value_range) or value_range > range_threshold:
695
+ continue
696
+ if col in continuous_cols:
697
+ continuous_cols.remove(col)
698
+ if col not in discrete_cols:
699
+ discrete_cols.append(col)
700
+ discreteseq = None
701
+ colorscale = None
702
+
703
+ # Identify numerical columns
704
+ for col in discrete_cols.copy(): # Use copy to avoid modifying the list during iteration
705
+ try:
706
+ pd.to_datetime(
707
+ st.session_state.loaded_df[col],
708
+ format="%Y-%m-%d %H:%M:%S",
709
+ errors="raise",
710
+ )
711
+ discrete_cols.remove(col)
712
+ continuous_cols.append(col)
713
+ except (ValueError, TypeError):
714
+ pass
715
+
716
+ for i, cols in enumerate([discrete_cols, continuous_cols]):
717
+ if cols:
718
+ colsn = (
719
+ pd.DataFrame(
720
+ [
721
+ {
722
+ "Columns": col,
723
+ "nbval": len(set(st.session_state.loaded_df[col])),
724
+ }
725
+ for col in cols
726
+ ]
727
+ )
728
+ .sort_values(by="nbval", ascending=False)
729
+ .Columns.tolist()
730
+ )
731
+ if var[i] == "discrete" and "beam" in colsn:
732
+ colsn = ["beam"] + [col for col in colsn if col != "beam"]
733
+ on_change_function = None
734
+ if var[i] == "discrete":
735
+ on_change_function = discrete
736
+ elif var[i] == "continuous":
737
+ on_change_function = continuous
738
+ with c[i]:
739
+ st.selectbox(
740
+ label=f"{var[i]}",
741
+ options=colsn,
742
+ index=var_default[i] if var_default[i] is not None and var_default[i] < len(colsn) else 0,
743
+ key=var[i],
744
+ on_change=on_change_function,
745
+ )
746
+ if var[i] == "discrete":
747
+ discreteseqs = [
748
+ "Plotly",
749
+ "D3",
750
+ "G10",
751
+ "T10",
752
+ "Alphabet",
753
+ "Dark24",
754
+ "Light24",
755
+ "Set1",
756
+ "Pastel1",
757
+ "Dark2",
758
+ "Set2",
759
+ "Pastel2",
760
+ "Set3",
761
+ ]
762
+ discreteseq = st.selectbox("Color Sequence", discreteseqs, index=0)
763
+ elif var[i] == "continuous":
764
+ colorscales = px.colors.named_colorscales()
765
+ colorscale = st.selectbox("Color Scale", colorscales, index=0)
766
+ else:
767
+ with c[i]:
768
+ st.warning(f"No columns available for {var[i]}.")
769
+ st.session_state[var[i]] = None
770
+
771
+ for i in range(2, 4):
772
+ colsn = st.session_state.loaded_df.filter(regex=var[i]).columns.tolist()
773
+ with c[i]:
774
+ if colsn:
775
+ st.selectbox(f"{var[i]}", colsn, index=0, key=var[i])
776
+ else:
777
+ st.warning(f"No columns matching '{var[i]}' found.")
778
+ st.session_state[var[i]] = None
779
+
780
+ map_cfg = st.session_state.get(map_defaults_key, {"center_lat": 0.0, "center_lon": 0.0, "default_zoom": 2.5})
781
+ lat_col = st.session_state.get("lat")
782
+ lon_col = st.session_state.get("long")
783
+ if lat_col and lon_col and lat_col in df.columns and lon_col in df.columns:
784
+ viewport = _compute_viewport(df, lat_col, lon_col)
785
+ if viewport:
786
+ map_cfg.update(viewport)
787
+
788
+ plot_df = st.session_state.loaded_df
789
+ color_column = st.session_state.get(st.session_state.get("coltype", ""), None)
790
+ if (
791
+ st.session_state.get("coltype") == "discrete"
792
+ and color_column
793
+ and color_column in plot_df.columns
794
+ and is_numeric_dtype(plot_df[color_column])
795
+ ):
796
+ plot_df = plot_df.copy()
797
+ plot_df[color_column] = plot_df[color_column].astype("Int64").astype(str)
798
+
799
+ if st.session_state.get("lat") and st.session_state.get("long"):
800
+ if st.session_state.get("coltype") and st.session_state.get(st.session_state["coltype"]):
801
+ color_kwargs = (
802
+ {
803
+ "color_discrete_sequence": getattr(px.colors.qualitative, discreteseq),
804
+ "color": st.session_state[st.session_state.coltype],
805
+ }
806
+ if discreteseq
807
+ else {
808
+ "color_continuous_scale": colorscale,
809
+ "color": st.session_state[st.session_state.coltype],
810
+ }
811
+ if colorscale
812
+ else {}
813
+ )
814
+ fig = px.scatter_map(
815
+ plot_df,
816
+ lat=st.session_state.lat,
817
+ lon=st.session_state.long,
818
+ zoom=map_cfg["default_zoom"],
819
+ center={"lat": map_cfg["center_lat"], "lon": map_cfg["center_lon"]},
820
+ **color_kwargs,
821
+ )
822
+
823
+ if (
824
+ show_sat_overlay
825
+ and {"sat_track_lat", "sat_track_long"} <= set(st.session_state.loaded_df.columns)
826
+ ):
827
+ sat_points = (
828
+ st.session_state.loaded_df[["sat_track_lat", "sat_track_long", "sat"]]
829
+ .dropna(subset=["sat_track_lat", "sat_track_long"])
830
+ .drop_duplicates()
831
+ )
832
+ if not sat_points.empty:
833
+ fig.add_trace(
834
+ go.Scattermap(
835
+ lat=sat_points["sat_track_lat"],
836
+ lon=sat_points["sat_track_long"],
837
+ mode="markers",
838
+ marker=dict(size=10, color="#ffa600", symbol="triangle"),
839
+ name="Satellite track",
840
+ text=sat_points.get("sat"),
841
+ )
842
+ )
843
+
844
+ fig.update_layout(map_style="open-street-map")
845
+ fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
846
+
847
+ st.plotly_chart(fig, width="stretch", theme="streamlit")
848
+ else:
849
+ st.warning("Please select a valid column for coloring.")
850
+ else:
851
+ st.warning("Latitude and Longitude columns are required for the map.")
852
+
853
+ # Persist user selections for next reload
854
+ persist_keys = [
855
+ "file_ext_choice",
856
+ "df_select_mode",
857
+ "df_file_regex",
858
+ "df_file",
859
+ "df_files_selected",
860
+ "discrete",
861
+ "continuous",
862
+ "lat",
863
+ "long",
864
+ "coltype",
865
+ ]
866
+ mutated = False
867
+ for key in persist_keys:
868
+ val = st.session_state.get(key)
869
+ if val is None:
870
+ continue
871
+ if view_settings.get(key) != val:
872
+ view_settings[key] = val
873
+ mutated = True
874
+ if mutated:
875
+ full_settings = _persist_view_maps_settings(env, full_settings, view_settings)
876
+
877
+ # -------------------- Main Application Entry -------------------- #
878
def main():
    """Resolve the active app, build its environment and render the page.

    Reads the required ``--active-app`` command-line option (arguments the
    parser does not recognise are discarded), then:

    * shows an error and exits with status 1 when the given path does not exist;
    * seeds ``coltype`` in the Streamlit session state with ``var[0]`` when unset;
    * records ``apps_path`` (the path's parent) and ``app`` (the path's name);
    * creates an ``AgiEnv`` for that app, flags it as initialised and stores it,
      together with its source/worker flags, in the session state;
    * seeds ``TABLE_MAX_ROWS`` and ``GUI_SAMPLING`` from the environment when unset;
    * hands over to ``page(env)``.

    Any ``Exception`` raised along the way is displayed in the UI with its full
    traceback instead of propagating.
    """
    try:
        cli = argparse.ArgumentParser(
            description="Run the AGI Streamlit View with optional parameters."
        )
        cli.add_argument(
            "--active-app",
            dest="active_app",
            type=str,
            required=True,
            help="Active app path (e.g. src/agilab/apps/builtin/flight_telemetry_project)",
        )
        # parse_known_args tolerates extra arguments; the leftovers are not used here.
        known_args, _unused = cli.parse_known_args()

        app_path = Path(known_args.active_app).expanduser()
        if not app_path.exists():
            st.error(f"Error: provided --active-app path not found: {app_path}")
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept it.
            sys.exit(1)

        state = st.session_state
        if "coltype" not in state:
            state["coltype"] = var[0]

        # Short app name, e.g. 'flight_telemetry_project'.
        app_name = app_path.name
        apps_dir = app_path.parent
        state["apps_path"] = str(apps_dir)
        state["app"] = app_name

        st.info(f"active_app: {app_path}")
        env = AgiEnv(apps_path=apps_dir, app=app_name, verbose=1)
        env.init_done = True
        state["env"] = env
        state["IS_SOURCE_ENV"] = env.is_source_env
        state["IS_WORKER_ENV"] = env.is_worker_env

        # Values already present in the session take precedence over the
        # environment's own settings.
        for setting in ("TABLE_MAX_ROWS", "GUI_SAMPLING"):
            if setting not in state:
                state[setting] = getattr(env, setting)

        page(env)

    except Exception as exc:
        st.error(f"An error occurred: {exc}")
        import traceback

        st.caption("Full traceback")
        st.code(traceback.format_exc(), language="text")
932
+
933
+ # -------------------- Main Entry Point -------------------- #
934
# Script entry guard: call main() only when this module is executed directly
# (``__name__`` is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()