firedataforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,43 @@
1
+ """FireDataForge: a unified framework for multi-source wildfire data retrieval and integration.
2
+
3
+ The public API is re-exported here so consumers can ``from firedataforge import
4
+ forge_event`` (or ``import firedataforge as fdf``) without reaching into submodules.
5
+ See ``firedataforge.cli`` for the command-line entry point.
6
+ """
7
+
8
+ from firedataforge.__about__ import __version__
9
+ from firedataforge.config import (
10
+ ensure_ca_bundle, ensure_setup, feds_available, gee_ready, load_env,
11
+ run_setup_wizard,
12
+ )
13
+ from firedataforge.constants import DEFAULT_FIRE_WINDOW_DAYS
14
+ from firedataforge.events import (
15
+ build_firelist, get_fire_info, get_task_info, read_feds_firelist,
16
+ validate_projected_crs,
17
+ )
18
+ from firedataforge.examples import fetch_examples
19
+ from firedataforge.io import load_numpy, save_coordinates, save_numpy
20
+ from firedataforge.pipeline import (
21
+ forge_event, parse_batch_input, process_batch, process_single_fire,
22
+ )
23
+ from firedataforge.sources.feds import find_event_gpkg
24
+ from firedataforge.schemas import (
25
+ DataLayer, FireEvent, GeoReference, ProcessingArgs, ProcessingTask,
26
+ )
27
+
28
+ # Load persisted .env on import (the real environment still takes precedence),
29
+ # then make sure HTTPS calls have a usable CA bundle (some HPC Pythons ship none).
30
+ load_env()
31
+ ensure_ca_bundle()
32
+
33
+ __all__ = [
34
+ "__version__",
35
+ "forge_event", "process_single_fire", "process_batch", "parse_batch_input",
36
+ "get_fire_info", "get_task_info", "validate_projected_crs",
37
+ "find_event_gpkg", "read_feds_firelist",
38
+ "build_firelist", "fetch_examples", "load_numpy", "save_numpy", "save_coordinates",
39
+ "gee_ready", "feds_available", "run_setup_wizard", "ensure_setup", "load_env",
40
+ "ensure_ca_bundle",
41
+ "FireEvent", "ProcessingTask", "ProcessingArgs", "DataLayer", "GeoReference",
42
+ "DEFAULT_FIRE_WINDOW_DAYS",
43
+ ]
firedataforge/cli.py ADDED
@@ -0,0 +1,153 @@
1
+ """Command-line interface for FireDataForge."""
2
+
3
+ import argparse
4
+ import logging
5
+ import os
6
+
7
+ from firedataforge.config import (
8
+ ensure_setup, is_first_run, is_interactive, load_env, run_setup_wizard,
9
+ )
10
+ from firedataforge.constants import BASE_DIR, CACHE_DIR, FEDS_DIR
11
+ from firedataforge.events import build_firelist, validate_projected_crs
12
+ from firedataforge.examples import fetch_examples
13
+ from firedataforge.pipeline import (
14
+ AVAILABLE_LAYERS, LAYER_ALIASES, parse_batch_input, process_batch,
15
+ process_single_fire,
16
+ )
17
+ from firedataforge.schemas import ProcessingArgs
18
+
19
+ log = logging.getLogger(__name__)
20
+
21
+
22
+ def main() -> None:
23
+ """Command-line entry point: resolve an MTBS Event ID (or a batch) and forge it."""
24
+ parser = argparse.ArgumentParser(
25
+ description="FireDataForge -- unified multi-source wildfire data retrieval and integration",
26
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
27
+ )
28
+
29
+ # event_id and --batch are mutually exclusive and both optional, so --setup /
30
+ # --build-firelist can run on their own.
31
+ input_group = parser.add_mutually_exclusive_group(required=False)
32
+ input_group.add_argument(
33
+ "event_id", type=str, nargs="?",
34
+ help="MTBS Event ID to process (e.g. CA3432611848120191010)")
35
+ input_group.add_argument(
36
+ "--batch", type=str,
37
+ help="A file of Event IDs (one per line) or a comma-separated list")
38
+
39
+ parser.add_argument("--setup", action="store_true",
40
+ help="Run the interactive credential wizard and exit")
41
+ parser.add_argument("--build-firelist", dest="build_firelist", action="store_true",
42
+ help="Download the full MTBS archive to the offline fire-list cache and exit")
43
+ parser.add_argument("--fetch-examples", dest="fetch_examples", action="store_true",
44
+ help="Download examples.zip from Zenodo and unzip it under the "
45
+ "FireDataForge home (datasets/FEDS25MTBS/ + events.txt; the "
46
+ "repo root for a source checkout, else ~/.firedataforge), then exit")
47
+ parser.add_argument("--workers", "-w", type=int, default=1,
48
+ help="Events processed in parallel in batch mode (1-4 recommended)")
49
+ parser.add_argument("--layer-workers", dest="layer_workers", type=int, default=5,
50
+ help="Concurrent layer downloads within a single event")
51
+ parser.add_argument("--resolution", "-r", type=int, default=30,
52
+ help="Target spatial resolution in meters")
53
+ parser.add_argument("--buffer", "-b", type=int, default=100,
54
+ help="Buffer around the fire bounds in meters")
55
+ parser.add_argument("--crs", "-c", type=str, default="EPSG:5070",
56
+ help="Target coordinate reference system")
57
+ parser.add_argument("--output_dir", "-o", type=str, default="output",
58
+ help="Output directory")
59
+ parser.add_argument("--interpolation", "-t", type=int, default=0,
60
+ help="Intermediate frames to interpolate between perimeter timesteps")
61
+ parser.add_argument("--cache_dir", type=str, default=CACHE_DIR,
62
+ help="Root directory for all on-the-fly downloads "
63
+ "(HRRR, FIRMS, FEDS, firepix, WUI, fire list); each "
64
+ "caches under its own fixed subfolder of this root")
65
+ parser.add_argument("--verbose", "-v", action="store_true",
66
+ help="Verbose (DEBUG) logging")
67
+ parser.add_argument("--only", type=str, default=None,
68
+ help="Comma-separated subset of layers to process. Available: "
69
+ + ", ".join(AVAILABLE_LAYERS))
70
+
71
+ args = parser.parse_args()
72
+
73
+ logging.basicConfig(
74
+ level=logging.DEBUG if args.verbose else logging.INFO,
75
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
76
+ )
77
+
78
+ # Stand-alone maintenance commands.
79
+ load_env()
80
+ if args.setup:
81
+ run_setup_wizard()
82
+ return
83
+ if args.build_firelist:
84
+ build_firelist()
85
+ return
86
+ if args.fetch_examples:
87
+ try:
88
+ fetch_examples()
89
+ except Exception as exc:
90
+ log.error(str(exc))
91
+ raise SystemExit(1)
92
+ log.info(
93
+ "Example FEDS-MTBS fires ready in %s (event IDs listed in %s)",
94
+ FEDS_DIR, os.path.join(BASE_DIR, "events.txt"))
95
+ return
96
+
97
+ # On the first interactive run with no event, configure and stop.
98
+ ran_wizard = is_first_run() and is_interactive()
99
+ ensure_setup()
100
+ if ran_wizard and not (args.event_id or args.batch):
101
+ return
102
+
103
+ if not args.event_id and not args.batch:
104
+ parser.error("an event_id or --batch is required")
105
+
106
+ # Fail fast on an unusable target CRS, before any network resolution: the
107
+ # grid is built in metres, so a geographic or non-metric CRS cannot work.
108
+ try:
109
+ validate_projected_crs(args.crs)
110
+ except ValueError as exc:
111
+ parser.error(str(exc))
112
+
113
+ only_features = (
114
+ [f.strip() for f in args.only.split(",") if f.strip()] if args.only else None
115
+ )
116
+ # Validate --only against the known layer names (+ the landfire/hrrr aliases)
117
+ # so a typo fails loudly instead of silently producing an empty output dir.
118
+ if only_features is not None:
119
+ valid_only = set(AVAILABLE_LAYERS) | set(LAYER_ALIASES.values())
120
+ unknown = [f for f in only_features if f not in valid_only]
121
+ if unknown:
122
+ only_features = [f for f in only_features if f in valid_only]
123
+ log.warning(
124
+ "Ignoring unknown --only layer(s): %s. Valid names: %s",
125
+ ", ".join(unknown), ", ".join(AVAILABLE_LAYERS + ["landfire", "hrrr"]),
126
+ )
127
+ if not only_features:
128
+ parser.error(
129
+ "--only contained no recognized layer names. Valid names: "
130
+ + ", ".join(AVAILABLE_LAYERS + ["landfire", "hrrr"]))
131
+
132
+ processing_args = ProcessingArgs(
133
+ resolution=args.resolution,
134
+ buffer=args.buffer,
135
+ crs=args.crs,
136
+ output_dir=args.output_dir,
137
+ interpolation=args.interpolation,
138
+ cache_dir=args.cache_dir,
139
+ verbose=args.verbose,
140
+ only=only_features,
141
+ layer_workers=args.layer_workers,
142
+ )
143
+
144
+ # Earth Engine is initialized lazily and per-event (fail-soft); no global init
145
+ # here, so a GEE outage never blocks the FEDS/FIRMS/HRRR layers.
146
+ if args.batch:
147
+ event_ids = parse_batch_input(args.batch)
148
+ if not event_ids:
149
+ log.error("No valid event IDs found in batch input")
150
+ return
151
+ process_batch(event_ids, processing_args, max_workers=args.workers)
152
+ else:
153
+ process_single_fire(args.event_id, processing_args)
@@ -0,0 +1,475 @@
1
+ """Credentials, the first-run setup wizard, and dataset/credential discovery."""
2
+
3
+ import glob
4
+ import logging
5
+ import os
6
+ import subprocess
7
+ import sys
8
+ import urllib.error
9
+ import urllib.request
10
+ from datetime import datetime
11
+ from typing import Optional
12
+
13
+ from firedataforge.constants import (
14
+ BASE_DIR, DEFAULT_FIRELIST_CACHE, FEDS_CACHE_DIR, FEDS_DIR,
15
+ FEDS_MTBS_FIRELIST_NAME, FEDS_MTBS_ZENODO_URL, FEDS_MTBS_ZIP_NAME,
16
+ )
17
+
18
+ log = logging.getLogger(__name__)
19
+
20
+
21
# Persist settings under the FireDataForge home (BASE_DIR): the repo root for a
# source checkout (next to ``main.py``, where dev users expect it), or
# ``~/.firedataforge`` for an installed package -- never inside a possibly
# read-only ``site-packages``. Override the location with FIREDATAFORGE_HOME.
ENV_PATH = os.path.join(BASE_DIR, ".env")

# Which credential / dependency unlocks which output features, so the wizard can
# tell the user exactly what skipping a step would cost. Keep in sync with the
# layer registry in ``process_single_fire`` and the README feature table.
# Printed by the Earth Engine wizard step ("Unlocks" / "Disabled features").
GEE_FEATURES = [
    "elevation", "terrain_rgb", "canopy_bulk_density", "canopy_cover",
    "building_height", "landcover", "lai", "sentinel2_rgb",
]
# Printed by the NASA FIRMS wizard step.
FIRMS_FEATURES = ["frp_daytime", "frp_nighttime"]
# Printed by the FEDS-MTBS archive wizard step.
FEDS_FEATURES = ["burn_perimeter", "fireline", "fireline_max_frp"]
# Features that need no credentials at all (shown for reassurance).
FREE_FEATURES = ["wui", "recent_burn", "r2", "u10", "v10"]
38
+
39
+
40
def _parse_env_file(path: str) -> dict[str, str]:
    """Parse a ``.env`` file into a dict of KEY -> value (last wins).

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. Keys
    and values are whitespace-trimmed and surrounding quotes are stripped from
    the value. A leading UTF-8 byte-order mark (as written by some Windows
    editors) is dropped, and a shell-style ``export KEY=value`` line is read
    as ``KEY``.

    Args:
        path: Location of the ``.env`` file; a missing file yields ``{}``.

    Returns:
        Mapping of variable name to value; a later assignment of the same key
        overrides an earlier one.
    """
    data: dict[str, str] = {}
    if not os.path.exists(path):
        return data
    # "utf-8-sig" decodes plain UTF-8 as well, but strips a leading BOM so it
    # does not end up glued to the first key.
    with open(path, "r", encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            key = key.strip()
            # Accept files meant to be ``source``d by a shell: "export KEY=value".
            words = key.split(None, 1)
            if len(words) == 2 and words[0] == "export":
                key = words[1].strip()
            value = value.strip().strip('"').strip("'")
            if key:
                data[key] = value
    return data
56
+
57
+
58
def load_env(path: str = ENV_PATH) -> None:
    """Copy the settings persisted in ``path`` into ``os.environ``.

    A variable that is already present in the process environment is left
    untouched, so the real environment always wins over the file. Calling this
    repeatedly (including at import time) is harmless.
    """
    persisted = _parse_env_file(path)
    for name in persisted:
        if name not in os.environ:
            os.environ[name] = persisted[name]
66
+
67
+
68
def ensure_ca_bundle() -> None:
    """Fall back to certifi's CA bundle when no usable system bundle exists.

    Some Python builds (e.g. uv-managed CPython on HPC nodes) ship without a CA
    bundle, so OpenSSL's default ``cafile`` is missing and HTTPS calls fail with
    ``CERTIFICATE_VERIFY_FAILED: unable to get local issuer certificate``.

    Nothing is changed when the user already set ``SSL_CERT_FILE`` or
    ``SSL_CERT_DIR``, when the default ``cafile`` exists, or when certifi (or its
    bundle file) is unavailable. Otherwise ``SSL_CERT_FILE`` is pointed at the
    certifi bundle and ``REQUESTS_CA_BUNDLE`` is set unless already defined. The
    values are exported through the environment so that child processes
    (notably ``earthengine authenticate``) inherit them.
    """
    user_choice = os.environ.get("SSL_CERT_FILE") or os.environ.get("SSL_CERT_DIR")
    if user_choice:
        return  # an explicit user setting is never overridden

    import ssl

    system_cafile = ssl.get_default_verify_paths().cafile
    system_store_ok = bool(system_cafile) and os.path.exists(system_cafile)
    if system_store_ok:
        return  # the default bundle is present

    try:
        import certifi
    except ImportError:
        return

    certifi_bundle = certifi.where()
    if os.path.exists(certifi_bundle):
        os.environ["SSL_CERT_FILE"] = certifi_bundle
        os.environ.setdefault("REQUESTS_CA_BUNDLE", certifi_bundle)
94
+
95
+
96
def set_env_var(key: str, value: str, path: str = ENV_PATH) -> None:
    """Insert or replace ``key=value`` in the ``.env`` file and in ``os.environ``.

    Every existing assignment of ``key`` is rewritten in place; all other lines
    (comments, blanks, other keys) are kept as they are. When the key is not
    assigned anywhere, the entry is appended. The parent directory is created
    on demand.
    """
    existing: list[str] = []
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as handle:
            existing = handle.read().splitlines()

    entry = f"{key}={value}"

    def _assigns_key(raw: str) -> bool:
        """True when ``raw`` is a non-comment ``KEY=...`` line for ``key``."""
        text = raw.strip()
        if not text or text.startswith("#") or "=" not in text:
            return False
        return text.split("=", 1)[0].strip() == key

    updated = [entry if _assigns_key(raw) else raw for raw in existing]
    if not any(_assigns_key(raw) for raw in existing):
        updated.append(entry)

    # The home dir may not exist yet on an installed user's first run.
    parent = os.path.dirname(path) or "."
    os.makedirs(parent, exist_ok=True)
    content = "\n".join(updated).rstrip("\n") + "\n"
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)
    os.environ[key] = value
124
+
125
+
126
def is_first_run(path: str = ENV_PATH) -> bool:
    """Report a first run, i.e. the ``.env`` file at ``path`` does not exist yet."""
    env_file_present = os.path.exists(path)
    return not env_file_present
129
+
130
+
131
def is_interactive() -> bool:
    """True when both stdin and stdout are attached to a terminal.

    A stream that is missing (``None``, e.g. under ``pythonw`` or some
    daemonized launchers), closed, or lacking ``isatty`` counts as
    non-interactive instead of raising.
    """
    for stream in (sys.stdin, sys.stdout):
        if stream is None:
            return False
        try:
            if not stream.isatty():
                return False
        except (AttributeError, ValueError):
            # AttributeError: replacement object without isatty();
            # ValueError: isatty() on a closed file.
            return False
    return True
134
+
135
+
136
def _prompt(message: str, default: str = "") -> str:
    """Ask the user for a line of input and return it stripped.

    ``default`` is returned when the answer is empty, or when input is cut
    short by EOF / Ctrl-C (a newline is printed first in that case).
    """
    try:
        raw = input(message)
    except (EOFError, KeyboardInterrupt):
        print()
        return default
    cleaned = raw.strip()
    return cleaned if cleaned else default
143
+
144
+
145
def _yes_no(message: str, default: bool = True) -> bool:
    """Ask a yes/no question; an empty answer selects ``default``.

    Any answer starting with "y" (case-insensitive) means yes; every other
    non-empty answer means no.
    """
    hint = " [Y/n] " if default else " [y/N] "
    reply = _prompt(f"{message}{hint}").lower()
    return reply.startswith("y") if reply else default
151
+
152
+
153
def gee_ready(project: Optional[str] = None) -> bool:
    """Return True if Earth Engine can initialize and answer a trivial query now.

    Args:
        project: Google Cloud project ID; falls back to the
            ``EARTHENGINE_PROJECT`` environment variable when omitted.

    Returns:
        True only when ``ee`` imports, initializes and evaluates ``ee.Number(1)``.
        Any failure -- including a missing or broken ``earthengine-api``
        installation -- yields False rather than an exception, so callers can
        treat Earth Engine as an optional, fail-soft dependency.
    """
    project = project or os.environ.get("EARTHENGINE_PROJECT")
    try:
        # Imported lazily so this module stays light for metadata-only use, and
        # inside the try so an absent package reads as "not ready".
        import ee

        if project:
            ee.Initialize(project=project)
        else:
            ee.Initialize()
        ee.Number(1).getInfo()
        return True
    except Exception:
        return False
166
+
167
+
168
def firms_key_valid(map_key: str) -> Optional[bool]:
    """Best-effort online check of a FIRMS MAP_KEY.

    Args:
        map_key: The key as typed by the user; it is percent-encoded before
            being placed in the query string.

    Returns:
        True/False when validity could be determined, or None if the check
        itself failed (no network, timeout, or a server-side/rate-limit HTTP
        status) so the caller can treat it as "unknown".
    """
    from urllib.parse import quote

    url = (
        "https://firms.modaps.eosdis.nasa.gov/mapserver/mapkey_status/"
        f"?MAP_KEY={quote(map_key, safe='')}"
    )
    try:
        with urllib.request.urlopen(url, timeout=30) as resp:
            body = resp.read().decode("utf-8", "replace").lower()
    except urllib.error.HTTPError as exc:
        # 5xx, 408 and 429 say the service could not answer, not that the key
        # is wrong; only the remaining HTTP errors are read as a rejection.
        if exc.code >= 500 or exc.code in (408, 429):
            return None
        return False
    except Exception:
        return None
    if "invalid" in body or "error" in body:
        return False
    # "transaction" also covers the "current_transactions" field name.
    return "transaction" in body or "map_key" in body
185
+
186
+
187
def find_feds_firelist(feds_dir: str = FEDS_DIR) -> Optional[str]:
    """Locate the FEDS-MTBS fire list in ``feds_dir``, if present.

    Matches, in order of preference:

    * the bundled example list ``fireslist_examples.csv`` shipped by
      ``--fetch-examples`` (FEDS perimeter bbox plus explicit ``tst``/``ted`` for
      the eight demo fires), then
    * the released summary list ``fireslist_FEDS25MTBS_2012-2024.geojson``
      (MTBS final perimeters + metadata for all 7,739 fires).

    Any top-level, non-hidden file whose name contains both "fire" and "list"
    and ends in ``.geojson``, ``.gpkg`` or ``.csv`` is accepted; name and
    extension are compared case-insensitively, so a spelling difference never
    disables this offline metadata source. The example list wins when present
    (it carries the demo fires' exact end dates); otherwise GeoPackage/GeoJSON
    lists win over a generic CSV, and within a kind a name containing the full
    word "firelist"/"fireslist" is preferred.

    Args:
        feds_dir: Directory to search (not recursive).

    Returns:
        Path of the best match, or None when the directory is missing,
        unreadable, or holds no candidate.
    """
    if not os.path.isdir(feds_dir):
        return None
    ext_rank = {".geojson": 0, ".gpkg": 0, ".csv": 1}

    try:
        names = sorted(os.listdir(feds_dir))  # sorted for a deterministic pick
    except OSError:
        return None

    candidates = []
    for name in names:
        lowered = name.lower()
        if lowered.startswith("."):
            continue  # hidden files were never matched by the "*.ext" glob
        if os.path.splitext(lowered)[1] not in ext_rank:
            continue
        if "list" not in lowered or "fire" not in lowered:
            continue
        full = os.path.join(feds_dir, name)
        if os.path.isfile(full):  # a directory cannot be a fire list
            candidates.append(full)

    def _key(f: str) -> tuple:
        base = os.path.basename(f).lower()
        _, ext = os.path.splitext(base)
        not_example = "example" not in base  # examples list first (False < True)
        generic_name = "firelist" not in base and "fireslist" not in base
        return (not_example, ext_rank.get(ext, 2), generic_name, base)

    return min(candidates, key=_key, default=None)
227
+
228
+
229
def count_feds_gpkgs(base_dir: str = FEDS_DIR) -> int:
    """Count the ``.gpkg`` files (any letter case) found anywhere under ``base_dir``.

    Returns 0 when ``base_dir`` is not a directory.
    """
    if not os.path.isdir(base_dir):
        return 0
    total = 0
    for _root, _subdirs, names in os.walk(base_dir):
        total += sum(1 for name in names if name.lower().endswith(".gpkg"))
    return total
237
+
238
+
239
def feds_available() -> bool:
    """True when FEDS25MTBS data (fire list or GeoPackages) is present locally.

    Looks for a fire list first, then for GeoPackages in the user archive
    (``datasets/``) and finally in the software cache (``cache/``), so a run
    that has already lazily fetched fires still counts.
    """
    if find_feds_firelist() is not None:
        return True
    return any(count_feds_gpkgs(root) > 0 for root in (FEDS_DIR, FEDS_CACHE_DIR))
248
+
249
+
250
def _setup_gee(path: str) -> None:
    """Wizard step 1/5: authenticate Earth Engine and record the project ID.

    Returns early when ``gee_ready()`` already succeeds or the user declines.
    Otherwise it launches ``earthengine authenticate``, asks for a Google Cloud
    project ID, stores a non-empty answer as ``EARTHENGINE_PROJECT`` and prints
    whether Earth Engine is usable afterwards.

    Args:
        path: ``.env`` file that receives ``EARTHENGINE_PROJECT``.
    """
    print("\n[1/5] Google Earth Engine")
    print(" Unlocks: " + ", ".join(GEE_FEATURES))
    print(" Get access: enable Earth Engine on a Google Cloud project at "
          "https://console.cloud.google.com/")
    if gee_ready():
        print(" OK Already authenticated and initialized.")
        return
    if not _yes_no(" Configure Earth Engine now?"):
        print(" -- Skipped. Disabled features: " + ", ".join(GEE_FEATURES))
        return
    print(" Launching 'earthengine authenticate'...")
    print(" A URL will be printed; open it, authorize, and paste the code back.")
    try:
        # auth_mode=notebook prints a URL + accepts a pasted verification code,
        # so it works on headless/HPC nodes with no browser and no gcloud CLI.
        # check=False: a failed login is detected by the gee_ready() call below.
        subprocess.run(
            ["earthengine", "authenticate", "--auth_mode=notebook"], check=False
        )
    except FileNotFoundError:
        # The CLI is missing; keep going so the project ID can still be saved.
        print(" ! 'earthengine' CLI not found. Install with: pip install earthengine-api")
    print(" To get a project ID:")
    print(" 1. Go to Google Cloud Console: https://console.cloud.google.com/")
    print(" 2. Create or select a project with Earth Engine enabled:")
    print(" https://developers.google.com/earth-engine/guides/access")
    print(" 3. Copy the project ID")
    project = _prompt(" Google Cloud project ID (blank to skip): ")
    if project:
        try:
            subprocess.run(["earthengine", "set_project", project], check=False)
        except FileNotFoundError:
            # No CLI available: the project is still persisted via .env below.
            pass
        set_env_var("EARTHENGINE_PROJECT", project, path)
    # Re-probe with the project just entered (None when it was left blank).
    if gee_ready(project or None):
        print(" OK Earth Engine is ready.")
    else:
        print(" ! Earth Engine still not initialized; GEE features will be "
              "skipped until setup is finished (rerun: python main.py --setup).")
288
+
289
+
290
def _setup_firms(path: str) -> None:
    """Wizard step 2/5: collect and persist the NASA FIRMS MAP_KEY.

    Returns early when a key is already configured or the user leaves the
    prompt blank. A key reported invalid by ``firms_key_valid`` is saved only
    after explicit confirmation; a key that could not be verified is saved as
    given.

    Args:
        path: ``.env`` file that receives ``FIRMS_MAP_KEY``.
    """
    print("\n[2/5] NASA FIRMS map key (VIIRS active fire / FRP)")
    print(" Unlocks: " + ", ".join(FIRMS_FEATURES))
    print(" Get access (free, instant): https://firms.modaps.eosdis.nasa.gov/api/map_key/")
    print(" Each fire's FRP is streamed from the Area API and cached per")
    print(" event under cache/FIRMS/ (no bulk download). To work fully")
    print(" offline instead, drop archive CSVs into datasets/FIRMS/.")
    if _firms_map_key():
        print(" OK A FIRMS key is already configured.")
        return
    key = _prompt(" Paste your FIRMS MAP_KEY (blank to skip): ")
    if not key:
        print(" -- Skipped. FRP needs either a FIRMS key or the local FEDS "
              "firepix archive (pre-2025).")
        return
    # Tri-state result: True = verified, False = rejected, None = unknown.
    valid = firms_key_valid(key)
    if valid is False:
        if not _yes_no(" ! That key looks invalid. Save it anyway?", default=False):
            print(" Skipped.")
            return
    elif valid is None:
        print(" (could not verify the key online -- saving it as given)")
    else:
        print(" OK Key verified.")
    set_env_var("FIRMS_MAP_KEY", key, path)
315
+
316
+
317
def _setup_feds() -> None:
    """Wizard step 3/5: stage the FEDS-MTBS archive, or settle for on-the-fly fetching.

    Returns early when a fire list or any GeoPackage already exists under
    ``FEDS_DIR``. Otherwise the user may download the full archive; when that
    is declined or fails, the small example bundle is offered instead
    (only if ``examples_record_configured()`` is true).
    """
    print("\n[3/5] FEDS-MTBS archive (Zenodo: Chen et al. -- perimeter GeoPackages")
    print(" + firepix; geometry, active-fire window, and bounds per fire)")
    print(" Unlocks: " + ", ".join(FEDS_FEATURES) + " (and perimeter-masked FRP)")
    print(f" Full dataset (7,739 fires, 2012-2024): {FEDS_MTBS_ZENODO_URL}")
    print(" (Fire NAME + acreage are configured separately in step [5/5].)")
    firelist = find_feds_firelist()
    n_gpkg = count_feds_gpkgs(FEDS_DIR)
    if firelist or n_gpkg:
        print(f" OK Found a local FEDS archive in {FEDS_DIR} "
              f"(fire list: {'yes' if firelist else 'no'}, {n_gpkg} GeoPackage(s)).")
        return
    print(" Choose how to get the data:")
    print(f" - Manual : unzip {FEDS_MTBS_ZIP_NAME} into {FEDS_DIR}/")
    print(" - Full download : zip (~370 MB) into datasets/ (saves bandwidth")
    print(" on repeated runs; all fires resolve offline)")
    print(" - On-the-fly : each requested fire is range-pulled from")
    print(" Zenodo into cache/ as needed (saves disk)")
    if _yes_no(" Download the full FEDS-MTBS archive (zip) now?", default=False):
        # Imported lazily to avoid a config -> remote_archive import at load.
        from firedataforge.remote_archive import download_full_feds_archive
        try:
            download_full_feds_archive()  # zip only; fire list is step [5/5]
            print(f" OK Full FEDS-MTBS archive unpacked into {FEDS_DIR}.")
            return
        except Exception as exc:  # pragma: no cover - network dependent
            # Deliberately broad: any download failure degrades to on-the-fly.
            print(f" ! Could not download the full archive ({exc}); "
                  "falling back to on-the-fly fetching.")
    else:
        print(f" -- On-the-fly: fires stream into {FEDS_CACHE_DIR}/ as needed.")
    # On-the-fly chosen (or full download failed): offer the tiny example bundle
    # so the demos/benchmark/validation can run without any large download.
    from firedataforge.examples import examples_record_configured, fetch_examples
    if examples_record_configured() and _yes_no(
            " Also download the 8 example fires now (~22 MB, lets the demos run)?"):
        try:
            fetch_examples()
            print(f" OK Example fires downloaded to {FEDS_DIR}.")
        except Exception as exc:  # pragma: no cover - network dependent
            print(f" ! Could not fetch examples ({exc}); skipping.")
357
+
358
+
359
def _setup_globalwui() -> None:
    """Wizard step 4/5: optionally download the full Global WUI archive.

    Returns early when ``DEFAULT_GLOBALWUI_DIR`` already contains at least one
    ``.tif`` file (searched recursively). A declined or failed download only
    prints a message stating that tiles are streamed on demand.
    """
    print("\n[4/5] Global WUI (wildland-urban interface)")
    print(" Unlocks: wui")
    # Imported lazily: wui pulls in rioxarray/rasterio, heavy for metadata-only use.
    from firedataforge.sources.wui import (
        DEFAULT_GLOBALWUI_DIR, download_globalwui_archive,
    )
    # NOTE: the ".tif" suffix test is case-sensitive.
    if os.path.isdir(DEFAULT_GLOBALWUI_DIR) and any(
            f.endswith(".tif")
            for _, _, fs in os.walk(DEFAULT_GLOBALWUI_DIR) for f in fs):
        print(f" OK Found a local Global WUI archive in {DEFAULT_GLOBALWUI_DIR}.")
        return
    print(" Choose how to get the data:")
    print(" - Full download : ~3.8 GB North America archive into datasets/")
    print(" - On-the-fly : only the ~32 KB tiles each fire needs are")
    print(" streamed into cache/ (recommended)")
    if _yes_no(" Download the full Global WUI archive now?", default=False):
        try:
            download_globalwui_archive()
            print(f" OK Full Global WUI archive unpacked into {DEFAULT_GLOBALWUI_DIR}.")
        except Exception as exc:  # pragma: no cover - network dependent
            # Deliberately broad: any download failure degrades to streaming.
            print(f" ! Could not download the full archive ({exc}); "
                  "tiles will stream on demand instead.")
    else:
        print(" -- On-the-fly: tiles stream into cache/GlobalWUI/ as needed.")
384
+
385
+
386
def _setup_fire_metadata(path: str) -> None:
    """Let the user choose where the fire NAME + acreage come from.

    The GeoPackage of step [3/5] already supplies each fire's geometry, active-fire
    window, and bounds; only the human-readable name + acreage are missing, and the
    three sources trade off accuracy, recency, and reliability differently. The
    pipeline always prefers, in order, whatever is present: FEDS-MTBS fire list >
    MTBS fire list > live mtbs.gov > the Event ID. This step just stages the source
    the user prefers.

    Returns early when a FEDS-MTBS fire list or the MTBS fire-list cache already
    exists. Download/build failures are reported and skipped, never raised.

    Args:
        path: ``.env`` location passed in by the wizard; this step does not
            read or write it.
    """
    print("\n[5/5] Fire name & acreage (event metadata only -- geometry/window/")
    print(" bounds already come from the FEDS GeoPackage)")
    feds_fl = find_feds_firelist()
    mtbs_fl = os.path.exists(DEFAULT_FIRELIST_CACHE)
    if feds_fl or mtbs_fl:
        have = []
        if feds_fl:
            have.append(f"FEDS-MTBS fire list ({os.path.basename(feds_fl)})")
        if mtbs_fl:
            have.append(f"MTBS fire list ({DEFAULT_FIRELIST_CACHE})")
        print(f" OK Already available: {', '.join(have)}.")
        return
    print(" Pick a source (all are optional; the gpkg still drives the data):")
    print(f" 1) FEDS-MTBS fire list -- {FEDS_MTBS_FIRELIST_NAME} from Zenodo")
    print(" (~280 MB). Acreage aligned to FEDS;")
    print(" 2012-2024 only; offline & most reliable.")
    print(" 2) MTBS fire list -- built from mtbs.gov (~30k fires, ~30s).")
    print(" More recent coverage; acreage NOT FEDS-")
    print(" aligned; offline once built.")
    print(" 3) On-the-fly (mtbs.gov) -- nothing staged; resolved live per fire.")
    print(" Most up-to-date, but per-event network")
    print(" and least reliable.")
    # Any answer other than "1" or "2" (including blank/EOF) selects option 3.
    choice = _prompt(" Choose 1/2/3 [3]: ", default="3").strip()
    if choice == "1":
        from firedataforge.remote_archive import download_feds_firelist
        try:
            dest = download_feds_firelist(FEDS_DIR)
            # A falsy return value is treated as a failed download.
            if dest:
                print(f" OK FEDS-MTBS fire list saved to {dest}")
            else:
                print(" ! Could not download the FEDS-MTBS fire list; "
                      "names will fall back to mtbs.gov / the Event ID.")
        except Exception as exc:  # pragma: no cover - network dependent
            print(f" ! Could not download the FEDS-MTBS fire list ({exc}); skipping.")
    elif choice == "2":
        # Imported lazily to avoid a config -> events import cycle at module load.
        from firedataforge.events import build_firelist
        try:
            build_firelist(DEFAULT_FIRELIST_CACHE)
            print(f" OK Saved offline MTBS fire list to {DEFAULT_FIRELIST_CACHE}")
        except Exception as exc:  # pragma: no cover - network dependent
            print(f" ! Could not build the MTBS fire list ({exc}); skipping.")
    else:
        print(" -- On-the-fly: names/acreage resolved live from mtbs.gov, "
              "falling back to the Event ID. Stage a list later with "
              "`python main.py --build-firelist`.")
442
+
443
+
444
def run_setup_wizard(path: str = ENV_PATH) -> None:
    """Walk through the five setup steps and persist the results to ``.env``.

    Every step is optional for the user. A ``FIREDATAFORGE_SETUP`` timestamp is
    always written at the end, which creates the ``.env`` file and therefore
    stops the wizard from being triggered again on the next run.
    """
    rule = "=" * 70
    print(rule)
    print(" FireDataForge -- first-run setup")
    print(rule)
    print("All steps are optional; skip any and rerun later with: python main.py --setup")
    print("Features needing no setup: " + ", ".join(FREE_FEATURES))

    # Steps run in their numbered order, [1/5] through [5/5].
    _setup_gee(path)
    _setup_firms(path)
    _setup_feds()
    _setup_globalwui()
    _setup_fire_metadata(path)

    # Write the marker so .env exists and we don't nag on the next run.
    stamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    set_env_var("FIREDATAFORGE_SETUP", stamp, path)
    print(f"\nSettings saved to {path}. Setup complete.\n")
461
+
462
+
463
def ensure_setup(interactive: Optional[bool] = None, path: str = ENV_PATH) -> None:
    """Run the wizard on a first interactive run, then load the persisted settings.

    Args:
        interactive: Force (True) or forbid (False) the wizard; ``None`` asks
            ``is_interactive()``.
        path: ``.env`` file to create and/or load.
    """
    wizard_allowed = is_interactive() if interactive is None else interactive
    if is_first_run(path) and wizard_allowed:
        run_setup_wizard(path)
    load_env(path)
470
+
471
+
472
def _firms_map_key() -> Optional[str]:
    """Return the stripped NASA FIRMS MAP_KEY from the environment, if any.

    ``FIRMS_MAP_KEY`` is consulted first and ``MAP_KEY`` second; an unset or
    empty variable is skipped.
    """
    for name in ("FIRMS_MAP_KEY", "MAP_KEY"):
        raw = os.environ.get(name)
        if raw:
            return raw.strip()
    return None