firedataforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firedataforge/__about__.py +1 -0
- firedataforge/__init__.py +43 -0
- firedataforge/cli.py +153 -0
- firedataforge/config.py +475 -0
- firedataforge/constants.py +199 -0
- firedataforge/events.py +597 -0
- firedataforge/examples.py +109 -0
- firedataforge/io.py +140 -0
- firedataforge/pipeline.py +421 -0
- firedataforge/progress.py +222 -0
- firedataforge/remote_archive.py +286 -0
- firedataforge/schemas.py +210 -0
- firedataforge/sources/__init__.py +1 -0
- firedataforge/sources/feds.py +770 -0
- firedataforge/sources/frp.py +828 -0
- firedataforge/sources/gee.py +731 -0
- firedataforge/sources/mtbs.py +161 -0
- firedataforge/sources/nifc.py +212 -0
- firedataforge/sources/weather.py +391 -0
- firedataforge/sources/wui.py +405 -0
- firedataforge-0.1.0.dist-info/METADATA +1099 -0
- firedataforge-0.1.0.dist-info/RECORD +25 -0
- firedataforge-0.1.0.dist-info/WHEEL +4 -0
- firedataforge-0.1.0.dist-info/entry_points.txt +2 -0
- firedataforge-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""FireDataForge: a unified framework for multi-source wildfire data retrieval and integration.
|
|
2
|
+
|
|
3
|
+
The public API is re-exported here so consumers can ``from firedataforge import
|
|
4
|
+
forge_event`` (or ``import firedataforge as fdf``) without reaching into submodules.
|
|
5
|
+
See ``firedataforge.cli`` for the command-line entry point.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from firedataforge.__about__ import __version__
|
|
9
|
+
from firedataforge.config import (
|
|
10
|
+
ensure_ca_bundle, ensure_setup, feds_available, gee_ready, load_env,
|
|
11
|
+
run_setup_wizard,
|
|
12
|
+
)
|
|
13
|
+
from firedataforge.constants import DEFAULT_FIRE_WINDOW_DAYS
|
|
14
|
+
from firedataforge.events import (
|
|
15
|
+
build_firelist, get_fire_info, get_task_info, read_feds_firelist,
|
|
16
|
+
validate_projected_crs,
|
|
17
|
+
)
|
|
18
|
+
from firedataforge.examples import fetch_examples
|
|
19
|
+
from firedataforge.io import load_numpy, save_coordinates, save_numpy
|
|
20
|
+
from firedataforge.pipeline import (
|
|
21
|
+
forge_event, parse_batch_input, process_batch, process_single_fire,
|
|
22
|
+
)
|
|
23
|
+
from firedataforge.sources.feds import find_event_gpkg
|
|
24
|
+
from firedataforge.schemas import (
|
|
25
|
+
DataLayer, FireEvent, GeoReference, ProcessingArgs, ProcessingTask,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Import-time side effects, in this order:
#   1. merge the persisted .env into os.environ (values already present in the
#      real environment are left alone), then
#   2. make sure HTTPS calls have a usable CA bundle (some HPC Pythons ship none).
load_env()
ensure_ca_bundle()

# Public API re-exported at package level.
__all__ = [
    "__version__",
    # pipeline entry points
    "forge_event",
    "process_single_fire",
    "process_batch",
    "parse_batch_input",
    # event resolution
    "get_fire_info",
    "get_task_info",
    "validate_projected_crs",
    "find_event_gpkg",
    "read_feds_firelist",
    "build_firelist",
    # examples and array I/O
    "fetch_examples",
    "load_numpy",
    "save_numpy",
    "save_coordinates",
    # configuration / setup
    "gee_ready",
    "feds_available",
    "run_setup_wizard",
    "ensure_setup",
    "load_env",
    "ensure_ca_bundle",
    # schemas
    "FireEvent",
    "ProcessingTask",
    "ProcessingArgs",
    "DataLayer",
    "GeoReference",
    # constants
    "DEFAULT_FIRE_WINDOW_DAYS",
]
|
firedataforge/cli.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Command-line interface for FireDataForge."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from firedataforge.config import (
|
|
8
|
+
ensure_setup, is_first_run, is_interactive, load_env, run_setup_wizard,
|
|
9
|
+
)
|
|
10
|
+
from firedataforge.constants import BASE_DIR, CACHE_DIR, FEDS_DIR
|
|
11
|
+
from firedataforge.events import build_firelist, validate_projected_crs
|
|
12
|
+
from firedataforge.examples import fetch_examples
|
|
13
|
+
from firedataforge.pipeline import (
|
|
14
|
+
AVAILABLE_LAYERS, LAYER_ALIASES, parse_batch_input, process_batch,
|
|
15
|
+
process_single_fire,
|
|
16
|
+
)
|
|
17
|
+
from firedataforge.schemas import ProcessingArgs
|
|
18
|
+
|
|
19
|
+
log = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line argument parser (no parsing, no side effects)."""
    parser = argparse.ArgumentParser(
        description="FireDataForge -- unified multi-source wildfire data retrieval and integration",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # event_id and --batch are mutually exclusive and both optional, so the
    # stand-alone maintenance flags (--setup, --build-firelist,
    # --fetch-examples) can run on their own.
    input_group = parser.add_mutually_exclusive_group(required=False)
    input_group.add_argument(
        "event_id", type=str, nargs="?",
        help="MTBS Event ID to process (e.g. CA3432611848120191010)")
    input_group.add_argument(
        "--batch", type=str,
        help="A file of Event IDs (one per line) or a comma-separated list")

    parser.add_argument("--setup", action="store_true",
                        help="Run the interactive credential wizard and exit")
    parser.add_argument("--build-firelist", dest="build_firelist", action="store_true",
                        help="Download the full MTBS archive to the offline fire-list cache and exit")
    parser.add_argument("--fetch-examples", dest="fetch_examples", action="store_true",
                        help="Download examples.zip from Zenodo and unzip it under the "
                             "FireDataForge home (datasets/FEDS25MTBS/ + events.txt; the "
                             "repo root for a source checkout, else ~/.firedataforge), then exit")
    parser.add_argument("--workers", "-w", type=int, default=1,
                        help="Events processed in parallel in batch mode (1-4 recommended)")
    parser.add_argument("--layer-workers", dest="layer_workers", type=int, default=5,
                        help="Concurrent layer downloads within a single event")
    parser.add_argument("--resolution", "-r", type=int, default=30,
                        help="Target spatial resolution in meters")
    parser.add_argument("--buffer", "-b", type=int, default=100,
                        help="Buffer around the fire bounds in meters")
    parser.add_argument("--crs", "-c", type=str, default="EPSG:5070",
                        help="Target coordinate reference system")
    parser.add_argument("--output_dir", "-o", type=str, default="output",
                        help="Output directory")
    parser.add_argument("--interpolation", "-t", type=int, default=0,
                        help="Intermediate frames to interpolate between perimeter timesteps")
    parser.add_argument("--cache_dir", type=str, default=CACHE_DIR,
                        help="Root directory for all on-the-fly downloads "
                             "(HRRR, FIRMS, FEDS, firepix, WUI, fire list); each "
                             "caches under its own fixed subfolder of this root")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Verbose (DEBUG) logging")
    parser.add_argument("--only", type=str, default=None,
                        help="Comma-separated subset of layers to process. Available: "
                             + ", ".join(AVAILABLE_LAYERS))
    return parser


def main() -> None:
    """Command-line entry point: resolve an MTBS Event ID (or a batch) and forge it.

    Flow:
      1. Parse arguments and configure logging.
      2. Run a stand-alone maintenance command (``--setup``,
         ``--build-firelist``, ``--fetch-examples``) and return, if requested.
      3. Run the first-run wizard when applicable, then require an event or batch.
      4. Validate ``--crs`` and ``--only`` before any processing starts.
      5. Dispatch to ``process_batch`` or ``process_single_fire``.

    Raises:
        SystemExit: status 2 via ``parser.error`` for invalid arguments;
            status 1 when ``--fetch-examples`` fails or when ``--batch``
            yields no event IDs.
    """
    parser = _build_parser()
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    # Stand-alone maintenance commands.
    load_env()
    if args.setup:
        run_setup_wizard()
        return
    if args.build_firelist:
        build_firelist()
        return
    if args.fetch_examples:
        try:
            fetch_examples()
        except Exception as exc:
            log.error(str(exc))
            # Chain the cause so a debugger/traceback still shows what failed.
            raise SystemExit(1) from exc
        log.info(
            "Example FEDS-MTBS fires ready in %s (event IDs listed in %s)",
            FEDS_DIR, os.path.join(BASE_DIR, "events.txt"))
        return

    # On the first interactive run with no event, configure and stop.
    # ``ran_wizard`` must be computed before ensure_setup(), because the wizard
    # creates the .env file that is_first_run() looks for.
    ran_wizard = is_first_run() and is_interactive()
    ensure_setup()
    if ran_wizard and not (args.event_id or args.batch):
        return

    if not args.event_id and not args.batch:
        parser.error("an event_id or --batch is required")

    # Fail fast on an unusable target CRS, before any network resolution: the
    # grid is built in metres, so a geographic or non-metric CRS cannot work.
    try:
        validate_projected_crs(args.crs)
    except ValueError as exc:
        parser.error(str(exc))

    only_features = (
        [f.strip() for f in args.only.split(",") if f.strip()] if args.only else None
    )
    # Validate --only against the known layer names (+ the landfire/hrrr
    # aliases). Unknown names are dropped with a warning; if nothing usable
    # remains (all typos, or only separators) the run is rejected instead of
    # silently producing an empty output dir.
    if only_features is not None:
        valid_only = set(AVAILABLE_LAYERS) | set(LAYER_ALIASES.values())
        valid_names = ", ".join(AVAILABLE_LAYERS + ["landfire", "hrrr"])
        unknown = [f for f in only_features if f not in valid_only]
        if unknown:
            only_features = [f for f in only_features if f in valid_only]
            log.warning(
                "Ignoring unknown --only layer(s): %s. Valid names: %s",
                ", ".join(unknown), valid_names,
            )
        if not only_features:
            parser.error(
                "--only contained no recognized layer names. Valid names: "
                + valid_names)

    processing_args = ProcessingArgs(
        resolution=args.resolution,
        buffer=args.buffer,
        crs=args.crs,
        output_dir=args.output_dir,
        interpolation=args.interpolation,
        cache_dir=args.cache_dir,
        verbose=args.verbose,
        only=only_features,
        layer_workers=args.layer_workers,
    )

    # Earth Engine is initialized lazily and per-event (fail-soft); no global init
    # here, so a GEE outage never blocks the FEDS/FIRMS/HRRR layers.
    if args.batch:
        event_ids = parse_batch_input(args.batch)
        if not event_ids:
            log.error("No valid event IDs found in batch input")
            # Exit non-zero so shell scripts and job schedulers see the failure.
            raise SystemExit(1)
        process_batch(event_ids, processing_args, max_workers=args.workers)
    else:
        process_single_fire(args.event_id, processing_args)
|
firedataforge/config.py
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
1
|
+
"""Credentials, the first-run setup wizard, and dataset/credential discovery."""
|
|
2
|
+
|
|
3
|
+
import glob
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import urllib.error
|
|
9
|
+
import urllib.request
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from firedataforge.constants import (
|
|
14
|
+
BASE_DIR, DEFAULT_FIRELIST_CACHE, FEDS_CACHE_DIR, FEDS_DIR,
|
|
15
|
+
FEDS_MTBS_FIRELIST_NAME, FEDS_MTBS_ZENODO_URL, FEDS_MTBS_ZIP_NAME,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
log = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Persist settings under the FireDataForge home (BASE_DIR): the repo root for a
|
|
22
|
+
# source checkout (next to ``main.py``, where dev users expect it), or
|
|
23
|
+
# ``~/.firedataforge`` for an installed package -- never inside a possibly
|
|
24
|
+
# read-only ``site-packages``. Override the location with FIREDATAFORGE_HOME.
|
|
25
|
+
ENV_PATH = os.path.join(BASE_DIR, ".env")
|
|
26
|
+
|
|
27
|
+
# Map of credential / dependency -> output features it unlocks. The setup
# wizard prints these so the user sees exactly what skipping a step costs.
# Keep in sync with the layer registry in ``process_single_fire`` and the
# README feature table.
GEE_FEATURES = [
    "elevation",
    "terrain_rgb",
    "canopy_bulk_density",
    "canopy_cover",
    "building_height",
    "landcover",
    "lai",
    "sentinel2_rgb",
]
FIRMS_FEATURES = [
    "frp_daytime",
    "frp_nighttime",
]
FEDS_FEATURES = [
    "burn_perimeter",
    "fireline",
    "fireline_max_frp",
]
# Features that need no credentials at all (shown for reassurance).
FREE_FEATURES = [
    "wui",
    "recent_burn",
    "r2",
    "u10",
    "v10",
]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _parse_env_file(path: str) -> dict[str, str]:
    """Parse a ``.env`` file into a dict of KEY -> value (last wins).

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored, as are assignments with an empty key. Keys and values are
    whitespace-stripped. A value wrapped in one *matching* pair of single or
    double quotes has that pair removed; quotes that are not a matching pair
    are part of the value and are preserved (so a secret ending in ``'`` is
    not silently truncated).

    Args:
        path: Location of the ``.env`` file.

    Returns:
        The parsed mapping; empty when ``path`` does not exist.
    """
    data: dict[str, str] = {}
    if not os.path.exists(path):
        return data

    def _unquote(raw: str) -> str:
        # Only strip quotes that actually enclose the value.
        if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ("'", '"'):
            return raw[1:-1]
        return raw

    with open(path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, sep, value = line.partition("=")
            if not sep:
                continue  # not an assignment
            key = key.strip()
            if key:
                data[key] = _unquote(value.strip())
    return data
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def load_env(path: str = ENV_PATH) -> None:
    """Merge the ``.env`` file at ``path`` into ``os.environ``.

    Only keys that are not already set are added, so the real environment
    always wins over the persisted file. Calling this repeatedly (including at
    import time) is harmless.
    """
    persisted = _parse_env_file(path)
    for name in persisted:
        if name not in os.environ:
            os.environ[name] = persisted[name]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def ensure_ca_bundle() -> None:
    """Fall back to certifi's CA bundle when no default CA file is available.

    Some Python builds (e.g. uv-managed CPython on HPC nodes) ship no CA
    bundle, so every HTTPS call fails with ``CERTIFICATE_VERIFY_FAILED:
    unable to get local issuer certificate``. This function does nothing when:

    * ``SSL_CERT_FILE`` or ``SSL_CERT_DIR`` is already set (explicit user choice),
    * the interpreter's default ``cafile`` exists on disk, or
    * ``certifi`` is not importable or its bundle file is missing.

    Otherwise it sets ``SSL_CERT_FILE`` to the certifi bundle and, unless
    already set, ``REQUESTS_CA_BUNDLE`` too. The values go into the process
    environment so child processes (notably ``earthengine authenticate``)
    inherit them.
    """
    user_choice = os.environ.get("SSL_CERT_FILE") or os.environ.get("SSL_CERT_DIR")
    if user_choice:
        return  # respect an explicit user choice

    import ssl

    system_cafile = ssl.get_default_verify_paths().cafile
    if system_cafile and os.path.exists(system_cafile):
        return  # the system store already works

    try:
        import certifi
    except ImportError:
        return

    certifi_bundle = certifi.where()
    if os.path.exists(certifi_bundle):
        os.environ["SSL_CERT_FILE"] = certifi_bundle
        os.environ.setdefault("REQUESTS_CA_BUNDLE", certifi_bundle)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def set_env_var(key: str, value: str, path: str = ENV_PATH) -> None:
    """Upsert ``key=value`` into the ``.env`` file and the live environment.

    Every existing, non-comment assignment to ``key`` is rewritten in place;
    all other lines (comments, blanks, other keys) are kept verbatim. If no
    assignment to ``key`` exists, one is appended. The parent directory is
    created when missing.

    Because the file holds credentials (e.g. the FIRMS map key), its
    permissions are tightened to owner read/write before the content is
    written. This is best-effort: platforms or filesystems that reject the
    mode change are tolerated.

    Args:
        key: Variable name.
        value: Variable value, written as-is (unquoted).
        path: Location of the ``.env`` file.
    """
    lines: list[str] = []
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            lines = f.read().splitlines()

    assignment = f"{key}={value}"
    out: list[str] = []
    found = False
    for line in lines:
        stripped = line.strip()
        is_assignment = (
            stripped and not stripped.startswith("#") and "=" in stripped
            and stripped.split("=", 1)[0].strip() == key
        )
        if is_assignment:
            out.append(assignment)
            found = True
        else:
            out.append(line)
    if not found:
        out.append(assignment)

    # The home dir may not exist yet on an installed user's first run.
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        # Restrict access while the file is still empty, so secrets are never
        # on disk under a world-readable mode (matters on shared/HPC hosts).
        try:
            os.chmod(path, 0o600)
        except OSError:
            pass  # best-effort: not every platform/filesystem supports this
        f.write("\n".join(out).rstrip("\n") + "\n")
    os.environ[key] = value
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def is_first_run(path: str = ENV_PATH) -> bool:
    """Report whether setup has never been persisted, i.e. ``path`` is absent."""
    if os.path.exists(path):
        return False
    return True
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def is_interactive() -> bool:
    """True when both stdin and stdout are attached to a terminal.

    A stream that is missing (``None``), closed, or has no ``isatty`` method
    counts as non-interactive instead of raising, so callers can use this
    safely in detached or embedded processes.
    """
    for stream in (sys.stdin, sys.stdout):
        if stream is None:
            return False
        try:
            if not stream.isatty():
                return False
        except (AttributeError, ValueError):
            # AttributeError: replacement stream without isatty();
            # ValueError: isatty() on a closed file.
            return False
    return True
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _prompt(message: str, default: str = "") -> str:
    """Ask the user for a line of input and return it whitespace-stripped.

    ``default`` is returned when the reply is empty, or when input ends or is
    interrupted (EOF / Ctrl-C); in the latter case a newline is printed first
    so later output starts on a fresh line.
    """
    try:
        reply = input(message)
    except (EOFError, KeyboardInterrupt):
        print()
        return default
    reply = reply.strip()
    return reply if reply else default
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _yes_no(message: str, default: bool = True) -> bool:
    """Ask a yes/no question; an empty reply selects ``default``.

    The prompt shows ``[Y/n]`` or ``[y/N]`` according to the default. Any
    reply beginning with "y" (case-insensitive) means yes; any other
    non-empty reply means no.
    """
    if default:
        hint = " [Y/n] "
    else:
        hint = " [y/N] "
    reply = _prompt(message + hint).lower()
    if reply:
        return reply.startswith("y")
    return default
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def gee_ready(project: Optional[str] = None) -> bool:
    """Return True if Earth Engine can initialize and answer a trivial query now.

    Args:
        project: Google Cloud project ID. When omitted or empty, the
            ``EARTHENGINE_PROJECT`` environment variable is used; if that is
            unset too, Earth Engine is initialized without a project.

    Returns:
        False when the ``ee`` package is not installed, when initialization
        fails, or when the probe query fails; True otherwise. Never raises
        for those conditions, so the setup wizard can call it before the
        dependency is confirmed to be present.
    """
    try:
        # Imported lazily so this module stays light for metadata-only use.
        import ee
    except ImportError:
        return False

    project = project or os.environ.get("EARTHENGINE_PROJECT")
    try:
        if project:
            ee.Initialize(project=project)
        else:
            ee.Initialize()
        # Round-trip a trivial value to prove the session really works.
        ee.Number(1).getInfo()
    except Exception:
        return False
    return True
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def firms_key_valid(map_key: str) -> Optional[bool]:
    """Best-effort online check of a FIRMS MAP_KEY.

    Args:
        map_key: The key to check. Surrounding whitespace is ignored.

    Returns:
        * ``False`` for a blank key (no request is made), for an HTTP client
          error, or when the response body mentions "invalid"/"error" or
          lacks the expected status fields.
        * ``True`` when the response body looks like a key-status report.
        * ``None`` when the check itself could not be completed (no network,
          server-side 5xx, timeout 408, rate-limit 429), so the caller can
          treat the result as "unknown".
    """
    from urllib.parse import quote

    key = (map_key or "").strip()
    if not key:
        return False  # nothing to verify; avoid a pointless network call

    # Percent-encode the key so unexpected characters cannot break the URL.
    url = (
        "https://firms.modaps.eosdis.nasa.gov/mapserver/mapkey_status/?MAP_KEY="
        + quote(key, safe="")
    )
    try:
        with urllib.request.urlopen(url, timeout=30) as resp:
            body = resp.read().decode("utf-8", "replace").lower()
    except urllib.error.HTTPError as exc:
        # Server trouble or throttling says nothing about the key itself.
        if exc.code >= 500 or exc.code in (408, 429):
            return None
        return False
    except Exception:
        return None
    if "invalid" in body or "error" in body:
        return False
    # "transaction" also covers the "current_transactions" field.
    return "transaction" in body or "map_key" in body
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def find_feds_firelist(feds_dir: str = FEDS_DIR) -> Optional[str]:
    """Locate the FEDS-MTBS fire list in ``feds_dir``, if present.

    Matches, in order of preference:

    * the bundled example list ``fireslist_examples.csv`` shipped by
      ``--fetch-examples`` (FEDS perimeter bbox plus explicit ``tst``/``ted``
      for the eight demo fires), then
    * the released summary list ``fireslist_FEDS25MTBS_2012-2024.geojson``
      (MTBS final perimeters + metadata for all 7,739 fires).

    Any top-level, non-hidden file whose name contains both "fire" and "list"
    and whose extension is ``.geojson``, ``.gpkg`` or ``.csv`` is accepted.
    Both the name and the extension are compared case-insensitively, so a
    spelling or case difference never disables this offline metadata source.

    Ranking (best first): a name containing "example"; then
    ``.geojson``/``.gpkg`` before ``.csv``; then a name containing the full
    word "firelist"/"fireslist"; then alphabetical by lowercased name.

    Args:
        feds_dir: Directory to search (not recursive).

    Returns:
        Path of the best candidate, or ``None`` when the directory is missing,
        unreadable, or holds no matching file.
    """
    if not os.path.isdir(feds_dir):
        return None

    ext_rank = {".geojson": 0, ".gpkg": 0, ".csv": 1}

    try:
        names = os.listdir(feds_dir)
    except OSError:
        return None  # unreadable directory: behave as "nothing found"

    candidates = []
    for name in names:
        if name.startswith("."):
            continue  # hidden files were never matched by the old glob either
        lowered = name.lower()
        if os.path.splitext(lowered)[1] not in ext_rank:
            continue
        if "list" not in lowered or "fire" not in lowered:
            continue
        full = os.path.join(feds_dir, name)
        if os.path.isfile(full):
            candidates.append(full)

    def _key(f: str) -> tuple:
        base = os.path.basename(f).lower()
        _, ext = os.path.splitext(base)
        not_example = "example" not in base  # examples list first (False < True)
        unnamed = "firelist" not in base and "fireslist" not in base
        # The full path is a final tie-break so the result is deterministic.
        return (not_example, ext_rank.get(ext, 2), unnamed, base, f)

    return min(candidates, key=_key) if candidates else None
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def count_feds_gpkgs(base_dir: str = FEDS_DIR) -> int:
    """Count the ``.gpkg`` files (any case) found anywhere under ``base_dir``.

    Returns 0 when ``base_dir`` is not an existing directory.
    """
    if not os.path.isdir(base_dir):
        return 0
    total = 0
    for _root, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            if filename.lower().endswith(".gpkg"):
                total += 1
    return total
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def feds_available() -> bool:
    """True when FEDS25MTBS data (fire list or GeoPackages) is present locally.

    Looks for a fire list first, then for GeoPackages in the user archive
    (``FEDS_DIR``) and finally in the software cache (``FEDS_CACHE_DIR``), so
    a run that has already lazily fetched fires still counts.
    """
    if find_feds_firelist() is not None:
        return True
    # any() is lazy: the cache is only walked when the archive holds nothing.
    return any(count_feds_gpkgs(root) > 0 for root in (FEDS_DIR, FEDS_CACHE_DIR))
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _setup_gee(path: str) -> None:
    """Wizard step 1/5: authenticate Earth Engine and record the project ID.

    Returns early when Earth Engine already works or the user declines.
    Otherwise runs ``earthengine authenticate``, asks for a Google Cloud
    project ID and, when one is given, registers it with the CLI and saves it
    as ``EARTHENGINE_PROJECT`` in the ``.env`` file at ``path``. Finishes by
    re-checking readiness and reporting the outcome.
    """
    unlocked = ", ".join(GEE_FEATURES)
    print("\n[1/5] Google Earth Engine")
    print(" Unlocks: " + unlocked)
    print(" Get access: enable Earth Engine on a Google Cloud project at "
          "https://console.cloud.google.com/")

    if gee_ready():
        print(" OK Already authenticated and initialized.")
        return

    wants_setup = _yes_no(" Configure Earth Engine now?")
    if not wants_setup:
        print(" -- Skipped. Disabled features: " + ", ".join(GEE_FEATURES))
        return

    print(" Launching 'earthengine authenticate'...")
    print(" A URL will be printed; open it, authorize, and paste the code back.")
    # auth_mode=notebook prints a URL + accepts a pasted verification code,
    # so it works on headless/HPC nodes with no browser and no gcloud CLI.
    auth_command = ["earthengine", "authenticate", "--auth_mode=notebook"]
    try:
        subprocess.run(auth_command, check=False)
    except FileNotFoundError:
        print(" ! 'earthengine' CLI not found. Install with: pip install earthengine-api")

    for instruction in (
        " To get a project ID:",
        " 1. Go to Google Cloud Console: https://console.cloud.google.com/",
        " 2. Create or select a project with Earth Engine enabled:",
        " https://developers.google.com/earth-engine/guides/access",
        " 3. Copy the project ID",
    ):
        print(instruction)

    project = _prompt(" Google Cloud project ID (blank to skip): ")
    if project:
        try:
            subprocess.run(["earthengine", "set_project", project], check=False)
        except FileNotFoundError:
            # CLI missing: the project is still persisted below.
            pass
        set_env_var("EARTHENGINE_PROJECT", project, path)

    if not gee_ready(project or None):
        print(" ! Earth Engine still not initialized; GEE features will be "
              "skipped until setup is finished (rerun: python main.py --setup).")
    else:
        print(" OK Earth Engine is ready.")
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _setup_firms(path: str) -> None:
    """Wizard step 2/5: collect and persist the NASA FIRMS map key.

    Does nothing beyond printing when a key is already configured or the user
    leaves the prompt blank. A pasted key is checked online; a key judged
    invalid is saved only if the user confirms, while a verified or
    unverifiable key is saved as ``FIRMS_MAP_KEY`` in the ``.env`` file at
    ``path``.
    """
    for intro_line in (
        "\n[2/5] NASA FIRMS map key (VIIRS active fire / FRP)",
        " Unlocks: " + ", ".join(FIRMS_FEATURES),
        " Get access (free, instant): https://firms.modaps.eosdis.nasa.gov/api/map_key/",
        " Each fire's FRP is streamed from the Area API and cached per",
        " event under cache/FIRMS/ (no bulk download). To work fully",
        " offline instead, drop archive CSVs into datasets/FIRMS/.",
    ):
        print(intro_line)

    if _firms_map_key():
        print(" OK A FIRMS key is already configured.")
        return

    key = _prompt(" Paste your FIRMS MAP_KEY (blank to skip): ")
    if not key:
        print(" -- Skipped. FRP needs either a FIRMS key or the local FEDS "
              "firepix archive (pre-2025).")
        return

    verdict = firms_key_valid(key)
    if verdict is None:
        print(" (could not verify the key online -- saving it as given)")
    elif verdict is False:
        keep_anyway = _yes_no(" ! That key looks invalid. Save it anyway?", default=False)
        if not keep_anyway:
            print(" Skipped.")
            return
    else:
        print(" OK Key verified.")
    set_env_var("FIRMS_MAP_KEY", key, path)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _setup_feds() -> None:
    """Wizard step 3/5: stage the FEDS-MTBS archive, or opt for on-the-fly use.

    Returns immediately when a fire list or any GeoPackage is already present
    in ``FEDS_DIR``. Otherwise offers a full archive download. If the user
    declines (or the download fails), offers the small example bundle when
    ``examples_record_configured()`` reports it is available.
    """
    for intro_line in (
        "\n[3/5] FEDS-MTBS archive (Zenodo: Chen et al. -- perimeter GeoPackages",
        " + firepix; geometry, active-fire window, and bounds per fire)",
        " Unlocks: " + ", ".join(FEDS_FEATURES) + " (and perimeter-masked FRP)",
        f" Full dataset (7,739 fires, 2012-2024): {FEDS_MTBS_ZENODO_URL}",
        " (Fire NAME + acreage are configured separately in step [5/5].)",
    ):
        print(intro_line)

    firelist = find_feds_firelist()
    n_gpkg = count_feds_gpkgs(FEDS_DIR)
    if firelist or n_gpkg:
        has_list = "yes" if firelist else "no"
        print(f" OK Found a local FEDS archive in {FEDS_DIR} "
              f"(fire list: {has_list}, {n_gpkg} GeoPackage(s)).")
        return

    for option_line in (
        " Choose how to get the data:",
        f" - Manual : unzip {FEDS_MTBS_ZIP_NAME} into {FEDS_DIR}/",
        " - Full download : zip (~370 MB) into datasets/ (saves bandwidth",
        " on repeated runs; all fires resolve offline)",
        " - On-the-fly : each requested fire is range-pulled from",
        " Zenodo into cache/ as needed (saves disk)",
    ):
        print(option_line)

    wants_full = _yes_no(" Download the full FEDS-MTBS archive (zip) now?", default=False)
    if not wants_full:
        print(f" -- On-the-fly: fires stream into {FEDS_CACHE_DIR}/ as needed.")
    else:
        # Imported lazily to avoid a config -> remote_archive import at load.
        from firedataforge.remote_archive import download_full_feds_archive
        try:
            download_full_feds_archive()  # zip only; fire list is step [5/5]
            print(f" OK Full FEDS-MTBS archive unpacked into {FEDS_DIR}.")
            return
        except Exception as exc:  # pragma: no cover - network dependent
            print(f" ! Could not download the full archive ({exc}); "
                  "falling back to on-the-fly fetching.")

    # On-the-fly chosen (or full download failed): offer the tiny example bundle
    # so the demos/benchmark/validation can run without any large download.
    from firedataforge.examples import examples_record_configured, fetch_examples
    if not examples_record_configured():
        return
    if not _yes_no(
            " Also download the 8 example fires now (~22 MB, lets the demos run)?"):
        return
    try:
        fetch_examples()
        print(f" OK Example fires downloaded to {FEDS_DIR}.")
    except Exception as exc:  # pragma: no cover - network dependent
        print(f" ! Could not fetch examples ({exc}); skipping.")
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def _setup_globalwui() -> None:
    """Wizard step 4/5: stage the Global WUI archive, or opt for streaming.

    Returns early when ``DEFAULT_GLOBALWUI_DIR`` already contains at least
    one ``.tif`` file at any depth. Otherwise offers the full archive
    download; declining, or a failed download, leaves tiles to be streamed on
    demand.
    """
    print("\n[4/5] Global WUI (wildland-urban interface)")
    print(" Unlocks: wui")
    # Imported lazily: wui pulls in rioxarray/rasterio, heavy for metadata-only use.
    from firedataforge.sources.wui import (
        DEFAULT_GLOBALWUI_DIR,
        download_globalwui_archive,
    )

    def _has_local_tiles() -> bool:
        # True as soon as one .tif is found anywhere under the archive dir.
        if not os.path.isdir(DEFAULT_GLOBALWUI_DIR):
            return False
        for _root, _subdirs, filenames in os.walk(DEFAULT_GLOBALWUI_DIR):
            for filename in filenames:
                if filename.endswith(".tif"):
                    return True
        return False

    if _has_local_tiles():
        print(f" OK Found a local Global WUI archive in {DEFAULT_GLOBALWUI_DIR}.")
        return

    for option_line in (
        " Choose how to get the data:",
        " - Full download : ~3.8 GB North America archive into datasets/",
        " - On-the-fly : only the ~32 KB tiles each fire needs are",
        " streamed into cache/ (recommended)",
    ):
        print(option_line)

    wants_full = _yes_no(" Download the full Global WUI archive now?", default=False)
    if not wants_full:
        print(" -- On-the-fly: tiles stream into cache/GlobalWUI/ as needed.")
        return
    try:
        download_globalwui_archive()
        print(f" OK Full Global WUI archive unpacked into {DEFAULT_GLOBALWUI_DIR}.")
    except Exception as exc:  # pragma: no cover - network dependent
        print(f" ! Could not download the full archive ({exc}); "
              "tiles will stream on demand instead.")
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _setup_fire_metadata(path: str) -> None:
    """Wizard step 5/5: choose where the fire NAME + acreage come from.

    The GeoPackage of step [3/5] already supplies each fire's geometry,
    active-fire window, and bounds; only the human-readable name + acreage
    are missing, and the three sources trade off accuracy, recency, and
    reliability differently. The pipeline always prefers, in order, whatever
    is present: FEDS-MTBS fire list > MTBS fire list > live mtbs.gov > the
    Event ID. This step just stages the source the user prefers.

    Returns early when a FEDS-MTBS fire list or the cached MTBS fire list
    already exists. ``path`` is accepted for symmetry with the other
    credential steps but is not used here.
    """
    print("\n[5/5] Fire name & acreage (event metadata only -- geometry/window/")
    print(" bounds already come from the FEDS GeoPackage)")

    feds_fl = find_feds_firelist()
    mtbs_fl = os.path.exists(DEFAULT_FIRELIST_CACHE)
    if feds_fl or mtbs_fl:
        have = []
        if feds_fl:
            have.append(f"FEDS-MTBS fire list ({os.path.basename(feds_fl)})")
        if mtbs_fl:
            have.append(f"MTBS fire list ({DEFAULT_FIRELIST_CACHE})")
        already = ", ".join(have)
        print(f" OK Already available: {already}.")
        return

    for menu_line in (
        " Pick a source (all are optional; the gpkg still drives the data):",
        f" 1) FEDS-MTBS fire list -- {FEDS_MTBS_FIRELIST_NAME} from Zenodo",
        " (~280 MB). Acreage aligned to FEDS;",
        " 2012-2024 only; offline & most reliable.",
        " 2) MTBS fire list -- built from mtbs.gov (~30k fires, ~30s).",
        " More recent coverage; acreage NOT FEDS-",
        " aligned; offline once built.",
        " 3) On-the-fly (mtbs.gov) -- nothing staged; resolved live per fire.",
        " Most up-to-date, but per-event network",
        " and least reliable.",
    ):
        print(menu_line)

    choice = _prompt(" Choose 1/2/3 [3]: ", default="3").strip()

    if choice == "1":
        from firedataforge.remote_archive import download_feds_firelist
        try:
            dest = download_feds_firelist(FEDS_DIR)
            if not dest:
                print(" ! Could not download the FEDS-MTBS fire list; "
                      "names will fall back to mtbs.gov / the Event ID.")
            else:
                print(f" OK FEDS-MTBS fire list saved to {dest}")
        except Exception as exc:  # pragma: no cover - network dependent
            print(f" ! Could not download the FEDS-MTBS fire list ({exc}); skipping.")
        return

    if choice == "2":
        # Imported lazily to avoid a config -> events import cycle at module load.
        from firedataforge.events import build_firelist
        try:
            build_firelist(DEFAULT_FIRELIST_CACHE)
            print(f" OK Saved offline MTBS fire list to {DEFAULT_FIRELIST_CACHE}")
        except Exception as exc:  # pragma: no cover - network dependent
            print(f" ! Could not build the MTBS fire list ({exc}); skipping.")
        return

    # Anything else (including the default "3") means: stage nothing.
    print(" -- On-the-fly: names/acreage resolved live from mtbs.gov, "
          "falling back to the Event ID. Stage a list later with "
          "`python main.py --build-firelist`.")
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def run_setup_wizard(path: str = ENV_PATH) -> None:
    """Run the interactive first-run setup and persist results to ``.env``.

    Walks through the five setup steps in order, then writes a
    ``FIREDATAFORGE_SETUP`` timestamp to the ``.env`` file at ``path``. That
    write guarantees the file exists afterwards, which is what marks setup as
    done for later runs.
    """
    rule = "=" * 70
    for header_line in (
        rule,
        " FireDataForge -- first-run setup",
        rule,
        "All steps are optional; skip any and rerun later with: python main.py --setup",
        "Features needing no setup: " + ", ".join(FREE_FEATURES),
    ):
        print(header_line)

    _setup_gee(path)
    _setup_firms(path)
    _setup_feds()
    _setup_globalwui()
    _setup_fire_metadata(path)

    # Write the marker so .env exists and we don't nag on the next run.
    completed_at = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    set_env_var("FIREDATAFORGE_SETUP", completed_at, path)
    print(f"\nSettings saved to {path}. Setup complete.\n")
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def ensure_setup(interactive: Optional[bool] = None, path: str = ENV_PATH) -> None:
    """Run the wizard on the first interactive run, then load persisted settings.

    Args:
        interactive: Whether prompting is allowed. ``None`` (the default)
            means "detect it" via ``is_interactive()``.
        path: Location of the ``.env`` file.
    """
    may_prompt = is_interactive() if interactive is None else interactive
    if is_first_run(path) and may_prompt:
        run_setup_wizard(path)
    load_env(path)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def _firms_map_key() -> Optional[str]:
    """Return the NASA FIRMS MAP_KEY from the environment, if configured.

    ``FIRMS_MAP_KEY`` is consulted first, then ``MAP_KEY``. A variable that is
    unset, empty, or whitespace-only is treated as not configured, so a blank
    ``FIRMS_MAP_KEY`` does not hide a usable ``MAP_KEY``.

    Returns:
        The whitespace-stripped key, or ``None`` when neither variable holds
        a non-blank value.
    """
    for name in ("FIRMS_MAP_KEY", "MAP_KEY"):
        candidate = os.environ.get(name, "").strip()
        if candidate:
            return candidate
    return None
|