npm - ecological-agent-skills - Versions diffs - 3.1.0 - Mend

ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (217) hide show

package/templates/scripts/logger_setup.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""logger_setup.py — Structured logging template for ecological-agent-skills.
+Usage:
+    from pathlib import Path
+    import sys
+    sys.path.insert(0, str(Path(__file__).parents[2] / "templates" / "scripts"))
+    from logger_setup import setup_logger, log_step, log_decision, log_actionable_error
+    logger = setup_logger("skill-name")
+Log format : [TIMESTAMP] [LEVEL] [SKILL] message
+Log file   : logs/skill_{name}_{timestamp}.log  (relative to cwd)
+"""
+import logging
+import sys
+from datetime import datetime
+from pathlib import Path
+_SKILL_NAME = "eco-skill"
+def setup_logger(
+    skill_name: str = "eco-skill",
+    log_dir: str = "logs",
+    level: int = logging.INFO,
+) -> logging.Logger:
+    """Initialise a logger that writes to console AND a dated log file.
+    The logger is obtained with ``logging.getLogger(skill_name)``, so calling
+    this function again with the same name reconfigures the same logger: its
+    handler list is emptied and a fresh stdout handler and file handler are
+    attached. As side effects the module-level ``_SKILL_NAME`` is overwritten
+    with ``skill_name``, ``log_dir`` is created, and one INFO record naming
+    the log file is emitted before returning.
+    Parameters
+    ----------
+    skill_name : str
+        Short identifier used in the log filename and every log record.
+    log_dir : str
+        Directory where log files are written (created if absent).
+    level : int
+        Logging level (logging.DEBUG / INFO / WARNING / ERROR).
+    Returns
+    -------
+    logging.Logger
+        Configured logger instance.
+    """
+    global _SKILL_NAME
+    _SKILL_NAME = skill_name
+    Path(log_dir).mkdir(parents=True, exist_ok=True)
+    # Second-resolution timestamp: two calls for the same skill within one
+    # second resolve to the same log file path.
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+    log_file = Path(log_dir) / f"skill_{skill_name}_{ts}.log"
+    # The skill name is baked into the format string instead of being passed
+    # per record. NOTE(review): a literal "%" in skill_name would be read as a
+    # format directive -- confirm skill names never contain one.
+    fmt = f"[%(asctime)s] [%(levelname)s] [{skill_name}] %(message)s"
+    datefmt = "%Y-%m-%d %H:%M:%S"
+    formatter = logging.Formatter(fmt, datefmt=datefmt)
+    logger = logging.getLogger(skill_name)
+    logger.setLevel(level)
+    # Drop handlers attached by an earlier call so records are not emitted
+    # twice. NOTE(review): the removed handlers are not explicitly closed
+    # here, and logger.propagate is left at its current value -- confirm both
+    # are intended.
+    logger.handlers.clear()
+    # Console handler
+    ch = logging.StreamHandler(sys.stdout)
+    ch.setFormatter(formatter)
+    logger.addHandler(ch)
+    # File handler
+    fh = logging.FileHandler(log_file, encoding="utf-8")
+    fh.setFormatter(formatter)
+    logger.addHandler(fh)
+    logger.info("Logger initialised | skill=%s | log_file=%s", skill_name, log_file)
+    return logger
+# ── Convenience wrappers ─────────────────────────────────────────────────────
+def log_step(logger: logging.Logger, step_number: int, description: str) -> None:
+    """Mark the start of a numbered processing step.
+    Emits a single INFO record of the form ``── STEP <n>: <description>``.
+    Parameters
+    ----------
+    logger : logging.Logger
+        Logger that receives the record.
+    step_number : int
+        Step index; it is formatted with ``%d``, so it must be numeric.
+    description : str
+        Short text describing the step.
+    """
+    logger.info("── STEP %d: %s", step_number, description)
+def log_decision(
+    logger: logging.Logger, variable: str, value: object, rationale: str
+) -> None:
+    """Record an analytical decision with justification.
+    Emits a single INFO record of the form
+    ``DECISION | <variable> = <value> | rationale: <rationale>``.
+    Parameters
+    ----------
+    logger : logging.Logger
+        Logger that receives the record.
+    variable : str
+        Name of the parameter or choice being recorded.
+    value : object
+        Chosen value; it is rendered with ``%s``.
+    rationale : str
+        Justification for the choice.
+    """
+    logger.info("DECISION | %s = %s | rationale: %s", variable, value, rationale)
+def log_actionable_error(
+    logger: logging.Logger,
+    step: str,
+    error_msg: str,
+    probable_cause: str,
+    check_this: str,
+    # NOTE(review): "str | None" is evaluated when the function is defined and
+    # the visible module header has no "from __future__ import annotations",
+    # so this needs Python 3.10+ -- confirm the supported interpreter range.
+    prior_skill: str | None = None,
+) -> None:
+    """Log a structured, actionable error message.
+    Emits one ERROR record spanning several lines: the failing step and
+    error message, the probable cause, what to check, and -- only when
+    ``prior_skill`` is truthy -- the upstream skill. The message text itself
+    is written in Portuguese. Nothing is raised and nothing is returned.
+    Parameters
+    ----------
+    step           : Name of the failing processing step.
+    error_msg      : The exception message.
+    probable_cause : One-sentence explanation of likely cause.
+    check_this     : What the user should inspect to diagnose.
+    prior_skill    : Upstream skill that should have produced the missing input.
+    """
+    # Optional trailing line; Portuguese for "Upstream skill that should have
+    # produced this input". An empty string keeps the template below uniform.
+    prior_line = (
+        f"\n  Skill anterior que deveria ter produzido este input: {prior_skill}"
+        if prior_skill
+        else ""
+    )
+    # Template reads "Failure in <step>: <error>", "Probable cause", "Check".
+    # NOTE(review): the literal "[ERROR]" is part of the message, in addition
+    # to whatever level tag the logger's formatter prints -- confirm the
+    # duplication is wanted.
+    logger.error(
+        "[ERROR] Falha em %s: %s\n"
+        "  Causa provável: %s\n"
+        "  Verifique: %s%s",
+        step,
+        error_msg,
+        probable_cause,
+        check_this,
+        prior_line,
+    )

package/templates/scripts/params_loader.R ADDED Viewed

@@ -0,0 +1,28 @@
+# params_loader.R
+# Load project parameters from params.yaml into R environment
+# Source this at the top of every analysis script:
+#   source("scripts/params_loader.R")
+# Requires: yaml
+# Sourcing this file has side effects: it may install the yaml package,
+# it sets the random seed, it prints three status lines, and it assigns
+# `p`, PARAMS_FILE and the alias constants below.
+# Install yaml on first use when it is not available.
+if (!requireNamespace("yaml", quietly = TRUE)) install.packages("yaml")
+library(yaml)
+# Relative path: the file is looked up in the current working directory.
+PARAMS_FILE <- "params.yaml"
+# Abort early, reporting the working directory that was searched.
+if (!file.exists(PARAMS_FILE)) stop("params.yaml not found in working directory: ", getwd())
+# `p` holds the parsed parameter file as returned by yaml::read_yaml.
+p <- yaml::read_yaml(PARAMS_FILE)
+cat("params.yaml loaded. Project:", p$project$name, "| Version:", p$project$version, "\n")
+# Set random seeds
+# NOTE(review): `$` on a missing key yields NULL; presumably set.seed(NULL)
+# then re-seeds unpredictably instead of failing -- verify and consider
+# validating p$random_seeds$global first.
+set.seed(p$random_seeds$global)
+cat("Global random seed set to:", p$random_seeds$global, "\n")
+# Convenience aliases
+# Each alias is NULL when its key is absent from params.yaml.
+PROJECT_CRS  <- p$spatial$project_crs
+ANALYSIS_CRS <- p$spatial$analysis_crs
+OUTPUT_RES   <- p$spatial$raster_resolution_m
+CV_FOLDS     <- p$modeling$cv_folds
+N_BACKGROUND <- p$modeling$background_n
+VIF_THRESH   <- p$modeling$collinearity_vif_threshold
+cat("CRS:", PROJECT_CRS, "| CV folds:", CV_FOLDS, "| Background n:", N_BACKGROUND, "\n")

package/templates/scripts/params_loader.py ADDED Viewed

@@ -0,0 +1,38 @@
+# params_loader.py
+# Load project parameters from params.yaml into Python namespace
+# Usage: from scripts.params_loader import p, set_seeds
+# Requires: pyyaml, numpy (torch is optional and is seeded only when installed)
+import yaml
+import random
+import os
+import numpy as np
+PARAMS_FILE = "params.yaml"
+def load_params(path: str = PARAMS_FILE) -> dict:
+    """Read the project parameter file and return its parsed content.
+    A confirmation line with the project name and version is printed.
+    Parameters
+    ----------
+    path : str
+        Location of the YAML file; defaults to ``PARAMS_FILE``.
+    Returns
+    -------
+    dict
+        Content produced by ``yaml.safe_load``. NOTE(review): the ``dict``
+        annotation assumes the file holds a top-level mapping -- confirm.
+    Raises
+    ------
+    FileNotFoundError
+        If ``path`` does not exist; the message carries the absolute path.
+    KeyError
+        If the loaded mapping lacks ``project.name`` or ``project.version``,
+        because both are read for the confirmation line.
+    """
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"params.yaml not found at: {os.path.abspath(path)}")
+    # NOTE(review): no explicit encoding is passed, so the platform default
+    # applies -- confirm parameter files are safe to read that way.
+    with open(path, "r") as f:
+        params = yaml.safe_load(f)
+    print(f"params.yaml loaded. Project: {params['project']['name']} | Version: {params['project']['version']}")
+    return params
+def set_seeds(params: dict) -> None:
+    """Seed the available random number generators from the project parameters.
+    The value at ``params["random_seeds"]["global"]`` is applied to the
+    standard-library ``random`` module, to NumPy's global generator and, when
+    ``torch`` can be imported, to PyTorch. A confirmation line is printed.
+    Parameters
+    ----------
+    params : dict
+        Parsed parameters; must contain ``random_seeds.global``.
+    Raises
+    ------
+    KeyError
+        If ``random_seeds`` or its ``global`` entry is missing.
+    """
+    seed = params["random_seeds"]["global"]
+    random.seed(seed)
+    np.random.seed(seed)
+    # torch is optional: seed it when installed, otherwise carry on silently.
+    try:
+        import torch
+        torch.manual_seed(seed)
+    except ImportError:
+        pass
+    print(f"Random seeds set to: {seed}")
+# Import-time side effects: importing this module reads PARAMS_FILE relative
+# to the current working directory, prints status lines and seeds the RNGs.
+# An import therefore fails when the file or one of the keys below is missing.
+p = load_params()
+set_seeds(p)
+# Convenience aliases for frequently used parameters.
+# NOTE(review): params_loader.R also exposes OUTPUT_RES and VIF_THRESH; they
+# have no counterpart here -- confirm whether the two loaders should match.
+PROJECT_CRS  = p["spatial"]["project_crs"]
+ANALYSIS_CRS = p["spatial"]["analysis_crs"]
+CV_FOLDS     = p["modeling"]["cv_folds"]
+N_BACKGROUND = p["modeling"]["background_n"]

package/workflows/analyze-community-structure/WORKFLOW.md ADDED Viewed

@@ -0,0 +1,72 @@
+# Workflow: analyze-community-structure
+**Purpose:** Multivariate analysis of species community composition and diversity
+**Skills:** ecological-data-foundation → biostatistics-workbench → community-ecology-ordination → model-validation-and-uncertainty → reproducible-ecology-pipeline
+---
+## Trigger
+Invoke when the user wants to describe or compare species assemblages across sites, treatments, or gradients.
+**Example prompts:**
+- "Compare bird community composition between forest and pasture sites"
+- "Ordinate plant communities along an elevational gradient"
+- "Identify indicator species for each land use type"
+---
+## Steps
+### Step 1 — ecological-data-foundation
+- Validate species × site matrix (check species names, abundance values, site IDs)
+- QA environmental metadata
+- Output: `data_clean.csv`, `species_site_matrix.csv`
+### Step 2 — biostatistics-workbench
+- Test assumptions for parametric diversity comparisons (normality, homogeneity)
+- Fit GLM/LMM for alpha diversity response to environmental gradients
+- Output: `diversity_model_results.csv`, `assumption_diagnostics/`
+### Step 3 — community-ecology-ordination
+- Compute alpha diversity metrics (S, H', 1−D); rarefaction curves
+- Compute beta diversity; partition into turnover and nestedness
+- Run NMDS ordination; report stress
+- PERMANOVA + PERMDISP for group comparisons
+- SIMPER for species contributions
+- Hierarchical clustering
+- Output: `ordination_plot.png`, `diversity_metrics.csv`, `permanova_results.txt`
+### Step 4 — model-validation-and-uncertainty
+- Assess NMDS stress and convergence
+- Validate PERMANOVA assumptions (PERMDISP)
+- Report sensitivity to rare species inclusion/exclusion
+- Output: `validation_report.md`
+### Step 5 — reproducible-ecology-pipeline
+- Document ordination parameters, seed, and dissimilarity metric
+- Output: `parameter_manifest.yaml`, `decision_log.md`
+---
+## Expected Deliverables
+- NMDS ordination biplot
+- Alpha diversity summary per group
+- PERMANOVA results table
+- Indicator species list
+- Cluster dendrogram
+---
+## Decision Points
+| Condition | Diagnosis | Recommended Action |
+|---|---|---|
+| NMDS stress > 0.20 | Poor ordination fit in 2D | Increase to 3D; alternatively use PCoA (metric) or PCA after Hellinger transformation |
+| PERMANOVA significant but R² < 0.05 | Statistically significant but ecologically trivial effect | Report R² (effect size) prominently alongside p-value; contextualise effect magnitude |
+| Species with > 80% zeros in matrix dominating ordination | Rare species creating artefacts | Apply Hellinger transformation; remove singletons or use Bray-Curtis on untransformed data |
+| Groups fully overlap in ordination space | Environmental gradient is weak or wrong variables tested | Test additional environmental covariates; check if grouping factor is ecologically meaningful |
+| SIMPER output does not match ecological expectations | Potential data entry error or taxonomic issues | Verify species identity and abundance values; check for transposed rows/columns |
+| PERMANOVA significant but PERMDISP also significant | Cannot distinguish composition vs. dispersion differences | Report both; use betadisper plot; consider within-group diversity analysis |
+| Rarefaction curves not asymptoting | Insufficient sampling effort | Report sampling completeness (Chao1 completeness); caveat diversity comparisons |

package/workflows/analyze-environmental-change/WORKFLOW.md ADDED Viewed

@@ -0,0 +1,73 @@
+# Workflow: analyze-environmental-change
+**Purpose:** Detect and characterise long-term environmental change from remote sensing or monitoring data
+**Skills:** ecological-data-foundation → geoprocessing-for-ecology → environmental-time-series → ecological-impact-assessment → reproducible-ecology-pipeline
+---
+## Trigger
+Invoke when the user wants to analyse temporal trends in environmental conditions (NDVI, land cover, temperature, rainfall, deforestation) over years to decades.
+**Example prompts:**
+- "Analyse NDVI trends in the Pantanal over the last 20 years"
+- "Detect breakpoints in forest cover loss in the Brazilian Amazon"
+- "Characterise vegetation recovery after the 2019-2020 fires"
+---
+## Steps
+### Step 1 — ecological-data-foundation
+- Ingest time series data (satellite-derived or monitoring station)
+- Validate temporal consistency, units, and metadata
+- Output: `timeseries_clean.csv` or raster stack
+### Step 2 — geoprocessing-for-ecology
+- Reproject and align raster time stack
+- Extract series for specific zones or polygons
+- Create cloud-masked composites (if satellite data)
+- Output: `timeseries_stack.tif`, `zone_extracts.csv`
+### Step 3 — environmental-time-series
+- STL decomposition (trend + seasonal + remainder)
+- Mann-Kendall trend test + Sen's slope (pixel-wise or site-wise)
+- BFAST breakpoint detection
+- Standardised anomaly computation
+- Recovery trajectory (if post-disturbance)
+- Output: `trend_results.csv`, `breakpoints.csv`, `anomaly_series.csv`, `recovery_metrics.csv`
+### Step 4 — ecological-impact-assessment
+- Classify trend magnitude: significant improvement / stable / significant degradation
+- Identify spatial hotspots of change
+- Overlay with land cover, protected areas, and pressure layers
+- Synthesise drivers of observed change
+- Output: `change_classification.tif`, `impact_synthesis.md`
+### Step 5 — reproducible-ecology-pipeline
+- Document baseline period, decomposition parameters, breakpoint thresholds
+- Output: `parameter_manifest.yaml`, `decision_log.md`
+---
+## Expected Deliverables
+- Trend map (slope and significance per pixel)
+- Breakpoint map (date and magnitude)
+- Anomaly time series
+- Change classification map
+- Recovery metrics (if applicable)
+---
+## Decision Points
+| Condition | Diagnosis | Recommended Action |
+|---|---|---|
+| Mann-Kendall tau < 0.1 despite visual trend | Trend masked by high inter-annual variance | Apply pre-whitening (remove autocorrelation before MK test); report Sen's slope with 95% CI instead of tau alone |
+| BFAST detects > 5 breakpoints | Oversegmentation of time series | Increase `h` parameter (minimum segment length as fraction of series); inspect breakpoints for plausibility |
+| Time series < 10 years | Insufficient length for reliable trend detection | Report descriptive statistics (mean, SD, range) only; state limitation; do not report Mann-Kendall as significant |
+| Missing data > 20% in any season | Seasonal decomposition unreliable | Impute with STL or linear interpolation before decomposition; document imputation in methods |
+| Breakpoint coincides with sensor change or data gap | Artefact, not ecological signal | Verify against independent data source (e.g., Landsat vs MODIS comparison); exclude artefact breakpoints |
+| STL trend component shows oscillation at period equal to satellite revisit | Orbital artefact leaking into trend | Apply Fourier pre-filtering; use MODIS 16-day composites instead of 8-day |
+| Recovery rate > 100% (NDVI exceeds pre-disturbance level) | Regrowth exceeds baseline; possible change in land use | Verify with high-resolution imagery; investigate if secondary vegetation is replacing degraded pasture |

package/workflows/assess-ecological-impact/WORKFLOW.md ADDED Viewed

@@ -0,0 +1,75 @@
+# Workflow: assess-ecological-impact
+**Purpose:** Quantify the ecological effect of a disturbance or land-use change
+**Skills:** ecological-data-foundation → geoprocessing-for-ecology → ecological-impact-assessment → biostatistics-workbench → model-validation-and-uncertainty → reproducible-ecology-pipeline
+---
+## Trigger
+Invoke when the user wants to evaluate the impact of a disturbance (deforestation, fire, infrastructure, agriculture) on ecological indicators.
+**Example prompts:**
+- "Assess the impact of the road construction on bird richness using BACI"
+- "Quantify habitat loss and fragmentation from sugarcane expansion"
+- "Evaluate the effect of the 2020 fire on forest carbon stocks"
+---
+## Steps
+### Step 1 — ecological-data-foundation
+- Validate ecological indicator data (species richness, abundance, biomass, NDVI)
+- Confirm site-level metadata (control/impact designation, pre/post dates)
+- Output: `data_clean.csv`, `qa_report.md`
+### Step 2 — geoprocessing-for-ecology
+- Clip land cover and pressure layers to study area
+- Compute distance from disturbance for gradient analysis
+- Extract spatial covariates at site locations
+- Output: `points_with_env.csv`, `pressure_layers/`
+### Step 3 — ecological-impact-assessment
+- Run BACI mixed model
+- Compute landscape fragmentation metrics (pre/post)
+- Build composite pressure index
+- Output: `baci_results.csv`, `fragmentation_metrics.csv`, `pressure_index.tif`
+### Step 4 — biostatistics-workbench
+- Validate BACI model assumptions (residual diagnostics)
+- Compute effect sizes and CIs for the BACI interaction
+- Perform post-hoc tests if multiple indicators
+- Output: `assumption_diagnostics/`, `effect_sizes.csv`
+### Step 5 — model-validation-and-uncertainty
+- Report performance of BACI model (R², calibration)
+- Assess sensitivity to control site selection
+- Output: `validation_report.md`, `sensitivity_report.md`
+### Step 6 — reproducible-ecology-pipeline
+- Log all decisions and parameters
+- Complete reproducibility checklist
+- Output: `parameter_manifest.yaml`, `decision_log.md`
+---
+## Expected Deliverables
+- BACI interaction estimate with 95% CI
+- Landscape fragmentation change metrics
+- Pressure map
+- Impact classification (none / minor / moderate / major / critical)
+---
+## Decision Points
+| Condition | Diagnosis | Recommended Action |
+|---|---|---|
+| PERMDISP significant in addition to PERMANOVA | Difference in group dispersion, not just centroid | Report both tests; do not interpret PERMANOVA alone as evidence of composition difference |
+| BACI interaction p > 0.10 | Effect not detected at current statistical power | Calculate post-hoc power; recommend minimum additional n in report |
+| Pseudo-replication detected (sites not independent) | Type I error inflation | Use random effects (LMM/GLMM) or aggregate to independent units |
+| Pre-impact period < 2 years | Insufficient baseline for trend estimation | Flag limitation explicitly; conduct sensitivity analysis with different baseline lengths |
+| Control and impact sites show different trends pre-impact | BACI parallel-trend assumption violated | Use synthetic control, difference-in-differences with covariate adjustment, or interrupted time series instead |
+| Effect size very small but p < 0.05 | Statistically significant but ecologically negligible | Report effect size (Cohen's d or partial η²) alongside p; contextualise against minimum ecologically meaningful difference |
+| BACI model fails to converge | Model too complex for available data | Reduce random effects structure; use simpler fixed-effects ANOVA-style BACI if n per group is small |

package/workflows/assess-ecosystem-services/WORKFLOW.md ADDED Viewed

@@ -0,0 +1,68 @@
+# Workflow: assess-ecosystem-services
+**Purpose:** Quantify and map ecosystem services across a landscape
+**Skills:** ecological-data-foundation → geoprocessing-for-ecology → ecosystem-services-assessment → biostatistics-workbench → reproducible-ecology-pipeline
+---
+## Trigger
+Invoke when the user wants to quantify, map, or compare ecosystem services across a study area.
+**Example prompts:**
+- "Map carbon storage, water regulation, and erosion control for the Atlantic Forest"
+- "Assess trade-offs between timber production and carbon sequestration in [region]"
+- "Quantify ecosystem service co-benefits of restoring [X ha] of native vegetation"
+---
+## Steps
+### Step 1 — ecological-data-foundation
+- Ingest land cover map, biomass data, soil data, and DEM
+- Validate attribute tables and temporal alignment
+- Output: `landcover_validated.tif`, `qa_report.md`
+### Step 2 — geoprocessing-for-ecology
+- Reproject and align all input layers to common CRS and resolution
+- Clip to study area; compute watershed delineation if needed
+- Output: `inputs_aligned/`, `watershed.gpkg`
+### Step 3 — ecosystem-services-assessment
+- Select ES portfolio relevant to the study context
+- Compute biophysical indicators per service
+- Aggregate by land cover class
+- Run trade-off analysis (pairwise correlations)
+- Output: `es_indicator_maps/`, `es_summary_table.csv`, `tradeoff_matrix.csv`
+### Step 4 — biostatistics-workbench
+- Test for significant differences in ES values between land cover classes
+- Report effect sizes for pairwise comparisons
+- Output: `es_stats_results.csv`, `effect_sizes.csv`
+### Step 5 — reproducible-ecology-pipeline
+- Document ES method choices and parameter sources
+- Output: `parameter_manifest.yaml`, `es_report.md`
+---
+## Expected Deliverables
+- ES indicator maps (one per service)
+- ES summary table by land cover class
+- Trade-off matrix and visualisation
+- Statistical comparison of ES across classes
+---
+## Decision Points
+| Condition | Diagnosis | Recommended Action |
+|---|---|---|
+| Land cover classification accuracy < 85% | Propagated classification error in ES estimates | Conduct uncertainty analysis using accuracy matrix; report ES ranges, not point estimates |
+| Pairwise ES correlation with magnitude above 0.8 (r > 0.8 or r < −0.8) between two services | Possible confounding by same land cover class driving both | Partial out land cover effect; test whether the relationship (trade-off or synergy) holds within land cover classes |
+| InVEST model output contains NoData in > 30% of area | Input layer misalignment (CRS, extent, or resolution mismatch) | Recheck CRS and extent of all inputs; use `terra::compareGeom()` to verify alignment |
+| Monetary valuation requested but local market data unavailable | Benefit transfer required; high uncertainty | Apply benefit transfer with explicit unit value uncertainty (±50% range); flag limitation prominently |
+| Provisioning and regulating services conflict across scenarios | Synergy/trade-off analysis needed | Use Pareto frontier visualisation; do not rank services without considering trade-offs |
+| ES values identical across all land cover classes | Model insensitive to land cover differences | Check if land cover classes are aggregated too broadly; inspect model parameters for land-cover-specific values |
+| Water yield model shows negative values | Model misconfiguration or negative ET correction | Verify PET inputs and calibration; check if precipitation minus AET is negative in any pixel |

package/workflows/assess-landscape-connectivity/WORKFLOW.md ADDED Viewed

@@ -0,0 +1,84 @@
+# Workflow: assess-landscape-connectivity
+**Purpose:** Assess habitat connectivity for a focal species using resistance surfaces and graph-theoretic metrics
+**Skills:** ecological-data-foundation → geoprocessing-for-ecology → landscape-connectivity → model-validation-and-uncertainty → reproducible-ecology-pipeline
+---
+## Trigger
+Invoke when the user wants to evaluate landscape connectivity, identify wildlife corridors, rank habitat patches by importance, or detect connectivity pinchpoints.
+**Example prompts:**
+- "Assess connectivity for jaguars across the Mesoamerican Biological Corridor"
+- "Build a resistance surface and identify corridor pinchpoints for [species]"
+- "Rank habitat patches by connectivity importance using IIC and dPC"
+---
+## Steps
+### Step 1 — ecological-data-foundation
+- Validate habitat patch layer (polygon or raster)
+- Verify land cover classification and reclassification table
+- Check dispersal distance estimate (literature or telemetry-based)
+- Output: `patches_clean.shp`, `landcover_validated.tif`, `dispersal_params.csv`
+### Step 2 — geoprocessing-for-ecology
+- Reproject all layers to equal-area CRS
+- Clip land cover and ancillary layers (slope, roads, rivers) to study area
+- Compute patch area, centroid coordinates, and pairwise Euclidean distances
+- Output: `patches_projected.shp`, `landcover_clipped.tif`, `patch_distances.csv`
+### Step 3 — landscape-connectivity
+- Build resistance surface from land cover reclassification (+ optional slope, road proximity)
+- Compute pairwise least-cost distances between patches
+- Calculate graph-theoretic metrics: IIC, PC, dIIC, dPC, betweenness centrality
+- Rank patches by contribution to overall connectivity
+- Run Circuitscape (if available) for current flow and pinchpoint detection
+- Output: `resistance_surface.tif`, `connectivity_metrics.csv`, `patch_importance.csv`, `pinchpoint_map.tif`
+### Step 4 — model-validation-and-uncertainty
+- Sensitivity analysis: vary resistance values +/- 50% for top 3 land cover classes
+- Compare patch rankings across resistance scenarios
+- Assess sensitivity to dispersal distance threshold
+- Output: `sensitivity_report.md`, `scenario_comparison.csv`
+### Step 5 — reproducible-ecology-pipeline
+- Document resistance value sources and justification
+- Log dispersal distance estimate and source
+- Record Circuitscape parameters and software versions
+- Output: `parameter_manifest.yaml`, `decision_log.md`, `reproducibility_checklist.md`
+---
+## Expected Deliverables
+- Resistance surface map
+- Patch importance ranking (IIC, dPC, betweenness)
+- Connectivity graph visualisation
+- Pinchpoint map (current flow)
+- Sensitivity analysis across resistance scenarios
+- Reproducibility package
+---
+## Minimum Data Requirements
+- Habitat patch layer with >= 5 patches
+- Land cover raster covering study area
+- Resistance reclassification table (land cover class -> resistance value)
+- Dispersal distance estimate for focal species (meters)
+---
+## Decision Points
+| Condition | Diagnosis | Recommended Action |
+|---|---|---|
+| IIC or PC = 0 | All patches are isolated beyond dispersal threshold | Increase dispersal threshold or verify it against telemetry data; report fragmentation severity |
+| Single patch dominates dPC (> 80%) | Connectivity depends on one critical patch | Flag as conservation priority; run removal scenario to quantify impact |
+| Resistance values lack empirical basis | Expert opinion only, no telemetry validation | Report as expert-based; run sensitivity analysis across plausible ranges |
+| Circuitscape fails to converge | Grid too large or resistance contrast too high | Reduce resolution or aggregate land cover classes; check for zero-resistance cells and for complete (NoData) barriers that isolate patches |
+| Patch ranking changes > 30% across scenarios | High sensitivity to resistance parameterisation | Report full scenario range; do not present single-scenario ranking as definitive |
+| Dispersal distance estimate varies > 2x in literature | Uncertain movement capacity | Run analysis at low, medium, and high estimates; report range of connectivity outcomes |

package/workflows/build-fire-risk-map/WORKFLOW.md ADDED Viewed

@@ -0,0 +1,79 @@
+# Workflow: build-fire-risk-map
+**Purpose:** Produce a spatial fire risk map integrating historical fire data, vegetation, climate, and terrain
+**Skills:** ecological-data-foundation → geoprocessing-for-ecology → environmental-time-series → predictive-modeling-best-practices → model-validation-and-uncertainty → ecological-impact-assessment
+---
+## Trigger
+Invoke when the user wants to map fire risk or fire susceptibility for a landscape.
+**Example prompts:**
+- "Build a fire risk map for the Cerrado"
+- "Map fire susceptibility integrating NDVI trends, rainfall anomalies, and land use"
+- "Predict which areas are most likely to burn in the next dry season"
+---
+## Steps
+### Step 1 — ecological-data-foundation
+- Ingest fire occurrence data (INPE BDQueimadas, MODIS burned area, VIIRS)
+- Validate dates, coordinates, and confidence levels
+- Filter to high-confidence fire pixels
+- Output: `fire_records_clean.csv`, `qa_report.md`
+### Step 2 — geoprocessing-for-ecology
+- Reproject all layers to project CRS (UTM)
+- Align terrain (DEM, slope, aspect), land cover, and climate rasters to common grid
+- Compute distance to roads, edge density, and proximity to fire ignitions
+- Output: `predictors_stack.tif`, `spatial_qa_report.md`
+### Step 3 — environmental-time-series
+- Compute NDVI trend and anomalies (vegetation dryness proxy)
+- Compute rainfall SPI (drought index)
+- Detect fire frequency per pixel from historical MODIS record
+- Output: `ndvi_trend.tif`, `spi_anomaly.tif`, `fire_frequency.tif`
+### Step 4 — predictive-modeling-best-practices
+- Assess collinearity of all predictor candidates
+- Define spatial CV strategy (buffered k-fold)
+- Tune BRT and Random Forest hyperparameters
+- Output: `cv_strategy.md`, `selected_predictors.txt`, `tuning_results.csv`
+### Step 5 — model-validation-and-uncertainty
+- Compute AUC-ROC, TSS, and Brier score on spatial CV folds
+- Calibrate predicted probabilities
+- Map ensemble uncertainty (SD across algorithms)
+- Output: `performance_metrics.csv`, `fire_risk_uncertainty.tif`, `validation_report.md`
+### Step 6 — ecological-impact-assessment
+- Classify risk zones: low / moderate / high / very high
+- Compute area and proportion in each risk class per land cover type
+- Overlay with infrastructure and protected area boundaries
+- Output: `fire_risk_map.tif`, `risk_by_landcover.csv`, `impact_synthesis.md`
+---
+## Expected Deliverables
+- Continuous fire risk probability map
+- Classified risk zone map (4 classes)
+- Model performance table
+- Risk summary by land cover class
+- Uncertainty map
+---
+## Decision Points
+| Condition | Diagnosis | Recommended Action |
+|---|---|---|
+| VIF > 10 among predictors | Severe multicollinearity | Remove the redundant layer with highest VIF; recalculate VIF after each removal |
+| Fire history data < 10 years | Insufficient temporal coverage for reliable frequency estimates | Flag in metadata; use longer MODIS record if available; weight recent years more heavily |
+| Block CV AUC < 0.65 | Spatial autocorrelation inflating naive AUC; model has limited spatial transferability | Report block CV AUC as primary; investigate whether training area is too small |
+| Risk map shows hotspots near data gaps | Edge effects or extrapolation artefacts at raster boundaries | Apply focal mean smoothing (3×3 or 5×5 window); mask no-data buffer zones |
+| Temporal mismatch between predictor layers (different years) | Predictors reflect different time periods; potential confounding | Standardise all layers to same time period; document year of each layer in metadata |
+| BRT and Random Forest disagree strongly in high-risk areas | Model uncertainty is high where it matters most | Report ensemble mean AND standard deviation; flag high-disagreement areas in maps |
+| Fire risk map shows uniform high risk across entire area | Model underfits or predictors lack spatial contrast | Increase model complexity; check if predictors have sufficient spatial variation in study area |