npm - ecological-agent-skills - Versions diffs - 3.1.0 - Mend

ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (217) hide show

package/skills/species-distribution-modeling/scripts/prepare_future_layers.R ADDED Viewed

@@ -0,0 +1,351 @@
+# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
+# SPDX-License-Identifier: GPL-3.0-or-later
+# Usage: Rscript prepare_future_layers.R <current_stack.tif> <future_layers_dir> <study_area.shp> <output_dir> [ssp_label] [year_label]
+# ── Inline logger ─────────────────────────────────────────────────────────────
+# Timestamped console logger. Every level goes through message(), and each
+# level function takes sprintf()-style arguments (a format plus its values).
+SKILL_NAME <- "species-distribution-modeling"
+# Current time formatted as "[YYYY-MM-DD HH:MM:SS]".
+.log_ts <- function() {
+  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
+}
+# Build a logger that prefixes each formatted message with a timestamp and `tag`.
+.log_with_tag <- function(tag) {
+  force(tag)
+  function(...) message(.log_ts(), tag, sprintf(...))
+}
+log_info  <- .log_with_tag(" [INFO]  ")
+log_warn  <- .log_with_tag(" [WARN]  ")
+log_error <- .log_with_tag(" [ERROR] ")
+# Announce numbered pipeline step `n` with description `d`.
+log_step <- function(n, d) {
+  log_info("-- STEP %d: %s", n, d)
+}
+# Record a parameter choice: name `v`, chosen value `val`, and the reason `why`.
+log_decision <- function(v, val, why) {
+  log_info("DECISION | %s = %s | %s", v, val, why)
+}
+# Make sure a "logs" directory exists under the working directory.
+dir.create("logs", recursive = TRUE, showWarnings = FALSE)
+#
+# Arguments:
+#   current_stack.tif   : Reference calibration raster stack (sets CRS, resolution, extent)
+#   future_layers_dir   : Directory containing future climate .tif files (one per variable)
+#   study_area.shp      : Shapefile / GeoPackage defining the projection area (G area)
+#   output_dir          : Directory to write the prepared future stack (created if absent)
+#   ssp_label           : Optional SSP label for output filename (default: "ssp245")
+#   year_label          : Optional time horizon label for output filename (default: "2050")
+#
+# Output:
+#   future_stack_{ssp_label}_{year_label}.tif  — prepared stack ready for model projection
+suppressPackageStartupMessages(library(terra))
+suppressPackageStartupMessages(library(sf))
+# ── 1. Parse arguments ──────────────────────────────────────────────────────
+# Positional arguments: calibration stack, future-layer directory, study area,
+# output directory, then the optional SSP and year labels. With fewer than four
+# arguments every value falls back to the built-in test defaults.
+log_step(1, "Analisar argumentos da linha de comando")
+args <- commandArgs(trailingOnly = TRUE)
+# Defaults first; they are overridden only when all four paths were supplied.
+current_tif     <- "data/predictors/env_train.tif"
+future_dir      <- "data/chelsa_future/ssp245_2050/"
+study_area_path <- "data/study_area/g_area.shp"
+output_dir      <- "output/future_layers"
+ssp_label       <- "ssp245"
+year_label      <- "2050"
+if (length(args) >= 4) {
+  current_tif     <- args[1]
+  future_dir      <- args[2]
+  study_area_path <- args[3]
+  output_dir      <- args[4]
+  if (length(args) >= 5) {
+    ssp_label <- args[5]
+  }
+  if (length(args) >= 6) {
+    year_label <- args[6]
+  }
+} else {
+  log_warn("Menos de 4 argumentos. Usando caminhos padrao para teste.")
+}
+# Echo the effective configuration, one "format, value" pair per log line.
+startup_echo <- list(
+  list("Script: prepare_future_layers.R | Skill: %s", SKILL_NAME),
+  list("Stack de calibracao : %s", current_tif),
+  list("Diretorio futuro    : %s", future_dir),
+  list("Area de estudo      : %s", study_area_path),
+  list("Output dir          : %s", output_dir),
+  list("SSP label           : %s", ssp_label),
+  list("Year label          : %s", year_label)
+)
+for (echo_line in startup_echo) {
+  log_info(echo_line[[1]], echo_line[[2]])
+}
+log_decision("ssp_label",  ssp_label,  "cenario SSP para rotular o arquivo de saida")
+log_decision("year_label", year_label, "horizonte temporal para rotular o arquivo de saida")
+# ── Input precondition checks ─────────────────────────────────────────────────
+# Stop early, after logging an actionable hint, when a required input is absent.
+has_calibration_stack <- file.exists(current_tif)
+if (!has_calibration_stack) {
+  log_error(
+    "Input nao encontrado: %s\nCausa provavel: arquivo nao gerado pelo passo anterior.\nVerifique a saida de: species-distribution-modeling (prepare_predictors ou similar)\nSkill anterior: species-distribution-modeling",
+    current_tif
+  )
+  stop("Calibration stack not found: ", current_tif)
+}
+has_study_area <- file.exists(study_area_path)
+if (!has_study_area) {
+  log_error(
+    "Input nao encontrado: %s\nCausa provavel: shapefile de area de estudo ausente.\nVerifique a saida de: ecological-data-foundation ou etapa de definicao da G area.\nSkill anterior: species-distribution-modeling",
+    study_area_path
+  )
+  stop("Study area file not found: ", study_area_path)
+}
+has_future_dir <- dir.exists(future_dir)
+if (!has_future_dir) {
+  log_error(
+    "Diretorio de camadas futuras nao encontrado: %s\nCausa provavel: camadas CHELSA/WorldClim futuras nao baixadas.\nBaixe os GeoTIFFs futuros e coloque em: %s\nSkill anterior: species-distribution-modeling",
+    future_dir, future_dir
+  )
+  stop("Future layers directory not found: ", future_dir)
+}
+# ── 2. Create output directory ───────────────────────────────────────────────
+# Created recursively; an already existing directory is not an error.
+log_step(2, "Criar diretorio de saida")
+dir.create(path = output_dir, showWarnings = FALSE, recursive = TRUE)
+log_info("Diretorio de saida pronto: %s", output_dir)
+# ── 3. Load reference calibration stack ─────────────────────────────────────
+log_step(3, "Carregar stack de calibracao de referencia")
+# Log why the calibration stack could not be opened, then re-raise the error.
+.ref_stack_load_failed <- function(e) {
+  log_error(
+    "Falha ao carregar stack de calibracao '%s': %s\nCausa provavel: arquivo GeoTIFF corrompido ou formato nao suportado pelo terra.\nSkill anterior: species-distribution-modeling",
+    current_tif, conditionMessage(e)
+  )
+  stop(e)
+}
+ref_stack <- tryCatch(rast(current_tif), error = .ref_stack_load_failed)
+# Describe the reference grid that the future layers are aligned to later on.
+ref_crs_name   <- crs(ref_stack, describe = TRUE)$name
+ref_extent_txt <- paste(round(as.vector(ext(ref_stack)), 3), collapse = ", ")
+ref_res_txt    <- paste(res(ref_stack), collapse = " x ")
+ref_layers_txt <- paste(names(ref_stack), collapse = ", ")
+log_info("CRS    : %s", ref_crs_name)
+log_info("Extent : %s", ref_extent_txt)
+log_info("Res    : %s", ref_res_txt)
+log_info("Layers : %s", ref_layers_txt)
+# ── 4. Load study area (G area) ──────────────────────────────────────────────
+# Read the study-area polygons and bring them into the calibration CRS.
+log_step(4, "Carregar area de estudo (G area)")
+study_area <- tryCatch({
+  vect(study_area_path)
+}, error = function(e) {
+  log_error(
+    "Falha ao carregar area de estudo '%s': %s\nCausa provavel: shapefile corrompido, projecao invalida ou formato nao suportado.\nVerifique: ogrinfo '%s'\nSkill anterior: species-distribution-modeling",
+    study_area_path, conditionMessage(e), study_area_path
+  )
+  stop(e)
+})
+# Reproject study area to match calibration CRS if needed.
+# The CRSs are compared with terra::same.crs() rather than identical() on the
+# crs() strings: the same CRS can be written with differently worded WKT, which
+# made the text comparison report a mismatch and reproject without need.
+if (!same.crs(study_area, ref_stack)) {
+  log_info("Reprojetando area de estudo para o CRS de calibracao...")
+  study_area <- tryCatch(
+    project(study_area, crs(ref_stack)),
+    error = function(e) {
+      log_error(
+        "Falha ao reprojetar area de estudo: %s\nCausa provavel: CRS invalido ou incompativel.\nSkill anterior: species-distribution-modeling",
+        conditionMessage(e)
+      )
+      stop(e)
+    }
+  )
+  log_info("Reprojecao concluida.")
+} else {
+  log_info("CRS da area de estudo ja coincide com o de calibracao. Sem reprojecao necessaria.")
+}
+# ── 5. Load future climate layers ────────────────────────────────────────────
+# Collect the .tif files sitting directly in future_dir (no recursion) and
+# open them together as one multi-layer raster.
+log_step(5, "Carregar camadas climaticas futuras")
+future_files <- list.files(
+  path = future_dir, pattern = "\\.tif$", full.names = TRUE, recursive = FALSE
+)
+n_future_files <- length(future_files)
+if (n_future_files == 0) {
+  log_error(
+    "Nenhum arquivo .tif encontrado em: %s\nCausa provavel: camadas futuras nao baixadas ou extensao diferente de .tif.\nVerifique o conteudo do diretorio.\nSkill anterior: species-distribution-modeling",
+    future_dir
+  )
+  stop("No .tif files found in: ", future_dir)
+}
+log_info("Arquivos de camada futura encontrados: %d", n_future_files)
+# Log the stacking failure with a diagnostic hint, then re-raise it.
+.future_stack_failed <- function(e) {
+  log_error(
+    "Falha ao empilhar camadas futuras: %s\nCausa provavel: GeoTIFFs corrompidos ou com extents incompativeis.\nVerifique: gdalinfo nos arquivos em %s\nSkill anterior: species-distribution-modeling",
+    conditionMessage(e), future_dir
+  )
+  stop(e)
+}
+future_raw <- tryCatch(rast(future_files), error = .future_stack_failed)
+raw_layer_names <- paste(names(future_raw), collapse = ", ")
+log_info("Nomes das camadas futuras (brutos): %s", raw_layer_names)
+# ── 6. Rename future layers to match calibration ─────────────────────────────
+log_step(6, "Renomear camadas futuras para coincidir com a calibracao")
+# Strategy, in order of preference:
+#   1. same set of names              -> reorder to the calibration order
+#   2. same layer count, other names  -> rename by position
+#   3. different layer count          -> match each calibration name to exactly
+#                                        one future layer by name
+# The script stops with an explicit message when a layer cannot be matched.
+ref_names    <- names(ref_stack)
+future_names <- names(future_raw)
+# TRUE when `needle` occurs in `haystack` with no letter or digit directly
+# before or after it, e.g. "bio1" in "CHELSA_bio1_2041" but not in
+# "CHELSA_bio10_2041". Only the first occurrence is inspected.
+.is_delimited_match <- function(needle, haystack) {
+  start <- as.integer(regexpr(needle, haystack, fixed = TRUE))
+  if (start < 0L) {
+    return(FALSE)
+  }
+  stop_at <- start + nchar(needle)
+  before  <- substr(haystack, start - 1L, start - 1L)
+  after   <- substr(haystack, stop_at, stop_at)
+  !grepl("[[:alnum:]]", before) && !grepl("[[:alnum:]]", after)
+}
+# Index of the single layer in `candidates` that corresponds to calibration
+# layer `rn`, or NA when there is none or the choice stays ambiguous. An exact
+# name wins, then a unique substring hit, then a unique delimited hit; the last
+# rule resolves names that are prefixes of other names ("bio1" vs "bio10"),
+# which plain substring matching rejected as ambiguous.
+.match_future_layer <- function(rn, candidates) {
+  exact <- which(candidates == rn)
+  if (length(exact) == 1L) {
+    return(exact)
+  }
+  partial <- which(grepl(rn, candidates, fixed = TRUE))
+  if (length(partial) == 1L) {
+    return(partial)
+  }
+  delimited <- partial[vapply(candidates[partial], .is_delimited_match,
+                              logical(1), needle = rn)]
+  if (length(delimited) == 1L) delimited else NA_integer_
+}
+if (setequal(ref_names, future_names)) {
+  # Names match already — reorder to calibration order
+  future_raw <- future_raw[[ref_names]]
+  log_info("Nomes das camadas coincidentes. Reordenados conforme calibracao.")
+  log_decision("rename_strategy", "reorder", "nomes identicos, apenas reordenados")
+} else if (length(future_names) == length(ref_names)) {
+  # Same count but different names — rename by position (common with CHELSA long names).
+  # The previous branch already established that the name sets differ.
+  log_warn(
+    "Nomes das camadas diferem da calibracao. Renomeando por posicao (%d camadas).",
+    length(ref_names)
+  )
+  log_info("Nomes antigos: %s", paste(future_names, collapse = ", "))
+  log_info("Novos nomes  : %s", paste(ref_names,    collapse = ", "))
+  log_decision("rename_strategy", "by_position", "mesmo numero de camadas mas nomes diferentes (comum com CHELSA)")
+  names(future_raw) <- ref_names
+} else {
+  # Different count — try to find matching layers by partial name
+  log_warn("Numero de camadas difere. Tentando correspondencia por nome parcial...")
+  # vapply() guarantees an integer vector even when ref_names is empty.
+  matched <- vapply(ref_names, .match_future_layer, integer(1),
+                    candidates = future_names)
+  if (anyNA(matched)) {
+    missing_layers <- ref_names[is.na(matched)]
+    log_error(
+      "Nao e possivel associar camadas futuras as de calibracao.\nCalibracao espera: %s\nCamadas futuras: %s\nSem correspondencia para: %s\nAcao: renomeie os .tif futuros para coincidir exatamente com os nomes de calibracao.\nSkill anterior: species-distribution-modeling",
+      paste(ref_names,    collapse = ", "),
+      paste(future_names, collapse = ", "),
+      paste(missing_layers, collapse = ", ")
+    )
+    stop(
+      "Cannot match future layers to calibration layers.\n",
+      "  Calibration expects: ", paste(ref_names, collapse = ", "), "\n",
+      "  Future layers found: ", paste(future_names, collapse = ", "), "\n",
+      "  Could not find match for: ", paste(missing_layers, collapse = ", "), "\n",
+      "  Action: rename future .tif files to match calibration layer names exactly."
+    )
+  }
+  future_raw <- future_raw[[matched]]
+  names(future_raw) <- ref_names
+  log_info("Camadas associadas por nome parcial. Reordenadas conforme calibracao.")
+  log_decision("rename_strategy", "partial_name_match", "contagem diferente; correspondencia por substring")
+}
+# ── 7. Reproject to calibration CRS ──────────────────────────────────────────
+# Reproject the future stack only when its CRS really differs from the
+# calibration CRS. The test uses terra::same.crs() instead of identical() on the
+# crs() strings: two rasters in the same CRS can carry differently worded WKT,
+# and a needless project() call would interpolate the climate values one extra
+# time before the resampling step.
+log_step(7, "Reprojetar stack futuro para o CRS de calibracao")
+if (!same.crs(future_raw, ref_stack)) {
+  log_info("Reprojetando stack futuro para CRS de calibracao...")
+  log_decision("resample_method_reproj", "bilinear", "interpolacao bilinear para dados continuos de clima")
+  future_raw <- tryCatch(
+    project(future_raw, crs(ref_stack), method = "bilinear"),
+    error = function(e) {
+      log_error(
+        "Falha ao reprojetar stack futuro: %s\nCausa provavel: CRS invalido ou falta de memoria para o raster.\nSkill anterior: species-distribution-modeling",
+        conditionMessage(e)
+      )
+      stop(e)
+    }
+  )
+  log_info("Reprojecao concluida.")
+} else {
+  log_info("CRS ja coincide com calibracao. Sem reprojecao necessaria.")
+}
+# ── 8. Crop and mask to study area (G area) ───────────────────────────────────
+# Crop the stack to the study-area polygons, then mask it with the same
+# polygons. A failure in either call is logged and re-raised.
+log_step(8, "Recortar e mascarar para a area de estudo")
+.crop_mask_failed <- function(e) {
+  log_error(
+    "Falha ao recortar/mascarar o stack futuro: %s\nCausa provavel: extent da area de estudo fora do extent do raster futuro.\nVerifique a projecao e o extent dos arquivos.\nSkill anterior: species-distribution-modeling",
+    conditionMessage(e)
+  )
+  stop(e)
+}
+tryCatch(
+  {
+    future_cropped <- crop(x = future_raw, y = study_area)
+    future_masked  <- mask(x = future_cropped, mask = study_area)
+    log_info("Recorte e mascara aplicados. Celulas validas apos mascara: nao calculado (use global(future_masked, 'notNA')).")
+  },
+  error = .crop_mask_failed
+)
+# ── 9. Resample to exactly match calibration grid ────────────────────────────
+# Bilinear resampling of the masked stack onto the grid of ref_stack.
+log_step(9, "Reamostrar para coincidir exatamente com o grid de calibracao")
+log_decision("resample_method", "bilinear", "interpolacao bilinear para dados continuos de clima")
+# Log the resampling failure, then re-raise it.
+.resample_failed <- function(e) {
+  log_error(
+    "Falha na reamostragem do stack futuro: %s\nCausa provavel: incompatibilidade de CRS ou extent entre stack futuro e de calibracao.\nVerifique os passos 7 e 8.\nSkill anterior: species-distribution-modeling",
+    conditionMessage(e)
+  )
+  stop(e)
+}
+future_resampled <- tryCatch(
+  resample(x = future_masked, y = ref_stack, method = "bilinear"),
+  error = .resample_failed
+)
+# ── 10. Geometry verification ────────────────────────────────────────────────
+# Confirm that the resampled stack shares CRS, extent, dimensions and
+# resolution with the calibration stack before it is written out.
+log_step(10, "Verificar geometria do stack futuro contra o de calibracao")
+# `orig = TRUE` was dropped from this call: it is an argument of
+# raster::compareRaster(), not of terra::compareGeom(), whose `...` is meant for
+# additional rasters to compare. The extent, row/column and resolution checks
+# together still pin the grid origin.
+# NOTE(review): confirm the compareGeom() signature of the installed terra.
+geom_ok <- tryCatch(
+  compareGeom(ref_stack, future_resampled, stopOnError = FALSE,
+              res = TRUE, crs = TRUE),
+  error = function(e) {
+    log_warn("compareGeom retornou erro: %s. Prosseguindo com cautela.", conditionMessage(e))
+    FALSE
+  }
+)
+# isTRUE() treats NA or any other non-TRUE result as a failed check.
+if (!isTRUE(geom_ok)) {
+  ref_ext_txt <- as.character(ext(ref_stack))
+  ref_res_txt <- paste(res(ref_stack), collapse = "x")
+  ref_crs_txt <- crs(ref_stack, describe = TRUE)$name
+  fut_ext_txt <- as.character(ext(future_resampled))
+  fut_res_txt <- paste(res(future_resampled), collapse = "x")
+  fut_crs_txt <- crs(future_resampled, describe = TRUE)$name
+  log_error(
+    "Verificacao de geometria FALHOU apos reamostragem.\nCalibracao: ext=%s res=%s crs=%s\nFuturo     : ext=%s res=%s crs=%s\nVerifique incompatibilidades de extent ou datum e re-execute.\nSkill anterior: species-distribution-modeling",
+    ref_ext_txt, ref_res_txt, ref_crs_txt,
+    fut_ext_txt, fut_res_txt, fut_crs_txt
+  )
+  stop(
+    "Geometry verification FAILED after resampling.\n",
+    "  Calibration: ext=", ref_ext_txt,
+    " res=", ref_res_txt,
+    " crs=", ref_crs_txt, "\n",
+    "  Future:      ext=", fut_ext_txt,
+    " res=", fut_res_txt,
+    " crs=", fut_crs_txt, "\n",
+    "  Check for extent or datum mismatches and re-run."
+  )
+}
+log_info("Verificacao de geometria PASSOU.")
+# ── 11. Final layer name verification ────────────────────────────────────────
+# The prepared stack must carry exactly the calibration layer names, in the
+# same order; identical() checks both.
+log_step(11, "Verificar nomes finais das camadas")
+final_names    <- names(future_resampled)
+expected_names <- names(ref_stack)
+names_agree    <- identical(final_names, expected_names)
+if (!names_agree) {
+  # Names present in the prepared stack but absent from the calibration stack.
+  name_diff    <- setdiff(final_names, expected_names)
+  expected_txt <- paste(expected_names, collapse = ", ")
+  got_txt      <- paste(final_names, collapse = ", ")
+  diff_txt     <- paste(name_diff, collapse = ", ")
+  log_error(
+    "Discrepancia de nomes de camadas no stack final.\nEsperado: %s\nObtido  : %s\nDivergentes: %s\nSkill anterior: species-distribution-modeling",
+    expected_txt, got_txt, diff_txt
+  )
+  stop(
+    "Layer name mismatch in final stack.\n",
+    "  Expected: ", expected_txt, "\n",
+    "  Got:      ", got_txt, "\n",
+    "  Differing layers: ", diff_txt
+  )
+}
+log_info("Nomes das camadas verificados. Camadas: %s", paste(final_names, collapse = ", "))
+# ── 12. Save output ───────────────────────────────────────────────────────────
+# Output file: <output_dir>/future_stack_<ssp_label>_<year_label>.tif; an
+# existing file of that name is overwritten.
+log_step(12, "Gravar stack futuro preparado")
+out_filename <- sprintf("future_stack_%s_%s.tif", ssp_label, year_label)
+out_path     <- file.path(output_dir, out_filename)
+# Log the write failure, then re-raise it.
+.write_failed <- function(e) {
+  log_error(
+    "Falha ao gravar raster de saida '%s': %s\nCausa provavel: sem permissao de escrita ou espaco em disco insuficiente.\nSkill anterior: species-distribution-modeling",
+    out_path, conditionMessage(e)
+  )
+  stop(e)
+}
+tryCatch(
+  {
+    writeRaster(future_resampled, out_path, overwrite = TRUE)
+    log_info("Gravado: %s", out_path)
+  },
+  error = .write_failed
+)
+# ── 13. Summary ───────────────────────────────────────────────────────────────
+# Closing report describing the stack that was written.
+log_step(13, "Exibir resumo das camadas futuras preparadas")
+out_layer_names <- paste(names(future_resampled), collapse = ", ")
+out_crs_name    <- crs(future_resampled, describe = TRUE)$name
+out_resolution  <- paste(res(future_resampled), collapse = " x ")
+log_info("========== RESUMO DAS CAMADAS FUTURAS ==========")
+log_info("SSP                : %s", ssp_label)
+log_info("Horizonte temporal : %s", year_label)
+log_info("Camadas preparadas : %d", nlyr(future_resampled))
+log_info("Nomes das camadas  : %s", out_layer_names)
+log_info("CRS de saida       : %s", out_crs_name)
+log_info("Resolucao de saida : %s unidades", out_resolution)
+log_info("Arquivo de saida   : %s", out_path)
+log_info("=================================================")
+log_info("Pronto para: maxnet::predict(), biomod2::BIOMOD_Projection() ou sdm_pipeline.py")

package/skills/species-distribution-modeling/scripts/project_scenarios.R ADDED Viewed

@@ -0,0 +1,220 @@
+# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
+# SPDX-License-Identifier: GPL-3.0-or-later
+# Usage: Rscript project_scenarios.R <model_rds> <scenarios_dir> [output_dir] [threshold_from_csv]
+# Project a fitted SDM across multiple future climate scenario stacks.
+# scenarios_dir must contain one .tif stack per scenario; the file name without its extension becomes the scenario label (e.g. ssp245_2050.tif → ssp245_2050).
+# ── Inline logger ─────────────────────────────────────────────────────────────
+# Timestamped console logger. Every level goes through message(), and each
+# level function takes sprintf()-style arguments (a format plus its values).
+SKILL_NAME <- "species-distribution-modeling"
+# Current time formatted as "[YYYY-MM-DD HH:MM:SS]".
+.log_ts <- function() {
+  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
+}
+# Build a logger that prefixes each formatted message with a timestamp and `tag`.
+.log_with_tag <- function(tag) {
+  force(tag)
+  function(...) message(.log_ts(), tag, sprintf(...))
+}
+log_info  <- .log_with_tag(" [INFO]  ")
+log_warn  <- .log_with_tag(" [WARN]  ")
+log_error <- .log_with_tag(" [ERROR] ")
+# Announce numbered pipeline step `n` with description `d`.
+log_step <- function(n, d) {
+  log_info("-- STEP %d: %s", n, d)
+}
+# Record a parameter choice: name `v`, chosen value `val`, and the reason `why`.
+log_decision <- function(v, val, why) {
+  log_info("DECISION | %s = %s | %s", v, val, why)
+}
+# Make sure a "logs" directory exists under the working directory.
+dir.create("logs", recursive = TRUE, showWarnings = FALSE)
+suppressPackageStartupMessages({
+  library(terra)
+})
+# ── Arguments ─────────────────────────────────────────────────────────────────
+# model_rds and scenarios_dir are mandatory; output_dir and threshold_from_csv
+# (path to prediction_summary.csv) are optional.
+args   <- commandArgs(trailingOnly = TRUE)
+n_args <- length(args)
+if (n_args < 1) stop("model_rds required")
+model_rds <- args[1]
+if (n_args < 2) stop("scenarios_dir required")
+scenarios_dir <- args[2]
+output_dir <- "outputs/projections"
+if (n_args >= 3) {
+  output_dir <- args[3]
+}
+threshold_from_csv <- NULL
+if (n_args >= 4) {
+  threshold_from_csv <- args[4]
+}
+log_decision("scenarios_dir",      scenarios_dir, "Directory containing SSP scenario stacks")
+# A missing threshold path is logged as the literal text "NULL".
+threshold_csv_label <- if (is.null(threshold_from_csv)) "NULL" else threshold_from_csv
+log_decision("threshold_from_csv", threshold_csv_label,
+             "If provided, reuse threshold from predict_distribution.R output")
+# ── Precondition checks ───────────────────────────────────────────────────────
+# Both inputs must exist before any work starts; the output directory is
+# created on demand.
+model_found <- file.exists(model_rds)
+if (!model_found) {
+  log_error("Model RDS nao encontrado: %s\nCausa provavel: run_ensemble_sdm.R nao concluiu.\nVerifique: a saida de skills/species-distribution-modeling.\nSkill anterior: species-distribution-modeling", model_rds)
+  stop("Missing model: ", model_rds)
+}
+scenarios_found <- dir.exists(scenarios_dir)
+if (!scenarios_found) {
+  log_error("Scenarios dir nao encontrado: %s\nCausa provavel: prepare_future_layers.R nao foi executado.\nVerifique: CMIP6 stacks foram baixados e preparados.\nSkill anterior: geoprocessing-for-ecology", scenarios_dir)
+  stop("Missing scenarios dir: ", scenarios_dir)
+}
+dir.create(path = output_dir, showWarnings = FALSE, recursive = TRUE)
+# ── Step 1: Load model ────────────────────────────────────────────────────────
+# Read the fitted model and decide whether it is a single model or an ensemble.
+log_step(1, "Loading model")
+model_obj <- tryCatch(readRDS(model_rds), error = function(e) {
+  log_error("Falha ao ler model RDS: %s\nCausa provavel: arquivo corrompido.\nVerifique: Rscript gerou o modelo com saveRDS().", conditionMessage(e))
+  stop(e)
+})
+# An ensemble is a list that carries its member models in a non-empty `models`
+# list, which is what the projection loop iterates over. Testing is.list() alone
+# is not enough: fitted maxnet, gbm and randomForest objects are lists too, so a
+# single model was routed through the ensemble path, where `$models` is NULL and
+# every scenario projection failed.
+is_ensemble <- is.list(model_obj) &&
+  !inherits(model_obj, "MaxEnt") &&
+  is.list(model_obj[["models"]]) &&
+  length(model_obj[["models"]]) > 0
+log_info("Model class: %s | ensemble: %s", class(model_obj)[1], is_ensemble)
+# ── Step 2: Get threshold ─────────────────────────────────────────────────────
+# Resolve the binary threshold from, in order: the `threshold_value` column of
+# the summary CSV, the model's "MaxTSS" entry in `threshold_values`, or (when
+# neither yields a usable number) the per-scenario P10 fallback applied later.
+# threshold_val stays NA_real_ in the fallback case.
+log_step(2, "Resolving binary threshold")
+# Coerce a candidate to one finite, unnamed number; anything else becomes NA.
+# A missing CSV column (NULL) or a non-numeric cell therefore cannot leak a
+# zero-length or NA value into the later `if (!is.na(threshold_val))` test.
+.as_threshold <- function(x) {
+  x <- suppressWarnings(as.numeric(x))
+  if (length(x) >= 1L && is.finite(x[1])) unname(x[1]) else NA_real_
+}
+threshold_val <- NA_real_
+if (!is.null(threshold_from_csv)) {
+  if (file.exists(threshold_from_csv)) {
+    sum_df        <- read.csv(threshold_from_csv, stringsAsFactors = FALSE)
+    threshold_val <- .as_threshold(sum_df$threshold_value[1])
+    if (is.na(threshold_val)) {
+      log_warn("No usable 'threshold_value' in %s; trying the model object", threshold_from_csv)
+    } else {
+      log_info("Threshold loaded from CSV: %.4f", threshold_val)
+    }
+  } else {
+    # A path that was given but does not exist used to be ignored silently.
+    log_warn("Threshold CSV not found: %s; trying the model object", threshold_from_csv)
+  }
+}
+# `$` is only applied to list-like models; S4 model objects do not support it.
+if (is.na(threshold_val) && is.list(model_obj) && !is.null(model_obj$threshold_values)) {
+  model_thresholds <- model_obj$threshold_values
+  if ("MaxTSS" %in% names(model_thresholds)) {
+    threshold_val <- .as_threshold(model_thresholds[["MaxTSS"]])
+  }
+  if (is.na(threshold_val)) {
+    log_warn("Model has threshold_values but no usable 'MaxTSS' entry")
+  } else {
+    log_info("Threshold from model (MaxTSS): %.4f", threshold_val)
+  }
+}
+if (is.na(threshold_val)) {
+  log_warn("No threshold source available; binary maps will use P10 of each scenario map")
+}
+# ── Helper: predict from one stack ────────────────────────────────────────────
+# Predict with fitted model `mod` over the predictors `preds`, dispatching on
+# the first class of `mod`:
+#   "maxnet"        -> cloglog predictions
+#   "gbm" / "BRT"   -> response-scale predictions using mod$n.trees trees
+#   "randomForest"  -> second column of the class-probability matrix
+#   anything else   -> plain predict(mod, preds), returned unchanged
+# In the three named cases the per-cell predictions are written into a copy of
+# the first layer of `preds`, and that layer is returned.
+predict_stack <- function(mod, preds) {
+  # Store a vector of per-cell predictions in a copy of the first layer.
+  as_layer <- function(cell_values) {
+    layer <- preds[[1]]
+    values(layer) <- cell_values
+    layer
+  }
+  switch(
+    class(mod)[1],
+    maxnet = {
+      suppressPackageStartupMessages(library(maxnet))
+      cell_df   <- as.data.frame(preds, na.rm = FALSE)
+      cell_pred <- predict(mod, cell_df, type = "cloglog")
+      as_layer(cell_pred)
+    },
+    gbm = ,
+    BRT = {
+      suppressPackageStartupMessages(library(gbm))
+      cell_df   <- as.data.frame(preds, na.rm = FALSE)
+      cell_pred <- predict.gbm(mod, cell_df, n.trees = mod$n.trees, type = "response")
+      as_layer(cell_pred)
+    },
+    randomForest = {
+      suppressPackageStartupMessages(library(randomForest))
+      cell_df   <- as.data.frame(preds, na.rm = FALSE)
+      cell_pred <- predict(mod, cell_df, type = "prob")[, 2]
+      as_layer(cell_pred)
+    },
+    predict(mod, preds)
+  )
+}
+# ── Step 3: Find scenario stacks ──────────────────────────────────────────────
+# Each .tif in scenarios_dir is one scenario; its label is the file name
+# without directory and extension (e.g. ssp245_2050.tif → ssp245_2050).
+log_step(3, "Scanning scenario stacks")
+tif_files <- list.files(path = scenarios_dir, pattern = "\\.tif$", full.names = TRUE)
+n_stacks  <- length(tif_files)
+if (n_stacks == 0) {
+  log_error("Nenhum .tif encontrado em: %s\nCausa provavel: prepare_future_layers.R nao gerou os stacks.\nVerifique: arquivos .tif existem em scenarios_dir.", scenarios_dir)
+  stop("No .tif files in scenarios_dir")
+}
+log_info("Found %d scenario stack(s)", n_stacks)
+stack_file_names <- basename(tif_files)
+scenario_labels  <- tools::file_path_sans_ext(stack_file_names)
+log_decision("scenario_labels", paste(scenario_labels, collapse=", "),
+             "Derived from .tif filenames in scenarios_dir")
+# ── Step 4: Project each scenario ────────────────────────────────────────────
+# For every scenario stack: predict suitability, write the suitability and
+# binary rasters, and collect one row of area statistics. A scenario that cannot
+# be read or projected is logged and skipped; its slot in `results` stays NULL.
+log_step(4, "Projecting across all scenarios")
+results <- vector("list", length(tif_files))
+current_suit <- NULL   # suitability of the first scenario; reference for the change map
+for (i in seq_along(tif_files)) {
+  lbl  <- scenario_labels[i]
+  tif  <- tif_files[i]
+  log_info("Projecting scenario %d/%d: %s", i, length(tif_files), lbl)
+  preds <- tryCatch(rast(tif), error = function(e) {
+    log_error("Falha ao ler stack %s: %s\nCausa provavel: arquivo corrompido ou caminho errado.", lbl, conditionMessage(e))
+    NULL
+  })
+  # Just skip: assigning NULL to results[[i]] would delete the list element and
+  # shift the position of every later scenario.
+  if (is.null(preds)) next
+  suit <- tryCatch({
+    if (is_ensemble) {
+      # Weighted mean of the member predictions; equal weights when the model
+      # object carries no `auc_weights`.
+      preds_list <- lapply(model_obj$models, predict_stack, preds = preds)
+      suit_stack <- rast(preds_list)
+      w <- if (!is.null(model_obj$auc_weights)) model_obj$auc_weights else
+           rep(1 / nlyr(suit_stack), nlyr(suit_stack))
+      app(suit_stack, function(x) weighted.mean(x, w, na.rm = TRUE))
+    } else {
+      predict_stack(model_obj, preds)
+    }
+  }, error = function(e) {
+    log_error("Falha na projecao do cenario %s: %s\nVerifique: stack tem os mesmos preditores do modelo.", lbl, conditionMessage(e))
+    NULL
+  })
+  if (is.null(suit)) next
+  # Save suitability raster
+  suit_file <- file.path(output_dir, paste0("suitability_", lbl, ".tif"))
+  writeRaster(suit, suit_file, overwrite = TRUE)
+  # Binary map: the resolved threshold, or the 10th percentile of this map.
+  # unname() drops the "10%" name that quantile() attaches to its result.
+  suit_values <- values(suit)
+  thr_use <- if (!is.na(threshold_val)) threshold_val else
+             quantile(suit_values, 0.10, na.rm = TRUE)
+  thr_use <- unname(thr_use)
+  binary  <- suit >= thr_use
+  bin_file <- file.path(output_dir, paste0("binary_", lbl, ".tif"))
+  writeRaster(binary, bin_file, overwrite = TRUE, datatype = "INT1U")
+  # Area stats. Cell areas come from terra::cellSize() in km2 rather than from
+  # prod(res(suit)) / 1e6, which is only an area when the raster units are
+  # metres and gives meaningless figures for longitude/latitude grids.
+  # mask = TRUE leaves NA wherever suitability is NA.
+  cell_km2     <- cellSize(suit, mask = TRUE, unit = "km")
+  suitable_km2 <- global(cell_km2 * binary, "sum", na.rm = TRUE)[1, 1]
+  total_km2    <- global(cell_km2, "sum", na.rm = TRUE)[1, 1]
+  # Share of the valid area that is suitable; NA when there is no valid area.
+  pct_suitable <- if (isTRUE(total_km2 > 0)) 100 * suitable_km2 / total_km2 else NA_real_
+  results[[i]] <- data.frame(
+    scenario          = lbl,
+    threshold_used    = round(thr_use, 4),
+    suitable_area_km2 = round(suitable_km2, 1),
+    total_area_km2    = round(total_km2, 1),
+    pct_suitable      = round(pct_suitable, 2),
+    mean_suitability  = round(mean(suit_values, na.rm = TRUE), 4),
+    stringsAsFactors  = FALSE
+  )
+  # Keep first scenario as "current" reference for change map
+  if (i == 1) current_suit <- suit
+  log_info("  %s: suitable = %.1f km2 (%.1f%%)",
+           lbl, results[[i]]$suitable_area_km2, results[[i]]$pct_suitable)
+}
+# ── Step 5: Scenario comparison CSV ───────────────────────────────────────────
+# Bind the per-scenario rows (skipped scenarios left NULL entries) and write
+# them to scenario_comparison.csv in output_dir.
+log_step(5, "Writing scenario comparison table")
+valid_results  <- Filter(Negate(is.null), results)
+# Without this guard do.call(rbind, list()) returns NULL and an empty CSV is
+# written as if the run had succeeded.
+if (length(valid_results) == 0) {
+  log_error("No scenario could be projected; see the errors logged above.")
+  stop("All scenario projections failed", call. = FALSE)
+}
+comparison_df  <- do.call(rbind, valid_results)
+comp_file      <- file.path(output_dir, "scenario_comparison.csv")
+write.csv(comparison_df, comp_file, row.names = FALSE)
+log_info("Scenario comparison saved: %s", comp_file)
+# ── Step 6: Change map (vs. first scenario) ────────────────────────────────────
+# Relative change of the mean suitability of all later scenarios with respect
+# to the first scenario: (mean_future - first) / (first + 1e-6). The step is
+# best-effort: any error is logged as a warning and the map is skipped.
+log_step(6, "Computing change map relative to first scenario")
+can_compare <- !is.null(current_suit) && length(tif_files) > 1
+if (can_compare) {
+  tryCatch(
+    {
+      # Suitability rasters expected from step 4; keep those present on disk.
+      expected_suits <- file.path(output_dir, paste0("suitability_", scenario_labels, ".tif"))
+      suits_on_disk  <- expected_suits[file.exists(expected_suits)]
+      if (length(suits_on_disk) >= 2) {
+        suit_layers  <- rast(suits_on_disk)
+        later_layers <- suit_layers[[-1]]
+        later_mean   <- app(later_layers, mean, na.rm = TRUE)
+        # The 1e-6 term keeps the denominator away from zero.
+        rel_change   <- (later_mean - current_suit) / (current_suit + 1e-6)
+        change_path  <- file.path(output_dir, "scenario_change_map.tif")
+        writeRaster(rel_change, change_path, overwrite = TRUE)
+        log_info("Change map saved (relative to first scenario): %s", change_path)
+      }
+    },
+    error = function(e) {
+      log_warn("Change map computation failed: %s. Skipping.", conditionMessage(e))
+    }
+  )
+}
+# ── Step 7: Summary plot ───────────────────────────────────────────────────────
+# Bar chart of suitable area per scenario, filled and labelled by the share of
+# suitable area. Best-effort: if ggplot2 is unavailable or plotting fails, a
+# warning is logged and the script carries on.
+log_step(7, "Generating scenario summary plot")
+tryCatch({
+  suppressPackageStartupMessages(library(ggplot2))
+  p <- ggplot(comparison_df, aes(x = scenario, y = suitable_area_km2,
+                                  fill = pct_suitable)) +
+    geom_col() +
+    geom_text(aes(label = paste0(pct_suitable, "%")), vjust = -0.3, size = 3) +
+    scale_fill_gradient(low = "#d4e6f1", high = "#1a5276",
+                        name = "% Suitable") +
+    labs(title = "Suitable Area by Scenario",
+         x = "Scenario", y = "Suitable Area (km²)") +
+    theme_bw() +
+    theme(axis.text.x = element_text(angle = 45, hjust = 1))
+  plot_file <- file.path(output_dir, "scenario_summary_plot.png")
+  # Width grows with the number of scenarios, with a minimum of 6.
+  ggsave(plot_file, p, width = max(6, length(tif_files) * 1.5), height = 5, dpi = 150)
+  log_info("Summary plot saved: %s", plot_file)
+}, error = function(e) {
+  log_warn("ggplot2 summary plot failed: %s. Skipping.", conditionMessage(e))
+})
+# log_step() takes exactly (n, d). Passing the format and output_dir as separate
+# arguments raised "unused argument" and ended an otherwise successful run with
+# an error, so the description is formatted before the call.
+log_step(8, sprintf("Done — all scenario projections in: %s", output_dir))

package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R ADDED Viewed

@@ -0,0 +1,99 @@
+# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
+# SPDX-License-Identifier: GPL-3.0-or-later
+# Usage: Rscript run_ensemble_sdm.R <params_yaml> <output_dir>
+# ── Inline logger ─────────────────────────────────────────────────────────────
+# Timestamped console logger. Every level goes through message(), and each
+# level function takes sprintf()-style arguments (a format plus its values).
+SKILL_NAME <- "species-distribution-modeling"
+# Current time formatted as "[YYYY-MM-DD HH:MM:SS]".
+.log_ts <- function() {
+  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
+}
+# Build a logger that prefixes each formatted message with a timestamp and `tag`.
+.log_with_tag <- function(tag) {
+  force(tag)
+  function(...) message(.log_ts(), tag, sprintf(...))
+}
+log_info  <- .log_with_tag(" [INFO]  ")
+log_warn  <- .log_with_tag(" [WARN]  ")
+log_error <- .log_with_tag(" [ERROR] ")
+# Announce numbered pipeline step `n` with description `d`.
+log_step <- function(n, d) {
+  log_info("-- STEP %d: %s", n, d)
+}
+# Record a parameter choice: name `v`, chosen value `val`, and the reason `why`.
+log_decision <- function(v, val, why) {
+  log_info("DECISION | %s = %s | %s", v, val, why)
+}
+# Make sure a "logs" directory exists under the working directory.
+dir.create("logs", recursive = TRUE, showWarnings = FALSE)
+# Fit MaxEnt + BRT + RF ensemble SDM
+# Usage: Rscript run_ensemble_sdm.R <params_yaml> <output_dir>
+# Requires: terra, sf, maxnet, gbm, randomForest, dismo, blockCV, yaml
+suppressPackageStartupMessages({
+  library(terra); library(sf); library(maxnet)
+  library(gbm); library(randomForest); library(yaml)
+})
+# Step 1: both command-line arguments are optional: the parameter file
+# defaults to "params.yaml" and the output directory to "outputs/sdm".
+log_step(1, "Analisar argumentos e carregar parametros YAML")
+args       <- commandArgs(trailingOnly = TRUE)
+params_f   <- if (length(args) >= 1) args[1] else "params.yaml"
+output_dir <- if (length(args) >= 2) args[2] else "outputs/sdm"
+log_info("Script: run_ensemble_sdm.R | Skill: %s", SKILL_NAME)
+log_info("Params file : %s", params_f)
+log_info("Output dir  : %s", output_dir)
+# ── Input precondition check ──────────────────────────────────────────────────
+# Without the parameter file nothing else can run.
+params_found <- file.exists(params_f)
+if (!params_found) {
+  log_error(
+    "Input nao encontrado: %s\nCausa provavel: arquivo nao gerado pelo passo anterior.\nVerifique a saida de: species-distribution-modeling (tune_maxnet ou prepare_future_layers)\nSkill anterior: species-distribution-modeling",
+    params_f
+  )
+  stop("Missing: ", params_f)
+}
+dir.create(path = output_dir, showWarnings = FALSE, recursive = TRUE)
+log_info("Diretorio de saida pronto: %s", output_dir)
+# Step 2: read the YAML parameters, seed the RNG and log the modelling
+# choices. The values read are random_seeds$global, modeling$algorithms,
+# modeling$cv_method and modeling$cv_folds.
+log_step(2, "Ler parametros do arquivo YAML")
+p <- tryCatch({
+  yaml::read_yaml(params_f)
+}, error = function(e) {
+  log_error(
+    "Falha ao ler arquivo YAML '%s': %s\nCausa provavel: YAML malformado ou encoding incorreto.\nVerifique a sintaxe do arquivo.\nSkill anterior: species-distribution-modeling",
+    params_f, conditionMessage(e)
+  )
+  stop(e)
+})
+# Text form of a parameter for logging. A key absent from the YAML is NULL, and
+# sprintf() returns character(0) for a zero-length argument, which made the
+# logger print an empty line; show an explicit marker instead.
+.param_text <- function(x) {
+  if (length(x) == 0) "<ausente>" else paste(x, collapse = ",")
+}
+# set.seed(NULL) re-seeds the generator afresh rather than fixing it, so a
+# missing seed silently broke reproducibility. Warn and leave the RNG untouched.
+global_seed <- p$random_seeds$global
+if (length(global_seed) == 0) {
+  log_warn("random_seeds$global ausente em %s: semente nao definida, resultados nao serao reprodutiveis.", params_f)
+} else {
+  set.seed(global_seed)
+}
+log_decision("random_seed", .param_text(global_seed), "semente global definida no params.yaml para reprodutibilidade")
+log_decision("algorithms",  .param_text(p$modeling$algorithms), "algoritmos definidos em params.yaml$modeling$algorithms")
+log_decision("cv_method",   .param_text(p$modeling$cv_method), "metodo de validacao cruzada definido em params.yaml")
+log_decision("cv_folds",    .param_text(p$modeling$cv_folds),  "numero de folds definido em params.yaml")
+log_info("=== SDM Ensemble Pipeline ===")
+log_info("Output    : %s", output_dir)
+log_info("Algoritmos: %s", paste(p$modeling$algorithms, collapse = ", "))
+# "%s" with the text form replaces "%d", which fails on a non-numeric cv_folds.
+log_info("CV method : %s | Folds: %s", .param_text(p$modeling$cv_method), .param_text(p$modeling$cv_folds))
+# NOTE: This script is a scaffold: it validates its inputs and logs the
+# configuration, but fits no model. Add data loading and model calls below.
+# Example structure:
+#
+# occ    <- read.csv("data/processed/occ_thinned.csv")
+# bg     <- read.csv("data/processed/background.csv")
+# stack  <- rast("data/predictors_stack.tif")
+# predictors <- readLines("outputs/selected_predictors.txt")
+#
+# occ_env <- extract(stack[[predictors]], occ[, c("decimalLongitude","decimalLatitude")])
+# bg_env  <- extract(stack[[predictors]], bg[, c("lon","lat")])
+# NOTE(review): terra::extract() may prepend an ID column to its result;
+# confirm, and drop it (e.g. ID = FALSE) so it is not used as a predictor.
+#
+# train_df <- rbind(
+#   cbind(pa = 1, occ_env),
+#   cbind(pa = 0, bg_env)
+# ) |> na.omit()
+#
+# # MaxEnt
+# mx <- maxnet(p = train_df$pa, data = train_df[,-1],
+#              regmult = p$hyperparameters$maxnet$regularization_multiplier[2])
+#
+# # Predict and ensemble — see biomod2 for full ensemble workflow
+log_step(3, "Scaffold carregado — preencher com carregamento de dados e chamadas de modelo")
+# Log-and-rethrow handler for the main block; replace the body of the
+# tryCatch() call below with the real modelling code.
+.scaffold_failed <- function(e) {
+  log_error(
+    "Falha no bloco principal do scaffold: %s\nCausa provavel: erro de configuracao ou dados ausentes.\nVerifique os arquivos de dados e o params.yaml.\nSkill anterior: species-distribution-modeling",
+    conditionMessage(e)
+  )
+  stop(e)
+}
+tryCatch(
+  log_info("Scaffold carregado. Adicione o carregamento de dados e as chamadas de modelo para seu estudo."),
+  error = .scaffold_failed
+)