npm - ecological-agent-skills - Versions diffs - 3.1.0 - Mend

ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (217) hide show

package/skills/landscape-connectivity/scripts/connectivity_metrics.R ADDED Viewed

@@ -0,0 +1,274 @@
+# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
+# SPDX-License-Identifier: GPL-3.0-or-later
+# Usage: Rscript connectivity_metrics.R <patches_shp> <output_dir> [dmax_m] [area_col]
+#
+# Computes graph-based landscape connectivity metrics (IIC, PC, dIIC, dPC,
+# betweenness centrality) for a set of habitat patches.
+#
+# Arguments:
+#   patches_shp — Shapefile or GeoPackage of habitat patches (.shp or .gpkg)
+#   output_dir  — Directory for output files
+#   dmax_m      — Maximum dispersal distance in metres (default: 1000)
+#   area_col    — Column name for patch area in patches layer (default: "area_ha")
+#
+# Outputs:
+#   patch_metrics.csv        — patch_id, area, dIIC (%), dPC (%), normalised BC and component id per patch
+#   landscape_summary.csv    — IIC, PC, number of patches, number of components, largest component size, dmax_m
+#   connectivity_graph.png   — Network visualisation coloured by dPC
+# ── Inline logger ─────────────────────────────────────────────────────────────
+# Minimal logger: every helper formats its arguments with sprintf() and emits
+# one timestamped line through message().
+SKILL_NAME <- "landscape-connectivity"
+# Current time rendered as "[YYYY-MM-DD HH:MM:SS]".
+.log_ts <- function() {
+  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
+}
+# Level helpers: `...` is a sprintf() format string followed by its values.
+log_info <- function(...) {
+  message(.log_ts(), " [INFO]  ", sprintf(...))
+}
+log_warn <- function(...) {
+  message(.log_ts(), " [WARN]  ", sprintf(...))
+}
+log_error <- function(...) {
+  message(.log_ts(), " [ERROR] ", sprintf(...))
+}
+# Announces pipeline step number `n` with description `d`.
+log_step <- function(n, d) {
+  log_info("-- STEP %d: %s", n, d)
+}
+# Records a parameter choice: name `v`, chosen value `val`, rationale `why`.
+log_decision <- function(v, val, why) {
+  log_info("DECISION | %s = %s | %s", v, val, why)
+}
+# Create ./logs under the current working directory if it is missing.
+dir.create("logs", showWarnings = FALSE, recursive = TRUE)
+suppressPackageStartupMessages(library(sf))
+suppressPackageStartupMessages(library(igraph))
+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(ggplot2))
+# ── Command-line arguments ───────────────────────────────────────────────────
+# Positional: <patches_shp> <output_dir> [dmax_m] [area_col]
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 2) {
+  log_error("Argumentos insuficientes. Uso: Rscript connectivity_metrics.R <patches_shp> <output_dir> [dmax_m] [area_col]")
+  cat("Usage: Rscript connectivity_metrics.R <patches_shp> <output_dir>",
+      "[dmax_m] [area_col]\n")
+  quit(status = 1)
+}
+patches_path <- args[1]
+output_dir   <- args[2]
+# A non-numeric value becomes NA here and is rejected explicitly below, so the
+# coercion warning itself carries no extra information.
+dmax_m       <- if (length(args) >= 3) suppressWarnings(as.numeric(args[3])) else 1000
+area_col     <- if (length(args) >= 4) args[4] else "area_ha"
+# ── Input precondition checks ────────────────────────────────────────────────
+if (!file.exists(patches_path)) {
+  log_error("Input nao encontrado: %s\nCausa provavel: arquivo shapefile/GeoPackage nao existe ou caminho incorreto\nVerifique: se o arquivo .shp ou .gpkg existe no caminho especificado\nSkill anterior: [nenhuma — etapa inicial ou saida de processamento GIS]", patches_path)
+  stop("Missing patches file: ", patches_path)
+}
+# dmax_m is used both as a distance threshold and as a divisor in the
+# dispersal kernel, so it has to be a finite, strictly positive number.
+if (!is.finite(dmax_m) || dmax_m <= 0) {
+  log_error("dmax_m invalido: '%s'\nCausa provavel: valor nao numerico, zero ou negativo\nVerifique: dmax_m deve ser um numero positivo em metros\nSkill anterior: [nenhuma — entrada do usuario]", args[3])
+  stop("dmax_m must be a positive number of metres. Got: ", args[3])
+}
+log_decision("dmax_m", dmax_m,
+             "distancia maxima de dispersao em metros; define conectividade estrutural entre manchas")
+log_decision("area_col", area_col,
+             "coluna de area das manchas; usada para calculo de IIC e PC ponderados por area")
+dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+log_step(1, "Carregando camada de manchas de habitat")
+# ── Load patches ────────────────────────────────────────────────────────────
+# Read the patch layer; any read failure is logged and then re-raised.
+patches <- tryCatch({
+  sf::st_read(patches_path, quiet = TRUE)
+}, error = function(e) {
+  log_error("Falha ao ler shapefile de manchas: %s\nCausa provavel: arquivo corrompido ou CRS invalido\nVerifique: integridade do shapefile\nSkill anterior: [nenhuma]", conditionMessage(e))
+  stop(e)
+})
+if (!area_col %in% names(patches)) {
+  # Compute area from geometry if column missing
+  log_warn("Coluna '%s' nao encontrada; calculando area a partir da geometria", area_col)
+  patches[[area_col]] <- as.numeric(st_area(patches)) / 10000  # m² → ha
+}
+# Every metric below is an area-weighted sum, so a single NA, negative or
+# non-numeric area would silently turn all results into NA/garbage.
+area_vals <- patches[[area_col]]
+if (!is.numeric(area_vals) || anyNA(area_vals) || any(area_vals < 0)) {
+  log_error("Coluna de area '%s' invalida\nCausa provavel: valores nao numericos, ausentes (NA) ou negativos\nVerifique: conteudo da coluna de area na camada de manchas\nSkill anterior: [nenhuma]", area_col)
+  stop("Area column '", area_col, "' must be numeric, non-missing and non-negative.")
+}
+n   <- nrow(patches)
+A   <- sum(area_vals)  # total landscape area proxy (sum of patch areas)
+log_info("Manchas carregadas: %d. Area total das manchas: %.1f ha", n, A)
+if (n < 3) {
+  log_error("Minimo de 3 manchas necessario para analise de conectividade. Encontradas: %d\nCausa provavel: dado de entrada com poucas feicoes\nVerifique: arquivo de manchas e filtros de area minima\nSkill anterior: [nenhuma]", n)
+  stop("At least 3 patches required for connectivity analysis. ",
+       "Found ", n, " patches.")
+}
+# IIC and PC divide by the squared total area.
+if (A <= 0) {
+  log_error("Area total das manchas igual a zero na coluna '%s'\nCausa provavel: todas as manchas com area 0\nVerifique: valores da coluna de area\nSkill anterior: [nenhuma]", area_col)
+  stop("Total patch area is zero; connectivity indices are undefined.")
+}
+log_step(2, "Calculando distancias entre centroides das manchas")
+# ── Compute pairwise distances between patch centroids ──────────────────────
+centroids <- st_centroid(patches)
+# Geographic coordinates are in degrees, so Euclidean distances on them would
+# not be metres: reproject to the UTM zone that contains the mean centroid.
+is_geographic <- st_is_longlat(centroids)  # NA when the layer has no CRS
+if (isTRUE(is_geographic)) {
+  log_warn("Entrada em CRS geografico. Reprojetando para UTM para calculo de distancias")
+  lonlat   <- st_coordinates(centroids)
+  # Zones are 6 degrees wide, numbered 1..60 starting at 180°W, hence floor().
+  utm_zone <- floor((mean(lonlat[, 1]) + 180) / 6) + 1
+  utm_zone <- min(max(utm_zone, 1), 60)
+  # EPSG 326xx = WGS 84 / UTM north, EPSG 327xx = WGS 84 / UTM south.
+  utm_base <- if (mean(lonlat[, 2]) >= 0) 32600 else 32700
+  utm_crs  <- utm_base + utm_zone
+  centroids <- st_transform(centroids, crs = utm_crs)
+  log_info("CRS UTM usado: EPSG:%d", utm_crs)
+} else if (is.na(is_geographic)) {
+  log_warn("Camada sem CRS definido; assumindo coordenadas projetadas em metros")
+}
+coords   <- st_coordinates(centroids)
+# Only X and Y enter the distance, so a Z/M column cannot distort it.
+dist_mat <- as.matrix(dist(coords[, 1:2, drop = FALSE]))  # metres
+log_step(3, "Construindo grafo de adjacencia binaria")
+# ── Build adjacency matrix (binary, distance < dmax) ───────────────────────
+# Two patches are linked when their centroid distance is below dmax_m.
+adj_binary <- (dist_mat < dmax_m) * 1
+diag(adj_binary) <- 0  # no self-links
+# weighted = NULL is igraph's documented value for an unweighted graph.
+g_binary <- graph_from_adjacency_matrix(adj_binary, mode = "undirected",
+                                         weighted = NULL)
+# Hop counts between all patch pairs (Inf when two patches are in different
+# components). igraph's distances() has no "bfs" algorithm; "unweighted" is the
+# accepted value for hop-count shortest paths.
+sp_hops <- distances(g_binary, algorithm = "unweighted")
+n_edges <- sum(adj_binary) / 2  # symmetric matrix counts each link twice
+log_info("Grafo construido: %d nos, %d arestas (dmax = %g m)", n, n_edges, dmax_m)
+if (n_edges == 0) {
+  log_warn("Nenhuma mancha conectada dentro de dmax = %g m; aumente dmax_m ou verifique CRS", dmax_m)
+}
+log_step(4, "Calculando IIC — Integral Index of Connectivity")
+# ── IIC (Integral Index of Connectivity) ────────────────────────────────────
+areas <- patches[[area_col]]
+# Integral Index of Connectivity for one set of patches:
+#   IIC = sum_i sum_j [ a_i * a_j / (1 + nl_ij) ] / total_area^2
+#
+# areas_vec   — numeric vector of patch areas
+# sp_hops_mat — square matrix of hop counts between patches, in the same order
+#               as areas_vec; non-finite entries (unreachable pairs) add nothing
+# total_area  — area used for normalisation; defaults to the sum of areas_vec
+# Returns a single number (NaN when total_area is 0).
+compute_iic_internal <- function(areas_vec, sp_hops_mat,
+                                 total_area = sum(areas_vec)) {
+  # Weight 1 / (1 + hops) for reachable pairs, 0 for unreachable ones.
+  link_weight <- ifelse(is.finite(sp_hops_mat), 1 / (1 + sp_hops_mat), 0)
+  sum(outer(areas_vec, areas_vec) * link_weight) / total_area^2
+}
+IIC_full <- compute_iic_internal(areas, sp_hops)
+log_info("IIC (paisagem completa) = %.6f", IIC_full)
+log_step(5, "Calculando dIIC por mancha (leave-one-out)")
+# ── dIIC per patch ──────────────────────────────────────────────────────────
+# dIIC_k = 100 * (IIC - IIC_without_k) / IIC. The landscape area in the
+# denominator of IIC must stay the same with and without patch k; otherwise
+# removing a patch can spuriously raise the index and give a negative dIIC.
+log_info("Calculando dIIC para cada mancha (pode levar um momento)...")
+iic_area_sq <- sum(areas)^2
+dIIC_vec <- numeric(n)
+for (i in seq_len(n)) {
+  areas_i   <- areas[-i]
+  adj_i     <- adj_binary[-i, -i]
+  g_i       <- graph_from_adjacency_matrix(adj_i, mode = "undirected")
+  # "unweighted" = hop-count shortest paths ("bfs" is not an igraph option).
+  sp_i      <- distances(g_i, algorithm = "unweighted")
+  # compute_iic_internal() normalises by sum(areas_i)^2 by default; rescale to
+  # the full-landscape area so that only the numerator changes.
+  area_i_sq <- sum(areas_i)^2
+  IIC_i     <- if (area_i_sq > 0) {
+    compute_iic_internal(areas_i, sp_i) * area_i_sq / iic_area_sq
+  } else {
+    0
+  }
+  dIIC_vec[i] <- (IIC_full - IIC_i) / IIC_full * 100
+}
+log_step(6, "Calculando PC — Probability of Connectivity")
+# ── PC (Probability of Connectivity) ────────────────────────────────────────
+# Direct dispersal probability between two patches at centroid distance d:
+#   p = exp(-d / dmax)  (negative exponential kernel)
+# dmax serves as mean dispersal distance parameter: p = exp(-1) ≈ 0.37 at
+# d = dmax_m, i.e. it is not a hard cut-off here as it is for the binary graph.
+p_mat <- exp(-dist_mat / dmax_m)
+diag(p_mat) <- 0
+log_decision("dispersal_kernel", "exponencial negativo exp(-d/dmax)",
+             "nucleo padrao para probabilidade de colonizacao; ajuste se dados de telemetria disponíveis")
+# Build weighted graph for shortest path probabilities.
+# Edge weight = -log(p): adding weights along a path multiplies the
+# probabilities, so the shortest weighted path is the maximum-probability path.
+# Links with p <= 0.001 receive an infinite weight.
+p_weight <- ifelse(p_mat > 0.001, -log(p_mat), Inf)
+diag(p_weight) <- 0
+g_prob <- graph_from_adjacency_matrix(p_weight, mode = "undirected",
+                                       weighted = TRUE)
+sp_prob_dist <- distances(g_prob, algorithm = "dijkstra")
+pij_star <- exp(-sp_prob_dist)  # convert back to probability
+# PC = sum_ij(a_i * a_j * p*_ij) / (sum of patch areas)^2
+PC_full <- sum(outer(areas, areas) * pij_star) / sum(areas)^2
+log_info("PC (paisagem completa) = %.6f", PC_full)
+log_step(7, "Calculando dPC por mancha (leave-one-out)")
+# ── dPC per patch ────────────────────────────────────────────────────────────
+# dPC_k = 100 * (PC - PC_without_k) / PC. As for the full landscape, PC is
+# normalised by the squared total patch area, which must not change when a
+# patch is removed (otherwise dPC can become negative for real losses).
+log_info("Calculando dPC para cada mancha...")
+pc_area_sq <- sum(areas)^2
+dPC_vec <- numeric(n)
+for (i in seq_len(n)) {
+  areas_i  <- areas[-i]
+  # Reuse the -log(p) weights of the full landscape (zero diagonal, Inf for
+  # p <= 0.001) instead of rebuilding them on every iteration.
+  g_i      <- graph_from_adjacency_matrix(p_weight[-i, -i], mode = "undirected",
+                                           weighted = TRUE)
+  sp_i     <- distances(g_i, algorithm = "dijkstra")
+  pij_i    <- exp(-sp_i)  # maximum path probability between remaining patches
+  PC_i     <- sum(outer(areas_i, areas_i) * pij_i) / pc_area_sq
+  dPC_vec[i] <- (PC_full - PC_i) / PC_full * 100
+}
+# ── Betweenness centrality ───────────────────────────────────────────────────
+# Normalised betweenness of every patch in the binary (hop-based) graph.
+BC <- betweenness(g_binary, normalized = TRUE)
+log_step(8, "Escrevendo resumo da paisagem e metricas por mancha")
+# ── Landscape summary ────────────────────────────────────────────────────────
+# Connected components of the binary graph and the size of the largest one.
+components_g <- components(g_binary)
+largest_comp  <- max(components_g$csize)
+# One row per landscape-level metric, written as a two-column CSV.
+summary_values <- c(
+  IIC                    = IIC_full,
+  PC                     = PC_full,
+  n_patches              = n,
+  n_components           = components_g$no,
+  largest_component_size = largest_comp,
+  dmax_m                 = dmax_m
+)
+summary_df <- data.frame(
+  metric = names(summary_values),
+  value  = unname(summary_values)
+)
+sum_path <- file.path(output_dir, "landscape_summary.csv")
+write.csv(summary_df, sum_path, row.names = FALSE)
+log_info("Resumo da paisagem gravado: %s", sum_path)
+# Flag landscapes in which more than half of the patches are their own component.
+if (2 * components_g$no > n) {
+  log_warn("Paisagem altamente fragmentada: %d componentes para %d manchas; considere aumentar dmax_m",
+           components_g$no, n)
+}
+# ── Patch metrics ────────────────────────────────────────────────────────────
+# Use the layer's own `id` column when it exists, otherwise the row position.
+if ("id" %in% names(patches)) {
+  patch_id <- patches$id
+} else {
+  patch_id <- seq_len(n)
+}
+# One row per patch, most important patch (highest dPC) first.
+patch_metrics <- data.frame(
+  patch_id  = patch_id,
+  area_ha   = areas,
+  dIIC_pct  = round(dIIC_vec, 4),
+  dPC_pct   = round(dPC_vec,  4),
+  BC_norm   = round(BC,       4),
+  component = components_g$membership
+)
+dpc_rank      <- order(-patch_metrics$dPC_pct)
+patch_metrics <- patch_metrics[dpc_rank, ]
+patch_path <- file.path(output_dir, "patch_metrics.csv")
+write.csv(patch_metrics, patch_path, row.names = FALSE)
+log_info("Metricas por mancha gravadas: %s (%d manchas)", patch_path, n)
+# Report top patches
+log_info("Top 5 manchas por dPC:")
+top_cols <- c("patch_id", "area_ha", "dPC_pct", "BC_norm")
+top5 <- head(patch_metrics[, top_cols], 5)
+for (r in seq_len(nrow(top5))) {
+  top_row <- top5[r, ]
+  log_info("  patch_id=%s | area=%.1f ha | dPC=%.2f%% | BC=%.4f",
+           top_row$patch_id, top_row$area_ha, top_row$dPC_pct, top_row$BC_norm)
+}
+log_step(9, "Gerando visualizacao do grafo de conectividade")
+# ── Network visualisation ────────────────────────────────────────────────────
+# Patches are drawn at their centroid coordinates, sized by relative area and
+# filled by dPC; links of the binary graph are drawn as grey segments.
+V(g_binary)$dPC <- dPC_vec
+V(g_binary)$area <- areas
+patch_df <- data.frame(
+  x     = coords[, 1],
+  y     = coords[, 2],
+  dPC   = dPC_vec,
+  area  = areas / max(areas) * 3 + 0.5  # size scaled
+)
+# names = FALSE returns numeric vertex ids, which index rows of `coords`
+# directly and do not depend on how igraph named the vertices. A graph without
+# edges yields a 0-row matrix and therefore an empty segment layer.
+edge_idx <- as_edgelist(g_binary, names = FALSE)
+edges_df <- data.frame(
+  x_from = unname(coords[edge_idx[, 1], 1]),
+  y_from = unname(coords[edge_idx[, 1], 2]),
+  x_to   = unname(coords[edge_idx[, 2], 1]),
+  y_to   = unname(coords[edge_idx[, 2], 2])
+)
+p <- ggplot() +
+  geom_segment(data = edges_df,
+               aes(x = x_from, y = y_from, xend = x_to, yend = y_to),
+               colour = "grey60", linewidth = 0.5, alpha = 0.7) +
+  geom_point(data = patch_df,
+             aes(x = x, y = y, size = area, fill = dPC),
+             shape = 21, colour = "grey30") +
+  scale_fill_viridis_c(option = "plasma", name = "dPC (%)") +
+  scale_size_continuous(range = c(2, 8), guide = "none") +
+  labs(x = "Easting", y = "Northing",
+       title = sprintf("Connectivity graph (dmax = %g m, IIC = %.4f)",
+                       dmax_m, IIC_full)) +
+  theme_minimal(base_size = 10) +
+  coord_equal()
+plot_path <- file.path(output_dir, "connectivity_graph.png")
+# The plot is a convenience output: a failure to save it is only a warning.
+tryCatch({
+  ggsave(plot_path, p, width = 8, height = 7, dpi = 150)
+  log_info("Visualizacao do grafo salva: %s", plot_path)
+}, error = function(e) {
+  log_warn("Nao foi possivel salvar visualizacao do grafo: %s", conditionMessage(e))
+})
+log_info("Analise de conectividade concluida")

package/skills/landscape-connectivity/scripts/resistance_surface.R ADDED Viewed

@@ -0,0 +1,239 @@
+# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
+# SPDX-License-Identifier: GPL-3.0-or-later
+# Usage: Rscript resistance_surface.R <landcover_tif> <resistance_csv> <output_dir>
+#        [dem_tif] [road_shp]
+#
+# Constructs a resistance surface from a land cover raster and resistance
+# lookup table. Optionally adds slope-based and road-proximity penalties.
+#
+# Arguments:
+#   landcover_tif   — Land cover raster (.tif) with integer class codes
+#   resistance_csv  — CSV with columns: lc_code (int), resistance (num), description (char)
+#   output_dir      — Directory for output files
+#   dem_tif         — (optional) DEM raster for slope penalty
+#   road_shp        — (optional) Road vector layer for proximity penalty
+#
+# Outputs:
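+#   resistance_lc.tif       — Land cover resistance only
+#   slope_resistance.tif    — Slope penalty layer (written only when dem_tif is given)
+#   road_resistance.tif     — Road-proximity penalty layer (written only when road_shp is given)
+#   resistance_combined.tif — Combined resistance (lc × optional slope × optional road)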
+#   resistance_stats.csv    — Summary statistics of resistance surface
+#   resistance_map.png      — Visualisation of combined resistance
+# ── Inline logger ─────────────────────────────────────────────────────────────
+# Minimal logger: every helper formats its arguments with sprintf() and emits
+# one timestamped line through message().
+SKILL_NAME <- "landscape-connectivity"
+# Current time rendered as "[YYYY-MM-DD HH:MM:SS]".
+.log_ts <- function() {
+  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
+}
+# Level helpers: `...` is a sprintf() format string followed by its values.
+log_info <- function(...) {
+  message(.log_ts(), " [INFO]  ", sprintf(...))
+}
+log_warn <- function(...) {
+  message(.log_ts(), " [WARN]  ", sprintf(...))
+}
+log_error <- function(...) {
+  message(.log_ts(), " [ERROR] ", sprintf(...))
+}
+# Announces pipeline step number `n` with description `d`.
+log_step <- function(n, d) {
+  log_info("-- STEP %d: %s", n, d)
+}
+# Records a parameter choice: name `v`, chosen value `val`, rationale `why`.
+log_decision <- function(v, val, why) {
+  log_info("DECISION | %s = %s | %s", v, val, why)
+}
+# Create ./logs under the current working directory if it is missing.
+dir.create("logs", showWarnings = FALSE, recursive = TRUE)
+suppressPackageStartupMessages(library(terra))
+suppressPackageStartupMessages(library(sf))
+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(ggplot2))
+# ── Command-line arguments ───────────────────────────────────────────────────
+# Positional: <landcover_tif> <resistance_csv> <output_dir> [dem_tif] [road_shp]
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 3) {
+  log_error("Argumentos insuficientes. Uso: Rscript resistance_surface.R <landcover_tif> <resistance_csv> <output_dir> [dem_tif] [road_shp]")
+  cat("Usage: Rscript resistance_surface.R <landcover_tif> <resistance_csv>",
+      "<output_dir> [dem_tif] [road_shp]\n")
+  quit(status = 1)
+}
+lc_path     <- args[1]
+res_csv     <- args[2]
+output_dir  <- args[3]
+# Optional inputs stay NULL when absent or when the literal string "NA" is
+# passed as a placeholder.
+dem_path    <- NULL
+if (length(args) >= 4 && args[4] != "NA") {
+  dem_path <- args[4]
+}
+road_path   <- NULL
+if (length(args) >= 5 && args[5] != "NA") {
+  road_path <- args[5]
+}
+# ── Input precondition checks ────────────────────────────────────────────────
+# Each missing file is logged with a diagnostic and then aborts the script.
+if (!file.exists(lc_path)) {
+  log_error("Input nao encontrado: %s\nCausa provavel: raster de cobertura de terra nao existe\nVerifique: caminho e existencia do arquivo .tif\nSkill anterior: [nenhuma — entrada do usuario]", lc_path)
+  stop("Missing landcover_tif: ", lc_path)
+}
+if (!file.exists(res_csv)) {
+  log_error("Input nao encontrado: %s\nCausa provavel: CSV de resistencia nao existe\nVerifique: se o arquivo contem colunas lc_code e resistance\nSkill anterior: [nenhuma — entrada do usuario]", res_csv)
+  stop("Missing resistance_csv: ", res_csv)
+}
+if (!is.null(dem_path)) {
+  if (!file.exists(dem_path)) {
+    log_error("DEM opcional nao encontrado: %s\nCausa provavel: caminho incorreto para DEM\nVerifique: existencia do arquivo .tif do DEM\nSkill anterior: [nenhuma]", dem_path)
+    stop("Missing dem_tif: ", dem_path)
+  }
+}
+if (!is.null(road_path)) {
+  if (!file.exists(road_path)) {
+    log_error("Shapefile de estradas opcional nao encontrado: %s\nCausa provavel: caminho incorreto\nVerifique: existencia do shapefile de estradas\nSkill anterior: [nenhuma]", road_path)
+    stop("Missing road_shp: ", road_path)
+  }
+}
+log_decision("dem_path", if (is.null(dem_path)) "NULL" else dem_path,
+             "DEM incluido na penalidade de declividade; NULL = sem penalidade de relevo")
+log_decision("road_path", if (is.null(road_path)) "NULL" else road_path,
+             "shapefile de estradas para penalidade de proximidade; NULL = sem penalidade viaria")
+dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
+log_step(1, "Carregando raster de cobertura de terra e tabela de resistencia")
+# ── Load inputs ──────────────────────────────────────────────────────────────
+# Both loaders log a diagnostic and re-raise the original error on failure.
+lc <- tryCatch(
+  rast(lc_path),
+  error = function(e) {
+    log_error("Falha ao carregar raster de cobertura de terra: %s\nCausa provavel: arquivo .tif corrompido ou formato invalido\nVerifique: integridade do raster\nSkill anterior: [nenhuma]", conditionMessage(e))
+    stop(e)
+  }
+)
+rt <- tryCatch(
+  read.csv(res_csv),
+  error = function(e) {
+    log_error("Falha ao ler CSV de resistencia: %s\nCausa provavel: formato incorreto\nVerifique: colunas lc_code e resistance\nSkill anterior: [nenhuma]", conditionMessage(e))
+    stop(e)
+  }
+)
+# The lookup table must provide the class code and its resistance value.
+required_cols <- c("lc_code", "resistance")
+missing_cols  <- setdiff(required_cols, names(rt))
+if (length(missing_cols) > 0) {
+  found_cols <- paste(names(rt), collapse = ", ")
+  log_error("CSV de resistencia deve conter colunas: lc_code, resistance. Encontradas: %s\nCausa provavel: cabecalho do CSV incorreto\nVerifique: estrutura do arquivo resistance_csv\nSkill anterior: [nenhuma]",
+            found_cols)
+  stop("resistance_csv must contain columns: lc_code, resistance. Found: ",
+       found_cols)
+}
+log_info("Raster de cobertura: %d linhas x %d colunas, CRS: %s",
+         nrow(lc), ncol(lc), crs(lc, describe = TRUE)$name)
+log_info("Tabela de resistencia: %d classes", nrow(rt))
+log_step(2, "Reclassificando cobertura de terra para resistencia")
+# ── Reclassify land cover to resistance ──────────────────────────────────────
+# Each class code c is mapped through the interval [c - 0.5, c + 0.5), so the
+# table row for code c matches exactly the integer cell value c.
+rcl_mat <- as.matrix(rt %>%
+  arrange(lc_code) %>%
+  mutate(from = lc_code - 0.5, to = lc_code + 0.5) %>%
+  select(from, to, resistance))
+res_lc <- tryCatch({
+  # others = NA: cells whose code is absent from the table become NA. Without
+  # it classify() leaves them unchanged, i.e. the raw class code would be used
+  # as if it were a resistance value.
+  classify(lc, rcl_mat, include.lowest = TRUE, right = FALSE, others = NA)
+}, error = function(e) {
+  log_error("Falha em reclassificacao: %s\nCausa provavel: codigos de classe no raster fora do intervalo da tabela\nVerifique: consistencia entre lc_code e valores do raster\nSkill anterior: [nenhuma]", conditionMessage(e))
+  stop(e)
+})
+names(res_lc) <- "resistance"
+# Check for unmatched codes
+lc_vals    <- unique(values(lc, na.rm = TRUE))
+unmatched  <- setdiff(lc_vals, rt$lc_code)
+if (length(unmatched) > 0) {
+  log_warn("%d codigos de cobertura de terra sem atribuicao de resistencia: %s; celulas correspondentes definidas como NA",
+           length(unmatched), paste(unmatched, collapse = ", "))
+}
+lc_path_out <- file.path(output_dir, "resistance_lc.tif")
+writeRaster(res_lc, lc_path_out, overwrite = TRUE)
+log_info("Resistencia LC gravada: %s", lc_path_out)
+log_step(3, "Adicionando penalidades opcionais (declividade, estradas)")
+# ── Optional: slope penalty ──────────────────────────────────────────────────
+# slope_res stays NULL when no DEM was supplied; otherwise it is a multiplier
+# raster on the land-cover grid with minimum value 1.
+if (!is.null(dem_path)) {
+  log_info("Adicionando resistencia baseada em declividade...")
+  tryCatch({
+    dem       <- rast(dem_path)
+    # Resample the DEM onto the land-cover grid before deriving slope.
+    dem_proj  <- project(dem, lc, method = "bilinear")
+    slope_deg <- terrain(dem_proj, v = "slope", unit = "degrees")
+    # Exponential cost exp(slope / 15): multiplies by e every 15° of slope,
+    # i.e. doubles every 15 * ln(2) ≈ 10.4°.
+    slope_res  <- exp(slope_deg / 15)
+    # Divide by the minimum so the flattest cell has multiplier 1.
+    slope_res  <- slope_res / global(slope_res, "min", na.rm = TRUE)[[1]]
+    names(slope_res) <- "slope_resistance"
+    writeRaster(slope_res, file.path(output_dir, "slope_resistance.tif"),
+                overwrite = TRUE)
+    log_info("Resistencia de declividade gravada")
+    log_decision("slope_decay_param", 15,
+                 "parametro de decaimento exponencial exp(declividade/15); resistencia dobra a cada ~10.4 graus de declividade")
+  }, error = function(e) {
+    log_error("Falha ao processar DEM para declividade: %s\nCausa provavel: DEM com CRS incompativel ou valores invalidos\nVerifique: CRS e extensao do DEM\nSkill anterior: [nenhuma]", conditionMessage(e))
+    stop(e)
+  })
+} else {
+  slope_res <- NULL
+}
+# ── Optional: road proximity penalty ─────────────────────────────────────────
+# road_res stays NULL when no road layer was supplied; otherwise it is a
+# multiplier raster on the land-cover grid with values between 1 and 10.
+if (!is.null(road_path)) {
+  log_info("Adicionando resistencia de proximidade a estradas...")
+  tryCatch({
+    roads    <- st_read(road_path, quiet = TRUE)
+    roads_v  <- vect(roads)
+    # Distances are only meaningful when roads and raster share a CRS;
+    # reproject the roads whenever both sides declare one.
+    if (!is.na(st_crs(roads)) && nzchar(crs(res_lc))) {
+      roads_v <- project(roads_v, crs(res_lc))
+    }
+    # Compute distance to nearest road. The result is already on the grid of
+    # res_lc (and therefore of lc), so no further resampling is needed.
+    road_dist <- distance(res_lc, roads_v)
+    # Resistance peaks at road (dist=0) and decays with distance:
+    # 10× at the road edge, reaching the floor of 1 at 200 * ln(10) ≈ 460 m.
+    # clamp() applies the floor cell-wise; base pmax() is not defined for
+    # SpatRaster objects.
+    road_res <- clamp(10 * exp(-road_dist / 200), lower = 1, values = TRUE)
+    names(road_res) <- "road_resistance"
+    writeRaster(road_res, file.path(output_dir, "road_resistance.tif"),
+                overwrite = TRUE)
+    log_info("Resistencia de estradas gravada")
+    log_decision("road_max_penalty", 10,
+                 "penalidade maxima na beira da estrada (10x); decai a 1 a ~460 m")
+  }, error = function(e) {
+    log_error("Falha ao processar shapefile de estradas: %s\nCausa provavel: shapefile invalido ou CRS incompativel\nVerifique: integridade do shapefile de estradas\nSkill anterior: [nenhuma]", conditionMessage(e))
+    stop(e)
+  })
+} else {
+  road_res <- NULL
+}
+log_step(4, "Combinando camadas de resistencia e normalizando")
+# ── Combine resistance layers ─────────────────────────────────────────────────
+# The optional penalties are multipliers applied on top of land-cover resistance.
+combined <- res_lc
+if (!is.null(slope_res)) combined <- combined * slope_res
+if (!is.null(road_res))  combined <- combined * road_res
+# Cap at maximum to avoid instability
+max_cap <- 1000
+combined[combined > max_cap] <- max_cap
+log_decision("max_cap_resistance", max_cap,
+             "resistencia maxima para evitar instabilidade numerica nos algoritmos de menor custo")
+# Rescale so minimum = 1
+min_val  <- global(combined, "min", na.rm = TRUE)[[1]]
+if (is.finite(min_val) && min_val > 0) {
+  combined <- combined / min_val
+  # Dividing by a minimum below 1 inflates the surface, so enforce the cap again.
+  if (min_val < 1) combined[combined > max_cap] <- max_cap
+} else {
+  # A zero, negative or all-NA minimum would turn the surface into Inf/NaN.
+  log_warn("Resistencia minima invalida (%s); normalizacao para minimo = 1 ignorada. Verifique valores de resistencia <= 0 na tabela", min_val)
+}
+combined_path <- file.path(output_dir, "resistance_combined.tif")
+writeRaster(combined, combined_path, overwrite = TRUE)
+log_info("Resistencia combinada gravada: %s", combined_path)
+log_step(5, "Calculando estatisticas da superficie de resistencia")
+# ── Statistics ────────────────────────────────────────────────────────────────
+# Summary of all non-NA cells: six quantiles plus the mean, each rounded to
+# three decimals, reported in the order min, q25, median, mean, q75, q95, max.
+vals <- values(combined, na.rm = TRUE)
+quant_vals <- quantile(vals, c(0, 0.25, 0.5, 0.75, 0.95, 1),
+                       na.rm = TRUE, names = FALSE)
+mean_val   <- mean(vals, na.rm = TRUE)
+stats_df <- data.frame(
+  statistic = c("min", "q25", "median", "mean", "q75", "q95", "max"),
+  value     = round(c(quant_vals[1:3], mean_val, quant_vals[4:6]), 3)
+)
+stats_path <- file.path(output_dir, "resistance_stats.csv")
+write.csv(stats_df, stats_path, row.names = FALSE)
+log_info("Estatisticas da superficie de resistencia:")
+for (r in seq_len(nrow(stats_df))) {
+  stat_row <- stats_df[r, ]
+  log_info("  %s = %.3f", stat_row$statistic, stat_row$value)
+}
+log_step(6, "Gerando visualizacao do mapa de resistencia")
+# ── Visualisation ─────────────────────────────────────────────────────────────
+# The map is a convenience output: any failure here is logged as a warning and
+# does not abort the script.
+tryCatch({
+  # Thin large rasters before converting them to a data frame. The factor is
+  # taken from the larger dimension so wide rasters are thinned as well, and
+  # aggregation is skipped when there is nothing to reduce.
+  agg_fact <- max(1, floor(max(nrow(combined), ncol(combined)) / 500))
+  plot_r   <- if (agg_fact > 1) aggregate(combined, fact = agg_fact) else combined
+  plot_df  <- as.data.frame(plot_r, xy = TRUE)
+  names(plot_df)[3] <- "resistance"
+  # log1p() compresses the colour scale of the resistance values.
+  p <- ggplot(plot_df, aes(x = x, y = y, fill = log1p(resistance))) +
+    geom_raster() +
+    scale_fill_viridis_c(option = "magma", name = "log(resistance + 1)") +
+    coord_equal() +
+    labs(x = "Easting", y = "Northing",
+         title = "Combined resistance surface") +
+    theme_minimal(base_size = 10)
+  map_path <- file.path(output_dir, "resistance_map.png")
+  ggsave(map_path, p, width = 8, height = 7, dpi = 150)
+  log_info("Mapa de resistencia salvo: %s", map_path)
+}, error = function(e) {
+  log_warn("Nao foi possivel gerar mapa de resistencia: %s", conditionMessage(e))
+})
+log_info("Construcao da superficie de resistencia concluida")

package/skills/model-validation-and-uncertainty/SKILL.md ADDED Viewed

@@ -0,0 +1,131 @@
+---
+name: model-validation-and-uncertainty
+description: "Validates predictive models and quantifies uncertainty including AUC/TSS metrics, calibration, extrapolation risk (MOP/MESS/ExDet), and ensemble uncertainty maps. Use this skill when the user needs model performance evaluation, ROC curves, cross-validation results, calibration curves, overfitting diagnostics, prediction intervals, bootstrap uncertainty, sensitivity/specificity assessment, or extrapolation risk analysis."
+skill_version: 1.0.0
+---
+# Skill: model-validation-and-uncertainty
+**Domain:** Metrics · Calibration · Sensitivity · External validation · Uncertainty
+**Phase:** 2 — Modeling
+**Used by:** run-sdm-study, assess-ecological-impact, analyze-community-structure, build-fire-risk-map, run-occupancy-analysis
+---
+## Purpose
+Guides the agent through rigorous evaluation of any fitted model: computing performance metrics on held-out data, assessing calibration, running sensitivity analyses, performing external validation, and quantifying and visualising prediction uncertainty.
+---
+## When to Invoke
+- After any model is fitted, before results are reported
+- When the user asks about model performance, reliability, or uncertainty
+- When preparing results for publication or decision-making
+---
+## Inputs
+| Input | Format | Required |
+|-------|--------|----------|
+| Fitted model object | RData, pkl, ONNX | Yes |
+| Validation dataset (independent) | CSV | Yes |
+| Training/CV predictions | CSV | Yes |
+| Prediction surface/map | GeoTIFF | Conditional |
+---
+## Outputs
+| Output | Description |
+|--------|-------------|
+| `performance_metrics.csv` | Full metric table (train, CV, test) |
+| `calibration_plot.png` | Observed vs predicted calibration curve |
+| `roc_curve.png` | ROC curve with AUC (for classifiers) |
+| `sensitivity_report.md` | Effect of parameter/predictor perturbation |
+| `uncertainty_map.tif` | Spatial uncertainty (SD across ensemble or bootstrap) |
+| `validation_report.md` | Comprehensive validation narrative |
+---
+## Steps
+### 1. Select Appropriate Metrics
+| Task | Primary metrics | Secondary |
+|------|----------------|-----------|
+| Binary classification (SDM) | AUC-ROC, TSS, Boyce index | Sensitivity, specificity |
+| Regression (abundance, biomass) | RMSE, MAE, R² | Bias, MAPE |
+| Occupancy | AUC, WAIC, posterior predictive checks | |
+| Community ordination | Stress (NMDS), R² (RDA/CCA) | Procrustes |
+| Count / Poisson | RMSE, Pseudo-R², dispersion | |
+### 2. Compute Metrics on Train, CV, and Test Sets
+- Report all three to diagnose overfitting (training performance far above CV/test performance indicates an overfitted model)
+- Report metric mean ± SD across CV folds
+### 3. Assess Calibration
+- For classifiers: plot mean predicted probability vs. observed occurrence rate across bins
+- For regression: plot predicted vs. observed scatter
+- Compute calibration slope and intercept; a slope near 1 and an intercept near 0 are ideal
+### 4. Threshold Selection (for binary predictions)
+- Maximise TSS (Youden's J)
+- Maximise Sensitivity + Specificity (selects the same threshold as maximising TSS, since TSS = Sensitivity + Specificity − 1)
+- Fixed prevalence-based threshold
+- Document chosen threshold and rationale
+### 5. Variable Importance and Response Curves
+- Compute permutation importance for each predictor
+- Plot partial dependence / marginal effect curves for top predictors
+- Flag predictors with response curves showing ecologically implausible patterns
+### 6. Sensitivity Analysis
+- Perturb each hyperparameter by ±10%; measure effect on primary metric
+- Remove each predictor in turn; measure effect (jackknife importance)
+- Assess sensitivity to background/pseudo-absence sampling strategy (for SDMs)
+### 7. External Validation
+- Apply model to an independent dataset (different time period, different region)
+- Report metric degradation; quantify transferability
+- Flag models with poor transferability before applying to novel conditions
+### 8. Uncertainty Quantification
+- Ensemble SD: standard deviation across ensemble members or bootstrap replicates
+- Bayesian credible intervals: for Bayesian models, report posterior predictive intervals
+- Map uncertainty spatially where relevant
+- Report regions of high uncertainty explicitly
+---
+## Key Decisions to Document
+- Primary and secondary metrics chosen and rationale
+- Threshold selection method
+- External validation dataset description
+- Uncertainty quantification method
+---
+## Tools and Libraries
+**R:** `ROCR`, `PresenceAbsence`, `dismo`, `gbm`, `boot`, `brms`
+**Python:** `sklearn.metrics`, `scikit-learn`, `shap`, `pysdm`
+---
+## Resources
+- `resources/metric-selection-guide.md` — which metrics for which task
+- `resources/threshold-selection-guide.md` — threshold methods compared
+- `examples/` — calibration plot and uncertainty map examples
+---
+## Notes
+- Never report only training performance
+- Boyce index is preferred over AUC for presence-only SDMs
+- For small samples, report bootstrapped CIs around all metrics

package/skills/model-validation-and-uncertainty/examples/example-prompts.md ADDED Viewed

@@ -0,0 +1,30 @@
+# Example Invocation Prompts — model-validation-and-uncertainty
+## Full Validation Suite
+```
+Load skill: model-validation-and-uncertainty
+Task: Validate the SDM ensemble for Chrysocyon brachyurus.
+Files:
+  - models/ensemble_predictions.csv (columns: site_id, observed, predicted_prob)
+  - models/cv_fold_predictions.csv  (columns: fold, observed, predicted_prob)
+  - outputs/suitability_ensemble.tif
+  - outputs/suitability_sd.tif (uncertainty)
+Run:
+1. AUC-ROC and TSS on CV folds and independent test set.
+2. Boyce index on test set.
+3. Calibration plot (10 bins).
+4. Threshold selection: MaxTSS and P10.
+5. Report: performance_metrics.csv, calibration_plot.png, validation_report.md
+```
+## Calibration Check Only
+```
+Load skill: model-validation-and-uncertainty
+Task: Calibration assessment only.
+Predictions: outputs/glm_predictions.csv (obs: 0/1, pred: probability 0-1).
+Generate calibration plot with 10 bins. Report calibration slope and intercept.
+A well-calibrated model should have slope ≈ 1 and intercept ≈ 0.
+```