ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/AGENT_CONTEXT.md +191 -0
  2. package/CATALOG.md +329 -0
  3. package/LICENSE +692 -0
  4. package/README.md +347 -0
  5. package/bin/install.mjs +168 -0
  6. package/docs/comparison-with-alternatives.md +38 -0
  7. package/docs/global-examples-index.md +103 -0
  8. package/docs/repository-statistics.md +101 -0
  9. package/docs/theoretical-foundations.md +188 -0
  10. package/environment.yaml +106 -0
  11. package/examples/community/arctic_tundra_vegetation_example.md +247 -0
  12. package/examples/community/bird_landuse_example.md +63 -0
  13. package/examples/community/phytoplankton_reservoir_example.md +60 -0
  14. package/examples/community/reef_fish_indopacific_example.md +221 -0
  15. package/examples/impact/baci_road_example.md +57 -0
  16. package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
  17. package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
  18. package/examples/occupancy/puma_camera_example.md +61 -0
  19. package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
  20. package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
  21. package/examples/sdm/anteater_cerrado_example.md +69 -0
  22. package/examples/sdm/jaguar_amazon_example.md +80 -0
  23. package/examples/sdm/koala_climate_change_example.md +170 -0
  24. package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
  25. package/package.json +43 -0
  26. package/renv.lock +194 -0
  27. package/skills/SKILL_INDEX.json +1020 -0
  28. package/skills/acoustic-monitoring/SKILL.md +163 -0
  29. package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
  30. package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
  31. package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
  32. package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
  33. package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
  34. package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
  35. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
  36. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
  37. package/skills/biostatistics-workbench/SKILL.md +140 -0
  38. package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
  39. package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
  40. package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
  41. package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
  42. package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
  43. package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
  44. package/skills/camera-trap-processing/SKILL.md +159 -0
  45. package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
  46. package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
  47. package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
  48. package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
  49. package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
  50. package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
  51. package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
  52. package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
  53. package/skills/community-ecology-ordination/SKILL.md +133 -0
  54. package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
  55. package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
  56. package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
  57. package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
  58. package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
  59. package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
  60. package/skills/ecological-data-foundation/SKILL.md +129 -0
  61. package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
  62. package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
  63. package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
  64. package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
  65. package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
  66. package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
  67. package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
  68. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
  69. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
  70. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
  71. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
  72. package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
  73. package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
  74. package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
  75. package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
  76. package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
  77. package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
  78. package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
  79. package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
  80. package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
  81. package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
  82. package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
  83. package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
  84. package/skills/ecological-impact-assessment/SKILL.md +123 -0
  85. package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
  86. package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
  87. package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
  88. package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
  89. package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
  90. package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
  91. package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
  92. package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
  93. package/skills/ecosystem-services-assessment/SKILL.md +125 -0
  94. package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
  95. package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
  96. package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
  97. package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
  98. package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
  99. package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
  100. package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
  101. package/skills/environmental-time-series/SKILL.md +125 -0
  102. package/skills/environmental-time-series/examples/example-prompts.md +33 -0
  103. package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
  104. package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
  105. package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
  106. package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
  107. package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
  108. package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
  109. package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
  110. package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
  111. package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
  112. package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
  113. package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
  114. package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
  115. package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
  116. package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
  117. package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
  118. package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
  119. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
  120. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
  121. package/skills/landscape-connectivity/SKILL.md +170 -0
  122. package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
  123. package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
  124. package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
  125. package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
  126. package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
  127. package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
  128. package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
  129. package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
  130. package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
  131. package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
  132. package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
  133. package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
  134. package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
  135. package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
  136. package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
  137. package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
  138. package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
  139. package/skills/occupancy-and-detection/SKILL.md +126 -0
  140. package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
  141. package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
  142. package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
  143. package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
  144. package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
  145. package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
  146. package/skills/population-viability-analysis/SKILL.md +161 -0
  147. package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
  148. package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
  149. package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
  150. package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
  151. package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
  152. package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
  153. package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
  154. package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
  155. package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
  156. package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
  157. package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
  158. package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
  159. package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
  160. package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
  161. package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
  162. package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
  163. package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
  164. package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
  165. package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
  166. package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
  167. package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
  168. package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
  169. package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
  170. package/skills/spatial-prioritization/SKILL.md +162 -0
  171. package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
  172. package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
  173. package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
  174. package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
  175. package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
  176. package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
  177. package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
  178. package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
  179. package/skills/species-distribution-modeling/SKILL.md +139 -0
  180. package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
  181. package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
  182. package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
  183. package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
  184. package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
  185. package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
  186. package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
  187. package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
  188. package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
  189. package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
  190. package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
  191. package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
  192. package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
  193. package/templates/SKILL_TEMPLATE.md +225 -0
  194. package/templates/checklists/data-submission-checklist.md +38 -0
  195. package/templates/checklists/post-analysis-checklist.md +55 -0
  196. package/templates/checklists/pre-analysis-checklist.md +31 -0
  197. package/templates/prompts/debug-skill.md +47 -0
  198. package/templates/prompts/invoke-skill.md +34 -0
  199. package/templates/prompts/invoke-workflow.md +45 -0
  200. package/templates/reports/technical-report-template.md +80 -0
  201. package/templates/scripts/logger_setup.R +79 -0
  202. package/templates/scripts/logger_setup.py +119 -0
  203. package/templates/scripts/params_loader.R +28 -0
  204. package/templates/scripts/params_loader.py +38 -0
  205. package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
  206. package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
  207. package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
  208. package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
  209. package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
  210. package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
  211. package/workflows/produce-technical-report/WORKFLOW.md +113 -0
  212. package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
  213. package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
  214. package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
  215. package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
  216. package/workflows/run-population-viability/WORKFLOW.md +90 -0
  217. package/workflows/run-sdm-study/WORKFLOW.md +99 -0
@@ -0,0 +1,236 @@
1
+ # Extrapolation Risk Guide
2
+
3
+ Assessing environmental novelty before interpreting model projections.
4
+
5
+ ---
6
+
7
+ ## 1. Interpolation vs Extrapolation in Environmental Space
8
+
9
+ A species distribution model learns relationships between occurrence records and
10
+ environmental predictors within the **calibration area** (M area). When a model
11
+ is projected to a new region or time period, every prediction pixel falls into one
12
+ of two categories:
13
+
14
+ | Category | Definition | Risk |
15
+ |---|---|---|
16
+ | **Interpolation** | Pixel lies within the multivariate environmental range seen at calibration | Low — model is working within its learned space |
17
+ | **Extrapolation (strict)** | Pixel has at least one predictor value outside the full range observed in calibration | High — model must extrapolate; response is undefined |
18
+ | **Extrapolation (combinatorial)** | Pixel has individual values within calibration range, but their *combination* was not observed | Medium-High — subtler but still novel |
19
+
20
+ **Key principle:** All three methods below (MOP, ExDet, MESS) detect different
21
+ aspects of environmental novelty. None of them tells you *whether* the model will
22
+ extrapolate correctly — only that it *is* extrapolating.
23
+
24
+ ---
25
+
26
+ ## 2. MOP — Mobility-Oriented Parity
27
+
28
+ **Reference:** Owens et al. 2013. Constraints on interpretation of ecological niche
29
+ models by limited environmental ranges on calibration areas.
30
+ *Ecological Modelling* 263: 10–18.
31
+ DOI: [10.1016/j.ecolmodel.2013.04.011](https://doi.org/10.1016/j.ecolmodel.2013.04.011)
32
+
33
+ ### What MOP measures
34
+
35
+ MOP computes, for each projection pixel, the proportion of calibration points that
36
+ fall *closer* (in multivariate Euclidean space) than the projection pixel itself.
37
+
38
+ - **Scale:** 0 to 1
39
+ - **MOP = 1:** pixel is well within calibration environmental range
40
+ - **MOP = 0:** strict extrapolation — the pixel is more extreme than *all* calibration points in at least one predictor dimension
41
+ - **MOP = 0.1:** only 10% of calibration points are environmentally similar
42
+
43
+ ### Interpretation
44
+
45
+ | MOP value | Interpretation | Action |
46
+ |---|---|---|
47
+ | > 0.75 | Safe interpolation | Interpret predictions normally |
48
+ | 0.50 – 0.75 | Moderate novelty | Report with caution |
49
+ | 0.25 – 0.50 | High novelty | Flag in figure caption |
50
+ | < 0.25 | Very high novelty | Strong caveat; consider masking |
51
+ | = 0 | Strict extrapolation | Mask in publication figures |
52
+
53
+ ### R implementation (terra)
54
+
55
+ ```r
56
+ # MOP function using terra (no Java required)
57
+ calc_mop <- function(train_stack, proj_stack, prop = 0.1) {
58
+ suppressPackageStartupMessages(library(terra))
59
+
60
+ # Extract calibration values
61
+ cal_vals <- as.data.frame(train_stack, na.rm = TRUE)
62
+ proj_vals <- as.data.frame(proj_stack, na.rm = TRUE)
63
+ n_cal <- nrow(cal_vals)
64
+ n_vars <- ncol(cal_vals)
65
+
66
+ # For each projection pixel, compute proportion of calibration points
67
+ # that are "closer" (Euclidean distance in scaled predictor space)
68
+ cal_scaled <- scale(cal_vals)
69
+ center <- attr(cal_scaled, "scaled:center")
70
+ sdev <- attr(cal_scaled, "scaled:scale")
71
+
72
+ proj_scaled <- sweep(sweep(proj_vals, 2, center, "-"), 2, sdev, "/")
73
+
74
+ mop_vals <- apply(proj_scaled, 1, function(px) {
75
+ if (any(is.na(px))) return(NA)
76
+ d_px_to_cal <- sqrt(rowSums(sweep(cal_scaled, 2, px, "-")^2))
77
+ d_cal_centroid <- sqrt(rowSums(cal_scaled^2))
78
+ # proportion of calibration points less extreme than projection pixel
79
+ sum(d_cal_centroid < quantile(d_px_to_cal, prop)) / n_cal
80
+ })
81
+
82
+ # Place back into raster
83
+ mop_rast <- proj_stack[[1]]
84
+ values(mop_rast) <- mop_vals
85
+ names(mop_rast) <- "MOP"
86
+ return(mop_rast)
87
+ }
88
+ ```
89
+
90
+ ---
91
+
92
+ ## 3. ExDet — Extrapolation Detection
93
+
94
+ **Reference:** Mesgaran et al. 2014. Here be dragons: a tool for quantifying novelty
95
+ due to covariate range and collinearity extrapolation when predicting species
96
+ distributions. *Diversity and Distributions* 20: 1147–1159.
97
+ DOI: [10.1111/ddi.12209](https://doi.org/10.1111/ddi.12209)
98
+
99
+ ### What ExDet measures
100
+
101
+ ExDet distinguishes two types of extrapolation:
102
+
103
+ | Type | Code | Meaning |
104
+ |---|---|---|
105
+ | **NT1** (univariate) | Negative value | At least one predictor is outside its calibration min–max range |
106
+ | **NT2** (combinatorial) | 0 to 1 | All predictors in range, but combination novel; value = Mahalanobis-based dissimilarity |
107
+ | **Interpolation** | > 1 | Pixel well within calibration cloud |
108
+
109
+ **NT1 extrapolation is the most dangerous** — the model is predicting beyond any
110
+ observed value for that variable. NT2 is subtler but still represents novel
111
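+ environmental combinations that the model has not seen. Note that the NT2/interpolation scale used in this guide is inverted relative to Mesgaran et al. (2014), where NT2 values of 0–1 indicate analog conditions and values > 1 indicate novel combinations; state which convention you use when reporting ExDet scores.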
112
+
113
+ ### R code (manual ExDet)
114
+
115
+ ```r
116
+ calc_exdet <- function(train_mat, proj_mat) {
117
+ # Standardize using calibration mean and sd
118
+ mu <- colMeans(train_mat, na.rm = TRUE)
119
+ sig <- apply(train_mat, 2, sd, na.rm = TRUE)
120
+ S <- cov(train_mat, use = "complete.obs")
121
+ S_inv <- solve(S)
122
+
123
+ apply(proj_mat, 1, function(px) {
124
+ if (any(is.na(px))) return(NA)
125
+ px_s <- (px - mu) / sig
126
+ tr_s <- sweep(train_mat, 2, mu, "-")
127
+ tr_s <- sweep(tr_s, 2, sig, "/")
128
+ # NT1: univariate extrapolation
129
+ below <- any(px < apply(train_mat, 2, min, na.rm = TRUE))
130
+ above <- any(px > apply(train_mat, 2, max, na.rm = TRUE))
131
+ if (below || above) {
132
+ # NT1 score: negative, proportional to extent of extrapolation
133
+ return(-1 * max(abs(px_s) - apply(abs(tr_s), 2, max)))
134
+ }
135
+ # NT2: Mahalanobis-based combinatorial novelty
136
+ mah_px <- t(px - mu) %*% S_inv %*% (px - mu)
137
+ mah_ref <- median(apply(train_mat, 1, function(r) t(r - mu) %*% S_inv %*% (r - mu)))
138
+ return(as.numeric(mah_ref / mah_px))
139
+ })
140
+ }
141
+ ```
142
+
143
+ ---
144
+
145
+ ## 4. MESS — Multivariate Environmental Similarity Surfaces
146
+
147
+ **Package:** `dismo` (R)
148
+
149
+ **Reference:** Elith et al. 2010. The art of modelling range-shifting species.
150
+ *Methods in Ecology and Evolution* 1: 330–342.
151
+ DOI: [10.1111/j.2041-210X.2010.00036.x](https://doi.org/10.1111/j.2041-210X.2010.00036.x)
152
+
153
+ ### What MESS measures
154
+
155
+ MESS computes a similarity score for each projection pixel relative to the
156
+ calibration reference set. Negative MESS values indicate novel environments.
157
+
158
+ ```r
159
+ suppressPackageStartupMessages(library(dismo))
160
+ suppressPackageStartupMessages(library(terra))
161
+
162
+ # Reference points from calibration area
163
+ ref_pts <- as.data.frame(train_stack, na.rm = TRUE)
164
+
165
+ # MESS calculation
166
+ mess_rast <- mess(proj_stack, ref_pts, full = FALSE)
167
+ ```
168
+
169
+ - **MESS > 0:** similar to calibration set
170
+ - **MESS = 0:** boundary of calibration range
171
+ - **MESS < 0:** novel environment; magnitude indicates degree of novelty
172
+
173
+ ---
174
+
175
+ ## 5. Comparative Summary
176
+
177
+ | Method | What it detects | Scale | Distinguishes NT1/NT2 | R package | Java needed |
178
+ |---|---|---|---|---|---|
179
+ | **MOP** | Overall proximity to calibration cloud | 0–1 (continuous) | No | `terra` (custom) | No |
180
+ | **ExDet** | Univariate (NT1) and combinatorial (NT2) extrapolation | Continuous (negative=NT1, 0–1=NT2) | Yes | Custom / `ntbox` | No |
181
+ | **MESS** | Multivariate similarity | Continuous (negative=novel) | No | `dismo` | No |
182
+
183
+ **Recommendation for publication:**
184
+ - **Minimum:** always compute MOP; mask MOP = 0 pixels in figures
185
+ - **Recommended:** compute MESS alongside MOP for independent confirmation
186
+ - **Full analysis:** use ExDet to distinguish NT1 from NT2 when many predictors exceed range
187
+
188
+ ---
189
+
190
+ ## 6. Practical Recommendations
191
+
192
+ 1. **Always run MOP before interpreting future projections.** Do not publish
193
+ suitability maps without showing MOP alongside them.
194
+ 2. **Mask MOP = 0 pixels in publication figures.** Use `terra::mask()` with the
195
+ MOP layer thresholded at 0.
196
+ 3. **Report the % of projection area with MOP < 0.25** in the methods section.
197
+ 4. If > 30% of the area has MOP < 0.25, add an explicit caveat in the abstract
198
+ or results section.
199
+ 5. For climate change projections, MOP for future periods tends to decrease (i.e., novelty increases) with
200
+ more extreme SSPs and longer time horizons — always report SSP-specific MOP.
201
+
202
+ ### Concern thresholds
203
+
204
+ | % area with MOP < 0.25 | Recommended action |
205
+ |---|---|
206
+ | < 10% | Note in methods, no figure modification needed |
207
+ | 10–30% | Report in results; add caption noting extrapolation zones |
208
+ | 30–50% | Mask those pixels in primary figure; show MOP map in supplement |
209
+ | > 50% | Strong caveat in abstract; consider restricting projection area |
210
+
211
+ ---
212
+
213
+ ## 7. Common Pitfalls
214
+
215
+ - **Ignoring MOP entirely:** projecting to future SSP5-8.5 without any novelty
216
+ assessment is a major reviewer concern and methodological flaw.
217
+ - **Confusing MESS negative values with unsuitable habitat:** MESS < 0 means
218
+ *novel environment*, not predicted *absence*. These are independent signals.
219
+ - **Using only one method:** MOP and MESS are complementary; using both strengthens
220
+ the analysis.
221
+ - **Not separating NT1 from NT2 when temperatures exceed calibration range:** for
222
+ climate change projections where temperature strictly exceeds historical range,
223
+ NT1 extrapolation is certain — ExDet makes this explicit.
224
+ - **Masking too aggressively:** masking all MOP < 0.5 may remove large fractions
225
+ of a species' current range. Use MOP = 0 as the primary mask.
226
+
227
+ ---
228
+
229
+ ## 8. References
230
+
231
+ | Citation | DOI |
232
+ |---|---|
233
+ | Owens et al. 2013. Ecol. Model. 263:10–18 | [10.1016/j.ecolmodel.2013.04.011](https://doi.org/10.1016/j.ecolmodel.2013.04.011) |
234
+ | Mesgaran et al. 2014. Div. Dist. 20:1147–1159 | [10.1111/ddi.12209](https://doi.org/10.1111/ddi.12209) |
235
+ | Elith et al. 2010. Meth. Ecol. Evol. 1:330–342 | [10.1111/j.2041-210X.2010.00036.x](https://doi.org/10.1111/j.2041-210X.2010.00036.x) |
236
+ | Peterson et al. 2011. Ecological Niches and Geographic Distributions. Princeton UP | ISBN 978-0691136882 |
@@ -0,0 +1,52 @@
1
+ # Model Performance Metric Selection Guide
2
+
3
+ ## Binary Classification (Presence/Absence, SDMs)
4
+
5
+ | Metric | Range | Better | Notes |
6
+ |--------|-------|--------|-------|
7
+ | AUC-ROC | 0–1 | Higher | Threshold-independent. 0.7 = acceptable, 0.8 = good, 0.9 = excellent. Inflated for large bg samples. |
8
+ | TSS (True Skill Statistic) | -1 to 1 | Higher | Threshold-dependent. TSS = Sensitivity + Specificity − 1. 0.4 = acceptable, 0.6 = good. |
9
+ | Boyce Index | -1 to 1 | Higher → 1 | Presence-only metric. Preferred over AUC for presence-background models. |
10
+ | Kappa | -1 to 1 | Higher | Prevalence-sensitive; avoid for imbalanced datasets. Values ≤ 0 indicate agreement no better than chance. |
11
+ | Brier Score | 0–1 | Lower | Mean squared error of predicted probabilities. Good calibration metric. |
12
+ | Sensitivity (Recall) | 0–1 | Higher | True positive rate. Critical when false negatives are costly. |
13
+ | Specificity | 0–1 | Higher | True negative rate. |
14
+ | F1 Score | 0–1 | Higher | Harmonic mean of precision and recall. Good for imbalanced classes. |
15
+
16
+ **Recommendation for SDMs:** Report AUC + TSS + Boyce index. Use Boyce as primary for presence-background.
17
+
18
+ ## Regression (Abundance, Biomass, NDVI)
19
+
20
+ | Metric | Formula | Notes |
21
+ |--------|---------|-------|
22
+ | RMSE | √(mean((obs−pred)²)) | Same units as response. Lower is better. |
23
+ | MAE | mean(|obs−pred|) | Robust to outliers. Lower is better. |
24
+ | R² | 1 − SS_res/SS_tot | Proportion variance explained. Higher is better. |
25
+ | Bias | mean(pred−obs) | Systematic over/underestimation. Should be ≈ 0. |
26
+ | MAPE | mean(|obs−pred|/obs) × 100 | Percentage error. Problematic when obs ≈ 0. |
27
+
28
+ ## Count / Poisson Models
29
+
30
+ | Metric | Notes |
31
+ |--------|-------|
32
+ | Pseudo-R² (McFadden) | 1 − (logL_model / logL_null). > 0.2 = good fit. |
33
+ | Pearson dispersion | Sum(pearson²) / df. Should be ≈ 1 for well-fitted Poisson. |
34
+ | DHARMa KS test | Uniformity of randomised quantile residuals. |
35
+
36
+ ## Occupancy Models
37
+
38
+ | Metric | Notes |
39
+ |--------|-------|
40
+ | AUC (if binary) | Applied to site-level occupancy predictions |
41
+ | MacKenzie-Bailey χ² | Goodness-of-fit via parametric bootstrap |
42
+ | ĉ (c-hat) | Overdispersion factor. If > 1.5, use QAICc. |
43
+ | WAIC | For Bayesian occupancy models |
44
+
45
+ ## Reporting Template
46
+
47
+ Always report as: **metric (train / CV / test)**
48
+
49
+ Example:
50
+ > AUC = 0.91 (train) / 0.84 (spatial CV, 5-fold) / 0.82 (independent test)
51
+ > TSS = 0.78 (train) / 0.67 (CV) / 0.65 (test)
52
+ > Boyce Index = 0.93 (test)
@@ -0,0 +1,64 @@
1
+ # Threshold Selection Guide for Binary Predictions
2
+
3
+ ## Why Threshold Selection Matters
4
+
5
+ SDMs and classifiers produce continuous suitability/probability values. A threshold converts these to binary predictions (suitable/not suitable, present/absent). The choice of threshold directly affects the area predicted suitable and the balance of errors.
6
+
7
+ ## Common Methods
8
+
9
+ ### 1. Maximum TSS (Youden's J) — **Recommended general default**
10
+ - Threshold that maximises Sensitivity + Specificity − 1
11
+ - Balanced between omission and commission errors
12
+ - Not sensitive to prevalence
13
+
14
+ ```r
15
+ library(PresenceAbsence)
16
+ opt_thresh <- optimal.thresholds(
17
+ DATA = data.frame(plotID = seq_len(nrow(val)), obs = val$observed, pred = val$predicted),
18
+ threshold = 101, # number of evenly spaced candidate thresholds between 0 and 1
19
+ which.model = 1, # first prediction column in DATA
20
+ opt.methods = "MaxSens+Spec" # maximises Sens + Spec, i.e. max TSS / Youden's J ("MaxKappa" = max-kappa alternative)
21
+ )
22
+ ```
23
+
24
+ ### 2. Equal Sensitivity and Specificity
25
+ - Threshold where Sensitivity = Specificity
26
+ - Good when false positives and false negatives have equal cost
27
+
28
+ ### 3. Minimum Training Presence (MTP)
29
+ - Threshold below which no training presence falls (0th percentile of training scores)
30
+ - Very permissive (large suitable area); good for detecting all potential habitat
31
+ - Use when false negatives are very costly (conservation planning)
32
+
33
+ ### 4. 10th Percentile Training Presence (P10)
34
+ - Threshold below which 10% of training presences fall
35
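+ - Slightly more restrictive than MTP; discounts the 10% of training presences with the lowest suitability (possible outliers, georeferencing errors, or sink populations)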
36
+ - Standard in MaxEnt studies
37
+
38
+ ### 5. Fixed Prevalence Threshold
39
+ - Set threshold to match the observed prevalence in the dataset
40
+ - Appropriate when calibration data have known representative prevalence
41
+
42
+ ## Decision Guide
43
+
44
+ ```
45
+ Primary goal is conservation planning (find all habitat)?
46
+ → Use MTP or P10 (low omission error)
47
+
48
+ Primary goal is invasive species management (restrict false positives)?
49
+ → Use Maximum TSS or Equal Sensitivity/Specificity
50
+
51
+ Publishing an SDM study (general)?
52
+ → Report results at both MaxTSS and P10 thresholds
53
+
54
+ Comparing multiple species / scenarios?
55
+ → Use a consistent, a priori defined threshold for all
56
+ ```
57
+
58
+ ## Reporting Requirements
59
+
60
+ Always report:
61
+ - Threshold value used (e.g., 0.42)
62
+ - Method used to select it
63
+ - Resulting sensitivity, specificity, and TSS at that threshold
64
+ - Area predicted suitable (km²) above threshold
@@ -0,0 +1,315 @@
1
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ # Usage: Rscript extrapolation_risk.R <training_raster_stack.tif> <projection_raster_stack.tif> <output_dir>
5
+ #
6
+ # Arguments:
7
+ # training_raster_stack.tif : Multi-band GeoTIFF used for model calibration
8
+ # projection_raster_stack.tif : Multi-band GeoTIFF for the projection area/period
9
+ # output_dir : Directory for outputs (created if absent)
10
+ #
11
+ # Outputs:
12
+ # mop_layer.tif — MOP raster (0 = strict extrapolation, 1 = fully within range)
13
+ # mess_layer.tif — MESS raster (negative = novel environment)
14
+ # extrapolation_summary.csv — Summary statistics (% area per threshold)
15
+ # extrapolation_plots.png — Side-by-side MOP and MESS maps
16
+
17
+ # ── Inline logger ─────────────────────────────────────────────────────────────
18
+ SKILL_NAME <- "model-validation-and-uncertainty"
19
+ .log_ts <- function() format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
20
+ log_info <- function(...) message(.log_ts(), " [INFO] ", sprintf(...))
21
+ log_warn <- function(...) message(.log_ts(), " [WARN] ", sprintf(...))
22
+ log_error<- function(...) message(.log_ts(), " [ERROR] ", sprintf(...))
23
+ log_step <- function(n, d) log_info("-- STEP %d: %s", n, d)
24
+ log_decision <- function(v, val, why) log_info("DECISION | %s = %s | %s", v, val, why)
25
+ dir.create("logs", recursive=TRUE, showWarnings=FALSE)
26
+
27
+ suppressPackageStartupMessages(library(terra))
28
+ suppressPackageStartupMessages(library(dismo))
29
+ suppressPackageStartupMessages(library(ggplot2))
30
+
31
+ # ── 1. Parse arguments ──────────────────────────────────────────────────────
32
+ log_step(1, "Parse arguments and validate inputs")
33
+ args <- commandArgs(trailingOnly = TRUE)
34
+
35
+ if (length(args) < 3) {
36
+ log_warn("Fewer than 3 arguments provided. Using default paths for testing.")
37
+ train_path <- "data/predictors/env_train.tif"
38
+ proj_path <- "data/predictors/env_proj.tif"
39
+ output_dir <- "output/extrapolation"
40
+ } else {
41
+ train_path <- args[1]
42
+ proj_path <- args[2]
43
+ output_dir <- args[3]
44
+ }
45
+
46
+ log_decision("train_path", train_path, "raster stack used for model calibration")
47
+ log_decision("proj_path", proj_path, "raster stack for the projection area/period")
48
+
49
+ if (!file.exists(train_path)) {
50
+ log_error(
51
+ "Falha em validate inputs: raster de treinamento nao encontrado: %s\nCausa provavel: caminho incorreto ou arquivo GeoTIFF nao gerado\nVerifique: o argumento training_raster_stack.tif e o diretorio de trabalho\nSkill anterior: species-distribution-modelling",
52
+ train_path
53
+ )
54
+ stop("Training raster not found.")
55
+ }
56
+ if (!file.exists(proj_path)) {
57
+ log_error(
58
+ "Falha em validate inputs: raster de projecao nao encontrado: %s\nCausa provavel: caminho incorreto ou arquivo GeoTIFF nao gerado\nVerifique: o argumento projection_raster_stack.tif e o diretorio de trabalho\nSkill anterior: species-distribution-modelling",
59
+ proj_path
60
+ )
61
+ stop("Projection raster not found.")
62
+ }
63
+
64
+ # ── 2. Create output directory ───────────────────────────────────────────────
65
+ dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
66
+
67
+ # ── 3. Load raster stacks ────────────────────────────────────────────────────
68
+ log_step(2, "Load raster stacks")
69
+ tryCatch({
70
+ log_info("Loading training stack: %s", train_path)
71
+ train_stack <- rast(train_path)
72
+
73
+ log_info("Loading projection stack: %s", proj_path)
74
+ proj_stack <- rast(proj_path)
75
+ }, error = function(e) {
76
+ log_error(
77
+ "Falha em load rasters: %s\nCausa provavel: arquivo GeoTIFF corrompido ou formato nao suportado\nVerifique: integridade dos arquivos TIF com gdalinfo\nSkill anterior: species-distribution-modelling",
78
+ conditionMessage(e)
79
+ )
80
+ stop(e)
81
+ })
82
+
83
+ # Validate that both stacks have the same layers
84
+ if (!setequal(names(train_stack), names(proj_stack))) {
85
+ mismatched <- setdiff(names(train_stack), names(proj_stack))
86
+ log_error(
87
+ "Falha em validate layers: nomes de camadas divergem entre stacks de treinamento e projecao.\nCamadas ausentes na projecao: %s\nCausa provavel: stacks gerados com variaveis diferentes\nVerifique: que ambos os TIFs tem as mesmas bandas nomeadas\nSkill anterior: species-distribution-modelling",
88
+ paste(mismatched, collapse = ", ")
89
+ )
90
+ stop("Layer name mismatch between training and projection stacks.\n Missing in projection: ",
91
+ paste(mismatched, collapse = ", "))
92
+ }
93
+
94
+ # Reorder projection layers to match training layer order
95
+ proj_stack <- proj_stack[[names(train_stack)]]
96
+ n_vars <- nlyr(train_stack)
97
+ log_info("Variables (%d): %s", n_vars, paste(names(train_stack), collapse = ", "))
98
+
99
+ # ── 4. Extract calibration reference values ──────────────────────────────────
100
+ log_step(3, "Extract calibration reference values")
101
+ tryCatch({
102
+ cal_vals <- as.data.frame(train_stack, na.rm = TRUE)
103
+ log_info("Calibration pixels extracted: %d", nrow(cal_vals))
104
+
105
+ if (nrow(cal_vals) < 100) {
106
+ log_warn("Only %d non-NA calibration pixels. MOP estimates may be unstable.", nrow(cal_vals))
107
+ }
108
+
109
+ # Scale calibration values for MOP distance computation
110
+ cal_center <- colMeans(cal_vals, na.rm = TRUE)
111
+ cal_sd <- apply(cal_vals, 2, sd, na.rm = TRUE)
112
+
113
+ zero_sd_vars <- names(cal_sd)[cal_sd == 0]
114
+ if (length(zero_sd_vars) > 0) {
115
+ log_warn("Variables with zero variance (will be set to sd=1): %s", paste(zero_sd_vars, collapse = ", "))
116
+ }
117
+
118
+ # Replace zero sd with 1 to avoid division by zero
119
+ cal_sd[cal_sd == 0] <- 1
120
+
121
+ cal_scaled <- scale(cal_vals, center = cal_center, scale = cal_sd)
122
+ n_cal <- nrow(cal_scaled)
123
+ log_decision("mop_scaling", "z-score using calibration mean/sd", "ensures all variables contribute equally to Euclidean distance")
124
+ }, error = function(e) {
125
+ log_error(
126
+ "Falha em extract calibration values: %s\nCausa provavel: raster de treinamento com todos os pixels NA\nVerifique: mascara e extent do raster de treinamento\nSkill anterior: species-distribution-modelling",
127
+ conditionMessage(e)
128
+ )
129
+ stop(e)
130
+ })
131
+
132
+ # ── 5. Compute MOP (Mobility-Oriented Parity) ────────────────────────────────
133
+ # Reference: Owens et al. 2013. Ecol. Model. 263:10-18.
134
+ # DOI: 10.1016/j.ecolmodel.2013.04.011
135
+ #
136
+ # For each projection pixel, MOP = proportion of calibration points whose distance
137
+ # to the calibration centroid (standardised Euclidean space) is at least the pixel's
138
+ # 10th-percentile distance to the calibration set. MOP = 0: beyond ALL calibration points.
139
+
140
+ log_step(4, "Compute MOP layer (Owens et al. 2013)")
141
+ log_decision("mop_percentile", "10th percentile of pixel-to-calibration distances", "standard implementation following Owens et al. 2013")
142
+ tryCatch({
143
+ log_info("Computing MOP layer (this may take a few minutes)...")
144
+
145
+ # Distance of every calibration point to the calibration centroid (origin after z-scoring)
146
+ cal_centroid_dist <- sqrt(rowSums(cal_scaled^2))
147
+
148
+ # Cell table in cell order (na.rm = FALSE keeps every cell); scored row by row with base apply
149
+ proj_vals <- as.data.frame(proj_stack, na.rm = FALSE, xy = TRUE)
150
+ xy_cols <- c("x", "y")
151
+ env_cols <- setdiff(names(proj_vals), xy_cols)
152
+
153
+ mop_compute <- function(px_env) {
154
+ if (any(is.na(px_env))) return(NA_real_)
155
+
156
+ # Scale projection pixel using calibration parameters
157
+ px_scaled <- (as.numeric(px_env) - cal_center) / cal_sd
158
+
159
+ # Euclidean distance from this pixel to every calibration point
160
+ d_px_to_cal <- sqrt(rowSums(sweep(cal_scaled, 2, px_scaled, "-")^2))
161
+
162
+ # Share of calibration points at least as far from the centroid as this pixel is
163
+ # from its nearest 10% of calibration points: 0 = strict extrapolation, ~1 = within range
164
+ ref_dist <- quantile(d_px_to_cal, 0.1, names = FALSE)
165
+ sum(cal_centroid_dist >= ref_dist) / n_cal
166
+ }
167
+
168
+ mop_vals <- apply(as.matrix(proj_vals[, env_cols, drop = FALSE]), 1, mop_compute)
169
+
170
+ # Reconstruct as SpatRaster
171
+ mop_rast <- rast(proj_stack[[1]])
172
+ # mop_vals already holds one value per cell in cell order (NA where any predictor
173
+ # is NA), so it is written directly; re-indexing via non-NA cells would misalign it.
174
+ if (length(mop_vals) != ncell(mop_rast)) {
175
+ stop("MOP vector length does not match the number of projection cells.")
176
+ }
177
+ values(mop_rast) <- as.numeric(mop_vals)
178
+ names(mop_rast) <- "MOP"
179
+
180
+ # Save MOP raster
181
+ mop_path <- file.path(output_dir, "mop_layer.tif")
182
+ writeRaster(mop_rast, mop_path, overwrite = TRUE)
183
+ log_info("Saved: %s", mop_path)
184
+ }, error = function(e) {
185
+ log_error(
186
+ "Falha em MOP computation: %s\nCausa provavel: memoria insuficiente para rasters grandes ou valores NA inesperados\nVerifique: tamanho do raster de projecao e memoria disponivel\nSkill anterior: model-validation-and-uncertainty (calibration extraction)",
187
+ conditionMessage(e)
188
+ )
189
+ stop(e)
190
+ })
191
+
192
+ # ── 6. Compute MESS (Multivariate Environmental Similarity Surfaces) ─────────
193
+ # Reference: Elith et al. 2010. Meth. Ecol. Evol. 1:330-342.
194
+ # DOI: 10.1111/j.2041-210X.2010.00036.x
195
+ #
196
+ # MESS < 0 indicates novel environment relative to calibration reference set.
197
+
198
+ log_step(5, "Compute MESS layer (Elith et al. 2010)")
199
+ tryCatch({
200
+ log_info("Computing MESS layer...")
201
+
202
+ # dismo::mess requires a RasterStack (terra → raster conversion for compatibility)
203
+ suppressPackageStartupMessages(library(raster))
204
+ proj_raster <- raster::stack(proj_stack)
205
+ train_df <- cal_vals # reference points
206
+
207
+ mess_result <- dismo::mess(proj_raster, train_df, full = FALSE)
208
+
209
+ # Convert back to terra SpatRaster
210
+ mess_rast <- rast(mess_result)
211
+ names(mess_rast) <- "MESS"
212
+
213
+ # Save MESS raster
214
+ mess_path <- file.path(output_dir, "mess_layer.tif")
215
+ writeRaster(mess_rast, mess_path, overwrite = TRUE)
216
+ log_info("Saved: %s", mess_path)
217
+ }, error = function(e) {
218
+ log_error(
219
+ "Falha em MESS computation: %s\nCausa provavel: incompatibilidade entre pacotes terra/raster ou raster sem CRS\nVerifique: versoes de terra e dismo, e que os rasters tem CRS definido\nSkill anterior: model-validation-and-uncertainty (calibration extraction)",
220
+ conditionMessage(e)
221
+ )
222
+ stop(e)
223
+ })
224
+
225
+ # ── 7. Compute summary statistics ────────────────────────────────────────────
226
+ log_step(6, "Compute extrapolation summary statistics")
227
+ tryCatch({
228
+ mop_v <- values(mop_rast, na.rm = TRUE)
229
+ mess_v <- values(mess_rast, na.rm = TRUE)
230
+ n_proj <- length(mop_v)
231
+
232
+ pct_mop_zero <- round(100 * sum(mop_v == 0, na.rm = TRUE) / n_proj, 2)
233
+ pct_mop_025 <- round(100 * sum(mop_v < 0.25, na.rm = TRUE) / n_proj, 2)
234
+ pct_mop_050 <- round(100 * sum(mop_v < 0.50, na.rm = TRUE) / n_proj, 2)
235
+ pct_mess_neg <- round(100 * sum(mess_v < 0, na.rm = TRUE) / length(mess_v[!is.na(mess_v)]), 2)
236
+
237
+ summary_df <- data.frame(
238
+ metric = c("pct_area_MOP_zero",
239
+ "pct_area_MOP_lt_0.25",
240
+ "pct_area_MOP_lt_0.50",
241
+ "pct_area_MESS_negative"),
242
+ value = c(pct_mop_zero, pct_mop_025, pct_mop_050, pct_mess_neg),
243
+ interpretation = c(
244
+ "Strict extrapolation (MOP = 0)",
245
+ "High novelty (MOP < 0.25)",
246
+ "Moderate-high novelty (MOP < 0.50)",
247
+ "Novel environment in MESS (MESS < 0)"
248
+ )
249
+ )
250
+
251
+ csv_path <- file.path(output_dir, "extrapolation_summary.csv")
252
+ write.csv(summary_df, csv_path, row.names = FALSE)
253
+ log_info("Saved: %s", csv_path)
254
+ }, error = function(e) {
255
+ log_error(
256
+ "Falha em summary statistics: %s\nCausa provavel: rasters MOP ou MESS invalidos\nVerifique: etapas anteriores para mensagens de erro\nSkill anterior: model-validation-and-uncertainty (MOP/MESS computation)",
257
+ conditionMessage(e)
258
+ )
259
+ stop(e)
260
+ })
261
+
262
+ # ── 8. Automatic warning if extrapolation is severe ──────────────────────────
263
+ if (pct_mop_025 > 30) {
264
+ log_warn(
265
+ "EXTRAPOLATION WARNING: %.1f%% of the projection area has MOP < 0.25 (high novelty relative to calibration). Predictions in these areas should be treated with extreme caution. Recommendation: mask MOP < 0.25 pixels in publication figures and add explicit caveats in the methods section.",
266
+ pct_mop_025
267
+ )
268
+ }
269
+
270
+ if (pct_mop_zero > 10) {
271
+ log_warn(
272
+ "STRICT EXTRAPOLATION WARNING: %.1f%% of the projection area has MOP = 0 (model extrapolates beyond all calibration data). These pixels MUST be masked in publication figures.",
273
+ pct_mop_zero
274
+ )
275
+ }
276
+
277
+ # ── 9. Side-by-side diagnostic plots ─────────────────────────────────────────
278
+ log_step(7, "Generate extrapolation diagnostic plots")
279
+ tryCatch({
280
+ png(file.path(output_dir, "extrapolation_plots.png"),
281
+ width = 1600, height = 700, res = 150)
282
+ par(mfrow = c(1, 2), mar = c(4, 4, 3, 5))
283
+
284
+ # MOP map
285
+ plot(mop_rast, main = "MOP (0 = strict extrapolation)",
286
+ col = rev(terrain.colors(100)),
287
+ legend = TRUE, axes = FALSE)
288
+ mtext(paste0("MOP = 0: ", pct_mop_zero, "% | MOP < 0.25: ", pct_mop_025, "%"),
289
+ side = 1, cex = 0.8)
290
+
291
+ # MESS map (diverging palette: red = novel, blue = similar)
292
+ mess_cols <- colorRampPalette(c("red", "white", "steelblue"))(100)
293
+ plot(mess_rast, main = "MESS (negative = novel environment)",
294
+ col = mess_cols,
295
+ legend = TRUE, axes = FALSE)
296
+ mtext(paste0("MESS < 0: ", pct_mess_neg, "%"),
297
+ side = 1, cex = 0.8)
298
+
299
+ dev.off()
300
+ log_info("Saved: %s", file.path(output_dir, "extrapolation_plots.png"))
301
+ }, error = function(e) {
302
+ log_error(
303
+ "Falha em diagnostic plots: %s\nCausa provavel: dispositivo grafico nao disponivel ou rasters invalidos\nVerifique: disponibilidade de X11/display e integridade dos rasters\nSkill anterior: model-validation-and-uncertainty (MOP/MESS computation)",
304
+ conditionMessage(e)
305
+ )
306
+ stop(e)
307
+ })
308
+
309
+ # ── 10. Final summary ─────────────────────────────────────────────────────────
310
+ log_info("========== EXTRAPOLATION SUMMARY ==========")
311
+ log_info("%% area MOP = 0 (strict extrapolation): %.2f%%", pct_mop_zero)
312
+ log_info("%% area MOP < 0.25 (high novelty) : %.2f%%", pct_mop_025)
313
+ log_info("%% area MOP < 0.50 (moderate novelty) : %.2f%%", pct_mop_050)
314
+ log_info("%% area MESS < 0 (novel environment) : %.2f%%", pct_mess_neg)
315
+ log_info("===========================================")