ecological-agent-skills 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT_CONTEXT.md +191 -0
- package/CATALOG.md +329 -0
- package/LICENSE +692 -0
- package/README.md +347 -0
- package/bin/install.mjs +168 -0
- package/docs/comparison-with-alternatives.md +38 -0
- package/docs/global-examples-index.md +103 -0
- package/docs/repository-statistics.md +101 -0
- package/docs/theoretical-foundations.md +188 -0
- package/environment.yaml +106 -0
- package/examples/community/arctic_tundra_vegetation_example.md +247 -0
- package/examples/community/bird_landuse_example.md +63 -0
- package/examples/community/phytoplankton_reservoir_example.md +60 -0
- package/examples/community/reef_fish_indopacific_example.md +221 -0
- package/examples/impact/baci_road_example.md +57 -0
- package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
- package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
- package/examples/occupancy/puma_camera_example.md +61 -0
- package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
- package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
- package/examples/sdm/anteater_cerrado_example.md +69 -0
- package/examples/sdm/jaguar_amazon_example.md +80 -0
- package/examples/sdm/koala_climate_change_example.md +170 -0
- package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
- package/package.json +43 -0
- package/renv.lock +194 -0
- package/skills/SKILL_INDEX.json +1020 -0
- package/skills/acoustic-monitoring/SKILL.md +163 -0
- package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
- package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
- package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
- package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
- package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
- package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
- package/skills/biostatistics-workbench/SKILL.md +140 -0
- package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
- package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
- package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
- package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
- package/skills/camera-trap-processing/SKILL.md +159 -0
- package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
- package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
- package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
- package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
- package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
- package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
- package/skills/community-ecology-ordination/SKILL.md +133 -0
- package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
- package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
- package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
- package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
- package/skills/ecological-data-foundation/SKILL.md +129 -0
- package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
- package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
- package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
- package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
- package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
- package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
- package/skills/ecological-impact-assessment/SKILL.md +123 -0
- package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
- package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
- package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
- package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
- package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
- package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
- package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
- package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
- package/skills/ecosystem-services-assessment/SKILL.md +125 -0
- package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
- package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
- package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
- package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
- package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
- package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
- package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
- package/skills/environmental-time-series/SKILL.md +125 -0
- package/skills/environmental-time-series/examples/example-prompts.md +33 -0
- package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
- package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
- package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
- package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
- package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
- package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
- package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
- package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
- package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
- package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
- package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
- package/skills/landscape-connectivity/SKILL.md +170 -0
- package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
- package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
- package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
- package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
- package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
- package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
- package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
- package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
- package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
- package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
- package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
- package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
- package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
- package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
- package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
- package/skills/occupancy-and-detection/SKILL.md +126 -0
- package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
- package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
- package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
- package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
- package/skills/population-viability-analysis/SKILL.md +161 -0
- package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
- package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
- package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
- package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
- package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
- package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
- package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
- package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
- package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
- package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
- package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
- package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
- package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
- package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
- package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
- package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
- package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
- package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
- package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
- package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
- package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
- package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
- package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
- package/skills/spatial-prioritization/SKILL.md +162 -0
- package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
- package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
- package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
- package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
- package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
- package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
- package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
- package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
- package/skills/species-distribution-modeling/SKILL.md +139 -0
- package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
- package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
- package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
- package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
- package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
- package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
- package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
- package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
- package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
- package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
- package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
- package/templates/SKILL_TEMPLATE.md +225 -0
- package/templates/checklists/data-submission-checklist.md +38 -0
- package/templates/checklists/post-analysis-checklist.md +55 -0
- package/templates/checklists/pre-analysis-checklist.md +31 -0
- package/templates/prompts/debug-skill.md +47 -0
- package/templates/prompts/invoke-skill.md +34 -0
- package/templates/prompts/invoke-workflow.md +45 -0
- package/templates/reports/technical-report-template.md +80 -0
- package/templates/scripts/logger_setup.R +79 -0
- package/templates/scripts/logger_setup.py +119 -0
- package/templates/scripts/params_loader.R +28 -0
- package/templates/scripts/params_loader.py +38 -0
- package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
- package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
- package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
- package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
- package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
- package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
- package/workflows/produce-technical-report/WORKFLOW.md +113 -0
- package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
- package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
- package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
- package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
- package/workflows/run-population-viability/WORKFLOW.md +90 -0
- package/workflows/run-sdm-study/WORKFLOW.md +99 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
3
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
sdm_pipeline.py
|
|
7
|
+
SDM pipeline scaffold using elapid (MaxEnt equivalent in Python) + sklearn.
|
|
8
|
+
Usage: python sdm_pipeline.py <params_yaml> <output_dir>
|
|
9
|
+
Requires: pandas, numpy, sklearn, elapid (optional), yaml, matplotlib
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import sys
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
# Name of the skill this script belongs to; used in the log file name, the
# log line format and as the logger name.
SKILL_NAME = "species-distribution-modeling"

# Log files go to a "logs" directory relative to the current working
# directory. The directory is created as soon as this module is executed.
_LOG_DIR = Path("logs")
_LOG_DIR.mkdir(parents=True, exist_ok=True)

# One log file per run, distinguished by a second-resolution timestamp.
_log_file = _LOG_DIR / f"skill_{SKILL_NAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

# Configure the root logger to write every record (INFO and above) both to
# stdout and to the per-run log file, tagging each line with the skill name.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] [%(levelname)s] [" + SKILL_NAME + "] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(_log_file, encoding="utf-8"),
    ],
)

# Module-wide logger used by every function in this script.
logger = logging.getLogger(SKILL_NAME)
|
|
31
|
+
|
|
32
|
+
def log_step(n: int, desc: str) -> None:
    """Emit an INFO record announcing pipeline step ``n``.

    Args:
        n: Step number, rendered with ``%d``.
        desc: Short description of the step.
    """
    step_template = "-- STEP %d: %s"
    logger.log(logging.INFO, step_template, n, desc)
|
|
34
|
+
|
|
35
|
+
def log_decision(var: str, val, why: str) -> None:
    """Emit an INFO record documenting a parameter choice.

    Args:
        var: Name of the setting that was decided.
        val: Value chosen for it (rendered with ``%s``).
        why: Short justification for the choice.
    """
    decision_template = "DECISION | %s = %s | %s"
    logger.log(logging.INFO, decision_template, var, val, why)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
import yaml
|
|
40
|
+
import json
|
|
41
|
+
import numpy as np
|
|
42
|
+
import pandas as pd
|
|
43
|
+
import matplotlib.pyplot as plt
|
|
44
|
+
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
|
45
|
+
from sklearn.metrics import roc_auc_score
|
|
46
|
+
from sklearn.model_selection import StratifiedKFold
|
|
47
|
+
|
|
48
|
+
try:
|
|
49
|
+
import elapid
|
|
50
|
+
HAS_ELAPID = True
|
|
51
|
+
logger.info("elapid disponivel para modelagem MaxEnt.")
|
|
52
|
+
except ImportError:
|
|
53
|
+
HAS_ELAPID = False
|
|
54
|
+
logger.warning("elapid nao instalado (MaxEnt). Instale com: pip install elapid")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def load_params(path: str) -> dict:
    """Load the pipeline parameters from a YAML file.

    Any failure is logged and terminates the process with exit status 1, so
    callers always receive a mapping.

    Args:
        path: Path to the YAML parameters file.

    Returns:
        The parsed top-level YAML mapping.

    Raises:
        SystemExit: If the file does not exist, cannot be parsed as YAML, or
            does not contain a mapping at the top level (e.g. it is empty).
    """
    try:
        # Binary mode lets PyYAML detect the encoding itself instead of
        # relying on the platform locale; undecodable input then surfaces as
        # a yaml.YAMLError and is reported by the handler below.
        with open(path, "rb") as f:
            params = yaml.safe_load(f)
    except FileNotFoundError:
        logger.error(
            "Arquivo de parametros nao encontrado: %s\n Causa provavel: arquivo nao gerado pelo passo anterior.\n Verifique a saida de: species-distribution-modeling (preparacao de parametros)\n Skill anterior: species-distribution-modeling",
            path,
        )
        sys.exit(1)
    except yaml.YAMLError as e:
        logger.error(
            "Falha ao analisar YAML '%s': %s\n Causa provavel: YAML malformado ou encoding incorreto.\n Skill anterior: species-distribution-modeling",
            path, e,
        )
        sys.exit(1)

    # safe_load returns None for an empty document and a list/scalar for
    # non-mapping documents; fail here rather than with an opaque TypeError
    # when the caller indexes the result.
    if not isinstance(params, dict):
        logger.error(
            "Arquivo de parametros '%s' vazio ou sem mapeamento no nivel superior (tipo lido: %s).\n Causa provavel: YAML vazio ou estrutura inesperada.\n Skill anterior: species-distribution-modeling",
            path, type(params).__name__,
        )
        sys.exit(1)

    return params
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def load_data(p: dict) -> tuple:
    """Load the modelling table and the list of selected predictors.

    Reads ``data/processed/points_with_env.csv`` and
    ``outputs/selected_predictors.txt`` (one predictor name per line), keeps
    the predictors that exist as columns in the table, and takes the response
    from the ``pa`` column or, failing that, the ``presence`` column.

    Args:
        p: Parsed pipeline parameters. Not read by this function; kept for
            interface compatibility.

    Returns:
        A tuple ``(X, y, predictors)`` where ``X`` is the array of predictor
        values, ``y`` the array of response values and ``predictors`` the
        list of column names used to build ``X``.

    Raises:
        FileNotFoundError: If either input file is missing.
        ValueError: If no listed predictor matches a column of the table.
        KeyError: If neither ``pa`` nor ``presence`` is a column of the table.
        Exception: Any other error raised while reading the CSV is logged and
            re-raised unchanged.
    """
    data_path = "data/processed/points_with_env.csv"
    predictors_path = "outputs/selected_predictors.txt"

    try:
        pts = pd.read_csv(data_path)
    except FileNotFoundError:
        logger.error(
            "Arquivo de dados nao encontrado: %s\n Causa provavel: arquivo nao gerado pelo passo anterior.\n Esperado como saida de: ecological-data-foundation (clean_occurrences)\n Skill anterior: ecological-data-foundation",
            data_path,
        )
        raise
    except Exception as e:
        logger.error(
            "Falha ao carregar dados de pontos '%s': %s\n Skill anterior: ecological-data-foundation",
            data_path, e,
        )
        raise

    try:
        # Context manager closes the handle. Stripping each line removes
        # "\r" from CRLF files and stray padding; blank lines are skipped.
        # "utf-8-sig" also drops a leading BOM so the first name still matches.
        with open(predictors_path, encoding="utf-8-sig") as fh:
            requested = [line.strip() for line in fh if line.strip()]
    except FileNotFoundError:
        logger.error(
            "Arquivo de preditores nao encontrado: %s\n Causa provavel: etapa de selecao de variaveis nao concluida.\n Skill anterior: species-distribution-modeling",
            predictors_path,
        )
        raise

    predictors = [pr for pr in requested if pr in pts.columns]

    if not predictors:
        logger.error(
            "Nenhum preditor valido encontrado em '%s' que coincida com as colunas de '%s'.\n Causa provavel: nomes de colunas divergem entre os arquivos.\n Skill anterior: species-distribution-modeling",
            predictors_path, data_path,
        )
        raise ValueError("No valid predictors found.")

    # Report names that were listed but are absent from the table, so a
    # partial mismatch does not silently shrink the predictor set.
    missing = [pr for pr in requested if pr not in pts.columns]
    if missing:
        logger.warning(
            "Preditores ignorados por nao existirem em '%s' (%d): %s",
            data_path, len(missing), missing,
        )

    logger.info("Preditores carregados (%d): %s", len(predictors), predictors)

    if "pa" in pts.columns:
        y = pts["pa"].values
        log_decision("response_col", "pa", "coluna 'pa' encontrada nos dados")
    elif "presence" in pts.columns:
        y = pts["presence"].values
        log_decision("response_col", "presence", "coluna 'presence' usada como alternativa")
    else:
        logger.error(
            "Nenhuma coluna de resposta encontrada nos dados. Esperado: 'pa' ou 'presence'.\n Causa provavel: dados nao preparados pelo script de background.\n Skill anterior: ecological-data-foundation",
        )
        raise KeyError("Missing response column 'pa' or 'presence'.")

    X = pts[predictors].values

    # Presences are counted as the sum of the response, background as the
    # number of rows equal to 0.
    n_presence = int(y.sum())
    n_bg = int((y == 0).sum())
    logger.info("Registros carregados — Presencas: %d | Background: %d", n_presence, n_bg)

    if n_presence < 10:
        logger.warning(
            "Poucos registros de presenca (%d). SDM pode ser instavel. Recomendado: >= 30.",
            n_presence,
        )

    return X, y, predictors
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def spatial_cv_splits(pts_df, fold_col="cv_fold"):
    """Yield leave-one-fold-out splits based on a precomputed fold column.

    Folds are visited in sorted order of their identifiers. The yielded lists
    contain labels taken from ``pts_df.index``.

    Args:
        pts_df: DataFrame holding one row per point.
        fold_col: Name of the column holding each row's fold identifier.

    Yields:
        ``(train_idx, test_idx)`` pairs of lists: rows outside the current
        fold, then rows inside it.
    """
    fold_of_row = pts_df[fold_col]
    for held_out_fold in sorted(fold_of_row.unique()):
        in_fold = fold_of_row == held_out_fold
        held_out = pts_df.index[in_fold].tolist()
        kept = pts_df.index[~in_fold].tolist()
        yield kept, held_out
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def fit_rf(X_train, y_train, params: dict):
    """Fit a RandomForestClassifier on the training data.

    Hyperparameters are read from ``params["hyperparameters"]["random_forest"]``
    (``n_trees`` defaulting to 500, ``min_node_size`` defaulting to 5) and the
    random state from ``params["random_seeds"]["global"]``.

    Args:
        X_train: Training predictor values.
        y_train: Training response values.
        params: Parsed pipeline parameters.

    Returns:
        The fitted classifier.

    Raises:
        Exception: Any error raised while building or fitting the model is
            logged and re-raised unchanged.
    """
    rf_cfg = params.get("hyperparameters", {}).get("random_forest", {})
    n_trees = rf_cfg.get("n_trees", 500)
    min_node_size = rf_cfg.get("min_node_size", 5)

    decisions = (
        ("rf_n_trees", n_trees, "numero de arvores do RF definido em params.yaml"),
        ("rf_min_node_size", min_node_size, "tamanho minimo de no do RF definido em params.yaml"),
    )
    for setting, value, reason in decisions:
        log_decision(setting, value, reason)

    try:
        # The seed lookup stays inside the try so a missing key is reported
        # through the same error path as a fitting failure.
        model = RandomForestClassifier(
            n_estimators=n_trees,
            min_samples_leaf=min_node_size,
            random_state=params["random_seeds"]["global"],
            n_jobs=-1,
        )
        model.fit(X_train, y_train)
    except Exception as err:
        logger.error(
            "Falha ao ajustar RandomForest: %s\n Causa provavel: dados de treinamento invalidos ou parametros incompativeis.\n Skill anterior: species-distribution-modeling",
            err,
        )
        raise
    return model
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def fit_brt(X_train, y_train, params: dict):
    """Fit a GradientBoostingClassifier (BRT) on the training data.

    Hyperparameters are read from ``params["hyperparameters"]["brt"]``. Each
    of ``n_trees``, ``learning_rate``, ``tree_complexity`` and
    ``bag_fraction`` may be given either as a scalar or as a list, in which
    case the first element is used. Missing, null or empty entries fall back
    to 500, 0.01, 3 and 0.75 respectively. The random state comes from
    ``params["random_seeds"]["global"]``.

    Args:
        X_train: Training predictor values.
        y_train: Training response values.
        params: Parsed pipeline parameters.

    Returns:
        The fitted classifier.

    Raises:
        Exception: Any error raised while building or fitting the model is
            logged and re-raised unchanged.
    """
    # "or {}" tolerates sections that are present but null in the YAML.
    brt_p = (params.get("hyperparameters") or {}).get("brt") or {}

    def _setting(key, default):
        # Accept both "n_trees: 500" and "n_trees: [500, 1000]"; the
        # original indexed with [0] unconditionally and failed on scalars.
        raw = brt_p.get(key)
        if isinstance(raw, (list, tuple)):
            return raw[0] if raw else default
        return default if raw is None else raw

    n_trees = _setting("n_trees", 500)
    learning_rate = _setting("learning_rate", 0.01)
    tree_depth = _setting("tree_complexity", 3)
    bag_fraction = _setting("bag_fraction", 0.75)
    log_decision("brt_n_trees", n_trees, "numero de arvores do BRT definido em params.yaml")
    log_decision("brt_learning_rate", learning_rate, "taxa de aprendizado do BRT definida em params.yaml")
    log_decision("brt_tree_depth", tree_depth, "profundidade das arvores do BRT definida em params.yaml")
    log_decision("brt_bag_fraction", bag_fraction, "fracao de subamostras por arvore (BRT) definida em params.yaml")
    try:
        clf = GradientBoostingClassifier(
            n_estimators=n_trees,
            learning_rate=learning_rate,
            max_depth=tree_depth,
            subsample=bag_fraction,
            random_state=params["random_seeds"]["global"],
        )
        clf.fit(X_train, y_train)
        return clf
    except Exception as e:
        logger.error(
            "Falha ao ajustar GradientBoosting (BRT): %s\n Causa provavel: dados de treinamento invalidos ou parametros incompativeis.\n Skill anterior: species-distribution-modeling",
            e,
        )
        raise
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def main():
    """Run the SDM cross-validation scaffold end to end.

    Command line: ``sdm_pipeline.py [params_yaml] [output_dir]``; the
    defaults are ``params.yaml`` and ``outputs/sdm``.

    Steps: load the YAML parameters, load the modelling table, run a
    shuffled StratifiedKFold cross-validation for each enabled algorithm
    (``random_forest`` and/or ``brt``), then write the per-algorithm mean and
    standard deviation of the fold AUCs to ``cv_performance.csv`` in the
    output directory.

    Exits with status 1 if the parameters file is missing. If the input data
    cannot be loaded, the error is logged and the function returns without
    raising. Errors during cross-validation or while writing the results are
    logged and re-raised.
    """
    logger.info("Script: sdm_pipeline.py | Skill: %s", SKILL_NAME)

    params_file = sys.argv[1] if len(sys.argv) > 1 else "params.yaml"
    output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else Path("outputs/sdm")

    logger.info("Params file : %s", params_file)
    logger.info("Output dir : %s", output_dir)

    # Input precondition check
    if not Path(params_file).exists():
        logger.error(
            "Input nao encontrado: %s\n Causa provavel: arquivo nao gerado pelo passo anterior.\n Verifique a saida de: species-distribution-modeling (preparacao de parametros)\n Skill anterior: species-distribution-modeling",
            params_file,
        )
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Diretorio de saida pronto: %s", output_dir)

    log_step(1, "Carregar parametros YAML")
    p = load_params(params_file)

    seed = p["random_seeds"]["global"]
    np.random.seed(seed)
    log_decision("random_seed", seed, "semente global definida em params.yaml para reprodutibilidade")
    algorithms = p["modeling"]["algorithms"]
    log_decision("algorithms", algorithms, "algoritmos definidos em params.yaml")
    n_folds = p["modeling"]["cv_folds"]
    log_decision("cv_folds", n_folds, "numero de folds definido em params.yaml")

    logger.info("Pipeline SDM | Algoritmos: %s", algorithms)

    # Only these two algorithms are fitted below; flag anything else that was
    # requested so it is not silently skipped.
    if isinstance(algorithms, (list, tuple)):
        unsupported = [a for a in algorithms if a not in ("random_forest", "brt")]
        if unsupported:
            logger.warning(
                "Algoritmos sem implementacao neste scaffold (ignorados): %s",
                unsupported,
            )

    log_step(2, "Carregar dados de ocorrencia e preditores")
    try:
        X, y, predictors = load_data(p)
    except FileNotFoundError as e:
        logger.error(
            "Arquivo de entrada nao encontrado: %s\n Esperado como saida de: ecological-data-foundation\n Verifique se o passo anterior foi concluido.",
            e,
        )
        logger.info("Scaffold carregado. Adicione o carregamento de dados para seu estudo.")
        # Deliberate scaffold behaviour: stop without raising when data are absent.
        return
    except Exception as e:
        logger.error(
            "Nao foi possivel carregar dados: %s\n Scaffold carregado. Adicione o carregamento de dados para seu estudo.",
            e,
        )
        return

    log_step(3, "Executar validacao cruzada estratificada por fold")
    log_decision(
        "cv_method", "StratifiedKFold",
        "fallback para k-fold estratificado quando coluna cv_fold nao esta presente",
    )
    skf = StratifiedKFold(
        n_splits=n_folds,
        shuffle=True,
        random_state=seed,
    )
    auc_rf, auc_brt = [], []

    try:
        for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)):
            X_tr, X_te = X[train_idx], X[test_idx]
            y_tr, y_te = y[train_idx], y[test_idx]

            if "random_forest" in algorithms:
                rf = fit_rf(X_tr, y_tr, p)
                fold_auc = roc_auc_score(y_te, rf.predict_proba(X_te)[:, 1])
                auc_rf.append(fold_auc)
                logger.info("Fold %d | RF AUC = %.3f", fold + 1, fold_auc)
                if fold_auc < 0.7:
                    logger.warning(
                        "Fold %d: RF AUC baixo (%.3f). Verifique qualidade dos dados de background.",
                        fold + 1, fold_auc,
                    )

            if "brt" in algorithms:
                brt = fit_brt(X_tr, y_tr, p)
                fold_auc_brt = roc_auc_score(y_te, brt.predict_proba(X_te)[:, 1])
                auc_brt.append(fold_auc_brt)
                logger.info("Fold %d | BRT AUC = %.3f", fold + 1, fold_auc_brt)
                if fold_auc_brt < 0.7:
                    logger.warning(
                        "Fold %d: BRT AUC baixo (%.3f). Verifique qualidade dos dados de background.",
                        fold + 1, fold_auc_brt,
                    )
    except Exception as e:
        logger.error(
            "Falha durante validacao cruzada: %s\n Causa provavel: dados de treinamento invalidos, preditores com NaN, ou parametros incompativeis.\n Skill anterior: species-distribution-modeling",
            e,
        )
        raise

    log_step(4, "Compilar e salvar metricas de desempenho")
    results = {}
    if auc_rf:
        results["RandomForest"] = {"AUC_mean": float(np.mean(auc_rf)), "AUC_sd": float(np.std(auc_rf))}
        logger.info("RandomForest — AUC medio: %.3f (+/- %.3f)", np.mean(auc_rf), np.std(auc_rf))
    if auc_brt:
        results["BRT"] = {"AUC_mean": float(np.mean(auc_brt)), "AUC_sd": float(np.std(auc_brt))}
        logger.info("BRT — AUC medio: %.3f (+/- %.3f)", np.mean(auc_brt), np.std(auc_brt))

    if not results:
        # The CSV is still written (empty) to keep the output contract, but
        # the run must not look like a successful evaluation.
        logger.warning(
            "Nenhum algoritmo suportado foi executado; cv_performance.csv sera gravado vazio. Suportados: random_forest, brt",
        )

    results_df = pd.DataFrame(results).T

    try:
        results_df.to_csv(output_dir / "cv_performance.csv")
        logger.info("Gravado: %s", output_dir / "cv_performance.csv")
    except OSError as e:
        logger.error(
            "Falha ao gravar cv_performance.csv em '%s': %s\n Causa provavel: sem permissao de escrita no diretorio.\n Skill anterior: species-distribution-modeling",
            output_dir, e,
        )
        raise

    logger.info("Desempenho de CV:\n%s", results_df.to_string())
    logger.info("Saidas gravadas em: %s", output_dir)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
# Usage: Rscript tune_maxnet.R <points_with_env_csv> <output_dir> [rm_values] [fc_values]
|
|
5
|
+
|
|
6
|
+
# ── Inline logger ─────────────────────────────────────────────────────────────
|
|
7
|
+
# Minimal console logger. Every record goes through message() and has the
# shape "[YYYY-MM-DD HH:MM:SS] [LEVEL] <formatted text>".
SKILL_NAME <- "species-distribution-modeling"

# Current local time, already wrapped in square brackets.
.log_ts <- function() {
  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
}

# Shared emitter. `...` is forwarded untouched to sprintf(), so the first
# argument is the format string and the remaining ones are its values.
# `level` sits after `...` so it can only be supplied by its full name and
# never swallows a caller's argument.
.log_emit <- function(..., level) {
  message(.log_ts(), paste0(" [", level, "] "), sprintf(...))
}

log_info <- function(...) .log_emit(..., level = "INFO")
log_warn <- function(...) .log_emit(..., level = "WARN")
log_error <- function(...) .log_emit(..., level = "ERROR")

# Announce pipeline step number `n` with its description `d`.
log_step <- function(n, d) {
  log_info("-- STEP %d: %s", n, d)
}

# Record a decision: variable name `v`, chosen value `val`, rationale `why`.
log_decision <- function(v, val, why) {
  log_info("DECISION | %s = %s | %s", v, val, why)
}

# Make sure a ./logs directory exists; an existing directory is not an error.
dir.create("logs", recursive = TRUE, showWarnings = FALSE)
|
|
15
|
+
|
|
16
|
+
#
|
|
17
|
+
# Arguments:
|
|
18
|
+
# points_with_env_csv : CSV with columns decimalLongitude, decimalLatitude + env variables
|
|
19
|
+
# output_dir : Directory to write all outputs (created if absent)
|
|
20
|
+
# rm_values : Optional comma-separated RM grid (default: "0.5,1,1.5,2,3,4,6")
|
|
21
|
+
# fc_values : Optional comma-separated FC grid (default: "L,LQ,LQH,LQHP,LQHPT")
|
|
22
|
+
#
|
|
23
|
+
# Outputs:
|
|
24
|
+
# calibration_results.csv — all RM x FC model combinations with metrics (35 with the default grids)
|
|
25
|
+
# best_model_params.csv — models selected by OR_AICc criterion
|
|
26
|
+
# calibration_plot.png — delta_AICc × OR10 scatterplot
|
|
27
|
+
# best_maxnet.rds — fitted maxnet model with best parameters
|
|
28
|
+
|
|
29
|
+
suppressPackageStartupMessages(library(ENMeval))
|
|
30
|
+
suppressPackageStartupMessages(library(terra))
|
|
31
|
+
suppressPackageStartupMessages(library(dplyr))
|
|
32
|
+
suppressPackageStartupMessages(library(ggplot2))
|
|
33
|
+
|
|
34
|
+
# ── 1. Parse arguments ──────────────────────────────────────────────────────
|
|
35
|
+
log_step(1, "Analisar argumentos da linha de comando")

# Positional CLI arguments:
#   <points_with_env_csv> <output_dir> [rm_values] [fc_values]
args <- commandArgs(trailingOnly = TRUE)

# Default tuning grids, defined once and shared by both branches below.
default_rm_vals <- c(0.5, 1, 1.5, 2, 3, 4, 6)
default_fc_vals <- c("L", "LQ", "LQH", "LQHP", "LQHPT")

if (length(args) < 2) {
  # Defaults for interactive/test use
  occ_csv <- "tests/data/points_with_env.csv"
  output_dir <- "output/sdm_calibration"
  rm_vals <- default_rm_vals
  fc_vals <- default_fc_vals
  log_warn("Menos de 2 argumentos. Usando valores padrao para teste interativo.")
} else {
  occ_csv <- args[1]
  output_dir <- args[2]
  rm_vals <- default_rm_vals
  fc_vals <- default_fc_vals

  if (length(args) >= 3) {
    # Tolerate spaces around the commas ("1, 2, 3").
    rm_tokens <- trimws(strsplit(args[3], ",", fixed = TRUE)[[1]])
    # The coercion warning is silenced on purpose: bad tokens are detected
    # through the resulting NA and reported below with a clearer message.
    rm_vals <- suppressWarnings(as.numeric(rm_tokens))
    if (length(rm_vals) == 0 || any(!is.finite(rm_vals)) || any(rm_vals <= 0)) {
      log_error(
        "Valores de RM invalidos: '%s'\nCausa provavel: lista nao numerica ou com valores <= 0.\nEsperado: numeros positivos separados por virgula (ex.: 0.5,1,2).\nSkill anterior: species-distribution-modeling",
        args[3]
      )
      stop("Invalid rm_values: ", args[3])
    }
  }

  if (length(args) >= 4) {
    fc_tokens <- trimws(strsplit(args[4], ",", fixed = TRUE)[[1]])
    fc_vals <- fc_tokens[nzchar(fc_tokens)]
    if (length(fc_vals) == 0) {
      log_error(
        "Valores de FC invalidos: '%s'\nCausa provavel: lista vazia.\nEsperado: classes separadas por virgula (ex.: L,LQ,LQH).\nSkill anterior: species-distribution-modeling",
        args[4]
      )
      stop("Invalid fc_values: ", args[4])
    }
  }
}
|
|
51
|
+
|
|
52
|
+
# Echo the resolved run configuration.
log_info("Script: tune_maxnet.R | Skill: %s", SKILL_NAME)
log_info("OCC CSV : %s", occ_csv)
log_info("Output dir : %s", output_dir)

# ── Input precondition check ──────────────────────────────────────────────────
# Abort before doing any work when the occurrence CSV is not on disk.
input_found <- file.exists(occ_csv)
if (!input_found) {
  missing_input_fmt <- paste0(
    "Input nao encontrado: %s\n",
    "Causa provavel: arquivo nao gerado pelo passo anterior.\n",
    "Verifique a saida de: ecological-data-foundation (clean_occurrences)\n",
    "Skill anterior: ecological-data-foundation"
  )
  log_error(missing_input_fmt, occ_csv)
  stop("Missing: ", occ_csv)
}
|
|
64
|
+
|
|
65
|
+
# ── 2. Create output directory ───────────────────────────────────────────────
|
|
66
|
+
log_step(2, "Criar diretorio de saida")

# dir.create() is called with showWarnings = FALSE so that an already existing
# directory is not reported; success is therefore judged by the directory
# being present afterwards, which also catches a creation failure early
# instead of at the first write.
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
if (!dir.exists(output_dir)) {
  log_error(
    "Falha ao criar diretorio de saida '%s'.\nCausa provavel: sem permissao de escrita ou caminho invalido.\nSkill anterior: species-distribution-modeling",
    output_dir
  )
  stop("Cannot create output directory: ", output_dir)
}
log_info("Diretorio de saida pronto: %s", output_dir)

# Record the tuning grid that will be searched.
log_decision("rm_vals", paste(rm_vals, collapse = ","), "grade de multiplicadores de regularizacao para busca em grade MaxEnt")
log_decision("fc_vals", paste(fc_vals, collapse = ","), "grade de classes de features para busca em grade MaxEnt")
log_decision("total_models", length(rm_vals) * length(fc_vals), "numero total de combinacoes RM x FC")
|
|
73
|
+
|
|
74
|
+
# ── 3. Load occurrence data ──────────────────────────────────────────────────
|
|
75
|
+
log_step(3, "Carregar dados de ocorrencia com variaveis ambientais")

# Error handler for the CSV read: log the context, then re-signal the
# original condition so the script still terminates.
report_read_failure <- function(e) {
  log_error(
    "Falha ao ler CSV de ocorrencias '%s': %s\nCausa provavel: arquivo corrompido ou formato invalido.\nVerifique: %s\nSkill anterior: ecological-data-foundation",
    occ_csv, conditionMessage(e), occ_csv
  )
  stop(e)
}

# Read the whole table into `occ_data` and report its dimensions.
tryCatch(
  expr = {
    occ_data <- read.csv(occ_csv)
    log_info("Registros carregados: %d | Colunas: %d", nrow(occ_data), ncol(occ_data))
  },
  error = report_read_failure
)
|
|
86
|
+
|
|
87
|
+
# Identify coordinate columns (standard names)
|
|
88
|
+
# Identify coordinate columns: the first accepted name present in the CSV wins.
lon_col <- intersect(c("decimalLongitude", "longitude", "lon", "x"), names(occ_data))[1]
lat_col <- intersect(c("decimalLatitude", "latitude", "lat", "y"), names(occ_data))[1]

if (is.na(lon_col) || is.na(lat_col)) {
  log_error(
    "Colunas de coordenadas nao encontradas.\nEsperadas: decimalLongitude/decimalLatitude (ou longitude/latitude, lon/lat, x/y).\nColunas presentes: %s\nCausa provavel: CSV nao processado por clean_occurrences.\nSkill anterior: ecological-data-foundation",
    paste(names(occ_data), collapse = ", ")
  )
  stop("Cannot find coordinate columns. Expected: decimalLongitude/decimalLatitude")
}

log_info("Coluna de longitude: '%s' | Coluna de latitude: '%s'", lon_col, lat_col)

# When the CSV carries a 'type' column, rows flagged "background" are not
# presences and must not be counted or modelled as occurrences. Rows with a
# missing flag are kept as presences (assumption — confirm against the
# producer of the CSV).
is_presence <- rep(TRUE, nrow(occ_data))
if ("type" %in% names(occ_data)) {
  is_presence <- is.na(occ_data$type) | occ_data$type != "background"
}
occ_pts <- occ_data[is_presence, c(lon_col, lat_col)]
names(occ_pts) <- c("x", "y")

# Environmental predictor columns = everything except coordinates, species
# metadata and the presence/background flag.
meta_cols <- c(
  lon_col, lat_col, "species", "scientificName", "gbifID",
  "occurrenceID", "datasetKey", "type"
)
env_cols <- setdiff(names(occ_data), meta_cols)

log_info("Registros de ocorrencia: %d", nrow(occ_pts))
log_info("Variaveis ambientais (%d): %s", length(env_cols), paste(env_cols, collapse = ", "))

if (length(env_cols) == 0) {
  log_error(
    "Nenhuma variavel ambiental encontrada no CSV.\nColunas presentes: %s\nCausa provavel: valores ambientais nao extraidos para os pontos.\nSkill anterior: ecological-data-foundation",
    paste(names(occ_data), collapse = ", ")
  )
  stop("No environmental predictor columns found in: ", occ_csv)
}

# Hard floor: calibration is refused below 10 presence records.
if (nrow(occ_pts) < 10) {
  log_error(
    "Registros de ocorrencia insuficientes (%d). Minimo requerido: 10.\nCausa provavel: filtragem excessiva em clean_occurrences ou especie com distribuicao muito restrita.\nSkill anterior: ecological-data-foundation",
    nrow(occ_pts)
  )
  stop("Too few occurrences (", nrow(occ_pts), ") for calibration. Minimum required: 10")
}

# Soft floor: proceed, but flag the run as potentially unstable.
if (nrow(occ_pts) < 30) {
  log_warn(
    "Poucos registros de ocorrencia (%d). Resultados de calibracao podem ser instáveis. Recomendado: >= 30.",
    nrow(occ_pts)
  )
}
|
|
125
|
+
|
|
126
|
+
# ── 4. Prepare environmental values and background points ───────────────────
|
|
127
|
+
log_step(4, "Preparar dados ambientais e pontos de background")
# No raster stack is read by this script: predictor values come straight from
# the CSV columns. In full use, load a real raster stack instead.
log_warn("Construindo background a partir de valores env no CSV. Para uso em producao, use um SpatRaster real.")

# Predictor columns only; the 'type' flag is bookkeeping, not a predictor.
pred_cols <- setdiff(env_cols, "type")

# Background: if the CSV has a 'type' column, rows with type == "background"
# are the background and all remaining rows are occurrences; otherwise a
# pseudo-background is generated by jittering occurrence coordinates.
if ("type" %in% names(occ_data)) {
  # A missing flag is treated as "not background" so that NA never reaches
  # the row subsetting below.
  bg_idx <- !is.na(occ_data$type) & occ_data$type == "background"
  bg_pts <- occ_data[bg_idx, c(lon_col, lat_col)]
  bg_env <- occ_data[bg_idx, pred_cols, drop = FALSE]
  occ_xy <- occ_data[!bg_idx, c(lon_col, lat_col)]
  occ_env <- occ_data[!bg_idx, pred_cols, drop = FALSE]
  log_info("Coluna 'type' encontrada. Usando %d pontos de background definidos.", sum(bg_idx))
  log_decision("background_source", "type column", "coluna 'type' presente no CSV define pontos de background")
} else {
  # Use all points as occurrences and generate background by jittering.
  # In production: load bg from a proper background CSV.
  log_warn("Coluna 'type' ausente. Gerando pseudo-background por jitter. Use um CSV de background real em producao.")
  log_decision("background_source", "jitter", "coluna 'type' ausente; pseudo-background gerado por jitter aleatorio")
  set.seed(42)
  n_bg <- min(10000, nrow(occ_data) * 10)
  # Draw the source rows once so that every pseudo-background point takes its
  # jittered coordinates and its predictor values from the same record, and
  # so that exactly n_bg points are produced whatever the number of rows.
  src_rows <- sample(nrow(occ_data), n_bg, replace = TRUE)
  bg_pts <- data.frame(
    x = occ_data[[lon_col]][src_rows] + runif(n_bg, -2, 2),
    y = occ_data[[lat_col]][src_rows] + runif(n_bg, -2, 2)
  )
  bg_env <- occ_data[src_rows, pred_cols, drop = FALSE]
  occ_xy <- occ_data[, c(lon_col, lat_col)]
  occ_env <- occ_data[, pred_cols, drop = FALSE]
  log_info("Pseudo-background gerado: %d pontos", n_bg)
}

names(bg_pts) <- c("x", "y")
names(occ_xy) <- c("x", "y")

# With envs = NULL, ENMevaluate is given the predictor values in the same
# tables as the coordinates (coordinates first, then one column per
# predictor, identical column names in both tables).
occ_swd <- cbind(occ_xy, occ_env)
bg_swd <- cbind(bg_pts, bg_env)
rownames(occ_swd) <- NULL
rownames(bg_swd) <- NULL

# ── 5. Run ENMeval grid search ───────────────────────────────────────────────
log_step(5, "Executar busca em grade ENMeval (MaxNet)")
log_info("Valores de RM : %s", paste(rm_vals, collapse = ", "))
log_info("Valores de FC : %s", paste(fc_vals, collapse = ", "))
log_info("Total de modelos: %d", length(rm_vals) * length(fc_vals))
log_decision(
  "partitions", "block",
  "particao espacial por blocos geograficos — evita inflacao de AUC por autocorrelacao espacial"
)

# ENMevaluate with maxnet and spatial block cross-validation.
# Only arguments that differ from the defaults are passed; partition groups
# are left to the "block" method rather than supplied by hand.
eval_out <- tryCatch({
  ENMevaluate(
    occs = occ_swd,
    envs = NULL,
    bg = bg_swd,
    algorithm = "maxnet",
    partitions = "block",
    tune.args = list(
      rm = rm_vals,
      fc = fc_vals
    ),
    other.settings = list(
      abs.auc.diff = FALSE
    )
  )
}, error = function(e) {
  log_error(
    "Falha em ENMevaluate: %s\nCausa provavel: dados ambientais insuficientes, pacote ENMeval nao instalado, ou pontos de ocorrencia fora do extent dos preditores.\nVerifique: install.packages('ENMeval') e a qualidade dos dados de entrada.\nSkill anterior: ecological-data-foundation",
    conditionMessage(e)
  )
  stop(e)
})

log_info("Calibracao ENMeval concluida.")
|
|
200
|
+
|
|
201
|
+
# ── 6. Extract and process results table ─────────────────────────────────────
|
|
202
|
+
log_step(6, "Extrair e processar tabela de resultados da calibracao")
tryCatch({
  res <- eval.results(eval_out)

  # Later steps address the tuning settings as tune.args.rm / tune.args.fc.
  # When the results table exposes them as 'rm' / 'fc' instead, add aliases
  # under the expected names; existing columns are never overwritten.
  # as.character() first so that factor columns yield their labels.
  if (!"tune.args.rm" %in% names(res) && "rm" %in% names(res)) {
    res$tune.args.rm <- as.numeric(as.character(res$rm))
  }
  if (!"tune.args.fc" %in% names(res) && "fc" %in% names(res)) {
    res$tune.args.fc <- as.character(res$fc)
  }

  # Compute delta_AICc relative to the best (lowest AICc) model
  res$delta.AICc <- res$AICc - min(res$AICc, na.rm = TRUE)

  # Rename for clarity, then put the lowest delta_AICc first: the fallback
  # in the selection step relies on this ordering.
  res <- res %>%
    rename(
      OR10 = or.10p.avg,
      AUC_train = auc.train,
      AUC_val = auc.val.avg
    ) %>%
    arrange(delta.AICc)

  # Save full calibration table
  calib_path <- file.path(output_dir, "calibration_results.csv")
  write.csv(res, calib_path, row.names = FALSE)
  log_info("Gravado: %s", calib_path)
}, error = function(e) {
  log_error(
    "Falha ao extrair resultados da calibracao: %s\nCausa provavel: objeto ENMeval com estrutura inesperada ou colunas renomeadas na versao do pacote.\nVerifique a versao do ENMeval instalada.\nSkill anterior: species-distribution-modeling",
    conditionMessage(e)
  )
  stop(e)
})
|
|
229
|
+
|
|
230
|
+
# ── 7. Select best models by OR_AICc criterion ───────────────────────────────
|
|
231
|
+
log_step(7, "Selecionar melhores modelos pelo criterio OR_AICc")
# Rule: OR10 <= 0.15 (allows slight tolerance above 0.10 expected)
#       AND delta_AICc < 2 (equivalent models by Burnham & Anderson)
or_threshold <- 0.15
aicc_threshold <- 2

log_decision("or_threshold", or_threshold, "tolerancia acima de 0.10 conforme Anderson et al. 2010")
log_decision("aicc_threshold", aicc_threshold, "modelos equivalentes por Burnham & Anderson 2002 (delta_AICc < 2)")

best_models <- tryCatch({
  bm <- res %>%
    filter(OR10 <= or_threshold, delta.AICc < aicc_threshold) %>%
    arrange(OR10, delta.AICc)

  if (nrow(bm) == 0) {
    # Fallback: ignore the OR threshold and take the first row of `res`,
    # which is the AICc-best model because `res` is sorted by delta_AICc.
    log_warn(
      "Nenhum modelo atende OR10 <= %.2f E delta_AICc < %.1f. Usando modelo com menor AICc como fallback.",
      or_threshold, aicc_threshold
    )
    log_decision(
      "selection_fallback", "aicc_best",
      "nenhum modelo no quadrante ideal; selecionado o melhor por AICc para prosseguir"
    )
    bm <- res[1, ]
  } else {
    log_info("%d modelo(s) atendem ao criterio OR_AICc.", nrow(bm))
  }
  bm
}, error = function(e) {
  log_error(
    "Falha ao selecionar melhores modelos: %s\nCausa provavel: colunas OR10 ou AICc ausentes na tabela de resultados.\nSkill anterior: species-distribution-modeling",
    conditionMessage(e)
  )
  stop(e)
})

best_path <- file.path(output_dir, "best_model_params.csv")
write.csv(best_models, best_path, row.names = FALSE)
log_info("Gravado: %s", best_path)
log_info("Melhores modelos (primeiras linhas):")

# Show only the columns that are actually present, so a differently named
# tuning column cannot abort the run at a purely informational step.
show_cols <- intersect(
  c("tune.args.rm", "tune.args.fc", "OR10", "AICc", "delta.AICc"),
  names(best_models)
)
# capture.output() returns one string per printed row; message() would glue
# them together with no separator, so join them with newlines first.
best_table_txt <- capture.output(print(best_models[, show_cols, drop = FALSE]))
message(paste(best_table_txt, collapse = "\n"))
|
|
273
|
+
|
|
274
|
+
# ── 8. Calibration plot ───────────────────────────────────────────────────────
|
|
275
|
+
log_step(8, "Gerar grafico de calibracao (delta_AICc x OR10)")
tryCatch({
  # Annotation anchors are derived from the data range; na.rm = TRUE keeps
  # them defined when some models have a missing OR10 or AICc.
  or10_top <- max(res$OR10, na.rm = TRUE)
  daicc_top <- max(res$delta.AICc, na.rm = TRUE)

  # Labels follow the threshold variables so the text cannot drift from the
  # reference lines that are actually drawn.
  aicc_label <- sprintf("delta_AICc = %s", aicc_threshold)
  or_label <- sprintf("OR10 = %.2f", or_threshold)

  # One point per model: colour = feature class, size = regularization
  # multiplier; dashed lines mark the two selection thresholds.
  p <- ggplot(res, aes(x = delta.AICc, y = OR10,
                       colour = tune.args.fc, size = tune.args.rm)) +
    geom_point(alpha = 0.8) +
    geom_hline(yintercept = or_threshold, linetype = "dashed", colour = "red",
               linewidth = 0.7) +
    geom_vline(xintercept = aicc_threshold, linetype = "dashed", colour = "blue",
               linewidth = 0.7) +
    annotate("text", x = aicc_threshold + 0.5, y = or10_top * 0.95,
             label = aicc_label, colour = "blue", hjust = 0, size = 3) +
    annotate("text", x = daicc_top * 0.7, y = or_threshold + 0.005,
             label = or_label, colour = "red", size = 3) +
    labs(
      title = "MaxEnt Calibration: OR10 vs delta_AICc",
      subtitle = "Lower-left quadrant = best models (low omission + parsimonious)",
      x = "delta AICc (relative to best model)",
      y = "OR10 (omission rate at 10% training threshold)",
      colour = "Feature Class",
      size = "Regularization Multiplier"
    ) +
    theme_bw(base_size = 12)

  plot_path <- file.path(output_dir, "calibration_plot.png")
  ggsave(plot_path, p, width = 10, height = 7, dpi = 150)
  log_info("Gravado: %s", plot_path)
}, error = function(e) {
  log_error(
    "Falha ao gerar grafico de calibracao: %s\nCausa provavel: pacote ggplot2 nao instalado ou colunas ausentes na tabela de resultados.\nVerifique: install.packages('ggplot2')\nSkill anterior: species-distribution-modeling",
    conditionMessage(e)
  )
  stop(e)
})
|
|
308
|
+
|
|
309
|
+
# ── 9. Fit final model with best parameters ────────────────────────────────
|
|
310
|
+
log_step(9, "Ajustar modelo final com os melhores parametros")

# First value of the first candidate column present in `tbl`, as a string
# (as.character() so factor columns yield their label); NA when none exists.
first_value <- function(tbl, candidates) {
  hit <- intersect(candidates, names(tbl))
  if (length(hit) == 0) {
    return(NA_character_)
  }
  as.character(tbl[[hit[1]]][1])
}

# Settings of the top-ranked selected model, kept as strings for logging and
# for comparison against the tuning table.
best_rm <- first_value(best_models, c("tune.args.rm", "rm"))
best_fc <- first_value(best_models, c("tune.args.fc", "fc"))
log_info("Ajustando modelo final: RM = %s | FC = %s", best_rm, best_fc)
log_decision("final_rm", best_rm, "RM do modelo de melhor desempenho no criterio OR_AICc")
log_decision("final_fc", best_fc, "FC do modelo de melhor desempenho no criterio OR_AICc")

tryCatch({
  # Retrieve the fitted model object from the ENMeval results.
  # `res` was re-sorted by delta_AICc, so its row positions no longer line up
  # with the model list; the model is located by its tuning key, or failing
  # that by matching RM/FC in a freshly extracted, unsorted results table.
  fitted_models <- eval.models(eval_out)
  best_key <- first_value(best_models, "tune.args")

  if (!is.na(best_key) && best_key %in% names(fitted_models)) {
    best_model_obj <- fitted_models[[best_key]]
  } else {
    unsorted <- eval.results(eval_out)
    hits <- which(
      as.character(unsorted$rm) == best_rm &
        as.character(unsorted$fc) == best_fc
    )
    if (length(hits) == 0) {
      stop("No fitted model matches RM = ", best_rm, " / FC = ", best_fc)
    }
    best_model_obj <- fitted_models[[hits[1]]]
  }

  # Save as RDS for downstream projection
  rds_path <- file.path(output_dir, "best_maxnet.rds")
  saveRDS(best_model_obj, rds_path)
  log_info("Gravado: %s", rds_path)
}, error = function(e) {
  log_error(
    "Falha ao ajustar ou salvar modelo final: %s\nCausa provavel: indice do modelo nao encontrado nos resultados ENMeval ou erro ao serializar o objeto.\nSkill anterior: species-distribution-modeling",
    conditionMessage(e)
  )
  stop(e)
})
|
|
333
|
+
|
|
334
|
+
# ── 10. Summary ──────────────────────────────────────────────────────────────
|
|
335
|
+
log_step(10, "Exibir resumo da calibracao")

# Count the models that really satisfy both thresholds. nrow(best_models)
# cannot be used for this: when nothing qualifies, the selection step falls
# back to a single AICc-best row, which would be reported as one passing
# model.
n_or_aicc_ok <- sum(
  res$OR10 <= or_threshold & res$delta.AICc < aicc_threshold,
  na.rm = TRUE
)

log_info("========== RESUMO DA CALIBRACAO ==========")
log_info("Modelos avaliados : %d", nrow(res))
log_info("Modelos OR_AICc-ok : %d", n_or_aicc_ok)
log_info("RM selecionado : %s", best_rm)
log_info("FC selecionado : %s", best_fc)
# Metrics of the first (top-ranked) row of the selected models.
log_info("Melhor OR10 : %.3f", best_models$OR10[1])
log_info("Melhor AUC (val) : %.3f", best_models$AUC_val[1])
log_info("Melhor delta_AICc : %.3f", best_models$delta.AICc[1])
log_info("==========================================")
|