ecological-agent-skills 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT_CONTEXT.md +191 -0
- package/CATALOG.md +329 -0
- package/LICENSE +692 -0
- package/README.md +347 -0
- package/bin/install.mjs +168 -0
- package/docs/comparison-with-alternatives.md +38 -0
- package/docs/global-examples-index.md +103 -0
- package/docs/repository-statistics.md +101 -0
- package/docs/theoretical-foundations.md +188 -0
- package/environment.yaml +106 -0
- package/examples/community/arctic_tundra_vegetation_example.md +247 -0
- package/examples/community/bird_landuse_example.md +63 -0
- package/examples/community/phytoplankton_reservoir_example.md +60 -0
- package/examples/community/reef_fish_indopacific_example.md +221 -0
- package/examples/impact/baci_road_example.md +57 -0
- package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
- package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
- package/examples/occupancy/puma_camera_example.md +61 -0
- package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
- package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
- package/examples/sdm/anteater_cerrado_example.md +69 -0
- package/examples/sdm/jaguar_amazon_example.md +80 -0
- package/examples/sdm/koala_climate_change_example.md +170 -0
- package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
- package/package.json +43 -0
- package/renv.lock +194 -0
- package/skills/SKILL_INDEX.json +1020 -0
- package/skills/acoustic-monitoring/SKILL.md +163 -0
- package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
- package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
- package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
- package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
- package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
- package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
- package/skills/biostatistics-workbench/SKILL.md +140 -0
- package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
- package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
- package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
- package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
- package/skills/camera-trap-processing/SKILL.md +159 -0
- package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
- package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
- package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
- package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
- package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
- package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
- package/skills/community-ecology-ordination/SKILL.md +133 -0
- package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
- package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
- package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
- package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
- package/skills/ecological-data-foundation/SKILL.md +129 -0
- package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
- package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
- package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
- package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
- package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
- package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
- package/skills/ecological-impact-assessment/SKILL.md +123 -0
- package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
- package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
- package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
- package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
- package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
- package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
- package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
- package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
- package/skills/ecosystem-services-assessment/SKILL.md +125 -0
- package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
- package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
- package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
- package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
- package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
- package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
- package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
- package/skills/environmental-time-series/SKILL.md +125 -0
- package/skills/environmental-time-series/examples/example-prompts.md +33 -0
- package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
- package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
- package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
- package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
- package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
- package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
- package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
- package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
- package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
- package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
- package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
- package/skills/landscape-connectivity/SKILL.md +170 -0
- package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
- package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
- package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
- package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
- package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
- package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
- package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
- package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
- package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
- package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
- package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
- package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
- package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
- package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
- package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
- package/skills/occupancy-and-detection/SKILL.md +126 -0
- package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
- package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
- package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
- package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
- package/skills/population-viability-analysis/SKILL.md +161 -0
- package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
- package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
- package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
- package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
- package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
- package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
- package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
- package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
- package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
- package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
- package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
- package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
- package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
- package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
- package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
- package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
- package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
- package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
- package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
- package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
- package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
- package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
- package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
- package/skills/spatial-prioritization/SKILL.md +162 -0
- package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
- package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
- package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
- package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
- package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
- package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
- package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
- package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
- package/skills/species-distribution-modeling/SKILL.md +139 -0
- package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
- package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
- package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
- package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
- package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
- package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
- package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
- package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
- package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
- package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
- package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
- package/templates/SKILL_TEMPLATE.md +225 -0
- package/templates/checklists/data-submission-checklist.md +38 -0
- package/templates/checklists/post-analysis-checklist.md +55 -0
- package/templates/checklists/pre-analysis-checklist.md +31 -0
- package/templates/prompts/debug-skill.md +47 -0
- package/templates/prompts/invoke-skill.md +34 -0
- package/templates/prompts/invoke-workflow.md +45 -0
- package/templates/reports/technical-report-template.md +80 -0
- package/templates/scripts/logger_setup.R +79 -0
- package/templates/scripts/logger_setup.py +119 -0
- package/templates/scripts/params_loader.R +28 -0
- package/templates/scripts/params_loader.py +38 -0
- package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
- package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
- package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
- package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
- package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
- package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
- package/workflows/produce-technical-report/WORKFLOW.md +113 -0
- package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
- package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
- package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
- package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
- package/workflows/run-population-viability/WORKFLOW.md +90 -0
- package/workflows/run-sdm-study/WORKFLOW.md +99 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
"""Download species assessment data from the IUCN Red List API v3.
|
|
5
|
+
|
|
6
|
+
Usage: python download_from_iucn.py <species_name_or_list_csv> <output_dir> [include_range_maps]
|
|
7
|
+
|
|
8
|
+
Arguments:
|
|
9
|
+
species_name_or_list_csv : Species name (e.g., "Panthera onca") or path to CSV
|
|
10
|
+
with column 'scientificName'
|
|
11
|
+
output_dir : Directory for outputs (created if absent)
|
|
12
|
+
include_range_maps : 'true' or 'false' — reserved for downloading range data; currently accepted but ignored by this script (default: false)
|
|
13
|
+
|
|
14
|
+
Requires:
|
|
15
|
+
IUCN_REDLIST_KEY environment variable — obtain at: https://apiv3.iucnredlist.org/
|
|
16
|
+
|
|
17
|
+
Outputs (per species):
|
|
18
|
+
iucn_status_{species}.csv — Red List category, criteria, country occurrences
|
|
19
|
+
iucn_habitats_{species}.csv — suitable habitats
|
|
20
|
+
download_metadata_IUCN_{species}.txt — provenance and citation
|
|
21
|
+
|
|
22
|
+
Standard output schema (iucn_status CSV):
|
|
23
|
+
species, decimalLatitude, decimalLongitude, eventDate, countryCode,
|
|
24
|
+
basisOfRecord, coordinateUncertaintyInMeters, datasetName, occurrenceID,
|
|
25
|
+
source, download_doi
|
|
26
|
+
Extra IUCN columns:
|
|
27
|
+
rl_category, rl_criteria, population_trend, assessment_year
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import logging
|
|
31
|
+
import os
|
|
32
|
+
import sys
|
|
33
|
+
from datetime import datetime
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
|
|
36
|
+
SKILL_NAME = "ecological-data-foundation"

# Every run writes to its own timestamped file under ./logs (relative to the
# current working directory) and mirrors each record to stdout.
_LOG_DIR = Path("logs")
_LOG_DIR.mkdir(parents=True, exist_ok=True)
_log_file = _LOG_DIR / "skill_{}_{}.log".format(
    SKILL_NAME, datetime.now().strftime("%Y%m%d_%H%M%S")
)

_console_handler = logging.StreamHandler(sys.stdout)
_file_handler = logging.FileHandler(_log_file, encoding="utf-8")
logging.basicConfig(
    level=logging.INFO,
    format=f"[%(asctime)s] [%(levelname)s] [{SKILL_NAME}] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[_console_handler, _file_handler],
)

# Module-wide logger used by every function in this script.
logger = logging.getLogger(SKILL_NAME)
|
|
50
|
+
|
|
51
|
+
def log_step(n: int, desc: str) -> None:
    """Log the start of numbered pipeline step ``n`` with its description."""
    logger.log(logging.INFO, "-- STEP %d: %s", n, desc)
|
|
53
|
+
|
|
54
|
+
def log_decision(var: str, val, why: str) -> None:
    """Log a parameter decision as ``DECISION | <var> = <val> | <why>``."""
    fields = (var, val, why)
    logger.log(logging.INFO, "DECISION | %s = %s | %s", *fields)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
from datetime import date
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
import requests
|
|
62
|
+
import pandas as pd
|
|
63
|
+
except ImportError as e:
|
|
64
|
+
logger.error(
|
|
65
|
+
"Dependencia ausente: %s\n Instale com: pip install requests pandas\n Skill anterior: ecological-data-foundation",
|
|
66
|
+
e,
|
|
67
|
+
)
|
|
68
|
+
sys.exit(1)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ── Constants ─────────────────────────────────────────────────────────────────
|
|
72
|
+
IUCN_API_BASE = "https://apiv3.iucnredlist.org/api/v3"


# ── Helper functions ──────────────────────────────────────────────────────────

def iucn_get(endpoint: str, api_key: str) -> dict:
    """Make an authenticated GET request to the IUCN API.

    Args:
        endpoint: Path below ``IUCN_API_BASE``; callers are expected to have
            percent-encoded any user-supplied segment already.
        api_key: IUCN Red List API token, sent as the ``token`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status. The same exception object is re-raised, but its
            message is rewritten so that it no longer contains the token.
    """
    from urllib.parse import quote_plus

    url = f"{IUCN_API_BASE}/{endpoint}"
    try:
        resp = requests.get(url, params={"token": api_key}, timeout=30)
        resp.raise_for_status()
        return resp.json()
    except requests.RequestException as e:
        # requests puts the full request URL (query string included) into its
        # exception messages. The token lives in that query string, so it must
        # be masked before the message reaches this log call or any caller
        # that logs the re-raised exception.
        safe_msg = str(e)
        for secret in {api_key, quote_plus(api_key)}:
            if secret:
                safe_msg = safe_msg.replace(secret, "***")
        e.args = (safe_msg,)
        logger.error(
            "Falha na requisicao IUCN API '%s': %s\n Causa provavel: chave invalida ou API indisponivel.\n Verifique: https://apiv3.iucnredlist.org/\n Skill anterior: ecological-data-foundation",
            endpoint, safe_msg,
        )
        raise
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def fetch_assessment(species_name: str, api_key: str) -> dict:
    """Fetch the IUCN assessment record for a species by scientific name.

    Args:
        species_name: Scientific name, e.g. "Panthera onca".
        api_key: IUCN Red List API token.

    Returns:
        The JSON payload returned by ``iucn_get`` for ``species/<name>``.

    Raises:
        Whatever ``iucn_get`` raises; errors are not handled here.
    """
    from urllib.parse import quote

    # Encode every reserved character, not only spaces: a stray "/", "?" or
    # "#" in the name would otherwise change the request path or cut off the
    # query string that carries the token.
    sp_encoded = quote(species_name, safe="")
    return iucn_get(f"species/{sp_encoded}", api_key)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def fetch_country_occurrences(species_name: str, api_key: str) -> list[dict]:
    """Fetch country-level occurrences from IUCN.

    Best-effort: any failure is logged as a warning and an empty list is
    returned, so the caller can continue without country data.

    Args:
        species_name: Scientific name, e.g. "Panthera onca".
        api_key: IUCN Red List API token.

    Returns:
        The ``result`` entry of the API response, or ``[]`` when the key is
        absent or the request failed.
    """
    from urllib.parse import quote

    # Encode every reserved character, not only spaces, so the name cannot
    # alter the request path or query string.
    sp_encoded = quote(species_name, safe="")
    try:
        data = iucn_get(f"species/countries/name/{sp_encoded}", api_key)
        return data.get("result", [])
    except Exception as e:
        logger.warning("Falha ao buscar paises de ocorrencia para '%s': %s", species_name, e)
        return []
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def fetch_habitats(species_name: str, api_key: str) -> list[dict]:
    """Fetch suitable habitats for a species.

    Best-effort: any failure is logged as a warning and an empty list is
    returned, so the caller can continue without habitat data.

    Args:
        species_name: Scientific name, e.g. "Panthera onca".
        api_key: IUCN Red List API token.

    Returns:
        The ``result`` entry of the API response, or ``[]`` when the key is
        absent or the request failed.
    """
    from urllib.parse import quote

    # Encode every reserved character, not only spaces, so the name cannot
    # alter the request path or query string.
    sp_encoded = quote(species_name, safe="")
    try:
        data = iucn_get(f"habitats/species/name/{sp_encoded}", api_key)
        return data.get("result", [])
    except Exception as e:
        logger.warning("Falha ao buscar habitats para '%s': %s", species_name, e)
        return []
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def standardise_records(assessment: dict, countries: list[dict],
                        species_name: str) -> "pd.DataFrame":
    """Convert IUCN assessment data to the standard occurrence schema.

    Args:
        assessment: API payload whose ``result`` list holds the assessment;
            only the first entry is used.
        countries: Country records; each contributes one output row keyed by
            its ``code`` value. When empty, a single country-less row is
            produced instead.
        species_name: Name written to the ``species`` column.

    Returns:
        A DataFrame with the 15 schema columns. It has zero rows when
        ``assessment`` carries no result, one row when ``countries`` is
        empty, and otherwise one row per country.
    """
    columns = [
        "species", "decimalLatitude", "decimalLongitude", "eventDate",
        "countryCode", "basisOfRecord", "coordinateUncertaintyInMeters",
        "datasetName", "occurrenceID", "source", "download_doi",
        "rl_category", "rl_criteria", "population_trend", "assessment_year",
    ]

    results = assessment.get("result", [])
    if not results:
        # Keep the schema even when there is nothing to report, so a CSV
        # written from this frame still has a header row.
        return pd.DataFrame(columns=columns)

    res = results[0]
    taxon_id = res.get("taxonid", "")
    assess_date = res.get("assessment_date") or ""

    # Fields shared by every row; only countryCode and occurrenceID vary.
    template = {
        "species": species_name,
        "decimalLatitude": None,
        "decimalLongitude": None,
        "eventDate": None,
        "countryCode": None,
        "basisOfRecord": "LITERATURE",
        "coordinateUncertaintyInMeters": None,
        "datasetName": "IUCN Red List",
        "occurrenceID": f"IUCN:{taxon_id}",
        "source": "IUCN",
        "download_doi": None,
        "rl_category": res.get("category", ""),
        "rl_criteria": res.get("criteria", ""),
        "population_trend": res.get("population_trend", ""),
        "assessment_year": assess_date[:4],
    }

    if not countries:
        rows = [template]
    else:
        rows = []
        for ctry in countries:
            code = ctry.get("code", "")
            rows.append({
                **template,
                "countryCode": code,
                "occurrenceID": f"IUCN:{taxon_id}:{code}",
            })

    return pd.DataFrame(rows, columns=columns)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def save_metadata(output_dir: Path, species_name: str, taxon_id,
                  category: str, assess_year: str) -> None:
    """Write the provenance and citation text file for one species download.

    The file is ``download_metadata_IUCN_<species>.txt`` inside
    ``output_dir``, with spaces in the species name replaced by underscores.

    Args:
        output_dir: Existing directory to write into.
        species_name: Scientific name recorded in the file and its name.
        taxon_id: IUCN taxon identifier recorded in the file.
        category: Red List category recorded in the file.
        assess_year: Assessment year recorded in the file.

    Raises:
        OSError: If the file cannot be written (logged, then re-raised).
    """
    safe_name = species_name.replace(" ", "_")

    # Read the clock once so the date, year and month written below cannot
    # disagree if the call straddles midnight.
    now = date.today()
    today = now.isoformat()
    year = now.year
    month = f"{now.month:02d}"

    lines = [
        f"Species: {species_name}",
        f"IUCN Taxon ID: {taxon_id}",
        "Source: IUCN Red List (https://www.iucnredlist.org)",
        "API version: v3 (https://apiv3.iucnredlist.org)",
        f"Red List category: {category}",
        f"Assessment year: {assess_year}",
        f"Download date: {today}",
        # NOTE(review): the "Version" here is derived from the download date,
        # not reported by the API — confirm it matches the Red List release
        # actually served before citing it.
        (f"Citation: IUCN {year}. The IUCN Red List of Threatened Species. "
         f"Version {year}-{month}. https://www.iucnredlist.org Accessed on {today}."),
        "License: CC BY 4.0 (https://creativecommons.org/licenses/by/4.0/)",
    ]
    meta_path = output_dir / f"download_metadata_IUCN_{safe_name}.txt"
    try:
        meta_path.write_text("\n".join(lines), encoding="utf-8")
        logger.info("Metadados gravados: %s", meta_path)
    except OSError as e:
        logger.error(
            "Falha ao gravar metadados: %s\n Skill anterior: ecological-data-foundation", e,
        )
        raise
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# ── Main download logic ────────────────────────────────────────────────────────
|
|
206
|
+
|
|
207
|
+
def download_species(species_name: str, output_dir: Path, api_key: str) -> None:
    """Download and write all IUCN outputs for a single species.

    Runs six logged steps in order: fetch the assessment, fetch country
    occurrences, fetch habitats, standardise the records, write the status
    CSV (plus the habitats CSV when habitats were returned), and write the
    metadata file. Returns early, writing nothing, when the assessment has
    no result.

    Args:
        species_name: Scientific name; spaces become underscores in file names.
        output_dir: Directory that receives the output files.
        api_key: IUCN Red List API token.

    Raises:
        OSError: If the status CSV cannot be written (logged, then re-raised).
        Exceptions raised by ``fetch_assessment`` and ``save_metadata`` are
        not handled here.
    """
    logger.info("--- Iniciando download IUCN: %s ---", species_name)
    safe_name = species_name.replace(" ", "_")

    log_step(1, f"Buscar avaliacao IUCN para '{species_name}'")
    assessment = fetch_assessment(species_name, api_key)

    results = assessment.get("result", [])
    if not results:
        logger.warning("Nenhum resultado IUCN para '%s'. Especie pode nao estar avaliada.", species_name)
        return

    # Only the first result entry is used.
    res = results[0]
    taxon_id = res.get("taxonid", "")
    category = res.get("category", "")
    criteria = res.get("criteria", "")
    pop_trend = res.get("population_trend", "")
    assess_date = res.get("assessment_date", "")
    assess_year = assess_date[:4] if assess_date else ""

    logger.info("Categoria IUCN: %s | Criterios: %s | Tendencia: %s | Ano: %s",
                category, criteria, pop_trend, assess_year)

    # Informational only: the warning does not change what is downloaded.
    if category in ("CR", "EN"):
        logger.warning(
            "Especie '%s' e %s — dados de distribuicao podem ser restritos por razoes de seguranca.",
            species_name, category,
        )

    log_step(2, f"Buscar ocorrencias por pais para '{species_name}'")
    countries = fetch_country_occurrences(species_name, api_key)
    logger.info("Paises de ocorrencia: %d", len(countries))

    log_step(3, f"Buscar habitats adequados para '{species_name}'")
    habitats = fetch_habitats(species_name, api_key)
    logger.info("Habitats identificados: %d", len(habitats))

    log_step(4, "Padronizar registros para schema de saida")
    df = standardise_records(assessment, countries, species_name)
    logger.info("Registros no schema padrao: %d", len(df))

    log_step(5, "Gravar CSV de status IUCN")
    csv_path = output_dir / f"iucn_status_{safe_name}.csv"
    try:
        df.to_csv(csv_path, index=False)
        logger.info("Gravado: %s", csv_path)
    except OSError as e:
        logger.error(
            "Falha ao gravar CSV IUCN: %s\n Skill anterior: ecological-data-foundation", e,
        )
        raise

    # The habitats file is optional: a write failure is logged as a warning
    # and does not abort the species.
    if habitats:
        hab_df = pd.DataFrame(habitats)
        hab_df["species"] = species_name
        hab_path = output_dir / f"iucn_habitats_{safe_name}.csv"
        try:
            hab_df.to_csv(hab_path, index=False)
            logger.info("Habitats gravados: %s", hab_path)
        except OSError as e:
            logger.warning("Falha ao gravar habitats: %s", e)

    log_step(6, "Gravar metadados")
    save_metadata(output_dir, species_name, taxon_id, category, assess_year)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# ── Entry point ────────────────────────────────────────────────────────────────
|
|
274
|
+
|
|
275
|
+
def main():
    """Command-line entry point.

    Reads the API token from ``IUCN_REDLIST_KEY``, takes the species input
    and output directory from ``sys.argv`` (falling back to test defaults
    when fewer than two arguments are given), builds the species list from a
    CSV column or a single name, and downloads each species in turn.

    Exits with status 1 when the token is missing, when the species CSV is
    missing or unreadable or lacks a ``scientificName`` column, or when the
    download of at least one species raised an error.
    """
    logger.info("Script: download_from_iucn.py | Skill: %s", SKILL_NAME)

    # Check API key
    api_key = os.environ.get("IUCN_REDLIST_KEY", "")
    if not api_key:
        logger.error(
            "Variavel IUCN_REDLIST_KEY nao definida.\n Causa provavel: chave nao configurada no ambiente.\n Verifique: export IUCN_REDLIST_KEY=your_key (Linux/Mac) ou setx IUCN_REDLIST_KEY your_key (Windows)\n Skill anterior: ecological-data-foundation",
        )
        sys.exit(1)
    # No part of the token is written to the log: the log file persists on
    # disk and the next record states that the key is never displayed.
    logger.info("Chave IUCN detectada.")
    log_decision("api_key", "***", "lida de IUCN_REDLIST_KEY; nunca exibida em logs")

    argv = sys.argv[1:]

    if len(argv) < 2:
        species_input = "Panthera onca"
        output_dir = Path("output/iucn")
        logger.warning("Menos de 2 argumentos fornecidos. Usando valores padrao para teste.")
    else:
        species_input = argv[0]
        output_dir = Path(argv[1])

    logger.info("Species input : %s", species_input)
    logger.info("Output dir : %s", output_dir)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Build species list
    log_step(0, "Construir lista de especies")
    is_csv_arg = species_input.endswith(".csv")
    if is_csv_arg and not Path(species_input).exists():
        # A missing list file must not be queried as if it were a species name.
        logger.error(
            "Arquivo de entrada nao encontrado: %s\n Skill anterior: ecological-data-foundation",
            species_input,
        )
        sys.exit(1)

    if is_csv_arg:
        try:
            df_sp = pd.read_csv(species_input)
            if "scientificName" not in df_sp.columns:
                logger.error(
                    "Coluna 'scientificName' nao encontrada em: %s\n Skill anterior: ecological-data-foundation",
                    species_input,
                )
                sys.exit(1)
            species_list = df_sp["scientificName"].dropna().unique().tolist()
            logger.info("Modo batch: %d especies carregadas", len(species_list))
            log_decision("mode", "batch", "CSV valido com coluna scientificName")
        except Exception as e:
            logger.error(
                "Falha ao ler lista de especies: %s\n Skill anterior: ecological-data-foundation", e,
            )
            sys.exit(1)
    else:
        species_list = [species_input.strip()]
        logger.info("Modo especie unica: %s", species_list[0])
        log_decision("mode", "single_species", "argumento nao e arquivo CSV")

    # One failing species must not stop the batch, but it must be visible in
    # the final summary and in the exit status.
    failed = []
    for sp in species_list:
        try:
            download_species(sp, output_dir, api_key)
        except FileNotFoundError as e:
            failed.append(sp)
            logger.error(
                "Arquivo de entrada nao encontrado: %s\n Skill anterior: ecological-data-foundation", e,
            )
        except Exception as e:
            failed.append(sp)
            logger.error(
                "Falha ao baixar '%s' do IUCN: %s\n Causa provavel: chave invalida, especie nao avaliada ou API indisponivel.\n Skill anterior: ecological-data-foundation",
                sp, e,
            )

    if failed:
        logger.error(
            "Downloads IUCN com falha: %d de %d (%s). Verifique: %s",
            len(failed), len(species_list), ", ".join(map(str, failed)), output_dir,
        )
        sys.exit(1)

    logger.info("Todos os downloads IUCN concluidos. Verifique: %s", output_dir)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
# Usage: Rscript download_from_obis.R <species_name_or_list_csv> <output_dir> [year_from] [year_to] [wkt_geometry]
|
|
5
|
+
|
|
6
|
+
# ── Inline logger ─────────────────────────────────────────────────────────────
|
|
7
|
+
# Name of the skill this script belongs to; echoed in the start-up log line.
SKILL_NAME <- "ecological-data-foundation"

# Current local time rendered as "[YYYY-MM-DD HH:MM:SS]".
.log_ts <- function() {
  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
}

# Shared emitter: timestamp + level tag + sprintf-formatted text, sent through
# message() (i.e. to stderr). `...` is forwarded unchanged to sprintf().
.log_emit <- function(level_tag, ...) {
  message(.log_ts(), level_tag, sprintf(...))
}

# Level-specific wrappers. All take sprintf()-style arguments.
log_info <- function(...) {
  .log_emit(" [INFO]  ", ...)
}

log_warn <- function(...) {
  .log_emit(" [WARN]  ", ...)
}

log_error <- function(...) {
  .log_emit(" [ERROR] ", ...)
}

# Announce a numbered pipeline step (`n`) with a short description (`d`).
log_step <- function(n, d) {
  log_info("-- STEP %d: %s", n, d)
}

# Record a decision: variable name (`v`), chosen value (`val`) and reason (`why`).
log_decision <- function(v, val, why) {
  log_info("DECISION | %s = %s | %s", v, val, why)
}

# Make sure a local "logs" directory exists (no warning if it already does).
dir.create("logs", recursive = TRUE, showWarnings = FALSE)
|
|
15
|
+
|
|
16
|
+
#
|
|
17
|
+
# Arguments:
|
|
18
|
+
# species_name_or_list_csv : Species name (e.g., "Chelonia mydas") or path to CSV
|
|
19
|
+
# with column "scientificName"
|
|
20
|
+
# output_dir : Directory to write outputs (created if absent)
|
|
21
|
+
# year_from : Minimum year of observation (optional, default: 1950)
|
|
22
|
+
# year_to : Maximum year of observation (optional, default: current year)
|
|
23
|
+
# wkt_geometry : WKT polygon to restrict query (optional, e.g., "POLYGON((-80 -30,-80 10,-30 10,-30 -30,-80 -30))")
|
|
24
|
+
#
|
|
25
|
+
# Outputs (per species):
|
|
26
|
+
# occurrences_raw_OBIS_{species}_{date}.csv — standardised occurrence records
|
|
27
|
+
# download_metadata_OBIS_{species}.txt — download provenance and citation
|
|
28
|
+
#
|
|
29
|
+
# Standard output schema:
|
|
30
|
+
# species, decimalLatitude, decimalLongitude, eventDate, countryCode,
|
|
31
|
+
# basisOfRecord, coordinateUncertaintyInMeters, datasetName, occurrenceID,
|
|
32
|
+
# source, download_doi
|
|
33
|
+
# Extra OBIS columns:
|
|
34
|
+
# depth, marine
|
|
35
|
+
|
|
36
|
+
suppressPackageStartupMessages(library(robis))
|
|
37
|
+
suppressPackageStartupMessages(library(dplyr))
|
|
38
|
+
suppressPackageStartupMessages(library(readr))
|
|
39
|
+
|
|
40
|
+
# ── 1. Parse arguments ───────────────────────────────────────────────────────
|
|
41
|
+
# Read the command-line arguments and resolve the run configuration:
#   species_input, output_dir, year_from, year_to, wkt_geometry.
# With fewer than two arguments the script falls back to test defaults.
log_step(1, "Analisar argumentos da linha de comando")
args <- commandArgs(trailingOnly = TRUE)

default_year_from <- 1950L
default_year_to <- as.integer(format(Sys.Date(), "%Y"))

# Parse the optional year found at `position` in `cli_args`.
# A missing or empty argument yields `default` (mirrors how an empty WKT
# argument is treated below). A non-numeric value aborts the script instead of
# silently becoming NA, which would otherwise produce query dates such as
# "NA-01-01".
parse_year_arg <- function(cli_args, position, default, label) {
  if (length(cli_args) < position) {
    return(default)
  }
  raw_value <- trimws(cli_args[position])
  if (is.na(raw_value) || !nzchar(raw_value)) {
    return(default)
  }
  year <- suppressWarnings(as.integer(raw_value))
  if (is.na(year)) {
    log_error(
      "Argumento '%s' invalido: '%s'\nCausa provavel: o ano deve ser um numero inteiro (ex.: 1950).\nSkill anterior: ecological-data-foundation",
      label, raw_value
    )
    stop(sprintf("Invalid %s: '%s'", label, raw_value), call. = FALSE)
  }
  year
}

if (length(args) < 2) {
  species_input <- "Chelonia mydas"
  output_dir <- "output/obis"
  year_from <- default_year_from
  year_to <- default_year_to
  wkt_geometry <- NULL
  log_warn("Menos de 2 argumentos fornecidos. Usando valores padrao para teste.")
} else {
  species_input <- args[1]
  output_dir <- args[2]
  year_from <- parse_year_arg(args, 3L, default_year_from, "year_from")
  year_to <- parse_year_arg(args, 4L, default_year_to, "year_to")
  wkt_geometry <- if (length(args) >= 5 && args[5] != "") args[5] else NULL
}

# An inverted range can never match a record; fail before calling the API.
if (year_from > year_to) {
  log_error(
    "Intervalo de anos invalido: %d > %d\nSkill anterior: ecological-data-foundation",
    year_from, year_to
  )
  stop("year_from must not be greater than year_to", call. = FALSE)
}

log_info("Script: download_from_obis.R | Skill: %s", SKILL_NAME)
log_info("Species input : %s", species_input)
log_info("Output dir    : %s", output_dir)
log_info("Year range    : %d - %d", year_from, year_to)
log_info(
  "WKT geometry  : %s",
  if (is.null(wkt_geometry)) "nenhum (global)" else wkt_geometry
)

log_decision("absence", "FALSE",
             "apenas registros de presenca confirmada; OBIS inclui dados de ausencia em alguns datasets")
log_decision("year_from", year_from, "filtro temporal; 1950 cobre era moderna de registros marinhos")
|
|
68
|
+
|
|
69
|
+
# ── 2. Create output directory ───────────────────────────────────────────────
|
|
70
|
+
# Create the output directory, then resolve `species_list`: either the
# distinct names in the "scientificName" column of a CSV (batch mode) or the
# single name given on the command line.
log_step(2, "Criar diretorio de saida")
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# dir.create() reports failure only through a (suppressed) warning, so verify
# the result here rather than failing later once per species.
if (!dir.exists(output_dir)) {
  log_error(
    "Nao foi possivel criar o diretorio de saida: %s\nSkill anterior: ecological-data-foundation",
    output_dir
  )
  stop("Cannot create output directory", call. = FALSE)
}

log_step(3, "Construir lista de especies")
if (grepl("\\.csv$", species_input, ignore.case = TRUE) && file.exists(species_input)) {
  # Only the read itself is wrapped, so a missing column is logged once.
  species_df <- tryCatch(
    read_csv(species_input, show_col_types = FALSE),
    error = function(e) {
      log_error(
        "Falha ao ler lista de especies: %s\nSkill anterior: ecological-data-foundation",
        conditionMessage(e)
      )
      stop(e)
    }
  )
  if (!"scientificName" %in% names(species_df)) {
    log_error(
      "Coluna 'scientificName' nao encontrada em: %s\nCausa provavel: CSV mal formatado.\nSkill anterior: ecological-data-foundation",
      species_input
    )
    stop("Missing column 'scientificName'")
  }
  species_list <- unique(trimws(as.character(species_df$scientificName)))
  # Blank cells arrive as NA or "" and are not species names; querying them
  # would waste a request and write files named after "NA".
  species_list <- species_list[!is.na(species_list) & nzchar(species_list)]
  log_info("Modo batch: %d especies carregadas", length(species_list))
  log_decision("mode", "batch", "CSV valido com coluna scientificName")
} else {
  species_list <- trimws(species_input)
  log_info("Modo especie unica: %s", species_list)
  log_decision("mode", "single_species", "argumento nao e arquivo CSV")
}

# Covers both an all-blank CSV column and an empty single-species argument.
species_list <- species_list[!is.na(species_list) & nzchar(species_list)]
if (length(species_list) == 0) {
  log_error(
    "Nenhum nome de especie valido em: %s\nSkill anterior: ecological-data-foundation",
    species_input
  )
  stop("Empty species list", call. = FALSE)
}
|
|
100
|
+
|
|
101
|
+
# ── 4. Download function ─────────────────────────────────────────────────────
|
|
102
|
+
# Download OBIS presence records for one species, standardise them to the
# shared occurrence schema, and write a CSV plus a provenance text file.
#
# Arguments:
#   sp_name   : scientific name passed to robis::occurrence().
#   out_dir   : directory for the two output files (default: script-level
#               `output_dir`).
#   from_year : first year of the query window (default: `year_from`).
#   to_year   : last year of the query window (default: `year_to`).
#   geometry  : optional WKT string restricting the query, or NULL for no
#               spatial filter (default: `wkt_geometry`).
#
# Returns (invisibly) the path of the CSV written, or NULL when the query
# returned no records or every record was dropped by the quality-flag filter.
# Errors from the API call and from the CSV write are logged and re-raised; a
# failure to write the metadata file is logged only.
download_obis_species <- function(sp_name,
                                  out_dir = output_dir,
                                  from_year = year_from,
                                  to_year = year_to,
                                  geometry = wkt_geometry) {
  log_info("--- Iniciando download OBIS: %s ---", sp_name)
  today_str <- format(Sys.Date(), "%Y%m%d")
  # Spaces become underscores; characters that are invalid in file names on
  # common platforms are replaced too, so the name cannot alter the path.
  safe_name <- gsub("[/\\\\:*?\"<>|]", "_", gsub(" ", "_", sp_name))

  # Build query
  query_args <- list(
    scientificname = sp_name,
    absence        = FALSE,
    startdate      = paste0(from_year, "-01-01"),
    enddate        = paste0(to_year, "-12-31")
  )
  if (!is.null(geometry)) {
    query_args$geometry <- geometry
  }

  occ_raw <- tryCatch({
    do.call(robis::occurrence, query_args)
  }, error = function(e) {
    log_error(
      "Falha em robis::occurrence para '%s': %s\nCausa provavel: sem conexao com a internet ou API OBIS indisponivel.\nVerifique: https://api.obis.org/\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e)
    )
    stop(e)
  })

  if (is.null(occ_raw) || nrow(occ_raw) == 0) {
    log_warn("Nenhum registro OBIS encontrado para '%s'.", sp_name)
    return(invisible(NULL))
  }

  n_raw <- nrow(occ_raw)
  log_info("Registros brutos recuperados: %d", n_raw)

  # ── Apply OBIS quality flags ───────────────────────────────────────────────
  # Drop records whose `flags` value mentions any of the listed issues.
  # Rows with a missing `flags` value are kept (grepl() is FALSE on NA).
  if ("flags" %in% names(occ_raw)) {
    bad_flags <- c("NO_COORD", "ZERO_COORD", "ON_LAND", "DEPTH_EXCEEDS_BATH")
    is_flagged <- grepl(paste(bad_flags, collapse = "|"),
                        occ_raw$flags, ignore.case = TRUE)
    occ_raw <- occ_raw[!is_flagged, ]
    log_info("Registros apos filtro de flags OBIS: %d", nrow(occ_raw))

    # data.frame() below cannot combine the length-1 constants with
    # zero-length columns, so stop here when nothing survived the filter.
    if (nrow(occ_raw) == 0) {
      log_warn("Todos os registros de '%s' foram removidos pelo filtro de flags OBIS.", sp_name)
      return(invisible(NULL))
    }
  }

  # ── Standardise to output schema ──────────────────────────────────────────
  # Return the first of the candidate columns present in `df`, else all-NA.
  get_col <- function(df, ...) {
    for (col in c(...)) {
      if (col %in% names(df)) return(df[[col]])
    }
    rep(NA, nrow(df))
  }

  std <- data.frame(
    species                       = sp_name,
    decimalLatitude               = as.numeric(get_col(occ_raw, "decimalLatitude", "latitude")),
    decimalLongitude              = as.numeric(get_col(occ_raw, "decimalLongitude", "longitude")),
    eventDate                     = as.character(get_col(occ_raw, "eventDate", "date_start")),
    countryCode                   = as.character(get_col(occ_raw, "countryCode", "country")),
    basisOfRecord                 = as.character(get_col(occ_raw, "basisOfRecord")),
    coordinateUncertaintyInMeters = as.numeric(get_col(occ_raw, "coordinateUncertaintyInMeters")),
    datasetName                   = as.character(get_col(occ_raw, "datasetName", "dataset_name")),
    occurrenceID                  = as.character(get_col(occ_raw, "occurrenceID", "id")),
    source                        = "OBIS",
    download_doi                  = NA_character_,
    depth                         = as.numeric(get_col(occ_raw, "depth")),
    marine                        = TRUE,
    stringsAsFactors              = FALSE
  )

  # Replace empty basisOfRecord
  std$basisOfRecord[is.na(std$basisOfRecord) | std$basisOfRecord == ""] <- "OCCURRENCE"

  # Remove records with missing coordinates
  n_before <- nrow(std)
  std <- std[!is.na(std$decimalLatitude) & !is.na(std$decimalLongitude), ]
  n_removed <- n_before - nrow(std)
  if (n_removed > 0) {
    log_warn("%d registros removidos por coordenadas ausentes.", n_removed)
  }

  n_final <- nrow(std)
  log_info("Registros com coordenadas validas: %d", n_final)

  if (n_final < 30) {
    log_warn(
      "Registros insuficientes para analise confiavel (n = %d). Considere relaxar filtros de datas ou area.",
      n_final
    )
  }

  # ── Save CSV ───────────────────────────────────────────────────────────────
  csv_path <- file.path(out_dir, paste0("occurrences_raw_OBIS_", safe_name, "_", today_str, ".csv"))
  tryCatch({
    write_csv(std, csv_path)
    log_info("Gravado: %s (%d registros)", csv_path, n_final)
  }, error = function(e) {
    log_error(
      "Falha ao gravar CSV para '%s': %s\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e)
    )
    stop(e)
  })

  # ── Save metadata ──────────────────────────────────────────────────────────
  geometry_label <- if (is.null(geometry)) "none (global)" else geometry
  meta_lines <- c(
    paste("Species:", sp_name),
    "Source: Ocean Biodiversity Information System (OBIS) — https://obis.org",
    "API endpoint: https://api.obis.org/v3/occurrence",
    "Absence records excluded: TRUE",
    "OBIS quality flags applied: TRUE (NO_COORD, ZERO_COORD, ON_LAND, DEPTH_EXCEEDS_BATH removed)",
    paste("Year range:", from_year, "-", to_year),
    paste("WKT geometry:", geometry_label),
    paste("n_records:", n_final),
    paste("Download date:", Sys.Date()),
    # sprintf() avoids the stray spaces paste() put inside the parentheses.
    sprintf(
      "Citation: OBIS (%s) Ocean Biodiversity Information System. Intergovernmental Oceanographic Commission of UNESCO. www.obis.org. Accessed on %s",
      format(Sys.Date(), "%Y"), format(Sys.Date())
    ),
    "License: CC0 1.0 (https://creativecommons.org/publicdomain/zero/1.0/)"
  )
  meta_path <- file.path(out_dir, paste0("download_metadata_OBIS_", safe_name, ".txt"))
  # Best effort: the CSV is already on disk, so a metadata failure is logged
  # but does not abort this species.
  tryCatch({
    writeLines(meta_lines, meta_path)
    log_info("Gravado: %s", meta_path)
  }, error = function(e) {
    log_error(
      "Falha ao gravar metadados para '%s': %s\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e)
    )
  })

  invisible(csv_path)
}
|
|
233
|
+
|
|
234
|
+
# ── 5. Run for all species ───────────────────────────────────────────────────
|
|
235
|
+
# Process every requested species in order. A failure for one species is
# logged and does not stop the remaining downloads.
log_step(4, "Executar download OBIS para todas as especies")

for (sp in species_list) {
  # Handler defined per iteration so the message names the current species.
  report_failure <- function(err) {
    log_error(
      "Falha ao baixar '%s' do OBIS: %s\nCausa provavel: problema de rede ou especie nao encontrada.\nSkill anterior: ecological-data-foundation",
      sp, conditionMessage(err)
    )
  }
  tryCatch(download_obis_species(sp), error = report_failure)
}

log_info("Todos os downloads OBIS concluidos. Verifique: %s", output_dir)
|