ecological-agent-skills 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT_CONTEXT.md +191 -0
- package/CATALOG.md +329 -0
- package/LICENSE +692 -0
- package/README.md +347 -0
- package/bin/install.mjs +168 -0
- package/docs/comparison-with-alternatives.md +38 -0
- package/docs/global-examples-index.md +103 -0
- package/docs/repository-statistics.md +101 -0
- package/docs/theoretical-foundations.md +188 -0
- package/environment.yaml +106 -0
- package/examples/community/arctic_tundra_vegetation_example.md +247 -0
- package/examples/community/bird_landuse_example.md +63 -0
- package/examples/community/phytoplankton_reservoir_example.md +60 -0
- package/examples/community/reef_fish_indopacific_example.md +221 -0
- package/examples/impact/baci_road_example.md +57 -0
- package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
- package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
- package/examples/occupancy/puma_camera_example.md +61 -0
- package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
- package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
- package/examples/sdm/anteater_cerrado_example.md +69 -0
- package/examples/sdm/jaguar_amazon_example.md +80 -0
- package/examples/sdm/koala_climate_change_example.md +170 -0
- package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
- package/package.json +43 -0
- package/renv.lock +194 -0
- package/skills/SKILL_INDEX.json +1020 -0
- package/skills/acoustic-monitoring/SKILL.md +163 -0
- package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
- package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
- package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
- package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
- package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
- package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
- package/skills/biostatistics-workbench/SKILL.md +140 -0
- package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
- package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
- package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
- package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
- package/skills/camera-trap-processing/SKILL.md +159 -0
- package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
- package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
- package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
- package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
- package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
- package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
- package/skills/community-ecology-ordination/SKILL.md +133 -0
- package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
- package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
- package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
- package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
- package/skills/ecological-data-foundation/SKILL.md +129 -0
- package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
- package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
- package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
- package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
- package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
- package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
- package/skills/ecological-impact-assessment/SKILL.md +123 -0
- package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
- package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
- package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
- package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
- package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
- package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
- package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
- package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
- package/skills/ecosystem-services-assessment/SKILL.md +125 -0
- package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
- package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
- package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
- package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
- package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
- package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
- package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
- package/skills/environmental-time-series/SKILL.md +125 -0
- package/skills/environmental-time-series/examples/example-prompts.md +33 -0
- package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
- package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
- package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
- package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
- package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
- package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
- package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
- package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
- package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
- package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
- package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
- package/skills/landscape-connectivity/SKILL.md +170 -0
- package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
- package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
- package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
- package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
- package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
- package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
- package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
- package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
- package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
- package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
- package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
- package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
- package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
- package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
- package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
- package/skills/occupancy-and-detection/SKILL.md +126 -0
- package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
- package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
- package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
- package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
- package/skills/population-viability-analysis/SKILL.md +161 -0
- package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
- package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
- package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
- package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
- package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
- package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
- package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
- package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
- package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
- package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
- package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
- package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
- package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
- package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
- package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
- package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
- package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
- package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
- package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
- package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
- package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
- package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
- package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
- package/skills/spatial-prioritization/SKILL.md +162 -0
- package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
- package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
- package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
- package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
- package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
- package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
- package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
- package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
- package/skills/species-distribution-modeling/SKILL.md +139 -0
- package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
- package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
- package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
- package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
- package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
- package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
- package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
- package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
- package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
- package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
- package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
- package/templates/SKILL_TEMPLATE.md +225 -0
- package/templates/checklists/data-submission-checklist.md +38 -0
- package/templates/checklists/post-analysis-checklist.md +55 -0
- package/templates/checklists/pre-analysis-checklist.md +31 -0
- package/templates/prompts/debug-skill.md +47 -0
- package/templates/prompts/invoke-skill.md +34 -0
- package/templates/prompts/invoke-workflow.md +45 -0
- package/templates/reports/technical-report-template.md +80 -0
- package/templates/scripts/logger_setup.R +79 -0
- package/templates/scripts/logger_setup.py +119 -0
- package/templates/scripts/params_loader.R +28 -0
- package/templates/scripts/params_loader.py +38 -0
- package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
- package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
- package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
- package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
- package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
- package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
- package/workflows/produce-technical-report/WORKFLOW.md +113 -0
- package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
- package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
- package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
- package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
- package/workflows/run-population-viability/WORKFLOW.md +90 -0
- package/workflows/run-sdm-study/WORKFLOW.md +99 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
"""Download occurrence records from GBIF via pygbif.
|
|
5
|
+
|
|
6
|
+
Usage: python download_from_gbif.py <species_name_or_list_csv> <output_dir> [country_code] [year_from] [year_to]
|
|
7
|
+
|
|
8
|
+
Arguments:
|
|
9
|
+
species_name_or_list_csv : Species name (e.g., "Panthera onca") or path to CSV
|
|
10
|
+
with column 'scientificName'
|
|
11
|
+
output_dir : Directory for outputs (created if absent)
|
|
12
|
+
country_code : ISO 3166-1 alpha-2 code to restrict records (optional)
|
|
13
|
+
year_from : Minimum occurrence year (default: 1950)
|
|
14
|
+
year_to : Maximum occurrence year (default: current year)
|
|
15
|
+
|
|
16
|
+
Outputs (per species):
|
|
17
|
+
occurrences_raw_GBIF_{species}_{date}.csv — occurrence records
|
|
18
|
+
download_metadata_{species}.txt — download info including GBIF DOI
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
import sys
|
|
23
|
+
from datetime import datetime
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
SKILL_NAME = "ecological-data-foundation"
|
|
27
|
+
_LOG_DIR = Path("logs")
|
|
28
|
+
_LOG_DIR.mkdir(parents=True, exist_ok=True)
|
|
29
|
+
_log_file = _LOG_DIR / f"skill_{SKILL_NAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
|
|
30
|
+
logging.basicConfig(
|
|
31
|
+
level=logging.INFO,
|
|
32
|
+
format="[%(asctime)s] [%(levelname)s] [" + SKILL_NAME + "] %(message)s",
|
|
33
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
34
|
+
handlers=[
|
|
35
|
+
logging.StreamHandler(sys.stdout),
|
|
36
|
+
logging.FileHandler(_log_file, encoding="utf-8"),
|
|
37
|
+
],
|
|
38
|
+
)
|
|
39
|
+
logger = logging.getLogger(SKILL_NAME)
|
|
40
|
+
|
|
41
|
+
def log_step(n: int, desc: str) -> None:
    """Emit an INFO record announcing numbered pipeline step ``n``.

    Args:
        n: Step number shown in the log line.
        desc: Short description of what the step does.
    """
    logger.log(logging.INFO, "-- STEP %d: %s", n, desc)
|
|
43
|
+
|
|
44
|
+
def log_decision(var: str, val, why: str) -> None:
    """Emit an INFO record documenting a parameter choice and its rationale.

    Args:
        var: Name of the parameter or setting that was decided.
        val: Value chosen for it (formatted with ``%s``).
        why: Justification recorded next to the value.
    """
    logger.log(logging.INFO, "DECISION | %s = %s | %s", var, val, why)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
import os
|
|
49
|
+
import time
|
|
50
|
+
import csv
|
|
51
|
+
from datetime import date
|
|
52
|
+
|
|
53
|
+
try:
|
|
54
|
+
import pandas as pd
|
|
55
|
+
import pygbif.occurrences as occ
|
|
56
|
+
import pygbif.species as spp
|
|
57
|
+
except ImportError as e:
|
|
58
|
+
logger.error(
|
|
59
|
+
"Dependencia ausente: %s\n Instale com: pip install pygbif pandas\n Skill anterior: ecological-data-foundation",
|
|
60
|
+
e,
|
|
61
|
+
)
|
|
62
|
+
sys.exit(1)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ── Constants ─────────────────────────────────────────────────────────────────
|
|
66
|
+
# basisOfRecord values requested from GBIF by both download paths; the same
# list is written to the per-species metadata file.
BASIS_OF_RECORD = [
    "HUMAN_OBSERVATION",
    "MACHINE_OBSERVATION",
    "PRESERVED_SPECIMEN",
]
# Upper bound (metres) applied to coordinateUncertaintyInMeters in both
# download paths.
COORD_UNCERTAINTY_MAX = 10000
# Seconds to sleep between status checks while an async download is prepared.
POLL_INTERVAL = 30
# Passed as `limit` to occ.search.
# NOTE(review): GBIF's search endpoint appears to cap a single page at 300
# records - confirm this value has the intended effect.
SEARCH_LIMIT = 100000
# Approximate record count above which download_species() switches from
# occ.search to the asynchronous occ.download path.
LARGE_DATASET_THRESHOLD = 50000
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ── Helper functions ──────────────────────────────────────────────────────────
|
|
78
|
+
|
|
79
|
+
def get_taxon_key(species_name: str) -> int | None:
    """Look up the GBIF backbone taxon key for a species name.

    The backbone match reports how the name was matched in ``matchType``.
    A ``HIGHERRANK`` match means the species itself was not found and the
    returned key belongs to a parent taxon (e.g. the genus); using it would
    silently download records for the wrong taxon, so it is rejected here.
    A ``FUZZY`` match is accepted but logged so the user can verify it.

    Args:
        species_name: Scientific name to resolve, e.g. "Panthera onca".

    Returns:
        The backbone ``usageKey``, or ``None`` when no species-level match
        was found.

    Raises:
        Exception: Whatever ``spp.name_backbone`` raised (logged, then
            re-raised unchanged).
    """
    try:
        result = spp.name_backbone(name=species_name, rank="SPECIES")
    except Exception as e:
        logger.error(
            "Falha ao buscar taxon key no backbone GBIF para '%s': %s\n"
            " Causa provavel: sem conexao com a internet ou API do GBIF indisponivel.\n"
            " Skill anterior: ecological-data-foundation",
            species_name, e,
        )
        raise

    key = result.get("usageKey")
    # matchType may be absent in some responses; in that case only the
    # presence of usageKey decides, exactly as before.
    match_type = result.get("matchType")

    if key is None or match_type == "NONE":
        logger.warning("Nenhum taxon key GBIF encontrado para '%s'", species_name)
        return None

    if match_type == "HIGHERRANK":
        logger.warning(
            "Correspondencia GBIF para '%s' e apenas de nivel superior (%s: %s); "
            "ignorando para nao baixar registros de outro taxon.",
            species_name, result.get("rank"), result.get("scientificName"),
        )
        return None

    if match_type == "FUZZY":
        logger.warning(
            "Correspondencia aproximada (FUZZY) para '%s': usando '%s'. Verifique a grafia.",
            species_name, result.get("scientificName"),
        )

    return key
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def count_records(taxon_key: int) -> int:
    """Return an approximate record count for a taxon.

    Only the taxon key, ``hasCoordinate`` and ``occurrenceStatus`` are sent;
    the year, country, basis-of-record and uncertainty filters used by the
    actual download are not applied, so the figure is an upper estimate.

    Any exception from ``occ.count`` is logged as a warning and reported as
    0, which makes the caller treat the dataset as small.
    NOTE(review): confirm the installed pygbif ``occ.count`` accepts these
    keyword arguments - if it rejects them, this always returns 0.

    Args:
        taxon_key: GBIF backbone taxon key.

    Returns:
        The value returned by ``occ.count``, or 0 if the call failed.
    """
    query = {
        "taxonKey": taxon_key,
        "hasCoordinate": True,
        "occurrenceStatus": "PRESENT",
    }
    try:
        total = occ.count(**query)
    except Exception as err:
        logger.warning(
            "Falha ao consultar contagem de registros para taxon_key=%d: %s. Assumindo dataset pequeno.",
            taxon_key, err,
        )
        total = 0
    return total
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def search_download(taxon_key: int, country_code: str | None,
                    year_from: int, year_to: int) -> tuple[list[dict], str | None]:
    """Download occurrences via ``occ.search`` (small datasets). No DOI.

    The GBIF search endpoint returns at most 300 records per request, so the
    results are fetched page by page (``limit`` / ``offset``) until GBIF
    reports ``endOfRecords``, a page comes back empty, or ``SEARCH_LIMIT``
    records have been collected.

    Args:
        taxon_key: GBIF backbone taxon key.
        country_code: ISO 3166-1 alpha-2 code to restrict records, or
            ``None`` / empty for no country filter.
        year_from: First year (inclusive) of the occurrence year range.
        year_to: Last year (inclusive) of the occurrence year range.

    Returns:
        ``(records, None)`` - the list of occurrence dicts and ``None`` in
        place of a DOI, because search results are not citable.

    Raises:
        Exception: Whatever ``occ.search`` raised (logged, then re-raised).
    """
    page_size = 300  # per-request maximum of the GBIF occurrence search API

    filters = dict(
        taxonKey=taxon_key,
        hasCoordinate=True,
        occurrenceStatus="PRESENT",
        # Several values must be sent as repeated query parameters; a single
        # comma-joined string is not a valid basisOfRecord value.
        basisOfRecord=list(BASIS_OF_RECORD),
        coordinateUncertaintyInMeters=f"0,{COORD_UNCERTAINTY_MAX}",
        year=f"{year_from},{year_to}",
        fields="minimal",
    )
    if country_code:
        filters["country"] = country_code

    records: list[dict] = []
    while len(records) < SEARCH_LIMIT:
        # Never request past SEARCH_LIMIT on the final page.
        limit = min(page_size, SEARCH_LIMIT - len(records))
        try:
            result = occ.search(limit=limit, offset=len(records), **filters)
        except Exception as e:
            logger.error(
                "Falha em occ.search (taxon_key=%d): %s\n"
                " Causa provavel: sem conexao com a internet ou API do GBIF indisponivel.\n"
                " Skill anterior: ecological-data-foundation",
                taxon_key, e,
            )
            raise

        page = result.get("results", [])
        records.extend(page)
        # Stop on an empty page too, so a response without a usable
        # endOfRecords flag cannot loop forever.
        if not page or result.get("endOfRecords", True):
            break

    return records, None  # None = no DOI available
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def async_download(taxon_key: int, country_code: str | None,
                   year_from: int, year_to: int) -> tuple[list[dict], str]:
    """Download occurrences via ``occ.download`` (DOI generated). For large datasets.

    Submits a download request, polls ``occ.download_meta`` every
    ``POLL_INTERVAL`` seconds until the status is ``SUCCEEDED`` (there is no
    upper bound on the wait), then reads the file at the reported
    ``downloadLink`` as a tab-separated table. Malformed lines in that file
    are skipped.

    Args:
        taxon_key: GBIF backbone taxon key.
        country_code: ISO 3166-1 alpha-2 code to restrict records, or
            ``None`` / empty for no country filter.
        year_from: First year (inclusive) of the occurrence year range.
        year_to: Last year (inclusive) of the occurrence year range.

    Returns:
        ``(records, doi)`` - the rows as a list of dicts and the DOI string
        from the download metadata ("" if the metadata has none).

    Raises:
        RuntimeError: The download ended as FAILED, KILLED or CANCELLED.
        Exception: Any error from ``occ.download``, ``occ.download_meta`` or
            ``pd.read_csv`` (logged, then re-raised unchanged).
    """
    # NOTE(review): the `in` predicate is given a bare comma-joined list;
    # confirm this matches the syntax pygbif's occ.download expects.
    basis_values = ",".join(BASIS_OF_RECORD)
    predicates = [
        f"taxonKey = {taxon_key}",
        "hasCoordinate = TRUE",
        "occurrenceStatus = PRESENT",
        f"basisOfRecord in {basis_values}",
        f"coordinateUncertaintyInMeters <= {COORD_UNCERTAINTY_MAX}",
        f"year >= {year_from}",
        f"year <= {year_to}",
    ]
    if country_code:
        predicates += [f"country = {country_code}"]

    logger.info("Iniciando download assincrono (DOI sera gerado)...")
    try:
        submission = occ.download(predicates)
    except Exception as err:
        logger.error(
            "Falha ao iniciar occ.download (taxon_key=%d): %s\n"
            " Causa provavel: credenciais GBIF ausentes ou invalidas (GBIF_USER, GBIF_PWD, GBIF_EMAIL).\n"
            " Configure via: export GBIF_USER=... (Linux/Mac) ou setx GBIF_USER ... (Windows).\n"
            " Skill anterior: ecological-data-foundation",
            taxon_key, err,
        )
        raise

    # The first element of the occ.download result is used as the download key.
    dl_key = submission[0]
    logger.info("Download key: %s", dl_key)
    logger.info("Aguardando GBIF preparar o download...")

    # Poll until GBIF reports a terminal state.
    terminal_failures = ("FAILED", "KILLED", "CANCELLED")
    while True:
        try:
            meta = occ.download_meta(dl_key)
        except Exception as err:
            logger.error(
                "Falha ao consultar status do download '%s': %s\n"
                " Causa provavel: sem conexao com a internet.\n"
                " Skill anterior: ecological-data-foundation",
                dl_key, err,
            )
            raise

        status = meta.get("status", "UNKNOWN")
        logger.info("Status do download: %s", status)

        if status == "SUCCEEDED":
            break
        if status in terminal_failures:
            logger.error(
                "Download GBIF falhou com status '%s' (key=%s)\n"
                " Causa provavel: predicados invalidos ou erro interno do GBIF.\n"
                " Verifique em: https://www.gbif.org/user/download\n"
                " Skill anterior: ecological-data-foundation",
                status, dl_key,
            )
            raise RuntimeError(f"GBIF download failed with status: {status}")

        # Any other status (including a missing one) means "not ready yet".
        time.sleep(POLL_INTERVAL)

    doi = meta.get("doi", "")
    logger.info("DOI gerado: %s", doi)

    # Fetch the prepared file straight from its download link via pandas.
    download_url = meta.get("downloadLink", "")
    logger.info("Buscando dados de: %s", download_url)
    try:
        table = pd.read_csv(
            download_url,
            sep="\t",
            on_bad_lines="skip",
            low_memory=False,
        )
    except Exception as err:
        logger.error(
            "Falha ao importar dados do download GBIF (url=%s): %s\n"
            " Causa provavel: arquivo corrompido ou link expirado.\n"
            " Skill anterior: ecological-data-foundation",
            download_url, err,
        )
        raise

    return table.to_dict("records"), doi
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def save_metadata(output_dir: Path, species_name: str, taxon_key: int,
                  dl_key: str | None, doi: str | None,
                  n_records: int, country_code: str | None,
                  year_from: int, year_to: int) -> None:
    """Write ``download_metadata_{species}.txt`` with citation information.

    The file is written as UTF-8 regardless of the platform default, because
    the text may contain non-ASCII characters (the no-DOI citation does) and
    the run log is UTF-8 as well.

    Args:
        output_dir: Existing directory that receives the file.
        species_name: Scientific name; spaces become underscores in the
            file name.
        taxon_key: GBIF backbone taxon key used for the download.
        dl_key: GBIF download key, or ``None`` when ``occ.search`` was used.
        doi: DOI of the download; ``None`` or "" when no DOI exists.
        n_records: Number of records retrieved.
        country_code: Country filter applied, or ``None`` / empty for none.
        year_from: First year of the requested range.
        year_to: Last year of the requested range.

    Raises:
        OSError: The file could not be written (logged, then re-raised).
    """
    safe_name = species_name.replace(" ", "_")
    meta_path = output_dir / f"download_metadata_{safe_name}.txt"

    # Read the clock once so the citation year and the access date always
    # refer to the same day.
    today_date = date.today()
    today = today_date.isoformat()
    year = today_date.year

    if doi:
        citation = (f"GBIF.org ({year}) GBIF Occurrence Download. "
                    f"https://doi.org/{doi} Accessed on {today}")
    else:
        citation = ("occ.search used — no citable DOI. "
                    "Re-run with async download for publication.")

    lines = [
        f"Species: {species_name}",
        f"GBIF taxon key: {taxon_key}",
        f"Download key: {dl_key or 'N/A (occ.search used)'}",
        f"DOI: {doi or 'NOT AVAILABLE'}",
        f"Citation: {citation}",
        f"Download date: {today}",
        f"n_records: {n_records}",
        f"year_from: {year_from}",
        f"year_to: {year_to}",
        f"country_filter: {country_code or 'none'}",
        f"basisOfRecord: {', '.join(BASIS_OF_RECORD)}",
        f"coordinateUncertainty_max_m: {COORD_UNCERTAINTY_MAX}",
    ]

    try:
        meta_path.write_text("\n".join(lines), encoding="utf-8")
        logger.info("Metadados gravados: %s", meta_path)
    except OSError as e:
        logger.error(
            "Falha ao gravar metadados em '%s': %s\n"
            " Causa provavel: sem permissao de escrita no diretorio.\n"
            " Skill anterior: ecological-data-foundation",
            meta_path, e,
        )
        raise
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
# ── Main download logic (single species) ──────────────────────────────────────
|
|
250
|
+
|
|
251
|
+
def download_species(species_name: str, output_dir: Path,
                     country_code: str | None,
                     year_from: int, year_to: int) -> None:
    """Run the full download for one species and write its output files.

    Steps: resolve the taxon key, estimate the record count, download via
    ``async_download`` when the estimate exceeds ``LARGE_DATASET_THRESHOLD``
    and via ``search_download`` otherwise, write the occurrence CSV (only
    when records were returned), then write the metadata file. A species
    with no taxon key is skipped with a warning and produces no files.

    Args:
        species_name: Scientific name, e.g. "Panthera onca".
        output_dir: Directory that receives the CSV and metadata file.
        country_code: ISO 3166-1 alpha-2 country filter, or ``None``.
        year_from: First year of the requested range.
        year_to: Last year of the requested range.

    Raises:
        OSError: The CSV or metadata file could not be written.
        Exception: Anything propagated by the lookup or download helpers.
    """
    logger.info("--- Iniciando download: %s ---", species_name)
    # Both are fixed before any network call so the file name reflects the
    # day the run started.
    date_stamp = date.today().strftime("%Y%m%d")
    file_stem = species_name.replace(" ", "_")

    # Step 1: resolve the species name to a GBIF taxon key.
    log_step(1, f"Buscar taxon key GBIF para '{species_name}'")
    taxon_key = get_taxon_key(species_name)
    if taxon_key is None:
        logger.warning("Pulando '%s' — nenhum taxon key GBIF encontrado.", species_name)
        return
    logger.info("Taxon key GBIF: %d", taxon_key)

    # Step 2: the approximate count decides which download path is used.
    log_step(2, "Estimar contagem de registros para escolha do metodo de download")
    approx_n = count_records(taxon_key)
    logger.info("Contagem aproximada de registros (sem filtros): %d", approx_n)
    is_large = approx_n > LARGE_DATASET_THRESHOLD

    # Step 3: fetch the records.
    query_args = (taxon_key, country_code, year_from, year_to)
    if not is_large:
        log_decision(
            "download_method", "search_download",
            f"dataset pequeno ({approx_n} registros) -> occ.search e mais rapido; sem DOI",
        )
        logger.warning("occ.search nao gera DOI. Para publicacoes, use download assincrono.")
        log_step(3, "Executar download via occ.search (dataset pequeno)")
        records, doi = search_download(*query_args)
        dl_key = None
    else:
        log_decision(
            "download_method", "async_download",
            f"dataset grande ({approx_n} registros) -> download assincrono com DOI para reprodutibilidade",
        )
        log_step(3, "Executar download assincrono (occ.download) com DOI")
        records, doi = async_download(*query_args)
        # async_download does not return the real download key, so this
        # placeholder text is what ends up in the metadata file.
        dl_key = "see metadata"

    n_records = len(records)
    logger.info("Registros recuperados: %d", n_records)
    if n_records < 30:
        logger.warning(
            "Registros insuficientes para SDM confiavel (n = %d). "
            "Considere: (1) relaxar filtros, (2) ampliar escopo geografico, "
            "(3) usar outras bases de dados (VertNet, iNaturalist).",
            n_records,
        )

    # Step 4: write the occurrence table (skipped when nothing came back).
    log_step(4, "Gravar CSV de ocorrencias")
    csv_path = output_dir / f"occurrences_raw_GBIF_{file_stem}_{date_stamp}.csv"
    if not records:
        logger.warning("Nenhum registro para gravar para '%s'.", species_name)
    else:
        try:
            pd.DataFrame(records).to_csv(csv_path, index=False)
            logger.info("Gravado: %s", csv_path)
        except OSError as err:
            logger.error(
                "Falha ao gravar CSV de ocorrencias para '%s': %s\n"
                " Causa provavel: sem permissao de escrita em '%s'.\n"
                " Skill anterior: ecological-data-foundation",
                species_name, err, output_dir,
            )
            raise

    # Step 5: the metadata file is written even when there were no records.
    log_step(5, "Gravar metadados do download")
    save_metadata(
        output_dir, species_name, taxon_key, dl_key, doi,
        n_records, country_code, year_from, year_to,
    )
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
# ── Entry point ───────────────────────────────────────────────────────────────
|
|
323
|
+
|
|
324
|
+
def main():
    """Command-line entry point: download GBIF occurrences for one or more species.

    Positional arguments (read from ``sys.argv``):
        1. species name, or path to an existing CSV with a ``scientificName`` column
        2. output directory (created if absent)
        3. optional country code (an empty string means "no country filter")
        4. optional first year of the period (default 1950)
        5. optional last year of the period (default: current year)

    With fewer than two arguments, built-in test defaults are used.

    Exits with status 1 when the year arguments are not integers, when the
    year range is inverted, or when the species CSV cannot be read / lacks the
    ``scientificName`` column. A failure while downloading one species is
    logged and does not stop the remaining species.
    """
    logger.info("Script: download_from_gbif.py | Skill: %s", SKILL_NAME)

    argv = sys.argv[1:]

    if len(argv) < 2:
        species_input = "Panthera onca"
        output_dir = Path("output/gbif")
        country_code = None
        year_from = 1950
        year_to = date.today().year
        logger.warning("Menos de 2 argumentos fornecidos. Usando valores padrao para teste.")
    else:
        species_input = argv[0]
        output_dir = Path(argv[1])
        country_code = argv[2] if len(argv) >= 3 and argv[2] else None
        # Report malformed years as a usage error instead of a raw traceback.
        try:
            year_from = int(argv[3]) if len(argv) >= 4 else 1950
            year_to = int(argv[4]) if len(argv) >= 5 else date.today().year
        except ValueError as e:
            logger.error(
                "Argumentos de ano invalidos (year_from/year_to devem ser inteiros): %s", e
            )
            sys.exit(1)

    if year_from > year_to:
        logger.error(
            "Intervalo de anos invalido: year_from (%d) > year_to (%d).", year_from, year_to
        )
        sys.exit(1)

    logger.info("Species input : %s", species_input)
    logger.info("Output dir : %s", output_dir)
    logger.info("Country code : %s", country_code or "nenhum")
    logger.info("Year range : %d - %d", year_from, year_to)

    log_decision("year_from", year_from, "limite inferior do periodo; 1950 = pos-era moderna")
    log_decision("year_to", year_to, "limite superior do periodo; ano corrente por padrao")
    log_decision(
        "coord_uncertainty_max_m", COORD_UNCERTAINTY_MAX,
        "excluir registros com incerteza > 10 km (imprecisao inaceitavel para SDM)",
    )
    log_decision(
        "basis_of_record",
        BASIS_OF_RECORD,
        "apenas observacoes de campo/especimes; exclui literatura e fosseis",
    )

    output_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Diretorio de saida pronto: %s", output_dir)

    # Build species list
    log_step(0, "Construir lista de especies")
    # Case-insensitive extension check, so "LIST.CSV" is also treated as a batch file.
    if species_input.lower().endswith(".csv") and Path(species_input).exists():
        try:
            df_species = pd.read_csv(species_input)
            if "scientificName" not in df_species.columns:
                logger.error(
                    "Coluna 'scientificName' nao encontrada em: %s\n Causa provavel: CSV de lista de especies mal formatado.\n Skill anterior: ecological-data-foundation",
                    species_input,
                )
                # SystemExit is not an Exception subclass, so the handler below does not catch it.
                sys.exit(1)
            # Strip names (as single-species mode does) and drop blanks before de-duplicating.
            names = df_species["scientificName"].dropna().astype(str).str.strip()
            species_list = names[names != ""].unique().tolist()
            logger.info("Modo batch: %d especies carregadas de %s", len(species_list), species_input)
            log_decision("mode", "batch", "argumento e um CSV valido com coluna scientificName")
        except Exception as e:
            logger.error(
                "Falha ao ler lista de especies '%s': %s\n Causa provavel: arquivo CSV invalido.\n Skill anterior: ecological-data-foundation",
                species_input, e,
            )
            sys.exit(1)
    else:
        species_list = [species_input.strip()]
        logger.info("Modo especie unica: %s", species_list[0])
        log_decision("mode", "single_species", "argumento nao e um arquivo CSV existente")

    # Download each species; remember failures so the final summary is truthful.
    failed = []
    for sp in species_list:
        try:
            download_species(sp, output_dir, country_code, year_from, year_to)
        except FileNotFoundError as e:
            logger.error(
                "Arquivo de entrada nao encontrado ao processar '%s': %s\n Esperado como saida de: ecological-data-foundation\n Verifique se o passo anterior foi concluido.",
                sp, e,
            )
            failed.append(sp)
        except Exception as e:
            logger.error(
                "Falha ao baixar '%s': %s\n Causa provavel: problema de rede, taxon nao encontrado ou credenciais GBIF invalidas.\n Skill anterior: ecological-data-foundation",
                sp, e,
            )
            failed.append(sp)

    if failed:
        logger.warning(
            "%d de %d especies falharam: %s",
            len(failed), len(species_list), ", ".join(str(s) for s in failed),
        )
        logger.info("Downloads concluidos com falhas. Verifique: %s", output_dir)
    else:
        logger.info("Todos os downloads concluidos. Verifique: %s", output_dir)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
# Usage: Rscript download_from_inat.R <species_name_or_list_csv> <output_dir> [year_from] [year_to] [quality_grade]
|
|
5
|
+
|
|
6
|
+
# ── Inline logger ─────────────────────────────────────────────────────────────
|
|
7
|
+
SKILL_NAME <- "ecological-data-foundation"

# Timestamp prefix placed at the start of every log line.
.log_ts <- function() {
  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
}

# Shared emitter: sends "<timestamp> [LEVEL] <sprintf-formatted text>" through
# message(). The remaining arguments are handed to sprintf() unchanged.
.log_emit <- function(.level, ...) {
  message(.log_ts(), " [", .level, "] ", sprintf(...))
}

# Level-specific wrappers; each takes a sprintf() format plus its values.
log_info <- function(...) {
  .log_emit("INFO", ...)
}
log_warn <- function(...) {
  .log_emit("WARN", ...)
}
log_error <- function(...) {
  .log_emit("ERROR", ...)
}

# Announce pipeline step number `n` with description `d`.
log_step <- function(n, d) {
  log_info("-- STEP %d: %s", n, d)
}

# Record a parameter choice: variable name `v`, chosen value `val`, rationale `why`.
log_decision <- function(v, val, why) {
  log_info("DECISION | %s = %s | %s", v, val, why)
}

# Make sure a "logs" directory exists in the working directory.
dir.create("logs", recursive = TRUE, showWarnings = FALSE)
|
|
15
|
+
|
|
16
|
+
#
|
|
17
|
+
# Arguments:
|
|
18
|
+
# species_name_or_list_csv : Species name (e.g., "Panthera onca") or path to a
|
|
19
|
+
# CSV with column "scientificName"
|
|
20
|
+
# output_dir : Directory to write outputs (created if absent)
|
|
21
|
+
# year_from : Minimum year of observation (optional, default: 2000)
|
|
22
|
+
# year_to : Maximum year of observation (optional, default: current year)
|
|
23
|
+
# quality_grade : iNaturalist quality grade: "research" or "any" (default: "research")
|
|
24
|
+
#
|
|
25
|
+
# Outputs (per species):
|
|
26
|
+
# occurrences_raw_iNat_{species}_{date}.csv — standardised occurrence records
|
|
27
|
+
# download_metadata_iNat_{species}.txt — download provenance and citation info
|
|
28
|
+
#
|
|
29
|
+
# Standard output schema:
|
|
30
|
+
# species, decimalLatitude, decimalLongitude, eventDate, countryCode,
|
|
31
|
+
# basisOfRecord, coordinateUncertaintyInMeters, datasetName, occurrenceID,
|
|
32
|
+
# source, download_doi
|
|
33
|
+
|
|
34
|
+
suppressPackageStartupMessages(library(rinat))
|
|
35
|
+
suppressPackageStartupMessages(library(dplyr))
|
|
36
|
+
suppressPackageStartupMessages(library(readr))
|
|
37
|
+
|
|
38
|
+
# ── 1. Parse arguments ───────────────────────────────────────────────────────
|
|
39
|
+
# Parse the command line into the script-level settings species_input,
# output_dir, year_from, year_to and quality_grade, validate them, and log
# the resulting configuration. With fewer than two arguments, test defaults
# are used.
log_step(1, "Analisar argumentos da linha de comando")
args <- commandArgs(trailingOnly = TRUE)

current_year <- as.integer(format(Sys.Date(), "%Y"))

if (length(args) < 2) {
  species_input <- "Panthera onca"
  output_dir <- "output/inat"
  year_from <- 2000L
  year_to <- current_year
  quality_grade <- "research"
  log_warn("Menos de 2 argumentos fornecidos. Usando valores padrao para teste.")
} else {
  species_input <- args[1]
  output_dir <- args[2]
  # A non-numeric year becomes NA here (coercion warning silenced) and is
  # rejected just below with an explicit message.
  year_from <- if (length(args) >= 3) suppressWarnings(as.integer(args[3])) else 2000L
  year_to <- if (length(args) >= 4) suppressWarnings(as.integer(args[4])) else current_year
  quality_grade <- if (length(args) >= 5) args[5] else "research"
}

# An NA year would turn every comparison in the later year filter into NA,
# so stop now instead of producing rows of NA.
if (is.na(year_from) || is.na(year_to)) {
  log_error("Argumentos de ano invalidos: year_from e year_to devem ser anos inteiros (ex.: 2000 2024).")
  stop("year_from and year_to must be integer years", call. = FALSE)
}
if (year_from > year_to) {
  log_error("Intervalo de anos invalido: year_from (%d) > year_to (%d).", year_from, year_to)
  stop("year_from must not be greater than year_to", call. = FALSE)
}

# Normalise and check the quality grade before any network call is made.
quality_grade <- tolower(trimws(quality_grade))
if (!quality_grade %in% c("research", "casual", "any")) {
  log_error("quality_grade invalido: '%s'. Use 'research', 'casual' ou 'any'.", quality_grade)
  stop("Invalid quality_grade: ", quality_grade, call. = FALSE)
}

log_info("Script: download_from_inat.R | Skill: %s", SKILL_NAME)
log_info("Species input : %s", species_input)
log_info("Output dir : %s", output_dir)
log_info("Year range : %d - %d", year_from, year_to)
log_info("Quality grade : %s", quality_grade)

log_decision("quality_grade", quality_grade,
             "research = comunidade validou ID + possui coordenadas; recomendado para SDM")
log_decision("year_from", year_from, "filtro temporal; 2000 equilibra tamanho de dataset e qualidade")
log_decision("captive", "FALSE",
             "excluir organismos em cativeiro/cultivados (nao representam distribuicao selvagem)")
|
|
68
|
+
|
|
69
|
+
# ── 2. Create output directory ───────────────────────────────────────────────
|
|
70
|
+
# Create the output directory (and any missing parents).
log_step(2, "Criar diretorio de saida")
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# dir.create() is silenced above, so confirm the directory really exists
# before reporting it as ready; otherwise every later write would fail.
if (!dir.exists(output_dir)) {
  log_error(
    "Falha ao criar diretorio de saida '%s'.\nCausa provavel: sem permissao de escrita ou caminho invalido.\nSkill anterior: ecological-data-foundation",
    output_dir
  )
  stop("Cannot create output directory: ", output_dir, call. = FALSE)
}
log_info("Diretorio de saida pronto: %s", output_dir)
|
|
73
|
+
|
|
74
|
+
# ── 3. Build species list ────────────────────────────────────────────────────
|
|
75
|
+
# Build `species_list`: either the unique, non-empty values of the
# "scientificName" column of an existing CSV (batch mode), or the single
# species name given on the command line.
log_step(3, "Construir lista de especies")
if (grepl("\\.csv$", species_input, ignore.case = TRUE) && file.exists(species_input)) {
  # Only the read itself is guarded, so a missing column is reported once
  # (below) instead of being re-logged as a generic read failure.
  species_df <- tryCatch(
    read_csv(species_input, show_col_types = FALSE),
    error = function(e) {
      log_error(
        "Falha ao ler lista de especies: %s\nCausa provavel: arquivo CSV invalido.\nVerifique: %s\nSkill anterior: ecological-data-foundation",
        conditionMessage(e), species_input
      )
      stop(e)
    }
  )

  if (!"scientificName" %in% names(species_df)) {
    log_error(
      "Coluna 'scientificName' nao encontrada em: %s\nCausa provavel: CSV mal formatado.\nVerifique o cabecalho do arquivo.\nSkill anterior: ecological-data-foundation",
      species_input
    )
    stop("Missing column 'scientificName'", call. = FALSE)
  }

  # Drop NA and blank names: an NA would otherwise be sent to the API as a
  # species called "NA".
  species_names <- trimws(as.character(species_df$scientificName))
  species_list <- unique(species_names[!is.na(species_names) & nzchar(species_names)])

  if (length(species_list) == 0) {
    log_error(
      "Nenhum nome de especie valido na coluna 'scientificName' de: %s\nSkill anterior: ecological-data-foundation",
      species_input
    )
    stop("Species list is empty", call. = FALSE)
  }

  log_info("Modo batch: %d especies carregadas de %s", length(species_list), species_input)
  log_decision("mode", "batch", "argumento e um CSV valido com coluna scientificName")
} else {
  species_list <- trimws(species_input)
  log_info("Modo especie unica: %s", species_list)
  log_decision("mode", "single_species", "argumento nao e arquivo CSV")
}
|
|
101
|
+
|
|
102
|
+
# ── 4. Download function ─────────────────────────────────────────────────────
|
|
103
|
+
# Download iNaturalist observations for one species, standardise them to the
# script's output schema, and write an occurrence CSV plus a provenance text
# file into `output_dir`.
#
# Reads the script-level settings `quality_grade`, `year_from`, `year_to`
# and `output_dir`.
#
# Args:
#   sp_name: scientific name, passed to rinat::get_inat_obs(taxon_name = ).
#
# Returns (invisibly): the path of the CSV written, or NULL when no record
# is available after filtering.
#
# Errors raised by the API call or by the file writes are logged and
# re-raised to the caller.
download_inat_species <- function(sp_name) {
  log_info("--- Iniciando download iNaturalist: %s ---", sp_name)
  today_str <- format(Sys.Date(), "%Y%m%d")
  safe_name <- gsub(" ", "_", sp_name)

  # rinat only understands "casual" / "research"; "any" means no quality
  # filter, which is expressed as NULL.
  quality_arg <- if (identical(tolower(quality_grade), "any")) NULL else quality_grade

  # NOTE(review): get_inat_obs() has no `captive` argument and takes only a
  # single `year`, so the captive and year-range filters are applied to the
  # returned table below. Server-side results are capped by `maxresults`.
  occ_raw <- tryCatch({
    rinat::get_inat_obs(
      taxon_name = sp_name,
      quality = quality_arg,
      geo = TRUE,
      maxresults = 10000,
      meta = FALSE
    )
  }, error = function(e) {
    log_error(
      "Falha em get_inat_obs para '%s': %s\nCausa provavel: sem conexao com a internet ou API iNaturalist indisponivel.\nVerifique sua conexao e tente novamente.\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e)
    )
    stop(e)
  })

  if (is.null(occ_raw) || nrow(occ_raw) == 0) {
    log_warn("Nenhum registro encontrado para '%s' no iNaturalist.", sp_name)
    return(invisible(NULL))
  }

  n_raw <- nrow(occ_raw)
  log_info("Registros brutos recuperados: %d", n_raw)

  # ── Exclude captive / cultivated organisms ────────────────────────────────
  captive_filtered <- "captive_cultivated" %in% names(occ_raw)
  if (captive_filtered) {
    # The flag may arrive as logical or as text depending on how it was parsed.
    is_captive <- tolower(as.character(occ_raw$captive_cultivated)) %in% c("true", "t")
    occ_raw <- occ_raw[!is_captive, , drop = FALSE]
    log_info("Registros apos excluir cativeiro/cultivados: %d", nrow(occ_raw))
  } else {
    log_warn("Coluna 'captive_cultivated' ausente; filtro de cativeiro nao aplicado.")
  }

  # ── Filter by year range ───────────────────────────────────────────────────
  if ("observed_on" %in% names(occ_raw)) {
    obs_year <- suppressWarnings(
      as.integer(substr(as.character(occ_raw$observed_on), 1, 4))
    )
    occ_raw$obs_year <- obs_year
    in_range <- !is.na(obs_year) & obs_year >= year_from & obs_year <= year_to
    occ_raw <- occ_raw[in_range, , drop = FALSE]
    log_info("Registros apos filtro de ano (%d-%d): %d", year_from, year_to, nrow(occ_raw))
  }

  # data.frame() cannot combine the scalar columns below with zero-length
  # vectors, so stop here when the filters removed everything.
  if (nrow(occ_raw) == 0) {
    log_warn("Nenhum registro restante para '%s' apos os filtros.", sp_name)
    return(invisible(NULL))
  }

  # Column lookup that yields NA (instead of NULL) when the column is absent,
  # keeping every output column the same length.
  col_or_na <- function(col) {
    if (col %in% names(occ_raw)) occ_raw[[col]] else rep(NA, nrow(occ_raw))
  }

  # ── Standardise to output schema ──────────────────────────────────────────
  std <- data.frame(
    species = sp_name,
    decimalLatitude = as.numeric(col_or_na("latitude")),
    decimalLongitude = as.numeric(col_or_na("longitude")),
    eventDate = as.character(col_or_na("observed_on")),
    countryCode = as.character(col_or_na("place_guess")), # iNat has no ISO code
    basisOfRecord = "HUMAN_OBSERVATION",
    coordinateUncertaintyInMeters = as.numeric(col_or_na("positional_accuracy")),
    datasetName = "iNaturalist",
    occurrenceID = as.character(col_or_na("id")),
    source = "iNaturalist",
    download_doi = NA_character_,
    stringsAsFactors = FALSE
  )

  # Remove records with missing coordinates
  n_before <- nrow(std)
  std <- std[!is.na(std$decimalLatitude) & !is.na(std$decimalLongitude), , drop = FALSE]
  n_removed <- n_before - nrow(std)
  if (n_removed > 0) {
    log_warn("%d registros removidos por coordenadas ausentes.", n_removed)
  }

  n_final <- nrow(std)
  log_info("Registros com coordenadas validas: %d", n_final)

  if (n_final < 30) {
    log_warn(
      "Registros insuficientes para SDM confiavel (n = %d). Considere: (1) ampliar periodo, (2) usar quality='any', (3) combinar com outras fontes.",
      n_final
    )
  }

  # ── Save CSV ───────────────────────────────────────────────────────────────
  csv_path <- file.path(output_dir, paste0("occurrences_raw_iNat_", safe_name, "_", today_str, ".csv"))
  tryCatch({
    write_csv(std, csv_path)
    log_info("Gravado: %s (%d registros)", csv_path, n_final)
  }, error = function(e) {
    log_error(
      "Falha ao gravar CSV para '%s': %s\nCausa provavel: sem permissao de escrita em '%s'.\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e), output_dir
    )
    stop(e)
  })

  # ── Save metadata ──────────────────────────────────────────────────────────
  # The citation title reflects the quality grade that was actually requested.
  dataset_title <- if (identical(tolower(quality_grade), "research")) {
    "iNaturalist Research-grade Observations"
  } else {
    "iNaturalist Observations"
  }
  access_date <- format(Sys.Date())
  meta_lines <- c(
    paste("Species:", sp_name),
    "Source: iNaturalist (https://www.inaturalist.org)",
    paste("Quality grade:", quality_grade),
    paste("Year range:", year_from, "-", year_to),
    paste("Captive excluded:", captive_filtered),
    "Geo-referenced only: TRUE",
    paste("n_records:", n_final),
    paste("Download date:", access_date),
    paste0(
      "Citation: iNaturalist contributors and the California Academy of Sciences (",
      format(Sys.Date(), "%Y"), "). ", dataset_title,
      ". iNaturalist.org. Accessed ", access_date, "."
    ),
    "License: CC BY-NC (individual records may vary; see iNaturalist for details)",
    "Note: iNaturalist does not issue download DOIs; record the access date for reproducibility."
  )
  meta_path <- file.path(output_dir, paste0("download_metadata_iNat_", safe_name, ".txt"))
  tryCatch({
    writeLines(meta_lines, meta_path)
    log_info("Gravado: %s", meta_path)
  }, error = function(e) {
    log_error(
      "Falha ao gravar metadados para '%s': %s\nCausa provavel: sem permissao de escrita.\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e)
    )
    stop(e)
  })

  invisible(csv_path)
}
|
|
223
|
+
|
|
224
|
+
# ── 5. Run for all species ───────────────────────────────────────────────────
|
|
225
|
+
# Run the download for every species. A failure for one species is logged
# and does not stop the others; failures are collected so the closing
# message does not claim success when something went wrong.
log_step(4, "Executar download iNaturalist para todas as especies")
download_ok <- vapply(species_list, function(sp) {
  tryCatch({
    download_inat_species(sp)
    TRUE
  }, error = function(e) {
    log_error(
      "Falha ao baixar '%s' do iNaturalist: %s\nCausa provavel: problema de rede ou especie nao encontrada.\nVerifique os logs acima.\nSkill anterior: ecological-data-foundation",
      sp, conditionMessage(e)
    )
    FALSE
  })
}, logical(1), USE.NAMES = FALSE)

failed_species <- species_list[!download_ok]

if (length(failed_species) > 0) {
  log_warn(
    "%d de %d especies falharam: %s",
    length(failed_species), length(species_list),
    paste(failed_species, collapse = ", ")
  )
  log_info("Downloads iNaturalist concluidos com falhas. Verifique: %s", output_dir)
} else {
  log_info("Todos os downloads iNaturalist concluidos. Verifique: %s", output_dir)
}
|