ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/AGENT_CONTEXT.md +191 -0
  2. package/CATALOG.md +329 -0
  3. package/LICENSE +692 -0
  4. package/README.md +347 -0
  5. package/bin/install.mjs +168 -0
  6. package/docs/comparison-with-alternatives.md +38 -0
  7. package/docs/global-examples-index.md +103 -0
  8. package/docs/repository-statistics.md +101 -0
  9. package/docs/theoretical-foundations.md +188 -0
  10. package/environment.yaml +106 -0
  11. package/examples/community/arctic_tundra_vegetation_example.md +247 -0
  12. package/examples/community/bird_landuse_example.md +63 -0
  13. package/examples/community/phytoplankton_reservoir_example.md +60 -0
  14. package/examples/community/reef_fish_indopacific_example.md +221 -0
  15. package/examples/impact/baci_road_example.md +57 -0
  16. package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
  17. package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
  18. package/examples/occupancy/puma_camera_example.md +61 -0
  19. package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
  20. package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
  21. package/examples/sdm/anteater_cerrado_example.md +69 -0
  22. package/examples/sdm/jaguar_amazon_example.md +80 -0
  23. package/examples/sdm/koala_climate_change_example.md +170 -0
  24. package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
  25. package/package.json +43 -0
  26. package/renv.lock +194 -0
  27. package/skills/SKILL_INDEX.json +1020 -0
  28. package/skills/acoustic-monitoring/SKILL.md +163 -0
  29. package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
  30. package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
  31. package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
  32. package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
  33. package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
  34. package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
  35. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
  36. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
  37. package/skills/biostatistics-workbench/SKILL.md +140 -0
  38. package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
  39. package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
  40. package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
  41. package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
  42. package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
  43. package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
  44. package/skills/camera-trap-processing/SKILL.md +159 -0
  45. package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
  46. package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
  47. package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
  48. package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
  49. package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
  50. package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
  51. package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
  52. package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
  53. package/skills/community-ecology-ordination/SKILL.md +133 -0
  54. package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
  55. package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
  56. package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
  57. package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
  58. package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
  59. package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
  60. package/skills/ecological-data-foundation/SKILL.md +129 -0
  61. package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
  62. package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
  63. package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
  64. package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
  65. package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
  66. package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
  67. package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
  68. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
  69. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
  70. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
  71. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
  72. package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
  73. package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
  74. package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
  75. package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
  76. package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
  77. package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
  78. package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
  79. package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
  80. package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
  81. package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
  82. package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
  83. package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
  84. package/skills/ecological-impact-assessment/SKILL.md +123 -0
  85. package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
  86. package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
  87. package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
  88. package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
  89. package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
  90. package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
  91. package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
  92. package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
  93. package/skills/ecosystem-services-assessment/SKILL.md +125 -0
  94. package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
  95. package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
  96. package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
  97. package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
  98. package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
  99. package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
  100. package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
  101. package/skills/environmental-time-series/SKILL.md +125 -0
  102. package/skills/environmental-time-series/examples/example-prompts.md +33 -0
  103. package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
  104. package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
  105. package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
  106. package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
  107. package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
  108. package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
  109. package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
  110. package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
  111. package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
  112. package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
  113. package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
  114. package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
  115. package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
  116. package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
  117. package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
  118. package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
  119. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
  120. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
  121. package/skills/landscape-connectivity/SKILL.md +170 -0
  122. package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
  123. package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
  124. package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
  125. package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
  126. package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
  127. package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
  128. package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
  129. package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
  130. package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
  131. package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
  132. package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
  133. package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
  134. package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
  135. package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
  136. package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
  137. package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
  138. package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
  139. package/skills/occupancy-and-detection/SKILL.md +126 -0
  140. package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
  141. package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
  142. package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
  143. package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
  144. package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
  145. package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
  146. package/skills/population-viability-analysis/SKILL.md +161 -0
  147. package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
  148. package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
  149. package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
  150. package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
  151. package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
  152. package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
  153. package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
  154. package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
  155. package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
  156. package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
  157. package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
  158. package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
  159. package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
  160. package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
  161. package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
  162. package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
  163. package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
  164. package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
  165. package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
  166. package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
  167. package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
  168. package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
  169. package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
  170. package/skills/spatial-prioritization/SKILL.md +162 -0
  171. package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
  172. package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
  173. package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
  174. package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
  175. package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
  176. package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
  177. package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
  178. package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
  179. package/skills/species-distribution-modeling/SKILL.md +139 -0
  180. package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
  181. package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
  182. package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
  183. package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
  184. package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
  185. package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
  186. package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
  187. package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
  188. package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
  189. package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
  190. package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
  191. package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
  192. package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
  193. package/templates/SKILL_TEMPLATE.md +225 -0
  194. package/templates/checklists/data-submission-checklist.md +38 -0
  195. package/templates/checklists/post-analysis-checklist.md +55 -0
  196. package/templates/checklists/pre-analysis-checklist.md +31 -0
  197. package/templates/prompts/debug-skill.md +47 -0
  198. package/templates/prompts/invoke-skill.md +34 -0
  199. package/templates/prompts/invoke-workflow.md +45 -0
  200. package/templates/reports/technical-report-template.md +80 -0
  201. package/templates/scripts/logger_setup.R +79 -0
  202. package/templates/scripts/logger_setup.py +119 -0
  203. package/templates/scripts/params_loader.R +28 -0
  204. package/templates/scripts/params_loader.py +38 -0
  205. package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
  206. package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
  207. package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
  208. package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
  209. package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
  210. package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
  211. package/workflows/produce-technical-report/WORKFLOW.md +113 -0
  212. package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
  213. package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
  214. package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
  215. package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
  216. package/workflows/run-population-viability/WORKFLOW.md +90 -0
  217. package/workflows/run-sdm-study/WORKFLOW.md +99 -0
@@ -0,0 +1,364 @@
1
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ """Parse eBird Basic Dataset (EBD) for occurrence records.
5
+
6
+ Usage: python download_from_ebird.py <ebd_file> <species_name_or_list_csv> <output_dir> [year_from] [year_to] [country_code]
7
+
8
+ Arguments:
9
+ ebd_file : Path to the eBird Basic Dataset text file (.txt / .gz)
10
+ (pre-downloaded from https://ebird.org/data/download)
11
+ species_name_or_list_csv : Scientific name or path to CSV with column 'scientificName'
12
+ output_dir : Directory for outputs (created if absent)
13
+ year_from : Minimum year of observation (default: 2000)
14
+ year_to : Maximum year of observation (default: current year)
15
+ country_code : ISO 3166-1 alpha-2 country code to filter (optional)
16
+
17
+ Note:
18
+ eBird data requires a pre-downloaded EBD file. Apply for access at:
19
+ https://ebird.org/data/download
20
+
21
+ Outputs (per species):
22
+ occurrences_raw_eBird_{species}_{date}.csv — standardised occurrence records
23
+ download_metadata_eBird_{species}.txt — download provenance and citation
24
+
25
+ Standard output schema:
26
+ species, decimalLatitude, decimalLongitude, eventDate, countryCode,
27
+ basisOfRecord, coordinateUncertaintyInMeters, datasetName, occurrenceID,
28
+ source, download_doi
29
+ Extra eBird columns:
30
+ effort_distance_km, duration_minutes, observer_id
31
+ """
32
+
33
+ import logging
34
+ import sys
35
+ from datetime import datetime
36
+ from pathlib import Path
37
+
38
SKILL_NAME = "ecological-data-foundation"

# Every run gets its own timestamped log file under ./logs (relative to the
# current working directory); the directory is created on import if missing.
_LOG_DIR = Path("logs")
_LOG_DIR.mkdir(parents=True, exist_ok=True)
_run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
_log_file = _LOG_DIR / ("skill_" + SKILL_NAME + "_" + _run_stamp + ".log")

# Records are sent both to stdout and to the per-run log file.
_console_handler = logging.StreamHandler(sys.stdout)
_file_handler = logging.FileHandler(_log_file, encoding="utf-8")

logging.basicConfig(
    level=logging.INFO,
    format=f"[%(asctime)s] [%(levelname)s] [{SKILL_NAME}] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[_console_handler, _file_handler],
)

# Module-wide logger used by every function in this script.
logger = logging.getLogger(SKILL_NAME)
52
+
53
def log_step(n: int, desc: str) -> None:
    """Log the start of pipeline step number ``n`` at INFO level.

    Args:
        n: Step number, rendered with ``%d``.
        desc: Short description of the step.
    """
    logger.log(logging.INFO, "-- STEP %d: %s", n, desc)
55
+
56
def log_decision(var: str, val, why: str) -> None:
    """Log an analysis decision (setting, chosen value, rationale) at INFO level.

    Args:
        var: Name of the setting that was decided.
        val: Value chosen for it; rendered with ``%s``.
        why: Reason for the choice.
    """
    logger.log(logging.INFO, "DECISION | %s = %s | %s", var, val, why)
58
+
59
+
60
+ from datetime import date
61
+
62
+ try:
63
+ import pandas as pd
64
+ except ImportError as e:
65
+ logger.error(
66
+ "Dependencia ausente: %s\n Instale com: pip install pandas\n Skill anterior: ecological-data-foundation",
67
+ e,
68
+ )
69
+ sys.exit(1)
70
+
71
+
72
+ # ── Constants ─────────────────────────────────────────────────────────────────
73
# Protocol values a row must carry to pass the protocol filter in parse_ebd.
VALID_PROTOCOLS = set(("Stationary", "Traveling"))

# Number of rows read per chunk when streaming large EBD files.
CHUNK_SIZE = 500 * 1000

# EBD header (tab-delimited file) -> standardised column name.
# NOTE(review): the functions visible in this script look columns up by
# keyword instead of through this mapping - confirm whether it is still used.
EBD_COLS = dict([
    ("SCIENTIFIC NAME", "scientific_name"),
    ("COMMON NAME", "common_name"),
    ("LATITUDE", "decimalLatitude"),
    ("LONGITUDE", "decimalLongitude"),
    ("OBSERVATION DATE", "eventDate"),
    ("COUNTRY CODE", "countryCode"),
    ("SAMPLING EVENT IDENTIFIER", "occurrenceID"),
    ("PROTOCOL TYPE", "protocol_type"),
    ("EFFORT DISTANCE KM", "effort_distance_km"),
    ("DURATION MINUTES", "duration_minutes"),
    ("OBSERVER ID", "observer_id"),
    ("APPROVED", "approved"),
    ("REVIEWED", "reviewed"),
])
92
+
93
+
94
+ # ── Helper functions ──────────────────────────────────────────────────────────
95
+
96
def parse_ebd(ebd_path: Path, species_set: set, year_from: int, year_to: int,
              country_code: str | None) -> "pd.DataFrame":
    """Read an EBD file in chunks and keep only the rows of interest.

    A row is kept when its scientific name is in ``species_set`` and, for each
    of the following columns that is present in the file, the matching
    condition holds:

    * protocol column: value is one of ``VALID_PROTOCOLS``;
    * approved column: value (trimmed, upper-cased) is "1", "TRUE" or "YES";
    * observation date column: the first four characters form a year within
      ``[year_from, year_to]`` (inclusive);
    * country code column: equals ``country_code`` case-insensitively (only
      applied when ``country_code`` is given).

    Columns are located by keyword after upper-casing the header.

    Args:
        ebd_path: Path to the tab-delimited EBD file; a name ending in ".gz"
            is read as gzip.
        species_set: Scientific names to keep (exact match).
        year_from: First year to keep.
        year_to: Last year to keep.
        country_code: Country code to keep, or None for no country filter.

    Returns:
        The retained rows with upper-cased column names and every column held
        as text (empty fields are NaN), or an empty DataFrame when nothing
        matched.

    Raises:
        Exception: an error raised by ``pandas.read_csv`` while opening the
            file is logged and re-raised; errors raised while iterating over
            the chunks propagate to the caller unlogged.
    """
    compression = "gzip" if str(ebd_path).endswith(".gz") else None
    wanted_country = country_code.upper() if country_code else None

    try:
        reader = pd.read_csv(
            ebd_path,
            sep="\t",
            compression=compression,
            chunksize=CHUNK_SIZE,
            low_memory=False,
            encoding="utf-8",
            on_bad_lines="skip",
            # Read every column as text so per-chunk type inference cannot
            # change how a flag is spelled (e.g. an APPROVED column holding an
            # empty cell would otherwise be read as floats and yield "1.0").
            dtype=str,
            # Only empty fields count as missing. With pandas' default NA list
            # the literal text "NA" is read as NaN, which would make the
            # country code "NA" (Namibia) impossible to match.
            keep_default_na=False,
            na_values=[""],
        )
    except Exception as e:
        logger.error(
            "Falha ao abrir arquivo EBD '%s': %s\n Causa provavel: arquivo corrompido ou formato incorreto.\n Skill anterior: ecological-data-foundation",
            ebd_path, e,
        )
        raise

    chunks = []
    n_total = 0
    # The chunk reader is a context manager: leaving the block closes the
    # underlying file handle even if a chunk fails part-way through.
    with reader:
        for i, chunk in enumerate(reader):
            n_total += len(chunk)
            # Normalise column names so the keyword lookups are case-insensitive.
            chunk.columns = [c.upper() for c in chunk.columns]

            sci_col = next((c for c in chunk.columns if "SCIENTIFIC" in c), None)
            date_col = next((c for c in chunk.columns if "OBSERVATION DATE" in c), None)
            proto_col = next((c for c in chunk.columns if "PROTOCOL" in c), None)
            approved_col = next((c for c in chunk.columns if "APPROVED" in c), None)
            country_col = next((c for c in chunk.columns if "COUNTRY CODE" in c), None)

            if sci_col is None:
                logger.warning("Coluna de nome cientifico nao encontrada no chunk %d; pulando.", i)
                continue

            # Filter species
            mask = chunk[sci_col].isin(species_set)

            # Filter protocol
            if proto_col:
                mask &= chunk[proto_col].isin(VALID_PROTOCOLS)

            # Filter approved
            if approved_col:
                approved = chunk[approved_col].astype(str).str.strip().str.upper()
                mask &= approved.isin({"1", "TRUE", "YES"})

            # Filter year; unparseable or missing dates become NaN and fail
            # both comparisons, so those rows are dropped.
            if date_col:
                years = pd.to_numeric(chunk[date_col].astype(str).str[:4], errors="coerce")
                mask &= (years >= year_from) & (years <= year_to)

            # Filter country
            if country_col and wanted_country:
                mask &= chunk[country_col].astype(str).str.upper() == wanted_country

            filtered = chunk[mask]
            if len(filtered) > 0:
                chunks.append(filtered)
                logger.info("Chunk %d: %d/%d registros selecionados", i, len(filtered), len(chunk))

    logger.info("Total de linhas lidas no EBD: %d", n_total)
    if not chunks:
        return pd.DataFrame()
    return pd.concat(chunks, ignore_index=True)
166
+
167
+
168
+ def standardise_ebd(df: "pd.DataFrame", species_name: str) -> "pd.DataFrame":
169
+ """Map EBD columns to the standard occurrence schema."""
170
+ # Find column names (case-insensitive search already done above)
171
+ def find_col(keywords: list[str]) -> str | None:
172
+ for col in df.columns:
173
+ if any(kw in col for kw in keywords):
174
+ return col
175
+ return None
176
+
177
+ sci_col = find_col(["SCIENTIFIC"])
178
+ lat_col = find_col(["LATITUDE"])
179
+ lon_col = find_col(["LONGITUDE"])
180
+ date_col = find_col(["OBSERVATION DATE"])
181
+ country_col = find_col(["COUNTRY CODE"])
182
+ id_col = find_col(["SAMPLING EVENT"])
183
+ dist_col = find_col(["EFFORT DISTANCE"])
184
+ dur_col = find_col(["DURATION"])
185
+ obs_col = find_col(["OBSERVER"])
186
+
187
+ rows = df[df[sci_col] == species_name].copy() if sci_col else df.copy()
188
+
189
+ std = pd.DataFrame({
190
+ "species": species_name,
191
+ "decimalLatitude": pd.to_numeric(rows[lat_col], errors="coerce") if lat_col else None,
192
+ "decimalLongitude": pd.to_numeric(rows[lon_col], errors="coerce") if lon_col else None,
193
+ "eventDate": rows[date_col].astype(str) if date_col else None,
194
+ "countryCode": rows[country_col].astype(str) if country_col else None,
195
+ "basisOfRecord": "HUMAN_OBSERVATION",
196
+ "coordinateUncertaintyInMeters": None,
197
+ "datasetName": "eBird Basic Dataset",
198
+ "occurrenceID": rows[id_col].astype(str) if id_col else None,
199
+ "source": "eBird",
200
+ "download_doi": None,
201
+ "effort_distance_km": pd.to_numeric(rows[dist_col], errors="coerce") if dist_col else None,
202
+ "duration_minutes": pd.to_numeric(rows[dur_col], errors="coerce") if dur_col else None,
203
+ "observer_id": rows[obs_col].astype(str) if obs_col else None,
204
+ })
205
+ std = std.dropna(subset=["decimalLatitude", "decimalLongitude"])
206
+ return std.reset_index(drop=True)
207
+
208
+
209
def save_metadata(output_dir: Path, species_name: str, n_records: int,
                  year_from: int, year_to: int, country_code: str | None,
                  ebd_path: Path) -> None:
    """Write the provenance/citation text file for one species.

    The file is named ``download_metadata_eBird_{species}.txt`` (spaces in the
    species name replaced by underscores) and is written to ``output_dir``.

    The dataset version in the citation is taken from the EBD file name when
    it contains a release tag such as ``relFeb-2018``. When no tag is found
    the version is reported as unknown rather than guessed from today's date,
    so the citation never names a release that was not actually used.

    Args:
        output_dir: Existing directory that receives the metadata file.
        species_name: Scientific name of the species.
        n_records: Number of records written for the species.
        year_from: First year of the applied year filter.
        year_to: Last year of the applied year filter.
        country_code: Applied country filter, or None.
        ebd_path: Path of the EBD file the records were read from.

    Raises:
        OSError: if the file cannot be written (logged, then re-raised).
    """
    import re  # only needed here; kept local so the block stays self-contained

    safe_name = species_name.replace(" ", "_")
    today = date.today().isoformat()

    release = re.search(r"rel([A-Za-z]{3}-\d{4})", Path(ebd_path).name)
    if release:
        version = f"EBD_rel{release.group(1)}"
    else:
        version = "unknown (no release tag in the EBD file name; confirm before citing)"

    lines = [
        f"Species: {species_name}",
        "Source: eBird Basic Dataset (https://ebird.org/data/download)",
        f"EBD file: {ebd_path}",
        # Derived from the constant the parser filters on, so the two cannot drift.
        f"Protocols: {', '.join(sorted(VALID_PROTOCOLS))}",
        "Approved only: True",
        f"Year range: {year_from} - {year_to}",
        f"Country filter: {country_code or 'none'}",
        f"n_records: {n_records}",
        f"Download date: {today}",
        (f"Citation: eBird Basic Dataset. Version: {version}. "
         "Cornell Lab of Ornithology, Ithaca, New York."),
        "Note: eBird data requires a signed Data Use Agreement. Cite the dataset version used.",
    ]
    meta_path = output_dir / f"download_metadata_eBird_{safe_name}.txt"
    try:
        meta_path.write_text("\n".join(lines), encoding="utf-8")
        logger.info("Metadados gravados: %s", meta_path)
    except OSError as e:
        logger.error(
            "Falha ao gravar metadados em '%s': %s\n Skill anterior: ecological-data-foundation",
            meta_path, e,
        )
        raise
239
+
240
+
241
+ # ── Entry point ────────────────────────────────────────────────────────────────
242
+
243
def main():
    """Command-line entry point: filter an EBD file and write per-species outputs.

    Positional arguments (``sys.argv[1:]``):
        ebd_file, species_name_or_list_csv, output_dir,
        [year_from=2000], [year_to=current year], [country_code].

    With fewer than three arguments a built-in set of test defaults is used.

    For every requested species a standardised CSV and a metadata text file
    are written to ``output_dir``. A failure on one species is logged and the
    remaining species are still processed.

    Exit status is 1 (via ``sys.exit``) when: a year argument is not an
    integer, ``year_from`` is greater than ``year_to``, the EBD file or the
    species CSV is missing or unreadable, the species CSV lacks the
    ``scientificName`` column, the EBD cannot be parsed, or at least one
    species failed to process. Finding no records at all is reported as a
    warning and is not an error.
    """
    logger.info("Script: download_from_ebird.py | Skill: %s", SKILL_NAME)

    argv = sys.argv[1:]

    if len(argv) < 3:
        ebd_file = "data/ebird/ebd_sample.txt"
        species_input = "Jabiru mycteria"
        output_dir = Path("output/ebird")
        year_from = 2000
        year_to = date.today().year
        country_code = None
        logger.warning("Menos de 3 argumentos fornecidos. Usando valores padrao para teste.")
    else:
        ebd_file = argv[0]
        species_input = argv[1]
        output_dir = Path(argv[2])
        try:
            year_from = int(argv[3]) if len(argv) >= 4 else 2000
            year_to = int(argv[4]) if len(argv) >= 5 else date.today().year
        except ValueError:
            # Report a usage error instead of dying with a traceback.
            logger.error(
                "Argumentos year_from/year_to devem ser inteiros: %s\n Skill anterior: ecological-data-foundation",
                argv[3:5],
            )
            sys.exit(1)
        country_code = argv[5] if len(argv) >= 6 and argv[5] else None

    if year_from > year_to:
        # A reversed range would silently match nothing.
        logger.error(
            "Periodo invalido: year_from (%d) maior que year_to (%d).\n Skill anterior: ecological-data-foundation",
            year_from, year_to,
        )
        sys.exit(1)

    ebd_path = Path(ebd_file)

    logger.info("EBD file : %s", ebd_path)
    logger.info("Species input : %s", species_input)
    logger.info("Output dir : %s", output_dir)
    logger.info("Year range : %d - %d", year_from, year_to)
    logger.info("Country code : %s", country_code or "nenhum")

    log_decision("protocol", "Stationary,Traveling",
                 "apenas protocolos quantificaveis para modelagem")
    log_decision("approved", True,
                 "apenas listas aprovadas pelo eBird")

    # Check EBD file exists
    log_step(1, "Verificar existencia do arquivo EBD")
    if not ebd_path.exists():
        logger.error(
            "Input nao encontrado: %s\n Causa provavel: arquivo EBD nao baixado.\n Verifique: https://ebird.org/data/download\n Skill anterior: ecological-data-foundation",
            ebd_path,
        )
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Build species list
    log_step(2, "Construir lista de especies")
    if species_input.endswith(".csv"):
        # A ".csv" argument is always a species list; a missing file is an
        # input error, not a species literally named "<something>.csv".
        if not Path(species_input).exists():
            logger.error(
                "Input nao encontrado: %s\n Causa provavel: lista de especies (CSV) inexistente.\n Skill anterior: ecological-data-foundation",
                species_input,
            )
            sys.exit(1)
        try:
            df_sp = pd.read_csv(species_input)
        except Exception as e:
            logger.error(
                "Falha ao ler lista de especies: %s\n Skill anterior: ecological-data-foundation", e,
            )
            sys.exit(1)
        if "scientificName" not in df_sp.columns:
            logger.error(
                "Coluna 'scientificName' nao encontrada em: %s\n Skill anterior: ecological-data-foundation",
                species_input,
            )
            sys.exit(1)
        # Strip stray whitespace so names match the EBD exactly, as is already
        # done for a single species given on the command line.
        names = df_sp["scientificName"].dropna().astype(str).str.strip()
        species_list = names[names != ""].unique().tolist()
        logger.info("Modo batch: %d especies carregadas", len(species_list))
    else:
        species_list = [species_input.strip()]
        logger.info("Modo especie unica: %s", species_list[0])

    species_set = set(species_list)

    # Parse EBD
    log_step(3, "Analisar arquivo EBD em chunks")
    try:
        ebd_df = parse_ebd(ebd_path, species_set, year_from, year_to, country_code)
    except Exception as e:
        logger.error(
            "Falha ao analisar EBD: %s\n Causa provavel: arquivo corrompido ou incompativel.\n Skill anterior: ecological-data-foundation",
            e,
        )
        sys.exit(1)

    logger.info("Total de registros filtrados do EBD: %d", len(ebd_df))

    if len(ebd_df) == 0:
        logger.warning("Nenhum registro encontrado. Verifique nomes das especies e periodo.")
        return

    today_str = date.today().strftime("%Y%m%d")

    # Save per species
    log_step(4, "Padronizar e gravar CSVs por especie")
    failed = []
    for sp in species_list:
        try:
            std = standardise_ebd(ebd_df, sp)
            n_sp = len(std)
            logger.info("Especie '%s': %d registros com coordenadas", sp, n_sp)

            if n_sp == 0:
                logger.warning("Nenhum registro para '%s'.", sp)
                continue
            if n_sp < 30:
                logger.warning(
                    "Registros insuficientes para SDM confiavel para '%s' (n = %d).", sp, n_sp,
                )

            safe_name = sp.replace(" ", "_")
            csv_path = output_dir / f"occurrences_raw_eBird_{safe_name}_{today_str}.csv"
            std.to_csv(csv_path, index=False)
            logger.info("Gravado: %s", csv_path)

            save_metadata(output_dir, sp, n_sp, year_from, year_to, country_code, ebd_path)

        except Exception as e:
            # Keep going with the remaining species, but remember the failure
            # so the run is not reported as a success.
            logger.error(
                "Falha ao processar especie '%s': %s\n Skill anterior: ecological-data-foundation",
                sp, e,
            )
            failed.append(sp)

    if failed:
        logger.error(
            "Processamento eBird concluido com falhas em %d especie(s): %s\n Skill anterior: ecological-data-foundation",
            len(failed), ", ".join(failed),
        )
        sys.exit(1)

    logger.info("Todos os processamentos eBird concluidos. Verifique: %s", output_dir)


if __name__ == "__main__":
    main()
@@ -0,0 +1,315 @@
1
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ # Usage: Rscript download_from_gbif.R <species_name_or_list_csv> <output_dir> [country_code] [year_from] [year_to]
5
+
6
# ── Inline logger ─────────────────────────────────────────────────────────────
# Minimal logger built on message(): every line has the form
# "[YYYY-MM-DD HH:MM:SS] [LEVEL] text", where `text` is produced by sprintf()
# from the arguments passed to the log_* call.
SKILL_NAME <- "ecological-data-foundation"

# Current local time, already wrapped in square brackets.
.log_ts <- function() {
  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
}

# Shared emitter behind the level-specific wrappers below.
.log_emit <- function(level, ...) {
  message(.log_ts(), sprintf(" [%s] ", level), sprintf(...))
}

# Level wrappers: the arguments are a sprintf() format followed by its values.
log_info <- function(...) .log_emit("INFO", ...)
log_warn <- function(...) .log_emit("WARN", ...)
log_error <- function(...) .log_emit("ERROR", ...)

# Announce a numbered pipeline step (`n`) with a short description (`d`).
log_step <- function(n, d) {
  log_info("-- STEP %d: %s", n, d)
}

# Record a parameter choice: name (`v`), chosen value (`val`), rationale (`why`).
log_decision <- function(v, val, why) {
  log_info("DECISION | %s = %s | %s", v, val, why)
}

dir.create("logs", recursive = TRUE, showWarnings = FALSE)
15
+
16
+ #
17
+ # Arguments:
18
+ # species_name_or_list_csv : Either a species name (e.g., "Panthera onca") or
19
+ # path to a CSV file with column "scientificName"
20
+ # output_dir : Directory to write outputs (created if absent)
21
+ # country_code : ISO 3166-1 alpha-2 country code to restrict records (optional)
22
+ # year_from : Minimum year of occurrence records (optional, default: 1950)
23
+ # year_to : Maximum year of occurrence records (optional, default: current year)
24
+ #
25
+ # Outputs (per species):
26
+ # occurrences_raw_GBIF_{species}_{date}.csv — cleaned occurrence records
27
+ # download_metadata_{species}.txt — download info; includes the DOI for citation when occ_download was used
28
+
29
+ suppressPackageStartupMessages(library(rgbif))
30
+ suppressPackageStartupMessages(library(dplyr))
31
+ suppressPackageStartupMessages(library(readr))
32
+
33
# ── 1. Parse arguments ──────────────────────────────────────────────────────
# Positional CLI arguments: species (a name or a CSV path), output directory
# and, optionally, country code, first year and last year. With fewer than two
# arguments the script runs with built-in test defaults.
log_step(1, "Analisar argumentos da linha de comando")
args <- commandArgs(trailingOnly = TRUE)

default_year_from <- 1950L
default_year_to <- as.integer(format(Sys.Date(), "%Y"))

# Return args[position] as an integer year, or `default` when that argument is
# absent or blank. Aborts when the value is not numeric: as.integer() would
# otherwise yield NA and the NA would be forwarded to the GBIF query.
.parse_year_arg <- function(args, position, label, default) {
  if (length(args) < position || !nzchar(trimws(args[position]))) {
    return(default)
  }
  year <- suppressWarnings(as.integer(trimws(args[position])))
  if (is.na(year)) {
    log_error(
      "Argumento '%s' invalido: '%s'\nCausa provavel: o ano deve ser um numero inteiro (ex.: 1950).\nSkill anterior: ecological-data-foundation",
      label, args[position]
    )
    stop("Invalid ", label, ": ", args[position], call. = FALSE)
  }
  year
}

if (length(args) < 2) {
  species_input <- "Panthera onca"
  output_dir <- "output/gbif"
  country_code <- NULL
  year_from <- default_year_from
  year_to <- default_year_to
  log_warn("Menos de 2 argumentos fornecidos. Usando valores padrao para teste.")
} else {
  species_input <- args[1]
  output_dir <- args[2]
  # ISO 3166-1 alpha-2 codes are upper-case; normalise so "br" works like "BR".
  country_code <- if (length(args) >= 3 && nzchar(trimws(args[3]))) {
    toupper(trimws(args[3]))
  } else {
    NULL
  }
  year_from <- .parse_year_arg(args, 4L, "year_from", default_year_from)
  year_to <- .parse_year_arg(args, 5L, "year_to", default_year_to)
}

# An inverted range can never match a record; fail before any network call.
if (year_from > year_to) {
  log_error(
    "Periodo invalido: year_from (%d) maior que year_to (%d).\nSkill anterior: ecological-data-foundation",
    year_from, year_to
  )
  stop("year_from must not be greater than year_to", call. = FALSE)
}

country_label <- if (is.null(country_code)) "nenhum" else country_code

log_info("Script: download_from_gbif.R | Skill: %s", SKILL_NAME)
log_info("Species input : %s", species_input)
log_info("Output dir : %s", output_dir)
log_info("Country code : %s", country_label)
log_info("Year range : %d - %d", year_from, year_to)

log_decision("year_from", year_from, "limite inferior do periodo de registros; 1950 = pos-era moderna")
log_decision("year_to", year_to, "limite superior do periodo de registros; ano corrente por padrao")
log_decision(
  "coord_uncertainty_max_m", "10000",
  "excluir registros com incerteza de coordenada > 10 km (imprecisao inaceitavel para SDM)"
)
64
+
65
# ── 2. Create output directory ───────────────────────────────────────────────
log_step(2, "Criar diretorio de saida")
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# dir.create() signals failure only through its (silenced) warning and a FALSE
# return value, and also returns FALSE when the directory already exists, so
# test for the directory itself before reporting success.
if (!dir.exists(output_dir)) {
  log_error(
    "Falha ao criar diretorio de saida: %s\nCausa provavel: sem permissao de escrita ou caminho invalido.\nSkill anterior: ecological-data-foundation",
    output_dir
  )
  stop("Could not create output directory: ", output_dir, call. = FALSE)
}
log_info("Diretorio de saida pronto: %s", output_dir)
69
+
70
# ── 3. Build species list ────────────────────────────────────────────────────
# `species_input` is treated as a batch file when it ends in ".csv" and exists
# on disk; its "scientificName" column supplies the names. Anything else is
# taken to be a single species name.
log_step(3, "Construir lista de especies")
is_species_csv <- grepl("\\.csv$", species_input, ignore.case = TRUE) &&
  file.exists(species_input)

if (is_species_csv) {
  species_df <- tryCatch(
    read_csv(species_input, show_col_types = FALSE),
    error = function(e) {
      log_error(
        "Falha ao ler lista de especies: %s\nCausa provavel: arquivo CSV invalido ou ausente.\nVerifique: %s\nSkill anterior: ecological-data-foundation",
        conditionMessage(e), species_input
      )
      stop(e)
    }
  )

  # Checked outside the tryCatch so this failure is logged exactly once.
  if (!"scientificName" %in% names(species_df)) {
    log_error(
      "Coluna 'scientificName' nao encontrada em: %s\nCausa provavel: CSV de lista de especies mal formatado.\nVerifique o cabecalho do arquivo.\nSkill anterior: ecological-data-foundation",
      species_input
    )
    stop("Missing column 'scientificName' in: ", species_input, call. = FALSE)
  }

  species_list <- unique(trimws(as.character(species_df[["scientificName"]])))
  # Blank cells and NA are not species names; drop them before querying GBIF.
  species_list <- species_list[!is.na(species_list) & nzchar(species_list)]
  log_info("Modo batch: %d especies carregadas de %s", length(species_list), species_input)
  log_decision("mode", "batch", "argumento e um CSV valido com coluna scientificName")
} else {
  species_list <- trimws(species_input)
  species_list <- species_list[!is.na(species_list) & nzchar(species_list)]
  if (length(species_list) > 0) {
    log_info("Modo especie unica: %s", species_list)
  }
  log_decision("mode", "single_species", "argumento nao e um arquivo CSV existente")
}

# Without at least one usable name there is nothing to download.
if (length(species_list) == 0) {
  log_error(
    "Nenhum nome de especie valido em: %s\nCausa provavel: nome vazio ou coluna 'scientificName' sem valores.\nSkill anterior: ecological-data-foundation",
    species_input
  )
  stop("No valid species names found in: ", species_input, call. = FALSE)
}
98
+
99
# ── 4. Default filters ───────────────────────────────────────────────────────
log_step(4, "Definir filtros padrao de download")
# Filters shared by every species download:
#   hasCoordinate = TRUE                   georeferenced records only
#   occurrenceStatus = PRESENT             presence records only
#   basisOfRecord                          observations and preserved specimens
#                                          (no literature, no fossils)
#   coordinateUncertaintyInMeters < 10000  drop records coarser than 10 km

basis_of_record_values <- c("HUMAN_OBSERVATION", "MACHINE_OBSERVATION", "PRESERVED_SPECIMEN")
basis_of_record_label <- paste(basis_of_record_values, collapse = ",")

log_decision(
  "basis_of_record",
  basis_of_record_label,
  "apenas observacoes de campo/especimes; exclui literatura e fosseis"
)
117
+
118
# ── 5. Download function (single species) ────────────────────────────────────
# Download GBIF occurrence records for one species and write them, together
# with a metadata text file, into `output_dir`.
#
# Reads the script-level settings `output_dir`, `country_code`, `year_from`,
# `year_to` and `basis_of_record_values`.
#
# Args:
#   sp_name: scientific name to match against the GBIF backbone.
# Returns (invisibly):
#   Path of the CSV written, or NULL when the name has no backbone match or
#   the query returned no records (nothing is written in those cases).
# Errors:
#   Failures of the GBIF calls or of the file writes are logged and rethrown;
#   the caller decides whether to continue with the next species.
download_species <- function(sp_name) {
  log_info("--- Iniciando download: %s ---", sp_name)
  today_str <- format(Sys.Date(), "%Y%m%d")
  safe_name <- gsub(" ", "_", sp_name, fixed = TRUE)

  # Lookup GBIF taxon key (backbone match)
  taxon_match <- tryCatch(
    name_backbone(name = sp_name, rank = "SPECIES"),
    error = function(e) {
      log_error(
        "Falha ao buscar taxon key no backbone GBIF para '%s': %s\nCausa provavel: sem conexao com a internet ou API do GBIF indisponivel.\nVerifique sua conexao e tente novamente.\nSkill anterior: ecological-data-foundation",
        sp_name, conditionMessage(e)
      )
      stop(e)
    }
  )

  taxon_key <- taxon_match[["usageKey"]]
  if (is.null(taxon_key) || length(taxon_key) == 0 || is.na(taxon_key[1])) {
    log_warn("Taxon key GBIF nao encontrado para '%s'. Pulando.", sp_name)
    return(invisible(NULL))
  }
  log_info("Taxon key GBIF: %d para '%s'", taxon_key, sp_name)

  # A fuzzy or higher-rank match may point at a different taxon than the one
  # requested; surface it so the key can be checked by hand.
  match_type <- taxon_match[["matchType"]]
  if (!is.null(match_type) && !is.na(match_type[1]) && match_type[1] != "EXACT") {
    log_warn(
      "Correspondencia nao exata no backbone GBIF para '%s' (matchType = %s). Confira o taxon key.",
      sp_name, match_type[1]
    )
  }

  # Build predicates for occ_download
  preds <- list(
    pred("taxonKey", taxon_key),
    pred("hasCoordinate", TRUE),
    pred("occurrenceStatus", "PRESENT"),
    pred_in("basisOfRecord", basis_of_record_values),
    pred_lt("coordinateUncertaintyInMeters", 10000),
    pred_gte("year", year_from),
    pred_lte("year", year_to)
  )
  if (!is.null(country_code)) {
    preds <- c(preds, list(pred("country", country_code)))
    log_info("Filtro de pais aplicado: %s", country_code)
  }

  # Decide between occ_search (quick, no DOI) and occ_download (DOI,
  # reproducible) from an approximate record count. A failed count is treated
  # as a small dataset.
  count_check <- tryCatch(
    occ_count(
      taxonKey = taxon_key,
      hasCoordinate = TRUE,
      occurrenceStatus = "PRESENT"
    ),
    error = function(e) {
      log_warn(
        "Falha ao consultar contagem de registros para '%s': %s. Assumindo dataset pequeno.",
        sp_name, conditionMessage(e)
      )
      0L
    }
  )
  log_info("Contagem aproximada de registros (sem filtros): %d", count_check)

  if (count_check > 50000) {
    log_decision(
      "download_method", "occ_download",
      sprintf("dataset grande (%d registros) -> download assincrono com DOI para reprodutibilidade", count_check)
    )
    log_info("Usando occ_download (dataset grande; DOI sera gerado)...")

    dl_key <- tryCatch(
      do.call(occ_download, preds),
      error = function(e) {
        log_error(
          "Falha ao iniciar occ_download para '%s': %s\nCausa provavel: credenciais GBIF ausentes (GBIF_USER, GBIF_PWD, GBIF_EMAIL) ou API indisponivel.\nVerifique: usethis::edit_r_environ() e adicione as variaveis GBIF.\nSkill anterior: ecological-data-foundation",
          sp_name, conditionMessage(e)
        )
        stop(e)
      }
    )

    log_info("Download iniciado. Aguardando conclusao (verificacao a cada 30s)...")
    occ_download_wait(dl_key, status_ping = 30)

    # Retrieve DOI from metadata; normalise a missing DOI to NA so the
    # metadata lines below always receive a length-one value.
    meta <- occ_download_meta(dl_key)
    doi <- meta$doi
    if (is.null(doi) || length(doi) == 0 || is.na(doi[1])) {
      doi <- NA_character_
    }
    log_info("DOI gerado: %s", if (is.na(doi)) "N/D" else doi)

    # Import data
    occ_raw <- tryCatch({
      occ_download_get(dl_key, path = tempdir()) |>
        occ_download_import()
    }, error = function(e) {
      log_error(
        "Falha ao importar download do GBIF para '%s': %s\nCausa provavel: arquivo de download corrompido ou expirado.\nVerifique o status em: https://www.gbif.org/user/download\nSkill anterior: ecological-data-foundation",
        sp_name, conditionMessage(e)
      )
      stop(e)
    })
  } else {
    log_decision(
      "download_method", "occ_search",
      sprintf("dataset pequeno (%d registros) -> occ_search e mais rapido; sem DOI", count_check)
    )
    log_info("Usando occ_search (dataset pequeno)...")
    log_warn("occ_search nao gera DOI. Para publicacoes, use occ_download.")
    doi <- NA_character_
    dl_key <- NA_character_

    search_result <- tryCatch(
      occ_search(
        taxonKey = taxon_key,
        hasCoordinate = TRUE,
        occurrenceStatus = "PRESENT",
        # occ_search issues one request per element of a vector argument and
        # then returns a list of results with no top-level `$data`. Several
        # values for a single request must be a ";"-separated string.
        basisOfRecord = paste(basis_of_record_values, collapse = ";"),
        # Range queries use a "lower,upper" string (as `year` does below).
        # The range includes 10000 itself, unlike pred_lt() on the
        # occ_download path.
        coordinateUncertaintyInMeters = "0,10000",
        year = paste(year_from, year_to, sep = ","),
        country = country_code,
        limit = 100000,
        fields = "minimal"
      ),
      error = function(e) {
        log_error(
          "Falha em occ_search para '%s': %s\nCausa provavel: sem conexao com a internet ou API do GBIF indisponivel.\nVerifique sua conexao e tente novamente.\nSkill anterior: ecological-data-foundation",
          sp_name, conditionMessage(e)
        )
        stop(e)
      }
    )
    occ_raw <- search_result$data
  }

  # `$data` is NULL when nothing matched, and nrow(NULL) is NULL, so guard the
  # count before it is used in a comparison.
  n_raw <- if (is.null(occ_raw)) 0L else nrow(occ_raw)
  log_info("Registros recuperados: %d", n_raw)

  # Same policy as the eBird script: an empty result writes no files.
  if (n_raw == 0) {
    log_warn("Nenhum registro para '%s'.", sp_name)
    return(invisible(NULL))
  }

  if (n_raw < 30) {
    log_warn(
      "Registros insuficientes para SDM confiavel (n = %d). Considere: (1) relaxar filtros, (2) ampliar escopo geografico, (3) usar outra base de dados.",
      n_raw
    )
  }

  # ── Save occurrence CSV ───────────────────────────────────────────────────
  csv_name <- file.path(
    output_dir,
    paste0("occurrences_raw_GBIF_", safe_name, "_", today_str, ".csv")
  )
  tryCatch({
    write_csv(occ_raw, csv_name)
    log_info("Gravado: %s", csv_name)
  }, error = function(e) {
    log_error(
      "Falha ao gravar CSV de ocorrencias para '%s': %s\nCausa provavel: sem permissao de escrita em '%s'.\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e), output_dir
    )
    stop(e)
  })

  # ── Save metadata (including DOI for citation) ────────────────────────────
  has_doi <- !is.na(doi)
  doi_text <- if (has_doi) doi else "NOT AVAILABLE (used occ_search)"
  citation_text <- if (has_doi) {
    paste0(
      "GBIF.org (", format(Sys.Date(), "%Y"), ") GBIF Occurrence Download. ",
      "https://doi.org/", doi, " Accessed on ", Sys.Date()
    )
  } else {
    "occ_search used — no citable DOI. Re-run with occ_download for publication."
  }
  country_text <- if (is.null(country_code)) "none" else country_code

  meta_text <- c(
    paste("Species:", sp_name),
    paste("GBIF taxon key:", taxon_key),
    paste("Download key:", dl_key),
    paste("DOI:", doi_text),
    paste("Citation:", citation_text),
    paste("Download date:", Sys.Date()),
    paste("n_records:", n_raw),
    paste("year_from:", year_from),
    paste("year_to:", year_to),
    paste("country_filter:", country_text),
    paste("basisOfRecord:", paste(basis_of_record_values, collapse = ", ")),
    paste("coordinateUncertainty_max_m: 10000")
  )

  meta_path <- file.path(output_dir, paste0("download_metadata_", safe_name, ".txt"))
  tryCatch({
    writeLines(meta_text, meta_path)
    log_info("Gravado: %s", meta_path)
  }, error = function(e) {
    log_error(
      "Falha ao gravar metadados para '%s': %s\nCausa provavel: sem permissao de escrita em '%s'.\nSkill anterior: ecological-data-foundation",
      sp_name, conditionMessage(e), output_dir
    )
    stop(e)
  })

  invisible(csv_name)
}
300
+
301
# ── 6. Run for all species ───────────────────────────────────────────────────
# Each species is processed independently: an error for one species is logged
# and the run continues with the next. Species that download_species() skips
# on its own (it returns NULL after logging a warning) are not counted as
# failures.
log_step(5, "Executar download para todas as especies")
download_ok <- vapply(
  species_list,
  function(sp) {
    tryCatch(
      {
        download_species(sp)
        TRUE
      },
      error = function(e) {
        log_error(
          "Falha ao baixar '%s': %s\nCausa provavel: problema de rede, taxon nao encontrado ou credenciais GBIF invalidas.\nVerifique os logs acima para detalhes.\nSkill anterior: ecological-data-foundation",
          sp, conditionMessage(e)
        )
        FALSE
      }
    )
  },
  logical(1),
  USE.NAMES = FALSE
)

# Report failures explicitly instead of always claiming that every download
# completed.
failed_species <- species_list[!download_ok]
if (length(failed_species) > 0) {
  log_warn(
    "%d de %d especies falharam: %s",
    length(failed_species), length(species_list),
    paste(failed_species, collapse = "; ")
  )
  log_info("Processamento concluido com falhas. Verifique: %s", output_dir)
} else {
  log_info("Todos os downloads concluidos. Verifique: %s", output_dir)
}