ecological-agent-skills 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT_CONTEXT.md +191 -0
- package/CATALOG.md +329 -0
- package/LICENSE +692 -0
- package/README.md +347 -0
- package/bin/install.mjs +168 -0
- package/docs/comparison-with-alternatives.md +38 -0
- package/docs/global-examples-index.md +103 -0
- package/docs/repository-statistics.md +101 -0
- package/docs/theoretical-foundations.md +188 -0
- package/environment.yaml +106 -0
- package/examples/community/arctic_tundra_vegetation_example.md +247 -0
- package/examples/community/bird_landuse_example.md +63 -0
- package/examples/community/phytoplankton_reservoir_example.md +60 -0
- package/examples/community/reef_fish_indopacific_example.md +221 -0
- package/examples/impact/baci_road_example.md +57 -0
- package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
- package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
- package/examples/occupancy/puma_camera_example.md +61 -0
- package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
- package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
- package/examples/sdm/anteater_cerrado_example.md +69 -0
- package/examples/sdm/jaguar_amazon_example.md +80 -0
- package/examples/sdm/koala_climate_change_example.md +170 -0
- package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
- package/package.json +43 -0
- package/renv.lock +194 -0
- package/skills/SKILL_INDEX.json +1020 -0
- package/skills/acoustic-monitoring/SKILL.md +163 -0
- package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
- package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
- package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
- package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
- package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
- package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
- package/skills/biostatistics-workbench/SKILL.md +140 -0
- package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
- package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
- package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
- package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
- package/skills/camera-trap-processing/SKILL.md +159 -0
- package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
- package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
- package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
- package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
- package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
- package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
- package/skills/community-ecology-ordination/SKILL.md +133 -0
- package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
- package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
- package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
- package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
- package/skills/ecological-data-foundation/SKILL.md +129 -0
- package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
- package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
- package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
- package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
- package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
- package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
- package/skills/ecological-impact-assessment/SKILL.md +123 -0
- package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
- package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
- package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
- package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
- package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
- package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
- package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
- package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
- package/skills/ecosystem-services-assessment/SKILL.md +125 -0
- package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
- package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
- package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
- package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
- package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
- package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
- package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
- package/skills/environmental-time-series/SKILL.md +125 -0
- package/skills/environmental-time-series/examples/example-prompts.md +33 -0
- package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
- package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
- package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
- package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
- package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
- package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
- package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
- package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
- package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
- package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
- package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
- package/skills/landscape-connectivity/SKILL.md +170 -0
- package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
- package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
- package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
- package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
- package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
- package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
- package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
- package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
- package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
- package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
- package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
- package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
- package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
- package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
- package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
- package/skills/occupancy-and-detection/SKILL.md +126 -0
- package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
- package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
- package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
- package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
- package/skills/population-viability-analysis/SKILL.md +161 -0
- package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
- package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
- package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
- package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
- package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
- package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
- package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
- package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
- package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
- package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
- package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
- package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
- package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
- package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
- package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
- package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
- package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
- package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
- package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
- package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
- package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
- package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
- package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
- package/skills/spatial-prioritization/SKILL.md +162 -0
- package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
- package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
- package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
- package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
- package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
- package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
- package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
- package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
- package/skills/species-distribution-modeling/SKILL.md +139 -0
- package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
- package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
- package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
- package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
- package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
- package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
- package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
- package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
- package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
- package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
- package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
- package/templates/SKILL_TEMPLATE.md +225 -0
- package/templates/checklists/data-submission-checklist.md +38 -0
- package/templates/checklists/post-analysis-checklist.md +55 -0
- package/templates/checklists/pre-analysis-checklist.md +31 -0
- package/templates/prompts/debug-skill.md +47 -0
- package/templates/prompts/invoke-skill.md +34 -0
- package/templates/prompts/invoke-workflow.md +45 -0
- package/templates/reports/technical-report-template.md +80 -0
- package/templates/scripts/logger_setup.R +79 -0
- package/templates/scripts/logger_setup.py +119 -0
- package/templates/scripts/params_loader.R +28 -0
- package/templates/scripts/params_loader.py +38 -0
- package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
- package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
- package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
- package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
- package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
- package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
- package/workflows/produce-technical-report/WORKFLOW.md +113 -0
- package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
- package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
- package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
- package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
- package/workflows/run-population-viability/WORKFLOW.md +90 -0
- package/workflows/run-sdm-study/WORKFLOW.md +99 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# GBIF Data Citation Guide
|
|
2
|
+
|
|
3
|
+
How to download, cite, and document GBIF occurrence data correctly for scientific publications.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 1. Why Citation Method Matters
|
|
8
|
+
|
|
9
|
+
GBIF provides two download mechanisms with fundamentally different citability:
|
|
10
|
+
|
|
11
|
+
| Method | DOI generated | Citable for publication | Reproducible | Use case |
|
|
12
|
+
|---|---|---|---|---|
|
|
13
|
+
| `occ_search()` / `pygbif.occurrences.search()` | **No** | No — not recommended for peer review | No (results may change) | Exploration, pilot analysis, dashboards |
|
|
14
|
+
| `occ_download()` / `pygbif.occurrences.download()` | **Yes** | Yes — required for publication | Yes (snapshot frozen) | All published analyses |
|
|
15
|
+
|
|
16
|
+
**Rule:** If your analysis will appear in a publication or technical report, always
|
|
17
|
+
use the download API (`occ_download` / `occurrences.download`) to obtain a citable DOI.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## 2. How to Cite GBIF Data Correctly
|
|
22
|
+
|
|
23
|
+
### Standard GBIF citation format (APA-style)
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
GBIF.org (YEAR) GBIF Occurrence Download. https://doi.org/10.15468/dl.XXXXXXX
|
|
27
|
+
Accessed on YYYY-MM-DD.
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**Example:**
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
GBIF.org (2024) GBIF Occurrence Download.
|
|
34
|
+
https://doi.org/10.15468/dl.abc123
|
|
35
|
+
Accessed on 2024-03-15.
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Fields required in every citation
|
|
39
|
+
|
|
40
|
+
| Field | Example | Notes |
|
|
41
|
+
|---|---|---|
|
|
42
|
+
| Portal name | `GBIF.org` | Always "GBIF.org", not the institution name |
|
|
43
|
+
| Year of download | `(2024)` | Year the download was created |
|
|
44
|
+
| Record type | `GBIF Occurrence Download` | Fixed string |
|
|
45
|
+
| DOI | `https://doi.org/10.15468/dl.XXXXXXX` | Full URL, not just the suffix |
|
|
46
|
+
| Access date | `Accessed on 2024-03-15` | ISO 8601 date format |
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## 3. Retrieving the DOI from a Download in R
|
|
51
|
+
|
|
52
|
+
```r
|
|
53
|
+
suppressPackageStartupMessages(library(rgbif))
|
|
54
|
+
|
|
55
|
+
# Initiate download (runs asynchronously on GBIF servers)
|
|
56
|
+
download_key <- occ_download(
|
|
57
|
+
pred("taxonKey", 2435098), # GBIF taxon key for your species
|
|
58
|
+
pred("hasCoordinate", TRUE),
|
|
59
|
+
pred("occurrenceStatus", "PRESENT"),
|
|
60
|
+
pred_in("basisOfRecord", c("HUMAN_OBSERVATION",
|
|
61
|
+
"MACHINE_OBSERVATION",
|
|
62
|
+
"PRESERVED_SPECIMEN")),
|
|
63
|
+
pred_lt("coordinateUncertaintyInMeters", 10000),
|
|
64
|
+
format = "SIMPLE_CSV"
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Wait for completion (polls GBIF every 5 seconds by default; tune with `status_ping`)
|
|
68
|
+
occ_download_wait(download_key)
|
|
69
|
+
|
|
70
|
+
# Retrieve metadata (includes DOI)
|
|
71
|
+
meta <- occ_download_meta(download_key)
|
|
72
|
+
doi <- meta$doi # e.g., "10.15468/dl.abc123"
|
|
73
|
+
cat("Download DOI:", doi, "\n")
|
|
74
|
+
|
|
75
|
+
# Save DOI to metadata file for citation
|
|
76
|
+
writeLines(
|
|
77
|
+
c(
|
|
78
|
+
paste("GBIF download key:", download_key),
|
|
79
|
+
paste("DOI:", doi),
|
|
80
|
+
paste(paste0("Citation: GBIF.org (", format(Sys.Date(), "%Y"), ") GBIF Occurrence Download."),
|
|
81
|
+
paste0("https://doi.org/", doi),
|
|
82
|
+
"Accessed on", format(Sys.Date(), "%Y-%m-%d")),
|
|
83
|
+
paste("Download date:", Sys.Date()),
|
|
84
|
+
paste("Species:", "Panthera onca") # replace with your species
|
|
85
|
+
),
|
|
86
|
+
"download_metadata.txt"
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Import the data
|
|
90
|
+
occ_data <- occ_download_get(download_key) |> occ_download_import()
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## 4. Retrieving the DOI from a Download in Python
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
import pygbif.occurrences as occ
|
|
99
|
+
import time
|
|
100
|
+
from pathlib import Path
|
|
101
|
+
from datetime import date
|
|
102
|
+
|
|
103
|
+
# Initiate download
|
|
104
|
+
download_key = occ.download([
|
|
105
|
+
"taxonKey = 2435098", # replace with actual taxon key
|
|
106
|
+
"hasCoordinate = TRUE",
|
|
107
|
+
"occurrenceStatus = PRESENT",
|
|
108
|
+
"basisOfRecord in HUMAN_OBSERVATION,MACHINE_OBSERVATION,PRESERVED_SPECIMEN",
|
|
109
|
+
"coordinateUncertaintyInMeters < 10000"
|
|
110
|
+
])
|
|
111
|
+
|
|
112
|
+
# Poll until complete
|
|
113
|
+
while True:
|
|
114
|
+
status = occ.download_meta(download_key[0])["status"]
|
|
115
|
+
print(f"Status: {status}")
|
|
116
|
+
if status == "SUCCEEDED":
|
|
117
|
+
break
|
|
118
|
+
elif status in ("FAILED", "KILLED", "CANCELLED"):
|
|
119
|
+
raise RuntimeError("GBIF download failed")
|
|
120
|
+
time.sleep(30)
|
|
121
|
+
|
|
122
|
+
# Get DOI from metadata
|
|
123
|
+
meta = occ.download_meta(download_key[0])
|
|
124
|
+
doi = meta.get("doi", "")
|
|
125
|
+
print(f"Download DOI: {doi}")
|
|
126
|
+
|
|
127
|
+
# Save metadata
|
|
128
|
+
output_dir = Path("output/gbif")
|
|
129
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
130
|
+
|
|
131
|
+
with open(output_dir / "download_metadata.txt", "w") as f:
|
|
132
|
+
f.write(f"GBIF download key: {download_key[0]}\n")
|
|
133
|
+
f.write(f"DOI: {doi}\n")
|
|
134
|
+
f.write(f"Citation: GBIF.org ({date.today().year}) GBIF Occurrence Download. "
|
|
135
|
+
f"https://doi.org/{doi} Accessed on {date.today().isoformat()}\n")
|
|
136
|
+
f.write(f"Download date: {date.today().isoformat()}\n")
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## 5. Registering the DOI in data_provenance.md
|
|
142
|
+
|
|
143
|
+
Every project using GBIF data must have a `data_provenance.md` at the project root.
|
|
144
|
+
Add an entry like:
|
|
145
|
+
|
|
146
|
+
```markdown
|
|
147
|
+
## GBIF Occurrence Data
|
|
148
|
+
|
|
149
|
+
| Species | GBIF Taxon Key | Download Key | DOI | Download Date | Filters Applied |
|
|
150
|
+
|---|---|---|---|---|---|
|
|
151
|
+
| *Panthera onca* | 2435098 | 0001234-240101 | [10.15468/dl.abc123](https://doi.org/10.15468/dl.abc123) | 2024-03-15 | hasCoordinate, PRESENT, uncertainty < 10 km |
|
|
152
|
+
| *Chrysocyon brachyurus* | 2441050 | 0001235-240101 | [10.15468/dl.def456](https://doi.org/10.15468/dl.def456) | 2024-03-15 | hasCoordinate, PRESENT, uncertainty < 10 km |
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## 6. occ_search vs occ_download — Decision Guide
|
|
158
|
+
|
|
159
|
+
| Situation | Use |
|
|
160
|
+
|---|---|
|
|
161
|
+
| Exploring data availability, checking record counts | `occ_search` (no DOI needed) |
|
|
162
|
+
| Pilot/exploratory analysis not for publication | `occ_search` acceptable |
|
|
163
|
+
| Analysis to be included in a paper, report, or thesis | **`occ_download` required** |
|
|
164
|
+
| Dataset with > 100,000 records | **`occ_download` required** (occ_search limited to 100k) |
|
|
165
|
+
| Reproducible analysis shared with collaborators | **`occ_download` required** |
|
|
166
|
+
| Training an SDM for conservation planning | **`occ_download` required** |
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## 7. Common Pitfalls
|
|
171
|
+
|
|
172
|
+
- **Citing the GBIF portal URL instead of the DOI:** `https://www.gbif.org/occurrence/search?...` is
|
|
173
|
+
not citable. Always use the download DOI.
|
|
174
|
+
- **Using `occ_search` for final analysis:** results from `occ_search` are not frozen;
|
|
175
|
+
re-running the same query months later may return different records. Only `occ_download`
|
|
176
|
+
creates a reproducible, citable snapshot.
|
|
177
|
+
- **Forgetting to record the download date:** required even when DOI is present.
|
|
178
|
+
- **Not saving `download_metadata.txt`:** always save alongside the occurrence CSV.
|
|
179
|
+
- **Using taxon name instead of taxon key:** names can be ambiguous. Use the GBIF
|
|
180
|
+
backbone taxon key (`name_backbone("Panthera onca")$usageKey` in rgbif) for unambiguous queries.
|
|
181
|
+
- **Not filtering `coordinateUncertaintyInMeters`:** records with large uncertainty
|
|
182
|
+
(> 10 km) should be excluded or handled explicitly.
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## 8. References
|
|
187
|
+
|
|
188
|
+
| Resource | URL |
|
|
189
|
+
|---|---|
|
|
190
|
+
| GBIF citation guidelines | https://www.gbif.org/citation-guidelines |
|
|
191
|
+
| rgbif R package | https://docs.ropensci.org/rgbif/ |
|
|
192
|
+
| pygbif Python package | https://pygbif.readthedocs.io/ |
|
|
193
|
+
| GBIF DOI minting policy | https://www.gbif.org/faq?question=what-doi-does-gbif-assign-to-downloaded-data |
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# QA Checklist — Ecological Data Foundation
|
|
2
|
+
|
|
3
|
+
Use this checklist for every new dataset before proceeding to analysis.
|
|
4
|
+
|
|
5
|
+
## 1. File and Ingest
|
|
6
|
+
|
|
7
|
+
- [ ] Source file preserved in `data/raw/` (never overwrite)
|
|
8
|
+
- [ ] File encoding confirmed (UTF-8 preferred)
|
|
9
|
+
- [ ] Delimiter and quoting confirmed
|
|
10
|
+
- [ ] Row count matches expected (check for truncation)
|
|
11
|
+
- [ ] Column names documented
|
|
12
|
+
|
|
13
|
+
## 2. Schema and Types
|
|
14
|
+
|
|
15
|
+
- [ ] All fields mapped to Darwin Core (or equivalent standard)
|
|
16
|
+
- [ ] Dates parsed as ISO-8601 strings
|
|
17
|
+
- [ ] Coordinates as decimal degrees (float)
|
|
18
|
+
- [ ] Country codes as ISO 3166-1 alpha-2
|
|
19
|
+
- [ ] Categorical fields enumerated and valid entries listed
|
|
20
|
+
|
|
21
|
+
## 3. Duplicates
|
|
22
|
+
|
|
23
|
+
- [ ] Exact duplicates identified and counted
|
|
24
|
+
- [ ] Spatial-temporal near-duplicates checked (same species, same coords ± X km, same date ± Y days)
|
|
25
|
+
- [ ] Resolution strategy documented and applied
|
|
26
|
+
|
|
27
|
+
## 4. Coordinates
|
|
28
|
+
|
|
29
|
+
- [ ] Latitude in range [-90, 90]
|
|
30
|
+
- [ ] Longitude in range [-180, 180]
|
|
31
|
+
- [ ] Zero coordinates (0, 0) flagged
|
|
32
|
+
- [ ] Country centroid coordinates flagged
|
|
33
|
+
- [ ] Capital city coordinates flagged
|
|
34
|
+
- [ ] Coordinates fall within stated country/region polygon
|
|
35
|
+
- [ ] Coordinate uncertainty recorded where available
|
|
36
|
+
|
|
37
|
+
## 5. Taxonomy
|
|
38
|
+
|
|
39
|
+
- [ ] Species names checked against reference backbone (GBIF / Catalogue of Life)
|
|
40
|
+
- [ ] Synonyms resolved to accepted name
|
|
41
|
+
- [ ] Misspellings corrected (with original preserved)
|
|
42
|
+
- [ ] Higher-rank-only identifications flagged
|
|
43
|
+
- [ ] Hybrids and cultivars handled according to study scope
|
|
44
|
+
|
|
45
|
+
## 6. Temporal
|
|
46
|
+
|
|
47
|
+
- [ ] No dates in the future
|
|
48
|
+
- [ ] No dates before plausible survey era for the taxon
|
|
49
|
+
- [ ] Temporal precision meets study requirements
|
|
50
|
+
- [ ] Records with year-only precision flagged if day-level is needed
|
|
51
|
+
|
|
52
|
+
## 7. Attribute Ranges
|
|
53
|
+
|
|
54
|
+
- [ ] Numeric fields checked for biologically impossible values
|
|
55
|
+
- [ ] Missing value rate per field computed and documented
|
|
56
|
+
- [ ] Fields exceeding missing value threshold (default 20%) flagged for decision
|
|
57
|
+
|
|
58
|
+
## 8. Outputs
|
|
59
|
+
|
|
60
|
+
- [ ] `data_clean.csv` written with `QA_status` column
|
|
61
|
+
- [ ] `flagged_records.csv` written with reason codes
|
|
62
|
+
- [ ] `qa_report.md` summarises issue counts and resolutions
|
|
63
|
+
- [ ] `schema.yaml` documents all field definitions
|
|
64
|
+
- [ ] `metadata.xml` (EML or Dublin Core) completed
|
|
65
|
+
|
|
66
|
+
## QA Status Codes
|
|
67
|
+
|
|
68
|
+
| Code | Meaning |
|
|
69
|
+
|------|---------|
|
|
70
|
+
| `OK` | Record passed all checks |
|
|
71
|
+
| `COORD_CENTROID` | Coordinates at country/institution centroid |
|
|
72
|
+
| `COORD_ZERO` | Coordinates are (0, 0) |
|
|
73
|
+
| `COORD_OUT_OF_RANGE` | lat or lon outside valid bounds |
|
|
74
|
+
| `COORD_OUTSIDE_COUNTRY` | Point falls outside declared country polygon |
|
|
75
|
+
| `DATE_FUTURE` | Event date is in the future |
|
|
76
|
+
| `DATE_UNLIKELY` | Event date before plausible survey era |
|
|
77
|
+
| `DUPLICATE_EXACT` | Identical to another record |
|
|
78
|
+
| `DUPLICATE_SPATIOTEMPORAL` | Near-duplicate (spatial-temporal proximity) |
|
|
79
|
+
| `TAXON_SYNONYM` | Name is a synonym; resolved to accepted name |
|
|
80
|
+
| `TAXON_MISSPELLING` | Misspelling detected and corrected |
|
|
81
|
+
| `TAXON_HIGH_RANK` | Identified only to genus or higher |
|
|
82
|
+
| `MISSING_COORDS` | No coordinate information |
|
|
83
|
+
| `REMOVED` | Record excluded from clean dataset |
|
package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc
ADDED
|
Binary file
|
package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc
ADDED
|
Binary file
|
package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc
ADDED
|
Binary file
|
package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc
ADDED
|
Binary file
|
package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc
ADDED
|
Binary file
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
# Usage: Rscript clean_occurrences.R [raw_occurrences.csv] [output_dir]
|
|
5
|
+
|
|
6
|
+
# ── Inline logger ─────────────────────────────────────────────────────────────
|
|
7
|
+
SKILL_NAME <- "ecological-data-foundation"

# Timestamp prefix placed at the start of every log line,
# formatted as "[YYYY-MM-DD HH:MM:SS]".
.log_ts <- function() {
  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
}

# Shared emitter: timestamp, bracketed level tag, then the text produced by
# sprintf() from `...` (format string first, values after). Uses message().
.log_emit <- function(level, ...) {
  message(.log_ts(), " [", level, "] ", sprintf(...))
}

# Level-specific wrappers; all take sprintf()-style arguments.
log_info <- function(...) {
  .log_emit("INFO", ...)
}
log_warn <- function(...) {
  .log_emit("WARN", ...)
}
log_error <- function(...) {
  .log_emit("ERROR", ...)
}

# Announce pipeline step number `n` with its description `d`.
log_step <- function(n, d) {
  log_info("-- STEP %d: %s", n, d)
}

# Record a parameter choice: name `v`, chosen value `val`, rationale `why`.
log_decision <- function(v, val, why) {
  log_info("DECISION | %s = %s | %s", v, val, why)
}

# Make sure a logs/ directory exists in the working directory.
dir.create("logs", recursive = TRUE, showWarnings = FALSE)
|
|
15
|
+
|
|
16
|
+
# Standard occurrence cleaning pipeline
|
|
17
|
+
# Usage: Rscript clean_occurrences.R <input_csv> <output_dir>
|
|
18
|
+
# Requires: dplyr, readr, CoordinateCleaner, janitor (taxize is not loaded by this script)
|
|
19
|
+
|
|
20
|
+
suppressPackageStartupMessages({
|
|
21
|
+
library(dplyr)
|
|
22
|
+
library(readr)
|
|
23
|
+
library(CoordinateCleaner)
|
|
24
|
+
library(janitor)
|
|
25
|
+
})
|
|
26
|
+
|
|
27
|
+
# Positional command-line arguments: [1] input CSV path, [2] output directory.
# Each falls back to a project-relative default when it is not supplied.
args <- commandArgs(trailingOnly = TRUE)
# The conditions are scalar, so plain if/else is used instead of the
# vectorised ifelse().
input_file <- if (length(args) >= 1) args[1] else "data/raw/occurrences.csv"
output_dir <- if (length(args) >= 2) args[2] else "data/processed"

# Start-up banner recording what this run will read and where it will write.
log_info("Script: clean_occurrences.R | Skill: %s", SKILL_NAME)
log_info("Input file : %s", input_file)
log_info("Output dir : %s", output_dir)
|
|
34
|
+
|
|
35
|
+
# ── Input precondition check ──────────────────────────────────────────────────
|
|
36
|
+
# Abort early, with a diagnostic log entry, when the input CSV is not on disk.
input_present <- file.exists(input_file)
if (!input_present) {
  log_error(
    "Input nao encontrado: %s\nCausa provavel: arquivo nao gerado pelo passo anterior.\nVerifique a saida de: ecological-data-foundation (download_from_gbif)\nSkill anterior: ecological-data-foundation",
    input_file
  )
  stop("Missing: ", input_file)
}

# Create the output directory (and any missing parents); an already existing
# directory is not reported.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# Record the effective paths in the decision log.
log_decision("input_file", input_file, "caminho passado como args[1] ou padrao")
log_decision("output_dir", output_dir, "caminho passado como args[2] ou padrao")
|
|
48
|
+
|
|
49
|
+
# ── 1. Ingest ──────────────────────────────────────────────────────────────
|
|
50
|
+
log_step(1, "Ingerir dados brutos de ocorrencias")
# Read the CSV and normalise its column names with clean_names(). On failure,
# log a diagnostic and re-raise the original condition.
tryCatch(
  {
    raw <- clean_names(read_csv(input_file, show_col_types = FALSE))
    log_info("Registros brutos lidos: %d | Colunas: %d", nrow(raw), ncol(raw))
  },
  error = function(e) {
    log_error(
      "Falha ao ler CSV de entrada: %s\nCausa provavel: arquivo corrompido ou nao e CSV valido.\nVerifique: %s\nSkill anterior: ecological-data-foundation",
      conditionMessage(e), input_file
    )
    stop(e)
  }
)

# An empty input is only reported; the script continues.
if (identical(nrow(raw), 0L)) {
  log_warn("Arquivo de entrada nao contem registros: %s", input_file)
}
|
|
66
|
+
|
|
67
|
+
# ── 2. Require minimum columns ─────────────────────────────────────────────
|
|
68
|
+
log_step(2, "Verificar colunas obrigatorias")
# Stop when any column needed by the later steps is absent from the
# (name-normalised) input table.
tryCatch(
  {
    required_cols <- c("decimal_latitude", "decimal_longitude", "species")
    missing_req <- required_cols[!(required_cols %in% names(raw))]
    if (length(missing_req) > 0) {
      missing_txt <- paste(missing_req, collapse = ", ")
      log_error(
        "Colunas obrigatorias ausentes: %s\nCausa provavel: CSV gerado por fonte diferente ou com nomes de colunas alterados.\nVerifique o schema do arquivo: %s\nSkill anterior: ecological-data-foundation",
        missing_txt, input_file
      )
      stop("Missing required columns: ", missing_txt)
    }
    log_info(
      "Todas as colunas obrigatorias presentes: %s",
      paste(required_cols, collapse = ", ")
    )
  },
  error = function(e) {
    # The stop() raised above also lands here, so it is logged once more
    # before being re-raised.
    log_error("Falha na verificacao de colunas: %s", conditionMessage(e))
    stop(e)
  }
)
|
|
84
|
+
|
|
85
|
+
# ── 3. Remove records with missing coordinates ─────────────────────────────
|
|
86
|
+
log_step(3, "Remover registros sem coordenadas e converter para numerico")
# Coerce both coordinate columns to numeric and drop every row where either
# one is missing. `n_before_na` keeps the row count of the file as it was read.
tryCatch(
  {
    n_before_na <- nrow(raw)
    raw <- raw |>
      mutate(
        # Coerce BEFORE filtering: a value that is present but not numeric
        # turns into NA here and must be dropped too, otherwise NA coordinates
        # would be passed on to CoordinateCleaner. The coercion warning is
        # expected; the dropped rows are counted and logged below.
        decimal_latitude = suppressWarnings(as.numeric(decimal_latitude)),
        decimal_longitude = suppressWarnings(as.numeric(decimal_longitude))
      ) |>
      filter(!is.na(decimal_latitude), !is.na(decimal_longitude))
    n_removed_na <- n_before_na - nrow(raw)
    if (n_removed_na > 0) {
      log_warn(
        "Registros removidos por coordenadas ausentes ou nao numericas: %d",
        n_removed_na
      )
    } else {
      log_info("Nenhum registro removido por coordenadas ausentes.")
    }
  },
  error = function(e) {
    log_error(
      "Falha ao filtrar coordenadas ausentes: %s\nCausa provavel: tipos de coluna inesperados no CSV.\nVerifique: %s\nSkill anterior: ecological-data-foundation",
      conditionMessage(e), input_file
    )
    stop(e)
  }
)

# Advisory only: the script continues with fewer than 30 usable records.
if (nrow(raw) < 30) {
  log_warn("Poucos registros apos remocao de NAs (%d). Minimo recomendado para SDM: 30.", nrow(raw))
}
|
|
112
|
+
|
|
113
|
+
# ── 4. Coordinate cleaning ─────────────────────────────────────────────────
|
|
114
|
+
log_step(4, "Limpeza de coordenadas com CoordinateCleaner")
# Tests requested from clean_coordinates(); logged as a single decision entry.
cc_tests <- c(
  "capitals", "centroids", "equal", "gbif",
  "institutions", "validity", "zeros"
)
log_decision(
  "cc_tests",
  paste(cc_tests, collapse = ","),
  "conjunto padrao de testes para detectar registros suspeitos"
)
# Split `raw` into `clean` (QA_status "OK") and `flagged` (QA_status lists the
# failed flag columns, separated by "|"). `flags` is kept for the QA report.
tryCatch(
  {
    flags <- clean_coordinates(
      x = raw,
      lon = "decimal_longitude",
      lat = "decimal_latitude",
      species = "species",
      tests = cc_tests,
      verbose = FALSE
    )

    # `.summary` is the overall verdict used for the split below.
    is_clean <- flags$.summary

    # Reason codes come from the dot-prefixed flag columns. `.summary` is
    # excluded: it is FALSE for every flagged row, so it would otherwise be
    # appended to every record's QA_status without naming a failed test.
    reason_cols <- setdiff(
      grep("^\\.", names(flags), value = TRUE),
      ".summary"
    )
    failed <- !as.matrix(flags[!is_clean, reason_cols, drop = FALSE])
    # vapply() over row indices gives character(0) when nothing is flagged.
    reasons <- vapply(
      seq_len(nrow(failed)),
      function(i) paste(reason_cols[which(failed[i, ])], collapse = "|"),
      character(1)
    )

    clean <- raw[is_clean, ] |> mutate(QA_status = "OK")
    flagged <- raw[!is_clean, ] |> mutate(QA_status = reasons)

    log_info("Registros limpos: %d | Registros sinalizados: %d", nrow(clean), nrow(flagged))
    if (nrow(flagged) > 0) {
      log_warn(
        "%d registros sinalizados pelo CoordinateCleaner (%.1f%% do total).",
        nrow(flagged), 100 * nrow(flagged) / nrow(raw)
      )
    }
  },
  error = function(e) {
    log_error(
      "Falha na limpeza de coordenadas (CoordinateCleaner): %s\nCausa provavel: dados malformados ou pacote CoordinateCleaner nao instalado.\nVerifique: install.packages('CoordinateCleaner')\nSkill anterior: ecological-data-foundation",
      conditionMessage(e)
    )
    stop(e)
  }
)
|
|
150
|
+
|
|
151
|
+
# ── 5. Remove exact duplicates ─────────────────────────────────────────────
|
|
152
|
+
log_step(5, "Remover duplicatas exatas")
# Keep the first record for each combination of the de-duplication keys.
# `n_dup` (number of rows dropped) is reused by the QA report.
tryCatch(
  {
    # event_date is not among the required columns checked in step 2, so it is
    # used as a key only when the input provides it, instead of aborting.
    dedup_cols <- intersect(
      c("species", "decimal_latitude", "decimal_longitude", "event_date"),
      names(clean)
    )
    if (!("event_date" %in% dedup_cols)) {
      log_warn("Coluna event_date ausente; duplicatas identificadas sem a data do evento.")
    }

    n_before <- nrow(clean)
    clean <- clean |>
      distinct(across(all_of(dedup_cols)), .keep_all = TRUE)
    n_dup <- n_before - nrow(clean)

    log_decision(
      "dedup_cols",
      paste(dedup_cols, collapse = ","),
      "combinacao padrao para identificar duplicatas espaciotemporais"
    )
    if (n_dup > 0) {
      log_warn("Duplicatas exatas removidas: %d", n_dup)
    } else {
      log_info("Nenhuma duplicata exata encontrada.")
    }
  },
  error = function(e) {
    log_error(
      "Falha ao remover duplicatas: %s\nCausa provavel: coluna event_date ausente ou mal formatada.\nVerifique o schema do CSV.\nSkill anterior: ecological-data-foundation",
      conditionMessage(e)
    )
    stop(e)
  }
)

# Advisory only: the outputs are still written with fewer than 30 records.
if (nrow(clean) < 30) {
  log_warn(
    "Apenas %d registros limpos apos todas as filtragens. SDMs requerem >= 30 registros confiaveis.",
    nrow(clean)
  )
}
|
|
182
|
+
|
|
183
|
+
# ── 6. Write outputs ───────────────────────────────────────────────────────
|
|
184
|
+
log_step(6, "Escrever arquivos de saida")
# Write the clean and the flagged tables as CSV files inside `output_dir`,
# then log both destinations. Any write error is logged and re-raised.
tryCatch(
  {
    clean_path <- file.path(output_dir, "data_clean.csv")
    flagged_path <- file.path(output_dir, "flagged_records.csv")
    write_csv(clean, clean_path)
    write_csv(flagged, flagged_path)
    log_info("Gravado: %s", clean_path)
    log_info("Gravado: %s", flagged_path)
  },
  error = function(e) {
    log_error(
      "Falha ao gravar arquivos de saida: %s\nCausa provavel: sem permissao de escrita em '%s'.\nVerifique permissoes do diretorio.\nSkill anterior: ecological-data-foundation",
      conditionMessage(e), output_dir
    )
    stop(e)
  }
)
|
|
197
|
+
|
|
198
|
+
# ── 7. QA report ──────────────────────────────────────────────────────────
|
|
199
|
+
log_step(7, "Gerar relatorio de QA")
# Build a small Markdown report (summary counts plus one line per flag column
# with at least one failure) and write it to <output_dir>/qa_report.md.
tryCatch(
  {
    report <- c(
      "# QA Report — Occurrence Cleaning",
      "",
      paste("- Input file:", input_file),
      # Use the row count taken before any filtering. `flagged` is a subset of
      # `raw`, so nrow(raw) + nrow(flagged) counted flagged rows twice and left
      # out the rows dropped for missing coordinates.
      paste("- Raw records:", n_before_na),
      paste("- Exact duplicates removed:", n_dup),
      paste("- Records flagged by CoordinateCleaner:", nrow(flagged)),
      paste("- Clean records written:", nrow(clean)),
      "",
      "## Flag Counts",
      ""
    )

    # Failures per dot-prefixed flag column (FALSE marks a failed record).
    flag_cols <- grep("^\\.", names(flags), value = TRUE)
    n_fail <- vapply(
      flag_cols,
      function(fc) sum(!flags[[fc]], na.rm = TRUE),
      integer(1)
    )
    has_fail <- n_fail > 0
    # Guarded because paste0() turns zero-length input into an empty string,
    # which would add a bogus bullet when nothing failed.
    if (any(has_fail)) {
      report <- c(
        report,
        paste0("- `", flag_cols[has_fail], "`: ", n_fail[has_fail])
      )
    }

    report_path <- file.path(output_dir, "qa_report.md")
    writeLines(report, report_path)
    log_info("Gravado: %s", report_path)
  },
  error = function(e) {
    log_error(
      "Falha ao gerar relatorio QA: %s\nCausa provavel: problema ao escrever no diretorio '%s'.\nSkill anterior: ecological-data-foundation",
      conditionMessage(e), output_dir
    )
    stop(e)
  }
)

log_info("Concluido. Registros limpos: %d | Sinalizados: %d", nrow(clean), nrow(flagged))
|