ecological-agent-skills 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/AGENT_CONTEXT.md +54 -1
  2. package/CATALOG.md +6 -5
  3. package/README.md +10 -5
  4. package/docs/DECISION_TREE.md +218 -0
  5. package/docs/GBIF_SETUP.md +88 -0
  6. package/docs/glossary.md +197 -0
  7. package/docs/repository-statistics.md +11 -8
  8. package/docs/taxonomy-diagram.md +182 -0
  9. package/environment.yaml +68 -64
  10. package/package.json +3 -3
  11. package/skills/SKILL_INDEX.json +163 -44
  12. package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -360
  13. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -235
  14. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -374
  15. package/skills/biostatistics-workbench/scripts/glm_pipeline.R +4 -4
  16. package/skills/biostatistics-workbench/scripts/glm_pipeline.py +21 -21
  17. package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -169
  18. package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -179
  19. package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -192
  20. package/skills/community-ecology-ordination/SKILL.md +12 -0
  21. package/skills/community-ecology-ordination/scripts/community_analysis.R +9 -9
  22. package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -231
  23. package/skills/ecological-data-foundation/SKILL.md +11 -0
  24. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-314.pyc +0 -0
  25. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_gbif.cpython-314.pyc +0 -0
  26. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-314.pyc +0 -0
  27. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-314.pyc +0 -0
  28. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-314.pyc +0 -0
  29. package/skills/ecological-data-foundation/scripts/clean_occurrences.R +34 -34
  30. package/skills/ecological-data-foundation/scripts/clean_occurrences.py +314 -268
  31. package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -251
  32. package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -364
  33. package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -315
  34. package/skills/ecological-data-foundation/scripts/download_from_gbif.py +561 -407
  35. package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -238
  36. package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -304
  37. package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -273
  38. package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -344
  39. package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -248
  40. package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -318
  41. package/skills/ecological-impact-assessment/SKILL.md +17 -1
  42. package/skills/ecological-impact-assessment/scripts/baci_analysis.R +21 -21
  43. package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -141
  44. package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -274
  45. package/skills/ecosystem-services-assessment/SKILL.md +12 -1
  46. package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -189
  47. package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +26 -26
  48. package/skills/environmental-time-series/scripts/recovery_trajectory.R +43 -43
  49. package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -178
  50. package/skills/environmental-time-series/scripts/trend_analysis.R +33 -33
  51. package/skills/environmental-time-series/scripts/trend_analysis.py +184 -184
  52. package/skills/geoprocessing-for-ecology/SKILL.md +12 -1
  53. package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-314.pyc +0 -0
  54. package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +257 -239
  55. package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +540 -379
  56. package/skills/geoprocessing-for-ecology/scripts/gee_time_series.py +412 -0
  57. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +30 -30
  58. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -172
  59. package/skills/landscape-connectivity/scripts/connectivity_analysis.py +389 -387
  60. package/skills/landscape-connectivity/scripts/connectivity_metrics.R +278 -274
  61. package/skills/landscape-connectivity/scripts/resistance_surface.R +246 -239
  62. package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +312 -315
  63. package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -226
  64. package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +10 -10
  65. package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +11 -11
  66. package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -159
  67. package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -258
  68. package/skills/population-viability-analysis/scripts/pva_analysis.py +446 -442
  69. package/skills/population-viability-analysis/scripts/stochastic_pva.R +359 -353
  70. package/skills/predictive-modeling-best-practices/SKILL.md +11 -0
  71. package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +12 -12
  72. package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -182
  73. package/skills/reproducible-ecology-pipeline/SKILL.md +10 -0
  74. package/skills/reproducible-ecology-pipeline/scripts/check_packages.R +214 -0
  75. package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -110
  76. package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +315 -320
  77. package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -336
  78. package/skills/species-distribution-modeling/SKILL.md +37 -0
  79. package/skills/species-distribution-modeling/scripts/predict_distribution.R +244 -236
  80. package/skills/species-distribution-modeling/scripts/predict_distribution.py +289 -286
  81. package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -351
  82. package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -220
  83. package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +688 -96
  84. package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +320 -318
  85. package/skills/species-distribution-modeling/scripts/tune_maxnet.R +351 -344
  86. package/templates/SKILL_TEMPLATE.md +29 -26
  87. package/templates/reports/technical-report-template.md +18 -3
  88. package/workflows/assess-ecological-impact/WORKFLOW.md +0 -1
  89. package/workflows/produce-technical-report/WORKFLOW.md +1 -1
  90. package/workflows/run-acoustic-monitoring/WORKFLOW.md +83 -0
package/AGENT_CONTEXT.md CHANGED
@@ -47,10 +47,17 @@ Each workflow lives at `workflows/<workflow-id>/WORKFLOW.md`.
47
47
  | **Environmental time series** | ecological-data-foundation | environmental-time-series | biostatistics-workbench | reproducible-ecology-pipeline | — |
48
48
  | **Ecosystem services** | ecological-data-foundation | geoprocessing-for-ecology | ecosystem-services-assessment | model-validation-and-uncertainty | reproducible-ecology-pipeline |
49
49
  | **Multispecies screening** | ecological-data-foundation | geoprocessing-for-ecology | species-distribution-modeling (loop) | model-validation-and-uncertainty | reproducible-ecology-pipeline |
50
+ | **Acoustic monitoring** | ecological-data-foundation | acoustic-monitoring | biostatistics-workbench | model-validation-and-uncertainty | reproducible-ecology-pipeline |
51
+ | **Camera trap occupancy** | ecological-data-foundation | camera-trap-processing | occupancy-and-detection | model-validation-and-uncertainty | reproducible-ecology-pipeline |
52
+ | **Landscape connectivity** | ecological-data-foundation | geoprocessing-for-ecology | landscape-connectivity | model-validation-and-uncertainty | reproducible-ecology-pipeline |
53
+ | **Population viability** | ecological-data-foundation | biostatistics-workbench | population-viability-analysis | model-validation-and-uncertainty | reproducible-ecology-pipeline |
54
+ | **Conservation prioritization** | ecological-data-foundation | geoprocessing-for-ecology | species-distribution-modeling | spatial-prioritization | reproducible-ecology-pipeline |
50
55
 
51
56
  For multispecies projects always read `workflows/run-multispecies-screening/WORKFLOW.md`
52
57
  before starting. It contains the priority classification logic.
53
58
 
59
+ Available workflows (14): `run-sdm-study`, `run-multispecies-screening`, `run-occupancy-analysis`, `run-camera-trap-occupancy`, `run-acoustic-monitoring`, `run-population-viability`, `run-conservation-prioritization`, `assess-ecological-impact`, `assess-ecosystem-services`, `assess-landscape-connectivity`, `analyze-community-structure`, `analyze-environmental-change`, `build-fire-risk-map`, `produce-technical-report`.
60
+
54
61
  ---
55
62
 
56
63
  ## 3. Disambiguation Rules for Overlapping Skills
@@ -153,7 +160,53 @@ Apply the appropriate routing based on project complexity:
153
160
 
154
161
  ---
155
162
 
156
- ## 7. File Conventions of This Repository
163
+ ## 7. Environmental Data Sources
164
+
165
+ ### Default source: CHELSA v2.1
166
+
167
+ The default environmental predictor source for all SDM and geoprocessing workflows is **CHELSA v2.1**
168
+ (Climatologies at High resolution for the Earth's Land Surface Areas).
169
+
170
+ - Resolution: ~1 km (30 arcsec)
171
+ - Variables: BIO1-BIO19 (1981-2010 climatology)
172
+ - Download URL pattern: `https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/climatologies/1981-2010/bio/CHELSA_bio{N}_1981-2010_V.2.1.tif`
173
+ - No authentication required
174
+ - License: CC BY 4.0
175
+
176
+ **WorldClim v2.1** is the automatic fallback if CHELSA downloads fail.
177
+ The `download_predictors.py` script handles this transparently — no user action required.
178
+
179
+ Do not default to WorldClim unless the user explicitly requests it or CHELSA is unavailable.
180
+
181
+ ### Python version compatibility
182
+
183
+ | Python | Status for this repository |
184
+ |--------|---------------------------|
185
+ | 3.11 | Recommended — all packages build correctly |
186
+ | 3.12 | Supported — most packages available |
187
+ | 3.13 | Untested |
188
+ | 3.14 | Not recommended — `elapid` (MaxEnt) fails to build; `pygbif` has API bugs |
189
+
190
+ If `elapid` is unavailable (Python 3.12+), `sdm_pipeline.py` automatically falls back to an
191
+ RF + BRT ensemble without MaxEnt. This is logged as a DECISION entry.
192
+
193
+ If `pygbif` raises a `TypeError` on `Session.request()` (Python 3.14), `download_from_gbif.py`
194
+ automatically falls back to direct HTTP calls to the GBIF REST API.
195
+
196
+ ### Spatial thinning
197
+
198
+ `clean_occurrences.py` accepts an optional third argument `thin_deg` (decimal degrees).
199
+ When provided, one record is retained per `thin_deg × thin_deg` grid cell to reduce
200
+ spatial autocorrelation before SDM fitting. Recommended values: 0.1 to 0.5 (~11 km to ~55 km at the equator; cells narrow east–west at higher latitudes).
201
+
202
+ Example:
203
+ ```bash
204
+ python clean_occurrences.py data/raw/occurrences.csv data/processed 0.1
205
+ ```
206
+
207
+ ---
208
+
209
+ ## 8. File Conventions of This Repository
157
210
 
158
211
  ### Where to put input data
159
212
  - Place raw input files in `data/raw/` (create if absent).
package/CATALOG.md CHANGED
@@ -152,7 +152,7 @@ A quick-reference index for all 17 skills. Each row summarises the skill's domai
152
152
  **When to use:** When processing audio recordings from AudioMoth, SM4, or similar recorders to compute soundscape indices or detect species via BirdNET or similar classifiers.
153
153
  **Inputs:** Audio directory (WAV/FLAC), recording metadata, species list (optional), location coordinates
154
154
  **Outputs:** Acoustic indices time series (CSV), detection list with confidence scores, species accumulation curve, soundscape heatmap
155
- **Used by workflows:** (standalone; pairs with environmental-time-series)
155
+ **Used by workflows:** run-acoustic-monitoring; pairs with environmental-time-series
156
156
 
157
157
  ---
158
158
 
@@ -191,15 +191,16 @@ A quick-reference index for all 17 skills. Each row summarises the skill's domai
191
191
  | assess-ecological-impact | ✓ | ✓ | ✓ | | ✓ | | | | ✓ | | | ✓ | | | | | |
192
192
  | analyze-community-structure | ✓ | | ✓ | | ✓ | | | ✓ | | | | ✓ | | | | | |
193
193
  | build-fire-risk-map | ✓ | ✓ | | ✓ | ✓ | | | | ✓ | ✓ | | | | | | | |
194
- | run-occupancy-analysis | ✓ | | ✓ | | ✓ | | ✓ | | | | | ✓ | | | | | |
195
- | analyze-environmental-change | ✓ | ✓ | | | | | | | ✓ | ✓ | | ✓ | | | | | |
196
- | assess-ecosystem-services | ✓ | ✓ | ✓ | | | | | | | | ✓ | ✓ | | | | | |
197
- | produce-technical-report | | | | | | | | | | | | ✓ | | | | | |
194
+ | run-occupancy-analysis | ✓ | | ✓ | | ✓ | | ✓ | | | | | ✓ | | | | | |
195
+ | analyze-environmental-change | ✓ | ✓ | | | | | | | ✓ | ✓ | | ✓ | | | | | |
196
+ | assess-ecosystem-services | ✓ | ✓ | ✓ | | | | | | | | ✓ | ✓ | | | | | |
197
+ | produce-technical-report | | | | | | | | | | | | ✓ | | | | | |
198
198
  | run-multispecies-screening | ✓ | ✓ | | ✓ | ✓ | ✓ | | | | | | | | | | | |
199
199
  | run-camera-trap-occupancy | ✓ | | | | ✓ | | ✓ | | | | | ✓ | ✓ | | | | |
200
200
  | assess-landscape-connectivity | ✓ | ✓ | | | ✓ | | | | | | | ✓ | | | ✓ | | |
201
201
  | run-population-viability | ✓ | | ✓ | | ✓ | | | | | | | ✓ | | | | ✓ | |
202
202
  | run-conservation-prioritization | ✓ | ✓ | | | | ✓ | | | | | | ✓ | | | | | ✓ |
203
+ | run-acoustic-monitoring | ✓ | | ✓ | | ✓ | | | | | | | ✓ | | ✓ | | | |
203
204
 
204
205
  ---
205
206
 
package/README.md CHANGED
@@ -2,9 +2,13 @@
2
2
 
3
3
  [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
4
4
 
5
- A curated, agent-agnostic skill library for **quantitative ecology** workflows, designed for use with any AI coding agent.
5
+ Teach your AI agent to do rigorous ecology.
6
6
 
7
- **17 modular skills** | **13 multi-step workflows** | **58 R/Python scripts** | **14 worked examples across 6 continents**
7
+ **ecological-agent-skills** is a structured instruction library that turns any AI coding agent into a quantitative ecology assistant — capable of cleaning occurrence data, fitting species distribution models, estimating occupancy, assessing landscape connectivity, running population viability analyses, and designing reserve networks, all with built-in decision logic, reproducibility tracking, and validated R/Python scripts.
8
+
9
+ Works with Claude Code, Gemini CLI, Cursor, GitHub Copilot, and any agent that reads Markdown.
10
+
11
+ **17 modular skills** | **14 multi-step workflows** | **58 R/Python scripts** | **14 worked examples across 6 continents**
8
12
 
9
13
  ---
10
14
 
@@ -14,7 +18,7 @@ A curated, agent-agnostic skill library for **quantitative ecology** workflows,
14
18
  - [Installation](#installation)
15
19
  - [Core Concepts](#core-concepts)
16
20
  - [Skills (17)](#skills-17)
17
- - [Workflows (13)](#workflows-13)
21
+ - [Workflows (14)](#workflows-14)
18
22
  - [How to Use Skills](#how-to-use-skills)
19
23
  - [For AI Agents](#for-ai-agents)
20
24
  - [Examples (14)](#examples-14)
@@ -190,7 +194,7 @@ See [CATALOG.md](CATALOG.md) for full metadata, inputs, outputs, and workflow li
190
194
 
191
195
  ---
192
196
 
193
- ## Workflows (13)
197
+ ## Workflows (14)
194
198
 
195
199
  | Workflow | Skills Used | Purpose |
196
200
  |----------|-------------|---------|
@@ -207,6 +211,7 @@ See [CATALOG.md](CATALOG.md) for full metadata, inputs, outputs, and workflow li
207
211
  | `assess-landscape-connectivity` | 1 → 2 → 15 → 5 → 12 | Corridor and patch importance |
208
212
  | `run-population-viability` | 1 → 3 → 16 → 5 → 12 | PVA and extinction risk |
209
213
  | `run-conservation-prioritization` | 1 → 2 → 6 → 17 → 12 | Reserve network design |
214
+ | `run-acoustic-monitoring` | 1 → 14 → 3 → 5 → 12 | Soundscape indices and species detection |
210
215
 
211
216
  ---
212
217
 
@@ -305,7 +310,7 @@ ecological-agent-skills/
305
310
  │ ├── resources/ ← checklists, glossaries, templates
306
311
  │ ├── examples/ ← usage prompt examples
307
312
  │ └── scripts/ ← R/Python helpers
308
- ├── workflows/ ← 13 multi-step playbooks
313
+ ├── workflows/ ← 14 multi-step playbooks
309
314
  │ └── <workflow-name>/
310
315
  │ └── WORKFLOW.md
311
316
  ├── templates/ ← reusable prompts, reports, checklists
@@ -0,0 +1,218 @@
1
+ # Skill Selection Decision Tree
2
+
3
+ Use this guide to identify which skill (or sequence of skills) to invoke based on your data type and analytical goal. Start at the top and follow the branches that match your situation.
4
+
5
+ ---
6
+
7
+ ## Quick Reference Table
8
+
9
+ | If your data is... | And your goal is... | Start with skill |
10
+ |--------------------|---------------------|------------------|
11
+ | Occurrence records (species coordinates) | Download, clean, validate | ecological-data-foundation |
12
+ | Occurrence records + environmental layers | Model species distribution | ecological-data-foundation → geoprocessing-for-ecology → predictive-modeling-best-practices → species-distribution-modeling |
13
+ | Raster / vector layers only | Reproject, clip, stack, extract values | geoprocessing-for-ecology |
14
+ | Tabular ecological data (1 response, N predictors) | Statistical test or regression | biostatistics-workbench |
15
+ | Species × site matrix | Community composition / ordination | ecological-data-foundation → community-ecology-ordination |
16
+ | Replicated detection/non-detection surveys | Occupancy and detection probability | ecological-data-foundation → occupancy-and-detection |
17
+ | Before/after, control/impact data | Quantify disturbance effect (BACI) | ecological-data-foundation → ecological-impact-assessment |
18
+ | Time-indexed environmental variable | Trend, breakpoint, anomaly | ecological-data-foundation → environmental-time-series |
19
+ | Land cover + biophysical layers | Ecosystem services mapping | ecological-data-foundation → geoprocessing-for-ecology → ecosystem-services-assessment |
20
+ | Camera trap image records | Detection events, diel activity | camera-trap-processing (→ occupancy-and-detection) |
21
+ | Audio recordings (WAV/FLAC) | Soundscape indices, species detection | acoustic-monitoring (→ environmental-time-series) |
22
+ | Habitat patches + land cover raster | Landscape connectivity, corridors | geoprocessing-for-ecology → landscape-connectivity |
23
+ | Vital rates (survival, fecundity by stage) | Population viability, IUCN Criterion E | biostatistics-workbench → population-viability-analysis |
24
+ | Suitability maps + planning units | Reserve design, prioritization | species-distribution-modeling → spatial-prioritization |
25
+ | Any fitted model | Evaluate performance, quantify uncertainty | model-validation-and-uncertainty |
26
+ | Any completed analysis | Audit trail, reproducibility checklist | reproducible-ecology-pipeline |
27
+
28
+ ---
29
+
30
+ ## Decision Tree — Detailed
31
+
32
+ ### Step 1: What type of data do you have?
33
+
34
+ ```
35
+ Root
36
+ ├── A. Occurrence records (species lat/lon) → go to Branch A
37
+ ├── B. Spatial rasters or vectors only → go to Branch B
38
+ ├── C. Tabular survey data (sites × variables) → go to Branch C
39
+ ├── D. Time series data → go to Branch D
40
+ ├── E. Multimedia data (images / audio) → go to Branch E
41
+ └── F. Model outputs (predictions, suitability) → go to Branch F
42
+ ```
43
+
44
+ ---
45
+
46
+ ### Branch A — Occurrence Records
47
+
48
+ ```
49
+ A. Occurrence records
50
+ ├── A1. Raw / uncleaned records?
51
+ │ └── ALWAYS start with: ecological-data-foundation
52
+ │ (cleaning, QA, deduplication, coordinate flags)
53
+
54
+ ├── A2. Goal: map species distribution / suitability?
55
+ │ └── ecological-data-foundation
56
+ │ → geoprocessing-for-ecology (extract env. values)
57
+ │ → predictive-modeling-best-practices (predictor selection, CV design)
58
+ │ → species-distribution-modeling (MaxEnt / RF / BRT / ensemble)
59
+ │ → model-validation-and-uncertainty (AUC, TSS, uncertainty maps)
60
+ │ → reproducible-ecology-pipeline
61
+
62
+ ├── A3. Goal: occupancy and detection probability?
63
+ │ → Replicated visit data required (detection history matrix)
64
+ │ └── ecological-data-foundation
65
+ │ → occupancy-and-detection
66
+ │ → biostatistics-workbench
67
+ │ → reproducible-ecology-pipeline
68
+
69
+ └── A4. Multiple species simultaneously?
70
+ └── Follow workflow: run-multispecies-screening
71
+ (ecological-data-foundation → geoprocessing-for-ecology
72
+ → species-distribution-modeling (loop) → model-validation-and-uncertainty)
73
+ ```
74
+
75
+ ---
76
+
77
+ ### Branch B — Spatial Rasters or Vectors
78
+
79
+ ```
80
+ B. Spatial rasters or vectors
81
+ ├── B1. Need to reproject, clip, resample, stack layers?
82
+ │ └── geoprocessing-for-ecology (standalone)
83
+
84
+ ├── B2. Need to extract environmental values at occurrence points?
85
+ │ └── geoprocessing-for-ecology (stack_and_extract.py)
86
+ │ → then continue with predictive-modeling-best-practices
87
+
88
+ ├── B3. Land cover raster + biophysical data → ecosystem services?
89
+ │ └── ecological-data-foundation → geoprocessing-for-ecology
90
+ │ → ecosystem-services-assessment → reproducible-ecology-pipeline
91
+
92
+ └── B4. Habitat patches + dispersal distance → connectivity?
93
+ └── geoprocessing-for-ecology → landscape-connectivity
94
+ ```
95
+
96
+ ---
97
+
98
+ ### Branch C — Tabular Survey Data
99
+
100
+ ```
101
+ C. Tabular survey data
102
+ ├── C1. Single response variable (abundance, richness, biomass)?
103
+ │ └── ecological-data-foundation → biostatistics-workbench
104
+ │ (GLM/GLMM, hypothesis testing, model selection)
105
+
106
+ ├── C2. Species × site matrix (multiple species, multiple sites)?
107
+ │ └── ecological-data-foundation → community-ecology-ordination
108
+ │ (NMDS, PCA, PCoA, diversity indices, PERMANOVA)
109
+
110
+ ├── C3. Before/after, control/impact sites (BACI)?
111
+ │ └── ecological-data-foundation → ecological-impact-assessment
112
+ │ → biostatistics-workbench → model-validation-and-uncertainty
113
+
114
+ ├── C4. Vital rates by age/stage class?
115
+ │ └── biostatistics-workbench → population-viability-analysis
116
+ │ (lambda, elasticity, stochastic PVA, IUCN Criterion E)
117
+
118
+ └── C5. Planning units + species features?
119
+ └── species-distribution-modeling → spatial-prioritization
120
+ (prioritizr/Marxan, representation targets, reserve design)
121
+ ```
122
+
123
+ ---
124
+
125
+ ### Branch D — Time Series Data
126
+
127
+ ```
128
+ D. Time series data
129
+ ├── D1. Environmental signal (NDVI, temperature, rainfall)?
130
+ │ └── ecological-data-foundation → environmental-time-series
131
+ │ (Mann-Kendall trend, BFAST breakpoint, anomaly detection)
132
+
133
+ ├── D2. Biodiversity metric over time (species index, acoustic)?
134
+ │ └── ecological-data-foundation → environmental-time-series
135
+ │ → biostatistics-workbench (if inferential comparison needed)
136
+
137
+ └── D3. Fire risk or land cover change?
138
+ └── Follow workflow: build-fire-risk-map or analyze-environmental-change
139
+ (ecological-data-foundation → geoprocessing-for-ecology
140
+ → ecological-impact-assessment → environmental-time-series)
141
+ ```
142
+
143
+ ---
144
+
145
+ ### Branch E — Multimedia Data
146
+
147
+ ```
148
+ E. Multimedia data
149
+ ├── E1. Camera trap images?
150
+ │ └── camera-trap-processing
151
+ │ (detection events, diel activity, trap effort table)
152
+ │ → occupancy-and-detection (if occupancy estimation needed)
153
+
154
+ └── E2. Audio recordings (WAV/FLAC)?
155
+ └── acoustic-monitoring
156
+ (ACI/NDSI/ADI soundscape indices, BirdNET species detection)
157
+ → environmental-time-series (if temporal trend analysis needed)
158
+ ```
159
+
160
+ ---
161
+
162
+ ### Branch F — Model Outputs
163
+
164
+ ```
165
+ F. Model outputs
166
+ ├── F1. Model fitted; need performance metrics?
167
+ │ └── model-validation-and-uncertainty
168
+ │ (AUC, TSS, RMSE, calibration, sensitivity, uncertainty maps)
169
+
170
+ ├── F2. Suitability maps; need conservation priorities?
171
+ │ └── spatial-prioritization
172
+ │ (planning units, targets, BLM, prioritizr/Marxan)
173
+
174
+ └── F3. Any outputs; need reproducibility checklist?
175
+ └── reproducible-ecology-pipeline
176
+ (parameter manifest, decision log, audit trail)
177
+ ```
178
+
179
+ ---
180
+
181
+ ## Disambiguation Rules
182
+
183
+ When two skills seem applicable, use the rule below for that pair to pick exactly one.
184
+
185
+ ### ecological-data-foundation vs. geoprocessing-for-ecology
186
+ - **ecological-data-foundation**: cleaning, deduplication, column validation, coordinate QA.
187
+ - **geoprocessing-for-ecology**: spatial operations on rasters/vectors (reproject, mask, stack, extract).
188
+ - If the task involves occurrence records AND spatial operations, run **ecological-data-foundation first**.
189
+
190
+ ### predictive-modeling-best-practices vs. model-validation-and-uncertainty
191
+ - **predictive-modeling-best-practices**: BEFORE fitting — predictor selection, collinearity, CV design.
192
+ - **model-validation-and-uncertainty**: AFTER fitting — metrics, calibration, uncertainty quantification.
193
+ - Never reverse this order.
194
+
195
+ ### species-distribution-modeling vs. occupancy-and-detection
196
+ - **species-distribution-modeling**: suitability surface from presence/absence + environmental predictors.
197
+ - **occupancy-and-detection**: occupancy probability (psi) + detection (p) from replicated visit data.
198
+
199
+ ### ecological-impact-assessment vs. biostatistics-workbench
200
+ - **ecological-impact-assessment**: data with explicit before/after + control/impact structure (BACI), or landscape fragmentation metrics.
201
+ - **biostatistics-workbench**: statistical comparisons without BACI design.
202
+
203
+ ### biostatistics-workbench vs. community-ecology-ordination
204
+ - **biostatistics-workbench**: one response variable, one or more predictors.
205
+ - **community-ecology-ordination**: species × site matrix as primary input (multivariate assemblage data).
206
+
207
+ ---
208
+
209
+ ## Minimum Records Quick Reference
210
+
211
+ | Analysis | Do NOT proceed below | Recommended |
212
+ |----------|----------------------|-------------|
213
+ | SDM (any algorithm) | 10 occurrences | >= 30 |
214
+ | Occupancy model | 15 sites | >= 30 sites, >= 3 visits |
215
+ | BACI mixed model | 5 control + 5 impact sites | >= 10 per group |
216
+ | Community ordination | 5 sites | >= 10 sites |
217
+ | GLM (single predictor) | 20 observations | >= 50 |
218
+ | Stochastic PVA | 5 years of count data | >= 10 years |
@@ -0,0 +1,88 @@
1
+ # GBIF Credentials Setup
2
+
3
+ GBIF credentials are **optional**, but they enable async downloads. Async downloads provide a citable DOI
4
+ (needed for reproducible publications) and are required to retrieve datasets > 100,000 records.
5
+
6
+ Without credentials, `download_from_gbif.py` falls back to `occ.search`
7
+ (no DOI, max ~100,000 records, still functional for most studies).
8
+
9
+ ---
10
+
11
+ ## Step 1 — Create a free GBIF account
12
+
13
+ 1. Go to https://www.gbif.org
14
+ 2. Click **Login** > **Register**
15
+ 3. Fill in username, email, and password
16
+ 4. Confirm your email address
17
+
18
+ ---
19
+
20
+ ## Step 2 — Set environment variables
21
+
22
+ Set these three variables in your shell before running any skill:
23
+
24
+ ```bash
25
+ # Linux / macOS — add to ~/.bashrc or ~/.zshrc
26
+ export GBIF_USER="your_gbif_username"
27
+ export GBIF_PWD="your_gbif_password"
28
+ export GBIF_EMAIL="your_email@example.com"
29
+ ```
30
+
31
+ ```powershell
32
+ # Windows — PowerShell (current session only)
33
+ $env:GBIF_USER = "your_gbif_username"
34
+ $env:GBIF_PWD = "your_gbif_password"
35
+ $env:GBIF_EMAIL = "your_email@example.com"
36
+ ```
37
+
38
+ ```powershell
39
+ # Windows — PowerShell (permanent, per user)
40
+ [System.Environment]::SetEnvironmentVariable("GBIF_USER", "your_gbif_username", "User")
41
+ [System.Environment]::SetEnvironmentVariable("GBIF_PWD", "your_gbif_password", "User")
42
+ [System.Environment]::SetEnvironmentVariable("GBIF_EMAIL", "your_email@example.com", "User")
43
+ ```
44
+
45
+ If using conda, you can also add them to `environment.yaml` under `variables:`
46
+ (not recommended for shared/committed files — use a `.env` file instead).
47
+
48
+ ---
49
+
50
+ ## Step 3 — Verify
51
+
52
+ ```bash
53
+ python -c "import os; print(os.getenv('GBIF_USER'))"
54
+ ```
55
+
56
+ Should print your GBIF username.
57
+
58
+ ---
59
+
60
+ ## When credentials are used vs. not used
61
+
62
+ | Scenario | Method used | DOI | Max records |
63
+ |----------|-------------|-----|-------------|
64
+ | Credentials set + dataset > 100k records | Async download | Yes | Unlimited |
65
+ | Credentials set + dataset <= 100k records | `occ.search` | No | ~100k |
66
+ | No credentials | `occ.search` fallback | No | ~100k |
67
+
68
+ ---
69
+
70
+ ## Citing GBIF downloads
71
+
72
+ When credentials are set and an async download completes, the script saves a
73
+ `download_metadata_*.txt` file containing the DOI. Cite it in publications as:
74
+
75
+ > GBIF.org (YYYY) GBIF Occurrence Download https://doi.org/10.15468/dl.XXXXXX
76
+
77
+ ---
78
+
79
+ ## Troubleshooting
80
+
81
+ **"GBIF async download requires env vars"** — credentials not set; see Step 2 above.
82
+
83
+ **"occ.count returned 400"** — `hasCoordinate` is not supported by the count endpoint;
84
+ this is a known GBIF API limitation. The script handles this silently.
85
+
86
+ **pygbif TypeError on Python 3.14** — when run on Python 3.14, `pygbif` has an API
87
+ incompatibility with the `requests` session it uses. The script automatically falls back to direct
88
+ HTTP calls to the GBIF REST API. No action needed.