ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/AGENT_CONTEXT.md +191 -0
  2. package/CATALOG.md +329 -0
  3. package/LICENSE +692 -0
  4. package/README.md +347 -0
  5. package/bin/install.mjs +168 -0
  6. package/docs/comparison-with-alternatives.md +38 -0
  7. package/docs/global-examples-index.md +103 -0
  8. package/docs/repository-statistics.md +101 -0
  9. package/docs/theoretical-foundations.md +188 -0
  10. package/environment.yaml +106 -0
  11. package/examples/community/arctic_tundra_vegetation_example.md +247 -0
  12. package/examples/community/bird_landuse_example.md +63 -0
  13. package/examples/community/phytoplankton_reservoir_example.md +60 -0
  14. package/examples/community/reef_fish_indopacific_example.md +221 -0
  15. package/examples/impact/baci_road_example.md +57 -0
  16. package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
  17. package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
  18. package/examples/occupancy/puma_camera_example.md +61 -0
  19. package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
  20. package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
  21. package/examples/sdm/anteater_cerrado_example.md +69 -0
  22. package/examples/sdm/jaguar_amazon_example.md +80 -0
  23. package/examples/sdm/koala_climate_change_example.md +170 -0
  24. package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
  25. package/package.json +43 -0
  26. package/renv.lock +194 -0
  27. package/skills/SKILL_INDEX.json +1020 -0
  28. package/skills/acoustic-monitoring/SKILL.md +163 -0
  29. package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
  30. package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
  31. package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
  32. package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
  33. package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
  34. package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
  35. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
  36. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
  37. package/skills/biostatistics-workbench/SKILL.md +140 -0
  38. package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
  39. package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
  40. package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
  41. package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
  42. package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
  43. package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
  44. package/skills/camera-trap-processing/SKILL.md +159 -0
  45. package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
  46. package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
  47. package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
  48. package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
  49. package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
  50. package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
  51. package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
  52. package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
  53. package/skills/community-ecology-ordination/SKILL.md +133 -0
  54. package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
  55. package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
  56. package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
  57. package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
  58. package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
  59. package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
  60. package/skills/ecological-data-foundation/SKILL.md +129 -0
  61. package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
  62. package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
  63. package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
  64. package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
  65. package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
  66. package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
  67. package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
  68. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
  69. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
  70. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
  71. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
  72. package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
  73. package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
  74. package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
  75. package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
  76. package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
  77. package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
  78. package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
  79. package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
  80. package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
  81. package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
  82. package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
  83. package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
  84. package/skills/ecological-impact-assessment/SKILL.md +123 -0
  85. package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
  86. package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
  87. package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
  88. package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
  89. package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
  90. package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
  91. package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
  92. package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
  93. package/skills/ecosystem-services-assessment/SKILL.md +125 -0
  94. package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
  95. package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
  96. package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
  97. package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
  98. package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
  99. package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
  100. package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
  101. package/skills/environmental-time-series/SKILL.md +125 -0
  102. package/skills/environmental-time-series/examples/example-prompts.md +33 -0
  103. package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
  104. package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
  105. package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
  106. package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
  107. package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
  108. package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
  109. package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
  110. package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
  111. package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
  112. package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
  113. package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
  114. package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
  115. package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
  116. package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
  117. package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
  118. package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
  119. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
  120. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
  121. package/skills/landscape-connectivity/SKILL.md +170 -0
  122. package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
  123. package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
  124. package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
  125. package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
  126. package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
  127. package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
  128. package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
  129. package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
  130. package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
  131. package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
  132. package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
  133. package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
  134. package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
  135. package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
  136. package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
  137. package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
  138. package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
  139. package/skills/occupancy-and-detection/SKILL.md +126 -0
  140. package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
  141. package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
  142. package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
  143. package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
  144. package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
  145. package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
  146. package/skills/population-viability-analysis/SKILL.md +161 -0
  147. package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
  148. package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
  149. package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
  150. package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
  151. package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
  152. package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
  153. package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
  154. package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
  155. package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
  156. package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
  157. package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
  158. package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
  159. package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
  160. package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
  161. package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
  162. package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
  163. package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
  164. package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
  165. package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
  166. package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
  167. package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
  168. package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
  169. package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
  170. package/skills/spatial-prioritization/SKILL.md +162 -0
  171. package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
  172. package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
  173. package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
  174. package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
  175. package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
  176. package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
  177. package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
  178. package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
  179. package/skills/species-distribution-modeling/SKILL.md +139 -0
  180. package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
  181. package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
  182. package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
  183. package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
  184. package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
  185. package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
  186. package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
  187. package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
  188. package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
  189. package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
  190. package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
  191. package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
  192. package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
  193. package/templates/SKILL_TEMPLATE.md +225 -0
  194. package/templates/checklists/data-submission-checklist.md +38 -0
  195. package/templates/checklists/post-analysis-checklist.md +55 -0
  196. package/templates/checklists/pre-analysis-checklist.md +31 -0
  197. package/templates/prompts/debug-skill.md +47 -0
  198. package/templates/prompts/invoke-skill.md +34 -0
  199. package/templates/prompts/invoke-workflow.md +45 -0
  200. package/templates/reports/technical-report-template.md +80 -0
  201. package/templates/scripts/logger_setup.R +79 -0
  202. package/templates/scripts/logger_setup.py +119 -0
  203. package/templates/scripts/params_loader.R +28 -0
  204. package/templates/scripts/params_loader.py +38 -0
  205. package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
  206. package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
  207. package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
  208. package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
  209. package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
  210. package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
  211. package/workflows/produce-technical-report/WORKFLOW.md +113 -0
  212. package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
  213. package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
  214. package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
  215. package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
  216. package/workflows/run-population-viability/WORKFLOW.md +90 -0
  217. package/workflows/run-sdm-study/WORKFLOW.md +99 -0
@@ -0,0 +1,139 @@
1
+ ---
2
+ name: reproducible-ecology-pipeline
3
+ description: "Ensures full reproducibility of ecological analyses through provenance tracking, decision logging, parameter manifests, and environment documentation. Use this skill when the user mentions reproducibility, audit trails, data provenance, decision logs, file manifests, session info, renv, targets, DVC, MLflow, parameter versioning, checksums, or pipeline documentation and project initialization."
4
+ skill_version: 1.0.0
5
+ ---
6
+
7
+ # Skill: reproducible-ecology-pipeline
8
+
9
+ **Domain:** Provenance · Parameter logging · Decision audit · Checklist · Reporting
10
+ **Phase:** 1 — Foundation
11
+ **Used by:** All workflows
12
+
13
+ ---
14
+
15
+ ## Purpose
16
+
17
+ Ensures that every quantitative ecology project generates an auditable, reproducible record: all parameters, software versions, data sources, analytical decisions, and intermediate outputs are logged so the study can be independently replicated.
18
+
19
+ ---
20
+
21
+ ## When to Invoke
22
+
23
+ - At project initialisation (to set up the logging structure)
24
+ - At the end of every analytical step (to record decisions and outputs)
25
+ - Before generating a technical report or submitting results
26
+ - When the user asks about reproducibility, provenance, or audit trails
27
+
28
+ ---
29
+
30
+ ## Inputs
31
+
32
+ | Input | Format | Required |
33
+ |-------|--------|----------|
34
+ | Project directory with analytical outputs | Directory path | Yes |
35
+ | Analysis scripts | R, Python, bash | Recommended |
36
+ | Model parameter files | JSON, YAML, RData | Recommended |
37
+ | QA reports from upstream skills | Markdown, CSV | Recommended |
38
+
39
+ ---
40
+
41
+ ## Outputs
42
+
43
+ | Output | Description |
44
+ |--------|-------------|
45
+ | `reproducibility_checklist.md` | Completed checklist with pass/fail per criterion |
46
+ | `parameter_manifest.yaml` | All parameters used across all steps |
47
+ | `decision_log.md` | Chronological log of all analytical decisions |
48
+ | `software_environment.txt` | Package versions (`sessionInfo()` / `pip freeze`) |
49
+ | `data_provenance.md` | Source, version, access date for every dataset |
50
+ | `file_manifest.md` | All input/output files with checksums |
51
+
52
+ ---
53
+
54
+ ## Steps
55
+
56
+ ### 1. Project Initialisation
57
+ - Create the standard directory structure (see template)
58
+ - Initialise a Git repository (or equivalent version control)
59
+ - Set a fixed random seed for all stochastic operations; document the seed
60
+ - Create `params.yaml` as the single source of truth for all parameters
61
+
62
+ ### 2. Data Provenance Logging
63
+ - For each dataset: record institution, URL/DOI, access date, version/release, license
64
+ - Compute MD5/SHA256 checksums for all raw input files
65
+ - Store checksums in `file_manifest.md`
66
+
67
+ ### 3. Software Environment Capture
68
+ - Run `sessionInfo()` (R) or `pip freeze` + `conda list` (Python) and save to `software_environment.txt`
69
+ - Record OS, R/Python version, key package versions
70
+ - Prefer `renv` (R) or `conda environment.yaml` (Python) for environment reproducibility
71
+
72
+ ### 4. Parameter Manifest
73
+ - Centralise all analysis parameters in `params.yaml`:
74
+ - Random seed
75
+ - CRS / spatial resolution
76
+ - Train/test split ratios
77
+ - Model hyperparameters
78
+ - Thresholds (QA, significance, etc.)
79
+ - No hard-coded parameters in scripts; all values referenced from `params.yaml`
80
+
81
+ ### 5. Decision Log
82
+ - After each analytical step, append an entry to `decision_log.md`:
83
+ - Date/time
84
+ - Step name
85
+ - Decision made
86
+ - Rationale
87
+ - Output files generated
88
+
89
+ ### 6. Reproducibility Checklist
90
+ Evaluate each criterion as PASS / FAIL / N/A:
91
+
92
+ - [ ] Raw data preserved unchanged
93
+ - [ ] All input data checksums recorded
94
+ - [ ] Random seed(s) fixed and documented
95
+ - [ ] All parameters stored in `params.yaml` (no hard-coded values)
96
+ - [ ] Software environment captured
97
+ - [ ] All analytical decisions logged
98
+ - [ ] Scripts run end-to-end without manual intervention
99
+ - [ ] Intermediate outputs versioned or hashable
100
+ - [ ] Final outputs match those in the report
101
+
102
+ ### 7. Pre-report Audit
103
+ - Re-run the full pipeline from raw data; confirm outputs match
104
+ - Cross-check all numbers in the report against the parameter manifest and output files
105
+ - Confirm all figures can be regenerated from code
106
+
107
+ ---
108
+
109
+ ## Key Decisions to Document
110
+
111
+ - Version control system and branching strategy
112
+ - Random seed value(s)
113
+ - Environment management tool (renv, conda, Docker)
114
+ - Intermediate output storage strategy (local, cloud, Zenodo)
115
+
116
+ ---
117
+
118
+ ## Tools and Libraries
119
+
120
+ **R:** `renv`, `targets`, `drake`, `sessionInfo()`
121
+ **Python:** `DVC`, `MLflow`, `conda`, `pip freeze`, `hashlib`
122
+ **General:** Git, GitHub/GitLab, Zenodo, OSF
123
+
124
+ ---
125
+
126
+ ## Resources
127
+
128
+ - `resources/reproducibility-checklist-template.md` — blank checklist
129
+ - `resources/params-yaml-template.yaml` — standard parameter manifest template
130
+ - `resources/directory-structure-template.md` — recommended project layout
131
+ - `examples/example-prompts.md` — example invocation prompts for project initialisation, the pre-report audit, and decision log entries
132
+
133
+ ---
134
+
135
+ ## Notes
136
+
137
+ - Reproducibility is not optional; it is a precondition for scientific validity
138
+ - Prefer `targets` (R) or `DVC` (Python) for pipeline orchestration in complex studies
139
+ - Archive raw data and final outputs to a persistent repository (Zenodo, OSF) before publication
@@ -0,0 +1,35 @@
1
+ # Example Invocation Prompts — reproducible-ecology-pipeline
2
+
3
+ ## Project Initialisation
4
+
5
+ ```
6
+ Load skill: reproducible-ecology-pipeline
7
+ Task: Initialise a reproducible project structure for a jaguar SDM study.
8
+ Project name: "jaguar-sdm-amazon"
9
+ Create: standard directory layout, params.yaml (pre-filled with SDM defaults),
10
+ decision_log.md, data_provenance.md, and run `git init`.
11
+ ```
12
+
13
+ ## Pre-report Audit
14
+
15
+ ```
16
+ Load skill: reproducible-ecology-pipeline
17
+ Task: Run a pre-submission reproducibility audit.
18
+ Project directory: /projects/cerrado-fire-risk/
19
+ 1. Complete the reproducibility checklist template.
20
+ 2. Verify all numbers in outputs/fire_risk_report_v3.md against outputs/*.csv.
21
+ 3. Capture current R session info.
22
+ 4. Generate file_manifest.md with SHA256 checksums for all files in outputs/.
23
+ ```
24
+
25
+ ## Decision Log Entry
26
+
27
+ ```
28
+ Load skill: reproducible-ecology-pipeline
29
+ Task: Add an entry to the decision log for today's predictor selection step.
30
+ Decision: Removed bio7 (VIF = 11.2, highly collinear with bio4) from the predictor set.
31
+ Kept: bio1, bio4, bio12, bio15, NDVI, slope (6 variables total, all VIF < 5).
32
+ Rationale: bio4 retained over bio7 because it captures seasonality rather than range,
33
+ more ecologically meaningful for jaguar thermoregulation.
34
+ Append to: decision_log.md
35
+ ```
@@ -0,0 +1,94 @@
1
+ # Recommended Project Directory Structure
2
+
3
+ ```
4
+ my-ecology-project/
5
+
6
+ ├── README.md ← project overview, setup instructions
7
+ ├── params.yaml ← ALL parameters; source of truth
8
+ ├── .gitignore
9
+
10
+ ├── data/
11
+ │ ├── raw/ ← NEVER modified; read-only after deposit
12
+ │ │ ├── occurrences_raw.csv
13
+ │ │ ├── predictors/ ← original rasters
14
+ │ │ └── spatial/ ← original shapefiles
15
+ │ ├── processed/ ← cleaned, validated, analysis-ready
16
+ │ │ ├── data_clean.csv
17
+ │ │ ├── points_with_env.csv
18
+ │ │ └── predictors_stack.tif
19
+ │ └── spatial/ ← derived spatial layers
20
+ │ ├── study_area.gpkg
21
+ │ └── M_area.gpkg
22
+
23
+ ├── scripts/ ← analysis scripts (numbered by order)
24
+ │ ├── 00_setup.R ← load packages, set paths, source params
25
+ │ ├── 01_data_cleaning.R
26
+ │ ├── 02_geoprocessing.R
27
+ │ ├── 03_modeling.R
28
+ │ ├── 04_validation.R
29
+ │ └── 05_figures.R
30
+
31
+ ├── models/ ← fitted model objects
32
+ │ ├── maxnet_tuned.rds
33
+ │ ├── brt_tuned.rds
34
+ │ └── ensemble_weights.csv
35
+
36
+ ├── outputs/
37
+ │ ├── figures/ ← all plots
38
+ │ ├── tables/ ← all CSV results
39
+ │ ├── maps/ ← all raster outputs
40
+ │ └── reports/ ← rendered reports
41
+
42
+ ├── logs/
43
+ │ ├── decision_log.md
44
+ │ ├── data_provenance.md
45
+ │ ├── software_environment.txt
46
+ │ ├── file_manifest.md ← checksums for all outputs
47
+ │ └── reproducibility_checklist.md
48
+
49
+ └── reports/
50
+ ├── technical_report.md ← or .Rmd / .qmd for literate programming
51
+ └── supplementary/
52
+ ```
53
+
54
+ ## Naming Conventions
55
+
56
+ - Scripts: `NN_descriptive_name.R` (numbered for execution order)
57
+ - Data files: `snake_case`, no spaces, include version or date if multiple versions
58
+ - Rasters: `variable_source_resolution_date.tif` (e.g., `ndvi_modis_1km_2023.tif`)
59
+ - Models: `algorithm_species_version.rds`
60
+ - Outputs: `metric_context_date.csv`
61
+
62
+ ## Version Control Rules
63
+
64
+ - Commit after each major step (cleaning, modeling, validation)
65
+ - Commit message format: `step: brief description` (e.g., `modeling: add BRT with spatial CV`)
66
+ - Tag the commit used for the submitted manuscript: `git tag -a v1.0 -m "manuscript submission"`
67
+ - Never commit raw data files (add to .gitignore); archive separately at Zenodo/OSF
68
+
69
+ ## .gitignore Template
70
+
71
+ ```
72
+ # Data (archive separately)
73
+ data/raw/*
74
+ !data/raw/.gitkeep
75
+
76
+ # Large model objects
77
+ models/*.rds
78
+ models/*.pkl
79
+
80
+ # Large rasters
81
+ data/**/*.tif
82
+ outputs/maps/*.tif
83
+
84
+ # Environment files
85
+ .Rhistory
86
+ .RData
87
+ __pycache__/
88
+ *.pyc
89
+ .ipynb_checkpoints/
90
+
91
+ # OS files
92
+ .DS_Store
93
+ Thumbs.db
94
+ ```
@@ -0,0 +1,84 @@
1
+ # params.yaml — Standard Parameter Manifest Template
2
+ # Copy this file to your project root and fill in all values before analysis.
3
+ # Never hard-code parameter values in scripts; always reference this file.
4
+
5
+ project:
6
+ name: "my-ecology-study"
7
+ version: "1.0.0"
8
+ created: "YYYY-MM-DD"
9
+ author: "Your Name"
10
+ repository: "https://github.com/yourrepo/yourproject"
11
+
12
+ random_seeds:
13
+ global: 42
14
+ spatial_cv: 42
15
+ background_sampling: 42
16
+ bootstrap: 42
17
+
18
+ data:
19
+ occurrence_source: "GBIF download YYYY-MM-DD"
20
+ occurrence_doi: "https://doi.org/10.15468/dl.XXXXX"
21
+ predictor_source: "WorldClim v2.1"
22
+ predictor_resolution: "2.5 arcmin"
23
+ predictor_version: "2.1"
24
+ study_area_source: "IBGE biome boundaries 2019"
25
+ baseline_period: ["2000-01-01", "2020-12-31"]
26
+
27
+ spatial:
28
+ project_crs: "EPSG:4326"
29
+ analysis_crs: "EPSG:31982"
30
+ raster_resolution_m: 1000
31
+ resampling_method: "bilinear"
32
+ study_area_buffer_km: 100
33
+
34
+ data_cleaning:
35
+ coordinate_uncertainty_max_m: 10000
36
+ spatial_thinning_distance_km: 10
37
+ duplicate_temporal_buffer_days: 7
38
+ missing_value_threshold: 0.20
39
+ taxonomy_backbone: "GBIF Backbone v2023"
40
+ coordinate_cleaner_flags:
41
+ - capitals
42
+ - centroids
43
+ - gbif
44
+ - zeros
45
+ - validity
46
+
47
+ modeling:
48
+ algorithms:
49
+ - maxnet
50
+ - brt
51
+ - random_forest
52
+ cv_method: "spatial_block"
53
+ cv_folds: 5
54
+ cv_block_size_km: 300
55
+ background_n: 10000
56
+ background_method: "random_within_M"
57
+ collinearity_vif_threshold: 5
58
+ collinearity_r_threshold: 0.70
59
+ primary_metric: "TSS"
60
+ threshold_method: "MaxTSS"
61
+ ensemble_method: "weighted_mean_TSS"
62
+
63
+ hyperparameters:
64
+ maxnet:
65
+ regularization_multiplier: [0.5, 1.0, 2.0, 3.0]
66
+ feature_classes: ["LQP", "LQPH"]
67
+ brt:
68
+ n_trees: [500, 1000, 2000]
69
+ learning_rate: [0.01, 0.001]
70
+ tree_complexity: [3, 5]
71
+ bag_fraction: 0.75
72
+ random_forest:
73
+ n_trees: 500
74
+ mtry: "auto"
75
+ min_node_size: 5
76
+
77
+ software:
78
+ r_version: "4.X.X"
79
+ key_packages:
80
+ terra: "X.X.X"
81
+ sf: "X.X.X"
82
+ biomod2: "X.X.X"
83
+ blockCV: "X.X.X"
84
+ dismo: "X.X.X"
@@ -0,0 +1,66 @@
1
+ # Reproducibility Checklist Template
2
+
3
+ Project: ___________________________
4
+ Date: ___________________________
5
+ Analyst: ___________________________
6
+
7
+ ## 1. Data Management
8
+
9
+ | Criterion | Status | Notes |
10
+ |-----------|--------|-------|
11
+ | Raw data preserved unchanged in `data/raw/` | PASS / FAIL | |
12
+ | All raw files have MD5/SHA256 checksums recorded | PASS / FAIL | |
13
+ | Data provenance (source, DOI, access date, license) documented | PASS / FAIL | |
14
+ | No manual edits to raw files | PASS / FAIL | |
15
+
16
+ ## 2. Code and Parameters
17
+
18
+ | Criterion | Status | Notes |
19
+ |-----------|--------|-------|
20
+ | All parameters in `params.yaml` (no hard-coded values in scripts) | PASS / FAIL | |
21
+ | Random seeds fixed and documented in `params.yaml` | PASS / FAIL | |
22
+ | Scripts run end-to-end without manual steps | PASS / FAIL | |
23
+ | Code version-controlled (Git) | PASS / FAIL | |
24
+ | Final commit tagged or release created | PASS / FAIL | |
25
+
26
+ ## 3. Environment
27
+
28
+ | Criterion | Status | Notes |
29
+ |-----------|--------|-------|
30
+ | `sessionInfo()` or `pip freeze` output saved | PASS / FAIL | |
31
+ | R version or Python version recorded | PASS / FAIL | |
32
+ | Package versions locked (`renv.lock` or `environment.yaml`) | PASS / FAIL | |
33
+ | OS and hardware noted | PASS / FAIL | |
34
+
35
+ ## 4. Decisions and Decision Log
36
+
37
+ | Criterion | Status | Notes |
38
+ |-----------|--------|-------|
39
+ | `decision_log.md` updated after each major step | PASS / FAIL | |
40
+ | Rationale documented for: QA thresholds, predictor selection, CV strategy, threshold method | PASS / FAIL | |
41
+ | Deviations from planned protocol documented | PASS / FAIL | |
42
+
43
+ ## 5. Outputs
44
+
45
+ | Criterion | Status | Notes |
46
+ |-----------|--------|-------|
47
+ | All output files in `outputs/` with meaningful names | PASS / FAIL | |
48
+ | File manifest with checksums for all outputs | PASS / FAIL | |
49
+ | Figures regenerable from code | PASS / FAIL | |
50
+ | Numbers in report cross-checked against output files | PASS / FAIL | |
51
+
52
+ ## 6. Archival
53
+
54
+ | Criterion | Status | Notes |
55
+ |-----------|--------|-------|
56
+ | Raw data archived at Zenodo / OSF / institutional repository | PASS / FAIL | |
57
+ | Code archived at GitHub / GitLab with DOI | PASS / FAIL | |
58
+ | Data availability statement included in report | PASS / FAIL | |
59
+
60
+ ## Overall Assessment
61
+
62
+ - Total criteria: ___
63
+ - PASS: ___
64
+ - FAIL: ___
65
+ - N/A: ___
66
+ - **Reproducibility score:** ___/___
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
3
+ # SPDX-License-Identifier: GPL-3.0-or-later
4
+
5
+ """
6
+ generate_file_manifest.py
7
+ Generate SHA256 checksums for all files in a directory and write a manifest.
8
+ Usage: python generate_file_manifest.py [directory] [output_file]
9
+ """
10
+ import logging
11
+ import sys
12
+ import hashlib
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+
16
# Name under which this script logs; it is also embedded in the log file name.
SKILL_NAME = "reproducible-ecology-pipeline"

# Every run writes to its own timestamped log file under ./logs (relative to
# the current working directory). The directory is created at import time.
_LOG_DIR = Path("logs")
_LOG_DIR.mkdir(parents=True, exist_ok=True)
_run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
_log_file = _LOG_DIR / f"skill_{SKILL_NAME}_{_run_stamp}.log"

# Records are mirrored to stdout and to the run's UTF-8 log file.
_console_handler = logging.StreamHandler(sys.stdout)
_file_handler = logging.FileHandler(_log_file, encoding="utf-8")
logging.basicConfig(
    level=logging.INFO,
    format=f"[%(asctime)s] [%(levelname)s] [{SKILL_NAME}] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[_console_handler, _file_handler],
)
logger = logging.getLogger(SKILL_NAME)
30
+
31
def log_step(n: int, desc: str) -> None:
    """Log, at INFO level, that pipeline step number *n* (*desc*) is starting."""
    step_template = "-- STEP %d: %s"
    logger.info(step_template, n, desc)
33
+
34
def log_decision(var: str, val, why: str) -> None:
    """Log, at INFO level, that *var* was set to *val*, with the rationale *why*."""
    decision_fields = (var, val, why)
    logger.info("DECISION | %s = %s | %s", *decision_fields)
36
+
37
+
38
def sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed in 65536-byte chunks, so
    its full contents are never held in memory at once.
    """
    chunk_size = 65536
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:  # empty bytes object signals end of file
                break
            digest.update(block)
    return digest.hexdigest()
44
+
45
def main():
    """Checksum every file under a directory and write a Markdown manifest.

    Command-line arguments (both optional):
        sys.argv[1]: directory to scan; defaults to ``outputs``.
        sys.argv[2]: manifest path; defaults to ``logs/file_manifest.md``.

    The manifest lists, for each file, its path relative to the scanned
    directory, its size in KB and its full SHA-256 digest. Files that cannot
    be read are logged as warnings and left out of the manifest.

    Exits with status 1 when the target is missing or is not a directory.
    Any other error is logged and re-raised.
    """
    target_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("outputs")
    output_file = Path(sys.argv[2]) if len(sys.argv) > 2 else Path("logs/file_manifest.md")
    output_file.parent.mkdir(parents=True, exist_ok=True)

    log_decision("target_dir", str(target_dir),
                 "Directory to scan for files and compute checksums")
    log_decision("output_file", str(output_file),
                 "Markdown manifest output path")

    if not target_dir.exists():
        logger.error(
            "Input nao encontrado: %s\n"
            " Causa provavel: passo anterior nao concluiu.\n"
            " Skill anterior que deveria ter produzido este input: geoprocessing-for-ecology",
            target_dir
        )
        sys.exit(1)
    if not target_dir.is_dir():
        # rglob() on a regular file yields nothing, which would otherwise
        # produce a misleading "0 files" manifest.
        logger.error("Target is not a directory: %s", target_dir)
        sys.exit(1)

    try:
        log_step(1, "Discovering files in target directory")
        # The manifest itself may live inside the scanned tree (e.g. from a
        # previous run). Its recorded checksum would be stale the moment it
        # is rewritten below, so it is excluded from the scan.
        manifest_path = output_file.resolve()
        files = sorted(
            f for f in target_dir.rglob("*")
            if f.is_file() and f.resolve() != manifest_path
        )
        logger.info("Files found: %d in %s", len(files), target_dir)
        if not files:
            logger.warning(
                "No files found in %s. The directory may be empty or outputs not yet produced.",
                target_dir
            )

        log_step(2, "Computing SHA256 checksums and building manifest")
        rows = []
        for f in files:
            try:
                size_kb = round(f.stat().st_size / 1024, 2)
                checksum = sha256(f)
            except OSError as e:
                # Best effort: an unreadable file is reported and skipped.
                logger.warning("Could not process file %s: %s", f, e)
                continue
            rel = f.relative_to(target_dir)
            # Store the full digest: a truncated one cannot be compared
            # against `sha256sum` output to verify a file.
            rows.append(f"| `{rel}` | {size_kb} | `{checksum}` |")
            logger.info(" Checksummed: %s (%s KB)", rel, size_kb)

        # The header is built after the loop so "Files:" reflects the number
        # of files actually listed, not merely discovered.
        lines = [
            "# File Manifest",
            f"Directory: `{target_dir}`",
            f"Files: {len(rows)}",
            "",
            "| File | Size (KB) | SHA256 |",
            "|------|----------|--------|",
            *rows,
        ]

        log_step(3, "Writing manifest file")
        # Explicit UTF-8 keeps non-ASCII file names intact regardless of the
        # platform's default encoding.
        output_file.write_text("\n".join(lines), encoding="utf-8")
        logger.info("Manifest written: %s (%d files)", output_file, len(rows))

    except FileNotFoundError as e:
        logger.error(
            "Input file not found: %s\n"
            " Expected output from: geoprocessing-for-ecology\n"
            " Check that previous step completed.",
            e
        )
        raise
    except Exception as e:
        logger.error("Unexpected error in generate_file_manifest: %s", e)
        raise


if __name__ == "__main__":
    main()
@@ -0,0 +1,53 @@
1
#!/bin/bash
# init_project.sh
# Initialise a reproducible ecology project structure.
# Usage: bash init_project.sh [project_name]
#   project_name defaults to "my-ecology-project".
# Safe to re-run: an existing params.yaml, .gitignore, decision log or
# provenance log is kept as it is, never overwritten.

# Stop at the first failing command. Without this, a failed mkdir or cd would
# let the steps below run `git init` and write files in the caller's current
# directory instead of the project directory.
set -euo pipefail

PROJECT="${1:-my-ecology-project}"
echo "Initialising project: $PROJECT"

mkdir -p -- "$PROJECT"/{data/{raw,processed,spatial},models,outputs/{figures,tables,maps},reports,scripts,logs}
touch "$PROJECT/data/raw/.gitkeep"

# write_if_missing FILE
# Copy stdin to FILE unless FILE already exists, so a re-run never truncates
# a log or configuration file the user has already filled in.
write_if_missing() {
    local target="$1"
    if [ -e "$target" ]; then
        cat > /dev/null   # consume the here-document
        echo "  Keeping existing $target"
    else
        cat > "$target"
    fi
}

# Create params.yaml from the template shipped next to this script; fall back
# to a one-line placeholder when the template cannot be copied.
if [ ! -e "$PROJECT/params.yaml" ]; then
    cp "$(dirname "$0")/../resources/params-yaml-template.yaml" "$PROJECT/params.yaml" 2>/dev/null || \
        echo "# params.yaml — fill in values" > "$PROJECT/params.yaml"
fi

# Git init (skipped with a warning when git is not installed)
cd "$PROJECT"
if command -v git > /dev/null 2>&1; then
    git init -q
else
    echo "Warning: git not found; skipping 'git init'" >&2
fi

# .gitignore
# R saves its workspace as ".RData"; gitignore patterns are case-sensitive by
# default, so the exact capitalisation matters.
write_if_missing .gitignore << 'GITIGNORE'
data/raw/*
!data/raw/.gitkeep
*.Rhistory
.RData
__pycache__/
*.pyc
.DS_Store
*.log
GITIGNORE

# decision_log.md header
write_if_missing logs/decision_log.md << 'DLOG'
# Decision Log

| Date | Step | Decision | Rationale | Output files |
|------|------|----------|-----------|-------------|
DLOG

# data_provenance.md
write_if_missing logs/data_provenance.md << 'PROV'
# Data Provenance

| Dataset | Source | Version | Access date | DOI/URL | License | Checksum |
|---------|--------|---------|-------------|---------|---------|---------|
PROV

echo "Project structure created in: $PROJECT/"
echo "Next steps:"
echo "  1. Fill in params.yaml"
echo "  2. Add raw data to $PROJECT/data/raw/"
echo "  3. Record provenance in logs/data_provenance.md"