ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/AGENT_CONTEXT.md +191 -0
  2. package/CATALOG.md +329 -0
  3. package/LICENSE +692 -0
  4. package/README.md +347 -0
  5. package/bin/install.mjs +168 -0
  6. package/docs/comparison-with-alternatives.md +38 -0
  7. package/docs/global-examples-index.md +103 -0
  8. package/docs/repository-statistics.md +101 -0
  9. package/docs/theoretical-foundations.md +188 -0
  10. package/environment.yaml +106 -0
  11. package/examples/community/arctic_tundra_vegetation_example.md +247 -0
  12. package/examples/community/bird_landuse_example.md +63 -0
  13. package/examples/community/phytoplankton_reservoir_example.md +60 -0
  14. package/examples/community/reef_fish_indopacific_example.md +221 -0
  15. package/examples/impact/baci_road_example.md +57 -0
  16. package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
  17. package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
  18. package/examples/occupancy/puma_camera_example.md +61 -0
  19. package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
  20. package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
  21. package/examples/sdm/anteater_cerrado_example.md +69 -0
  22. package/examples/sdm/jaguar_amazon_example.md +80 -0
  23. package/examples/sdm/koala_climate_change_example.md +170 -0
  24. package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
  25. package/package.json +43 -0
  26. package/renv.lock +194 -0
  27. package/skills/SKILL_INDEX.json +1020 -0
  28. package/skills/acoustic-monitoring/SKILL.md +163 -0
  29. package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
  30. package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
  31. package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
  32. package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
  33. package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
  34. package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
  35. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
  36. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
  37. package/skills/biostatistics-workbench/SKILL.md +140 -0
  38. package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
  39. package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
  40. package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
  41. package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
  42. package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
  43. package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
  44. package/skills/camera-trap-processing/SKILL.md +159 -0
  45. package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
  46. package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
  47. package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
  48. package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
  49. package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
  50. package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
  51. package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
  52. package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
  53. package/skills/community-ecology-ordination/SKILL.md +133 -0
  54. package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
  55. package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
  56. package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
  57. package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
  58. package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
  59. package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
  60. package/skills/ecological-data-foundation/SKILL.md +129 -0
  61. package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
  62. package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
  63. package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
  64. package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
  65. package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
  66. package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
  67. package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
  68. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
  69. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
  70. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
  71. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
  72. package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
  73. package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
  74. package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
  75. package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
  76. package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
  77. package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
  78. package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
  79. package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
  80. package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
  81. package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
  82. package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
  83. package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
  84. package/skills/ecological-impact-assessment/SKILL.md +123 -0
  85. package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
  86. package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
  87. package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
  88. package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
  89. package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
  90. package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
  91. package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
  92. package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
  93. package/skills/ecosystem-services-assessment/SKILL.md +125 -0
  94. package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
  95. package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
  96. package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
  97. package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
  98. package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
  99. package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
  100. package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
  101. package/skills/environmental-time-series/SKILL.md +125 -0
  102. package/skills/environmental-time-series/examples/example-prompts.md +33 -0
  103. package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
  104. package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
  105. package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
  106. package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
  107. package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
  108. package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
  109. package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
  110. package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
  111. package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
  112. package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
  113. package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
  114. package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
  115. package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
  116. package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
  117. package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
  118. package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
  119. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
  120. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
  121. package/skills/landscape-connectivity/SKILL.md +170 -0
  122. package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
  123. package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
  124. package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
  125. package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
  126. package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
  127. package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
  128. package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
  129. package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
  130. package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
  131. package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
  132. package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
  133. package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
  134. package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
  135. package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
  136. package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
  137. package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
  138. package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
  139. package/skills/occupancy-and-detection/SKILL.md +126 -0
  140. package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
  141. package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
  142. package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
  143. package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
  144. package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
  145. package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
  146. package/skills/population-viability-analysis/SKILL.md +161 -0
  147. package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
  148. package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
  149. package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
  150. package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
  151. package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
  152. package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
  153. package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
  154. package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
  155. package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
  156. package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
  157. package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
  158. package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
  159. package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
  160. package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
  161. package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
  162. package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
  163. package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
  164. package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
  165. package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
  166. package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
  167. package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
  168. package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
  169. package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
  170. package/skills/spatial-prioritization/SKILL.md +162 -0
  171. package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
  172. package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
  173. package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
  174. package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
  175. package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
  176. package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
  177. package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
  178. package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
  179. package/skills/species-distribution-modeling/SKILL.md +139 -0
  180. package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
  181. package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
  182. package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
  183. package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
  184. package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
  185. package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
  186. package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
  187. package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
  188. package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
  189. package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
  190. package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
  191. package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
  192. package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
  193. package/templates/SKILL_TEMPLATE.md +225 -0
  194. package/templates/checklists/data-submission-checklist.md +38 -0
  195. package/templates/checklists/post-analysis-checklist.md +55 -0
  196. package/templates/checklists/pre-analysis-checklist.md +31 -0
  197. package/templates/prompts/debug-skill.md +47 -0
  198. package/templates/prompts/invoke-skill.md +34 -0
  199. package/templates/prompts/invoke-workflow.md +45 -0
  200. package/templates/reports/technical-report-template.md +80 -0
  201. package/templates/scripts/logger_setup.R +79 -0
  202. package/templates/scripts/logger_setup.py +119 -0
  203. package/templates/scripts/params_loader.R +28 -0
  204. package/templates/scripts/params_loader.py +38 -0
  205. package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
  206. package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
  207. package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
  208. package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
  209. package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
  210. package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
  211. package/workflows/produce-technical-report/WORKFLOW.md +113 -0
  212. package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
  213. package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
  214. package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
  215. package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
  216. package/workflows/run-population-viability/WORKFLOW.md +90 -0
  217. package/workflows/run-sdm-study/WORKFLOW.md +99 -0
@@ -0,0 +1,140 @@
1
+ ---
2
+ name: biostatistics-workbench
3
+ description: "Runs frequentist statistical analyses including GLMs, GLMMs, model selection, and assumption diagnostics for ecological data. Use this skill when the user needs statistical tests, linear or mixed models, ANOVA, effect sizes, confidence intervals, AIC-based model selection, residual diagnostics, overdispersion checks, regression analysis, p-value interpretation, normality tests, or hypothesis testing on ecological datasets."
4
+ skill_version: 1.0.0
5
+ ---
6
+
7
+ # Skill: biostatistics-workbench
8
+
9
+ **Domain:** Hypothesis testing · GLM/GLMM · Assumptions · Effect sizes · CIs
10
+ **Phase:** 1 — Foundation
11
+ **Used by:** assess-ecological-impact, analyze-community-structure, run-occupancy-analysis, assess-ecosystem-services
12
+
13
+ ---
14
+
15
+ ## Purpose
16
+
17
+ Guides the agent through the selection, execution, and interpretation of statistical methods appropriate for ecological data. Covers classical tests, generalised linear models, mixed models, assumption diagnostics, effect size estimation, and model selection.
18
+
19
+ ---
20
+
21
+ ## When to Invoke
22
+
23
+ - Choosing a statistical test for a specific research question and data structure
24
+ - Fitting GLM or GLMM with ecological response variables
25
+ - Checking distributional assumptions (normality, homoscedasticity, independence)
26
+ - Reporting effect sizes and confidence intervals
27
+ - Performing model selection (AIC, LRT, cross-validation)
28
+
29
+ ---
30
+
31
+ ## Inputs
32
+
33
+ | Input | Format | Required |
34
+ |-------|--------|----------|
35
+ | Response variable(s) | Numeric or count vector | Yes |
36
+ | Predictor variable(s) | Numeric, categorical, or both | Yes |
37
+ | Random effects structure (if any) | Description or formula | Conditional |
38
+ | Study design description | Text | Recommended |
39
+
40
+ ---
41
+
42
+ ## Outputs
43
+
44
+ | Output | Description |
45
+ |--------|-------------|
46
+ | `model_summary.txt` | Full model output (coefficients, SE, z/t, p) |
47
+ | `model_selection_table.csv` | AIC/BIC/ΔAIC comparison across candidate models |
48
+ | `assumption_diagnostics/` | Residual plots, QQ plots, variance inflation factors |
49
+ | `effect_sizes.csv` | Effect size estimates with 95% CIs |
50
+ | `stats_report.md` | Plain-language interpretation of results |
51
+
52
+ ---
53
+
54
+ ## Steps
55
+
56
+ ### 1. Define the Research Question and Data Structure
57
+ - Clarify the response variable type: continuous, count, binary, proportion, ordinal
58
+ - Clarify the predictor types: fixed categorical, fixed continuous, random grouping
59
+ - Identify the sampling design: independent, nested, repeated measures, spatial
60
+
61
+ ### 2. Select the Appropriate Method
62
+
63
+ | Response | Distribution | Recommended model |
64
+ |----------|-------------|------------------|
65
+ | Continuous, normal | Gaussian | LM / LMM |
66
+ | Continuous, non-normal | Log-normal, Gamma | GLM Gamma / LM on log-transformed response |
67
+ | Count, no excess zeros | Poisson | GLM Poisson |
68
+ | Count, overdispersed | Negative binomial | GLM NB |
69
+ | Count, zero-inflated | ZIP / ZINB | Zero-inflated model |
70
+ | Binary (0/1) | Binomial | GLM logistic |
71
+ | Proportion (0–1) | Beta | Beta regression |
72
+ | Ordinal | Ordered | Proportional odds model |
73
+
74
+ ### 3. Check Assumptions Before Fitting
75
+ - Collinearity: compute VIF for all predictors (flag VIF > 5; critical > 10)
76
+ - Sample size adequacy: events-per-variable rule (EPV ≥ 10 for logistic)
77
+ - Independence: confirm no pseudoreplication; identify random effects structure
78
+
79
+ ### 4. Fit Model(s)
80
+ - Fit the global model first
81
+ - Fit a set of candidate models based on a priori hypotheses
82
+ - Avoid purely data-driven stepwise selection; document candidate model rationale
83
+
84
+ ### 5. Check Assumptions After Fitting
85
+ - Residual plots (Pearson, deviance, randomised quantile residuals for GLMs)
86
+ - QQ plot of residuals
87
+ - Residuals vs fitted values
88
+ - Scale-location plot for heteroscedasticity
89
+ - Cook's distance for influential observations
90
+
91
+ ### 6. Model Selection
92
+ - Compute AIC/AICc/BIC for all candidate models
93
+ - Report ΔAIC and Akaike weights
94
+ - Use LRT for nested model comparison
95
+ - Avoid selecting models based on p-value alone
96
+
97
+ ### 7. Report Effect Sizes and CIs
98
+ - Report standardised coefficients (βstd) for comparability
99
+ - Report 95% CI for all estimates (profile likelihood preferred over Wald for GLMs)
100
+ - Report R²m and R²c for LMMs (marginal and conditional)
101
+
102
+ ### 8. Generate Outputs
103
+ - Write model summary to `model_summary.txt`
104
+ - Write model selection table to `model_selection_table.csv`
105
+ - Save all diagnostic plots to `assumption_diagnostics/`
106
+ - Write `stats_report.md` with plain-language interpretation
107
+
108
+ ---
109
+
110
+ ## Key Decisions to Document
111
+
112
+ - Response variable distribution and link function
113
+ - Random effects structure and rationale
114
+ - Candidate model set and justification
115
+ - Model selection criterion used
116
+ - Effect size metric chosen
117
+
118
+ ---
119
+
120
+ ## Tools and Libraries
121
+
122
+ **R:** `lme4`, `glmmTMB`, `MuMIn`, `DHARMa`, `emmeans`, `performance`, `effectsize`
123
+ **Python:** `statsmodels`, `pymer4`, `pingouin`, `scipy.stats`
124
+
125
+ ---
126
+
127
+ ## Resources
128
+
129
+ - `resources/test-selection-guide.md` — flowchart for test selection
130
+ - `resources/glm-family-link-reference.md` — GLM family and link function guide
131
+ - `resources/effect-size-reference.md` — which effect size to report per test
132
+ - `examples/` — worked GLM and GLMM examples
133
+
134
+ ---
135
+
136
+ ## Notes
137
+
138
+ - Never report p-values without effect sizes
139
+ - Multiple comparisons: apply Bonferroni or FDR correction when testing many hypotheses
140
+ - Overdispersion in Poisson models must always be checked (dispersion parameter)
@@ -0,0 +1,39 @@
1
+ # Example Invocation Prompts — biostatistics-workbench
2
+
3
+ ## GLM for Species Richness
4
+
5
+ ```
6
+ Load skill: biostatistics-workbench
7
+ Task: I have species richness counts (count variable, integers) at 80 plots
8
+ across three vegetation types (factor: "forest", "savanna", "wetland").
9
+ Additional predictors: elevation (m), precipitation (mm/year), distance_to_edge (m).
10
+ Fit an appropriate GLM, check assumptions, select the best model by AICc,
11
+ and report effect sizes with 95% CIs.
12
+ Data file: data/processed/richness_data.csv
13
+ ```
14
+
15
+ ## BACI Mixed Model
16
+
17
+ ```
18
+ Load skill: biostatistics-workbench
19
+ Task: BACI analysis of bird abundance before and after road construction.
20
+ Response: bird_abundance (count)
21
+ Fixed effects: period (before/after), treatment (control/impact), period:treatment interaction
22
+ Random effects: site (repeated measures)
23
+ Data: data/baci_birds.csv
24
+ Report: BACI interaction coefficient, 95% CI, p-value, Cohen's d.
25
+ ```
26
+
27
+ ## Assumption Checking
28
+
29
+ ```
30
+ Load skill: biostatistics-workbench
31
+ Task: I fitted a Poisson GLM (model object saved in models/glm_poisson.rds).
32
+ Run a full assumption check using DHARMa:
33
+ - Uniformity test
34
+ - Dispersion test
35
+ - Zero-inflation test
36
+ - Outlier test
37
+ Save all diagnostic plots to outputs/diagnostics/.
38
+ If overdispersion detected, refit with negative binomial and compare AIC.
39
+ ```
@@ -0,0 +1,81 @@
1
+ # Effect Size Reference for Ecological Statistics
2
+
3
+ Effect sizes quantify the **magnitude** of an effect independently of sample size. Always report them alongside p-values.
4
+
5
+ ## Continuous Response (t-tests, ANOVA, LMM)
6
+
7
+ | Measure | Formula | Interpretation | R function |
8
+ |---------|---------|---------------|-----------|
9
+ | Cohen's d | (μ₁ − μ₂) / SD_pooled | 0.2 = small, 0.5 = medium, 0.8 = large | `effectsize::cohens_d()` |
10
+ | Hedges' g | Cohen's d × correction factor | Preferred for unequal n | `effectsize::hedges_g()` |
11
+ | η² (eta-squared) | SS_effect / SS_total | Proportion (0–1) of variance explained by factor | `effectsize::eta_squared()` |
12
+ | ω² (omega-squared) | Bias-corrected η² | Preferred over η² for small n | `effectsize::omega_squared()` |
13
+ | partial η² | SS_effect / (SS_effect + SS_residual) | For multiple predictors | `effectsize::eta_squared(partial=TRUE)` |
14
+ | R² / R²_adj | Model variance explained | 0.01 = small, 0.09 = medium, 0.25 = large | `performance::r2()` |
15
+ | R²_m, R²_c | Marginal / conditional R² for LMMs | R²_m = fixed only; R²_c = fixed + random | `performance::r2()` |
16
+
17
+ ## Binary Response (logistic regression, GLM binomial)
18
+
19
+ | Measure | Interpretation | How to compute |
20
+ |---------|---------------|----------------|
21
+ | Odds ratio (OR) | exp(β); OR = 1 means no effect | `exp(coef(model))` |
22
+ | OR 95% CI | Interval for the OR on the odds scale; a CI that excludes 1 indicates an effect distinguishable from no effect | `exp(confint(model))` |
23
+ | Risk ratio (RR) | More interpretable than OR when prevalence is high | Compute from marginal predictions |
24
+ | Cohen's h | h = 2arcsin(√p₁) − 2arcsin(√p₂) | `effectsize::cohens_h()` |
25
+ | Cramér's V | For chi-square tests; 0–1 | `effectsize::cramers_v()` |
26
+
27
+ ## Count Response (Poisson, negative binomial)
28
+
29
+ | Measure | Interpretation |
30
+ |---------|---------------|
31
+ | Rate ratio (IRR) | exp(β); multiplicative effect on count |
32
+ | % change | (exp(β) − 1) × 100% |
33
+ | McFadden's pseudo-R² | 1 − LL_model/LL_null; > 0.2 = good fit |
34
+
35
+ ## Non-Parametric Tests
36
+
37
+ | Test | Effect size | Measure | Range |
38
+ |------|------------|---------|-------|
39
+ | Mann-Whitney U | Rank-biserial r | r = 1 − 2U/(n₁×n₂) | -1 to 1 |
40
+ | Wilcoxon signed-rank | r | r = Z/√N | -1 to 1 |
41
+ | Kruskal-Wallis | η²_H (eta-squared based on H) | η²_H = (H − k + 1) / (n − k) | 0 to 1 |
42
+ | Spearman | ρ (rho) | Pearson correlation computed on the ranked values | -1 to 1 |
43
+
44
+ ## Multivariate (PERMANOVA)
45
+
46
+ | Measure | Interpretation |
47
+ |---------|---------------|
48
+ | R² from adonis2 | Proportion of dissimilarity explained by the factor |
49
+ | Partial R² | For models with multiple terms |
50
+
51
+ **Note:** PERMANOVA R² tends to be modest even for ecologically strong effects; R² = 0.15–0.30 is typical and meaningful in community ecology.
52
+
53
+ ## Benchmarks Summary
54
+
55
+ | Size | d | r | R² | OR |
56
+ |------|---|---|----|----|
57
+ | Negligible | < 0.2 | < 0.10 | < 0.01 | < 1.5 |
58
+ | Small | 0.2–0.5 | 0.10–0.30 | 0.01–0.09 | 1.5–3.0 |
59
+ | Medium | 0.5–0.8 | 0.30–0.50 | 0.09–0.25 | 3.0–6.0 |
60
+ | Large | > 0.8 | > 0.50 | > 0.25 | > 6.0 |
61
+
62
+ ## R Package: effectsize
63
+
64
+ ```r
65
+ library(effectsize)
66
+ library(lme4)
67
+
68
+ # From a t-test
69
+ t_result <- t.test(group1, group2)
70
+ cohens_d(group1, group2)
71
+
72
+ # From a linear model
73
+ m <- lm(richness ~ land_use + elevation, data = dat)
74
+ eta_squared(m) # η² for each term
75
+ omega_squared(m) # bias-corrected
76
+
77
+ # From a GLM
78
+ m_glm <- glm(presence ~ forest_cover, family = binomial, data = dat)
79
+ exp(coef(m_glm)) # odds ratios
80
+ exp(confint(m_glm)) # 95% CI for ORs
81
+ ```
@@ -0,0 +1,47 @@
1
+ # GLM Family and Link Function Reference
2
+
3
+ ## Common Families and Links
4
+
5
+ | Family | Default link | Canonical use | R syntax |
6
+ |--------|-------------|---------------|---------|
7
+ | `gaussian` | identity | Continuous, normal | `glm(y ~ x, family = gaussian)` |
8
+ | `Gamma` | inverse | Positive continuous, right-skewed | `glm(y ~ x, family = Gamma(link="log"))` |
9
+ | `inverse.gaussian` | 1/mu² | Positive continuous, extreme skew | `glm(y ~ x, family = inverse.gaussian)` |
10
+ | `binomial` | logit | Binary (0/1), proportion (cbind) | `glm(y ~ x, family = binomial)` |
11
+ | `quasibinomial` | logit | Proportion / grouped binomial (cbind), overdispersed | `glm(y ~ x, family = quasibinomial)` |
12
+ | `poisson` | log | Count data | `glm(y ~ x, family = poisson)` |
13
+ | `quasipoisson` | log | Count, overdispersed | `glm(y ~ x, family = quasipoisson)` |
14
+ | `nbinom2` (glmmTMB) | log | Count, overdispersed (NB) | `glmmTMB(y ~ x, family = nbinom2)` |
15
+ | `tweedie` | log | Non-negative continuous with exact zeros (e.g. precipitation) | `glmmTMB(y ~ x, family = tweedie)` |
16
+ | `beta_family` | logit | Proportion (0,1) exclusive | `glmmTMB(y ~ x, family = beta_family)` |
17
+ | `ordbeta` | logit | Proportion including 0 and 1 | `glmmTMB(y ~ x, family = ordbeta)` |
18
+ | `truncated_poisson` | log | Count with no zeros | `glmmTMB(y ~ x, family = truncated_poisson)` |
19
+
20
+ ## Checking Overdispersion (Poisson)
21
+
22
+ ```r
23
+ # After fitting a Poisson GLM:
24
+ dispersion_ratio <- sum(residuals(model, type = "pearson")^2) / df.residual(model)
25
+ # If dispersion_ratio >> 1 (>1.5), switch to quasipoisson or negative binomial
26
+ ```
27
+
28
+ ## Checking Distributional Assumptions with DHARMa
29
+
30
+ ```r
31
+ library(DHARMa)
32
+ sim_res <- simulateResiduals(fittedModel = model, plot = TRUE)
33
+ # Provides: QQ plot, residuals vs fitted, uniformity test
34
+ testDispersion(sim_res)
35
+ testZeroInflation(sim_res)
36
+ testOutliers(sim_res)
37
+ ```
38
+
39
+ ## Link Function Interpretation
40
+
41
+ | Link | Function | Interpretation of coefficient |
42
+ |------|----------|-------------------------------|
43
+ | identity | η = μ | 1-unit change in x → β change in y (additive) |
44
+ | log | η = log(μ) | 1-unit change in x → exp(β) multiplicative change in y |
45
+ | logit | η = log(μ/(1−μ)) | 1-unit change in x → exp(β) odds ratio |
46
+ | inverse | η = 1/μ | Less common; log link usually preferred for Gamma |
47
+ | sqrt | η = √μ | Intermediate between identity and log |
@@ -0,0 +1,93 @@
1
+ # Statistical Test Selection Guide
2
+
3
+ ## Step 1 — What is your goal?
4
+
5
+ ```
6
+ Compare groups → Step 2
7
+ Assess relationship → Step 5
8
+ Predict a response → Step 7
9
+ Describe distribution → Step 9
10
+ ```
11
+
12
+ ## Step 2 — Comparing Groups: How many?
13
+
14
+ ```
15
+ 2 groups → Step 3
16
+ 3+ groups → Step 4
17
+ ```
18
+
19
+ ## Step 3 — Comparing 2 Groups
20
+
21
+ | Data type | Independent? | Parametric? | Test |
22
+ |-----------|-------------|-------------|------|
23
+ | Continuous | Yes | Yes (normal, equal var) | t-test (Student's) |
24
+ | Continuous | Yes | Yes (normal, unequal var) | t-test (Welch's) |
25
+ | Continuous | Yes | No | Mann-Whitney U |
26
+ | Continuous | No (paired) | Yes | Paired t-test |
27
+ | Continuous | No (paired) | No | Wilcoxon signed-rank |
28
+ | Binary proportion | Yes | — | Chi-square / Fisher's exact |
29
+ | Count | Yes | — | Poisson test |
30
+
31
+ ## Step 4 — Comparing 3+ Groups
32
+
33
+ | Data type | Design | Test | Post-hoc |
34
+ |-----------|--------|------|---------|
35
+ | Continuous, normal, equal var | 1 factor | One-way ANOVA | Tukey HSD |
36
+ | Continuous, non-normal | 1 factor | Kruskal-Wallis | Dunn's |
37
+ | Continuous | 2 factors | Two-way ANOVA | Tukey / emmeans |
38
+ | Continuous, repeated measures | 1 factor | Repeated-measures ANOVA | emmeans |
39
+ | Count data | Groups | GLM Poisson | emmeans on log scale |
40
+ | Binary | Groups | GLM Binomial | emmeans |
41
+
42
+ ## Step 5 — Association / Correlation
43
+
44
+ | Data type | Test | Coefficient |
45
+ |-----------|------|-------------|
46
+ | Continuous vs continuous, linear | Pearson | r |
47
+ | Continuous vs continuous, monotonic (possibly non-linear) / ranks | Spearman | ρ |
48
+ | Ordinal vs ordinal | Kendall | τ |
49
+ | Binary vs binary | Chi-square | φ (phi) |
50
+ | Continuous vs binary | Point-biserial | r_pb |
51
+
52
+ ## Step 6 — Multivariate Association
53
+
54
+ | Goal | Method |
55
+ |------|--------|
56
+ | Community similarity | Bray-Curtis / Jaccard dissimilarity |
57
+ | Community differences between groups | PERMANOVA |
58
+ | Gradient ordination | NMDS, PCA, RDA |
59
+ | Taxon association network | Co-occurrence analysis |
60
+
61
+ ## Step 7 — Predicting a Response
62
+
63
+ | Response type | Distribution | Model |
64
+ |--------------|-------------|-------|
65
+ | Continuous, normal | Gaussian | LM / LMM |
66
+ | Continuous, positive skew | Log-normal or Gamma | GLM Gamma |
67
+ | Count, no excess zeros | Poisson | GLM Poisson |
68
+ | Count, overdispersed | Negative binomial | GLM NB (glmmTMB) |
69
+ | Count, zero-inflated | Zero-inflated Poisson/NB | glmmTMB |
70
+ | Binary (presence/absence) | Binomial | GLM Logistic |
71
+ | Proportion (0–1) | Beta | betareg |
72
+ | Ordinal | Ordered logistic | polr / clm |
73
+ | Multivariate community | — | RDA, CCA, mvabund |
74
+
75
+ ## Step 8 — Random Effects?
76
+
77
+ Add random effects if:
78
+ - Data are nested (plots within sites within regions)
79
+ - Data are repeated measures on the same individual/site
80
+ - Groups are a random sample of a larger population
81
+
82
+ Use `lme4::lmer()` (Gaussian) or `lme4::glmer()` / `glmmTMB::glmmTMB()` (non-Gaussian).
83
+
84
+ ## Step 9 — Normality Tests
85
+
86
+ | Test | Use | Package |
87
+ |------|-----|---------|
88
+ | Shapiro-Wilk | n < 50 (most powerful for small n) | base R `shapiro.test()` |
89
+ | Kolmogorov-Smirnov | Large samples, μ/σ specified in advance (not estimated from the data) | base R `ks.test()` |
90
+ | Lilliefors | Large samples, unknown μ/σ | `nortest::lillie.test()` |
91
+ | Q-Q plot | Visual, any n | base R `qqnorm()` |
92
+
93
+ **Note:** Normality tests are sensitive to n. For large datasets, trivial departures become significant. Always inspect Q-Q plots in addition to test p-values.
@@ -0,0 +1,78 @@
1
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ # Note: no command-line arguments are parsed; running `Rscript glm_pipeline.R` only loads the logger and fit_and_check() defined below.
5
+ # Fit candidate GLMs, check assumptions, model selection
6
+ # Usage: source this script or adapt interactively
7
+ # Requires: glmmTMB, DHARMa, MuMIn, emmeans, dplyr
8
+
9
+ # ── Inline logger ─────────────────────────────────────────────────────────────
10
# Name of the skill this script belongs to.
SKILL_NAME <- "biostatistics-workbench"

# Current wall-clock time formatted as "[YYYY-MM-DD HH:MM:SS]".
.log_ts <- function() {
  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
}

# Level-specific loggers. All arguments are forwarded to sprintf(), so the
# first argument is a format string; output goes through message().
log_info <- function(...) {
  stamp <- .log_ts()
  text <- sprintf(...)
  message(stamp, " [INFO] ", text)
}

log_warn <- function(...) {
  stamp <- .log_ts()
  text <- sprintf(...)
  message(stamp, " [WARN] ", text)
}

log_error <- function(...) {
  stamp <- .log_ts()
  text <- sprintf(...)
  message(stamp, " [ERROR] ", text)
}

# Announce pipeline step number `n` with description `d` at INFO level.
log_step <- function(n, d) {
  log_info("-- STEP %d: %s", n, d)
}

# Record a decision: variable `v`, chosen value `val`, and the reason `why`.
log_decision <- function(v, val, why) {
  log_info("DECISION | %s = %s | %s", v, val, why)
}

# Create the "logs" directory in the working directory if it does not exist.
dir.create("logs", recursive = TRUE, showWarnings = FALSE)
18
+
19
+ suppressPackageStartupMessages({
20
+ library(glmmTMB)
21
+ library(DHARMa)
22
+ library(MuMIn)
23
+ library(emmeans)
24
+ library(dplyr)
25
+ })
26
+
27
+ # ── Helper: fit and summarise a GLM ───────────────────────────────────────
28
# Fit one candidate model with glmmTMB, log its summary, and save a DHARMa
# residual-diagnostic plot.
#
# Arguments:
#   formula    model formula, passed to glmmTMB()
#   data       data set, passed to glmmTMB()
#   family     family object, passed to glmmTMB() (e.g. poisson(), nbinom2())
#   label      string used in log messages and as the PNG file-name stem
#   output_dir directory under which "diagnostics/" is created
#              (default "outputs")
#
# Returns: list(model = <fitted model>, label = label, AIC = AIC of the model).
# Errors:  any error raised while fitting or plotting is logged via
#          log_error() and then re-raised unchanged.
fit_and_check <- function(formula, data, family, label, output_dir = "outputs") {
  log_info("Fitting model: %s", label)
  diag_dir <- file.path(output_dir, "diagnostics")
  dir.create(diag_dir, recursive = TRUE, showWarnings = FALSE)

  tryCatch({
    m <- glmmTMB(
      formula,
      data = data,
      family = family,
      control = glmmTMBControl(optimizer = optim, optArgs = list(method = "BFGS"))
    )
    model_aic <- AIC(m)
    # NOTE(review): this message is logged whenever glmmTMB() returns without
    # an error; it does not inspect the optimizer's convergence status.
    log_info("Model %s converged. AIC = %.2f", label, model_aic)
    # The summary text is passed as a "%s" argument, never as the format
    # string: log_info() forwards to sprintf(), and a literal "%" in the
    # summary (e.g. a formula using %in%) would otherwise raise an error.
    log_info("%s", paste(capture.output(summary(m)), collapse = "\n"))

    log_step(1, sprintf("DHARMa residual diagnostics for %s", label))
    sim_res <- simulateResiduals(m, plot = FALSE, n = 500)
    png(file.path(diag_dir, paste0(label, "_dharma.png")),
        width = 1200, height = 600, res = 150)
    # Close the PNG device even if plotting fails, so no device is leaked.
    tryCatch(
      plot(sim_res, main = label),
      finally = dev.off()
    )
    log_info("DHARMa diagnostic plot saved for %s", label)

    list(model = m, label = label, AIC = model_aic)
  }, error = function(e) {
    # Log message is in Portuguese: "Failure in fit_and_check [label]: <msg> /
    # Probable cause: convergence or insufficient data for the chosen family /
    # Check: formula, distribution family, and input data /
    # Previous skill: data-cleaning".
    log_error(
      "Falha em fit_and_check [%s]: %s\nCausa provavel: convergencia ou dados insuficientes para a familia escolhida\nVerifique: formula, familia de distribuicao, e dados de entrada\nSkill anterior: data-cleaning",
      label, conditionMessage(e)
    )
    stop(e)
  })
}
55
+
56
+ # ── Example usage ─────────────────────────────────────────────────────────
57
+ # Uncomment and adapt:
58
+ #
59
+ # dat <- read.csv("data/processed/richness_data.csv")
60
+ #
61
+ # candidates <- list(
62
+ # fit_and_check(richness ~ vegetation_type + elevation, dat, poisson(), "m1_poisson"),
63
+ # fit_and_check(richness ~ vegetation_type + elevation, dat, nbinom2(), "m2_nbinom"),
64
+ # fit_and_check(richness ~ vegetation_type + elevation + precipitation, dat, nbinom2(), "m3_nbinom_full")
65
+ # )
66
+ #
67
+ # # Model selection table
68
+ # aic_table <- do.call(rbind, lapply(candidates, function(x) data.frame(
69
+ # model = x$label, AIC = x$AIC
70
+ # ))) |> arrange(AIC) |> mutate(deltaAIC = AIC - min(AIC))
71
+ # print(aic_table)
72
+ # write.csv(aic_table, "outputs/model_selection_table.csv", row.names = FALSE)
73
+ #
74
+ # # Best model effects
75
+ # best <- candidates[[which.min(sapply(candidates, function(x) x$AIC))]]$model
76
+ # em <- emmeans(best, ~ vegetation_type)
77
+ # print(em)
78
+ log_info("glm_pipeline.R loaded. Adapt the example usage section for your data.")