ecological-agent-skills 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/AGENT_CONTEXT.md +191 -0
  2. package/CATALOG.md +329 -0
  3. package/LICENSE +692 -0
  4. package/README.md +347 -0
  5. package/bin/install.mjs +168 -0
  6. package/docs/comparison-with-alternatives.md +38 -0
  7. package/docs/global-examples-index.md +103 -0
  8. package/docs/repository-statistics.md +101 -0
  9. package/docs/theoretical-foundations.md +188 -0
  10. package/environment.yaml +106 -0
  11. package/examples/community/arctic_tundra_vegetation_example.md +247 -0
  12. package/examples/community/bird_landuse_example.md +63 -0
  13. package/examples/community/phytoplankton_reservoir_example.md +60 -0
  14. package/examples/community/reef_fish_indopacific_example.md +221 -0
  15. package/examples/impact/baci_road_example.md +57 -0
  16. package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
  17. package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
  18. package/examples/occupancy/puma_camera_example.md +61 -0
  19. package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
  20. package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
  21. package/examples/sdm/anteater_cerrado_example.md +69 -0
  22. package/examples/sdm/jaguar_amazon_example.md +80 -0
  23. package/examples/sdm/koala_climate_change_example.md +170 -0
  24. package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
  25. package/package.json +43 -0
  26. package/renv.lock +194 -0
  27. package/skills/SKILL_INDEX.json +1020 -0
  28. package/skills/acoustic-monitoring/SKILL.md +163 -0
  29. package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
  30. package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
  31. package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
  32. package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
  33. package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
  34. package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
  35. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
  36. package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
  37. package/skills/biostatistics-workbench/SKILL.md +140 -0
  38. package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
  39. package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
  40. package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
  41. package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
  42. package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
  43. package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
  44. package/skills/camera-trap-processing/SKILL.md +159 -0
  45. package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
  46. package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
  47. package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
  48. package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
  49. package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
  50. package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
  51. package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
  52. package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
  53. package/skills/community-ecology-ordination/SKILL.md +133 -0
  54. package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
  55. package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
  56. package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
  57. package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
  58. package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
  59. package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
  60. package/skills/ecological-data-foundation/SKILL.md +129 -0
  61. package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
  62. package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
  63. package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
  64. package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
  65. package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
  66. package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
  67. package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
  68. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
  69. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
  70. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
  71. package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
  72. package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
  73. package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
  74. package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
  75. package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
  76. package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
  77. package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
  78. package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
  79. package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
  80. package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
  81. package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
  82. package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
  83. package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
  84. package/skills/ecological-impact-assessment/SKILL.md +123 -0
  85. package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
  86. package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
  87. package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
  88. package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
  89. package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
  90. package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
  91. package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
  92. package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
  93. package/skills/ecosystem-services-assessment/SKILL.md +125 -0
  94. package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
  95. package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
  96. package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
  97. package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
  98. package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
  99. package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
  100. package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
  101. package/skills/environmental-time-series/SKILL.md +125 -0
  102. package/skills/environmental-time-series/examples/example-prompts.md +33 -0
  103. package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
  104. package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
  105. package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
  106. package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
  107. package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
  108. package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
  109. package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
  110. package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
  111. package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
  112. package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
  113. package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
  114. package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
  115. package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
  116. package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
  117. package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
  118. package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
  119. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
  120. package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
  121. package/skills/landscape-connectivity/SKILL.md +170 -0
  122. package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
  123. package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
  124. package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
  125. package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
  126. package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
  127. package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
  128. package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
  129. package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
  130. package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
  131. package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
  132. package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
  133. package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
  134. package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
  135. package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
  136. package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
  137. package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
  138. package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
  139. package/skills/occupancy-and-detection/SKILL.md +126 -0
  140. package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
  141. package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
  142. package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
  143. package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
  144. package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
  145. package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
  146. package/skills/population-viability-analysis/SKILL.md +161 -0
  147. package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
  148. package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
  149. package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
  150. package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
  151. package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
  152. package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
  153. package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
  154. package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
  155. package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
  156. package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
  157. package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
  158. package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
  159. package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
  160. package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
  161. package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
  162. package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
  163. package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
  164. package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
  165. package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
  166. package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
  167. package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
  168. package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
  169. package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
  170. package/skills/spatial-prioritization/SKILL.md +162 -0
  171. package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
  172. package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
  173. package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
  174. package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
  175. package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
  176. package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
  177. package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
  178. package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
  179. package/skills/species-distribution-modeling/SKILL.md +139 -0
  180. package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
  181. package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
  182. package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
  183. package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
  184. package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
  185. package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
  186. package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
  187. package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
  188. package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
  189. package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
  190. package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
  191. package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
  192. package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
  193. package/templates/SKILL_TEMPLATE.md +225 -0
  194. package/templates/checklists/data-submission-checklist.md +38 -0
  195. package/templates/checklists/post-analysis-checklist.md +55 -0
  196. package/templates/checklists/pre-analysis-checklist.md +31 -0
  197. package/templates/prompts/debug-skill.md +47 -0
  198. package/templates/prompts/invoke-skill.md +34 -0
  199. package/templates/prompts/invoke-workflow.md +45 -0
  200. package/templates/reports/technical-report-template.md +80 -0
  201. package/templates/scripts/logger_setup.R +79 -0
  202. package/templates/scripts/logger_setup.py +119 -0
  203. package/templates/scripts/params_loader.R +28 -0
  204. package/templates/scripts/params_loader.py +38 -0
  205. package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
  206. package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
  207. package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
  208. package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
  209. package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
  210. package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
  211. package/workflows/produce-technical-report/WORKFLOW.md +113 -0
  212. package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
  213. package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
  214. package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
  215. package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
  216. package/workflows/run-population-viability/WORKFLOW.md +90 -0
  217. package/workflows/run-sdm-study/WORKFLOW.md +99 -0
@@ -0,0 +1,35 @@
1
+ # Example Invocation Prompts — community-ecology-ordination
2
+
3
+ ## Full Community Analysis
4
+
5
+ ```
6
+ Load skill: community-ecology-ordination
7
+ Task: Analyse bird community structure across three land use types
8
+ (old-growth forest, secondary forest, pasture) in the Atlantic Forest.
9
+
10
+ Files:
11
+ - data/bird_abundance_matrix.csv (100 sites × 87 species, count data)
12
+ - data/site_metadata.csv (land_use, elevation, canopy_cover, edge_distance)
13
+
14
+ Steps:
15
+ 1. Rarefaction curves to assess sampling adequacy.
16
+ 2. Alpha diversity (richness, Shannon, Simpson) per land use; compare with Kruskal-Wallis.
17
+ 3. NMDS (Bray-Curtis, k=2). Report stress.
18
+ 4. PERMANOVA: community ~ land_use. Test assumption with PERMDISP.
19
+ 5. SIMPER: top 10 species driving differences between land use pairs.
20
+ 6. Indicator species (IndVal) per land use type.
21
+ 7. Hierarchical clustering (Ward.D2) of sites.
22
+
23
+ Output: ordination_plot.png, diversity_metrics.csv, permanova_results.txt, community_report.md
24
+ ```
25
+
26
+ ## Beta Diversity Partitioning
27
+
28
+ ```
29
+ Load skill: community-ecology-ordination
30
+ Task: Partition beta diversity into turnover and nestedness components
31
+ for amphibian communities across an elevation gradient (1000–3500 m, 25 sites).
32
+ Data: data/amphibian_pa_matrix.csv (presence/absence)
33
+ Use betapart package. Report total beta, turnover fraction, and nestedness fraction.
34
+ Plot beta diversity components against elevation.
35
+ ```
@@ -0,0 +1,53 @@
1
+ # Dissimilarity Metric Selection Guide
2
+
3
+ ## Bray-Curtis (Sørensen quantitative)
4
+ - **Data:** Abundance (count or biomass)
5
+ - **Properties:** Asymmetric (ignores double-zeros: shared absences are not counted as similarity); ranges 0–1
6
+ - **When:** Most ecological abundance data; default for NMDS of communities
7
+ - **R:** `vegan::vegdist(x, method = "bray")`
8
+
9
+ ## Jaccard
10
+ - **Data:** Presence/absence
11
+ - **Properties:** Asymmetric (ignores double-zeros); ranges 0–1
12
+ - **When:** Presence/absence (incidence) data; when all species are equally important
13
+ - **R:** `vegan::vegdist(x, method = "jaccard", binary = TRUE)`
14
+
15
+ ## Sørensen (Dice)
16
+ - **Data:** Presence/absence
17
+ - **Properties:** Emphasises co-occurrences more than Jaccard
18
+ - **When:** Similar to Jaccard; slightly more weight to shared species
19
+ - **R:** `vegan::vegdist(x, method = "bray")` on 0/1 matrix (equivalent)
20
+
21
+ ## Chao
22
+ - **Data:** Abundance (accounts for unobserved species)
23
+ - **Properties:** Estimates true dissimilarity adjusting for sampling effort
24
+ - **When:** Datasets with very different sampling intensities; rare species important
25
+ - **R:** `vegan::vegdist(x, method = "chao")`
26
+
27
+ ## Euclidean
28
+ - **Data:** Continuous environmental variables
29
+ - **Properties:** Symmetric; sensitive to magnitude; double-zero problem
30
+ - **When:** Environmental (not species) data in PCA / RDA
31
+ - **Avoid for:** Raw species abundances (use Hellinger transform first)
32
+
33
+ ## Hellinger Distance
34
+ - **Data:** Abundance (after Hellinger transformation)
35
+ - **Properties:** Avoids double-zero problem; linear methods applicable
36
+ - **When:** PCA or RDA on species data; good compromise
37
+ - **R:** `vegan::decostand(x, "hellinger")` then Euclidean distance
38
+
39
+ ## Aitchison Distance
40
+ - **Data:** Compositional / proportional abundance
41
+ - **Properties:** Log-ratio based; appropriate for compositional data
42
+ - **When:** Microbiome, pollen, compositional assemblage data
43
+ - **R:** `compositions::dist.acomp()` or `zCompositions` + Euclidean
44
+
45
+ ## Decision Summary
46
+
47
+ ```
48
+ Data type: Abundance?
49
+ YES → Bray-Curtis (default) | Chao (unequal effort) | Hellinger (for PCA/RDA)
50
+ NO → Presence/absence?
51
+ YES → Jaccard | Sørensen
52
+ NO → Continuous (env) → Euclidean | Gower (mixed types)
53
+ ```
@@ -0,0 +1,104 @@
1
+ # NMDS Interpretation Guide
2
+
3
+ ## What is NMDS?
4
+
5
+ Non-Metric Multidimensional Scaling (NMDS) is an ordination technique that represents the rank-order dissimilarity between samples in a low-dimensional space. Unlike PCA, it makes no assumptions about the data distribution and works with any dissimilarity matrix.
6
+
7
+ ## Stress Value — Quality of Fit
8
+
9
+ | Stress | Fit quality | Action |
10
+ |--------|-------------|--------|
11
+ | < 0.05 | Excellent | Report and proceed |
12
+ | 0.05–0.10 | Good | Report and proceed |
13
+ | 0.10–0.15 | Acceptable | Report; note limitation |
14
+ | 0.15–0.20 | Poor | Consider k=3 dimensions |
15
+ | > 0.20 | Unacceptable | Do not use 2D representation |
16
+
17
+ **Always report stress in the plot caption or legend.**
18
+
19
+ ## How to Run Properly
20
+
21
+ ```r
22
+ library(vegan)
23
+ set.seed(42) # for reproducibility
24
+
25
+ nmds <- metaMDS(
26
+ comm = species_matrix,
27
+ distance = "bray", # Bray-Curtis for abundance; jaccard for PA
28
+ k = 2, # start with 2; try 3 if stress > 0.15
29
+ trymax = 50, # run 50 random starts; keep best
30
+ autotransform = FALSE # don't auto-transform; apply Hellinger manually if needed
31
+ )
32
+
33
+ cat("Stress:", nmds$stress, "\n")
34
+ cat("Converged:", nmds$converged, "\n")
35
+
36
+ # Run multiple k values to choose
37
+ for (k in 2:4) {
38
+ tmp <- metaMDS(species_matrix, distance="bray", k=k, trymax=20, trace=0)
39
+ cat("k =", k, "| stress =", round(tmp$stress, 4), "\n")
40
+ }
41
+ ```
42
+
43
+ ## Reading an NMDS Plot
44
+
45
+ ### Site scores (samples)
46
+ - **Nearby points** → similar species composition
47
+ - **Distant points** → dissimilar composition
48
+ - **Clusters** → groups with consistently similar assemblages
49
+
50
+ ### Species scores (if added)
51
+ - Arrow/point direction → gradient of increasing species abundance/occurrence
52
+ - Arrow length → strength of association with NMDS axes
53
+
54
+ ### Environmental vectors (envfit)
55
+ - Added post-hoc to correlate environmental variables with ordination axes
56
+ - Arrow direction and length indicate direction and strength of environmental gradient
57
+
58
+ ```r
59
+ # Add environmental vectors
60
+ env_fit <- envfit(nmds, env_matrix, permutations = 999)
61
+ print(env_fit) # shows r² and p-value for each variable
62
+
63
+ # Plot
64
+ plot(nmds, display = "sites")
65
+ plot(env_fit, p.max = 0.05) # only significant vectors
66
+ ```
67
+
68
+ ## Producing a Publication-Quality Plot
69
+
70
+ ```r
71
+ library(ggplot2)
72
+
73
+ scores_df <- as.data.frame(scores(nmds, display = "sites"))
74
+ scores_df$group <- metadata$land_use # your grouping variable
75
+
76
+ ggplot(scores_df, aes(x = NMDS1, y = NMDS2, colour = group, shape = group)) +
77
+ geom_point(size = 3, alpha = 0.8) +
78
+ stat_ellipse(level = 0.95, linetype = "dashed") + # 95% confidence ellipses
79
+ annotate("text", x = Inf, y = Inf,
80
+ label = paste("Stress =", round(nmds$stress, 3)),
81
+ hjust = 1.1, vjust = 1.5, size = 3.5) +
82
+ scale_colour_brewer(palette = "Set2") +
83
+ labs(title = "NMDS (Bray-Curtis)", colour = "Land use", shape = "Land use") +
84
+ theme_bw() +
85
+ theme(legend.position = "right")
86
+ ```
87
+
88
+ ## Common Mistakes
89
+
90
+ | Mistake | Fix |
91
+ |---------|-----|
92
+ | Only running 1 random start | Set `trymax = 50` |
93
+ | Reporting stress > 0.20 as acceptable | Use k=3 or a different ordination |
94
+ | Not setting seed | Always `set.seed()` before metaMDS |
95
+ | Using autotransform=TRUE without checking | Turn off; apply transformation explicitly |
96
+ | Not checking convergence | Check `nmds$converged` |
97
+ | Interpreting axes as principal components | NMDS axes are arbitrary; only relative distances matter |
98
+
99
+ ## When to Use PCA Instead
100
+
101
+ - Data are continuous environmental variables (not species composition)
102
+ - Linear relationships are expected
103
+ - You need to explain specific % variance per axis
104
+ - For species data: apply Hellinger transformation first (PCA on Hellinger = RDA with no constraints)
@@ -0,0 +1,143 @@
1
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ # community_analysis.R
5
+ # NMDS ordination, diversity metrics, PERMANOVA
6
+ # Usage: Rscript community_analysis.R <species_matrix_csv> <metadata_csv> <output_dir>
7
+ # Requires: vegan, ggplot2, dplyr
8
+
9
+ # ── Inline logger ─────────────────────────────────────────────────────────────
10
SKILL_NAME <- "community-ecology-ordination"

# Timestamp prefix for log lines, formatted as "[YYYY-MM-DD HH:MM:SS]".
.log_ts <- function() {
  format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
}

# Factory for the level-specific loggers below. Each generated function takes
# sprintf()-style arguments (format string first) and emits a single line via
# message(), prefixed with the timestamp and the level tag.
.make_logger <- function(level) {
  tag <- paste0(" [", level, "] ")
  function(...) message(.log_ts(), tag, sprintf(...))
}

log_info  <- .make_logger("INFO")
log_warn  <- .make_logger("WARN")
log_error <- .make_logger("ERROR")

# Announce the start of numbered pipeline step `n` with description `d`.
log_step <- function(n, d) {
  log_info("-- STEP %d: %s", n, d)
}

# Record an analysis decision: variable `v`, chosen value `val`, rationale `why`.
log_decision <- function(v, val, why) {
  log_info("DECISION | %s = %s | %s", v, val, why)
}

# Ensure a ./logs directory exists; warnings (e.g. already present) are muted.
dir.create("logs", recursive = TRUE, showWarnings = FALSE)
18
+
19
+ suppressPackageStartupMessages({
20
+ library(vegan)
21
+ library(ggplot2)
22
+ library(dplyr)
23
+ })
24
+
25
args <- commandArgs(trailingOnly = TRUE)

# Positional argument `i` when it was supplied on the command line,
# otherwise the given fallback path.
.arg_or <- function(i, fallback) {
  if (length(args) >= i) args[i] else fallback
}

sp_file    <- .arg_or(1, "data/species_matrix.csv")
meta_file  <- .arg_or(2, "data/site_metadata.csv")
output_dir <- .arg_or(3, "outputs/community")

log_step(1, "Validate inputs")

# Abort when a required input file is missing: first log the detailed
# diagnostic (a sprintf template receiving the path), then stop with the
# short message. call. = FALSE keeps the error text free of this helper's
# call, as if stop() had been issued at top level.
.require_file <- function(path, diagnostic, abort_msg) {
  if (file.exists(path)) {
    return(invisible(NULL))
  }
  log_error(diagnostic, path)
  stop(abort_msg, call. = FALSE)
}

.require_file(
  sp_file,
  "Falha em validate inputs: arquivo de matriz de especies nao encontrado: %s\nCausa provavel: caminho incorreto ou arquivo nao gerado\nVerifique: o argumento species_matrix_csv e o diretorio de trabalho\nSkill anterior: data-cleaning",
  "Species matrix file not found."
)
.require_file(
  meta_file,
  "Falha em validate inputs: arquivo de metadados nao encontrado: %s\nCausa provavel: caminho incorreto ou arquivo nao gerado\nVerifique: o argumento metadata_csv e o diretorio de trabalho\nSkill anterior: data-cleaning",
  "Metadata file not found."
)

dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

# Fixed seed so NMDS random starts and permutation tests are repeatable.
set.seed(42)
log_decision("random_seed", 42, "ensures reproducibility of NMDS and permutation tests")
49
+
50
log_step(2, "Load species matrix and metadata")
# Both CSVs are read with their first column as row names (site IDs).
tryCatch({
  sp   <- read.csv(sp_file,   row.names = 1)
  meta <- read.csv(meta_file, row.names = 1)
}, error = function(e) {
  log_error(
    "Falha em load data: %s\nCausa provavel: CSV malformado ou sem coluna de rownames\nVerifique: estrutura dos arquivos (primeira coluna deve ser site ID)\nSkill anterior: data-cleaning",
    conditionMessage(e)
  )
  stop(e)
})

log_info("Sites: %d | Species: %d", nrow(sp), ncol(sp))

# Negative abundances are only reported here; the data are left untouched.
if (any(sp < 0, na.rm = TRUE)) {
  log_warn("Species matrix contains negative values. Abundances must be >= 0. Check your input data.")
}

# vegan does not treat missing cells as zero by itself (NA propagates into
# diversity indices and dissimilarities), so the zero-fill is done explicitly
# here and the log message states exactly what happens.
if (anyNA(sp)) {
  log_warn("Species matrix contains %d NA values. These are replaced with zero before analysis.", sum(is.na(sp)))
  sp[is.na(sp)] <- 0
}
70
+
71
log_step(3, "Compute alpha diversity metrics")
# Per-site richness, Shannon and Simpson indices, written to
# <output_dir>/diversity_metrics.csv. Any failure is logged and re-raised.
tryCatch(
  {
    site_richness <- specnumber(sp)
    site_shannon  <- diversity(sp, index = "shannon")
    site_simpson  <- diversity(sp, index = "simpson")

    div <- data.frame(
      site     = rownames(sp),
      richness = site_richness,
      shannon  = site_shannon,
      simpson  = site_simpson
    )

    diversity_csv <- file.path(output_dir, "diversity_metrics.csv")
    write.csv(div, diversity_csv, row.names = FALSE)

    log_info(
      "Alpha diversity computed. Mean richness: %.1f | Mean Shannon: %.2f",
      mean(div$richness), mean(div$shannon)
    )
  },
  error = function(err) {
    log_error(
      "Falha em alpha diversity: %s\nCausa provavel: matriz de especies vazia ou nao numerica\nVerifique: estrutura do CSV de especies\nSkill anterior: data-cleaning",
      conditionMessage(err)
    )
    stop(err)
  }
)
89
+
90
log_step(4, "Run NMDS ordination (Bray-Curtis, k=2)")
log_decision("distance_metric", "bray", "Bray-Curtis is standard for community composition data")
log_decision("nmds_k", 2, "2 dimensions for interpretable 2D ordination plot")
log_decision("nmds_autotransform", FALSE, "NMDS uses the same untransformed Bray-Curtis dissimilarities as PERMANOVA")
# Fits a 2-D NMDS, logs stress and convergence, and saves the site-score
# scatter plot to <output_dir>/ordination_plot.png. Failures are logged and
# re-raised.
tryCatch({
  # autotransform is switched off explicitly: with the default (TRUE) metaMDS
  # may transform/standardise the data, so the ordination would no longer be
  # based on the raw Bray-Curtis dissimilarities tested in the PERMANOVA step.
  nmds <- metaMDS(sp, distance = "bray", k = 2, trymax = 50,
                  autotransform = FALSE, trace = 0)
  log_info("NMDS stress: %.4f", nmds$stress)
  if (nmds$stress > 0.2) {
    log_warn("NMDS stress = %.4f exceeds 0.20. Ordination may be unreliable; consider k=3 or data transformation.", nmds$stress)
  }
  # `converged` is coerced with as.logical() so that both a logical flag and a
  # count of convergent solutions are handled.
  if (!isTRUE(as.logical(nmds$converged))) {
    log_warn("NMDS did not reach a convergent solution within trymax = 50 random starts; interpret the ordination with caution.")
  }

  # Site scores joined with the metadata by site ID (row names of each table).
  scores_df <- as.data.frame(scores(nmds, display = "sites")) |>
    mutate(site = rownames(sp)) |>
    left_join(meta |> mutate(site = rownames(meta)), by = "site")

  # Stress is annotated in the bottom-right corner of the plot.
  p_ord <- ggplot(scores_df, aes(x = NMDS1, y = NMDS2)) +
    geom_point(size = 3) +
    annotate("text", x = Inf, y = -Inf, label = paste("Stress =", round(nmds$stress, 3)),
             hjust = 1.1, vjust = -0.5, size = 3.5) +
    theme_bw() + labs(title = "NMDS Ordination (Bray-Curtis)")
  ggsave(file.path(output_dir, "ordination_plot.png"), p_ord, width = 7, height = 6, dpi = 150)
  log_info("Ordination plot saved.")
}, error = function(e) {
  log_error(
    "Falha em NMDS: %s\nCausa provavel: matriz com sites/especies insuficientes ou todos zeros\nVerifique: numero de sites (>= 3) e que a matriz nao seja toda zeros\nSkill anterior: data-cleaning",
    conditionMessage(e)
  )
  stop(e)
})
118
+
119
log_step(5, "PERMANOVA and PERMDISP (if 'group' column present)")
# Tests for differences in community composition (PERMANOVA) and in
# within-group dispersion (PERMDISP) between the levels of meta$group, and
# writes both result tables to <output_dir>/permanova_results.txt. The step
# is skipped with a warning when the metadata has no 'group' column.
if ("group" %in% names(meta)) {
  log_decision("permanova_permutations", 999, "standard number for robust p-value estimation")
  tryCatch({
    # The distance matrix follows the row order of the species matrix, so the
    # grouping variable must be looked up by site ID rather than taken in
    # metadata row order; otherwise a differently sorted metadata file would
    # silently assign sites to the wrong groups.
    site_idx <- match(rownames(sp), rownames(meta))
    if (anyNA(site_idx)) {
      stop(sprintf("%d site(s) in the species matrix have no matching row in the metadata",
                   sum(is.na(site_idx))))
    }
    perm_data <- data.frame(group = meta$group[site_idx], row.names = rownames(sp))

    dist_mat  <- vegdist(sp, method = "bray")
    perm      <- adonis2(dist_mat ~ group, data = perm_data, permutations = 999)
    disp      <- betadisper(dist_mat, perm_data$group)
    disp_test <- permutest(disp, permutations = 999)
    log_info("PERMANOVA:\n%s", paste(capture.output(perm),      collapse = "\n"))
    log_info("PERMDISP:\n%s",  paste(capture.output(disp_test), collapse = "\n"))
    capture.output(perm, disp_test) |>
      writeLines(file.path(output_dir, "permanova_results.txt"))
    log_info("PERMANOVA results saved.")
  }, error = function(e) {
    log_error(
      "Falha em PERMANOVA/PERMDISP: %s\nCausa provavel: grupo com apenas um nivel ou sites insuficientes por grupo\nVerifique: coluna 'group' nos metadados e balanceamento\nSkill anterior: data-cleaning",
      conditionMessage(e)
    )
    stop(e)
  })
} else {
  log_warn("Column 'group' not found in metadata. PERMANOVA and PERMDISP skipped.")
}

log_info("Done. Outputs in: %s", output_dir)
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env python3
2
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
3
+ # SPDX-License-Identifier: GPL-3.0-or-later
4
+
5
+ """
6
+ community_analysis.py
7
+ Beta diversity, ordination (PCoA), and group comparison (PERMANOVA via skbio).
8
+ Usage: python community_analysis.py <species_matrix_csv> <metadata_csv> <output_dir>
9
+ Requires: pandas, numpy, scipy, skbio, matplotlib
10
+ """
11
+ import logging
12
+ import sys
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+
16
SKILL_NAME = "community-ecology-ordination"

# One log file per run, stamped with the start time, under ./logs relative to
# the current working directory (created on import if missing).
_LOG_DIR = Path("logs")
_LOG_DIR.mkdir(parents=True, exist_ok=True)
_run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
_log_file = _LOG_DIR / f"skill_{SKILL_NAME}_{_run_stamp}.log"

# Every record goes both to stdout and to the run's log file.
_console_handler = logging.StreamHandler(sys.stdout)
_file_handler = logging.FileHandler(_log_file, encoding="utf-8")
logging.basicConfig(
    level=logging.INFO,
    format=f"[%(asctime)s] [%(levelname)s] [{SKILL_NAME}] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[_console_handler, _file_handler],
)
logger = logging.getLogger(SKILL_NAME)
30
+
31
def log_step(n: int, desc: str) -> None:
    """Emit an INFO record announcing numbered pipeline step ``n``.

    Args:
        n: Step number shown in the log line.
        desc: Short description of what the step does.
    """
    template = "-- STEP %d: %s"
    logger.info(template, n, desc)
33
+
34
def log_decision(var: str, val, why: str) -> None:
    """Emit an INFO record documenting an analysis choice.

    Args:
        var: Name of the parameter or setting that was chosen.
        val: The value selected for it (formatted with ``%s``).
        why: Short justification for the choice.
    """
    template = "DECISION | %s = %s | %s"
    logger.info(template, var, val, why)
36
+
37
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import cophenet, dendrogram, linkage
from scipy.spatial.distance import braycurtis
from scipy.spatial.distance import squareform
43
+
44
# scikit-bio is optional: when it cannot be imported, HAS_SKBIO is False and a
# warning explains which steps will be skipped.
try:
    from skbio.diversity import beta_diversity
    from skbio.stats.ordination import pcoa
    from skbio.stats.distance import permanova, DistanceMatrix
except ImportError:
    HAS_SKBIO = False
    logger.warning("scikit-bio not installed. PCoA and PERMANOVA will be skipped. pip install scikit-bio")
else:
    HAS_SKBIO = True
52
+
53
+
54
def bray_curtis_matrix(sp: pd.DataFrame) -> np.ndarray:
    """Return the square Bray-Curtis dissimilarity matrix between rows of ``sp``.

    Args:
        sp: Table with one row per site; values are cast to ``float``.

    Returns:
        An ``(n, n)`` array, where ``n`` is the number of rows, that is
        symmetric with zeros on the diagonal.
    """
    n_sites = len(sp)
    dist = np.zeros((n_sites, n_sites))
    abundances = sp.values.astype(float)

    # Each unordered pair of rows is evaluated once (strict upper triangle)
    # and mirrored into the lower triangle.
    upper_rows, upper_cols = np.triu_indices(n_sites, k=1)
    pair_dists = [
        braycurtis(abundances[a], abundances[b])
        for a, b in zip(upper_rows, upper_cols)
    ]
    dist[upper_rows, upper_cols] = pair_dists
    dist[upper_cols, upper_rows] = pair_dists
    return dist
63
+
64
def alpha_diversity(sp: pd.DataFrame) -> pd.DataFrame:
    """Compute per-site richness, Shannon and Simpson indices.

    Args:
        sp: Table with one row per site and one column per species.

    Returns:
        A DataFrame with columns ``site`` (the index of ``sp``),
        ``richness`` (number of entries > 0 in the row), ``shannon``
        (``-sum(p * ln p)``) and ``simpson`` (``1 - sum(p**2)``), where ``p``
        are the row's positive entries divided by the row total.
    """
    def proportions(counts):
        # Only strictly positive entries contribute to the indices.
        return counts[counts > 0] / counts.sum()

    def shannon(counts):
        p = proportions(counts)
        return -np.sum(p * np.log(p))

    def simpson(counts):
        p = proportions(counts)
        return 1 - np.sum(p**2)

    richness = (sp > 0).sum(axis=1)
    columns = {
        "site": sp.index,
        "richness": richness.values,
        "shannon": sp.apply(shannon, axis=1).values,
        "simpson": sp.apply(simpson, axis=1).values,
    }
    return pd.DataFrame(columns)
78
+
79
def main():
    """Run the community analysis workflow from the command line.

    Positional command-line arguments (all optional):
        1. species matrix CSV (default ``data/species_matrix.csv``)
        2. site metadata CSV (default ``data/site_metadata.csv``)
        3. output directory (default ``outputs/community``)

    Both CSVs are read with their first column as the index. The steps are:
    input validation, loading, alpha diversity, Bray-Curtis distances,
    PCoA + PERMANOVA (only when scikit-bio is importable; PERMANOVA and the
    plot only when the metadata has a ``group`` column), and Ward
    hierarchical clustering.

    Files written to the output directory: ``diversity_metrics.csv``,
    ``bray_curtis_matrix.csv``, ``permanova_results.csv`` and
    ``pcoa_plot.png`` (when applicable), and ``cluster_dendrogram.png``.

    Exits with status 1 when either input file is missing. An exception
    raised inside a step is logged with troubleshooting hints and re-raised.
    """
    sp_file = sys.argv[1] if len(sys.argv) > 1 else "data/species_matrix.csv"
    meta_file = sys.argv[2] if len(sys.argv) > 2 else "data/site_metadata.csv"
    output_dir = Path(sys.argv[3]) if len(sys.argv) > 3 else Path("outputs/community")

    log_step(1, "Validate inputs")
    if not Path(sp_file).exists():
        logger.error(
            "Species matrix file not found: %s\n"
            "Causa provavel: caminho incorreto ou arquivo nao gerado\n"
            "Verifique: o argumento species_matrix_csv e o diretorio de trabalho\n"
            "Skill anterior: data-cleaning",
            sp_file
        )
        sys.exit(1)
    if not Path(meta_file).exists():
        logger.error(
            "Metadata file not found: %s\n"
            "Causa provavel: caminho incorreto ou arquivo nao gerado\n"
            "Verifique: o argumento metadata_csv e o diretorio de trabalho\n"
            "Skill anterior: data-cleaning",
            meta_file
        )
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    log_step(2, "Load species matrix and metadata")
    try:
        sp = pd.read_csv(sp_file, index_col=0)
        meta = pd.read_csv(meta_file, index_col=0)
    except Exception as e:
        logger.error(
            "Unexpected error in load data: %s\n"
            "Causa provavel: CSV malformado ou sem coluna de rownames\n"
            "Verifique: estrutura dos arquivos (primeira coluna deve ser site ID)\n"
            "Skill anterior: data-cleaning",
            e
        )
        raise

    logger.info("Sites: %d | Species: %d", len(sp), len(sp.columns))

    # Data-quality problems are reported but do not stop the run.
    if (sp < 0).any().any():
        logger.warning("Species matrix contains negative values. Abundances must be >= 0. Check your input data.")
    if sp.isna().any().any():
        logger.warning("Species matrix contains %d NA values. These will affect distance calculations.", sp.isna().sum().sum())

    log_step(3, "Compute alpha diversity metrics")
    try:
        div = alpha_diversity(sp)
        div.to_csv(output_dir / "diversity_metrics.csv", index=False)
        logger.info("Mean richness: %.1f | Shannon: %.2f", div['richness'].mean(), div['shannon'].mean())
    except Exception as e:
        logger.error(
            "Unexpected error in alpha diversity: %s\n"
            "Causa provavel: matriz de especies vazia ou nao numerica\n"
            "Verifique: estrutura do CSV de especies\n"
            "Skill anterior: data-cleaning",
            e
        )
        raise

    log_step(4, "Compute Bray-Curtis distance matrix")
    log_decision("distance_metric", "bray-curtis", "standard for community composition; handles double-zeros correctly")
    try:
        dm = bray_curtis_matrix(sp)
        pd.DataFrame(dm, index=sp.index, columns=sp.index).to_csv(output_dir / "bray_curtis_matrix.csv")
        logger.info("Bray-Curtis matrix computed (%d x %d).", len(sp), len(sp))
    except Exception as e:
        logger.error(
            "Unexpected error in Bray-Curtis matrix: %s\n"
            "Causa provavel: dados nao numericos na matriz de especies\n"
            "Verifique: tipos de dados no CSV de especies\n"
            "Skill anterior: data-cleaning",
            e
        )
        raise

    log_step(5, "PCoA ordination and PERMANOVA")
    if HAS_SKBIO:
        log_decision("permanova_permutations", 999, "standard number for robust p-value estimation")
        try:
            dist_mat = DistanceMatrix(dm, ids=list(sp.index))
            pc = pcoa(dist_mat)
            scores = pc.samples[["PC1", "PC2"]].copy()
            scores["site"] = scores.index
            if "group" in meta.columns:
                # Align group labels to the site order of the ordination and
                # of the distance matrix respectively.
                scores["group"] = meta["group"].reindex(scores.index).values
                groups_for_perm = meta["group"].reindex(sp.index).values
                perm_result = permanova(dist_mat, groups_for_perm, permutations=999)
                logger.info(
                    "PERMANOVA: F = %.3f | p = %.4f",
                    perm_result['test statistic'], perm_result['p-value']
                )
                perm_df = pd.DataFrame({"statistic": [perm_result["test statistic"]],
                                        "p_value": [perm_result["p-value"]]})
                perm_df.to_csv(output_dir / "permanova_results.csv", index=False)
                # Plot coloured by group
                fig, ax = plt.subplots(figsize=(7, 6))
                for grp in scores["group"].unique():
                    sub = scores[scores["group"] == grp]
                    ax.scatter(sub["PC1"], sub["PC2"], label=grp, s=50, alpha=0.8)
                # Positional access via .iloc: plain integer keys on a
                # label-indexed Series are deprecated in pandas.
                explained = pc.proportion_explained
                ax.set_xlabel(f"PC1 ({explained.iloc[0]*100:.1f}%)")
                ax.set_ylabel(f"PC2 ({explained.iloc[1]*100:.1f}%)")
                ax.set_title("PCoA (Bray-Curtis)")
                ax.legend()
                plt.tight_layout()
                plt.savefig(output_dir / "pcoa_plot.png", dpi=150)
                plt.close()
                logger.info("PCoA plot saved.")
            else:
                logger.warning("Column 'group' not found in metadata. PERMANOVA skipped.")
        except Exception as e:
            logger.error(
                "Unexpected error in PCoA/PERMANOVA: %s\n"
                "Causa provavel: grupo com apenas um nivel ou sites insuficientes\n"
                "Verifique: coluna 'group' nos metadados e balanceamento\n"
                "Skill anterior: data-cleaning",
                e
            )
            raise
    else:
        logger.warning("scikit-bio unavailable. PCoA and PERMANOVA steps skipped.")

    log_step(6, "Hierarchical clustering")
    try:
        # linkage() and cophenet() both take the condensed (1-D) form of the
        # distance matrix, so convert once and reuse it.
        condensed = squareform(dm)
        Z = linkage(condensed, method="ward")
        c, _ = cophenet(Z, condensed)
        log_decision("linkage_method", "ward", "minimises total within-cluster variance; standard for ecology")
        logger.info("Cophenetic correlation (Ward): %.3f", c)
        if c < 0.7:
            logger.warning("Cophenetic correlation = %.3f < 0.70. Dendrogram may poorly represent distances.", c)
        # Widen the figure with the number of sites so leaf labels stay legible.
        fig, ax = plt.subplots(figsize=(max(8, len(sp)//2), 5))
        dendrogram(Z, labels=list(sp.index), ax=ax, leaf_rotation=90, leaf_font_size=8)
        ax.set_title(f"Hierarchical Clustering (Ward.D2) | Cophenetic r = {c:.3f}")
        plt.tight_layout()
        plt.savefig(output_dir / "cluster_dendrogram.png", dpi=150)
        plt.close()
        logger.info("Cluster dendrogram saved.")
    except Exception as e:
        logger.error(
            "Unexpected error in hierarchical clustering: %s\n"
            "Causa provavel: matriz de distancias com NaN ou apenas um site\n"
            "Verifique: integridade da matriz Bray-Curtis\n"
            "Skill anterior: community-ecology-ordination (distance matrix)",
            e
        )
        raise

    logger.info("Outputs written to: %s", output_dir)


if __name__ == "__main__":
    main()