ecological-agent-skills 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT_CONTEXT.md +191 -0
- package/CATALOG.md +329 -0
- package/LICENSE +692 -0
- package/README.md +347 -0
- package/bin/install.mjs +168 -0
- package/docs/comparison-with-alternatives.md +38 -0
- package/docs/global-examples-index.md +103 -0
- package/docs/repository-statistics.md +101 -0
- package/docs/theoretical-foundations.md +188 -0
- package/environment.yaml +106 -0
- package/examples/community/arctic_tundra_vegetation_example.md +247 -0
- package/examples/community/bird_landuse_example.md +63 -0
- package/examples/community/phytoplankton_reservoir_example.md +60 -0
- package/examples/community/reef_fish_indopacific_example.md +221 -0
- package/examples/impact/baci_road_example.md +57 -0
- package/examples/impact/ecosystem_services_atlantic_forest.md +83 -0
- package/examples/impact/forest_loss_borneo_timeseries_example.md +225 -0
- package/examples/occupancy/puma_camera_example.md +61 -0
- package/examples/occupancy/snow_leopard_himalayas_example.md +204 -0
- package/examples/reproducible/whittaker_biome_sdm_example.md +406 -0
- package/examples/sdm/anteater_cerrado_example.md +69 -0
- package/examples/sdm/jaguar_amazon_example.md +80 -0
- package/examples/sdm/koala_climate_change_example.md +170 -0
- package/examples/sdm/wolf_recolonization_europe_example.md +193 -0
- package/package.json +43 -0
- package/renv.lock +194 -0
- package/skills/SKILL_INDEX.json +1020 -0
- package/skills/acoustic-monitoring/SKILL.md +163 -0
- package/skills/acoustic-monitoring/examples/example-prompts.md +100 -0
- package/skills/acoustic-monitoring/examples/temperate_forest_birds_example.md +285 -0
- package/skills/acoustic-monitoring/resources/acoustic-indices-reference.md +93 -0
- package/skills/acoustic-monitoring/resources/soundscape-ecology-guide.md +90 -0
- package/skills/acoustic-monitoring/resources/species-id-tools-comparison.md +89 -0
- package/skills/acoustic-monitoring/scripts/batch_species_detection.py +360 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.R +235 -0
- package/skills/acoustic-monitoring/scripts/compute_acoustic_indices.py +374 -0
- package/skills/biostatistics-workbench/SKILL.md +140 -0
- package/skills/biostatistics-workbench/examples/example-prompts.md +39 -0
- package/skills/biostatistics-workbench/resources/effect-size-reference.md +81 -0
- package/skills/biostatistics-workbench/resources/glm-family-link-reference.md +47 -0
- package/skills/biostatistics-workbench/resources/test-selection-guide.md +93 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.R +78 -0
- package/skills/biostatistics-workbench/scripts/glm_pipeline.py +210 -0
- package/skills/camera-trap-processing/SKILL.md +159 -0
- package/skills/camera-trap-processing/examples/example-prompts.md +103 -0
- package/skills/camera-trap-processing/examples/leopard_serengeti_example.md +231 -0
- package/skills/camera-trap-processing/resources/activity-patterns-reference.md +113 -0
- package/skills/camera-trap-processing/resources/camtrapR-workflow-guide.md +130 -0
- package/skills/camera-trap-processing/resources/detection-event-definition-guide.md +89 -0
- package/skills/camera-trap-processing/scripts/estimate_activity.R +169 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.R +179 -0
- package/skills/camera-trap-processing/scripts/process_camtrap_data.py +192 -0
- package/skills/community-ecology-ordination/SKILL.md +133 -0
- package/skills/community-ecology-ordination/examples/example-prompts.md +35 -0
- package/skills/community-ecology-ordination/resources/dissimilarity-metric-guide.md +53 -0
- package/skills/community-ecology-ordination/resources/nmds-interpretation-guide.md +104 -0
- package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc +0 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.R +143 -0
- package/skills/community-ecology-ordination/scripts/community_analysis.py +231 -0
- package/skills/ecological-data-foundation/SKILL.md +129 -0
- package/skills/ecological-data-foundation/examples/example-prompts.md +40 -0
- package/skills/ecological-data-foundation/resources/coordinate-cleaning-flags.md +66 -0
- package/skills/ecological-data-foundation/resources/darwin-core-glossary.md +91 -0
- package/skills/ecological-data-foundation/resources/data-citation-guide.md +265 -0
- package/skills/ecological-data-foundation/resources/gbif-data-citation-guide.md +193 -0
- package/skills/ecological-data-foundation/resources/qa-checklist.md +83 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/clean_occurrences.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_ebird.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_inat.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_iucn.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/__pycache__/download_from_obis.cpython-311.pyc +0 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.R +230 -0
- package/skills/ecological-data-foundation/scripts/clean_occurrences.py +268 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.R +251 -0
- package/skills/ecological-data-foundation/scripts/download_from_ebird.py +364 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.R +315 -0
- package/skills/ecological-data-foundation/scripts/download_from_gbif.py +407 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.R +238 -0
- package/skills/ecological-data-foundation/scripts/download_from_inat.py +304 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.R +273 -0
- package/skills/ecological-data-foundation/scripts/download_from_iucn.py +344 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.R +248 -0
- package/skills/ecological-data-foundation/scripts/download_from_obis.py +318 -0
- package/skills/ecological-impact-assessment/SKILL.md +123 -0
- package/skills/ecological-impact-assessment/examples/example-prompts.md +32 -0
- package/skills/ecological-impact-assessment/resources/baci-design-guide.md +55 -0
- package/skills/ecological-impact-assessment/resources/fragmentation-metrics-reference.md +86 -0
- package/skills/ecological-impact-assessment/resources/pressure-index-template.md +78 -0
- package/skills/ecological-impact-assessment/resources/study-design-guide.md +168 -0
- package/skills/ecological-impact-assessment/scripts/baci_analysis.R +161 -0
- package/skills/ecological-impact-assessment/scripts/fragmentation_analysis.py +141 -0
- package/skills/ecological-impact-assessment/scripts/power_analysis_baci.R +274 -0
- package/skills/ecosystem-services-assessment/SKILL.md +125 -0
- package/skills/ecosystem-services-assessment/examples/example-prompts.md +24 -0
- package/skills/ecosystem-services-assessment/resources/es-indicator-reference.md +45 -0
- package/skills/ecosystem-services-assessment/resources/invest-parameter-guide.md +86 -0
- package/skills/ecosystem-services-assessment/resources/rusle-coefficients.md +88 -0
- package/skills/ecosystem-services-assessment/scripts/__pycache__/compute_es.cpython-311.pyc +0 -0
- package/skills/ecosystem-services-assessment/scripts/compute_es.py +189 -0
- package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.R +161 -0
- package/skills/environmental-time-series/SKILL.md +125 -0
- package/skills/environmental-time-series/examples/example-prompts.md +33 -0
- package/skills/environmental-time-series/resources/anomaly-indices-reference.md +88 -0
- package/skills/environmental-time-series/resources/bfast-parameter-guide.md +69 -0
- package/skills/environmental-time-series/scripts/__pycache__/recovery_trajectory.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/__pycache__/trend_analysis.cpython-311.pyc +0 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.R +305 -0
- package/skills/environmental-time-series/scripts/recovery_trajectory.py +178 -0
- package/skills/environmental-time-series/scripts/trend_analysis.R +192 -0
- package/skills/environmental-time-series/scripts/trend_analysis.py +184 -0
- package/skills/geoprocessing-for-ecology/SKILL.md +123 -0
- package/skills/geoprocessing-for-ecology/examples/example-prompts.md +32 -0
- package/skills/geoprocessing-for-ecology/resources/crs-reference.md +62 -0
- package/skills/geoprocessing-for-ecology/resources/global-predictor-sources.md +331 -0
- package/skills/geoprocessing-for-ecology/resources/resampling-methods.md +57 -0
- package/skills/geoprocessing-for-ecology/scripts/__pycache__/download_predictors.cpython-311.pyc +0 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.R +239 -0
- package/skills/geoprocessing-for-ecology/scripts/download_predictors.py +379 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.R +224 -0
- package/skills/geoprocessing-for-ecology/scripts/stack_and_extract.py +172 -0
- package/skills/landscape-connectivity/SKILL.md +170 -0
- package/skills/landscape-connectivity/examples/example-prompts.md +96 -0
- package/skills/landscape-connectivity/examples/jaguar_mesoamerica_corridor_example.md +271 -0
- package/skills/landscape-connectivity/resources/circuitscape-parameter-guide.md +155 -0
- package/skills/landscape-connectivity/resources/graph-theory-for-ecology.md +134 -0
- package/skills/landscape-connectivity/resources/resistance-surface-guide.md +141 -0
- package/skills/landscape-connectivity/scripts/connectivity_analysis.py +387 -0
- package/skills/landscape-connectivity/scripts/connectivity_metrics.R +274 -0
- package/skills/landscape-connectivity/scripts/resistance_surface.R +239 -0
- package/skills/model-validation-and-uncertainty/SKILL.md +131 -0
- package/skills/model-validation-and-uncertainty/examples/example-prompts.md +30 -0
- package/skills/model-validation-and-uncertainty/resources/extrapolation-risk-guide.md +236 -0
- package/skills/model-validation-and-uncertainty/resources/metric-selection-guide.md +52 -0
- package/skills/model-validation-and-uncertainty/resources/threshold-selection-guide.md +64 -0
- package/skills/model-validation-and-uncertainty/scripts/__pycache__/validate_model.cpython-311.pyc +0 -0
- package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.R +315 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_model.py +226 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_sdm.R +162 -0
- package/skills/occupancy-and-detection/SKILL.md +126 -0
- package/skills/occupancy-and-detection/examples/example-prompts.md +33 -0
- package/skills/occupancy-and-detection/resources/detection-history-format.md +100 -0
- package/skills/occupancy-and-detection/resources/occupancy-study-design.md +47 -0
- package/skills/occupancy-and-detection/scripts/__pycache__/occupancy_analysis.cpython-311.pyc +0 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.R +160 -0
- package/skills/occupancy-and-detection/scripts/occupancy_analysis.py +159 -0
- package/skills/population-viability-analysis/SKILL.md +161 -0
- package/skills/population-viability-analysis/examples/african_elephant_pva_example.md +266 -0
- package/skills/population-viability-analysis/examples/example-prompts.md +95 -0
- package/skills/population-viability-analysis/resources/extinction-risk-thresholds.md +128 -0
- package/skills/population-viability-analysis/resources/matrix-model-guide.md +139 -0
- package/skills/population-viability-analysis/resources/sensitivity-elasticity-reference.md +182 -0
- package/skills/population-viability-analysis/scripts/matrix_pva.R +258 -0
- package/skills/population-viability-analysis/scripts/pva_analysis.py +442 -0
- package/skills/population-viability-analysis/scripts/stochastic_pva.R +353 -0
- package/skills/predictive-modeling-best-practices/SKILL.md +136 -0
- package/skills/predictive-modeling-best-practices/examples/example-prompts.md +58 -0
- package/skills/predictive-modeling-best-practices/resources/collinearity-decision-tree.md +65 -0
- package/skills/predictive-modeling-best-practices/resources/sampling-bias-correction.md +267 -0
- package/skills/predictive-modeling-best-practices/resources/spatial-cv-guide.md +73 -0
- package/skills/predictive-modeling-best-practices/scripts/__pycache__/spatial_cv.cpython-311.pyc +0 -0
- package/skills/predictive-modeling-best-practices/scripts/collinearity_check.R +112 -0
- package/skills/predictive-modeling-best-practices/scripts/spatial_cv.py +182 -0
- package/skills/reproducible-ecology-pipeline/SKILL.md +139 -0
- package/skills/reproducible-ecology-pipeline/examples/example-prompts.md +35 -0
- package/skills/reproducible-ecology-pipeline/resources/directory-structure-template.md +94 -0
- package/skills/reproducible-ecology-pipeline/resources/params-yaml-template.yaml +84 -0
- package/skills/reproducible-ecology-pipeline/resources/reproducibility-checklist-template.md +66 -0
- package/skills/reproducible-ecology-pipeline/scripts/generate_file_manifest.py +110 -0
- package/skills/reproducible-ecology-pipeline/scripts/init_project.sh +53 -0
- package/skills/spatial-prioritization/SKILL.md +162 -0
- package/skills/spatial-prioritization/examples/biodiversity_hotspot_prioritization_example.md +289 -0
- package/skills/spatial-prioritization/examples/example-prompts.md +93 -0
- package/skills/spatial-prioritization/resources/cost-surface-reference.md +130 -0
- package/skills/spatial-prioritization/resources/marxan-vs-prioritizr-comparison.md +125 -0
- package/skills/spatial-prioritization/resources/prioritizr-formulation-guide.md +188 -0
- package/skills/spatial-prioritization/resources/representation-targets-guide.md +186 -0
- package/skills/spatial-prioritization/scripts/prioritization_sensitivity.R +320 -0
- package/skills/spatial-prioritization/scripts/run_prioritization.R +336 -0
- package/skills/species-distribution-modeling/SKILL.md +139 -0
- package/skills/species-distribution-modeling/examples/example-prompts.md +36 -0
- package/skills/species-distribution-modeling/resources/algorithm-comparison.md +25 -0
- package/skills/species-distribution-modeling/resources/calibration-area-guide.md +71 -0
- package/skills/species-distribution-modeling/resources/climate-scenario-preparation.md +170 -0
- package/skills/species-distribution-modeling/resources/maxent-calibration-guide.md +211 -0
- package/skills/species-distribution-modeling/resources/sdm-checklist.md +37 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.R +236 -0
- package/skills/species-distribution-modeling/scripts/predict_distribution.py +286 -0
- package/skills/species-distribution-modeling/scripts/prepare_future_layers.R +351 -0
- package/skills/species-distribution-modeling/scripts/project_scenarios.R +220 -0
- package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +99 -0
- package/skills/species-distribution-modeling/scripts/sdm_pipeline.py +318 -0
- package/skills/species-distribution-modeling/scripts/tune_maxnet.R +344 -0
- package/templates/SKILL_TEMPLATE.md +225 -0
- package/templates/checklists/data-submission-checklist.md +38 -0
- package/templates/checklists/post-analysis-checklist.md +55 -0
- package/templates/checklists/pre-analysis-checklist.md +31 -0
- package/templates/prompts/debug-skill.md +47 -0
- package/templates/prompts/invoke-skill.md +34 -0
- package/templates/prompts/invoke-workflow.md +45 -0
- package/templates/reports/technical-report-template.md +80 -0
- package/templates/scripts/logger_setup.R +79 -0
- package/templates/scripts/logger_setup.py +119 -0
- package/templates/scripts/params_loader.R +28 -0
- package/templates/scripts/params_loader.py +38 -0
- package/workflows/analyze-community-structure/WORKFLOW.md +72 -0
- package/workflows/analyze-environmental-change/WORKFLOW.md +73 -0
- package/workflows/assess-ecological-impact/WORKFLOW.md +75 -0
- package/workflows/assess-ecosystem-services/WORKFLOW.md +68 -0
- package/workflows/assess-landscape-connectivity/WORKFLOW.md +84 -0
- package/workflows/build-fire-risk-map/WORKFLOW.md +79 -0
- package/workflows/produce-technical-report/WORKFLOW.md +113 -0
- package/workflows/run-camera-trap-occupancy/WORKFLOW.md +87 -0
- package/workflows/run-conservation-prioritization/WORKFLOW.md +89 -0
- package/workflows/run-multispecies-screening/WORKFLOW.md +197 -0
- package/workflows/run-occupancy-analysis/WORKFLOW.md +74 -0
- package/workflows/run-population-viability/WORKFLOW.md +90 -0
- package/workflows/run-sdm-study/WORKFLOW.md +99 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Example Invocation Prompts — community-ecology-ordination
|
|
2
|
+
|
|
3
|
+
## Full Community Analysis
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
Load skill: community-ecology-ordination
|
|
7
|
+
Task: Analyse bird community structure across three land use types
|
|
8
|
+
(old-growth forest, secondary forest, pasture) in the Atlantic Forest.
|
|
9
|
+
|
|
10
|
+
Files:
|
|
11
|
+
- data/bird_abundance_matrix.csv (100 sites × 87 species, count data)
|
|
12
|
+
- data/site_metadata.csv (land_use, elevation, canopy_cover, edge_distance)
|
|
13
|
+
|
|
14
|
+
Steps:
|
|
15
|
+
1. Rarefaction curves to assess sampling adequacy.
|
|
16
|
+
2. Alpha diversity (richness, Shannon, Simpson) per land use; compare with Kruskal-Wallis.
|
|
17
|
+
3. NMDS (Bray-Curtis, k=2). Report stress.
|
|
18
|
+
4. PERMANOVA: community ~ land_use. Test the homogeneity-of-dispersion assumption with PERMDISP.
|
|
19
|
+
5. SIMPER: top 10 species driving differences between land use pairs.
|
|
20
|
+
6. Indicator species (IndVal) per land use type.
|
|
21
|
+
7. Hierarchical clustering (Ward.D2) of sites.
|
|
22
|
+
|
|
23
|
+
Output: ordination_plot.png, diversity_metrics.csv, permanova_results.txt, community_report.md
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Beta Diversity Partitioning
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
Load skill: community-ecology-ordination
|
|
30
|
+
Task: Partition beta diversity into turnover and nestedness components
|
|
31
|
+
for amphibian communities across an elevation gradient (1000–3500 m, 25 sites).
|
|
32
|
+
Data: data/amphibian_pa_matrix.csv (presence/absence)
|
|
33
|
+
Use betapart package. Report total beta, turnover fraction, and nestedness fraction.
|
|
34
|
+
Plot beta diversity components against elevation.
|
|
35
|
+
```
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Dissimilarity Metric Selection Guide
|
|
2
|
+
|
|
3
|
+
## Bray-Curtis (Sørensen quantitative)
|
|
4
|
+
- **Data:** Abundance (count or biomass)
|
|
5
|
+
- **Properties:** Asymmetric coefficient (ignores double zeros — shared absences do not count as similarity); ranges 0–1
|
|
6
|
+
- **When:** Most ecological abundance data; default for NMDS of communities
|
|
7
|
+
- **R:** `vegan::vegdist(x, method = "bray")`
|
|
8
|
+
|
|
9
|
+
## Jaccard
|
|
10
|
+
- **Data:** Presence/absence
|
|
11
|
+
- **Properties:** Asymmetric binary coefficient (ignores double zeros — shared absences do not count as similarity); ranges 0–1
|
|
12
|
+
- **When:** Presence/absence (incidence) data; when all species are equally important
|
|
13
|
+
- **R:** `vegan::vegdist(x, method = "jaccard", binary = TRUE)` (`binary = TRUE` converts counts to presence/absence first)
|
|
14
|
+
|
|
15
|
+
## Sørensen (Dice)
|
|
16
|
+
- **Data:** Presence/absence
|
|
17
|
+
- **Properties:** Emphasises co-occurrences more than Jaccard
|
|
18
|
+
- **When:** Similar to Jaccard; slightly more weight to shared species
|
|
19
|
+
- **R:** `vegan::vegdist(x, method = "bray")` on 0/1 matrix (equivalent)
|
|
20
|
+
|
|
21
|
+
## Chao
|
|
22
|
+
- **Data:** Abundance (accounts for unobserved species)
|
|
23
|
+
- **Properties:** Estimates true dissimilarity adjusting for sampling effort
|
|
24
|
+
- **When:** Datasets with very different sampling intensities; rare species important
|
|
25
|
+
- **R:** `vegan::vegdist(x, method = "chao")`
|
|
26
|
+
|
|
27
|
+
## Euclidean
|
|
28
|
+
- **Data:** Continuous environmental variables
|
|
29
|
+
- **Properties:** Symmetric; sensitive to magnitude; double-zero problem
|
|
30
|
+
- **When:** Environmental (not species) data in PCA / RDA
|
|
31
|
+
- **Avoid for:** Raw species abundances (use Hellinger transform first)
|
|
32
|
+
|
|
33
|
+
## Hellinger Distance
|
|
34
|
+
- **Data:** Abundance (after Hellinger transformation)
|
|
35
|
+
- **Properties:** Avoids double-zero problem; linear methods applicable
|
|
36
|
+
- **When:** PCA or RDA on species data; good compromise
|
|
37
|
+
- **R:** `vegan::decostand(x, "hellinger")` then Euclidean distance
|
|
38
|
+
|
|
39
|
+
## Aitchison Distance
|
|
40
|
+
- **Data:** Compositional / proportional abundance
|
|
41
|
+
- **Properties:** Log-ratio based; appropriate for compositional data
|
|
42
|
+
- **When:** Microbiome, pollen, compositional assemblage data
|
|
43
|
+
- **R:** `compositions::dist.acomp()` or `zCompositions` + Euclidean
|
|
44
|
+
|
|
45
|
+
## Decision Summary
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
Data type: Abundance?
|
|
49
|
+
YES → Bray-Curtis (default) | Chao (unequal effort) | Hellinger (for PCA/RDA)
|
|
50
|
+
NO → Presence/absence?
|
|
51
|
+
YES → Jaccard | Sørensen
|
|
52
|
+
NO → Continuous (env) → Euclidean | Gower (mixed types)
|
|
53
|
+
```
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# NMDS Interpretation Guide
|
|
2
|
+
|
|
3
|
+
## What is NMDS?
|
|
4
|
+
|
|
5
|
+
Non-Metric Multidimensional Scaling (NMDS) is an ordination technique that represents the rank-order dissimilarity between samples in a low-dimensional space. Unlike PCA, it makes no assumptions about the data distribution and works with any dissimilarity matrix.
|
|
6
|
+
|
|
7
|
+
## Stress Value — Quality of Fit
|
|
8
|
+
|
|
9
|
+
| Stress | Fit quality | Action |
|
|
10
|
+
|--------|-------------|--------|
|
|
11
|
+
| < 0.05 | Excellent | Report and proceed |
|
|
12
|
+
| 0.05–0.10 | Good | Report and proceed |
|
|
13
|
+
| 0.10–0.15 | Acceptable | Report; note limitation |
|
|
14
|
+
| 0.15–0.20 | Poor | Consider k=3 dimensions |
|
|
15
|
+
| > 0.20 | Unacceptable | Do not use 2D representation |
|
|
16
|
+
|
|
17
|
+
**Always report stress in the plot caption or legend.**
|
|
18
|
+
|
|
19
|
+
## How to Run Properly
|
|
20
|
+
|
|
21
|
+
```r
|
|
22
|
+
library(vegan)
|
|
23
|
+
set.seed(42) # for reproducibility
|
|
24
|
+
|
|
25
|
+
nmds <- metaMDS(
|
|
26
|
+
comm = species_matrix,
|
|
27
|
+
distance = "bray", # Bray-Curtis for abundance; jaccard for PA
|
|
28
|
+
k = 2, # start with 2; try 3 if stress > 0.15
|
|
29
|
+
trymax = 50, # run 50 random starts; keep best
|
|
30
|
+
autotransform = FALSE # don't auto-transform; apply Hellinger manually if needed
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
cat("Stress:", nmds$stress, "\n")
|
|
34
|
+
cat("Converged:", nmds$converged, "\n")
|
|
35
|
+
|
|
36
|
+
# Run multiple k values to choose
|
|
37
|
+
for (k in 2:4) {
|
|
38
|
+
tmp <- metaMDS(species_matrix, distance="bray", k=k, trymax=20, trace=0)
|
|
39
|
+
cat("k =", k, "| stress =", round(tmp$stress, 4), "\n")
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Reading an NMDS Plot
|
|
44
|
+
|
|
45
|
+
### Site scores (samples)
|
|
46
|
+
- **Nearby points** → similar species composition
|
|
47
|
+
- **Distant points** → dissimilar composition
|
|
48
|
+
- **Clusters** → groups with consistently similar assemblages
|
|
49
|
+
|
|
50
|
+
### Species scores (if added)
|
|
51
|
+
- Arrow/point direction → gradient of increasing species abundance/occurrence
|
|
52
|
+
- Arrow length → strength of association with NMDS axes
|
|
53
|
+
|
|
54
|
+
### Environmental vectors (envfit)
|
|
55
|
+
- Added post-hoc to correlate environmental variables with ordination axes
|
|
56
|
+
- Arrow direction and length indicate direction and strength of environmental gradient
|
|
57
|
+
|
|
58
|
+
```r
|
|
59
|
+
# Add environmental vectors
|
|
60
|
+
env_fit <- envfit(nmds, env_matrix, permutations = 999)
|
|
61
|
+
print(env_fit) # shows r² and p-value for each variable
|
|
62
|
+
|
|
63
|
+
# Plot
|
|
64
|
+
plot(nmds, display = "sites")
|
|
65
|
+
plot(env_fit, p.max = 0.05) # only significant vectors
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Producing a Publication-Quality Plot
|
|
69
|
+
|
|
70
|
+
```r
|
|
71
|
+
library(ggplot2)
|
|
72
|
+
|
|
73
|
+
scores_df <- as.data.frame(scores(nmds, display = "sites"))
|
|
74
|
+
scores_df$group <- metadata$land_use # your grouping variable
|
|
75
|
+
|
|
76
|
+
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, colour = group, shape = group)) +
|
|
77
|
+
geom_point(size = 3, alpha = 0.8) +
|
|
78
|
+
stat_ellipse(level = 0.95, linetype = "dashed") + # 95% data ellipses (multivariate t), not confidence regions for group centroids
|
|
79
|
+
annotate("text", x = Inf, y = Inf,
|
|
80
|
+
label = paste("Stress =", round(nmds$stress, 3)),
|
|
81
|
+
hjust = 1.1, vjust = 1.5, size = 3.5) +
|
|
82
|
+
scale_colour_brewer(palette = "Set2") +
|
|
83
|
+
labs(title = "NMDS (Bray-Curtis)", colour = "Land use", shape = "Land use") +
|
|
84
|
+
theme_bw() +
|
|
85
|
+
theme(legend.position = "right")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Common Mistakes
|
|
89
|
+
|
|
90
|
+
| Mistake | Fix |
|
|
91
|
+
|---------|-----|
|
|
92
|
+
| Only running 1 random start | Set `trymax = 50` |
|
|
93
|
+
| Reporting stress > 0.20 as acceptable | Use k=3 or a different ordination |
|
|
94
|
+
| Not setting seed | Always `set.seed()` before metaMDS |
|
|
95
|
+
| Using autotransform=TRUE without checking | Turn off; apply transformation explicitly |
|
|
96
|
+
| Not checking convergence | Check `nmds$converged` |
|
|
97
|
+
| Interpreting axes as principal components | NMDS axes are arbitrary; only relative distances matter |
|
|
98
|
+
|
|
99
|
+
## When to Use PCA Instead
|
|
100
|
+
|
|
101
|
+
- Data are continuous environmental variables (not species composition)
|
|
102
|
+
- Linear relationships are expected
|
|
103
|
+
- You need to explain specific % variance per axis
|
|
104
|
+
- For species data: apply Hellinger transformation first (PCA on Hellinger = RDA with no constraints)
|
package/skills/community-ecology-ordination/scripts/__pycache__/community_analysis.cpython-311.pyc
ADDED
|
Binary file
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
# Usage: Rscript community_analysis.R <species_site_matrix.csv> <metadata.csv> <output_dir>  (all optional; defaults: data/species_matrix.csv, data/site_metadata.csv, outputs/community)
|
|
5
|
+
# NMDS ordination, diversity metrics, PERMANOVA
|
|
6
|
+
# Usage: Rscript community_analysis.R <species_matrix_csv> <metadata_csv> <output_dir>
|
|
7
|
+
# Requires: R >= 4.1 (native pipe |>), vegan, ggplot2, dplyr
|
|
8
|
+
|
|
9
|
+
# ── Inline logger ─────────────────────────────────────────────────────────────
|
|
10
|
+
SKILL_NAME <- "community-ecology-ordination"
|
|
11
|
+
.log_ts <- function() format(Sys.time(), "[%Y-%m-%d %H:%M:%S]")
|
|
12
|
+
log_info <- function(...) message(.log_ts(), " [INFO] ", sprintf(...))
|
|
13
|
+
log_warn <- function(...) message(.log_ts(), " [WARN] ", sprintf(...))
|
|
14
|
+
log_error<- function(...) message(.log_ts(), " [ERROR] ", sprintf(...))
|
|
15
|
+
log_step <- function(n, d) log_info("-- STEP %d: %s", n, d)
|
|
16
|
+
log_decision <- function(v, val, why) log_info("DECISION | %s = %s | %s", v, val, why)
|
|
17
|
+
dir.create("logs", recursive=TRUE, showWarnings=FALSE)
|
|
18
|
+
|
|
19
|
+
suppressPackageStartupMessages({
|
|
20
|
+
library(vegan)
|
|
21
|
+
library(ggplot2)
|
|
22
|
+
library(dplyr)
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
args <- commandArgs(trailingOnly = TRUE)
|
|
26
|
+
sp_file <- ifelse(length(args) >= 1, args[1], "data/species_matrix.csv")
|
|
27
|
+
meta_file <- ifelse(length(args) >= 2, args[2], "data/site_metadata.csv")
|
|
28
|
+
output_dir <- ifelse(length(args) >= 3, args[3], "outputs/community")
|
|
29
|
+
|
|
30
|
+
log_step(1, "Validate inputs")
|
|
31
|
+
if (!file.exists(sp_file)) {
|
|
32
|
+
log_error(
|
|
33
|
+
"Falha em validate inputs: arquivo de matriz de especies nao encontrado: %s\nCausa provavel: caminho incorreto ou arquivo nao gerado\nVerifique: o argumento species_matrix_csv e o diretorio de trabalho\nSkill anterior: data-cleaning",
|
|
34
|
+
sp_file
|
|
35
|
+
)
|
|
36
|
+
stop("Species matrix file not found.")
|
|
37
|
+
}
|
|
38
|
+
if (!file.exists(meta_file)) {
|
|
39
|
+
log_error(
|
|
40
|
+
"Falha em validate inputs: arquivo de metadados nao encontrado: %s\nCausa provavel: caminho incorreto ou arquivo nao gerado\nVerifique: o argumento metadata_csv e o diretorio de trabalho\nSkill anterior: data-cleaning",
|
|
41
|
+
meta_file
|
|
42
|
+
)
|
|
43
|
+
stop("Metadata file not found.")
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
|
|
47
|
+
set.seed(42)
|
|
48
|
+
log_decision("random_seed", 42, "ensures reproducibility of NMDS and permutation tests")
|
|
49
|
+
|
|
50
|
+
log_step(2, "Load species matrix and metadata")
|
|
51
|
+
tryCatch({
|
|
52
|
+
sp <- read.csv(sp_file, row.names = 1)
|
|
53
|
+
meta <- read.csv(meta_file, row.names = 1)
|
|
54
|
+
}, error = function(e) {
|
|
55
|
+
log_error(
|
|
56
|
+
"Falha em load data: %s\nCausa provavel: CSV malformado ou sem coluna de rownames\nVerifique: estrutura dos arquivos (primeira coluna deve ser site ID)\nSkill anterior: data-cleaning",
|
|
57
|
+
conditionMessage(e)
|
|
58
|
+
)
|
|
59
|
+
stop(e)
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
log_info("Sites: %d | Species: %d", nrow(sp), ncol(sp))
|
|
63
|
+
|
|
64
|
+
if (any(sp < 0, na.rm = TRUE)) {
|
|
65
|
+
log_warn("Species matrix contains negative values. Abundances must be >= 0. Check your input data.")
|
|
66
|
+
}
|
|
67
|
+
if (anyNA(sp)) {
|
|
68
|
+
# vegan does not impute missing values: vegdist() defaults to na.rm = FALSE, so NA cells propagate into the distance matrix.
log_warn("Species matrix contains %d NA values. vegan does not treat NA as zero; replace or remove them before analysis, otherwise distance-based steps (NMDS, PERMANOVA) may fail or return NA.", sum(is.na(sp)))
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
log_step(3, "Compute alpha diversity metrics")
|
|
72
|
+
tryCatch({
|
|
73
|
+
div <- data.frame(
|
|
74
|
+
site = rownames(sp),
|
|
75
|
+
richness = specnumber(sp),
|
|
76
|
+
shannon = diversity(sp, index = "shannon"),
|
|
77
|
+
simpson = diversity(sp, index = "simpson")
|
|
78
|
+
)
|
|
79
|
+
write.csv(div, file.path(output_dir, "diversity_metrics.csv"), row.names = FALSE)
|
|
80
|
+
log_info("Alpha diversity computed. Mean richness: %.1f | Mean Shannon: %.2f",
|
|
81
|
+
mean(div$richness), mean(div$shannon))
|
|
82
|
+
}, error = function(e) {
|
|
83
|
+
log_error(
|
|
84
|
+
"Falha em alpha diversity: %s\nCausa provavel: matriz de especies vazia ou nao numerica\nVerifique: estrutura do CSV de especies\nSkill anterior: data-cleaning",
|
|
85
|
+
conditionMessage(e)
|
|
86
|
+
)
|
|
87
|
+
stop(e)
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
log_step(4, "Run NMDS ordination (Bray-Curtis, k=2)")
|
|
91
|
+
log_decision("distance_metric", "bray", "Bray-Curtis is standard for community composition data")
|
|
92
|
+
log_decision("nmds_k", 2, "2 dimensions for interpretable 2D ordination plot")
|
|
93
|
+
tryCatch({
|
|
94
|
+
nmds <- metaMDS(sp, distance = "bray", k = 2, trymax = 50, trace = 0)
|
|
95
|
+
log_info("NMDS stress: %.4f", nmds$stress)
|
|
96
|
+
if (nmds$stress > 0.2) {
|
|
97
|
+
log_warn("NMDS stress = %.4f exceeds 0.20. Ordination may be unreliable; consider k=3 or data transformation.", nmds$stress)
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
scores_df <- as.data.frame(scores(nmds, display = "sites")) |>
|
|
101
|
+
mutate(site = rownames(sp)) |>
|
|
102
|
+
left_join(meta |> mutate(site = rownames(meta)), by = "site")
|
|
103
|
+
|
|
104
|
+
p_ord <- ggplot(scores_df, aes(x = NMDS1, y = NMDS2)) +
|
|
105
|
+
geom_point(size = 3) +
|
|
106
|
+
annotate("text", x = Inf, y = -Inf, label = paste("Stress =", round(nmds$stress, 3)),
|
|
107
|
+
hjust = 1.1, vjust = -0.5, size = 3.5) +
|
|
108
|
+
theme_bw() + labs(title = "NMDS Ordination (Bray-Curtis)")
|
|
109
|
+
ggsave(file.path(output_dir, "ordination_plot.png"), p_ord, width = 7, height = 6, dpi = 150)
|
|
110
|
+
log_info("Ordination plot saved.")
|
|
111
|
+
}, error = function(e) {
|
|
112
|
+
log_error(
|
|
113
|
+
"Falha em NMDS: %s\nCausa provavel: matriz com sites/especies insuficientes ou todos zeros\nVerifique: numero de sites (>= 3) e que a matriz nao seja toda zeros\nSkill anterior: data-cleaning",
|
|
114
|
+
conditionMessage(e)
|
|
115
|
+
)
|
|
116
|
+
stop(e)
|
|
117
|
+
})
|
|
118
|
+
|
|
119
|
+
log_step(5, "PERMANOVA and PERMDISP (if 'group' column present)")
|
|
120
|
+
if ("group" %in% names(meta)) {
|
|
121
|
+
log_decision("permanova_permutations", 999, "standard number for robust p-value estimation")
|
|
122
|
+
tryCatch({
|
|
123
|
+
# adonis2() and betadisper() pair meta$group with dist_mat by row position, so put the
# metadata rows in the species-matrix site order before testing.
site_idx <- match(rownames(sp), rownames(meta))
if (anyNA(site_idx)) {
  # Site IDs cannot be matched: keep the positional pairing but make it visible in the log.
  log_warn("Metadata site IDs do not match the species matrix; groups are paired with sites by row position.")
} else {
  meta <- meta[site_idx, , drop = FALSE]
}
dist_mat <- vegdist(sp, method = "bray")
|
|
124
|
+
perm <- adonis2(dist_mat ~ meta$group, permutations = 999)
|
|
125
|
+
disp <- betadisper(dist_mat, meta$group)
|
|
126
|
+
disp_test <- permutest(disp, permutations = 999)
|
|
127
|
+
log_info("PERMANOVA:\n%s", paste(capture.output(perm), collapse = "\n"))
|
|
128
|
+
log_info("PERMDISP:\n%s", paste(capture.output(disp_test), collapse = "\n"))
|
|
129
|
+
capture.output(perm, disp_test) |>
|
|
130
|
+
writeLines(file.path(output_dir, "permanova_results.txt"))
|
|
131
|
+
log_info("PERMANOVA results saved.")
|
|
132
|
+
}, error = function(e) {
|
|
133
|
+
log_error(
|
|
134
|
+
"Falha em PERMANOVA/PERMDISP: %s\nCausa provavel: grupo com apenas um nivel ou sites insuficientes por grupo\nVerifique: coluna 'group' nos metadados e balanceamento\nSkill anterior: data-cleaning",
|
|
135
|
+
conditionMessage(e)
|
|
136
|
+
)
|
|
137
|
+
stop(e)
|
|
138
|
+
})
|
|
139
|
+
} else {
|
|
140
|
+
log_warn("Column 'group' not found in metadata. PERMANOVA and PERMDISP skipped.")
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
log_info("Done. Outputs in: %s", output_dir)
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
3
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
community_analysis.py
|
|
7
|
+
Alpha diversity, Bray-Curtis beta diversity, ordination (PCoA), group comparison (PERMANOVA via skbio), and hierarchical clustering.
|
|
8
|
+
Usage: python community_analysis.py <species_matrix_csv> <metadata_csv> <output_dir>
|
|
9
|
+
Requires: pandas, numpy, scipy, matplotlib; scikit-bio (skbio) is optional (PCoA and PERMANOVA are skipped without it)
|
|
10
|
+
"""
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
# Identifier of this skill; it is embedded in every log line and in the
# name of the per-run log file.
SKILL_NAME = "community-ecology-ordination"

# Log files go to ./logs (relative to the current working directory); the
# directory is created at import time if it does not exist yet.
_LOG_DIR = Path("logs")
_LOG_DIR.mkdir(parents=True, exist_ok=True)
_run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
_log_file = _LOG_DIR / f"skill_{SKILL_NAME}_{_run_stamp}.log"

# Every record is sent both to stdout and to the per-run log file.
_console_handler = logging.StreamHandler(sys.stdout)
_file_handler = logging.FileHandler(_log_file, encoding="utf-8")

logging.basicConfig(
    level=logging.INFO,
    format=f"[%(asctime)s] [%(levelname)s] [{SKILL_NAME}] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[_console_handler, _file_handler],
)
logger = logging.getLogger(SKILL_NAME)
|
|
30
|
+
|
|
31
|
+
def log_step(n: int, desc: str) -> None:
    """Log an INFO record announcing pipeline step *n* and its description."""
    logger.log(logging.INFO, "-- STEP %d: %s", n, desc)
|
|
33
|
+
|
|
34
|
+
def log_decision(var: str, val, why: str) -> None:
    """Log an INFO record for an analysis decision.

    The record states the parameter name (*var*), the value chosen for it
    (*val*), and the rationale (*why*).
    """
    logger.log(logging.INFO, "DECISION | %s = %s | %s", var, val, why)
|
|
36
|
+
|
|
37
|
+
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import cophenet, dendrogram, linkage
from scipy.spatial.distance import braycurtis, squareform
|
|
43
|
+
|
|
44
|
+
# scikit-bio is optional: when it cannot be imported, HAS_SKBIO is False and
# a warning is logged so the missing steps are visible in the run log.
HAS_SKBIO = True
try:
    from skbio.diversity import beta_diversity
    from skbio.stats.ordination import pcoa
    from skbio.stats.distance import permanova, DistanceMatrix
except ImportError:
    HAS_SKBIO = False
    logger.warning("scikit-bio not installed. PCoA and PERMANOVA will be skipped. pip install scikit-bio")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def bray_curtis_matrix(sp: pd.DataFrame) -> np.ndarray:
    """Return the square, symmetric Bray-Curtis dissimilarity matrix of *sp*.

    Each row of *sp* is treated as one observation. The result has shape
    ``(len(sp), len(sp))`` with zeros on the diagonal; entry ``[i, j]`` is
    ``scipy.spatial.distance.braycurtis`` of rows ``i`` and ``j`` after the
    values have been cast to float.
    """
    n_sites = len(sp)
    dist = np.zeros((n_sites, n_sites))
    abundances = sp.values.astype(float)
    # Visit every unordered pair once (strict upper triangle) and mirror it.
    upper_rows, upper_cols = np.triu_indices(n_sites, k=1)
    for a, b in zip(upper_rows, upper_cols):
        dist[a, b] = dist[b, a] = braycurtis(abundances[a], abundances[b])
    return dist
|
|
63
|
+
|
|
64
|
+
def alpha_diversity(sp: pd.DataFrame) -> pd.DataFrame:
    """Compute per-row richness, Shannon and Simpson indices for *sp*.

    Returns a DataFrame with one row per row of *sp* and the columns
    ``site`` (the index of *sp*), ``richness`` (number of entries > 0),
    ``shannon`` (``-sum(p * ln p)``) and ``simpson`` (``1 - sum(p**2)``),
    where ``p`` are the positive entries of a row divided by the row sum.
    """

    def _relative_abundance(counts):
        # Only strictly positive entries contribute; they are scaled by the
        # row total.
        return counts[counts > 0] / counts.sum()

    def _shannon_index(counts):
        rel = _relative_abundance(counts)
        return -np.sum(rel * np.log(rel))

    def _simpson_index(counts):
        rel = _relative_abundance(counts)
        return 1 - np.sum(rel**2)

    metrics = {
        "site": sp.index,
        "richness": (sp > 0).sum(axis=1).values,
    }
    metrics["shannon"] = sp.apply(_shannon_index, axis=1).values
    metrics["simpson"] = sp.apply(_simpson_index, axis=1).values
    return pd.DataFrame(metrics)
|
|
78
|
+
|
|
79
|
+
def main():
    """Run the community-analysis pipeline from the command line.

    Positional arguments (all optional, read from ``sys.argv``):
        1. species matrix CSV  (default ``data/species_matrix.csv``)
        2. site metadata CSV   (default ``data/site_metadata.csv``)
        3. output directory    (default ``outputs/community``)

    Steps: validate inputs, load both CSVs (first column as index), write
    alpha-diversity metrics, write the Bray-Curtis matrix, run PCoA and
    PERMANOVA when scikit-bio is available, and draw a Ward dendrogram.

    Exits with status 1 when an input file does not exist. Any exception
    raised inside a step is logged with a diagnostic hint and re-raised.
    """
    sp_file = sys.argv[1] if len(sys.argv) > 1 else "data/species_matrix.csv"
    meta_file = sys.argv[2] if len(sys.argv) > 2 else "data/site_metadata.csv"
    output_dir = Path(sys.argv[3]) if len(sys.argv) > 3 else Path("outputs/community")

    log_step(1, "Validate inputs")
    if not Path(sp_file).exists():
        logger.error(
            "Species matrix file not found: %s\n"
            "Causa provavel: caminho incorreto ou arquivo nao gerado\n"
            "Verifique: o argumento species_matrix_csv e o diretorio de trabalho\n"
            "Skill anterior: data-cleaning",
            sp_file
        )
        sys.exit(1)
    if not Path(meta_file).exists():
        logger.error(
            "Metadata file not found: %s\n"
            "Causa provavel: caminho incorreto ou arquivo nao gerado\n"
            "Verifique: o argumento metadata_csv e o diretorio de trabalho\n"
            "Skill anterior: data-cleaning",
            meta_file
        )
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    log_step(2, "Load species matrix and metadata")
    try:
        sp = pd.read_csv(sp_file, index_col=0)
        meta = pd.read_csv(meta_file, index_col=0)
    except Exception as e:
        logger.error(
            "Unexpected error in load data: %s\n"
            "Causa provavel: CSV malformado ou sem coluna de rownames\n"
            "Verifique: estrutura dos arquivos (primeira coluna deve ser site ID)\n"
            "Skill anterior: data-cleaning",
            e
        )
        raise

    logger.info("Sites: %d | Species: %d", len(sp), len(sp.columns))

    # Data-quality checks only warn; the analysis still proceeds.
    if (sp < 0).any().any():
        logger.warning("Species matrix contains negative values. Abundances must be >= 0. Check your input data.")
    if sp.isna().any().any():
        logger.warning("Species matrix contains %d NA values. These will affect distance calculations.", sp.isna().sum().sum())

    log_step(3, "Compute alpha diversity metrics")
    try:
        div = alpha_diversity(sp)
        div.to_csv(output_dir / "diversity_metrics.csv", index=False)
        logger.info("Mean richness: %.1f | Shannon: %.2f", div['richness'].mean(), div['shannon'].mean())
    except Exception as e:
        logger.error(
            "Unexpected error in alpha diversity: %s\n"
            "Causa provavel: matriz de especies vazia ou nao numerica\n"
            "Verifique: estrutura do CSV de especies\n"
            "Skill anterior: data-cleaning",
            e
        )
        raise

    log_step(4, "Compute Bray-Curtis distance matrix")
    log_decision("distance_metric", "bray-curtis", "standard for community composition; handles double-zeros correctly")
    try:
        dm = bray_curtis_matrix(sp)
        pd.DataFrame(dm, index=sp.index, columns=sp.index).to_csv(output_dir / "bray_curtis_matrix.csv")
        logger.info("Bray-Curtis matrix computed (%d x %d).", len(sp), len(sp))
    except Exception as e:
        logger.error(
            "Unexpected error in Bray-Curtis matrix: %s\n"
            "Causa provavel: dados nao numericos na matriz de especies\n"
            "Verifique: tipos de dados no CSV de especies\n"
            "Skill anterior: data-cleaning",
            e
        )
        raise

    log_step(5, "PCoA ordination and PERMANOVA")
    if HAS_SKBIO:
        log_decision("permanova_permutations", 999, "standard number for robust p-value estimation")
        try:
            dist_mat = DistanceMatrix(dm, ids=list(sp.index))
            pc = pcoa(dist_mat)
            scores = pc.samples[["PC1", "PC2"]].copy()
            scores["site"] = scores.index
            if "group" in meta.columns:
                # Align group labels to the site order of the scores / matrix.
                scores["group"] = meta["group"].reindex(scores.index).values
                groups_for_perm = meta["group"].reindex(sp.index).values
                perm_result = permanova(dist_mat, groups_for_perm, permutations=999)
                logger.info(
                    "PERMANOVA: F = %.3f | p = %.4f",
                    perm_result['test statistic'], perm_result['p-value']
                )
                perm_df = pd.DataFrame({"statistic": [perm_result["test statistic"]],
                                        "p_value": [perm_result["p-value"]]})
                perm_df.to_csv(output_dir / "permanova_results.csv", index=False)
                # Plot coloured by group
                fig, ax = plt.subplots(figsize=(7, 6))
                for grp in scores["group"].unique():
                    sub = scores[scores["group"] == grp]
                    ax.scatter(sub["PC1"], sub["PC2"], label=grp, s=50, alpha=0.8)
                # proportion_explained is label-indexed; use .iloc for
                # positional access (integer-key lookup is deprecated).
                explained = pc.proportion_explained
                ax.set_xlabel(f"PC1 ({explained.iloc[0]*100:.1f}%)")
                ax.set_ylabel(f"PC2 ({explained.iloc[1]*100:.1f}%)")
                ax.set_title("PCoA (Bray-Curtis)")
                ax.legend()
                fig.tight_layout()
                fig.savefig(output_dir / "pcoa_plot.png", dpi=150)
                plt.close(fig)
                logger.info("PCoA plot saved.")
            else:
                logger.warning("Column 'group' not found in metadata. PERMANOVA skipped.")
        except Exception as e:
            logger.error(
                "Unexpected error in PCoA/PERMANOVA: %s\n"
                "Causa provavel: grupo com apenas um nivel ou sites insuficientes\n"
                "Verifique: coluna 'group' nos metadados e balanceamento\n"
                "Skill anterior: data-cleaning",
                e
            )
            raise
    else:
        logger.warning("scikit-bio unavailable. PCoA and PERMANOVA steps skipped.")

    log_step(6, "Hierarchical clustering")
    try:
        # Condensed (1-D) form of the distance matrix, shared by linkage and
        # the cophenetic-correlation check.
        condensed = squareform(dm)
        Z = linkage(condensed, method="ward")
        # scipy's function is `cophenet`; given the original distances it
        # returns (correlation coefficient, cophenetic distances).
        c, _ = cophenet(Z, condensed)
        log_decision("linkage_method", "ward", "minimises total within-cluster variance; standard for ecology")
        logger.info("Cophenetic correlation (Ward): %.3f", c)
        if c < 0.7:
            logger.warning("Cophenetic correlation = %.3f < 0.70. Dendrogram may poorly represent distances.", c)
        # Widen the figure with the number of sites so leaf labels stay legible.
        fig, ax = plt.subplots(figsize=(max(8, len(sp)//2), 5))
        dendrogram(Z, labels=list(sp.index), ax=ax, leaf_rotation=90, leaf_font_size=8)
        ax.set_title(f"Hierarchical Clustering (Ward.D2) | Cophenetic r = {c:.3f}")
        fig.tight_layout()
        fig.savefig(output_dir / "cluster_dendrogram.png", dpi=150)
        plt.close(fig)
        logger.info("Cluster dendrogram saved.")
    except Exception as e:
        logger.error(
            "Unexpected error in hierarchical clustering: %s\n"
            "Causa provavel: matriz de distancias com NaN ou apenas um site\n"
            "Verifique: integridade da matriz Bray-Curtis\n"
            "Skill anterior: community-ecology-ordination (distance matrix)",
            e
        )
        raise

    logger.info("Outputs written to: %s", output_dir)


if __name__ == "__main__":
    main()
|