npm - ecological-agent-skills - Versions diffs - 3.2.0 → 3.2.1 - Mend

ecological-agent-skills 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/docs/repository-statistics.md CHANGED Viewed

@@ -1,7 +1,7 @@
 # Repository Statistics
-Generated: 2026-04-03
-Version: 3.2.0 (unreleased patch)
+Generated: 2026-04-04
+Version: 3.2.0
 ---
@@ -11,8 +11,8 @@ Version: 3.2.0 (unreleased patch)
 |----------|-------|
 | Skills | 17 |
 | Workflows | 14 |
-| R scripts | 38 |
-| Python scripts | 20 |
+| R scripts | 34 |
+| Python scripts | 26 |
 | Worked examples | 14 |
 | Resource documents | 53 |
 | Documentation files (docs/) | 8 |
@@ -70,7 +70,7 @@ Version: 3.2.0 (unreleased patch)
 | Section | Checks |
 |---------|--------|
-| Structure checks | 585/585 passed |
+| Structure checks | 652/652 passed |
 | Skills verified | 17 |
 | Workflows verified | 14 |
 | Global coverage | 6/6 continents |
@@ -81,7 +81,7 @@ Version: 3.2.0 (unreleased patch)
 | Test type | Count |
 |-----------|-------|
-| CI structural checks | 585 |
+| CI structural checks | 652 |
 | Python unit tests (pytest) | 176+ |
 | R unit tests (testthat) | 28+ |
 | Agent smoke test cases | 15 |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ecological-agent-skills",
-  "version": "3.2.0",
+  "version": "3.2.1",
   "description": "17 modular skills for quantitative ecology — SDM, occupancy, PVA, connectivity, prioritization, and more. Works with Claude Code, Gemini CLI, Cursor, Copilot, and any AI agent.",
   "keywords": [
     "ecology",

package/skills/biostatistics-workbench/SKILL.md CHANGED Viewed

@@ -107,7 +107,7 @@ Guides the agent through the selection, execution, and interpretation of statist
 ---
-## Key Decisions to Document
+## Decision Points
 - Response variable distribution and link function
 - Random effects structure and rationale

package/skills/community-ecology-ordination/scripts/community_analysis.py CHANGED Viewed

@@ -38,7 +38,7 @@ import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from scipy.spatial.distance import braycurtis
-from scipy.cluster.hierarchy import dendrogram, linkage, copshenetic
+from scipy.cluster.hierarchy import dendrogram, linkage, cophenet
 from scipy.spatial.distance import squareform
 try:
@@ -203,7 +203,7 @@ def main():
     log_step(6, "Hierarchical clustering")
     try:
         Z = linkage(squareform(dm), method="ward")
-        c, _ = copshenetic(Z, squareform(dm))
+        c, _ = cophenet(Z, squareform(dm))
         log_decision("linkage_method", "ward", "minimises total within-cluster variance; standard for ecology")
         logger.info("Cophenetic correlation (Ward): %.3f", c)
         if c < 0.7:

package/skills/ecological-impact-assessment/SKILL.md CHANGED Viewed

@@ -105,7 +105,7 @@ The BACI estimator is only valid if Control and Impact groups had parallel traje
 ---
-## Key Decisions to Document
+## Decision Points
 - Control site selection criteria
 - BACI model specification (fixed vs. random effects)

package/skills/ecological-impact-assessment/scripts/baci_analysis.py ADDED Viewed

@@ -0,0 +1,337 @@
+# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
+# SPDX-License-Identifier: GPL-3.0-or-later
+# Usage: python baci_analysis.py [data_csv] [response_var] [output_dir]
+#
+# Arguments (positional, all optional):
+#   data_csv     : CSV with columns site, period, treatment, and the response variable
+#                  (default: 'data/baci_data.csv')
+#   response_var : Name of the response column (default: 'abundance')
+#   output_dir   : Directory for outputs, created if absent (default: 'outputs/baci')
+#
+# Outputs:
+#   baci_results.csv  - BACI interaction coefficient, SE, z, p-value
+#   baci_plot.png     - Control vs Impact, Before vs After interaction plot
+#   residual_diagnostics.png - Residual diagnostic plots
+#
+# Requires: numpy, pandas, scipy, statsmodels, matplotlib
+import sys
+import os
+import logging
+import numpy as np
+import pandas as pd
+import statsmodels.formula.api as smf
+import matplotlib.pyplot as plt
+# -- Inline logger ------------------------------------------------------------
+# Skill identifier: used as the logger name and echoed in main()'s start-up log line.
+SKILL_NAME = "ecological-impact-assessment"
+# Configure logging at import time: INFO level, "[timestamp] [LEVEL] message" records.
+logging.basicConfig(
+    level=logging.INFO,
+    format="[%(asctime)s] [%(levelname)s] %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(SKILL_NAME)
+# Ensure a "logs" directory exists relative to the current working directory.
+# NOTE(review): no handler in the visible code writes into it -- confirm whether
+# a file handler was intended or the directory is consumed by another tool.
+os.makedirs("logs", exist_ok=True)
+def log_decision(variable: str, value: str, reason: str) -> None:
+    """Write one methodological choice to the run log at INFO level.
+
+    Args:
+        variable: Name of the setting that was decided.
+        value: The value chosen for it.
+        reason: Short justification for the choice.
+    """
+    template = "DECISION | %s = %s | %s"
+    logger.info(template, variable, value, reason)
+def _encode_two_level_factor(values: pd.Series, levels: tuple, column: str) -> pd.Categorical:
+    """Return *values* as an ordered two-level categorical, rejecting unknown labels.
+
+    Labels are compared after stripping surrounding whitespace and lower-casing,
+    so "After " and "after" map to the same level.
+
+    Args:
+        values: Raw column taken from the input data.
+        levels: The two accepted labels, reference level first.
+        column: Column name, used only in the error message.
+
+    Raises:
+        ValueError: if any row is missing or holds a label outside *levels*.
+            Without this check such rows become NaN in the categorical and the
+            ``== "after"`` / ``== "impact"`` dummies code them as 0, i.e. they
+            are silently analysed as "before" / "control".
+    """
+    labels = values.astype(str).str.strip().str.lower()
+    invalid = values.isna() | ~labels.isin(levels)
+    if invalid.any():
+        offenders = sorted(set(values[invalid].astype(str)))
+        raise ValueError(
+            f"column '{column}' has {int(invalid.sum())} row(s) with unrecognised "
+            f"labels {offenders}; expected one of {list(levels)}"
+        )
+    return pd.Categorical(labels, categories=list(levels), ordered=True)
+def main() -> None:
+    """Run the BACI (Before-After-Control-Impact) analysis from the command line.
+
+    Positional arguments are read from ``sys.argv``; each has a default:
+    data CSV ('data/baci_data.csv'), response column ('abundance') and
+    output directory ('outputs/baci').
+
+    Steps: load and validate the CSV, fit a Gaussian mixed model with a random
+    intercept per site, extract the period x treatment interaction, then write
+    ``baci_results.csv``, ``baci_plot.png`` and ``residual_diagnostics.png``
+    into the output directory.
+
+    Any failure is logged with a probable cause and the process exits with
+    status 1 via ``sys.exit``.
+    """
+    # -- Parse arguments -----------------------------------------------------------
+    args = sys.argv[1:]
+    data_file = args[0] if len(args) >= 1 else "data/baci_data.csv"
+    response_var = args[1] if len(args) >= 2 else "abundance"
+    output_dir = args[2] if len(args) >= 3 else "outputs/baci"
+    os.makedirs(output_dir, exist_ok=True)
+    logger.info(
+        "Skill: %s | data_file=%s | response_var=%s | output_dir=%s",
+        SKILL_NAME,
+        data_file,
+        response_var,
+        output_dir,
+    )
+    # -- Input precondition check --------------------------------------------------
+    if not os.path.isfile(data_file):
+        logger.error(
+            "Input not found: %s\n"
+            "Probable cause: wrong path or file not generated by previous step.\n"
+            "Check: whether the CSV file exists and the name is correct.\n"
+            "Previous skill: ecological-sampling-design or field collection.",
+            data_file,
+        )
+        sys.exit(1)
+    # -- STEP 1: Load and validate BACI data --------------------------------------
+    logger.info("-- STEP 1: Load and validate BACI data")
+    try:
+        dat = pd.read_csv(data_file)
+    except Exception as e:
+        logger.error(
+            "Failed to read CSV: %s\n"
+            "Probable cause: corrupted file or incorrect encoding.\n"
+            "Check: open the file in a text editor and verify separators.\n"
+            "Previous skill: ecological-sampling-design.",
+            e,
+        )
+        sys.exit(1)
+    required_cols = {"site", "period", "treatment", response_var}
+    missing_cols = required_cols - set(dat.columns)
+    if missing_cols:
+        logger.error(
+            "Required columns missing: %s\n"
+            "Probable cause: CSV does not follow the expected BACI schema.\n"
+            "Check: the file must have columns site, period, treatment and the response variable.\n"
+            "Previous skill: ecological-sampling-design.",
+            ", ".join(missing_cols),
+        )
+        sys.exit(1)
+    n_na = int(dat[response_var].isna().sum())
+    if n_na > 0:
+        logger.warning(
+            "Column '%s' contains %d NA values -- may be excluded by the model function.",
+            response_var,
+            n_na,
+        )
+    # Encode factors. Unknown or missing labels abort the run: letting them
+    # through would code those rows as before/control in the dummies below.
+    try:
+        dat["period"] = _encode_two_level_factor(
+            dat["period"], ("before", "after"), "period"
+        )
+        dat["treatment"] = _encode_two_level_factor(
+            dat["treatment"], ("control", "impact"), "treatment"
+        )
+    except ValueError as e:
+        logger.error(
+            "Invalid factor labels: %s\n"
+            "Probable cause: period/treatment use labels other than "
+            "before/after and control/impact, or contain missing values.\n"
+            "Check: recode the columns to the expected labels before running.\n"
+            "Previous skill: ecological-sampling-design.",
+            e,
+        )
+        sys.exit(1)
+    n_sites = dat["site"].nunique()
+    n_before = int((dat["period"] == "before").sum())
+    n_after = int((dat["period"] == "after").sum())
+    logger.info("Sites: %d | Before: %d | After: %d", n_sites, n_before, n_after)
+    # The interaction needs all four period x treatment cells to be populated.
+    n_cells = dat[["period", "treatment"]].drop_duplicates().shape[0]
+    if n_cells < 4:
+        logger.warning(
+            "Only %d of the 4 period x treatment combinations are present -- "
+            "the BACI interaction may not be estimable.",
+            n_cells,
+        )
+    log_decision(
+        "response_var",
+        response_var,
+        "response variable provided by user or default 'abundance'",
+    )
+    log_decision(
+        "family",
+        "Gaussian mixed model (MixedLM)",
+        "linear mixed-effects model with random intercept for site; "
+        "for overdispersed counts consider Poisson/NB GLMMs via statsmodels or external packages",
+    )
+    if n_sites < 3:
+        logger.warning(
+            "Only %d site(s) detected -- random effects may not be estimable.", n_sites
+        )
+    # -- STEP 2: Fit BACI mixed-effects model -------------------------------------
+    logger.info("-- STEP 2: Fit BACI mixed-effects model")
+    # Create numeric dummy variables for the interaction term
+    # statsmodels MixedLM works with formula interface
+    # period: before=0, after=1; treatment: control=0, impact=1
+    dat["period_num"] = (dat["period"] == "after").astype(int)
+    dat["treatment_num"] = (dat["treatment"] == "impact").astype(int)
+    dat["interaction"] = dat["period_num"] * dat["treatment_num"]
+    formula_str = f"{response_var} ~ period_num + treatment_num + interaction"
+    logger.info("Formula: %s + (1|site)", formula_str)
+    try:
+        model = smf.mixedlm(
+            formula_str,
+            data=dat,
+            groups=dat["site"],
+        )
+        result = model.fit(reml=True)
+        logger.info("Model fitted successfully.")
+        logger.info("\n%s", result.summary())
+    except Exception as e:
+        logger.error(
+            "Failed to fit mixed-effects model: %s\n"
+            "Probable cause: insufficient data, mis-coded columns, or model singularity.\n"
+            "Check: number of levels per site/period and presence of excessive zeros.\n"
+            "Previous skill: ecological-sampling-design.",
+            e,
+        )
+        sys.exit(1)
+    # -- STEP 3: Extract BACI interaction ------------------------------------------
+    logger.info("-- STEP 3: Extract BACI interaction and compute effect")
+    try:
+        # Read the interaction row straight from the fitted result
+        params = result.params
+        pvalues = result.pvalues
+        bse = result.bse
+        tvalues = result.tvalues
+        if "interaction" in params.index:
+            baci_est = params["interaction"]
+            baci_se = bse["interaction"]
+            baci_z = tvalues["interaction"]
+            baci_p = pvalues["interaction"]
+            logger.info("=== BACI Interaction ===")
+            logger.info(
+                "Estimate: %.4f | SE: %.4f | z: %.4f | p-value: %.4f",
+                baci_est,
+                baci_se,
+                baci_z,
+                baci_p,
+            )
+            if baci_p < 0.05:
+                logger.info(
+                    "BACI interaction is statistically significant (p = %.4f). "
+                    "Evidence of impact effect.",
+                    baci_p,
+                )
+            else:
+                logger.info(
+                    "BACI interaction is NOT statistically significant (p = %.4f). "
+                    "No strong evidence of impact effect.",
+                    baci_p,
+                )
+            # Save results
+            baci_df = pd.DataFrame(
+                {
+                    "term": ["period_num:treatment_num (BACI interaction)"],
+                    "Estimate": [round(baci_est, 4)],
+                    "Std.Error": [round(baci_se, 4)],
+                    "z.value": [round(baci_z, 4)],
+                    "p.value": [round(baci_p, 6)],
+                }
+            )
+            baci_csv_path = os.path.join(output_dir, "baci_results.csv")
+            baci_df.to_csv(baci_csv_path, index=False)
+            logger.info("baci_results.csv saved in: %s", output_dir)
+        else:
+            logger.warning(
+                "BACI interaction term (period:treatment) not found in coefficients table."
+            )
+    except Exception as e:
+        logger.error(
+            "Failed to extract model coefficients: %s\n"
+            "Probable cause: model did not converge or unexpected structure.\n"
+            "Check: inspect model summary manually.\n"
+            "Previous skill: [none].",
+            e,
+        )
+        sys.exit(1)
+    # -- STEP 4: Generate BACI interaction plot ------------------------------------
+    logger.info("-- STEP 4: Generate BACI interaction plot (control vs impact, before vs after)")
+    try:
+        # Cell means with standard errors (sample SD / sqrt(n)) for the error bars
+        plot_dat = (
+            dat.groupby(["period", "treatment"], observed=True)
+            .agg(
+                mean_y=(response_var, "mean"),
+                se_y=(response_var, lambda x: x.std(ddof=1) / np.sqrt(len(x))),
+            )
+            .reset_index()
+        )
+        fig, ax = plt.subplots(figsize=(6, 5), dpi=150)
+        colors = {"control": "#2166ac", "impact": "#d6604d"}
+        for trt in ["control", "impact"]:
+            # Sorting on the ordered categorical keeps "before" left of "after"
+            subset = plot_dat[plot_dat["treatment"] == trt].sort_values("period")
+            ax.plot(
+                subset["period"].astype(str),
+                subset["mean_y"],
+                marker="o",
+                markersize=8,
+                linewidth=2,
+                color=colors[trt],
+                label=trt,
+            )
+            ax.errorbar(
+                subset["period"].astype(str),
+                subset["mean_y"],
+                yerr=subset["se_y"],
+                fmt="none",
+                ecolor=colors[trt],
+                capsize=4,
+            )
+        ax.set_ylabel(response_var)
+        ax.set_title("BACI: Control vs Impact")
+        ax.legend()
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        plt.tight_layout()
+        baci_plot_path = os.path.join(output_dir, "baci_plot.png")
+        fig.savefig(baci_plot_path)
+        plt.close(fig)
+        logger.info("baci_plot.png saved in: %s", output_dir)
+    except Exception as e:
+        logger.error(
+            "Failed to generate or save BACI plot: %s\n"
+            "Probable cause: insufficient data for summarisation or directory without write permission.\n"
+            "Check: presence of at least one record per combination period/treatment.\n"
+            "Previous skill: [none].",
+            e,
+        )
+        sys.exit(1)
+    # -- STEP 5: Residual diagnostic plots ----------------------------------------
+    logger.info("-- STEP 5: Generate residual diagnostic plots")
+    try:
+        residuals = result.resid
+        fitted_vals = result.fittedvalues
+        fig, axes = plt.subplots(1, 3, figsize=(15, 5), dpi=150)
+        # Residuals vs Fitted
+        axes[0].scatter(fitted_vals, residuals, alpha=0.5, s=20, c="steelblue")
+        axes[0].axhline(y=0, color="red", linestyle="--", linewidth=0.8)
+        axes[0].set_xlabel("Fitted values")
+        axes[0].set_ylabel("Residuals")
+        axes[0].set_title("Residuals vs Fitted")
+        # Histogram of residuals
+        axes[1].hist(residuals, bins=30, color="steelblue", edgecolor="white", density=True)
+        axes[1].set_xlabel("Residuals")
+        axes[1].set_ylabel("Density")
+        axes[1].set_title("Distribution of Residuals")
+        # QQ plot
+        from scipy import stats
+        (osm, osr), (slope, intercept, r) = stats.probplot(residuals, dist="norm")
+        axes[2].scatter(osm, osr, alpha=0.5, s=20, c="steelblue")
+        axes[2].plot(osm, slope * np.array(osm) + intercept, color="red", linewidth=0.8)
+        axes[2].set_xlabel("Theoretical Quantiles")
+        axes[2].set_ylabel("Sample Quantiles")
+        axes[2].set_title("Normal Q-Q Plot")
+        plt.tight_layout()
+        diag_path = os.path.join(output_dir, "residual_diagnostics.png")
+        fig.savefig(diag_path)
+        plt.close(fig)
+        logger.info("residual_diagnostics.png saved in: %s", output_dir)
+    except Exception as e:
+        logger.error(
+            "Failed to generate residual diagnostics: %s\n"
+            "Probable cause: model residuals not available or plotting error.\n"
+            "Check: model convergence status.\n"
+            "Previous skill: [none].",
+            e,
+        )
+        sys.exit(1)
+    logger.info("BACI analysis completed. Outputs in: %s", output_dir)
+# Run the analysis only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()