ecological-agent-skills 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  # Repository Statistics
2
2
 
3
- Generated: 2026-04-03
4
- Version: 3.2.0 (unreleased patch)
3
+ Generated: 2026-04-04
4
+ Version: 3.2.0
5
5
 
6
6
  ---
7
7
 
@@ -11,8 +11,8 @@ Version: 3.2.0 (unreleased patch)
11
11
  |----------|-------|
12
12
  | Skills | 17 |
13
13
  | Workflows | 14 |
14
- | R scripts | 38 |
15
- | Python scripts | 20 |
14
+ | R scripts | 34 |
15
+ | Python scripts | 26 |
16
16
  | Worked examples | 14 |
17
17
  | Resource documents | 53 |
18
18
  | Documentation files (docs/) | 8 |
@@ -70,7 +70,7 @@ Version: 3.2.0 (unreleased patch)
70
70
 
71
71
  | Section | Checks |
72
72
  |---------|--------|
73
- | Structure checks | 585/585 passed |
73
+ | Structure checks | 652/652 passed |
74
74
  | Skills verified | 17 |
75
75
  | Workflows verified | 14 |
76
76
  | Global coverage | 6/6 continents |
@@ -81,7 +81,7 @@ Version: 3.2.0 (unreleased patch)
81
81
 
82
82
  | Test type | Count |
83
83
  |-----------|-------|
84
- | CI structural checks | 585 |
84
+ | CI structural checks | 652 |
85
85
  | Python unit tests (pytest) | 176+ |
86
86
  | R unit tests (testthat) | 28+ |
87
87
  | Agent smoke test cases | 15 |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ecological-agent-skills",
3
- "version": "3.2.0",
3
+ "version": "3.2.1",
4
4
  "description": "17 modular skills for quantitative ecology — SDM, occupancy, PVA, connectivity, prioritization, and more. Works with Claude Code, Gemini CLI, Cursor, Copilot, and any AI agent.",
5
5
  "keywords": [
6
6
  "ecology",
@@ -107,7 +107,7 @@ Guides the agent through the selection, execution, and interpretation of statist
107
107
 
108
108
  ---
109
109
 
110
- ## Key Decisions to Document
110
+ ## Decision Points
111
111
 
112
112
  - Response variable distribution and link function
113
113
  - Random effects structure and rationale
@@ -38,7 +38,7 @@ import numpy as np
38
38
  import pandas as pd
39
39
  import matplotlib.pyplot as plt
40
40
  from scipy.spatial.distance import braycurtis
41
- from scipy.cluster.hierarchy import dendrogram, linkage, copshenetic
41
+ from scipy.cluster.hierarchy import dendrogram, linkage, cophenet
42
42
  from scipy.spatial.distance import squareform
43
43
 
44
44
  try:
@@ -203,7 +203,7 @@ def main():
203
203
  log_step(6, "Hierarchical clustering")
204
204
  try:
205
205
  Z = linkage(squareform(dm), method="ward")
206
- c, _ = copshenetic(Z, squareform(dm))
206
+ c, _ = cophenet(Z, squareform(dm))
207
207
  log_decision("linkage_method", "ward", "minimises total within-cluster variance; standard for ecology")
208
208
  logger.info("Cophenetic correlation (Ward): %.3f", c)
209
209
  if c < 0.7:
@@ -105,7 +105,7 @@ The BACI estimator is only valid if Control and Impact groups had parallel traje
105
105
 
106
106
  ---
107
107
 
108
- ## Key Decisions to Document
108
+ ## Decision Points
109
109
 
110
110
  - Control site selection criteria
111
111
  - BACI model specification (fixed vs. random effects)
@@ -0,0 +1,337 @@
1
+ # ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
2
+ # SPDX-License-Identifier: GPL-3.0-or-later
3
+
4
+ # Usage: python baci_analysis.py [data_csv] [response_var] [output_dir]
5
+ #
6
+ # Arguments:
7
+ # data_csv : CSV with columns site, period (before/after), treatment (control/impact), and the response variable (default: 'data/baci_data.csv')
8
+ # response_var : Name of the response column (default: 'abundance')
9
+ # output_dir : Directory for outputs (default: 'outputs/baci'; created if absent)
10
+ #
11
+ # Outputs:
12
+ # baci_results.csv - BACI interaction coefficient, SE, z, p-value
13
+ # baci_plot.png - Control vs Impact, Before vs After interaction plot
14
+ # residual_diagnostics.png - Residual diagnostic plots
15
+ #
16
+ # Requires: numpy, pandas, scipy, statsmodels, matplotlib
17
+
18
+ import sys
19
+ import os
20
+ import logging
21
+ import numpy as np
22
+ import pandas as pd
23
+ import statsmodels.formula.api as smf
24
+ import matplotlib.pyplot as plt
25
+
26
# -- Inline logger ------------------------------------------------------------
# Name of the skill this script belongs to; also used as the logger name below.
SKILL_NAME = "ecological-impact-assessment"

# Configure the root logger once at import time: INFO and above, with a
# second-resolution timestamp and the level name in front of every message.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="[%(asctime)s] [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Module-wide logger used by every step of the script.
logger = logging.getLogger(SKILL_NAME)

# Ensure a "logs" directory exists relative to the current working directory.
# NOTE(review): no file handler is attached in the visible code -- confirm
# whether this directory is still needed.
os.makedirs("logs", exist_ok=True)
35
+
36
+
37
def log_decision(variable: str, value: str, reason: str) -> None:
    """Record one methodological choice in the run log.

    Emits a single INFO record through the module logger, formatted as
    ``DECISION | <variable> = <value> | <reason>``.

    Args:
        variable: Name of the setting or choice being recorded.
        value: The value that was chosen.
        reason: Short justification for the choice.
    """
    fields = (variable, value, reason)
    logger.info("DECISION | %s = %s | %s", *fields)
40
+
41
+
42
+
43
def _encode_two_level_factor(values, levels, column):
    """Return *values* as an ordered two-level Categorical after validation.

    Labels are compared case-insensitively and with surrounding whitespace
    removed, so "After" and " after" both map to "after".

    Args:
        values: pandas Series holding the raw labels.
        levels: Pair ``(baseline, active)`` of the accepted lower-case labels.
        column: Column name, used only in the error message.

    Returns:
        ``pd.Categorical`` with categories ``levels`` (ordered).

    Raises:
        ValueError: If any label is missing or is not one of *levels*.
            Without this check such rows would silently become NaN and later
            be coded as the baseline level.
    """
    missing = values.isna()
    labels = values.astype(str).str.strip().str.lower()
    unexpected = sorted(set(labels[~missing]) - set(levels))
    if unexpected or missing.any():
        raise ValueError(
            f"column '{column}' must contain only {list(levels)} (case-insensitive); "
            f"found unexpected label(s) {unexpected} and {int(missing.sum())} missing value(s)"
        )
    return pd.Categorical(labels, categories=list(levels), ordered=True)


def main():
    """Run the BACI (Before-After-Control-Impact) analysis end to end.

    Reads up to three positional command-line arguments from ``sys.argv``:
    the input CSV (default ``data/baci_data.csv``), the response column
    (default ``abundance``) and the output directory (default
    ``outputs/baci``).

    Steps: load and validate the data, fit a Gaussian linear mixed model with
    a random intercept per site, extract the period x treatment interaction,
    and write ``baci_results.csv``, ``baci_plot.png`` and
    ``residual_diagnostics.png`` to the output directory.

    Every failure is logged with a probable cause and the process exits with
    status 1.
    """
    # -- Parse arguments -----------------------------------------------------------
    args = sys.argv[1:]
    data_file = args[0] if len(args) >= 1 else "data/baci_data.csv"
    response_var = args[1] if len(args) >= 2 else "abundance"
    output_dir = args[2] if len(args) >= 3 else "outputs/baci"
    os.makedirs(output_dir, exist_ok=True)

    logger.info(
        "Skill: %s | data_file=%s | response_var=%s | output_dir=%s",
        SKILL_NAME,
        data_file,
        response_var,
        output_dir,
    )

    # -- Input precondition check --------------------------------------------------
    if not os.path.isfile(data_file):
        logger.error(
            "Input not found: %s\n"
            "Probable cause: wrong path or file not generated by previous step.\n"
            "Check: whether the CSV file exists and the name is correct.\n"
            "Previous skill: ecological-sampling-design or field collection.",
            data_file,
        )
        sys.exit(1)

    # -- STEP 1: Load and validate BACI data --------------------------------------
    logger.info("-- STEP 1: Load and validate BACI data")
    try:
        dat = pd.read_csv(data_file)
    except Exception as e:
        logger.error(
            "Failed to read CSV: %s\n"
            "Probable cause: corrupted file or incorrect encoding.\n"
            "Check: open the file in a text editor and verify separators.\n"
            "Previous skill: ecological-sampling-design.",
            e,
        )
        sys.exit(1)

    required_cols = {"site", "period", "treatment", response_var}
    missing_cols = required_cols - set(dat.columns)
    if missing_cols:
        logger.error(
            "Required columns missing: %s\n"
            "Probable cause: CSV does not follow the expected BACI schema.\n"
            "Check: the file must have columns site, period, treatment and the response variable.\n"
            "Previous skill: ecological-sampling-design.",
            ", ".join(missing_cols),
        )
        sys.exit(1)

    n_na = int(dat[response_var].isna().sum())
    if n_na > 0:
        logger.warning(
            "Column '%s' contains %d NA values -- may be excluded by the model function.",
            response_var,
            n_na,
        )

    # Encode factors. Unknown or missing labels abort here: otherwise they
    # would turn into NaN categories and be coded as before/control below,
    # silently biasing the BACI interaction estimate.
    try:
        dat["period"] = _encode_two_level_factor(
            dat["period"], ("before", "after"), "period"
        )
        dat["treatment"] = _encode_two_level_factor(
            dat["treatment"], ("control", "impact"), "treatment"
        )
    except ValueError as e:
        logger.error(
            "Invalid factor coding: %s\n"
            "Probable cause: period/treatment labels do not follow the expected BACI schema.\n"
            "Check: period must be 'before' or 'after'; treatment must be 'control' or 'impact'.\n"
            "Previous skill: ecological-sampling-design.",
            e,
        )
        sys.exit(1)

    n_sites = dat["site"].nunique()
    n_before = int((dat["period"] == "before").sum())
    n_after = int((dat["period"] == "after").sum())
    logger.info("Sites: %d | Before: %d | After: %d", n_sites, n_before, n_after)
    log_decision(
        "response_var",
        response_var,
        "response variable provided by user or default 'abundance'",
    )
    log_decision(
        "family",
        "Gaussian mixed model (MixedLM)",
        "linear mixed-effects model with random intercept for site; "
        "for overdispersed counts consider Poisson/NB GLMMs via statsmodels or external packages",
    )

    if n_sites < 3:
        logger.warning(
            "Only %d site(s) detected -- random effects may not be estimable.", n_sites
        )

    # -- STEP 2: Fit BACI mixed-effects model -------------------------------------
    logger.info("-- STEP 2: Fit BACI mixed-effects model")

    # Create numeric dummy variables for the interaction term
    # statsmodels MixedLM works with formula interface
    # period: before=0, after=1; treatment: control=0, impact=1
    dat["period_num"] = (dat["period"] == "after").astype(int)
    dat["treatment_num"] = (dat["treatment"] == "impact").astype(int)
    dat["interaction"] = dat["period_num"] * dat["treatment_num"]

    # Column names that are not plain identifiers (spaces, dashes, ...) must
    # be quoted with patsy's Q() or the formula parser rejects them.
    if response_var.isidentifier():
        response_term = response_var
    else:
        response_term = f"Q({response_var!r})"
    formula_str = f"{response_term} ~ period_num + treatment_num + interaction"
    logger.info("Formula: %s + (1|site)", formula_str)

    try:
        model = smf.mixedlm(
            formula_str,
            data=dat,
            groups=dat["site"],
        )
        result = model.fit(reml=True)
        logger.info("Model fitted successfully.")
        logger.info("\n%s", result.summary())
    except Exception as e:
        logger.error(
            "Failed to fit mixed-effects model: %s\n"
            "Probable cause: insufficient data, mis-coded columns, or model singularity.\n"
            "Check: number of levels per site/period and presence of excessive zeros.\n"
            "Previous skill: ecological-sampling-design.",
            e,
        )
        sys.exit(1)

    # -- STEP 3: Extract BACI interaction ------------------------------------------
    logger.info("-- STEP 3: Extract BACI interaction and compute effect")
    try:
        # Extract interaction row from the fitted result
        params = result.params
        pvalues = result.pvalues
        bse = result.bse
        tvalues = result.tvalues

        if "interaction" in params.index:
            baci_est = params["interaction"]
            baci_se = bse["interaction"]
            baci_z = tvalues["interaction"]
            baci_p = pvalues["interaction"]

            logger.info("=== BACI Interaction ===")
            logger.info(
                "Estimate: %.4f | SE: %.4f | z: %.4f | p-value: %.4f",
                baci_est,
                baci_se,
                baci_z,
                baci_p,
            )

            if baci_p < 0.05:
                logger.info(
                    "BACI interaction is statistically significant (p = %.4f). "
                    "Evidence of impact effect.",
                    baci_p,
                )
            else:
                logger.info(
                    "BACI interaction is NOT statistically significant (p = %.4f). "
                    "No strong evidence of impact effect.",
                    baci_p,
                )

            # Save results
            baci_df = pd.DataFrame(
                {
                    "term": ["period_num:treatment_num (BACI interaction)"],
                    "Estimate": [round(baci_est, 4)],
                    "Std.Error": [round(baci_se, 4)],
                    "z.value": [round(baci_z, 4)],
                    "p.value": [round(baci_p, 6)],
                }
            )
            baci_csv_path = os.path.join(output_dir, "baci_results.csv")
            baci_df.to_csv(baci_csv_path, index=False)
            logger.info("baci_results.csv saved in: %s", output_dir)
        else:
            logger.warning(
                "BACI interaction term (period:treatment) not found in coefficients table."
            )
    except Exception as e:
        logger.error(
            "Failed to extract model coefficients: %s\n"
            "Probable cause: model did not converge or unexpected structure.\n"
            "Check: inspect model summary manually.\n"
            "Previous skill: [none].",
            e,
        )
        sys.exit(1)

    # -- STEP 4: Generate BACI interaction plot ------------------------------------
    logger.info("-- STEP 4: Generate BACI interaction plot (control vs impact, before vs after)")
    try:
        # Cell means and standard errors for each period x treatment combination.
        plot_dat = (
            dat.groupby(["period", "treatment"], observed=True)
            .agg(
                mean_y=(response_var, "mean"),
                se_y=(response_var, lambda x: x.std(ddof=1) / np.sqrt(len(x))),
            )
            .reset_index()
        )

        fig, ax = plt.subplots(figsize=(6, 5), dpi=150)
        colors = {"control": "#2166ac", "impact": "#d6604d"}

        for trt in ["control", "impact"]:
            # Sorting on the ordered categorical puts "before" left of "after".
            subset = plot_dat[plot_dat["treatment"] == trt].sort_values("period")
            ax.plot(
                subset["period"].astype(str),
                subset["mean_y"],
                marker="o",
                markersize=8,
                linewidth=2,
                color=colors[trt],
                label=trt,
            )
            ax.errorbar(
                subset["period"].astype(str),
                subset["mean_y"],
                yerr=subset["se_y"],
                fmt="none",
                ecolor=colors[trt],
                capsize=4,
            )

        ax.set_ylabel(response_var)
        ax.set_title("BACI: Control vs Impact")
        ax.legend()
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        plt.tight_layout()
        baci_plot_path = os.path.join(output_dir, "baci_plot.png")
        fig.savefig(baci_plot_path)
        plt.close(fig)
        logger.info("baci_plot.png saved in: %s", output_dir)

    except Exception as e:
        logger.error(
            "Failed to generate or save BACI plot: %s\n"
            "Probable cause: insufficient data for summarisation or directory without write permission.\n"
            "Check: presence of at least one record per combination period/treatment.\n"
            "Previous skill: [none].",
            e,
        )
        sys.exit(1)

    # -- STEP 5: Residual diagnostic plots ----------------------------------------
    logger.info("-- STEP 5: Generate residual diagnostic plots")
    try:
        residuals = result.resid
        fitted_vals = result.fittedvalues

        fig, axes = plt.subplots(1, 3, figsize=(15, 5), dpi=150)

        # Residuals vs Fitted
        axes[0].scatter(fitted_vals, residuals, alpha=0.5, s=20, c="steelblue")
        axes[0].axhline(y=0, color="red", linestyle="--", linewidth=0.8)
        axes[0].set_xlabel("Fitted values")
        axes[0].set_ylabel("Residuals")
        axes[0].set_title("Residuals vs Fitted")

        # Histogram of residuals
        axes[1].hist(residuals, bins=30, color="steelblue", edgecolor="white", density=True)
        axes[1].set_xlabel("Residuals")
        axes[1].set_ylabel("Density")
        axes[1].set_title("Distribution of Residuals")

        # QQ plot
        from scipy import stats

        (osm, osr), (slope, intercept, r) = stats.probplot(residuals, dist="norm")
        axes[2].scatter(osm, osr, alpha=0.5, s=20, c="steelblue")
        axes[2].plot(osm, slope * np.array(osm) + intercept, color="red", linewidth=0.8)
        axes[2].set_xlabel("Theoretical Quantiles")
        axes[2].set_ylabel("Sample Quantiles")
        axes[2].set_title("Normal Q-Q Plot")

        plt.tight_layout()
        diag_path = os.path.join(output_dir, "residual_diagnostics.png")
        fig.savefig(diag_path)
        plt.close(fig)
        logger.info("residual_diagnostics.png saved in: %s", output_dir)

    except Exception as e:
        logger.error(
            "Failed to generate residual diagnostics: %s\n"
            "Probable cause: model residuals not available or plotting error.\n"
            "Check: model convergence status.\n"
            "Previous skill: [none].",
            e,
        )
        sys.exit(1)

    logger.info("BACI analysis completed. Outputs in: %s", output_dir)
334
+
335
+
336
+ if __name__ == "__main__":
337
+ main()