ecological-agent-skills 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/repository-statistics.md +6 -6
- package/package.json +1 -1
- package/skills/biostatistics-workbench/SKILL.md +1 -1
- package/skills/community-ecology-ordination/scripts/community_analysis.py +2 -2
- package/skills/ecological-impact-assessment/SKILL.md +1 -1
- package/skills/ecological-impact-assessment/scripts/baci_analysis.py +337 -0
- package/skills/ecological-impact-assessment/scripts/power_analysis_baci.py +284 -0
- package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.py +245 -0
- package/skills/environmental-time-series/SKILL.md +1 -1
- package/skills/model-validation-and-uncertainty/SKILL.md +1 -1
- package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.py +438 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_sdm.py +257 -0
- package/skills/occupancy-and-detection/SKILL.md +1 -1
- package/skills/predictive-modeling-best-practices/scripts/collinearity_check.py +207 -0
- package/skills/reproducible-ecology-pipeline/scripts/check_packages.R +2 -0
- package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +40 -5
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Repository Statistics
|
|
2
2
|
|
|
3
|
-
Generated: 2026-04-
|
|
4
|
-
Version: 3.2.0
|
|
3
|
+
Generated: 2026-04-04
|
|
4
|
+
Version: 3.2.0
|
|
5
5
|
|
|
6
6
|
---
|
|
7
7
|
|
|
@@ -11,8 +11,8 @@ Version: 3.2.0 (unreleased patch)
|
|
|
11
11
|
|----------|-------|
|
|
12
12
|
| Skills | 17 |
|
|
13
13
|
| Workflows | 14 |
|
|
14
|
-
| R scripts |
|
|
15
|
-
| Python scripts |
|
|
14
|
+
| R scripts | 34 |
|
|
15
|
+
| Python scripts | 26 |
|
|
16
16
|
| Worked examples | 14 |
|
|
17
17
|
| Resource documents | 53 |
|
|
18
18
|
| Documentation files (docs/) | 8 |
|
|
@@ -70,7 +70,7 @@ Version: 3.2.0 (unreleased patch)
|
|
|
70
70
|
|
|
71
71
|
| Section | Checks |
|
|
72
72
|
|---------|--------|
|
|
73
|
-
| Structure checks |
|
|
73
|
+
| Structure checks | 652/652 passed |
|
|
74
74
|
| Skills verified | 17 |
|
|
75
75
|
| Workflows verified | 14 |
|
|
76
76
|
| Global coverage | 6/6 continents |
|
|
@@ -81,7 +81,7 @@ Version: 3.2.0 (unreleased patch)
|
|
|
81
81
|
|
|
82
82
|
| Test type | Count |
|
|
83
83
|
|-----------|-------|
|
|
84
|
-
| CI structural checks |
|
|
84
|
+
| CI structural checks | 652 |
|
|
85
85
|
| Python unit tests (pytest) | 176+ |
|
|
86
86
|
| R unit tests (testthat) | 28+ |
|
|
87
87
|
| Agent smoke test cases | 15 |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ecological-agent-skills",
|
|
3
|
-
"version": "3.2.0",
|
|
3
|
+
"version": "3.2.1",
|
|
4
4
|
"description": "17 modular skills for quantitative ecology — SDM, occupancy, PVA, connectivity, prioritization, and more. Works with Claude Code, Gemini CLI, Cursor, Copilot, and any AI agent.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ecology",
|
|
@@ -107,7 +107,7 @@ Guides the agent through the selection, execution, and interpretation of statist
|
|
|
107
107
|
|
|
108
108
|
---
|
|
109
109
|
|
|
110
|
-
##
|
|
110
|
+
## Decision Points
|
|
111
111
|
|
|
112
112
|
- Response variable distribution and link function
|
|
113
113
|
- Random effects structure and rationale
|
|
@@ -38,7 +38,7 @@ import numpy as np
|
|
|
38
38
|
import pandas as pd
|
|
39
39
|
import matplotlib.pyplot as plt
|
|
40
40
|
from scipy.spatial.distance import braycurtis
|
|
41
|
-
from scipy.cluster.hierarchy import dendrogram, linkage,
|
|
41
|
+
from scipy.cluster.hierarchy import dendrogram, linkage, cophenet
|
|
42
42
|
from scipy.spatial.distance import squareform
|
|
43
43
|
|
|
44
44
|
try:
|
|
@@ -203,7 +203,7 @@ def main():
|
|
|
203
203
|
log_step(6, "Hierarchical clustering")
|
|
204
204
|
try:
|
|
205
205
|
Z = linkage(squareform(dm), method="ward")
|
|
206
|
-
c, _ =
|
|
206
|
+
c, _ = cophenet(Z, squareform(dm))
|
|
207
207
|
log_decision("linkage_method", "ward", "minimises total within-cluster variance; standard for ecology")
|
|
208
208
|
logger.info("Cophenetic correlation (Ward): %.3f", c)
|
|
209
209
|
if c < 0.7:
|
|
@@ -105,7 +105,7 @@ The BACI estimator is only valid if Control and Impact groups had parallel traje
|
|
|
105
105
|
|
|
106
106
|
---
|
|
107
107
|
|
|
108
|
-
##
|
|
108
|
+
## Decision Points
|
|
109
109
|
|
|
110
110
|
- Control site selection criteria
|
|
111
111
|
- BACI model specification (fixed vs. random effects)
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
2
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
3
|
+
|
|
4
|
+
# Usage: python baci_analysis.py <data_csv> <response_var> <output_dir>
|
|
5
|
+
#
|
|
6
|
+
# Arguments:
|
|
7
|
+
# data_csv : CSV with columns site, period, treatment, and the response variable
|
|
8
|
+
# response_var : Name of the response column (default: 'abundance')
|
|
9
|
+
# output_dir : Directory for outputs (created if absent)
|
|
10
|
+
#
|
|
11
|
+
# Outputs:
|
|
12
|
+
# baci_results.csv - BACI interaction coefficient, SE, z, p-value
|
|
13
|
+
# baci_plot.png - Control vs Impact, Before vs After interaction plot
|
|
14
|
+
# residual_diagnostics.png - Residual diagnostic plots
|
|
15
|
+
#
|
|
16
|
+
# Requires: numpy, pandas, statsmodels, matplotlib
|
|
17
|
+
|
|
18
|
+
import sys
|
|
19
|
+
import os
|
|
20
|
+
import logging
|
|
21
|
+
import numpy as np
|
|
22
|
+
import pandas as pd
|
|
23
|
+
import statsmodels.formula.api as smf
|
|
24
|
+
import matplotlib.pyplot as plt
|
|
25
|
+
|
|
26
|
+
# -- Inline logger ------------------------------------------------------------
# Name of the skill this script belongs to; also used as the logger name.
SKILL_NAME = "ecological-impact-assessment"

# Configure the root logger once: INFO level, "[timestamp] [LEVEL] message" records.
logging.basicConfig(
    format="[%(asctime)s] [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(SKILL_NAME)

# Make sure a "logs" directory exists relative to the current working directory.
os.makedirs("logs", exist_ok=True)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def log_decision(variable: str, value: str, reason: str) -> None:
    """Record a methodological choice in the skill log.

    Emits a single INFO record of the form
    ``DECISION | <variable> = <value> | <reason>`` through the module-level
    ``logger``.

    Args:
        variable: Name of the setting or choice being recorded.
        value: The option that was selected.
        reason: Short justification for the selection.
    """
    template = "DECISION | %s = %s | %s"
    logger.info(template, variable, value, reason)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _formula_name(column: str) -> str:
    """Return *column* in a form that is safe to embed in a patsy formula.

    Plain Python identifiers are returned unchanged. Anything else (spaces,
    dots, leading digits, Python keywords) is wrapped in patsy's ``Q("...")``
    quoting helper so the formula parser treats it as a column name.
    """
    import keyword

    if column.isidentifier() and not keyword.iskeyword(column):
        return column
    escaped = column.replace("\\", "\\\\").replace('"', '\\"')
    return f'Q("{escaped}")'


def _load_baci_data(data_file: str, response_var: str) -> pd.DataFrame:
    """Read the BACI CSV and verify that the required columns are present.

    Logs an error and exits with status 1 if the file cannot be parsed or if
    any of ``site``, ``period``, ``treatment`` or *response_var* is missing.
    """
    try:
        dat = pd.read_csv(data_file)
    except Exception as e:
        logger.error(
            "Failed to read CSV: %s\n"
            "Probable cause: corrupted file or incorrect encoding.\n"
            "Check: open the file in a text editor and verify separators.\n"
            "Previous skill: ecological-sampling-design.",
            e,
        )
        sys.exit(1)

    required_cols = {"site", "period", "treatment", response_var}
    missing_cols = required_cols - set(dat.columns)
    if missing_cols:
        logger.error(
            "Required columns missing: %s\n"
            "Probable cause: CSV does not follow the expected BACI schema.\n"
            "Check: the file must have columns site, period, treatment and the response variable.\n"
            "Previous skill: ecological-sampling-design.",
            # Sorted so the message is deterministic (set order is not).
            ", ".join(sorted(str(col) for col in missing_cols)),
        )
        sys.exit(1)

    return dat


def _prepare_baci_data(dat: pd.DataFrame, response_var: str) -> pd.DataFrame:
    """Encode the design factors and drop rows that cannot enter the model.

    ``period`` and ``treatment`` labels are trimmed and lower-cased before
    being mapped onto the levels before/after and control/impact. Rows whose
    label is still unrecognised, or whose response or site is missing, are
    excluded with a warning. Left in place, the 0/1 dummy coding would
    silently treat them as "before" / "control".
    """
    n_na = int(dat[response_var].isna().sum())
    if n_na > 0:
        logger.warning(
            "Column '%s' contains %d NA values -- may be excluded by the model function.",
            response_var,
            n_na,
        )

    factor_levels = (
        ("period", ["before", "after"]),
        ("treatment", ["control", "impact"]),
    )
    for column, levels in factor_levels:
        # Normalise case/whitespace; keep genuinely missing cells as NaN.
        labels = dat[column].astype(str).str.strip().str.lower()
        labels = labels.where(dat[column].notna())
        unknown = sorted(labels[labels.notna() & ~labels.isin(levels)].unique())
        if unknown:
            logger.warning(
                "Column '%s' contains unrecognised level(s) %s (expected %s) -- "
                "affected rows will be excluded.",
                column,
                unknown,
                levels,
            )
        dat[column] = pd.Categorical(labels, categories=levels, ordered=True)

    usable = dat[[response_var, "site", "period", "treatment"]].notna().all(axis=1)
    n_dropped = int((~usable).sum())
    if n_dropped > 0:
        logger.warning(
            "Excluding %d row(s) with a missing response, site, period or treatment.",
            n_dropped,
        )
        dat = dat.loc[usable].copy()

    # The BACI contrast needs observations in all four period x treatment cells.
    empty_cells = [
        f"{period}/{treatment}"
        for period in ("before", "after")
        for treatment in ("control", "impact")
        if not ((dat["period"] == period) & (dat["treatment"] == treatment)).any()
    ]
    if empty_cells:
        logger.warning(
            "No observations for period/treatment cell(s): %s -- "
            "the BACI interaction may not be estimable.",
            ", ".join(empty_cells),
        )

    return dat


def _fit_baci_model(dat: pd.DataFrame, response_var: str):
    """Fit the Gaussian mixed model ``y ~ period * treatment + (1|site)``.

    Adds the 0/1 columns ``period_num``, ``treatment_num`` and ``interaction``
    to *dat* and returns the fitted statsmodels result. Logs an error and
    exits with status 1 if fitting raises.
    """
    # period: before=0, after=1; treatment: control=0, impact=1
    dat["period_num"] = (dat["period"] == "after").astype(int)
    dat["treatment_num"] = (dat["treatment"] == "impact").astype(int)
    dat["interaction"] = dat["period_num"] * dat["treatment_num"]

    formula_str = (
        f"{_formula_name(response_var)} ~ period_num + treatment_num + interaction"
    )
    logger.info("Formula: %s + (1|site)", formula_str)

    try:
        model = smf.mixedlm(
            formula_str,
            data=dat,
            groups=dat["site"],
        )
        result = model.fit(reml=True)
        logger.info("Model fitted successfully.")
        logger.info("\n%s", result.summary())
    except Exception as e:
        logger.error(
            "Failed to fit mixed-effects model: %s\n"
            "Probable cause: insufficient data, mis-coded columns, or model singularity.\n"
            "Check: number of levels per site/period and presence of excessive zeros.\n"
            "Previous skill: ecological-sampling-design.",
            e,
        )
        sys.exit(1)

    # Estimates from a non-converged optimiser should not be trusted blindly.
    if not getattr(result, "converged", True):
        logger.warning(
            "Optimiser did not report convergence -- interpret estimates with caution."
        )

    return result


def _save_baci_interaction(result, output_dir: str) -> None:
    """Log the BACI interaction estimate and write ``baci_results.csv``.

    If the fitted model has no ``interaction`` coefficient a warning is logged
    and nothing is written. Any other failure is logged and exits with status 1.
    """
    try:
        params = result.params
        if "interaction" not in params.index:
            logger.warning(
                "BACI interaction term (period:treatment) not found in coefficients table."
            )
            return

        baci_est = params["interaction"]
        baci_se = result.bse["interaction"]
        baci_z = result.tvalues["interaction"]
        baci_p = result.pvalues["interaction"]

        logger.info("=== BACI Interaction ===")
        logger.info(
            "Estimate: %.4f | SE: %.4f | z: %.4f | p-value: %.4f",
            baci_est,
            baci_se,
            baci_z,
            baci_p,
        )

        if baci_p < 0.05:
            logger.info(
                "BACI interaction is statistically significant (p = %.4f). "
                "Evidence of impact effect.",
                baci_p,
            )
        else:
            logger.info(
                "BACI interaction is NOT statistically significant (p = %.4f). "
                "No strong evidence of impact effect.",
                baci_p,
            )

        baci_df = pd.DataFrame(
            {
                "term": ["period_num:treatment_num (BACI interaction)"],
                "Estimate": [round(baci_est, 4)],
                "Std.Error": [round(baci_se, 4)],
                "z.value": [round(baci_z, 4)],
                "p.value": [round(baci_p, 6)],
            }
        )
        baci_csv_path = os.path.join(output_dir, "baci_results.csv")
        baci_df.to_csv(baci_csv_path, index=False)
        logger.info("baci_results.csv saved in: %s", output_dir)
    except Exception as e:
        logger.error(
            "Failed to extract model coefficients: %s\n"
            "Probable cause: model did not converge or unexpected structure.\n"
            "Check: inspect model summary manually.\n"
            "Previous skill: [none].",
            e,
        )
        sys.exit(1)


def _plot_baci_interaction(dat: pd.DataFrame, response_var: str, output_dir: str) -> None:
    """Save ``baci_plot.png``: cell means (+/- SE) for control vs impact over time."""
    try:
        plot_dat = (
            dat.groupby(["period", "treatment"], observed=True)
            .agg(
                mean_y=(response_var, "mean"),
                se_y=(response_var, lambda x: x.std(ddof=1) / np.sqrt(len(x))),
            )
            .reset_index()
        )

        fig, ax = plt.subplots(figsize=(6, 5), dpi=150)
        colors = {"control": "#2166ac", "impact": "#d6604d"}

        for trt in ["control", "impact"]:
            subset = plot_dat[plot_dat["treatment"] == trt].sort_values("period")
            ax.plot(
                subset["period"].astype(str),
                subset["mean_y"],
                marker="o",
                markersize=8,
                linewidth=2,
                color=colors[trt],
                label=trt,
            )
            ax.errorbar(
                subset["period"].astype(str),
                subset["mean_y"],
                yerr=subset["se_y"],
                fmt="none",
                ecolor=colors[trt],
                capsize=4,
            )

        ax.set_ylabel(response_var)
        ax.set_title("BACI: Control vs Impact")
        ax.legend()
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        plt.tight_layout()
        baci_plot_path = os.path.join(output_dir, "baci_plot.png")
        fig.savefig(baci_plot_path)
        plt.close(fig)
        logger.info("baci_plot.png saved in: %s", output_dir)

    except Exception as e:
        logger.error(
            "Failed to generate or save BACI plot: %s\n"
            "Probable cause: insufficient data for summarisation or directory without write permission.\n"
            "Check: presence of at least one record per combination period/treatment.\n"
            "Previous skill: [none].",
            e,
        )
        sys.exit(1)


def _plot_residual_diagnostics(result, output_dir: str) -> None:
    """Save ``residual_diagnostics.png``: residuals vs fitted, histogram, Q-Q plot."""
    try:
        residuals = result.resid
        fitted_vals = result.fittedvalues

        fig, axes = plt.subplots(1, 3, figsize=(15, 5), dpi=150)

        # Residuals vs Fitted
        axes[0].scatter(fitted_vals, residuals, alpha=0.5, s=20, c="steelblue")
        axes[0].axhline(y=0, color="red", linestyle="--", linewidth=0.8)
        axes[0].set_xlabel("Fitted values")
        axes[0].set_ylabel("Residuals")
        axes[0].set_title("Residuals vs Fitted")

        # Histogram of residuals
        axes[1].hist(residuals, bins=30, color="steelblue", edgecolor="white", density=True)
        axes[1].set_xlabel("Residuals")
        axes[1].set_ylabel("Density")
        axes[1].set_title("Distribution of Residuals")

        # QQ plot
        from scipy import stats

        (osm, osr), (slope, intercept, r) = stats.probplot(residuals, dist="norm")
        axes[2].scatter(osm, osr, alpha=0.5, s=20, c="steelblue")
        axes[2].plot(osm, slope * np.array(osm) + intercept, color="red", linewidth=0.8)
        axes[2].set_xlabel("Theoretical Quantiles")
        axes[2].set_ylabel("Sample Quantiles")
        axes[2].set_title("Normal Q-Q Plot")

        plt.tight_layout()
        diag_path = os.path.join(output_dir, "residual_diagnostics.png")
        fig.savefig(diag_path)
        plt.close(fig)
        logger.info("residual_diagnostics.png saved in: %s", output_dir)

    except Exception as e:
        logger.error(
            "Failed to generate residual diagnostics: %s\n"
            "Probable cause: model residuals not available or plotting error.\n"
            "Check: model convergence status.\n"
            "Previous skill: [none].",
            e,
        )
        sys.exit(1)


def main():
    """Run the BACI analysis from the command line.

    Positional arguments (all optional, read from ``sys.argv``):
        1. data_csv     -- input CSV (default ``data/baci_data.csv``)
        2. response_var -- response column name (default ``abundance``)
        3. output_dir   -- output directory, created if absent
                           (default ``outputs/baci``)

    Writes ``baci_results.csv``, ``baci_plot.png`` and
    ``residual_diagnostics.png`` into the output directory. Any failed step
    logs an error and exits with status 1.
    """
    # -- Parse arguments -----------------------------------------------------------
    args = sys.argv[1:]
    data_file = args[0] if len(args) >= 1 else "data/baci_data.csv"
    response_var = args[1] if len(args) >= 2 else "abundance"
    output_dir = args[2] if len(args) >= 3 else "outputs/baci"
    os.makedirs(output_dir, exist_ok=True)

    logger.info(
        "Skill: %s | data_file=%s | response_var=%s | output_dir=%s",
        SKILL_NAME,
        data_file,
        response_var,
        output_dir,
    )

    # -- Input precondition check --------------------------------------------------
    if not os.path.isfile(data_file):
        logger.error(
            "Input not found: %s\n"
            "Probable cause: wrong path or file not generated by previous step.\n"
            "Check: whether the CSV file exists and the name is correct.\n"
            "Previous skill: ecological-sampling-design or field collection.",
            data_file,
        )
        sys.exit(1)

    # -- STEP 1: Load and validate BACI data --------------------------------------
    logger.info("-- STEP 1: Load and validate BACI data")
    dat = _load_baci_data(data_file, response_var)
    dat = _prepare_baci_data(dat, response_var)

    n_sites = dat["site"].nunique()
    n_before = int((dat["period"] == "before").sum())
    n_after = int((dat["period"] == "after").sum())
    logger.info("Sites: %d | Before: %d | After: %d", n_sites, n_before, n_after)
    log_decision(
        "response_var",
        response_var,
        "response variable provided by user or default 'abundance'",
    )
    log_decision(
        "family",
        "Gaussian mixed model (MixedLM)",
        "linear mixed-effects model with random intercept for site; "
        "for overdispersed counts consider Poisson/NB GLMMs via statsmodels or external packages",
    )

    if n_sites < 3:
        logger.warning(
            "Only %d site(s) detected -- random effects may not be estimable.", n_sites
        )

    # -- STEP 2: Fit BACI mixed-effects model -------------------------------------
    logger.info("-- STEP 2: Fit BACI mixed-effects model")
    result = _fit_baci_model(dat, response_var)

    # -- STEP 3: Extract BACI interaction ------------------------------------------
    logger.info("-- STEP 3: Extract BACI interaction and compute effect")
    _save_baci_interaction(result, output_dir)

    # -- STEP 4: Generate BACI interaction plot ------------------------------------
    logger.info("-- STEP 4: Generate BACI interaction plot (control vs impact, before vs after)")
    _plot_baci_interaction(dat, response_var, output_dir)

    # -- STEP 5: Residual diagnostic plots ----------------------------------------
    logger.info("-- STEP 5: Generate residual diagnostic plots")
    _plot_residual_diagnostics(result, output_dir)

    logger.info("BACI analysis completed. Outputs in: %s", output_dir)


if __name__ == "__main__":
    main()
|