ecological-agent-skills 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/repository-statistics.md +6 -6
- package/package.json +1 -1
- package/skills/biostatistics-workbench/SKILL.md +1 -1
- package/skills/community-ecology-ordination/scripts/community_analysis.py +2 -2
- package/skills/ecological-impact-assessment/SKILL.md +1 -1
- package/skills/ecological-impact-assessment/scripts/baci_analysis.py +337 -0
- package/skills/ecological-impact-assessment/scripts/power_analysis_baci.py +284 -0
- package/skills/ecosystem-services-assessment/scripts/tradeoff_analysis.py +245 -0
- package/skills/environmental-time-series/SKILL.md +1 -1
- package/skills/model-validation-and-uncertainty/SKILL.md +1 -1
- package/skills/model-validation-and-uncertainty/scripts/extrapolation_risk.py +438 -0
- package/skills/model-validation-and-uncertainty/scripts/validate_sdm.py +257 -0
- package/skills/occupancy-and-detection/SKILL.md +1 -1
- package/skills/predictive-modeling-best-practices/scripts/collinearity_check.py +207 -0
- package/skills/reproducible-ecology-pipeline/scripts/check_packages.R +2 -0
- package/skills/species-distribution-modeling/scripts/run_ensemble_sdm.R +40 -5
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
3
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
power_analysis_baci.py
|
|
7
|
+
Compute statistical power for BACI designs and recommend minimum sample sizes.
|
|
8
|
+
Usage: python power_analysis_baci.py <output_dir> [effect_size] [n_sites] [n_surveys] [alpha] [variance_estimate]
|
|
9
|
+
Outputs: power_curves.png, power_summary.csv, minimum_n_recommendation.md
|
|
10
|
+
Requires: numpy, scipy, matplotlib
|
|
11
|
+
"""
|
|
12
|
+
import logging
|
|
13
|
+
import math
|
|
14
|
+
import sys
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
SKILL_NAME = "ecological-impact-assessment"
|
|
19
|
+
_LOG_DIR = Path("logs")
|
|
20
|
+
_LOG_DIR.mkdir(parents=True, exist_ok=True)
|
|
21
|
+
_log_file = _LOG_DIR / f"skill_{SKILL_NAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
|
|
22
|
+
logging.basicConfig(
|
|
23
|
+
level=logging.INFO,
|
|
24
|
+
format="[%(asctime)s] [%(levelname)s] [" + SKILL_NAME + "] %(message)s",
|
|
25
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
26
|
+
handlers=[
|
|
27
|
+
logging.StreamHandler(sys.stdout),
|
|
28
|
+
logging.FileHandler(_log_file, encoding="utf-8"),
|
|
29
|
+
],
|
|
30
|
+
)
|
|
31
|
+
logger = logging.getLogger(SKILL_NAME)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def log_step(n: int, desc: str) -> None:
    """Emit an INFO-level marker announcing a numbered step of the script.

    Args:
        n: Ordinal of the step within the current run.
        desc: Short human-readable description of what the step does.
    """
    step_template = "-- STEP %d: %s"
    # Arguments are passed separately so the logging module formats lazily.
    logger.info(step_template, n, desc)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def log_decision(var: str, val, why: str) -> None:
    """Record an analysis decision (a parameter, its value and the rationale).

    Args:
        var: Name of the parameter or setting being recorded.
        val: Value chosen for it; rendered with ``%s``.
        why: Free-text justification written to the log next to the value.
    """
    decision_template = "DECISION | %s = %s | %s"
    logger.info(decision_template, var, val, why)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
import numpy as np
|
|
43
|
+
from scipy import stats
|
|
44
|
+
import matplotlib
|
|
45
|
+
matplotlib.use("Agg")
|
|
46
|
+
import matplotlib.pyplot as plt
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ── Helper: BACI power ──────────────────────────────────────────────────────
|
|
50
|
+
# BACI interaction term is tested as a two-sample t-test on the
|
|
51
|
+
# difference-in-differences. Effective n per group = n_sites * n_surveys.
|
|
52
|
+
# Cohen's d adjusted for variance: d = effect_size / sqrt(variance_estimate)
|
|
53
|
+
|
|
54
|
+
def baci_power(n_s: int, n_sv: int, eff: float, var_est: float, a: float) -> float:
    """Approximate BACI power with a two-sided, two-sample t-test.

    The effect is standardised as ``eff / sqrt(var_est)`` and the number of
    observations per group is taken to be ``n_s * n_sv``. Power is the
    probability that a non-central t variate falls outside the two-sided
    critical region at level ``a``.

    NOTE(review): treating every site x survey combination as an independent
    replicate ignores within-site temporal correlation, so the result is
    presumably an optimistic bound -- confirm against the intended design.

    Args:
        n_s: Number of sites per group (must be >= 1).
        n_sv: Number of survey occasions (must be >= 1).
        eff: Effect size in the same units as ``sqrt(var_est)``.
        var_est: Variance estimate (must be > 0).
        a: Two-sided significance level (must satisfy 0 < a < 1).

    Returns:
        The power as a plain ``float`` in [0, 1], or ``nan`` when the inputs
        are outside the valid domain or leave fewer than one degree of freedom.
    """
    nan = float("nan")

    # Validate explicitly instead of relying on a blanket except: an alpha in
    # [1, 2) would otherwise give a negative critical value and a "power"
    # above 1, and two negative counts would multiply into a positive n.
    try:
        inputs_valid = (
            n_s >= 1
            and n_sv >= 1
            and var_est > 0
            and 0.0 < a < 1.0
        )
    except TypeError:
        # Non-numeric input: keep the historical "return NaN" contract.
        return nan
    if not inputs_valid:
        return nan

    try:
        d_adj = eff / math.sqrt(var_est)
        n_eff = n_s * n_sv  # effective replication per group
        df = 2 * n_eff - 2  # degrees of freedom for two-sample t-test
        if df < 1:
            return nan
        # Non-centrality parameter
        ncp = d_adj * math.sqrt(n_eff / 2.0)
        t_crit = stats.t.ppf(1.0 - a / 2.0, df)
        # Power = P(|T_ncp| > t_crit) = 1 - P(-t_crit < T_ncp < t_crit)
        inside = stats.nct.cdf(t_crit, df, ncp) - stats.nct.cdf(-t_crit, df, ncp)
        # scipy returns a NumPy scalar; hand callers a builtin float.
        return float(1.0 - inside)
    except (ArithmeticError, TypeError, ValueError):
        # Only numeric-domain failures are mapped to NaN; anything else
        # (e.g. a programming error) is allowed to propagate.
        return nan
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def find_min_n(target_power: float, vary: str, fixed_n: int, eff: float,
               var_est: float, a: float, n_sv_or_ns: int) -> int | None:
    """Search for the smallest design size whose power reaches ``target_power``.

    Args:
        target_power: Power threshold to reach (e.g. 0.80).
        vary: ``"sites"`` to vary the number of sites (searched over 2..200);
            any other value varies the number of surveys (searched over 1..100).
        fixed_n: Not used by the search; retained for interface compatibility.
        eff: Effect size forwarded to ``baci_power``.
        var_est: Variance estimate forwarded to ``baci_power``.
        a: Significance level forwarded to ``baci_power``.
        n_sv_or_ns: Value of the dimension held constant -- the survey count
            when varying sites, the site count when varying surveys.

    Returns:
        The first candidate whose power is not NaN and is at least
        ``target_power``, or ``None`` when no candidate in range qualifies.
    """
    varying_sites = vary == "sites"
    if varying_sites:
        lower, upper = 2, 200
    else:
        lower, upper = 1, 100

    def _power_at(candidate: int) -> float:
        # The varied dimension goes in the slot that matches ``vary``.
        if varying_sites:
            return baci_power(candidate, n_sv_or_ns, eff, var_est, a)
        return baci_power(n_sv_or_ns, candidate, eff, var_est, a)

    for candidate in range(lower, upper + 1):
        achieved = _power_at(candidate)
        if math.isnan(achieved):
            continue
        if achieved >= target_power:
            return candidate
    return None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def main():
    """Run the BACI power analysis from the command line and write its outputs.

    Positional arguments (all optional, read from ``sys.argv``):
    ``output_dir``, ``effect_size``, ``n_sites``, ``n_surveys``, ``alpha``,
    ``variance_estimate``.

    Writes ``power_curves.png``, ``power_summary.csv`` and
    ``minimum_n_recommendation.md`` into ``output_dir``.

    Exits with status 1 when an argument is not numeric, when a value is
    outside its valid domain, or when the plot cannot be generated.

    NOTE(review): the power computation standardises the effect as
    ``effect_size / sqrt(variance_estimate)``, so ``effect_size`` equals
    Cohen's d only when the variance is 1. The "Cohen's d" labels are kept
    unchanged; the Interpretation bullet now reports the standardised value
    the computation actually uses.
    """
    # Local stdlib import: the summary table is two columns, so the csv module
    # replaces pandas, which the script's "Requires" line never declared.
    import csv

    usage = ("Usage: python power_analysis_baci.py <output_dir> [effect_size] "
             "[n_sites] [n_surveys] [alpha] [variance_estimate]")

    # ── Arguments ────────────────────────────────────────────────────────────
    args = sys.argv[1:]
    output_dir = args[0] if len(args) >= 1 else "outputs/power_analysis"
    try:
        effect_size = float(args[1]) if len(args) >= 2 else 0.5
        n_sites = int(args[2]) if len(args) >= 3 else 10
        n_surveys = int(args[3]) if len(args) >= 4 else 4
        alpha = float(args[4]) if len(args) >= 5 else 0.05
        variance_estimate = float(args[5]) if len(args) >= 6 else 1.0
    except ValueError as exc:
        # Same exit status as the former uncaught traceback, readable message.
        logger.error("Invalid numeric argument: %s\n%s", exc, usage)
        sys.exit(1)

    # Reject values for which the power model is undefined before producing
    # any output; previously these yielded NaN-filled files or a late crash.
    problems = []
    if not math.isfinite(effect_size):
        problems.append("effect_size must be a finite number")
    if n_sites < 1:
        problems.append("n_sites must be >= 1")
    if n_surveys < 1:
        problems.append("n_surveys must be >= 1")
    if not 0.0 < alpha < 1.0:
        problems.append("alpha must satisfy 0 < alpha < 1")
    if not (math.isfinite(variance_estimate) and variance_estimate > 0):
        problems.append("variance_estimate must be a finite number > 0")
    if problems:
        logger.error("Invalid input: %s\n%s", "; ".join(problems), usage)
        sys.exit(1)

    log_decision("effect_size", effect_size,
                 "Cohen's d: 0.2=small, 0.5=medium, 0.8=large. Use 0.5 if no pilot data available.")
    log_decision("n_sites", n_sites,
                 "Number of control + impact sites each side. Minimum recommended: 5 per group.")
    log_decision("n_surveys", n_surveys,
                 "Survey occasions before + after. Minimum recommended: 3 pre + 3 post = 6 total.")
    log_decision("alpha", alpha,
                 "Type I error rate. Standard: 0.05. Use 0.10 for preliminary screening.")
    log_decision("variance_estimate", variance_estimate,
                 "Within-group variance from pilot data or literature. Affects Cohen's d calculation.")

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # ── Step 1: Power x n_sites ──────────────────────────────────────────────
    log_step(1, "Computing power x n_sites curve")
    sites_range = np.arange(2, 31)
    pow_vs_sites = np.array([baci_power(int(ns), n_surveys, effect_size, variance_estimate, alpha)
                             for ns in sites_range])
    # Builtin float keeps formatting independent of the NumPy version.
    current_power = float(baci_power(n_sites, n_surveys, effect_size, variance_estimate, alpha))
    logger.info("Power at n_sites=%d (n_surveys=%d fixed): %.3f", n_sites, n_surveys, current_power)

    # ── Step 2: Power x n_surveys ────────────────────────────────────────────
    log_step(2, "Computing power x n_surveys curve")
    surveys_range = np.arange(1, 21)
    pow_vs_surveys = np.array([baci_power(n_sites, int(nv), effect_size, variance_estimate, alpha)
                               for nv in surveys_range])

    # ── Step 3: Power x effect_size ──────────────────────────────────────────
    log_step(3, "Computing power x effect_size curve")
    eff_range = np.arange(0.1, 1.55, 0.05)
    pow_vs_eff = np.array([baci_power(n_sites, n_surveys, float(e), variance_estimate, alpha)
                           for e in eff_range])

    # ── Step 4: Minimum n calculations ───────────────────────────────────────
    log_step(4, "Calculating minimum n for power = 0.80 and 0.90")

    min_sites_80 = find_min_n(0.80, "sites", n_sites, effect_size, variance_estimate, alpha, n_surveys)
    min_sites_90 = find_min_n(0.90, "sites", n_sites, effect_size, variance_estimate, alpha, n_surveys)
    min_surveys_80 = find_min_n(0.80, "surveys", n_surveys, effect_size, variance_estimate, alpha, n_sites)
    min_surveys_90 = find_min_n(0.90, "surveys", n_surveys, effect_size, variance_estimate, alpha, n_sites)

    logger.info("Min sites for power=0.80: %s | power=0.90: %s (surveys=%d fixed)",
                min_sites_80, min_sites_90, n_surveys)
    logger.info("Min surveys for power=0.80: %s | power=0.90: %s (sites=%d fixed)",
                min_surveys_80, min_surveys_90, n_sites)

    if min_sites_80 is None:
        logger.warning("Power 0.80 not achievable with sites<=200. Increase effect_size or reduce variance_estimate.")

    # ── Step 5: Power curves plot ────────────────────────────────────────────
    log_step(5, "Generating power curves plot")
    try:
        fig, axes = plt.subplots(3, 1, figsize=(8, 12))

        # Panel 1: power vs sites
        ax = axes[0]
        ax.plot(sites_range, pow_vs_sites, color="#2471a3", linewidth=2)
        ax.axhline(0.80, linestyle="--", color="#e74c3c", linewidth=0.8)
        ax.axhline(0.90, linestyle="--", color="#27ae60", linewidth=0.8)
        if min_sites_80 is not None:
            ax.axvline(min_sites_80, linestyle=":", color="#e74c3c", linewidth=0.8)
            ax.annotate(f"n={min_sites_80}\n(80%)", xy=(min_sites_80 + 0.5, 0.05),
                        fontsize=8, color="#e74c3c")
        if min_sites_90 is not None:
            ax.axvline(min_sites_90, linestyle=":", color="#27ae60", linewidth=0.8)
            ax.annotate(f"n={min_sites_90}\n(90%)", xy=(min_sites_90 + 0.5, 0.05),
                        fontsize=8, color="#27ae60")
        ax.set_ylim(0, 1)
        ax.set_ylabel("Statistical Power")
        ax.set_xlabel("Number of sites per group")
        ax.set_title(f"BACI Power vs. Sites (n_surveys={n_surveys} fixed)")

        # Panel 2: power vs surveys
        ax = axes[1]
        ax.plot(surveys_range, pow_vs_surveys, color="#8e44ad", linewidth=2)
        ax.axhline(0.80, linestyle="--", color="#e74c3c", linewidth=0.8)
        ax.axhline(0.90, linestyle="--", color="#27ae60", linewidth=0.8)
        ax.set_ylim(0, 1)
        ax.set_ylabel("Statistical Power")
        ax.set_xlabel("Survey occasions (before + after)")
        ax.set_title(f"BACI Power vs. Surveys (n_sites={n_sites} fixed)")

        # Panel 3: power vs effect size
        ax = axes[2]
        ax.plot(eff_range, pow_vs_eff, color="#e67e22", linewidth=2)
        ax.axhline(0.80, linestyle="--", color="#e74c3c", linewidth=0.8)
        ax.axhline(0.90, linestyle="--", color="#27ae60", linewidth=0.8)
        ax.axvline(effect_size, linestyle=":", color="grey", linewidth=0.8)
        ax.set_ylim(0, 1)
        ax.set_ylabel("Statistical Power")
        ax.set_xlabel("Effect size (Cohen's d)")
        ax.set_title(f"BACI Power vs. Effect Size (n={n_sites} sites, {n_surveys} surveys)")

        fig.tight_layout()
        plot_file = out_path / "power_curves.png"
        fig.savefig(plot_file, dpi=150)
        plt.close(fig)
        logger.info("Power curves plot saved: %s", plot_file)
    except Exception as exc:
        logger.error(
            "Failed to generate power plot: %s\n"
            "Probable cause: matplotlib not installed.\n"
            "Check: pip install matplotlib",
            exc,
        )
        sys.exit(1)

    # ── Step 6: Power summary CSV ────────────────────────────────────────────
    log_step(6, "Saving power summary CSV")

    def _or_na(val):
        # Missing entries share one spelling in the CSV.
        return val if val is not None else "NA"

    summary_rows = [
        ("effect_size", effect_size),
        ("n_sites", n_sites),
        ("n_surveys", n_surveys),
        ("alpha", alpha),
        ("variance_estimate", variance_estimate),
        ("power_current", "NA" if math.isnan(current_power) else round(current_power, 4)),
        ("min_sites_power80", _or_na(min_sites_80)),
        ("min_sites_power90", _or_na(min_sites_90)),
        ("min_surveys_power80", _or_na(min_surveys_80)),
        ("min_surveys_power90", _or_na(min_surveys_90)),
    ]
    csv_file = out_path / "power_summary.csv"
    # Writing row by row keeps integers as integers; a single DataFrame column
    # silently turned them into floats whenever no "NA" string was present.
    with csv_file.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle, lineterminator="\n")
        writer.writerow(("parameter", "value"))
        writer.writerows(summary_rows)
    logger.info("Power summary saved: %s", csv_file)

    # ── Step 7: Recommendation markdown ──────────────────────────────────────
    log_step(7, "Writing minimum-n recommendation report")
    adequacy = "ADEQUATE" if (not math.isnan(current_power) and current_power >= 0.80) else "INSUFFICIENT"

    def _fmt(val):
        return str(val) if val is not None else "not achievable"

    if adequacy == "INSUFFICIENT":
        verdict_sentence = "WARNING: The current design has insufficient power to detect the target effect."
    else:
        verdict_sentence = "The current design has adequate power to detect the target effect."

    # Same standardisation as the power computation (effect / SD); the former
    # text multiplied by the SD and contradicted the reported power whenever
    # the variance differed from 1.
    standardised_d = round(effect_size / math.sqrt(variance_estimate), 3)

    report_lines = [
        "# BACI Power Analysis -- Field Protocol Recommendation",
        "",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "## Input Parameters",
        f"- **Effect size (Cohen's d):** {effect_size}",
        f"- **Number of sites (per group):** {n_sites}",
        f"- **Survey occasions (before + after):** {n_surveys}",
        f"- **Significance level (alpha):** {alpha}",
        f"- **Variance estimate:** {variance_estimate}",
        "",
        "## Current Design Power",
        f"**Statistical power = {round(current_power * 100, 1)}%** ({adequacy})",
        "",
        verdict_sentence,
        "",
        "## Minimum Sample Size Recommendations",
        "",
        f"### To achieve power = 80% (alpha = {alpha})",
        f"- **Sites per group:** >= {_fmt(min_sites_80)} (with {n_surveys} survey occasions)",
        f"- **Survey occasions:** >= {_fmt(min_surveys_80)} (with {n_sites} sites)",
        "",
        f"### To achieve power = 90% (alpha = {alpha})",
        f"- **Sites per group:** >= {_fmt(min_sites_90)} (with {n_surveys} survey occasions)",
        f"- **Survey occasions:** >= {_fmt(min_surveys_90)} (with {n_sites} sites)",
        "",
        "## Interpretation",
        (f"- An effect of **{effect_size}** response units with var = {variance_estimate} "
         f"corresponds to a standardised Cohen's d of {standardised_d}, "
         "which is the value used in the power computation."),
        "- **Rule of thumb:** BACI studies should have >=5 control and >=5 impact sites, with >=3 survey occasions before and >=3 after the impact.",
        "- If power is insufficient, prioritise adding **sites** (stronger than adding surveys) because spatial replication reduces pseudo-replication bias.",
        "",
        "## How to Obtain Variance Estimate",
        "1. **From pilot data:** compute SD of the response variable across sites, then var = SD^2.",
        "2. **From literature:** use SD values reported for the same metric and habitat type.",
        "3. **Conservative default:** use variance_estimate = 1.0 (corresponds to Cohen's d units).",
        "",
        "## References",
        "- Cohen, J. (1988). *Statistical Power Analysis for the Behavioral Sciences* (2nd ed.).",
        "- Underwood, A.J. (1994). On beyond BACI. *Ecological Applications*, 4(1), 3-15.",
        "- Stewart-Oaten, A. & Bence, J.R. (2001). Temporal and spatial variation in environmental impact assessment. *Ecological Monographs*, 71(2), 305-339.",
    ]
    rec_lines = "\n".join(report_lines) + "\n"

    md_file = out_path / "minimum_n_recommendation.md"
    md_file.write_text(rec_lines, encoding="utf-8")
    logger.info("Recommendation report saved: %s", md_file)

    log_step(8, "Done -- power analysis outputs in: %s" % output_dir)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
if __name__ == "__main__":
|
|
284
|
+
main()
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ecological-agent-skills / Copyright (C) 2026 Francisco Diego Barros Barata
|
|
3
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
tradeoff_analysis.py
|
|
7
|
+
ES trade-off and synergy analysis across pixels or land cover units.
|
|
8
|
+
Usage: python tradeoff_analysis.py <es_summary_csv> <output_dir>
|
|
9
|
+
Requires: pandas, numpy, scipy, matplotlib, seaborn
|
|
10
|
+
"""
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from itertools import combinations
|
|
16
|
+
|
|
17
|
+
SKILL_NAME = "ecosystem-services-assessment"
|
|
18
|
+
_LOG_DIR = Path("logs")
|
|
19
|
+
_LOG_DIR.mkdir(parents=True, exist_ok=True)
|
|
20
|
+
_log_file = _LOG_DIR / f"skill_{SKILL_NAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
|
|
21
|
+
logging.basicConfig(
|
|
22
|
+
level=logging.INFO,
|
|
23
|
+
format="[%(asctime)s] [%(levelname)s] [" + SKILL_NAME + "] %(message)s",
|
|
24
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
25
|
+
handlers=[
|
|
26
|
+
logging.StreamHandler(sys.stdout),
|
|
27
|
+
logging.FileHandler(_log_file, encoding="utf-8"),
|
|
28
|
+
],
|
|
29
|
+
)
|
|
30
|
+
logger = logging.getLogger(SKILL_NAME)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def log_step(n: int, desc: str) -> None:
    """Emit an INFO-level marker announcing a numbered step of the script.

    Args:
        n: Ordinal of the step within the current run.
        desc: Short human-readable description of what the step does.
    """
    step_template = "-- STEP %d: %s"
    # Arguments are passed separately so the logging module formats lazily.
    logger.info(step_template, n, desc)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def log_decision(var: str, val, why: str) -> None:
    """Record an analysis decision (a parameter, its value and the rationale).

    Args:
        var: Name of the parameter or setting being recorded.
        val: Value chosen for it; rendered with ``%s``.
        why: Free-text justification written to the log next to the value.
    """
    decision_template = "DECISION | %s = %s | %s"
    logger.info(decision_template, var, val, why)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
import numpy as np
|
|
42
|
+
import pandas as pd
|
|
43
|
+
from scipy.stats import spearmanr
|
|
44
|
+
import matplotlib
|
|
45
|
+
matplotlib.use("Agg")
|
|
46
|
+
import matplotlib.pyplot as plt
|
|
47
|
+
import seaborn as sns
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def main():
    """Compute and plot ecosystem-service trade-offs from a summary table.

    Positional arguments (both optional, read from ``sys.argv``): the input
    CSV path and the output directory.

    Steps: load the table, min-max normalise every numeric column except
    ``lulc_code`` and ``n_pixels``, compute the Spearman correlation matrix,
    then write ``tradeoff_matrix.csv``, ``tradeoff_heatmap.png`` and up to six
    ``scatter_<a>_vs_<b>.png`` files into the output directory.

    Exits with status 1 on a missing or unreadable input or a failed step, and
    with status 0 (after a warning) when fewer than two indicators are found.
    """
    # Local stdlib import, used only to build filesystem-safe plot names.
    import re

    # ── Arguments ────────────────────────────────────────────────────────────
    es_file = sys.argv[1] if len(sys.argv) >= 2 else "outputs/ecosystem_services/es_summary_table.csv"
    output_dir = sys.argv[2] if len(sys.argv) >= 3 else "outputs/ecosystem_services"

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    logger.info("Skill: %s | es_file=%s | output_dir=%s", SKILL_NAME, es_file, output_dir)

    # ── Input precondition check ─────────────────────────────────────────────
    if not Path(es_file).exists():
        logger.error(
            "Input not found: %s\n"
            "Probable cause: o script de quantificacao de servicos ecossistemicos nao foi executado ou o caminho esta errado.\n"
            "Check: execute primeiro o script de mapeamento/quantificacao de ES.\n"
            "Previous skill: ecosystem-services-assessment (quantification step).",
            es_file,
        )
        sys.exit(1)

    # ── Step 1: Load ─────────────────────────────────────────────────────────
    log_step(1, "Load ecosystem services table")
    try:
        es = pd.read_csv(es_file)
    except Exception as exc:
        logger.error(
            "Failed to read CSV de servicos ecossistemicos: %s\n"
            "Probable cause: corrupted file ou com separador incorreto.\n"
            "Check: abra o arquivo em editor de texto e confira o formato.\n"
            "Previous skill: ecosystem-services-assessment (quantification step).",
            exc,
        )
        sys.exit(1)

    logger.info("ES table loaded: %d land use classes", len(es))

    n_na_total = int(es.isna().sum().sum())
    if n_na_total > 0:
        logger.warning(
            "ES table contains %d NA values total — correlations will be computed with complete observations.",
            n_na_total,
        )

    # ── Step 2: Identify numeric ES columns ──────────────────────────────────
    log_step(2, "Identify ES indicator columns and normalise 0-1")
    exclude_cols = {"lulc_code", "n_pixels"}
    es_cols = [c for c in es.select_dtypes(include=[np.number]).columns if c not in exclude_cols]
    logger.info("ES indicators: %s", ", ".join(es_cols))
    log_decision(
        "es_cols", ", ".join(es_cols),
        "colunas numericas excluindo lulc_code e n_pixels sao tratadas como indicadores de ES",
    )

    if len(es_cols) < 2:
        logger.warning(
            "Fewer than 2 ES indicators found — trade-off analysis not possible with only %d column(s).",
            len(es_cols),
        )
        logger.info("Exiting without error. Add more indicators to the input CSV.")
        sys.exit(0)

    # ── Normalise to 0-1 ─────────────────────────────────────────────────────
    try:
        es_norm = es.copy()
        eps = 1e-10  # keeps the denominator non-zero for constant columns
        for col in es_cols:
            cmin = es_norm[col].min()
            cmax = es_norm[col].max()
            es_norm[col] = (es_norm[col] - cmin) / (cmax - cmin + eps)
    except Exception as exc:
        logger.error(
            "Failed to normalise indicators 0-1: %s\n"
            "Probable cause: non-numeric columns incorrectly identified as indicators.\n"
            "Check: column types in the input CSV.\n"
            "Previous skill: ecosystem-services-assessment (quantification step).",
            exc,
        )
        sys.exit(1)

    log_decision(
        "normalization", "min-max [0,1] with epsilon 1e-10",
        "evita divisao por zero quando todos os valores de um indicador sao iguais",
    )

    # ── Step 3: Correlation matrix (Spearman) ────────────────────────────────
    log_step(3, "Compute Spearman correlation matrix among ES indicators")
    try:
        subset = es_norm[es_cols].dropna()
        rho, _ = spearmanr(subset)
        if len(es_cols) == 2:
            # spearmanr returns scalar for 2 variables — reshape to matrix
            cor_mat = np.array([[1.0, rho], [rho, 1.0]])
        else:
            cor_mat = rho
        cor_df = pd.DataFrame(cor_mat, index=es_cols, columns=es_cols)
    except Exception as exc:
        logger.error(
            "Failed to compute correlation matrix: %s\n"
            "Probable cause: all values in some column are NA after normalisation.\n"
            "Check: presence of variation in ES indicators.\n"
            "Previous skill: ecosystem-services-assessment (quantification step).",
            exc,
        )
        sys.exit(1)

    log_decision(
        "correlation_method", "Spearman",
        "metodo nao-parametrico robusto a distribuicoes assimetricas comuns em dados de ES",
    )

    try:
        csv_path = out_path / "tradeoff_matrix.csv"
        cor_df.to_csv(csv_path)
        logger.info("tradeoff_matrix.csv saved in: %s", output_dir)
    except Exception as exc:
        logger.error(
            "Failed to salvar tradeoff_matrix.csv: %s\n"
            "Probable cause: permissao negada ou disco cheio.\n"
            "Check: permissoes do output directory.\n"
            "Previous skill: [none].",
            exc,
        )
        sys.exit(1)

    # ── Step 4: Correlation heatmap ──────────────────────────────────────────
    log_step(4, "Generate trade-off heatmap")
    try:
        fig, ax = plt.subplots(figsize=(8, 7))
        # Hide the strict upper triangle; the matrix is symmetric.
        mask = np.triu(np.ones_like(cor_df, dtype=bool), k=1)
        sns.heatmap(
            cor_df, mask=mask, annot=True, fmt=".2f", cmap="RdBu_r",
            vmin=-1, vmax=1, center=0, square=True, linewidths=0.5, ax=ax,
        )
        ax.set_title("ES Trade-offs (Spearman r)")
        fig.tight_layout()
        fig.savefig(out_path / "tradeoff_heatmap.png", dpi=150)
        plt.close(fig)
        logger.info("tradeoff_heatmap.png saved in: %s", output_dir)
    except Exception as exc:
        logger.error(
            "Failed to generate heatmap de trade-offs: %s\n"
            "Probable cause: matplotlib/seaborn not installed ou matriz de correlacao invalida.\n"
            "Check: se os pacotes matplotlib e seaborn estao disponiveis e a matriz tem pelo menos 2 variaveis.\n"
            "Previous skill: [none].",
            exc,
        )
        sys.exit(1)

    # ── Step 5: Scatter plots for the first pairs ────────────────────────────
    log_step(5, "Generate scatter plots for ES indicator pairs")
    pair_combos = list(combinations(es_cols, 2))
    n_pairs = min(6, len(pair_combos))
    logger.info("Generating %d scatter plots (of %d possible pairs)", n_pairs, len(pair_combos))
    log_decision(
        "max_scatter_plots", str(n_pairs),
        "limitado a 6 pares para evitar geracao excessiva de arquivos",
    )

    label_col = "lulc_code" if "lulc_code" in es.columns else None

    # Column names often carry units such as "t/ha"; a path separator in the
    # file name would point savefig at a directory that does not exist.
    unsafe_chars = re.compile(r'[\\/:*?"<>|\x00-\x1f]+')

    def _safe(name) -> str:
        return unsafe_chars.sub("_", str(name))

    for v1, v2 in pair_combos[:n_pairs]:
        try:
            fig, ax = plt.subplots(figsize=(5, 4))
            ax.scatter(es[v1], es[v2], s=30, color="#2166ac", edgecolors="white", linewidths=0.5)
            if label_col is not None:
                # Iterate column-wise: iterrows() upcasts a row of mixed
                # int/float columns to float, turning class code 1 into "1.0".
                # Points without finite coordinates are not drawn, so they are
                # not annotated either.
                labelled = es[[label_col, v1, v2]].dropna(subset=[v1, v2])
                for label, x_val, y_val in zip(labelled[label_col], labelled[v1], labelled[v2]):
                    ax.annotate(
                        str(label),
                        (x_val, y_val),
                        fontsize=7, alpha=0.7,
                        textcoords="offset points", xytext=(4, 4),
                    )
            ax.set_xlabel(v1)
            ax.set_ylabel(v2)
            ax.set_title(f"Trade-off: {v1} vs {v2}")
            fig.tight_layout()
            fname = f"scatter_{_safe(v1)}_vs_{_safe(v2)}.png"
            fig.savefig(out_path / fname, dpi=150)
            plt.close(fig)
            logger.info("Saved: %s", fname)
        except Exception as exc:
            logger.error(
                "Failed to generate scatter plot for pair %s vs %s: %s\n"
                "Probable cause: column missing after filtering or annotation error.\n"
                "Check: columns '%s' and '%s' exist and have valid data.\n"
                "Previous skill: [none].",
                v1, v2, exc, v1, v2,
            )
            sys.exit(1)

    logger.info("Trade-off analysis completed. Outputs in: %s", output_dir)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
if __name__ == "__main__":
|
|
245
|
+
main()
|