rahil-clm 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rahil_clm-0.1.1 → rahil_clm-0.1.2}/PKG-INFO +2 -1
- {rahil_clm-0.1.1 → rahil_clm-0.1.2}/pyproject.toml +4 -2
- rahil_clm-0.1.2/src/rahil/__init__.py +5 -0
- rahil_clm-0.1.2/src/rahil/clm/__init__.py +4 -0
- rahil_clm-0.1.2/src/rahil/clm/plotting.py +244 -0
- rahil_clm-0.1.1/src/rahil/__init__.py +0 -4
- rahil_clm-0.1.1/src/rahil/clm/__init__.py +0 -3
- {rahil_clm-0.1.1 → rahil_clm-0.1.2}/.github/workflows/pypi-publish.yml +0 -0
- {rahil_clm-0.1.1 → rahil_clm-0.1.2}/.gitignore +0 -0
- {rahil_clm-0.1.1 → rahil_clm-0.1.2}/README.md +0 -0
- {rahil_clm-0.1.1 → rahil_clm-0.1.2}/examples/run_generate.py +0 -0
- {rahil_clm-0.1.1 → rahil_clm-0.1.2}/src/rahil/clm/core.py +0 -0
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rahil-clm
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Generate LHS samples and CLM crop PFT NetCDF parameter files for yield optimization.
|
|
5
5
|
Author: Mohammad Uzair Rahil
|
|
6
6
|
License: MIT
|
|
7
7
|
Requires-Python: >=3.9
|
|
8
|
+
Requires-Dist: matplotlib>=3.6
|
|
8
9
|
Requires-Dist: netcdf4>=1.6
|
|
9
10
|
Requires-Dist: numpy<2
|
|
10
11
|
Requires-Dist: openpyxl>=3.1
|
|
@@ -4,20 +4,22 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "rahil-clm"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
|
|
9
9
|
description = "Generate LHS samples and CLM crop PFT NetCDF parameter files for yield optimization."
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
requires-python = ">=3.9"
|
|
12
12
|
license = { text = "MIT" }
|
|
13
13
|
authors = [{ name = "Mohammad Uzair Rahil" }]
|
|
14
|
+
|
|
14
15
|
dependencies = [
|
|
15
16
|
"numpy<2",
|
|
16
17
|
"pandas>=1.5",
|
|
17
18
|
"xarray>=2023.1",
|
|
18
19
|
"netcdf4>=1.6",
|
|
19
20
|
"requests>=2.31",
|
|
20
|
-
"openpyxl>=3.1"
|
|
21
|
+
"openpyxl>=3.1",
|
|
22
|
+
"matplotlib>=3.6"
|
|
21
23
|
]
|
|
22
24
|
|
|
23
25
|
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import math
|
|
5
|
+
import glob
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import matplotlib.pyplot as plt
|
|
9
|
+
|
|
10
|
+
# Reuse your core download/cache logic + constants
|
|
11
|
+
from .core import _ensure_inputs # must exist in your core.py
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ============================================================
|
|
15
|
+
# Defaults (must match your LHS generator)
|
|
16
|
+
# ============================================================
|
|
17
|
+
# PFT index used for each supported crop; the index is embedded in the sampled
# column names, which follow the pattern ``<param>__pft<index>_<crop>``.
PFTS = dict(corn=17, soybean=23, wheat=19)

# (lower-bound column, upper-bound column) of the bounds table, per crop.
BOUND_COLS = {crop: (f"{crop} min", f"{crop} max") for crop in PFTS}

# Number of nanoseconds in one day (24 h * 60 min * 60 s * 1e9 ns).
NS_PER_DAY = 24 * 60 * 60 * 1e9
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ============================================================
|
|
29
|
+
# Helpers
|
|
30
|
+
# ============================================================
|
|
31
|
+
def _find_latest_param_list(workflow_dir: str) -> str:
    """Return the most recently modified ``*.param_list.txt`` in *workflow_dir*.

    Parameters
    ----------
    workflow_dir : str
        Directory to search (not recursive).

    Returns
    -------
    str
        Path of the matching file with the newest modification time.

    Raises
    ------
    FileNotFoundError
        If no file matching ``*.param_list.txt`` exists in *workflow_dir*.
    """
    # Escape the directory part so glob metacharacters that happen to be in
    # the path itself ("[", "]", "*", "?") are matched literally instead of
    # being interpreted as a pattern (which would silently match nothing).
    pattern = os.path.join(glob.escape(workflow_dir), "*.param_list.txt")
    candidates = glob.glob(pattern)
    if not candidates:
        raise FileNotFoundError(
            f"No *.param_list.txt found in workflow dir: {workflow_dir}\n"
            "Make sure you already ran rahil.generate_lhs(output_dir=...)."
        )
    # Only the newest file is needed, so a single max() pass replaces the sort.
    return max(candidates, key=os.path.getmtime)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose column labels are whitespace-stripped strings.

    The caller's frame is not modified: labels are converted to ``str`` and
    stripped, then assigned onto a copy of the data.
    """
    cleaned_labels = df.columns.astype(str).str.strip()
    result = df.copy()
    result.columns = cleaned_labels
    return result
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _normalize_parameter_index(bounds_df: pd.DataFrame) -> pd.DataFrame:
    """Return *bounds_df* indexed by its cleaned parameter-name column.

    Column labels are normalised first. If no ``"Parameters"`` column exists,
    the first column whose label case-insensitively equals one of
    ``parameter``, ``parameters``, ``param`` or ``par`` is renamed to
    ``"Parameters"``. Rows without a parameter name are dropped, names are
    stripped of surrounding whitespace, and only the first row is kept for
    each duplicated name.

    Parameters
    ----------
    bounds_df : pd.DataFrame
        Raw bounds table; it is not modified.

    Returns
    -------
    pd.DataFrame
        Copy of the table indexed by ``"Parameters"``.

    Raises
    ------
    KeyError
        If no parameter-name column can be identified.
    """
    bounds_df = _normalize_columns(bounds_df)

    if "Parameters" not in bounds_df.columns:
        aliases = {"parameter", "parameters", "param", "par"}
        # Rename only the first match: renaming several alias columns would
        # create duplicate "Parameters" columns and break the .str accessor.
        match = next(
            (c for c in bounds_df.columns if str(c).strip().lower() in aliases),
            None,
        )
        if match is None:
            raise KeyError(
                "No parameter column found in bounds file.\n"
                f"Available columns: {list(bounds_df.columns)}"
            )
        bounds_df = bounds_df.rename(columns={match: "Parameters"})

    # Drop rows with a missing name BEFORE the string conversion: astype(str)
    # turns NaN into the literal "nan", which dropna() can no longer detect.
    bounds_df = bounds_df.dropna(subset=["Parameters"])
    names = bounds_df["Parameters"].astype(str).str.strip()

    # assign() builds a new frame, avoiding chained-assignment warnings on the
    # result of dropna().
    bounds_df = bounds_df.assign(Parameters=names).set_index("Parameters")
    return bounds_df[~bounds_df.index.duplicated(keep="first")]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _infer_param_names_from_sampled_columns(cols) -> list[str]:
    """Return the sorted, unique parameter names encoded in sampled column labels.

    A label contributes a name only if it contains the ``"__pft"`` marker; the
    name is the text before the first marker, stripped of surrounding
    whitespace. Labels are converted with ``str`` first.
    """
    marker = "__pft"
    labels = (str(label) for label in cols)
    names = {
        label.partition(marker)[0].strip()
        for label in labels
        if marker in label
    }
    return sorted(names)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _load_bounds_excel(bounds_xlsx_path: str, sheet_name=None) -> pd.DataFrame:
    """Load the parameter-bounds table from an Excel workbook.

    Parameters
    ----------
    bounds_xlsx_path : str
        Path to the bounds workbook.
    sheet_name : str | int | None
        Sheet to read. If None, the first sheet that has a parameter-name
        column and every min/max column listed in ``BOUND_COLS`` is used;
        if no sheet qualifies, the first sheet is used so that the
        normaliser can report what is missing.

    Returns
    -------
    pd.DataFrame
        Bounds table as returned by ``_normalize_parameter_index``.

    Raises
    ------
    ValueError
        If the workbook contains no sheets.
    """
    param_aliases = {"parameter", "parameters", "param", "par"}
    required_bounds = {name for pair in BOUND_COLS.values() for name in pair}

    # Open the workbook once and close it deterministically; every sheet is
    # parsed from the same handle instead of re-opening the file per sheet.
    with pd.ExcelFile(bounds_xlsx_path) as xls:
        if sheet_name is not None:
            return _normalize_parameter_index(xls.parse(sheet_name=sheet_name))

        first_sheet = None
        for sh in xls.sheet_names:
            sheet = xls.parse(sheet_name=sh)
            if first_sheet is None:
                first_sheet = sheet

            # str() each label: sheets with numeric headers would make the
            # Index .str accessor raise and abort the auto-detection.
            labels = [str(c).strip() for c in sheet.columns]
            has_param_col = any(lbl.lower() in param_aliases for lbl in labels)
            if has_param_col and required_bounds.issubset(labels):
                return _normalize_parameter_index(sheet)

    if first_sheet is None:
        raise ValueError(f"No sheets found in bounds workbook: {bounds_xlsx_path}")

    # fallback: first sheet, but will raise meaningful error if missing columns
    return _normalize_parameter_index(first_sheet)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ============================================================
|
|
110
|
+
# Public API
|
|
111
|
+
# ============================================================
|
|
112
|
+
def _draw_crop_panels(axes, df, bounds, params_to_plot, crop, pid) -> None:
    """Draw one histogram panel per parameter for *crop* onto the flat *axes*."""
    min_col, max_col = BOUND_COLS[crop]

    for ax, param in zip(axes, params_to_plot):
        col = f"{param}__pft{pid}_{crop}"

        if col not in df.columns:
            # Parameter was not sampled for this crop: leave a labelled blank.
            ax.set_title(f"{param} (missing)", fontsize=9)
            ax.axis("off")
            continue

        vals = pd.to_numeric(df[col], errors="coerce").dropna().to_numpy()

        vmin = float(bounds.loc[param, min_col])
        vmax = float(bounds.loc[param, max_col])

        is_mxmat = param == "mxmat"
        if is_mxmat:
            # mxmat bounds may be ns in excel; values are days in sampled list
            if abs(vmin) > 1e6 or abs(vmax) > 1e6:
                vmin /= NS_PER_DAY
                vmax /= NS_PER_DAY
            vals = np.rint(vals).astype(int)

        ax.hist(vals, bins=20, edgecolor="k", alpha=0.75)
        ax.axvline(vmin, color="red", linestyle="--", linewidth=1)
        ax.axvline(vmax, color="red", linestyle="--", linewidth=1)

        ax.set_title(param, fontsize=10)
        ax.set_ylabel("count", fontsize=8)
        ax.set_xlabel("value" + (" (days)" if is_mxmat else ""), fontsize=8)
        ax.tick_params(axis="both", labelsize=8)

    # Hide the unused cells of the grid.
    for ax in axes[len(params_to_plot):]:
        ax.axis("off")


def check_distribution(
    output_dir: str = "outputs",
    *,
    sampled_file: str | None = None,
    bounds_sheet=None,
    outdir: str | None = None,
    n_cols: int = 4,
    panel_w: float = 3.2,
    panel_h: float = 2.4,
    show: bool = False,
    dpi: int = 300,
    cache_dir: str | None = None,
) -> str:
    """
    Create distribution plots (one figure per crop) for whichever samples exist
    in output_dir/workflow/*.param_list.txt.

    Each figure holds one histogram per parameter with the crop's lower and
    upper bounds drawn as dashed red lines, and is saved as
    ``LHS_distributions_<crop>.png``.

    Parameters
    ----------
    output_dir : str
        Folder used in rahil.generate_lhs(output_dir=...).
    sampled_file : str | None
        Optional explicit path to *.param_list.txt. If None, automatically picks newest.
    bounds_sheet : str|int|None
        Optional bounds Excel sheet name (or index). If None, auto-detect.
    outdir : str | None
        Where to save plots. Default: output_dir/figs_distributions_by_crop
    n_cols : int
        Maximum number of columns in subplot grid (article layout). Must be >= 1.
    panel_w, panel_h : float
        Size per subplot (inches).
    show : bool
        If True, plt.show() each figure. Default False (faster, cleaner).
    dpi : int
        Saved figure dpi.
    cache_dir : str | None
        Optional cache dir for bounds/base files. If None, core decides.

    Returns
    -------
    str : directory path where figures were saved

    Raises
    ------
    ValueError
        If ``n_cols`` is smaller than 1, or if no sampled parameter matches a
        parameter name in the bounds table.
    FileNotFoundError
        If ``sampled_file`` is None and no ``*.param_list.txt`` is found.
    """
    if n_cols < 1:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError(f"n_cols must be >= 1, got {n_cols}")

    # 1) locate sampled param list
    if sampled_file is None:
        sampled_file = _find_latest_param_list(os.path.join(output_dir, "workflow"))

    # 2) ensure bounds file exists (download/cache from GitHub release)
    bounds_xlsx_path, _, _ = _ensure_inputs(cache_dir=cache_dir)

    # 3) read sampled + bounds
    df = _normalize_columns(pd.read_csv(sampled_file, index_col=0))

    bounds = _load_bounds_excel(bounds_xlsx_path, sheet_name=bounds_sheet)
    bounds.index = bounds.index.astype(str).str.strip()

    # Set membership keeps the filter linear; bounds order defines plot order.
    sampled_params = set(_infer_param_names_from_sampled_columns(df.columns))
    params_to_plot = [p for p in bounds.index if p in sampled_params]

    # Checked once, before any directory or figure is created.
    n = len(params_to_plot)
    if n == 0:
        raise ValueError(
            "No parameters found to plot.\n"
            "Check that your sampled param_list columns match the bounds table parameter names."
        )

    # 4) output plot directory
    if outdir is None:
        outdir = os.path.join(output_dir, "figs_distributions_by_crop")
    os.makedirs(outdir, exist_ok=True)

    ncols = min(n_cols, n)
    nrows = math.ceil(n / ncols)
    figsize = (panel_w * ncols, panel_h * nrows)

    # 5) one figure per crop
    # rc_context restores the caller's matplotlib settings on exit instead of
    # permanently mutating the global rcParams.
    with plt.rc_context({"axes.grid": True, "font.size": 9}):
        for crop, pid in PFTS.items():
            fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
            try:
                fig.suptitle(
                    f"{crop.capitalize()} (PFT {pid}) — LHS sampled distributions",
                    fontsize=14, y=0.995
                )

                _draw_crop_panels(
                    np.array(axes).reshape(-1), df, bounds, params_to_plot, crop, pid
                )

                fig.tight_layout(rect=[0, 0, 1, 0.97])
                fig.savefig(os.path.join(outdir, f"LHS_distributions_{crop}.png"),
                            dpi=dpi, bbox_inches="tight")

                if show:
                    plt.show()
            finally:
                # Always release the figure, even if plotting or saving failed.
                plt.close(fig)

    return outdir
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|