rahil-clm 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,11 @@
  Metadata-Version: 2.4
  Name: rahil-clm
- Version: 0.1.1
+ Version: 0.1.2
  Summary: Generate LHS samples and CLM crop PFT NetCDF parameter files for yield optimization.
  Author: Mohammad Uzair Rahil
  License: MIT
  Requires-Python: >=3.9
+ Requires-Dist: matplotlib>=3.6
  Requires-Dist: netcdf4>=1.6
  Requires-Dist: numpy<2
  Requires-Dist: openpyxl>=3.1
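
The only dependency change in this release is the new matplotlib>=3.6 requirement, which supports the plotting module added below. A minimal way to pick it up, assuming the package was installed from PyPI:

    pip install --upgrade rahil-clm

pip resolves the added matplotlib requirement from the updated metadata during the upgrade.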
@@ -4,20 +4,22 @@ build-backend = "hatchling.build"

  [project]
  name = "rahil-clm"
- version = "0.1.1"
+ version = "0.1.2"

  description = "Generate LHS samples and CLM crop PFT NetCDF parameter files for yield optimization."
  readme = "README.md"
  requires-python = ">=3.9"
  license = { text = "MIT" }
  authors = [{ name = "Mohammad Uzair Rahil" }]
+
  dependencies = [
  "numpy<2",
  "pandas>=1.5",
  "xarray>=2023.1",
  "netcdf4>=1.6",
  "requests>=2.31",
- "openpyxl>=3.1"
+ "openpyxl>=3.1",
+ "matplotlib>=3.6"
  ]


@@ -0,0 +1,5 @@
+ from .clm import generate_lhs, GenerateLHSResult, check_distribution
+
+ __all__ = ["generate_lhs", "GenerateLHSResult", "check_distribution"]
+ __version__ = "0.1.2"
+
@@ -0,0 +1,4 @@
+ from .core import generate_lhs, GenerateLHSResult
+ from .plotting import check_distribution
+
+ __all__ = ["generate_lhs", "GenerateLHSResult", "check_distribution"]
@@ -0,0 +1,244 @@
+ from __future__ import annotations
+
+ import os
+ import math
+ import glob
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+ # Reuse your core download/cache logic + constants
+ from .core import _ensure_inputs  # must exist in your core.py
+
+
+ # ============================================================
+ # Defaults (must match your LHS generator)
+ # ============================================================
+ PFTS = {"corn": 17, "soybean": 23, "wheat": 19}
+
+ BOUND_COLS = {
+     "corn": ("corn min", "corn max"),
+     "soybean": ("soybean min", "soybean max"),
+     "wheat": ("wheat min", "wheat max"),
+ }
+
+ NS_PER_DAY = 86400.0 * 1e9
+
+
+ # ============================================================
+ # Helpers
+ # ============================================================
+ def _find_latest_param_list(workflow_dir: str) -> str:
+     candidates = glob.glob(os.path.join(workflow_dir, "*.param_list.txt"))
+     if not candidates:
+         raise FileNotFoundError(
+             f"No *.param_list.txt found in workflow dir: {workflow_dir}\n"
+             "Make sure you already ran rahil.generate_lhs(output_dir=...)."
+         )
+     candidates.sort(key=os.path.getmtime, reverse=True)
+     return candidates[0]
+
+
+ def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
+     df = df.copy()
+     df.columns = df.columns.astype(str).str.strip()
+     return df
+
+
+ def _normalize_parameter_index(bounds_df: pd.DataFrame) -> pd.DataFrame:
+     bounds_df = _normalize_columns(bounds_df)
+
+     # normalize parameter column name -> "Parameters"
+     rename_map = {}
+     for c in bounds_df.columns:
+         if str(c).strip().lower() in ["parameter", "parameters", "param", "par"]:
+             rename_map[c] = "Parameters"
+     bounds_df = bounds_df.rename(columns=rename_map)
+
+     if "Parameters" not in bounds_df.columns:
+         raise KeyError(
+             "No parameter column found in bounds file.\n"
+             f"Available columns: {list(bounds_df.columns)}"
+         )
+
+     bounds_df["Parameters"] = bounds_df["Parameters"].astype(str).str.strip()
+     bounds_df = bounds_df.dropna(subset=["Parameters"]).set_index("Parameters")
+     bounds_df = bounds_df[~bounds_df.index.duplicated(keep="first")]
+     return bounds_df
+
+
+ def _infer_param_names_from_sampled_columns(cols) -> list[str]:
+     params = set()
+     for c in cols:
+         c = str(c)
+         if "__pft" in c:
+             params.add(c.split("__pft")[0].strip())
+     return sorted(params)
+
+
+ def _load_bounds_excel(bounds_xlsx_path: str, sheet_name=None) -> pd.DataFrame:
+     """
+     Load bounds from Excel. If sheet_name is None, try to find a sheet that contains
+     required columns; otherwise use the provided sheet.
+     """
+     xls = pd.ExcelFile(bounds_xlsx_path)
+
+     if sheet_name is not None:
+         b = pd.read_excel(bounds_xlsx_path, sheet_name=sheet_name)
+         return _normalize_parameter_index(b)
+
+     required = {
+         "Parameters",
+         "corn min", "corn max",
+         "soybean min", "soybean max",
+         "wheat min", "wheat max",
+     }
+
+     for sh in xls.sheet_names:
+         b = pd.read_excel(bounds_xlsx_path, sheet_name=sh)
+         b.columns = b.columns.str.strip()
+         b = b.rename(columns={"parameter": "Parameters", "Parameter": "Parameters", "PARAMETER": "Parameters"})
+         if required.issubset(set(b.columns)):
+             return _normalize_parameter_index(b)
+
+     # fallback: first sheet, but will raise meaningful error if missing columns
+     b = pd.read_excel(bounds_xlsx_path, sheet_name=xls.sheet_names[0])
+     return _normalize_parameter_index(b)
+
+
+ # ============================================================
+ # Public API
+ # ============================================================
+ def check_distribution(
+     output_dir: str = "outputs",
+     *,
+     sampled_file: str | None = None,
+     bounds_sheet=None,
+     outdir: str | None = None,
+     n_cols: int = 4,
+     panel_w: float = 3.2,
+     panel_h: float = 2.4,
+     show: bool = False,
+     dpi: int = 300,
+     cache_dir: str | None = None,
+ ) -> str:
+     """
+     Create distribution plots (one figure per crop) for whichever samples exist
+     in output_dir/workflow/*.param_list.txt.
+
+     Parameters
+     ----------
+     output_dir : str
+         Folder used in rahil.generate_lhs(output_dir=...).
+     sampled_file : str | None
+         Optional explicit path to *.param_list.txt. If None, automatically picks newest.
+     bounds_sheet : str|int|None
+         Optional bounds Excel sheet name (or index). If None, auto-detect.
+     outdir : str | None
+         Where to save plots. Default: output_dir/figs_distributions_by_crop
+     n_cols : int
+         Number of columns in subplot grid (article layout).
+     panel_w, panel_h : float
+         Size per subplot (inches).
+     show : bool
+         If True, plt.show() each figure. Default False (faster, cleaner).
+     dpi : int
+         Saved figure dpi.
+     cache_dir : str | None
+         Optional cache dir for bounds/base files. If None, core decides.
+
+     Returns
+     -------
+     str : directory path where figures were saved
+     """
+     # 1) locate sampled param list
+     workflow_dir = os.path.join(output_dir, "workflow")
+     if sampled_file is None:
+         sampled_file = _find_latest_param_list(workflow_dir)
+
+     # 2) ensure bounds file exists (download/cache from GitHub release)
+     bounds_xlsx_path, _, _ = _ensure_inputs(cache_dir=cache_dir)
+
+     # 3) read sampled + bounds
+     df = pd.read_csv(sampled_file, index_col=0)
+     df = _normalize_columns(df)
+
+     bounds = _load_bounds_excel(bounds_xlsx_path, sheet_name=bounds_sheet)
+     bounds.index = bounds.index.astype(str).str.strip()
+
+     sampled_params = _infer_param_names_from_sampled_columns(df.columns)
+     params_to_plot = [p for p in bounds.index if p in sampled_params]
+
+     # 4) output plot directory
+     if outdir is None:
+         outdir = os.path.join(output_dir, "figs_distributions_by_crop")
+     os.makedirs(outdir, exist_ok=True)
+
+     plt.rcParams.update({"axes.grid": True, "font.size": 9})
+
+     # 5) one figure per crop
+     for crop, pid in PFTS.items():
+         min_col, max_col = BOUND_COLS[crop]
+
+         n = len(params_to_plot)
+         if n == 0:
+             raise ValueError(
+                 "No parameters found to plot.\n"
+                 "Check that your sampled param_list columns match the bounds table parameter names."
+             )
+
+         ncols = min(n_cols, max(1, n))
+         nrows = int(math.ceil(n / ncols))
+         fig_w = panel_w * ncols
+         fig_h = panel_h * nrows
+
+         fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_w, fig_h))
+         axes = np.array(axes).reshape(-1)
+
+         fig.suptitle(
+             f"{crop.capitalize()} (PFT {pid}) — LHS sampled distributions",
+             fontsize=14, y=0.995
+         )
+
+         for i, param in enumerate(params_to_plot):
+             ax = axes[i]
+             col = f"{param}__pft{pid}_{crop}"
+
+             if col not in df.columns:
+                 ax.set_title(f"{param} (missing)", fontsize=9)
+                 ax.axis("off")
+                 continue
+
+             vals = pd.to_numeric(df[col], errors="coerce").dropna().to_numpy()
+
+             vmin = float(bounds.loc[param, min_col])
+             vmax = float(bounds.loc[param, max_col])
+
+             # mxmat bounds may be ns in excel; values are days in sampled list
+             if param == "mxmat":
+                 if abs(vmin) > 1e6 or abs(vmax) > 1e6:
+                     vmin /= NS_PER_DAY
+                     vmax /= NS_PER_DAY
+                 vals = np.rint(vals).astype(int)
+
+             ax.hist(vals, bins=20, edgecolor="k", alpha=0.75)
+             ax.axvline(vmin, color="red", linestyle="--", linewidth=1)
+             ax.axvline(vmax, color="red", linestyle="--", linewidth=1)
+
+             ax.set_title(param, fontsize=10)
+             ax.set_ylabel("count", fontsize=8)
+             ax.set_xlabel("value" + (" (days)" if param == "mxmat" else ""), fontsize=8)
+             ax.tick_params(axis="both", labelsize=8)
+
+         for j in range(n, len(axes)):
+             axes[j].axis("off")
+
+         plt.tight_layout(rect=[0, 0, 1, 0.97])
+         fig.savefig(os.path.join(outdir, f"LHS_distributions_{crop}.png"),
+                     dpi=dpi, bbox_inches="tight")
+
+         if show:
+             plt.show()
+         plt.close(fig)
+
+     return outdir
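
For orientation, a minimal usage sketch of the new plotting entry point based on the docstring above. The top-level import name rahil is an assumption taken from the error message in _find_latest_param_list; the distribution itself is published as rahil-clm, and generate_lhs(output_dir=...) must run first so that outputs/workflow contains a *.param_list.txt file.

    import rahil  # assumed import name; the PyPI distribution is rahil-clm

    # produce the LHS samples (writes a *.param_list.txt under outputs/workflow)
    rahil.generate_lhs(output_dir="outputs")

    # plot per-crop histograms of the sampled parameters against their Excel bounds
    figs_dir = rahil.check_distribution("outputs", n_cols=4, show=False)
    print(figs_dir)  # outputs/figs_distributions_by_crop by default

check_distribution returns the directory where one LHS_distributions_<crop>.png is saved per crop (corn, soybean, wheat).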
@@ -1,4 +0,0 @@
- from .clm import generate_lhs, GenerateLHSResult
-
- __all__ = ["generate_lhs", "GenerateLHSResult"]
- __version__ = "0.1.1"
@@ -1,3 +0,0 @@
- from .core import generate_lhs, GenerateLHSResult
-
- __all__ = ["generate_lhs", "GenerateLHSResult"]