rahil-clm 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rahil/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .clm import generate_lhs, GenerateLHSResult
2
+
3
+ __all__ = ["generate_lhs", "GenerateLHSResult"]
4
+ __version__ = "0.1.1"
rahil/clm/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .core import generate_lhs, GenerateLHSResult
2
+
3
+ __all__ = ["generate_lhs", "GenerateLHSResult"]
rahil/clm/core.py ADDED
@@ -0,0 +1,351 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from os import path
6
+ from dataclasses import dataclass
7
+ from typing import Dict, Tuple, List, Optional
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ import xarray as xr
12
+ import requests
13
+
14
+
15
+
16
+ # ============================================================
17
+ # GitHub Release URLs (edit only if your tag/filenames differ)
18
+ # ============================================================
19
# Release tag on GitHub that the data assets are attached to.
# NOTE(review): tag is v0.1.0 while the package version is 0.1.1 —
# confirm the assets were not re-uploaded under a newer tag.
RELEASE_TAG = "v0.1.0"

# "<owner>/<repo>" slug of the repository hosting the release assets.
BASE_REPO = "M-Uzair-Rahil/rahil-clm"

# Asset filenames: the Excel table of per-crop parameter bounds, and the
# base CLM5 NetCDF parameter file that gets perturbed per ensemble member.
DEFAULT_XLSX_NAME = "finalized_params_for_run.xlsx"
DEFAULT_NC_NAME = "clm50_params.c240207b.nc"

# Direct-download URLs built from the pieces above.
GITHUB_XLSX_URL = f"https://github.com/{BASE_REPO}/releases/download/{RELEASE_TAG}/{DEFAULT_XLSX_NAME}"
GITHUB_NC_URL = f"https://github.com/{BASE_REPO}/releases/download/{RELEASE_TAG}/{DEFAULT_NC_NAME}"
28
+
29
+
30
+ # ============================================================
31
+ # OUTPUT CONTAINER
32
+ # ============================================================
33
@dataclass
class GenerateLHSResult:
    """Result bundle returned by :func:`generate_lhs`."""

    # One row per ensemble case; columns are named "<param>__pft<id>_<crop>".
    psets_df: pd.DataFrame
    # Directory the per-case NetCDF parameter files were written into.
    param_output_dir: str
    # Text file listing all case IDs, one per line.
    main_run_file: str
    # CSV dump of psets_df (case-ID index + sampled values).
    param_list_file: str
    # Case IDs in sample order: "<location>_<iteration>_<i:04d>".
    case_ids: List[str]
    # Local path of the Excel bounds table actually read.
    used_excel: str
    # Local path of the base NetCDF parameter file actually read.
    used_base_nc: str
    # Cache directory where the downloaded inputs live.
    cache_dir: str
43
+
44
+
45
+ # ============================================================
46
+ # CACHE + DOWNLOAD HELPERS
47
+ # ============================================================
48
+ def _default_cache_dir(appname: str = "rahil-clm") -> str:
49
+ """
50
+ Cross-platform cache directory.
51
+ Windows: %LOCALAPPDATA%\\<appname>\\cache
52
+ macOS: ~/Library/Caches/<appname>
53
+ Linux: ~/.cache/<appname>
54
+ """
55
+ if os.name == "nt":
56
+ base = os.environ.get("LOCALAPPDATA") or path.expanduser(r"~\AppData\Local")
57
+ return path.join(base, appname, "cache")
58
+
59
+ # macOS
60
+ if sys.platform == "darwin": # type: ignore[name-defined]
61
+ return path.join(path.expanduser("~/Library/Caches"), appname)
62
+
63
+ # linux/unix
64
+ base = os.environ.get("XDG_CACHE_HOME", path.expanduser("~/.cache"))
65
+ return path.join(base, appname)
66
+
67
+
68
+ def _download_file(url: str, out_path: str, timeout: int = 120) -> None:
69
+ os.makedirs(path.dirname(out_path), exist_ok=True)
70
+
71
+ # already downloaded
72
+ if path.exists(out_path) and path.getsize(out_path) > 0:
73
+ return
74
+
75
+ r = requests.get(url, stream=True, timeout=timeout)
76
+ r.raise_for_status()
77
+
78
+ tmp_path = out_path + ".part"
79
+ with open(tmp_path, "wb") as f:
80
+ for chunk in r.iter_content(chunk_size=1024 * 1024):
81
+ if chunk:
82
+ f.write(chunk)
83
+
84
+ os.replace(tmp_path, out_path)
85
+
86
+
87
def _ensure_inputs(cache_dir: Optional[str] = None) -> tuple[str, str, str]:
    """
    Make sure the Excel bounds table and base NetCDF file are present
    locally, downloading them into the cache on first use.

    Returns (excel_path, nc_path, cache_dir_used).
    """
    cache_root = cache_dir if cache_dir is not None else _default_cache_dir()
    os.makedirs(cache_root, exist_ok=True)

    # (remote URL, local destination) for each required input.
    targets = (
        (GITHUB_XLSX_URL, path.join(cache_root, DEFAULT_XLSX_NAME)),
        (GITHUB_NC_URL, path.join(cache_root, DEFAULT_NC_NAME)),
    )
    for url, local_path in targets:
        _download_file(url, local_path)

    return targets[0][1], targets[1][1], cache_root
104
+
105
+
106
+ # ============================================================
107
+ # LHS SAMPLER
108
+ # ============================================================
109
def lhs(n_samples: int, n_dim: int, rng: np.random.Generator) -> np.ndarray:
    """Latin Hypercube Sampling in [0,1]."""
    if n_samples <= 0:
        raise ValueError("n_samples must be > 0")
    if n_dim <= 0:
        raise ValueError("n_dim must be > 0")

    # Stratum edges: n_samples equal-width bins spanning [0, 1].
    edges = np.linspace(0.0, 1.0, n_samples + 1)
    lower = edges[:-1][:, None]
    width = (edges[1:] - edges[:-1])[:, None]

    # One uniform draw per (stratum, dimension), placed inside its stratum.
    samples = lower + width * rng.random((n_samples, n_dim))

    # Decouple the dimensions by shuffling each column independently.
    for col in range(n_dim):
        rng.shuffle(samples[:, col])
    return samples
124
+
125
+
126
+ # ============================================================
127
+ # HELPERS
128
+ # ============================================================
129
+ def _normalize_param_colname(pf: pd.DataFrame) -> pd.DataFrame:
130
+ pf = pf.copy()
131
+ pf.columns = pf.columns.str.strip()
132
+
133
+ rename_map = {}
134
+ for cand in ("parameter", "Parameter", "PARAMETER"):
135
+ if cand in pf.columns:
136
+ rename_map[cand] = "Parameters"
137
+ if rename_map:
138
+ pf = pf.rename(columns=rename_map)
139
+
140
+ if "Parameters" not in pf.columns:
141
+ raise KeyError(
142
+ "Input file must contain a 'Parameters' column "
143
+ "(or 'parameter' / 'Parameter')."
144
+ )
145
+
146
+ pf = pf.dropna(subset=["Parameters"])
147
+ pf["Parameters"] = pf["Parameters"].astype(str).str.strip()
148
+ return pf
149
+
150
+
151
+ def _find_pft_dim(da: xr.DataArray) -> str:
152
+ for d in da.dims:
153
+ if "pft" in d.lower():
154
+ return d
155
+ raise ValueError(f"No PFT dimension found in {da.dims}")
156
+
157
+
158
+ def _cast_like_base(param: str, base_dtype, new_val: float):
159
+ """
160
+ Cast sampled value to base NetCDF dtype.
161
+ Special handling for mxmat (integer DAYS).
162
+ """
163
+ if param == "mxmat":
164
+ v = float(new_val)
165
+ # nanoseconds -> days (if huge)
166
+ if abs(v) > 1.0e6:
167
+ v = v / (86400.0 * 1.0e9)
168
+ return int(np.rint(v))
169
+
170
+ if np.issubdtype(base_dtype, np.integer):
171
+ return int(np.rint(new_val))
172
+
173
+ return float(new_val)
174
+
175
+
176
+ # ============================================================
177
+ # PUBLIC API (no user file paths needed)
178
+ # ============================================================
179
def generate_lhs(
    Ninit: int = 150,
    seed: int = 42,
    output_dir: str = ".",
    *,
    location: str = "pe_crops",
    iteration: int = 0,
    PFT_CORN: int = 17,  # rainfed_temperate_corn
    PFT_SOY: int = 23,  # rainfed_temperate_soybean
    PFT_WHEAT: int = 19,  # rainfed_spring_wheat
    bounds_cols: Optional[Dict[str, Tuple[str, str]]] = None,
    cache_dir: Optional[str] = None,
) -> GenerateLHSResult:
    """
    Generate an LHS ensemble of CLM crop parameter NetCDF files.

    Minimal user call:
        import rahil
        out = rahil.generate_lhs(Ninit=150, seed=42, output_dir="outputs")

    Downloads (once) and caches:
        - Excel bounds table
        - Base CLM NetCDF params file
    from GitHub Releases.

    Parameters
    ----------
    Ninit : number of ensemble members (LHS samples).
    seed : RNG seed; the whole ensemble is reproducible for a fixed seed.
    output_dir : root directory; "workflow" and "paramfile/<location>"
        subdirectories are created under it.
    location, iteration : tags embedded in case IDs and workflow filenames.
    PFT_CORN, PFT_SOY, PFT_WHEAT : PFT indices targeted in the NetCDF.
    bounds_cols : mapping crop -> (min column, max column) in the Excel
        sheet; defaults to "<crop> min"/"<crop> max" for the three crops.
    cache_dir : override for the download cache location.

    Returns
    -------
    GenerateLHSResult with the sampled parameter table, output paths,
    and the input files actually used.
    """
    if bounds_cols is None:
        bounds_cols = {
            "corn": ("corn min", "corn max"),
            "soybean": ("soybean min", "soybean max"),
            "wheat": ("wheat min", "wheat max"),
        }

    # 0) ensure input files exist locally (download/cache)
    excel_path, base_nc_path, cache_dir_used = _ensure_inputs(cache_dir=cache_dir)

    output_dir = path.abspath(output_dir)
    workflow_dir = path.join(output_dir, "workflow")
    param_output_dir = path.join(output_dir, "paramfile", location)
    os.makedirs(workflow_dir, exist_ok=True)
    os.makedirs(param_output_dir, exist_ok=True)

    # 1) read Excel bounds and normalize the parameter-name column
    pf = pd.read_excel(excel_path)
    pf = _normalize_param_colname(pf)

    need_cols = [
        "Parameters",
        bounds_cols["corn"][0], bounds_cols["corn"][1],
        bounds_cols["soybean"][0], bounds_cols["soybean"][1],
        bounds_cols["wheat"][0], bounds_cols["wheat"][1],
    ]
    missing = [c for c in need_cols if c not in pf.columns]
    if missing:
        raise KeyError(f"Missing columns in Excel: {missing}")

    pf = pf[need_cols].copy()
    pf = pf.dropna(subset=["Parameters"])
    pf["Parameters"] = pf["Parameters"].astype(str).str.strip()
    param_list = pf["Parameters"].values

    # 2) build bounds vectors and a (param, crop_tag, pft_index) map.
    # Each Excel row contributes three LHS dimensions (corn/soy/wheat),
    # appended in a fixed order so columns align with var_map.
    xlb: List[float] = []
    xub: List[float] = []
    var_map: List[Tuple[str, str, int]] = []

    for _, row in pf.iterrows():
        param = row["Parameters"]

        mn, mx = bounds_cols["corn"]
        xlb.append(float(row[mn])); xub.append(float(row[mx]))
        var_map.append((param, "corn", PFT_CORN))

        mn, mx = bounds_cols["soybean"]
        xlb.append(float(row[mn])); xub.append(float(row[mx]))
        var_map.append((param, "soybean", PFT_SOY))

        mn, mx = bounds_cols["wheat"]
        xlb.append(float(row[mn])); xub.append(float(row[mx]))
        var_map.append((param, "wheat", PFT_WHEAT))

    xlb_arr = np.array(xlb, dtype=float)
    xub_arr = np.array(xub, dtype=float)

    # 3) LHS in [0,1], then affine-scale each dimension into [lb, ub]
    rng = np.random.default_rng(seed)
    X01 = lhs(Ninit, len(xlb_arr), rng)
    perturbed = X01 * (xub_arr - xlb_arr) + xlb_arr

    # 4) build case IDs + workflow outputs
    case_ids = [f"{location}_{iteration}_{i:04d}" for i in range(Ninit)]
    colnames = [f"{p}__pft{pid}_{tag}" for (p, tag, pid) in var_map]
    psets_df = pd.DataFrame(perturbed, columns=colnames, index=case_ids)

    # mxmat: ensure integer days (values > 1e6 are treated as nanoseconds)
    for c in psets_df.columns:
        if c.startswith("mxmat__"):
            v = psets_df[c].astype(float).to_numpy()
            v = np.where(np.abs(v) > 1.0e6, v / (86400.0 * 1.0e9), v)
            psets_df[c] = np.rint(v).astype(int)

    param_list_file = path.join(workflow_dir, f"{location}_{iteration}.param_list.txt")
    main_run_file = path.join(workflow_dir, f"{location}_{iteration}.main_run.txt")

    # CSV of all sampled values plus a plain list of case IDs for the
    # downstream CTSM/CLM run scripts.
    psets_df.to_csv(param_list_file)
    with open(main_run_file, "w") as f:
        f.write("\n".join(case_ids) + "\n")

    # 5) write NetCDFs — first validate/inspect the base file once,
    # then re-open it fresh for every case so each output starts clean.
    base = xr.open_dataset(base_nc_path, decode_times=False)

    missing_params = [p for p in param_list if p not in base.variables]
    if missing_params:
        base.close()
        raise KeyError(f"These parameters are not in the base NetCDF: {missing_params}")

    # Per-parameter metadata cached up front so the per-case loop does
    # not repeat dimension/dtype discovery.
    param_meta = {}
    for p in param_list:
        da = base[p]
        pft_dim = _find_pft_dim(da)
        other_dims = [d for d in da.dims if d != pft_dim]
        param_meta[p] = {"pft_dim": pft_dim, "dtype": da.dtype, "other_dims": other_dims}
    base.close()

    for case_id, row in psets_df.iterrows():
        ds = xr.open_dataset(base_nc_path, decode_times=False)
        encoding = {}

        for (param, tag, pid) in var_map:
            col = f"{param}__pft{pid}_{tag}"
            new_val = float(row[col])

            meta = param_meta[param]
            pft_dim = meta["pft_dim"]
            base_dtype = meta["dtype"]
            other_dims = meta["other_dims"]

            casted = _cast_like_base(param, base_dtype, new_val)

            # NOTE(review): .loc selects by coordinate *label*; this
            # assumes the PFT coordinate labels equal the integer PFT
            # indices — confirm against the base parameter file.
            if len(other_dims) == 0:
                ds[param].loc[{pft_dim: pid}] = casted
            else:
                # Broadcast the scalar across all non-PFT dimensions.
                idx = {pft_dim: pid}
                for d in other_dims:
                    idx[d] = slice(None)
                ds[param].loc[idx] = casted

            # mxmat: re-normalize the whole variable to integer days
            # (nanosecond-scale entries converted, NaNs mapped to 0) and
            # force an i4 on-disk encoding with 0 as the fill value.
            if param == "mxmat":
                mx = ds["mxmat"]
                mxv = np.array(mx, dtype="float64")
                mask_ns = np.isfinite(mxv) & (np.abs(mxv) > 1.0e6)
                mxv[mask_ns] = mxv[mask_ns] / (86400.0 * 1.0e9)
                mxv = np.rint(mxv)
                mxv = np.where(np.isfinite(mxv), mxv, 0.0)

                ds["mxmat"] = xr.DataArray(
                    mxv.astype("int32"),
                    dims=mx.dims,
                    coords=mx.coords
                )
                encoding["mxmat"] = {"dtype": "i4", "_FillValue": 0}

        out_nc = path.join(param_output_dir, f"{case_id}.nc")
        ds.to_netcdf(out_nc, mode="w", encoding=encoding)
        ds.close()

    return GenerateLHSResult(
        psets_df=psets_df,
        param_output_dir=param_output_dir,
        main_run_file=main_run_file,
        param_list_file=param_list_file,
        case_ids=case_ids,
        used_excel=excel_path,
        used_base_nc=base_nc_path,
        cache_dir=cache_dir_used,
    )
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.4
2
+ Name: rahil-clm
3
+ Version: 0.1.1
4
+ Summary: Generate LHS samples and CLM crop PFT NetCDF parameter files for yield optimization.
5
+ Author: Mohammad Uzair Rahil
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: netcdf4>=1.6
9
+ Requires-Dist: numpy<2
10
+ Requires-Dist: openpyxl>=3.1
11
+ Requires-Dist: pandas>=1.5
12
+ Requires-Dist: requests>=2.31
13
+ Requires-Dist: xarray>=2023.1
14
+ Description-Content-Type: text/markdown
15
+
16
+ # Author: Mohammad Uzair Rahil
17
+ # Michigan State University (MSU)
18
+ # Date: 01/01/2026
19
+
20
+ # rahil-clm
21
+
22
+ `rahil-clm` is a Python package to generate Latin Hypercube Sampling (LHS)–based
23
+ crop parameter ensembles for **CLM/CTSM crop yield optimization**.
24
+
25
+ The package is designed so users **do not need to provide any input file paths**.
26
+ Required input files (Excel parameter bounds and base CLM NetCDF parameters) are
27
+ automatically downloaded from GitHub Releases and cached locally on first use.
28
+
29
+ ---
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install rahil-clm
35
+
36
+
37
+
38
+ ## Quick start
39
+
40
+ import rahil
41
+
42
+ out = rahil.generate_lhs(
43
+ Ninit=150,
44
+ seed=42,
45
+ output_dir="outputs"
46
+ )
47
+
48
+ print(out.param_output_dir)
49
+ print(out.main_run_file)
50
+ print(out.param_list_file)
51
+ print(out.psets_df.head())
52
+
53
+
54
+
55
+ This will:
56
+
57
+ generate LHS samples for crop parameters
58
+ write per-case CLM NetCDF parameter files
59
+ create workflow text files for running CTSM/CLM ensembles
60
+
61
+
62
+
63
+
64
+
@@ -0,0 +1,6 @@
1
+ rahil/__init__.py,sha256=rfni_vokufZ7cYFFa_kc2-FGBMYXDRVVkmXf7HeojaQ,120
2
+ rahil/clm/__init__.py,sha256=dhDZzQiIXxdC-EhcYb9-0gGrNc-yGmZz3HglA5p0o0w,99
3
+ rahil/clm/core.py,sha256=x85Kw0vTRShPfiwlswW7ZTWmOVoHEb2r7u42d1ER1RA,11344
4
+ rahil_clm-0.1.1.dist-info/METADATA,sha256=hGy5ls0f6knHqQ62MKelzC-7NKZfzB-rul1hW9LMrog,1347
5
+ rahil_clm-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ rahil_clm-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any