geoloop 0.0.1__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. geoloop/axisym/AxisymetricEL.py +751 -0
  2. geoloop/axisym/__init__.py +3 -0
  3. geoloop/bin/Flowdatamain.py +89 -0
  4. geoloop/bin/Lithologymain.py +84 -0
  5. geoloop/bin/Loadprofilemain.py +100 -0
  6. geoloop/bin/Plotmain.py +250 -0
  7. geoloop/bin/Runbatch.py +81 -0
  8. geoloop/bin/Runmain.py +86 -0
  9. geoloop/bin/SingleRunSim.py +928 -0
  10. geoloop/bin/__init__.py +3 -0
  11. geoloop/cli/__init__.py +0 -0
  12. geoloop/cli/batch.py +106 -0
  13. geoloop/cli/main.py +105 -0
  14. geoloop/configuration.py +946 -0
  15. geoloop/constants.py +112 -0
  16. geoloop/geoloopcore/CoaxialPipe.py +503 -0
  17. geoloop/geoloopcore/CustomPipe.py +727 -0
  18. geoloop/geoloopcore/__init__.py +3 -0
  19. geoloop/geoloopcore/b2g.py +739 -0
  20. geoloop/geoloopcore/b2g_ana.py +535 -0
  21. geoloop/geoloopcore/boreholedesign.py +683 -0
  22. geoloop/geoloopcore/getloaddata.py +112 -0
  23. geoloop/geoloopcore/pyg_ana.py +280 -0
  24. geoloop/geoloopcore/pygfield_ana.py +519 -0
  25. geoloop/geoloopcore/simulationparameters.py +130 -0
  26. geoloop/geoloopcore/soilproperties.py +152 -0
  27. geoloop/geoloopcore/strat_interpolator.py +194 -0
  28. geoloop/lithology/__init__.py +3 -0
  29. geoloop/lithology/plot_lithology.py +277 -0
  30. geoloop/lithology/process_lithology.py +697 -0
  31. geoloop/loadflowdata/__init__.py +3 -0
  32. geoloop/loadflowdata/flow_data.py +161 -0
  33. geoloop/loadflowdata/loadprofile.py +325 -0
  34. geoloop/plotting/__init__.py +3 -0
  35. geoloop/plotting/create_plots.py +1137 -0
  36. geoloop/plotting/load_data.py +432 -0
  37. geoloop/utils/RunManager.py +164 -0
  38. geoloop/utils/__init__.py +0 -0
  39. geoloop/utils/helpers.py +841 -0
  40. geoloop-1.0.0b1.dist-info/METADATA +112 -0
  41. geoloop-1.0.0b1.dist-info/RECORD +46 -0
  42. geoloop-1.0.0b1.dist-info/entry_points.txt +2 -0
  43. geoloop-0.0.1.dist-info/licenses/LICENSE → geoloop-1.0.0b1.dist-info/licenses/LICENSE.md +2 -1
  44. geoloop-0.0.1.dist-info/METADATA +0 -10
  45. geoloop-0.0.1.dist-info/RECORD +0 -6
  46. {geoloop-0.0.1.dist-info → geoloop-1.0.0b1.dist-info}/WHEEL +0 -0
  47. {geoloop-0.0.1.dist-info → geoloop-1.0.0b1.dist-info}/top_level.txt +0 -0
geoloop/utils/helpers.py
@@ -0,0 +1,841 @@
+ from pathlib import Path
+ from typing import Any
+
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+
+ from geoloop.configuration import SingleRunConfig, StochasticRunConfig
+
+
+ def get_param_names(
+     config: SingleRunConfig | StochasticRunConfig | None = None,
+ ) -> tuple[list[str], list[str]]:
+     """
+     Identify variable and locked parameter names for Monte Carlo simulations.
+
+     These parameters are used to organize results in HDF5 files during
+     simulation post-processing.
+
+     Parameters
+     ----------
+     config : SingleRunConfig or StochasticRunConfig, optional
+         Configuration object that may contain additional optional
+         parameters. Not used by the current implementation; the
+         returned name lists are fixed.
+
+     Returns
+     -------
+     tuple of (list of str, list of str)
+         - `variable_param_names` : List of variable parameter names that
+           can change between Monte Carlo runs.
+         - `locked_param_names` : List of locked parameter names that
+           remain fixed across runs.
+     """
+     variable_param_names = [
+         "k_s_scale",
+         "k_p",
+         "insu_z",
+         "insu_dr",
+         "insu_k",
+         "m_flow",
+         "Tin",
+         "H",
+         "epsilon",
+         "alfa",
+         "Tgrad",
+         "Q",
+         "fluid_percent",
+     ]
+     locked_param_names = [
+         "type",
+         "D",
+         "r_b",
+         "pos",
+         "r_out",
+         "SDR",
+         "fluid_str",
+         "nInlets",
+         "Tg",
+         "z_Tg",
+         "k_s",
+         "z_k_s",
+         "model_type",
+         "run_type",
+         "nyear",
+         "nled",
+         "nsegments",
+         "k_g",
+         "z_k_g",
+     ]
+
+     return variable_param_names, locked_param_names
+
+
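As an illustration of how the two name lists are meant to be used, a minimal sketch follows; the parameter values are made up and not part of the package:

    variable_names, locked_names = get_param_names()
    param_dict = {"k_p": 0.4, "Tin": 6.0, "D": 2.0, "r_b": 0.07}  # made-up values
    variable_params = {k: v for k, v in param_dict.items() if k in variable_names}
    locked_params = {k: v for k, v in param_dict.items() if k in locked_names}
    # variable_params -> {"k_p": 0.4, "Tin": 6.0}; locked_params -> {"D": 2.0, "r_b": 0.07}
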
+ def getresultsvar_fordims(var_names_dim: list[str], results_object: Any) -> np.ndarray:
+     """
+     Extract an array of result values for specified variable names from a results object.
+
+     Parameters
+     ----------
+     var_names_dim : list of str
+         List of variable names to extract from the results object.
+     results_object : SingleRunResult
+         SingleRunResult object containing the results of a single model run.
+         The function will attempt to access each variable as an attribute of
+         this object.
+
+     Returns
+     -------
+     np.ndarray
+         Array of result values corresponding to the requested variable names.
+         If a variable is not present in the object, `None` is used for that entry.
+
+     Notes
+     -----
+     The resulting array preserves the order of `var_names_dim`.
+     """
+     # Collect each variable's value (or None if the attribute is missing),
+     # preserving the order of var_names_dim.
+     results_list = [getattr(results_object, var, None) for var in var_names_dim]
+
+     return np.array(results_list)
+
+
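A small usage sketch of the attribute lookup, with `types.SimpleNamespace` standing in for a real results object (a variable missing from the object would come back as `None`):

    from types import SimpleNamespace

    dummy = SimpleNamespace(Tout=np.array([10.0, 10.5]), Q=np.array([1.2, 1.3]))
    values = getresultsvar_fordims(["Tout", "Q"], dummy)
    # values.shape == (2, 2): one row per requested variable, in order.
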
+ def save_singlerun_results(config: SingleRunConfig, result: Any, outpath: Path) -> None:
+     """
+     Save the results from a single model run into an HDF5 file.
+
+     Results and input parameters are grouped into datasets based on their
+     dimensions, ensuring compatibility with HDF5 storage formats.
+
+     The function organizes results by:
+     - Time series (scalar variables)
+     - Time and depth segment
+     - Time, depth, and pipe
+     - Depth segment only
+
+     Input parameters are grouped by:
+     - Scalar parameters
+     - Pipe-specific parameters
+     - Depth-segment parameters (e.g., `k_s`)
+     - Depth-specific parameters (e.g., `k_g`, `Tg`)
+
+     All datasets are saved into an HDF5 file with groups "results" and "parameters".
+
+     Parameters
+     ----------
+     config : SingleRunConfig
+         Configuration object containing all input parameters from the JSON
+         configuration file.
+     result : SingleRunSim object
+         Results object from a single model run.
+     outpath : pathlib.Path
+         Filepath for saving the output HDF5 file. The file will be named
+         `{stem}_SR.h5` based on `outpath`.
+
+     Returns
+     -------
+     None
+         The results and input parameters are written to an HDF5 file on disk.
+     """
+     # ==================================================================
+     # Results grouped by time dimension
+     # ==================================================================
+
+     # Retrieve variable names for timeseries scalar results
+     var_names_t = result.getResultAttributesTimeseriesScalar()
+
+     # Retrieve time coordinate values
+     time_coord = result.gethours()
+
+     # Extract result arrays for timeseries variables
+     results_t = getresultsvar_fordims(var_names_t, result)
+
+     # Create DataArray for timeseries results and convert to dataset
+     Results_da_t = xr.DataArray(
+         results_t,
+         coords={"variables": var_names_t, "time": time_coord},
+         dims=["variables", "time"],
+     )
+     Results_da_t = Results_da_t.rename("Results_t")
+     Results_ds_t = Results_da_t.to_dataset(dim="variables")
+
+     # ==================================================================
+     # Results grouped by time and depth segment dimensions
+     # ==================================================================
+
+     # Retrieve variable names for timeseries depth-segment results
+     var_names_tzseg = result.getResultAttributesTimeseriesDepthseg()
+
+     # Retrieve depth-segment coordinate values
+     zseg_coord = result.getzseg()
+
+     # Extract result arrays for timeseries depth-segment variables
+     results_tzseg = getresultsvar_fordims(var_names_tzseg, result)
+
+     # Create DataArray for timeseries depth-segment results and convert to dataset
+     Results_da_tzseg = xr.DataArray(
+         results_tzseg,
+         coords={"variables": var_names_tzseg, "time": time_coord, "zseg": zseg_coord},
+         dims=["variables", "time", "zseg"],
+     )
+     Results_da_tzseg = Results_da_tzseg.rename("Results_tzseg")
+     Results_ds_tzseg = Results_da_tzseg.to_dataset(dim="variables")
+
+     # ==================================================================
+     # Results grouped by time, depth, and pipe dimensions
+     # ==================================================================
+
+     # Retrieve variable names for timeseries depth-pipe results
+     var_names_tzp = result.getResultAttributesTimeserieDepth()
+
+     # Retrieve depth and pipe coordinate values
+     z_coord = result.getz()
+     pipes_coord = range(result.get_n_pipes())
+
+     # Extract result arrays for timeseries depth-pipe variables
+     results_tzp = getresultsvar_fordims(var_names_tzp, result)
+
+     # Create DataArray for timeseries depth-pipe results and convert to dataset
+     Results_da_tzp = xr.DataArray(
+         results_tzp,
+         coords={
+             "variables": var_names_tzp,
+             "time": time_coord,
+             "z": z_coord,
+             "nPipes": pipes_coord,
+         },
+         dims=["variables", "time", "z", "nPipes"],
+     )
+     Results_da_tzp = Results_da_tzp.rename("Results_tzp")
+     Results_ds_tzp = Results_da_tzp.to_dataset(dim="variables")
+
+     # ==================================================================
+     # Results grouped by depth segment dimension
+     # ==================================================================
+
+     # Retrieve variable names for depth-segment results
+     var_names_zseg = result.getResultAttributesDepthseg()
+
+     # Extract result arrays for depth-segment variables
+     results_zseg = getresultsvar_fordims(var_names_zseg, result)
+
+     # Create DataArray for depth-segment results and convert to dataset
+     Results_da_zseg = xr.DataArray(
+         results_zseg,
+         coords={"variables": var_names_zseg, "zseg": zseg_coord},
+         dims=["variables", "zseg"],
+     )
+     Results_da_zseg = Results_da_zseg.rename("Results_zseg")
+     Results_ds_zseg = Results_da_zseg.to_dataset(dim="variables")
+
+     # ==================================================================
+     # Input parameters grouped by various dimensions
+     # ==================================================================
+
+     # Combine variable and locked parameter names
+     variable_param_names, locked_param_names = get_param_names(config)
+     param_names = variable_param_names + locked_param_names
+
+     # Create dictionary of input parameters
+     param_dict = {
+         # Convert numpy arrays to plain lists so parameters can be grouped
+         # by length below.
+         key: (
+             getattr(config, key).tolist()
+             if isinstance(getattr(config, key), np.ndarray)
+             else getattr(config, key)
+         )
+         for key in param_names
+     }
+
+     # Create dataset for scalar parameters
+     param = {
+         key: value for key, value in param_dict.items() if not isinstance(value, list)
+     }
+     param_da = xr.DataArray(
+         list(param.values()), coords={"param": list(param.keys())}, dims=["param"]
+     )
+     param_ds = param_da.to_dataset(dim="param")
+
+     # Create dataset for pipe-specific parameters
+     param_x = {
+         key: value
+         for key, value in param_dict.items()
+         if isinstance(value, list) and len(value) == len(pipes_coord)
+     }
+
+     pipe_pos = np.asarray(param_x["pos"])
+     r_out = param_x["r_out"]
+
+     pipe_pos_ds = xr.Dataset(
+         data_vars=dict(pipe_pos=(["nPipes", "xy"], pipe_pos)),
+         coords=dict(nPipes=(["nPipes"], pipes_coord)),
+     )
+     r_out_ds = xr.Dataset(
+         data_vars=dict(r_out=(["nPipes"], r_out)),
+         coords=dict(nPipes=(["nPipes"], pipes_coord)),
+     )
+
+     # Create dataset for depth-segment specific parameters (k_s)
+     param_z_k_s = {
+         key: value
+         for key, value in param_dict.items()
+         if "k_s" in key and key not in param
+     }
+     param_z_k_s_da = xr.DataArray(
+         list(param_z_k_s.values()),
+         coords={
+             "param": list(param_z_k_s.keys()),
+             "layer_k_s": range(len(param_z_k_s["k_s"])),
+         },
+         dims=["param", "layer_k_s"],
+     )
+     param_z_k_s_ds = param_z_k_s_da.to_dataset(dim="param")
+
+     # Create dataset for depth-specific parameters (k_g)
+     param_z_k_g = {
+         key: value
+         for key, value in param_dict.items()
+         if key not in param and "k_g" in key
+     }
+     param_z_da = xr.DataArray(
+         list(param_z_k_g.values()),
+         coords={
+             "param": list(param_z_k_g.keys()),
+             "layer_k_g": range(len(param_z_k_g["k_g"])),
+         },
+         dims=["param", "layer_k_g"],
+     )
+     param_z_k_g_ds = param_z_da.to_dataset(dim="param")
+
+     # Create dataset for depth-specific temperature parameters (Tg)
+     param_z_Tg = {
+         key: value
+         for key, value in param_dict.items()
+         if key not in param and "Tg" in key
+     }
+
+     if "Tg" in param_z_Tg:
+         param_z_da = xr.DataArray(
+             list(param_z_Tg.values()),
+             coords={
+                 "param": list(param_z_Tg.keys()),
+                 "layer_Tg": range(len(param_z_Tg["Tg"])),
+             },
+             dims=["param", "layer_Tg"],
+         )
+         param_z_Tg_ds = param_z_da.to_dataset(dim="param")
+     else:
+         param_z_Tg_ds = None
+
+     # ==================================================================
+     # Merging datasets and saving to HDF5
+     # ==================================================================
+
+     # Merge parameter datasets, excluding None values
+     datasets_to_merge = [
+         ds
+         for ds in [
+             param_ds,
+             pipe_pos_ds,
+             r_out_ds,
+             param_z_k_s_ds,
+             param_z_k_g_ds,
+             param_z_Tg_ds,
+         ]
+         if ds is not None
+     ]
+     param_ds_fromds = xr.merge(datasets_to_merge)
+
+     # Merge results datasets
+     Results_ds_fromds = xr.merge(
+         [Results_ds_t, Results_ds_tzseg, Results_ds_tzp, Results_ds_zseg]
+     )
+
+     results_file = outpath.with_name(outpath.stem + "_SR.h5")
+
+     # Save merged datasets to HDF5 file
+     Results_ds_fromds.to_netcdf(results_file, group="results", engine="h5netcdf")
+     param_ds_fromds.to_netcdf(
+         results_file, group="parameters", engine="h5netcdf", mode="a"
+     )
+
+
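Assuming a file written by this function, both groups can be read back with xarray's h5netcdf engine; the file name below is hypothetical:

    results = xr.open_dataset("myrun_SR.h5", group="results", engine="h5netcdf")
    params = xr.open_dataset("myrun_SR.h5", group="parameters", engine="h5netcdf")
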
+ def process_variable_values(
+     results: Any, var_names: list[str], sample_da_shape: np.ndarray
+ ) -> np.ndarray:
+     """
+     Process variable values from Monte Carlo simulation results into a structured sample array.
+
+     This function extracts variables from a results object and constructs a
+     NumPy array that conforms to a given sample shape. Each variable's values
+     are expanded and stacked to ensure consistent dimensionality.
+
+     Parameters
+     ----------
+     results : object
+         Results from a Monte Carlo simulation run, containing attributes
+         corresponding to variable names.
+     var_names : list of str
+         List of variable names to extract from the results object. These
+         will form one dimension of the output array.
+     sample_da_shape : np.ndarray
+         Template array specifying the desired shape of the sample. Used to
+         ensure consistency in the output array dimensions.
+
+     Returns
+     -------
+     np.ndarray
+         Array containing variable values stacked according to the template
+         shape. Missing variables are filled with `None` entries.
+
+     Notes
+     -----
+     - Each variable is expanded along new axes if needed to match the sample
+       array's dimensionality.
+     - Variables are stacked along the first axis in the order of `var_names`.
+     """
+     # Initialize the sample array with the desired shape. This template is
+     # used only to fix the dimensionality of the output.
+     sample_da = sample_da_shape
+
+     # Iterate over each variable name and extract its value from the results.
+     for var in var_names:
+         # Extract the value for the current variable, or None if the
+         # attribute is not present on the results object.
+         var_value = getattr(results, var, None)
+
+         # Wrap in a numpy array for consistent processing.
+         var_values = np.array([var_value])
+
+         # Expand dimensions of the variable values array to match the
+         # dimensionality of the sample array.
+         while var_values.ndim < sample_da.ndim:
+             var_values = np.expand_dims(var_values, axis=0)
+
+         # Stack the variable values into the sample array. If the sample
+         # array is still empty, initialize it with the current values.
+         if sample_da.size == 0:
+             sample_da = var_values
+         else:
+             sample_da = np.vstack((sample_da, var_values))
+
+     return sample_da
+
+
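The dimension-matching logic can be seen in isolation with plain NumPy; the shapes below are arbitrary and the snippet is a sketch, not package code:

    template = np.empty((0, 1, 4), dtype=object)       # no variables yet; 1 sample, 4 time steps
    a = np.expand_dims(np.arange(4.0), axis=(0, 1))    # one variable, expanded to (1, 1, 4)
    b = np.expand_dims(np.arange(4.0) + 1, axis=(0, 1))
    stacked = a if template.size == 0 else np.vstack((template, a))
    stacked = np.vstack((stacked, b))                  # stacked.shape == (2, 1, 4)
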
+ def save_MCrun_results(
+     config: SingleRunConfig, param_dict: dict, results: list[Any], outpath: Path
+ ) -> None:
+     """
+     Save the results of a Monte Carlo model run to an HDF5 file.
+
+     The function organizes the results and input parameters into
+     structured xarray datasets, grouped by their dimensions, and
+     writes them to the specified HDF5 file.
+
+     Parameters
+     ----------
+     config : SingleRunConfig
+         Configuration object containing model settings and metadata.
+     param_dict : dict
+         Dictionary containing locked and variable input parameters
+         along with their values, as specified in the configuration.
+     results : list[Any]
+         List of results from Monte Carlo simulation runs. Each element
+         is a results object containing simulation outputs for one run.
+     outpath : Path
+         Path to the directory and file name for saving the HDF5 output.
+
+     Returns
+     -------
+     None
+         The function writes the datasets to an HDF5 file and does not return any value.
+
+     Notes
+     -----
+     - Each Monte Carlo sample is processed individually, and results are
+       concatenated along a 'samples' dimension.
+     - Input parameters are grouped into variable and locked parameters,
+       and datasets are created for parameters with special dimensions
+       such as pipe-specific or layer-specific values.
+     - The final HDF5 file contains two main groups: "results" and "parameters".
+     """
+     # List to store datasets for all Monte Carlo samples
+     Results_datasets = []
+
+     # Create sample and time coordinate values for all DataArrays
+     n_samples = len(results)
+     sample_coord = range(n_samples)
+     time_coord = results[0].gethours()
+
+     for sample in sample_coord:
+         # ================================================================
+         # Results: Variables with [variables, samples, time] dimensions
+         # ================================================================
+         var_names = results[sample].getResultAttributesTimeseriesScalar()
+
+         # Create a DataArray for the variable names defined above. The
+         # template shape must match the array built from the variable
+         # values, except along the first (variables) dimension.
+         sample_da_shape = np.empty((0, sample, 0), dtype=object)
+         sample_da = process_variable_values(results[sample], var_names, sample_da_shape)
+         Results_da_st = xr.DataArray(
+             sample_da,
+             coords={"variables": var_names, "samples": [sample], "time": time_coord},
+             dims=["variables", "samples", "time"],
+         )
+         Results_da_st = Results_da_st.rename("Results_st")
+
+         Results_ds_st = Results_da_st.to_dataset(dim="variables")
+
+         # ================================================================
+         # Results: Variables with [variables, samples, time, zseg] dimensions
+         # ================================================================
+         var_names = results[sample].getResultAttributesTimeseriesDepthseg()
+
+         # Create coordinate values for the DataArray
+         zseg_coord = results[sample].getzseg()
+
+         # Create a DataArray for the variable names defined above. The
+         # template shape must match the array built from the variable
+         # values, except along the first (variables) dimension.
+         sample_da_shape = np.empty((0, sample, len(time_coord), 0), dtype=object)
+         sample_da = process_variable_values(results[sample], var_names, sample_da_shape)
+         Results_da_stzseg = xr.DataArray(
+             sample_da,
+             coords={"variables": var_names, "samples": [sample], "time": time_coord},
+             dims=["variables", "samples", "time", "zseg"],
+         )
+         Results_da_stzseg = Results_da_stzseg.rename("Results_stzseg")
+         Results_ds_stzseg = Results_da_stzseg.to_dataset(dim="variables")
+
+         # Create coordinate values for the DataArray
+         pipes_coord = range(results[sample].get_n_pipes())
+
+         # ================================================================
+         # Results: Variables with [variables, samples, time, z, nPipes] dimensions
+         # ================================================================
+         var_names = results[sample].getResultAttributesTimeserieDepth()
+
+         # Create coordinate values for the DataArray
+         z_coord = results[sample].getz()
+
+         # Create a DataArray for the variable names defined above. The
+         # template shape must match the array built from the variable
+         # values, except along the first (variables) dimension.
+         sample_da_shape = np.empty((0, sample, len(time_coord), 0, 0), dtype=object)
+         sample_da = process_variable_values(results[sample], var_names, sample_da_shape)
+         Results_da_stz = xr.DataArray(
+             sample_da,
+             coords={
+                 "variables": var_names,
+                 "samples": [sample],
+                 "time": time_coord,
+                 "nPipes": pipes_coord,
+             },
+             dims=["variables", "samples", "time", "z", "nPipes"],
+         )
+         Results_da_stz = Results_da_stz.rename("Results_stz")
+         Results_ds_stz = Results_da_stz.to_dataset(dim="variables")
+
+         # ================================================================
+         # Interpolated subsurface variables with [variables, samples, zseg] dimensions
+         # ================================================================
+         # Create dataset from results with dims (n_var_names, n_samples, zseg)
+         var_names = results[sample].getResultAttributesDepthseg()
+
+         # Create a DataArray for the variable names defined above. The
+         # template shape must match the array built from the variable
+         # values, except along the first (variables) dimension.
+         sample_da_shape = np.empty((0, sample, 0), dtype=object)
+         sample_da = process_variable_values(results[sample], var_names, sample_da_shape)
+
+         Results_da_zsegk = xr.DataArray(
+             sample_da,
+             coords={"variables": var_names, "samples": [sample]},
+             dims=["variables", "samples", "zseg"],
+         )
+         Results_da_zsegk = Results_da_zsegk.rename("Results_zsegk")
+         Results_ds_zsegk = Results_da_zsegk.to_dataset(dim="variables")
+
+         # ==================================================================
+         # Create datasets for the coordinates whose length varies across
+         # samples. These coordinates are not stored with the variables
+         # themselves (those have dimensions without coordinates) but are
+         # stored separately here.
+         # ==================================================================
+
+         # dims: (n_var_names, n_samples, zseg)
+         sample_da_shape = np.empty((0, sample, len(zseg_coord)), dtype=object)
+         sample_da = process_variable_values(results[sample], ["zseg"], sample_da_shape)
+         Results_da_zseg = xr.DataArray(
+             sample_da,
+             coords={"variables": ["zseg_coord"], "samples": [sample]},
+             dims=["variables", "samples", "zseg"],
+         )
+         Results_da_zseg = Results_da_zseg.rename("Results_zseg")
+         Results_ds_zseg = Results_da_zseg.to_dataset(dim="variables")
+
+         # dims: (n_var_names, n_samples, z)
+         sample_da_shape = np.empty((0, sample, len(z_coord)), dtype=object)
+         sample_da = process_variable_values(results[sample], ["z"], sample_da_shape)
+         Results_da_z = xr.DataArray(
+             sample_da,
+             coords={"variables": ["z_coord"], "samples": [sample]},
+             dims=["variables", "samples", "z"],
+         )
+         Results_da_z = Results_da_z.rename("Results_z")
+         Results_ds_z = Results_da_z.to_dataset(dim="variables")
+
+         # ================================================================
+         # Merge all datasets for the current sample
+         # ================================================================
+         Results_ds_fromds = xr.merge(
+             [
+                 Results_ds_st,
+                 Results_ds_stzseg,
+                 Results_ds_stz,
+                 Results_ds_zseg,
+                 Results_ds_z,
+                 Results_ds_zsegk,
+             ]
+         )
+         Results_datasets.append(Results_ds_fromds)
+
+     # Concatenate datasets from all samples along the 'samples' dimension
+     Results_ds_total = xr.concat(Results_datasets, dim="samples")
+
+     # ================================================================
+     # Process input parameters
+     # ================================================================
+
+     # Retrieve input parameter names, split into locked and variable parameters
+     variable_param_names, locked_param_names = get_param_names(config)
+
+     # Create two dictionaries of parameter values, one for the variable
+     # parameters and one for the locked parameters
+     variable_param_dict = {key: param_dict[key] for key in variable_param_names}
+     locked_param_dict = {key: param_dict[key] for key in locked_param_names}
+
+     # Variable parameters: Create dataset with [param, samples] dimensions
+     variable_param_da = xr.DataArray(
+         list(variable_param_dict.values()),
+         coords={"param": list(variable_param_dict.keys()), "samples": sample_coord},
+         dims=["param", "samples"],
+     )
+     variable_param_ds = variable_param_da.to_dataset(dim="param")
+
+     # Locked parameters: Extract the first value for each parameter (assumes constant values)
+     locked_param_values = list(locked_param_dict.values())
+     single_locked_param_values = [sublist[0] for sublist in locked_param_values]
+     locked_param_single_dict = dict(
+         zip(locked_param_dict.keys(), single_locked_param_values)
+     )
+
+     # Convert ndarray values to lists in locked_param_single_dict
+     for key, value in locked_param_single_dict.items():
+         if isinstance(value, np.ndarray):
+             locked_param_single_dict[key] = value.tolist()
+
+     # Locked scalar parameters: Create dataset without extra dimensions
+     locked_param = {
+         key: value
+         for key, value in locked_param_single_dict.items()
+         if not isinstance(value, list)
+     }
+     locked_param_ds = xr.Dataset(locked_param)
+
+     # Locked pipe-specific parameters: stored in separate datasets with
+     # pipe and x,y dimensions in the borehole. These datasets are merged
+     # with the other parameter datasets below.
+     locked_param_x = {
+         key: value
+         for key, value in locked_param_single_dict.items()
+         if isinstance(value, list) and (len(value) == len(pipes_coord))
+     }
+
+     pipe_pos = np.asarray(locked_param_x["pos"])
+     r_out = locked_param_x["r_out"]
+
+     pipe_pos_ds = xr.Dataset(
+         data_vars=dict(pipe_pos=(["nPipes", "xy"], pipe_pos)),
+         coords=dict(nPipes=(["nPipes"], pipes_coord)),
+     )
+     r_out_ds = xr.Dataset(
+         data_vars=dict(r_out=(["nPipes"], r_out)),
+         coords=dict(nPipes=(["nPipes"], pipes_coord)),
+     )
+
+     # Create dataset from locked parameters with dims (n_param_names, nlayers_k_s)
+     locked_param_z_k_s = {
+         key: value
+         for key, value in locked_param_single_dict.items()
+         if key not in locked_param and "k_s" in key
+     }
+     locked_param_z_k_s_da = xr.DataArray(
+         list(locked_param_z_k_s.values()),
+         coords={
+             "param": list(locked_param_z_k_s.keys()),
+             "layer_k_s": range(len(locked_param_z_k_s["k_s"])),
+         },
+         dims=["param", "layer_k_s"],
+     )
+     locked_param_z_k_s_ds = locked_param_z_k_s_da.to_dataset(dim="param")
+
+     # Create dataset from locked parameters with dims (n_param_names, nlayers_k_g)
+     locked_param_z_k_g = {
+         key: value
+         for key, value in locked_param_single_dict.items()
+         if key not in locked_param and "k_g" in key
+     }
+     locked_param_z_k_g_da = xr.DataArray(
+         list(locked_param_z_k_g.values()),
+         coords={
+             "param": list(locked_param_z_k_g.keys()),
+             "layer_k_g": range(len(locked_param_z_k_g["k_g"])),
+         },
+         dims=["param", "layer_k_g"],
+     )
+     locked_param_z_k_g_ds = locked_param_z_k_g_da.to_dataset(dim="param")
+
+     # Create dataset from locked parameters with dims (n_param_names, nlayers_Tg)
+     locked_param_z_Tg = {
+         key: value
+         for key, value in locked_param_single_dict.items()
+         if key not in locked_param and "Tg" in key
+     }
+
+     if "Tg" in locked_param_z_Tg:
+         locked_param_z_Tg_da = xr.DataArray(
+             list(locked_param_z_Tg.values()),
+             coords={
+                 "param": list(locked_param_z_Tg.keys()),
+                 "layer_Tg": range(len(locked_param_z_Tg["Tg"])),
+             },
+             dims=["param", "layer_Tg"],
+         )
+         locked_param_z_Tg_ds = locked_param_z_Tg_da.to_dataset(dim="param")
+     else:
+         locked_param_z_Tg_ds = None
+
+     # ==================================================================
+     # Merge the results and parameter datasets and write them to HDF5
+     # ==================================================================
+     # Merge datasets, excluding None values
+     datasets_to_merge = [
+         ds
+         for ds in [
+             variable_param_ds,
+             locked_param_ds,
+             pipe_pos_ds,
+             r_out_ds,
+             locked_param_z_k_s_ds,
+             locked_param_z_k_g_ds,
+             locked_param_z_Tg_ds,
+         ]
+         if ds is not None
+     ]
+     param_ds_fromds = xr.merge(datasets_to_merge)
+
+     # ================================================================
+     # Save datasets to HDF5 file
+     # ================================================================
+     results_file = outpath.with_name(outpath.stem + "_MC.h5")
+
+     Results_ds_total.to_netcdf(results_file, group="results", engine="h5netcdf")
+     param_ds_fromds.to_netcdf(
+         results_file, group="parameters", engine="h5netcdf", mode="a"
+     )
+
+
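The Monte Carlo output can be reopened the same way and sliced along the samples dimension; the file name below is hypothetical:

    mc = xr.open_dataset("batch_MC.h5", group="results", engine="h5netcdf")
    first_sample = mc.isel(samples=0)  # results of the first Monte Carlo run
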
+ def apply_smoothing(
+     df: pd.DataFrame,
+     column: str,
+     smoothing: int | str | None = None,
+     outdir: Path | None = None,
+     prefix: str = "data",
+ ) -> pd.DataFrame:
+     """
+     Apply smoothing to a timeseries column in a dataframe.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Input dataframe (must contain `column`, may contain `local_time`).
+     column : str
+         Column name to smooth (e.g. "m_flow" or "Q").
+     smoothing : int | str | None
+         - int: rolling average window (in samples, e.g. hours).
+         - "D": daily average (requires `local_time`).
+         - "M": monthly average (requires `local_time`).
+         - "none" or None: no smoothing.
+     outdir : Path, optional
+         Directory to save the smoothed CSV (for "D" or "M").
+     prefix : str
+         Prefix for the output filename (e.g. "flow" or "load").
+
+     Returns
+     -------
+     pd.DataFrame
+         DataFrame with the smoothed column.
+
+     Raises
+     ------
+     ValueError
+         If `smoothing` is "D" or "M" and the `local_time` column is missing,
+         or if `smoothing` is an unsupported string.
+     """
+     if smoothing is None or str(smoothing).lower() == "none":
+         return df  # nothing to do
+
+     df = df.copy()
+
+     # Rolling smoothing by numeric window
+     if isinstance(smoothing, int):
+         df[column] = (
+             df[column].rolling(window=smoothing, min_periods=1).mean().ffill().bfill()
+         )
+         return df
+
+     # Daily / monthly smoothing requires local_time
+     if isinstance(smoothing, str):
+         if "local_time" not in df.columns:
+             raise ValueError(
+                 f"Smoothing='{smoothing}' requires a 'local_time' column in the input table."
+             )
+
+         df["date"] = pd.to_datetime(df["local_time"], format="mixed", dayfirst=True)
+         df["day"] = df["date"].dt.day
+         df["month"] = df["date"].dt.month
+         df["year"] = df["date"].dt.year
+
+         if smoothing.upper() == "M":
+             df[column] = df[column].groupby(df["month"]).transform("mean")
+             if outdir:
+                 df.to_csv(outdir / f"{prefix}_monthly.csv", index=False)
+
+         elif smoothing.upper() == "D":
+             df[column] = (
+                 df[column]
+                 .groupby([df["year"], df["month"], df["day"]])
+                 .transform("mean")
+             )
+             if outdir:
+                 df.to_csv(outdir / f"{prefix}_daily.csv", index=False)
+
+         else:
+             raise ValueError(
+                 f"Unsupported smoothing option '{smoothing}'. "
+                 "Use int (hours), 'D' (daily), 'M' (monthly), or None."
+             )
+
+     return df
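A short usage sketch with a synthetic hourly series; the data are made up, and `apply_smoothing` is assumed to be importable from geoloop.utils.helpers:

    import pandas as pd

    df = pd.DataFrame({
        "local_time": pd.date_range("2024-01-01", periods=48, freq="h").strftime("%d-%m-%Y %H:%M"),
        "Q": range(48),
    })
    rolling = apply_smoothing(df, "Q", smoothing=6)  # 6-sample rolling mean
    daily = apply_smoothing(df, "Q", smoothing="D")  # daily averages; needs local_time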