hydroBayesCal 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. hydroBayesCal/__init__.py +1 -0
  2. hydroBayesCal/delft3d/__init__.py +5 -0
  3. hydroBayesCal/delft3d/control_delft3d.py +106 -0
  4. hydroBayesCal/doepy/DOE_functions.py +522 -0
  5. hydroBayesCal/doepy/__init__.py +0 -0
  6. hydroBayesCal/doepy/doe_control.py +177 -0
  7. hydroBayesCal/doepy/pyDOE_corrected.py +339 -0
  8. hydroBayesCal/function_pool.py +885 -0
  9. hydroBayesCal/hysim.py +674 -0
  10. hydroBayesCal/openfoam/__init__.py +1 -0
  11. hydroBayesCal/openfoam/control_openfoam.py +1002 -0
  12. hydroBayesCal/plots/__init__.py +0 -0
  13. hydroBayesCal/plots/plots.py +3679 -0
  14. hydroBayesCal/plots/plots_config.py +76 -0
  15. hydroBayesCal/surrogate/__init__.py +10 -0
  16. hydroBayesCal/surrogate/bal_functions.py +856 -0
  17. hydroBayesCal/surrogate/exploration.py +389 -0
  18. hydroBayesCal/surrogate/gpe_gpytorch.py +1029 -0
  19. hydroBayesCal/surrogate/gpe_skl.py +469 -0
  20. hydroBayesCal/telemac/__init__.py +0 -0
  21. hydroBayesCal/telemac/config_telemac.py +61 -0
  22. hydroBayesCal/telemac/control_telemac.py +2396 -0
  23. hydroBayesCal/telemac/pputils/__init__.py +6 -0
  24. hydroBayesCal/telemac/pputils/ppmodules/__init__.py +1 -0
  25. hydroBayesCal/telemac/pputils/ppmodules/readMesh.py +294 -0
  26. hydroBayesCal/telemac/pputils/ppmodules/selafin_io_pp.py +491 -0
  27. hydroBayesCal/telemac/pputils/ppmodules/utilities.py +577 -0
  28. hydroBayesCal/telemac/pputils/ppmodules/writeMesh.py +116 -0
  29. hydroBayesCal/telemac/templates/parameters-gaia.csv +30 -0
  30. hydroBayesCal/telemac/templates/parameters-telemac2d.csv +55 -0
  31. hydroBayesCal/utils/VectrinoPostproc/__init__.py +0 -0
  32. hydroBayesCal/utils/VectrinoPostproc/compute_tke.py +157 -0
  33. hydroBayesCal/utils/VectrinoPostproc/despiking.py +407 -0
  34. hydroBayesCal/utils/VectrinoPostproc/get_ascii_data.py +208 -0
  35. hydroBayesCal/utils/VectrinoPostproc/plot_velocities.py +124 -0
  36. hydroBayesCal/utils/VectrinoPostproc/transformation.py +191 -0
  37. hydroBayesCal/utils/__init__.py +0 -0
  38. hydroBayesCal/utils/config_logging.py +47 -0
  39. hydroBayesCal/utils/config_physics.py +10 -0
  40. hydroBayesCal/utils/configuration.py +59 -0
  41. hydroBayesCal/utils/get_les_mesh_size.py +61 -0
  42. hydroBayesCal/utils/log.py +34 -0
  43. hydrobayescal-0.1.0.dist-info/METADATA +128 -0
  44. hydrobayescal-0.1.0.dist-info/RECORD +47 -0
  45. hydrobayescal-0.1.0.dist-info/WHEEL +5 -0
  46. hydrobayescal-0.1.0.dist-info/licenses/LICENSE +29 -0
  47. hydrobayescal-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1 @@
1
+ """hydroBayesCal: surrogate-assisted Bayesian calibration for hydrodynamic models."""
@@ -0,0 +1,5 @@
1
+ # Delft3D-FLOW bindings for hydroBayesCal (planned -- not yet implemented).
2
+ #
3
+ # This sub-package is a placeholder that defines the intended interface for
4
+ # coupling HydroBayesCal to Delft3D-FLOW (Deltares). See
5
+ # ``control_delft3d.Delft3DModel`` and the ``usage-delft3d`` documentation page.
@@ -0,0 +1,106 @@
1
+ """
2
+ Delft3D-FLOW binding for HydroBayesCal -- **planned, not yet implemented**.
3
+
4
+ This module is a placeholder that mirrors the TELEMAC
5
+ (:mod:`hydroBayesCal.telemac.control_telemac`) and OpenFOAM
6
+ (:mod:`hydroBayesCal.openfoam.control_openfoam`) bindings. It defines the
7
+ intended public interface for coupling HydroBayesCal to the structured-grid
8
+ **Delft3D-FLOW** engine (Deltares) so that the coupling can be implemented
9
+ incrementally without changing the surrogate / Bayesian-active-learning layer.
10
+
11
+ The :class:`Delft3DModel` class subclasses
12
+ :class:`hydroBayesCal.hysim.HydroSimulations`; the Python attribute names are
13
+ shared across solvers, while the *string and file conventions* below are
14
+ Delft3D-specific and must be preserved when the binding is filled in:
15
+
16
+ * ``<case>.mdf`` -- master definition FLOW file (the control file); the engine
17
+ is launched through ``config_d_hydro.xml`` and the ``d_hydro`` executable.
18
+ * Bed roughness via Chézy / Manning / White-Colebrook (``.rgh`` file or
19
+ ``Roughness`` keywords in the ``.mdf``); eddy viscosity/diffusivity
20
+ ``Vicouv`` / ``Dicouv``.
21
+ * ``trim-<case>.dat`` / ``trim-<case>.def`` -- NEFIS map (field) output.
22
+ * ``trih-<case>.dat`` / ``trih-<case>.def`` -- NEFIS history (monitoring-point)
23
+ output.
24
+
25
+ See the :doc:`usage-delft3d <usage-delft3d>` page for the planned workflow.
26
+ """
27
+
28
+ from hydroBayesCal.hysim import HydroSimulations
29
+
30
#: Flag that callers / tests can inspect; it stays ``False`` until the
#: Delft3D-FLOW binding is actually implemented.
DELFT3D_BINDING_IMPLEMENTED = False

# Single message raised by every placeholder entry point of this module.
_NOT_IMPLEMENTED_MSG = " ".join(
    (
        "The Delft3D-FLOW binding is planned but not yet implemented.",
        "Use the TELEMAC (hydroBayesCal.telemac.control_telemac.TelemacModel) or",
        "OpenFOAM (hydroBayesCal.openfoam.control_openfoam.OpenFOAMModel) bindings,",
        "or contribute the Delft3D-FLOW implementation in",
        "hydroBayesCal.delft3d.control_delft3d.",
    )
)
40
+
41
+
42
class Delft3DModel(HydroSimulations):
    """
    Stand-in for the future Delft3D-FLOW model wrapper (planned).

    The class only fixes the intended constructor signature and method names.
    The constructor stores the Delft3D-specific settings on the instance and
    then raises :class:`NotImplementedError`; the parent initializer is never
    called and no simulation is started.

    Parameters
    ----------
    control_file : str
        Master definition FLOW file, default ``"control.mdf"`` (Delft3D-FLOW
        convention ``<case>.mdf``).
    d_hydro_config : str
        Runtime configuration passed to the ``d_hydro`` launcher, default
        ``"config_d_hydro.xml"``.
    flow_executable : str
        Name of the Delft3D-FLOW launcher on ``PATH``, default ``"d_hydro"``.
    roughness_formulation : str
        Bed-roughness law used for the calibration parameters
        (``"Chezy"``, ``"Manning"`` or ``"WhiteColebrook"``), default
        ``"Manning"``.
    map_file_base, history_file_base : str
        Base names of the NEFIS map (``trim-<case>``) and history
        (``trih-<case>``) output files, defaults ``"trim"`` and ``"trih"``.
    *args, **kwargs
        Reserved for the common
        :class:`~hydroBayesCal.hysim.HydroSimulations` parameters
        (``model_dir``, ``res_dir``, ``calibration_pts_file_path``,
        ``calibration_parameters``, ``param_values``, ``calibration_quantities``,
        ``init_runs``, ``max_runs`` ...). They are accepted but not used yet.

    Raises
    ------
    NotImplementedError
        Always -- the binding is not implemented yet.
    """

    def __init__(
        self,
        control_file="control.mdf",
        d_hydro_config="config_d_hydro.xml",
        flow_executable="d_hydro",
        roughness_formulation="Manning",
        map_file_base="trim",
        history_file_base="trih",
        *args,
        **kwargs,
    ):
        # Record the Delft3D-specific configuration first so that the intended
        # interface is visible, then fail loudly rather than hand back a model
        # object that cannot run anything.
        delft3d_settings = {
            "control_file": control_file,
            "d_hydro_config": d_hydro_config,
            "flow_executable": flow_executable,
            "roughness_formulation": roughness_formulation,
            "map_file_base": map_file_base,
            "history_file_base": history_file_base,
        }
        for attribute, value in delft3d_settings.items():
            setattr(self, attribute, value)
        raise NotImplementedError(_NOT_IMPLEMENTED_MSG)

    def run_multiple_simulations(self, *args, **kwargs):
        """Placeholder for running the Delft3D-FLOW experimental-design simulations (planned)."""
        raise NotImplementedError(_NOT_IMPLEMENTED_MSG)

    def output_processing(self, *args, **kwargs):
        """Placeholder for extracting calibration quantities from NEFIS map/history output (planned)."""
        raise NotImplementedError(_NOT_IMPLEMENTED_MSG)
@@ -0,0 +1,522 @@
1
+ """
2
+ Core DOE functions
3
+ """
4
+ from pyDOE import *
5
+ from hydroBayesCal.doepy.pyDOE_corrected import *
6
+ #from diversipy import *
7
+ import pandas as _pd
8
+ import numpy as _np
9
+
10
+
11
def construct_df(x, r):
    """Construct a DataFrame by replacing level indices with the actual factor levels.

    Every entry of ``x`` is truncated to an integer and used as an index into
    the level list of its column, i.e. ``result[i, j] = r[j][int(x[i, j])]``.

    The values are gathered with NumPy instead of the former chained
    assignment ``df.iloc[i][j] = ...``. Under pandas Copy-on-Write that
    assignment writes into a temporary object and leaves the frame unchanged.

    :param x: 2-D array-like (runs x factors) of level indices, e.g. the output
        of a pyDOE-style design builder
    :param r: sequence of level lists, one list per column of ``x``
    :return: pandas.DataFrame of dtype float32 (unchanged from the previous
        implementation) with default integer row and column labels
    """
    # Going through float32 first and truncating afterwards reproduces the
    # former ``int(df.iloc[i][j])`` on a float32 frame.
    level_index = _np.asarray(x, dtype='float32').astype(int)
    if level_index.ndim == 1:
        # pandas turns a 1-D input into a single column; mirror that here.
        level_index = level_index.reshape(-1, 1)

    levels_frame = _np.empty(level_index.shape, dtype='float32')
    for col in range(level_index.shape[1]):
        factor_levels = r[col]
        levels_frame[:, col] = [factor_levels[k] for k in level_index[:, col]]

    return _pd.DataFrame(data=levels_frame)
23
+
24
+
25
def construct_df_from_matrix(x, factor_array):
    """
    Project a coded design matrix onto the physical factor levels.

    ``x`` is the output of a design-of-experiment build with coded values
    centred on 0 (-1 = low, 0 = centre, +1 = high; other values are allowed).
    Row ``j`` of ``factor_array`` is read as ``[low, mid, high]`` for factor
    ``j``: the codes -1, 0 and +1 return exactly these entries, every other
    code is placed relative to the nearest end point in units of half the
    low-to-high distance.

    :param _np.array x: 2-D coded design matrix (runs x factors)
    :param _np.array factor_array: one ``[low, mid, high]`` row per factor
    :return: pandas.DataFrame with the projected values
    """

    def project(code, levels):
        # The three exact codes map straight onto the tabulated levels.
        if code == -1:
            return levels[0]
        if code == 0:
            return levels[1]
        if code == 1:
            return levels[2]
        # Any other code is measured from the closest end point.
        half_span = _np.abs(levels[2] - levels[0]) / 2
        if code < 0:
            return levels[0] - ((_np.abs(code) - 1) * half_span)
        return levels[2] + ((code - 1) * half_span)

    n_runs, n_factors = x.shape[0], x.shape[1]
    projected = _np.zeros((n_runs, n_factors))

    for run in range(n_runs):
        for factor in range(n_factors):
            projected[run, factor] = project(x[run, factor], factor_array[factor])

    return _pd.DataFrame(data=projected)
63
+
64
+
65
def construct_df_from_random_matrix(x, factor_array):
    """
    Project a unit-hypercube design matrix onto the factor ranges.

    ``x`` is the output of a design-of-experiment build with numbers between
    0 and 1. Column ``j`` is mapped linearly onto the range spanned by the
    first two entries of row ``j`` of ``factor_array``: 0 gives the smaller
    bound and 1 gives the larger bound.

    The projection always starts at the true minimum of the two bounds. The
    former version started at the first entry, so a range given as
    ``[max, min]`` produced values above the maximum.

    :param _np.array x: 2-D matrix (runs x factors) with values in [0, 1]
    :param _np.array factor_array: one row per factor whose first two entries
        are the range limits
    :return: pandas.DataFrame with the projected values
    :raises IndexError: if ``factor_array`` does not provide two bounds for
        every column of ``x``
    """
    unit = _np.asarray(x, dtype=float)
    col_num = unit.shape[1]  # Number of columns in the matrix x

    bounds = _np.asarray(factor_array, dtype=float)
    if col_num and (bounds.ndim != 2 or bounds.shape[0] < col_num or bounds.shape[1] < 2):
        raise IndexError(
            "factor_array must provide two range limits for each of the "
            f"{col_num} columns of x, got shape {bounds.shape}."
        )
    if col_num == 0:
        # Nothing to project; keep the (rows x 0) shape of the input.
        return _pd.DataFrame(data=_np.zeros(unit.shape))

    first = bounds[:col_num, 0]
    second = bounds[:col_num, 1]
    low = _np.minimum(first, second)
    span = _np.abs(second - first)

    # Broadcasting applies each factor's (low, span) pair to its own column.
    return _pd.DataFrame(data=low + (unit * span))
92
+
93
+
94
def build_full_fact(factor_level_ranges):
    """
    Build a full factorial design dataframe from a dictionary of factor/level ranges.

    The number of levels of every factor is handed to ``fullfact_corrected``;
    the resulting index matrix is turned into actual levels by ``construct_df``
    and the columns are labelled with the dictionary keys.

    :param dict factor_level_ranges: Something like {'Pressure':[50,60,70],'Temperature':[290, 320, 350],'Flow rate':[0.9,1.0]}
    :return: pandas.DataFrame with one column per factor
    """
    level_lists = [factor_level_ranges[name] for name in factor_level_ranges]
    level_counts = [len(levels) for levels in level_lists]

    design = construct_df(fullfact_corrected(level_counts), level_lists)
    design.columns = factor_level_ranges.keys()
    return design
113
+
114
+
115
def build_frac_fact(factor_level_ranges, gen_string):
    """
    Build a 2-level fractional factorial DataFrame from a dictionary and a generator string.

    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    This function requires some knowledge of how the confounding will be allowed
    (confounding means that some factor effects get muddled with other
    interaction effects, so it is harder to distinguish between them).

    If the number of runs of a full-factorial design is not affordable, a
    fraction of it can be chosen systematically by allowing some factor main
    effects to be confounded with factor interaction effects. This is done with
    an alias structure that defines these interactions symbolically, written
    like "C = AB", "I = ABC" or "AB = CD". These define how one column is
    related to the others.

    EXAMPLE
    ------------
    The alias "C = AB" or "I = ABC" indicates that there are three factors
    (A, B, and C) and that the main effect of factor C is confounded with the
    interaction effect of the product AB, and by extension, A is confounded
    with BC and B is confounded with AC. A full-factorial design with these
    three factors has 8 runs; the fractional design has 4. It needs a matrix
    with three columns (A, B, C) in which the levels of column C are the
    product of the A and B columns.

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param str gen_string: whitespace-separated generator terms, one per
        factor, handed to ``fracfact``
    :return: pandas.DataFrame with one column per factor, or None (after
        printing a message) if the number of generator terms differs from the
        number of factors
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    two_level = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        two_level[key] = levels

    # Collapse repeated / leading / trailing whitespace so it cannot distort
    # the term count or the generator handed to fracfact.
    generator_terms = gen_string.split()
    if len(two_level) != len(generator_terms):
        print(
            "Length of the generator string for the fractional factorial build does not match the length of the process variables dictionary")
        return None

    x = fracfact(" ".join(generator_terms))
    # The design codes the low level as -1; turn it into list index 0. The
    # high level (+1) already is list index 1.
    x = _np.where(x == -1, 0, x)

    df = construct_df(x, list(two_level.values()))
    df.columns = list(two_level)

    return df
168
+
169
+
170
def build_plackett_burman(factor_level_ranges):
    """
    Build a Plackett-Burman dataframe from a dictionary of factor/level ranges.

    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    Plackett-Burman designs are experimental designs presented in 1946 by
    Robin L. Plackett and J. P. Burman while working in the British Ministry of
    Supply. Their goal was to find experimental designs for investigating the
    dependence of some measured quantity on a number of independent variables
    (factors), each taking L levels, in such a way as to minimize the variance
    of the estimates of these dependencies using a limited number of
    experiments.

    Interactions between the factors were considered negligible. The solution
    is an experimental design where each combination of levels for any pair of
    factors appears the same number of times throughout all the experimental
    runs. A complete factorial design would satisfy this criterion, but the
    idea was to find smaller designs.

    The number of trial conditions (rows) of these designs expands by multiples
    of four (e.g. 4, 8, 12, etc.). The max number of columns allowed before a
    design increases the number of rows is always one less than the next
    higher multiple of four.

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    two_level = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        two_level[key] = levels

    x = pbdesign(len(two_level))
    # The design codes the low level as -1; turn it into list index 0. The
    # high level (+1) already is list index 1.
    x = _np.where(x == -1, 0, x)

    df = construct_df(x, list(two_level.values()))
    df.columns = list(two_level)

    return df
213
+
214
+
215
def build_sukharev(factor_level_ranges, num_samples=None):
    """
    Build a Sukharev-grid hypercube design dataframe from a dictionary of factor/level ranges.

    Number of samples raised to the power of (1/dimension), where dimension is
    the number of variables, must be an integer; otherwise the sample size is
    increased to the next value that satisfies this and a message is printed.
    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    Special property of this grid is that points are not placed on the
    boundaries of the hypercube, but at centroids of the subcells constituted
    by individual samples. This design offers optimal results for the covering
    radius regarding distances based on the max-norm.

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param int num_samples: number of samples to be generated; if omitted, the
        number of factors is used as starting value (as in the other sampling
        builders of this module) before the grid adjustment
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    bounds = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        bounds[key] = levels

    factor_count = len(bounds)

    if not num_samples:  # i.e. num_samples = None (used to raise a TypeError below)
        num_samples = factor_count

    # Find the smallest number of points per axis whose grid holds num_samples.
    # The floating point root is only a first guess (e.g. 125 ** (1 / 3) is
    # 4.999...), so it is corrected with exact integer arithmetic.
    per_axis = int(round(num_samples ** (1 / factor_count)))
    while per_axis ** factor_count < num_samples:
        per_axis += 1
    while per_axis > 1 and (per_axis - 1) ** factor_count >= num_samples:
        per_axis -= 1

    if per_axis ** factor_count != num_samples:
        num_samples = per_axis ** factor_count
        print("\nNumber of samples not adequate to fill a Sukharev grid. Increasing sample size to: ", num_samples)

    # NOTE(review): sukharev_grid is not defined in the visible part of this
    # file and the diversipy import is commented out -- confirm that one of the
    # star imports provides it.
    x = sukharev_grid(num_points=num_samples, dimension=factor_count)

    df = construct_df_from_random_matrix(x, _np.array(list(bounds.values())))
    df.columns = list(bounds)
    return df
250
+
251
+
252
def build_box_behnken(factor_level_ranges, center=1):
    """
    Build a Box-Behnken design dataframe from a dictionary of factor/level ranges.

    Note 3 levels of factors are necessary. If only two are given, the function
    creates the third one as the average of the two end points.
    Example of the dictionary:
    {'Pressure':[50,60,70],'Temperature':[290, 320, 350],'Flow rate':[0.9,1.0,1.1]}

    In statistics, Box-Behnken designs are experimental designs for response
    surface methodology, devised by George E. P. Box and Donald Behnken in
    1960, to achieve the following goals:
    * Each factor, or independent variable, is placed at one of three equally
      spaced values, usually coded as -1, 0, +1. (At least three levels are
      needed for the following goal.)
    * The design should be sufficient to fit a quadratic model, that is, one
      containing squared terms, products of two factors, linear terms and an
      intercept.
    * The ratio of the number of experimental points to the number of
      coefficients in the quadratic model should be reasonable (in fact, their
      designs kept it in the range of 1.5 to 2.6).
    * The estimation variance should more or less depend only on the distance
      from the centre (this is achieved exactly for the designs with 4 and 7
      factors), and should not vary too much inside the smallest (hyper)cube
      containing the experimental points.

    The input dictionary and its level lists are not modified (the mid-point
    used to be appended to the caller's lists).

    :param dict factor_level_ranges: factor name -> two or three levels
    :param int center: forwarded to ``bbdesign_corrected`` as ``center``
    :return: pandas.DataFrame with one column per factor
    """
    # Complete two-level factors with their mid-point on a private copy.
    three_level = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) == 2:
            levels = sorted(levels + [(levels[0] + levels[1]) / 2])
            print(f"{key} had only two end points. Creating a mid-point by averaging them")
        three_level[key] = levels

    x = bbdesign_corrected(len(three_level), center=center)
    x = x + 1  # Shift the coded levels (-1, 0, +1) to list indices (0, 1, 2)

    df = construct_df(x, list(three_level.values()))
    df.columns = list(three_level)

    return df
284
+
285
+
286
def build_central_composite(factor_level_ranges, center=(2, 2), alpha='o', face='ccc'):
    """
    Build a central-composite design dataframe from a dictionary of factor/level ranges.

    Only min and max values of the range are required; the mid-point of every
    factor is computed as their average.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    In statistics, a central composite design is an experimental design, useful
    in response surface methodology, for building a second order (quadratic)
    model for the response variable without needing to use a complete
    three-level factorial experiment.
    The design consists of three distinct sets of experimental runs:
    * A factorial (perhaps fractional) design in the factors studied, each
      having two levels;
    * A set of center points, experimental runs whose values of each factor are
      the medians of the values used in the factorial portion. This point is
      often replicated in order to improve the precision of the experiment;
    * A set of axial points, experimental runs identical to the centre points
      except for one factor, which will take on values both below and above the
      median of the two factorial levels, and typically both outside their
      range. All factors are varied in this way.

    The input dictionary and its level lists are not modified (the mid-point
    used to be appended to the caller's lists).

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param center: forwarded to ``ccdesign`` as ``center``
    :param alpha: forwarded to ``ccdesign`` as ``alpha``
    :param face: forwarded to ``ccdesign`` as ``face``
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Build [low, mid, high] for every factor on a private copy, so that
    # neither the caller's dictionary nor its lists are changed.
    three_level = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        # Creates the mid-point by averaging the low and high levels
        three_level[key] = sorted(levels + [(levels[0] + levels[1]) / 2])

    x = ccdesign(len(three_level), center=center, alpha=alpha, face=face)

    df = construct_df_from_matrix(x, _np.array(list(three_level.values())))
    df.columns = list(three_level)
    return df
323
+
324
+
325
def build_lhs(factor_level_ranges, num_samples=None, prob_distribution=None):
    """
    Build a Latin Hypercube design dataframe from a dictionary of factor/level ranges.

    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    Latin hypercube sampling (LHS) is a form of stratified sampling that can be
    applied to multiple variables. The method is commonly used to reduce the
    number of runs necessary for a Monte Carlo simulation to achieve a
    reasonably accurate random distribution. LHS can be incorporated into an
    existing Monte Carlo model fairly easily, and works with variables
    following any analytical probability distribution.

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param int num_samples: number of samples to be generated; defaults to the
        number of factors
    :param prob_distribution: intended for strings like 'Normal', 'Poisson',
        'Exponential', 'Beta', 'Gamma'; accepted for compatibility but
        currently NOT used -- the samples are always projected linearly onto
        the ranges
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    bounds = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        bounds[key] = levels

    factor_count = len(bounds)

    if not num_samples:  # i.e. num_samples = None
        num_samples = factor_count

    x = lhs(n=factor_count, samples=num_samples)

    df = construct_df_from_random_matrix(x, _np.array(list(bounds.values())))
    df.columns = list(bounds)
    return df
358
+
359
+
360
def build_space_filling_lhs(factor_level_ranges, num_samples=None):
    """
    Build a space-filling Latin Hypercube design dataframe from a dictionary of factor/level ranges.

    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param int num_samples: number of samples to be generated; defaults to the
        number of factors
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    bounds = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        bounds[key] = levels

    factor_count = len(bounds)

    if not num_samples:  # i.e. num_samples = None
        num_samples = factor_count

    # NOTE(review): lhd_matrix / transform_spread_out are not defined in the
    # visible part of this file and the diversipy import is commented out --
    # confirm that one of the star imports provides them.
    x = transform_spread_out(
        lhd_matrix(num_points=num_samples, dimension=factor_count))  # create latin hypercube design

    df = construct_df_from_random_matrix(x, _np.array(list(bounds.values())))
    df.columns = list(bounds)
    return df
390
+
391
+
392
def build_random_k_means(factor_level_ranges, num_samples=None):
    """
    Build a design with random _k-means_ clusters from a dictionary of process variables.

    Produce a centroidal Voronoi tesselation of the unit random hypercube and
    generate k-means clusters.
    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param int num_samples: number of samples to be generated; defaults to the
        number of factors
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    bounds = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        bounds[key] = levels

    factor_count = len(bounds)

    if not num_samples:  # i.e. num_samples = None
        num_samples = factor_count

    # NOTE(review): random_k_means is not defined in the visible part of this
    # file and the diversipy import is commented out -- confirm that one of the
    # star imports provides it.
    x = random_k_means(num_points=num_samples, dimension=factor_count)  # create random k-means design

    df = construct_df_from_random_matrix(x, _np.array(list(bounds.values())))
    df.columns = list(bounds)
    return df
422
+
423
+
424
def build_maximin(factor_level_ranges, num_samples=None):
    """
    Build a maximin reconstructed design dataframe from a dictionary of factor/level ranges.

    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    This algorithm carries out a number of iterations to maximize the minimal
    distance of a point in the set to
    * other points in the set,
    * existing (fixed) points,
    * the boundary of the hypercube.

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param int num_samples: number of samples to be generated; defaults to the
        number of factors
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    bounds = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        bounds[key] = levels

    factor_count = len(bounds)

    if not num_samples:  # i.e. num_samples = None
        num_samples = factor_count

    # NOTE(review): maximin_reconstruction is not defined in the visible part
    # of this file and the diversipy import is commented out -- confirm that
    # one of the star imports provides it.
    x = maximin_reconstruction(num_points=num_samples, dimension=factor_count)  # create maximin design

    df = construct_df_from_random_matrix(x, _np.array(list(bounds.values())))
    df.columns = list(bounds)
    return df
458
+
459
+
460
def build_halton(factor_level_ranges, num_samples=None):
    """
    Build a Halton matrix based design from a dictionary of process variables.

    Output is a quasirandom dataframe from a dictionary of factor/level ranges
    using prime numbers as seed.
    Only min and max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    Quasirandom sequence using the default initialization with first n prime
    numbers equal to the number of factors/variables.

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param int num_samples: number of samples to be generated; defaults to the
        number of factors
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    bounds = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        bounds[key] = levels

    factor_count = len(bounds)

    if not num_samples:  # i.e. num_samples = None
        num_samples = factor_count

    # NOTE(review): halton is not defined in the visible part of this file and
    # the diversipy import is commented out -- confirm that one of the star
    # imports provides it.
    x = halton(num_points=num_samples, dimension=factor_count)  # create Halton matrix design

    df = construct_df_from_random_matrix(x, _np.array(list(bounds.values())))
    df.columns = list(bounds)
    return df
492
+
493
+
494
def build_uniform_random(factor_level_ranges, num_samples=None):
    """
    Build a design dataframe with samples drawn from a uniform random distribution.

    The design is based on a dictionary of factor/level ranges. Only min and
    max values of the range are required.
    Example of the dictionary:
    {'Pressure':[50,70],'Temperature':[290, 350],'Flow rate':[0.9,1.0]}

    The input dictionary and its level lists are not modified.

    :param dict factor_level_ranges: factor name -> levels; only the first and
        the last level of each factor are used
    :param int num_samples: number of samples to be generated; defaults to the
        number of factors
    :return: pandas.DataFrame with one column per factor
    :raises IndexError: if a factor has fewer than two levels
    """
    # Reduce every factor to [first level, last level] on a private copy, so
    # that neither the caller's dictionary nor its lists are changed.
    bounds = {}
    for key, levels in factor_level_ranges.items():
        levels = list(levels)
        if len(levels) < 2:
            # IndexError is what too-short level lists raised before as well.
            raise IndexError(f"{key} needs at least two levels (min and max), got {levels!r}.")
        if len(levels) > 2:
            levels = [levels[0], levels[-1]]
            print(f"{key} had more than two levels. Assigning the end point to the high level.")
        bounds[key] = levels

    factor_count = len(bounds)

    if not num_samples:  # i.e. num_samples = None
        num_samples = factor_count

    # NOTE(review): random_uniform is not defined in the visible part of this
    # file and the diversipy import is commented out -- confirm that one of the
    # star imports provides it.
    x = random_uniform(num_points=num_samples, dimension=factor_count)  # create uniform random design

    df = construct_df_from_random_matrix(x, _np.array(list(bounds.values())))
    df.columns = list(bounds)
    return df
File without changes