holobench 1.3.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. bencher/__init__.py +41 -0
  2. bencher/bench_cfg.py +462 -0
  3. bencher/bench_plot_server.py +100 -0
  4. bencher/bench_report.py +268 -0
  5. bencher/bench_runner.py +136 -0
  6. bencher/bencher.py +805 -0
  7. bencher/caching.py +51 -0
  8. bencher/example/__init__.py +0 -0
  9. bencher/example/benchmark_data.py +200 -0
  10. bencher/example/example_all.py +45 -0
  11. bencher/example/example_categorical.py +99 -0
  12. bencher/example/example_custom_sweep.py +59 -0
  13. bencher/example/example_docs.py +34 -0
  14. bencher/example/example_float3D.py +101 -0
  15. bencher/example/example_float_cat.py +98 -0
  16. bencher/example/example_floats.py +89 -0
  17. bencher/example/example_floats2D.py +93 -0
  18. bencher/example/example_holosweep.py +104 -0
  19. bencher/example/example_holosweep_objects.py +111 -0
  20. bencher/example/example_holosweep_tap.py +144 -0
  21. bencher/example/example_image.py +82 -0
  22. bencher/example/example_levels.py +181 -0
  23. bencher/example/example_pareto.py +53 -0
  24. bencher/example/example_sample_cache.py +85 -0
  25. bencher/example/example_sample_cache_context.py +116 -0
  26. bencher/example/example_simple.py +134 -0
  27. bencher/example/example_simple_bool.py +34 -0
  28. bencher/example/example_simple_cat.py +47 -0
  29. bencher/example/example_simple_float.py +38 -0
  30. bencher/example/example_strings.py +46 -0
  31. bencher/example/example_time_event.py +62 -0
  32. bencher/example/example_video.py +124 -0
  33. bencher/example/example_workflow.py +189 -0
  34. bencher/example/experimental/example_bokeh_plotly.py +38 -0
  35. bencher/example/experimental/example_hover_ex.py +45 -0
  36. bencher/example/experimental/example_hvplot_explorer.py +39 -0
  37. bencher/example/experimental/example_interactive.py +75 -0
  38. bencher/example/experimental/example_streamnd.py +49 -0
  39. bencher/example/experimental/example_streams.py +36 -0
  40. bencher/example/experimental/example_template.py +40 -0
  41. bencher/example/experimental/example_updates.py +84 -0
  42. bencher/example/experimental/example_vector.py +84 -0
  43. bencher/example/meta/example_meta.py +171 -0
  44. bencher/example/meta/example_meta_cat.py +25 -0
  45. bencher/example/meta/example_meta_float.py +23 -0
  46. bencher/example/meta/example_meta_levels.py +26 -0
  47. bencher/example/optuna/example_optuna.py +78 -0
  48. bencher/example/shelved/example_float2D_scatter.py +109 -0
  49. bencher/example/shelved/example_float3D_cone.py +96 -0
  50. bencher/example/shelved/example_kwargs.py +63 -0
  51. bencher/job.py +184 -0
  52. bencher/optuna_conversions.py +168 -0
  53. bencher/plotting/__init__.py +0 -0
  54. bencher/plotting/plot_filter.py +110 -0
  55. bencher/plotting/plt_cnt_cfg.py +74 -0
  56. bencher/results/__init__.py +0 -0
  57. bencher/results/bench_result.py +80 -0
  58. bencher/results/bench_result_base.py +405 -0
  59. bencher/results/float_formatter.py +44 -0
  60. bencher/results/holoview_result.py +592 -0
  61. bencher/results/optuna_result.py +354 -0
  62. bencher/results/panel_result.py +113 -0
  63. bencher/results/plotly_result.py +65 -0
  64. bencher/utils.py +148 -0
  65. bencher/variables/inputs.py +193 -0
  66. bencher/variables/parametrised_sweep.py +206 -0
  67. bencher/variables/results.py +176 -0
  68. bencher/variables/sweep_base.py +167 -0
  69. bencher/variables/time.py +74 -0
  70. bencher/video_writer.py +30 -0
  71. bencher/worker_job.py +40 -0
  72. holobench-1.3.6.dist-info/METADATA +85 -0
  73. holobench-1.3.6.dist-info/RECORD +74 -0
  74. holobench-1.3.6.dist-info/WHEEL +5 -0
bencher/bencher.py ADDED
@@ -0,0 +1,805 @@
+import logging
+from datetime import datetime
+from itertools import product, combinations
+
+from typing import Callable, List
+from copy import deepcopy
+import numpy as np
+import param
+import xarray as xr
+from diskcache import Cache
+from contextlib import suppress
+from functools import partial
+
+from bencher.worker_job import WorkerJob
+
+from bencher.bench_cfg import BenchCfg, BenchRunCfg, DimsCfg
+from bencher.bench_plot_server import BenchPlotServer
+from bencher.bench_report import BenchReport
+
+from bencher.variables.inputs import IntSweep
+from bencher.variables.time import TimeSnapshot, TimeEvent
+from bencher.variables.results import (
+    ResultVar,
+    ResultVec,
+    ResultHmap,
+    ResultVideo,
+    ResultImage,
+    ResultString,
+    ResultContainer,
+    ResultReference,
+)
+from bencher.results.bench_result import BenchResult
+from bencher.variables.parametrised_sweep import ParametrizedSweep
+from bencher.job import Job, FutureCache, JobFuture, Executors
+
+# Customize the formatter
+formatter = logging.Formatter("%(levelname)s: %(message)s")
+logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
+
+
+for handler in logging.root.handlers:
+    handler.setFormatter(formatter)
+
+
+def set_xarray_multidim(data_array: xr.DataArray, index_tuple, value: float) -> xr.DataArray:
+    """Set a single value in a multidimensional xarray DataArray at the given index tuple.
+
+    This is clumsy and should be done in a better way, but DataArray indexing does not accept
+    *args-style unpacking, so each dimensionality up to 9 is spelled out explicitly.
+    """
+    match len(index_tuple):
+        case 1:
+            data_array[index_tuple[0]] = value
+        case 2:
+            data_array[index_tuple[0], index_tuple[1]] = value
+        case 3:
+            data_array[index_tuple[0], index_tuple[1], index_tuple[2]] = value
+        case 4:
+            data_array[index_tuple[0], index_tuple[1], index_tuple[2], index_tuple[3]] = value
+        case 5:
+            data_array[
+                index_tuple[0], index_tuple[1], index_tuple[2], index_tuple[3], index_tuple[4]
+            ] = value
+        case 6:
+            data_array[
+                index_tuple[0],
+                index_tuple[1],
+                index_tuple[2],
+                index_tuple[3],
+                index_tuple[4],
+                index_tuple[5],
+            ] = value
+        case 7:
+            data_array[
+                index_tuple[0],
+                index_tuple[1],
+                index_tuple[2],
+                index_tuple[3],
+                index_tuple[4],
+                index_tuple[5],
+                index_tuple[6],
+            ] = value
+        case 8:
+            data_array[
+                index_tuple[0],
+                index_tuple[1],
+                index_tuple[2],
+                index_tuple[3],
+                index_tuple[4],
+                index_tuple[5],
+                index_tuple[6],
+                index_tuple[7],
+            ] = value
+        case 9:
+            data_array[
+                index_tuple[0],
+                index_tuple[1],
+                index_tuple[2],
+                index_tuple[3],
+                index_tuple[4],
+                index_tuple[5],
+                index_tuple[6],
+                index_tuple[7],
+                index_tuple[8],
+            ] = value
+
+    return data_array
+
+
+def kwargs_to_input_cfg(worker_input_cfg: ParametrizedSweep, **kwargs) -> ParametrizedSweep:
+    input_cfg = worker_input_cfg()
+    input_cfg.param.update(kwargs)
+    return input_cfg
+
+
+def worker_cfg_wrapper(worker, worker_input_cfg: ParametrizedSweep, **kwargs) -> dict:
+    input_cfg = kwargs_to_input_cfg(worker_input_cfg, **kwargs)
+    return worker(input_cfg)
+
+
+def worker_kwargs_wrapper(worker: Callable, bench_cfg: BenchCfg, **kwargs) -> dict:
+    function_input_deep = deepcopy(kwargs)
+    if not bench_cfg.pass_repeat:
+        function_input_deep.pop("repeat")
+    if "over_time" in function_input_deep:
+        function_input_deep.pop("over_time")
+    if "time_event" in function_input_deep:
+        function_input_deep.pop("time_event")
+    return worker(**function_input_deep)
+
+
+class Bench(BenchPlotServer):
+    def __init__(
+        self,
+        bench_name: str = None,
+        worker: Callable | ParametrizedSweep = None,
+        worker_input_cfg: ParametrizedSweep = None,
+        run_cfg=None,
+        report=None,
+    ) -> None:
+        """Create a new Bench object from a function and a class defining the inputs to the function.
+
+        Args:
+            bench_name (str): The name of the benchmark and output folder for the figures
+            worker (Callable | ParametrizedSweep): A function that accepts a class of type (worker_input_cfg)
+            worker_input_cfg (ParametrizedSweep): A class defining the parameters of the function.
+            run_cfg (BenchRunCfg, optional): A benchmark run configuration used as the default for sweeps.
+            report (BenchReport, optional): Append results to this existing report instead of creating a new one.
+        """
+        assert isinstance(bench_name, str)
+        self.bench_name = bench_name
+        self.worker = None
+        self.worker_class_instance = None
+        self.worker_input_cfg = None
+        self.set_worker(worker, worker_input_cfg)
+        self.run_cfg = run_cfg
+        if report is None:
+            self.report = BenchReport(self.bench_name)
+        else:
+            self.report = report
+            if self.report.bench_name is None:
+                self.report.bench_name = self.bench_name
+        self.results = []
+
+        self.bench_cfg_hashes = []  # a list of hashes that point to benchmark results
+        self.last_run_cfg = None  # cached run_cfg used to pass to the plotting function
+        self.sample_cache = None  # store the results of each benchmark function call in a cache
+        self.ds_dynamic = {}  # A dictionary to store unstructured vector datasets
+
+        self.cache_size = int(100e9)  # default to 100 GB
+
+    def set_worker(self, worker: Callable, worker_input_cfg: ParametrizedSweep = None) -> None:
+        """Set the benchmark worker function and optionally the type the worker expects
+
+        Args:
+            worker (Callable): The benchmark worker function
+            worker_input_cfg (ParametrizedSweep, optional): The input type the worker expects. Defaults to None.
+        """
+
+        if isinstance(worker, ParametrizedSweep):
+            self.worker_class_instance = worker
+            # self.worker_class_type = type(worker)
+            self.worker = self.worker_class_instance.__call__
+            logging.info("setting worker from bench class.__call__")
+        else:
+            if isinstance(worker, type):
+                raise RuntimeError("This should be a class instance, not a class")
+            if worker_input_cfg is None:
+                self.worker = worker
+            else:
+                self.worker = partial(worker_cfg_wrapper, worker, worker_input_cfg)
+            logging.info(f"setting worker {worker}")
+        self.worker_input_cfg = worker_input_cfg
+
+    def sweep(
+        self,
+        input_vars: List[ParametrizedSweep] = None,
+        result_vars: List[ParametrizedSweep] = None,
+        const_vars: List[ParametrizedSweep] = None,
+        time_src: datetime = None,
+        description: str = None,
+        post_description: str = None,
+        pass_repeat: bool = False,
+        tag: str = "",
+        run_cfg: BenchRunCfg = None,
+        plot: bool = False,
+    ) -> BenchResult:
+        title = "Sweeping " + " vs ".join([self.get_name(i) for i in input_vars])
+        return self.plot_sweep(
+            title,
+            input_vars=input_vars,
+            result_vars=result_vars,
+            const_vars=const_vars,
+            time_src=time_src,
+            description=description,
+            post_description=post_description,
+            pass_repeat=pass_repeat,
+            tag=tag,
+            run_cfg=run_cfg,
+            plot=plot,
+        )
+
+    def sweep_sequential(
+        self,
+        title="",
+        input_vars: List[ParametrizedSweep] = None,
+        result_vars: List[ParametrizedSweep] = None,
+        const_vars: List[ParametrizedSweep] = None,
+        optimise_var: ParametrizedSweep = None,
+        run_cfg: BenchRunCfg = None,
+        group_size: int = 1,
+        iterations: int = 1,
+        relationship_cb=None,
+    ) -> List[BenchResult]:
+        results = []
+        if relationship_cb is None:
+            relationship_cb = combinations
+        for it in range(iterations):
+            for input_group in relationship_cb(input_vars, group_size):
+                title_gen = (
+                    title + "Sweeping " + " vs ".join([self.get_name(i) for i in input_group])
+                )
+                if iterations > 1:
+                    title_gen += f" iteration:{it}"
+                res = self.plot_sweep(
+                    title=title_gen,
+                    input_vars=list(input_group),
+                    result_vars=result_vars,
+                    const_vars=const_vars,
+                    run_cfg=run_cfg,
+                    plot=True,
+                )
+                if optimise_var is not None:
+                    const_vars = res.get_optimal_inputs(optimise_var, True)
+                results.append(res)
+        return results
+
+    def plot_sweep(
+        self,
+        title: str = None,
+        input_vars: List[ParametrizedSweep] = None,
+        result_vars: List[ParametrizedSweep] = None,
+        const_vars: List[ParametrizedSweep] = None,
+        time_src: datetime = None,
+        description: str = None,
+        post_description: str = None,
+        pass_repeat: bool = False,
+        tag: str = "",
+        run_cfg: BenchRunCfg = None,
+        plot: bool = True,
+    ) -> BenchResult:
+        """The all-in-one function benchmarker and results plotter.
+
+        Args:
+            input_vars (List[ParametrizedSweep], optional): The input variables to sweep over. Defaults to None.
+            result_vars (List[ParametrizedSweep], optional): The result variables to collect from the worker. Defaults to None.
+            const_vars (List[ParametrizedSweep], optional): A list of variables to keep constant with a specified value. Defaults to None.
+            title (str, optional): The title of the benchmark. Defaults to None.
+            description (str, optional): A description of the benchmark. Defaults to None.
+            post_description (str, optional): A description that comes after the benchmark plots. Defaults to None.
+            time_src (datetime, optional): Set a time that the result was generated. Defaults to datetime.now().
+            pass_repeat (bool, optional): By default do not pass the kwarg 'repeat' to the benchmark function. Set to True if
+                you want the benchmark function to be passed the repeat number.
+            tag (str, optional): Use tags to group different benchmarks together.
+            run_cfg (BenchRunCfg, optional): A config for storing how the benchmarks are run and plotted.
+        Raises:
+            ValueError: If a result variable is not set
+
+        Returns:
+            BenchResult: A class containing the data used to generate the results, along with the results themselves
+        """
+
+        if self.worker_class_instance is not None:
+            if input_vars is None:
+                logging.info(
+                    "No input variables passed, using all param variables in bench class as inputs"
+                )
+                input_vars = self.worker_class_instance.get_inputs_only()
+                for i in input_vars:
+                    logging.info(f"input var: {i.name}")
+            if result_vars is None:
+                logging.info(
+                    "No results variables passed, using all result variables in bench class:"
+                )
+                result_vars = self.worker_class_instance.get_results_only()
+                for r in result_vars:
+                    logging.info(f"result var: {r.name}")
+            if const_vars is None:
+                const_vars = self.worker_class_instance.get_input_defaults()
+        else:
+            if input_vars is None:
+                input_vars = []
+            if result_vars is None:
+                result_vars = []
+            if const_vars is None:
+                const_vars = []
+            else:
+                const_vars = deepcopy(const_vars)
+
+        for i in range(len(input_vars)):
+            input_vars[i] = self.convert_vars_to_params(input_vars[i], "input")
+        for i in range(len(result_vars)):
+            result_vars[i] = self.convert_vars_to_params(result_vars[i], "result")
+
+        if isinstance(const_vars, dict):
+            const_vars = list(const_vars.items())
+
+        for i in range(len(const_vars)):
+            # consts come as tuple pairs
+            cv_list = list(const_vars[i])
+            cv_list[0] = self.convert_vars_to_params(cv_list[0], "const")
+            const_vars[i] = cv_list
+
+        if run_cfg is None:
+            if self.run_cfg is None:
+                run_cfg = BenchRunCfg()
+                logging.info("Generate default run cfg")
+            else:
+                run_cfg = deepcopy(self.run_cfg)
+                logging.info("Copy run cfg from bench class")
+
+        if run_cfg.only_plot:
+            run_cfg.use_cache = True
+
+        self.last_run_cfg = run_cfg
+
+        if title is None:
+            if len(input_vars) > 0:
+                title = "Sweeping " + " vs ".join([i.name for i in input_vars])
+            elif len(const_vars) > 0:
+                title = "Constant Value"
+                if len(const_vars) > 1:
+                    title += "s"
+                title += ": " + ", ".join([f"{c[0].name}={c[1]}" for c in const_vars])
+            else:
+                raise RuntimeError("you must pass a title, or define inputs or consts")
+
+        if run_cfg.level > 0:
+            inputs = []
+            print(input_vars)
+            if len(input_vars) > 0:
+                for i in input_vars:
+                    inputs.append(i.with_level(run_cfg.level))
+                input_vars = inputs
+
+        # if any of the inputs have been included as constants, remove those variables from the list of constants
+        with suppress(ValueError, AttributeError):
+            for i in input_vars:
+                for c in const_vars:
+                    # print(i.hash_persistent())
+                    if i.name == c[0].name:
+                        const_vars.remove(c)
+                        logging.info(f"removing {i.name} from constants")
+
+        result_hmaps = []
+        result_vars_only = []
+        for i in result_vars:
+            if isinstance(i, ResultHmap):
+                result_hmaps.append(i)
+            else:
+                result_vars_only.append(i)
+
+        if post_description is None:
+            post_description = (
+                "## Results Description\nPlease set post_description to explain these results"
+            )
+
+        bench_cfg = BenchCfg(
+            input_vars=input_vars,
+            result_vars=result_vars_only,
+            result_hmaps=result_hmaps,
+            const_vars=const_vars,
+            bench_name=self.bench_name,
+            description=description,
+            post_description=post_description,
+            title=title,
+            pass_repeat=pass_repeat,
+            tag=run_cfg.run_tag + tag,
+        )
+        print("tag", bench_cfg.tag)
+
+        bench_cfg.param.update(run_cfg.param.values())
+        bench_cfg_hash = bench_cfg.hash_persistent(True)
+        bench_cfg.hash_value = bench_cfg_hash
+
+        # does not include repeats in the hash as sample_hash already includes repeat as part of the per-sample hash
+        bench_cfg_sample_hash = bench_cfg.hash_persistent(False)
+
+        if self.sample_cache is None:
+            self.sample_cache = self.init_sample_cache(run_cfg)
+        if bench_cfg.clear_sample_cache:
+            self.clear_tag_from_sample_cache(bench_cfg.tag, run_cfg)
+
+        calculate_results = True
+        with Cache("cachedir/benchmark_inputs", size_limit=self.cache_size) as c:
+            if run_cfg.clear_cache:
+                c.delete(bench_cfg_hash)
+                logging.info("cleared cache")
+            elif run_cfg.use_cache:
+                logging.info(
+                    f"checking for previously calculated results with key: {bench_cfg_hash}"
+                )
+                if bench_cfg_hash in c:
+                    logging.info(f"loading cached results from key: {bench_cfg_hash}")
+                    bench_res = c[bench_cfg_hash]
+                    # if not over_time: # if over time we always want to calculate results
+                    calculate_results = False
+                else:
+                    logging.info("did not detect results in cache")
+                    if run_cfg.only_plot:
+                        raise FileNotFoundError("Was not able to load the results to plot!")
+
+        if calculate_results:
+            if run_cfg.time_event is not None:
+                time_src = run_cfg.time_event
+            bench_res = self.calculate_benchmark_results(
+                bench_cfg, time_src, bench_cfg_sample_hash, run_cfg
+            )
+
+            # use the hash of the inputs to look up historical values in the cache
+            if run_cfg.over_time:
+                bench_res.ds = self.load_history_cache(
+                    bench_res.ds, bench_cfg_hash, run_cfg.clear_history
+                )
+
+            self.report_results(bench_res, run_cfg.print_xarray, run_cfg.print_pandas)
+            self.cache_results(bench_res, bench_cfg_hash)
+
+        logging.info(self.sample_cache.stats())
+        self.sample_cache.close()
+
+        bench_res.post_setup()
+
+        if plot and bench_res.bench_cfg.auto_plot:
+            self.report.append_result(bench_res)
+        self.results.append(bench_res)
+        return bench_res
+
+    def get_name(self, var):
+        if isinstance(var, param.Parameter):
+            return var.name
+        return var
+
+    def convert_vars_to_params(self, variable: param.Parameter, var_type: str):
+        """Check that a variable is a param.Parameter, converting a string name into the
+        corresponding parameter on the worker class if necessary.
+
+        Args:
+            variable (param.Parameter): the variable to check
+            var_type (str): a string representation of the variable type for better error messages
+
+        Raises:
+            TypeError: the input variable type is not a param.
+        """
+        if isinstance(variable, str):
+            variable = self.worker_class_instance.param.objects(instance=False)[variable]
+        if not isinstance(variable, param.Parameter):
+            raise TypeError(
+                f"You need to use {var_type}_vars =[{self.worker_input_cfg}.param.your_variable], instead of {var_type}_vars =[{self.worker_input_cfg}.your_variable]"
+            )
+        return variable
+
+    def cache_results(self, bench_res: BenchResult, bench_cfg_hash: int) -> None:
+        with Cache("cachedir/benchmark_inputs", size_limit=self.cache_size) as c:
+            logging.info(f"saving results with key: {bench_cfg_hash}")
+            self.bench_cfg_hashes.append(bench_cfg_hash)
+            # object index may not be pickleable so remove before caching
+            obj_index_tmp = bench_res.object_index
+            bench_res.object_index = []
+
+            c[bench_cfg_hash] = bench_res
+
+            # restore object index
+            bench_res.object_index = obj_index_tmp
+
+            logging.info(f"saving benchmark: {self.bench_name}")
+            c[self.bench_name] = self.bench_cfg_hashes
+
+    # def show(self, run_cfg: BenchRunCfg = None, pane=None) -> None:
+    #     """Launches a webserver with plots of the benchmark results, blocking
+
+    #     Args:
+    #         run_cfg (BenchRunCfg, optional): Options for the webserver such as the port. Defaults to None.
+
+    #     """
+    #     if run_cfg is None:
+    #         if self.last_run_cfg is not None:
+    #             run_cfg = self.last_run_cfg
+    #         else:
+    #             run_cfg = BenchRunCfg()
+
+    #     return BenchPlotServer().plot_server(self.bench_name, run_cfg, pane)
+
+    def load_history_cache(
+        self, dataset: xr.Dataset, bench_cfg_hash: int, clear_history: bool
+    ) -> xr.Dataset:
+        """Load historical data from the cache if over_time=True
+
+        Args:
+            dataset (xr.Dataset): Freshly calculated data
+            bench_cfg_hash (int): Hash of the input variables used to generate the data
+            clear_history (bool): Optionally clear the history
+
+        Returns:
+            xr.Dataset: historical data as an xr.Dataset
+        """
+        with Cache("cachedir/history", size_limit=self.cache_size) as c:
+            if clear_history:
+                logging.info("clearing history")
+            else:
+                logging.info(f"checking historical key: {bench_cfg_hash}")
+                if bench_cfg_hash in c:
+                    logging.info("loading historical data from cache")
+                    ds_old = c[bench_cfg_hash]
+                    dataset = xr.concat([ds_old, dataset], "over_time")
+                else:
+                    logging.info("did not detect any historical data")
+
+            logging.info("saving data to history cache")
+            c[bench_cfg_hash] = dataset
+        return dataset
+
+    def setup_dataset(
+        self, bench_cfg: BenchCfg, time_src: datetime | str
+    ) -> tuple[BenchResult, List, List]:
+        """A function for generating an n-d xarray from a set of input variables in the BenchCfg
+
+        Args:
+            bench_cfg (BenchCfg): description of the benchmark parameters
+            time_src (datetime | str): a representation of the sample time
+
+        Returns:
+            tuple[BenchResult, List, List]: the benchmark result container, the list of function inputs, and the dimension names
+        """
+
+        if time_src is None:
+            time_src = datetime.now()
+        bench_cfg.meta_vars = self.define_extra_vars(bench_cfg, bench_cfg.repeats, time_src)
+
+        bench_cfg.all_vars = bench_cfg.input_vars + bench_cfg.meta_vars
+
+        # bench_cfg.all_vars = bench_cfg.iv_time + bench_cfg.input_vars +[ bench_cfg.iv_repeat]
+
+        # bench_cfg.all_vars = [ bench_cfg.iv_repeat] +bench_cfg.input_vars + bench_cfg.iv_time
+
+        for i in bench_cfg.all_vars:
+            logging.info(i.sampling_str(bench_cfg.debug))
+
+        dims_cfg = DimsCfg(bench_cfg)
+        function_inputs = list(
+            zip(product(*dims_cfg.dim_ranges_index), product(*dims_cfg.dim_ranges))
+        )
+        # xarray stores K N-dimensional arrays of data. Each array is named, and in this case we have an n-d array for each result variable
+        data_vars = {}
+
+        for rv in bench_cfg.result_vars:
+            if isinstance(rv, ResultVar):
+                result_data = np.full(dims_cfg.dims_size, np.nan, dtype=float)
+                data_vars[rv.name] = (dims_cfg.dims_name, result_data)
+            if isinstance(rv, ResultReference):
+                result_data = np.full(dims_cfg.dims_size, -1, dtype=int)
+                data_vars[rv.name] = (dims_cfg.dims_name, result_data)
+            if isinstance(rv, (ResultVideo, ResultImage, ResultString, ResultContainer)):
+                result_data = np.full(dims_cfg.dims_size, "NAN", dtype=object)
+                data_vars[rv.name] = (dims_cfg.dims_name, result_data)
+            elif type(rv) == ResultVec:
+                for i in range(rv.size):
+                    result_data = np.full(dims_cfg.dims_size, np.nan)
+                    data_vars[rv.index_name(i)] = (dims_cfg.dims_name, result_data)
+
+        bench_res = BenchResult(bench_cfg)
+        bench_res.ds = xr.Dataset(data_vars=data_vars, coords=dims_cfg.coords)
+        bench_res.ds_dynamic = self.ds_dynamic
+        bench_res.setup_object_index()
+
+        return bench_res, function_inputs, dims_cfg.dims_name
+
+    def define_const_inputs(self, const_vars) -> dict:
+        constant_inputs = None
+        if const_vars is not None:
+            const_vars, constant_values = [
+                [i for i, j in const_vars],
+                [j for i, j in const_vars],
+            ]
+
+            constant_names = [i.name for i in const_vars]
+            constant_inputs = dict(zip(constant_names, constant_values))
+        return constant_inputs
+
+    def define_extra_vars(self, bench_cfg: BenchCfg, repeats: int, time_src) -> list[IntSweep]:
+        """Define extra meta vars that are stored in the n-d array but are not passed to the benchmarking function, such as the number of repeats and the time the function was called.
+
+        Args:
+            bench_cfg (BenchCfg): description of the benchmark parameters
+            repeats (int): the number of times to sample the function
+            time_src (datetime): a representation of the sample time
+
+        Returns:
+            list[IntSweep]: the extra meta variables (repeat, and optionally the time dimension)
+        """
+        bench_cfg.iv_repeat = IntSweep(
+            default=repeats,
+            bounds=[1, repeats],
+            samples=repeats,
+            samples_debug=2 if repeats > 2 else 1,
+            units="repeats",
+            doc="The number of times a sample was measured",
+        )
+        bench_cfg.iv_repeat.name = "repeat"
+        extra_vars = [bench_cfg.iv_repeat]
+
+        if bench_cfg.over_time:
+            if isinstance(time_src, str):
+                iv_over_time = TimeEvent(time_src)
+            else:
+                iv_over_time = TimeSnapshot(time_src)
+            iv_over_time.name = "over_time"
+            extra_vars.append(iv_over_time)
+            bench_cfg.iv_time = [iv_over_time]
+        return extra_vars
+
+    def calculate_benchmark_results(
+        self, bench_cfg, time_src: datetime | str, bench_cfg_sample_hash, bench_run_cfg
+    ) -> BenchResult:
+        """A function for generating an n-d xarray from a set of input variables in the BenchCfg
+
+        Args:
+            bench_cfg (BenchCfg): description of the benchmark parameters
+            time_src (datetime): a representation of the sample time
+
+        Returns:
+            BenchResult: the benchmark result containing the sampled data
+        """
+        bench_res, func_inputs, dims_name = self.setup_dataset(bench_cfg, time_src)
+        bench_res.bench_cfg.hmap_kdims = sorted(dims_name)
+        constant_inputs = self.define_const_inputs(bench_res.bench_cfg.const_vars)
+        callcount = 1
+
+        results_list = []
+        jobs = []
+
+        for idx_tuple, function_input_vars in func_inputs:
+            job = WorkerJob(
+                function_input_vars,
+                idx_tuple,
+                dims_name,
+                constant_inputs,
+                bench_cfg_sample_hash,
+                bench_res.bench_cfg.tag,
+            )
+            job.setup_hashes()
+            jobs.append(job)
+
+            jid = f"{bench_res.bench_cfg.title}:call {callcount}/{len(func_inputs)}"
+            worker = partial(worker_kwargs_wrapper, self.worker, bench_res.bench_cfg)
+            cache_job = Job(
+                job_id=jid,
+                function=worker,
+                job_args=job.function_input,
+                job_key=job.function_input_signature_pure,
+                tag=job.tag,
+            )
+            result = self.sample_cache.submit(cache_job)
+            results_list.append(result)
+            callcount += 1
+
+            if bench_run_cfg.executor == Executors.SERIAL:
+                self.store_results(result, bench_res, job, bench_run_cfg)
+
+        if bench_run_cfg.executor != Executors.SERIAL:
+            for job, res in zip(jobs, results_list):
+                self.store_results(res, bench_res, job, bench_run_cfg)
+
+        for inp in bench_res.bench_cfg.all_vars:
+            self.add_metadata_to_dataset(bench_res, inp)
+
+        return bench_res
+
+    def store_results(
+        self,
+        job_result: JobFuture,
+        bench_res: BenchResult,
+        worker_job: WorkerJob,
+        bench_run_cfg: BenchRunCfg,
+    ) -> None:
+        result = job_result.result()
+        if result is not None:
+            logging.info(f"{job_result.job.job_id}:")
+            if bench_res.bench_cfg.print_bench_inputs:
+                for k, v in worker_job.function_input.items():
+                    logging.info(f"\t {k}:{v}")
+
+            result_dict = result if isinstance(result, dict) else result.param.values()
+
+            for rv in bench_res.bench_cfg.result_vars:
+                result_value = result_dict[rv.name]
+                if bench_run_cfg.print_bench_results:
+                    logging.info(f"{rv.name}: {result_value}")
+
+                if isinstance(
+                    rv, (ResultVar, ResultVideo, ResultImage, ResultString, ResultContainer)
+                ):
+                    set_xarray_multidim(bench_res.ds[rv.name], worker_job.index_tuple, result_value)
+                elif isinstance(rv, ResultReference):
+                    bench_res.object_index.append(result_value)
+                    set_xarray_multidim(
+                        bench_res.ds[rv.name],
+                        worker_job.index_tuple,
+                        len(bench_res.object_index) - 1,
+                    )
+                elif isinstance(rv, ResultVec):
+                    if isinstance(result_value, (list, np.ndarray)):
+                        if len(result_value) == rv.size:
+                            for i in range(rv.size):
+                                set_xarray_multidim(
+                                    bench_res.ds[rv.index_name(i)],
+                                    worker_job.index_tuple,
+                                    result_value[i],
+                                )
+
+                else:
+                    raise RuntimeError("Unsupported result type")
+            for rv in bench_res.result_hmaps:
+                bench_res.hmaps[rv.name][worker_job.canonical_input] = result_dict[rv.name]
+
+            # bench_cfg.hmap = bench_cfg.hmaps[bench_cfg.result_hmaps[0].name]
+
+    def init_sample_cache(self, run_cfg: BenchRunCfg):
+        return FutureCache(
+            overwrite=run_cfg.overwrite_sample_cache,
+            executor=run_cfg.executor,
+            cache_name="sample_cache",
+            tag_index=True,
+            size_limit=self.cache_size,
+            use_cache=run_cfg.use_sample_cache,
+        )
+
+    def clear_tag_from_sample_cache(self, tag: str, run_cfg):
+        """Clear all samples from the cache that match a tag
+
+        Args:
+            tag (str): clear samples with this tag
+            run_cfg: run config used to initialise the sample cache if it does not exist yet
+        """
+        if self.sample_cache is None:
+            self.sample_cache = self.init_sample_cache(run_cfg)
+        self.sample_cache.clear_tag(tag)
+
+    def add_metadata_to_dataset(self, bench_res: BenchResult, input_var: ParametrizedSweep) -> None:
+        """Adds variable metadata to the xarray so that it can be used to automatically plot the dimension units etc.
+
+        Args:
+            bench_res (BenchResult): the benchmark result holding the dataset
+            input_var (ParametrizedSweep): The variable to extract metadata from
+        """
+
+        for rv in bench_res.bench_cfg.result_vars:
+            if type(rv) == ResultVar:
+                bench_res.ds[rv.name].attrs["units"] = rv.units
+                bench_res.ds[rv.name].attrs["long_name"] = rv.name
+            elif type(rv) == ResultVec:
+                for i in range(rv.size):
+                    bench_res.ds[rv.index_name(i)].attrs["units"] = rv.units
+                    bench_res.ds[rv.index_name(i)].attrs["long_name"] = rv.name
+            else:
+                pass  # todo
+
+        dsvar = bench_res.ds[input_var.name]
+        dsvar.attrs["long_name"] = input_var.name
+        if input_var.units is not None:
+            dsvar.attrs["units"] = input_var.units
+        if input_var.__doc__ is not None:
+            dsvar.attrs["description"] = input_var.__doc__
+
+    def report_results(self, bench_cfg: BenchCfg, print_xarray: bool, print_pandas: bool):
+        """Optionally display the calculated benchmark data as pandas, xarray or a plot
+
+        Args:
+            bench_cfg (BenchCfg): the benchmark config holding the result dataset
+            print_xarray (bool): print the xarray dataset to the log
+            print_pandas (bool): print the pandas dataframe to the log
+        """
+        if print_xarray:
+            logging.info(bench_cfg.ds)
+        if print_pandas:
+            logging.info(bench_cfg.ds.to_dataframe())
+
+    def clear_call_counts(self) -> None:
+        """Clear the worker and cache call counts, to help debug and assert caching is happening properly"""
+        self.sample_cache.clear_call_counts()
+
+    def get_result(self, index: int = -1) -> BenchResult:
+        return self.results[index]
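
For orientation, the sketch below shows how the Bench class added in this file might be driven. It is illustrative only and is not part of the package diff: the SquareBenchmark class, its parameter names, and the exact ResultVar constructor arguments are assumptions inferred from the imports and call patterns in bencher.py (a ParametrizedSweep worker is invoked via __call__ with the swept inputs as keyword arguments, and may return a plain dict keyed by result-variable name, as handled in store_results).

# Hypothetical usage sketch -- not part of holobench 1.3.6; names below are illustrative.
from bencher.bencher import Bench
from bencher.variables.parametrised_sweep import ParametrizedSweep
from bencher.variables.inputs import IntSweep
from bencher.variables.results import ResultVar


class SquareBenchmark(ParametrizedSweep):
    # Swept input variable (IntSweep is the same class bencher.py uses for the repeat dimension).
    x = IntSweep(default=0, bounds=[0, 10], doc="value to square")
    # Result variable; the units/doc keyword arguments are assumed, not confirmed by this diff.
    output = ResultVar(units="ul", doc="x squared")

    def __call__(self, **kwargs):
        # worker_kwargs_wrapper passes the swept inputs as keyword arguments
        # ('repeat' is stripped unless pass_repeat=True).
        x = kwargs.get("x", self.x)
        # Returning a dict keyed by result-variable name is one of the forms store_results accepts.
        return {"output": x * x}


bench = Bench("square_example", SquareBenchmark())
res = bench.plot_sweep(
    title="x squared",
    input_vars=[SquareBenchmark.param.x],
    result_vars=[SquareBenchmark.param.output],
)
# plot_sweep returns a BenchResult and also appends it to bench.results and bench.report.

The Class.param.variable form mirrors the error message raised in convert_vars_to_params, which expects parameters to be referenced through the param namespace rather than as instance attributes.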