potassco-benchmark-tool 2.1.1-py3-none-any.whl → 2.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,935 @@
1
+ """
2
+ Created on Dec 2, 2025
3
+
4
+ @author: Tom Schmidt
5
+ """
6
+
7
+ import warnings
8
+ from dataclasses import dataclass, field
9
+ from typing import TYPE_CHECKING, Any, Optional
10
+
11
+ import numpy as np
12
+ import pandas as pd # type: ignore[import-untyped]
13
+ from xlsxwriter import Workbook # type: ignore[import-untyped]
14
+ from xlsxwriter.color import Color # type: ignore[import-untyped]
15
+ from xlsxwriter.utility import cell_autofit_width # type: ignore[import-untyped]
16
+ from xlsxwriter.worksheet import Worksheet # type: ignore[import-untyped]
17
+
18
+ if TYPE_CHECKING:
19
+ from benchmarktool.result import result # nocoverage
20
+
21
+
22
+ class Formula:
23
+ """
24
+ Helper class representing a spreadsheet formula.
25
+ """
26
+
27
+ def __init__(self, formula_string: str):
28
+ """
29
+ Initialize Formula.
30
+
31
+ Attributes:
32
+ formula_string (str): Formula string.
33
+ """
34
+ self.formula_string = formula_string
35
+
36
+ def __str__(self) -> str:
37
+ """
38
+ Get spreadsheet string representation.
39
+ """
40
+ # remove leading '='
41
+ s = self.formula_string.lstrip("=")
42
+ return f"={s}"
43
+
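+ # For example, str(Formula("SUM(B3:B10)")) and str(Formula("=SUM(B3:B10)"))
+ # both yield the spreadsheet string "=SUM(B3:B10)".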
44
+
45
+ # pylint: disable=too-few-public-methods, dangerous-default-value
46
+ class DataValidation:
47
+ """
48
+ Helper class representing a spreadsheet data validation.
49
+ """
50
+
51
+ def __init__(self, params: dict[str, Any] = {}, default: Any = None, color: Optional[str] = None):
52
+ """
53
+ Initialize DataValidation.
54
+
55
+ Attributes:
56
+ params (dict[str, Any]): Data validation parameters.
57
+ default (Any): Default value.
58
+ color (Optional[str]): Color reference.
59
+ """
60
+ self.params = params
61
+ self.default = default
62
+ self.color = color
63
+
64
+ def write(self, xlsxdoc: "XLSXDoc", sheet: Worksheet, row: int, col: int) -> None:
65
+ """
66
+ Write to XLSX document sheet.
67
+
68
+ Attributes:
69
+ xlsxdoc (XLSXDoc): XLSX document.
70
+ sheet (Worksheet): XLSX worksheet.
71
+ row (int): Row index.
72
+ col (int): Column index.
73
+ """
74
+ if isinstance(xlsxdoc.workbook, Workbook):
75
+ if self.default is not None:
76
+ if self.color is not None:
77
+ sheet.write(
78
+ row, col, self.default, xlsxdoc.workbook.add_format({"bg_color": xlsxdoc.colors[self.color]})
79
+ )
80
+ else:
81
+ sheet.write(row, col, self.default)
82
+ sheet.data_validation(row, col, row, col, self.params)
83
+ else:
84
+ raise ValueError("Trying to write to uninitialized workbook.")
85
+
86
+ def __eq__(self, other: object) -> bool:
87
+ """
88
+ Equality operator.
89
+
90
+ Attributes:
91
+ other (object): Other DataValidation object.
92
+ """
93
+ if not isinstance(other, DataValidation):
94
+ raise TypeError("Comparison with non DataValidation object.")
95
+ return hash(self) == hash(other)
96
+
97
+ def __hash__(self) -> int:
98
+ """
99
+ Hash function.
100
+ """
101
+ return hash((repr(sorted(self.params.items())), self.default, self.color))
102
+
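+ # Example (a sketch mirroring the merge-criteria dropdown built in Sheet.__init__):
+ # a list-validated cell with "median" preselected on the "input" background color.
+ #
+ #     dv = DataValidation(
+ #         {"validate": "list", "source": ["average", "median", "min", "max", "diff"]},
+ #         default="median",
+ #         color="input",
+ #     )
+ #     dv.write(xlsxdoc, sheet, row=1, col=0)  # needs an initialized workbook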
103
+
104
+ def try_float(v: Any) -> Any:
105
+ """
106
+ Try to cast given value to float.
107
+ Return input if not possible.
108
+
109
+ Attributes:
110
+ v (Any): Value to try casting to float.
111
+ """
112
+ try:
113
+ return float(v)
114
+ except (ValueError, TypeError):
115
+ return v
116
+
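+ # For example, try_float("1.5") returns 1.5, while try_float("n/a") and
+ # try_float(None) return the input unchanged.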
117
+
118
+ def get_cell_index(col: int, row: int, abs_col: bool = False, abs_row: bool = False) -> str:
119
+ """
120
+ Calculate spreadsheet cell index.
121
+
122
+ Attributes:
123
+ col (int): Column index.
124
+ row (int): Row index.
125
+ abs_col (bool): Set '$' for column.
126
+ abs_row (bool): Set '$' for row.
127
+ """
128
+ radix = ord("Z") - ord("A") + 1
129
+ ret = ""
130
+ while col >= 0:
131
+ rem = col % radix
132
+ ret = chr(rem + ord("A")) + ret
133
+ col = col // radix - 1
134
+ pre_col = "$" if abs_col else ""
135
+ pre_row = "$" if abs_row else ""
136
+ return f"{pre_col}{ret}{pre_row}{row + 1}"
137
+
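+ # For example, get_cell_index(0, 0) == "A1", get_cell_index(27, 9) == "AB10"
+ # and get_cell_index(2, 4, abs_col=True, abs_row=True) == "$C$5".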
138
+
139
+ # pylint: disable=too-many-instance-attributes
140
+ class XLSXDoc:
141
+ """
142
+ Class representing XLSX document.
143
+ """
144
+
145
+ def __init__(self, benchmark: "result.BenchmarkMerge", measures: dict[str, Any], max_col_width: int = 300):
146
+ """
147
+ Set up the Instances, Merged Runs and Classes sheets.
148
+
149
+ Attributes:
150
+ benchmark (BenchmarkMerge): BenchmarkMerge object.
151
+ measures (dict[str, Any]): Measures to be displayed.
+ max_col_width (int): Maximum column width in pixels.
152
+ """
153
+ self.workbook: Optional[Workbook] = None
154
+ self.max_col_width = max_col_width
155
+ self.header_width = 80
156
+
157
+ self.colors: dict[str, Color] = {
158
+ "best": Color("#00ff00"),
159
+ "worst": Color("#ff0000"),
160
+ "input": Color("#ffcc99"),
161
+ "none": Color("#ffffff"),
162
+ }
163
+
164
+ self.num_formats: dict[str, str] = {
165
+ "defaultNumber": "0.00",
166
+ "formula": "0.00",
167
+ "to": "0",
168
+ }
169
+
170
+ self.inst_sheet = Sheet(benchmark, measures, "Instances")
171
+ self.merged_sheet = Sheet(benchmark, measures, "Merged Runs", self.inst_sheet, "merge")
172
+ self.class_sheet = Sheet(benchmark, measures, "Classes", self.inst_sheet, "class")
173
+
174
+ def add_runspec(self, runspec: "result.Runspec") -> None:
175
+ """
176
+ Attributes:
177
+ runspec (Runspec): Run specification.
178
+ """
179
+ self.inst_sheet.add_runspec(runspec)
180
+ self.merged_sheet.add_runspec(runspec)
181
+ self.class_sheet.add_runspec(runspec)
182
+
183
+ def finish(self) -> None:
184
+ """
185
+ Complete sheets by adding formulas and summaries.
186
+ """
187
+ self.inst_sheet.finish()
188
+ self.merged_sheet.finish()
189
+ self.class_sheet.finish()
190
+
191
+ def make_xlsx(self, out: str) -> None:
192
+ """
193
+ Write XLSX file.
194
+
195
+ Attributes:
196
+ out (str): Name of the generated XLSX file.
197
+ """
198
+ self.workbook = Workbook(out)
199
+
200
+ for sheet in (self.inst_sheet, self.merged_sheet, self.class_sheet):
201
+ sheet.write_sheet(self)
202
+ self.workbook.close()
203
+
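+ # Typical call order (a sketch; `bench`, `measures` and `runspecs` stand in for
+ # objects produced by the benchmarktool.result parser and are not defined here):
+ #
+ #     doc = XLSXDoc(bench, measures)
+ #     for runspec in runspecs:
+ #         doc.add_runspec(runspec)
+ #     doc.finish()
+ #     doc.make_xlsx("results.xlsx")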
204
+
205
+ # pylint: disable=too-many-instance-attributes, too-many-positional-arguments
206
+ class Sheet:
207
+ """
208
+ Class representing an XLSX sheet.
209
+ """
210
+
211
+ # pylint: disable=too-many-branches
212
+ def __init__(
213
+ self,
214
+ benchmark: "result.BenchmarkMerge",
215
+ measures: dict[str, Any],
216
+ name: str,
217
+ ref_sheet: Optional["Sheet"] = None,
218
+ sheet_type: str = "instance",
219
+ ):
220
+ """
221
+ Initialize sheet.
222
+
223
+ Attributes:
224
+ benchmark (BenchmarkMerge): Benchmark.
225
+ measures (dict[str, Any]): Measures to be displayed.
226
+ name (str): Name of the sheet.
227
+ ref_sheet (Optional[Sheet]): Reference sheet.
228
+ sheet_type (str): Type of the sheet.
229
+ """
230
+ # dataframe resembling almost final xlsx form
231
+ self.content = pd.DataFrame()
232
+ # name of the sheet
233
+ self.name = name
234
+ # evaluated benchmarks
235
+ self.benchmark = benchmark
236
+ # dataframes containing result data, use these for calculations
237
+ self.system_blocks: dict[tuple[Any, Any], SystemBlock] = {}
238
+ # types of measures
239
+ self.types: dict[str, str] = {}
240
+ # measures to be displayed
241
+ self.measures = measures
242
+ # machines
243
+ self.machines: set["result.Machine"] = set()
244
+ # sheet for references
245
+ self.ref_sheet = ref_sheet
246
+ # sheet type
247
+ self.type = sheet_type
248
+ # references for summary generation
249
+ self.summary_refs: dict[str, Any] = {}
250
+ # dataframe containing all results and stats
251
+ self.values = pd.DataFrame()
252
+ # columns containing floats
253
+ self.float_occur: dict[str, set[Any]] = {}
254
+ # formats for columns
255
+ self.formats: dict[int, str] = {}
256
+ # number of runs
257
+ self.runs: Optional[int] = None
258
+ # run summary only if same number of runs for all instances
259
+ run_summary = True
260
+
261
+ # first column
262
+ self.content[0] = None
263
+ # setup rows for instances/benchmark classes
264
+ if self.ref_sheet is None and sheet_type == "instance":
265
+ row = 2
266
+ for benchclass in benchmark:
267
+ for instance in benchclass:
268
+ self.content.loc[row] = instance.benchclass.name + "/" + instance.name
269
+ row += instance.values["max_runs"]
270
+ if self.runs is None:
271
+ self.runs = instance.values["max_runs"]
272
+ elif self.runs != instance.values["max_runs"]: # nocoverage
273
+ run_summary = False
274
+ elif self.ref_sheet is not None and sheet_type == "class":
275
+ row = 2
276
+ for benchclass in benchmark:
277
+ self.content.loc[row] = benchclass.name
278
+ row += 1
279
+ elif self.ref_sheet is not None and sheet_type == "merge":
280
+ self.content.loc[0] = "Merge criteria:"
281
+ # selection
282
+ self.content.loc[1] = DataValidation(
283
+ {
284
+ "validate": "list",
285
+ "source": ["average", "median", "min", "max", "diff"],
286
+ "input_message": "Select merge criteria",
287
+ },
288
+ "median",
289
+ "input",
290
+ )
291
+ row = 2
292
+ for benchclass in benchmark:
293
+ for instance in benchclass:
294
+ self.content.loc[row] = instance.benchclass.name + "/" + instance.name
295
+ row += 1
296
+ if self.runs is None:
297
+ self.runs = instance.values["max_runs"]
298
+ elif self.runs != instance.values["max_runs"]: # nocoverage
299
+ run_summary = False
300
+ else:
301
+ raise ValueError("Invalid sheet parameters.")
302
+
303
+ self.result_offset = row
304
+ for idx, label in enumerate(["SUM", "AVG", "DEV", "DST", "BEST", "BETTER", "WORSE", "WORST"], 1):
305
+ self.content.loc[self.result_offset + idx] = label
306
+
307
+ # run summary
308
+ if run_summary and self.runs and self.runs > 1 and self.ref_sheet is None:
309
+ selection = DataValidation(
310
+ {
311
+ "validate": "list",
312
+ "source": list(range(1, self.runs + 1)),
313
+ "input_message": "Select run number",
314
+ },
315
+ 1,
316
+ "input",
317
+ )
318
+
319
+ for idx, label in enumerate( # type: ignore[assignment]
320
+ ["Select run:", selection, "SUM", "AVG", "DEV", "DST", "BEST", "BETTER", "WORSE", "WORST"], 10
321
+ ):
322
+ self.content.loc[self.result_offset + idx] = label
323
+
324
+ # fill missing rows
325
+ self.content = self.content.reindex(list(range(self.content.index.max() + 1))).replace(np.nan, None)
326
+
327
+ def add_runspec(self, runspec: "result.Runspec") -> None:
328
+ """
329
+ Add results to their respective blocks.
330
+
331
+ Attributes:
332
+ runspec (Runspec): Run specification
333
+ """
334
+ key = (runspec.setting, runspec.machine)
335
+ block = self.system_blocks.setdefault(key, SystemBlock(runspec.setting, runspec.machine))
336
+ if block.machine:
337
+ self.machines.add(block.machine)
338
+
339
+ for benchclass_result in runspec:
340
+ benchclass_summary: dict[str, Any] = {}
341
+ instance_summary: dict[result.InstanceResult, dict[str, Any]] = {}
342
+ for instance_result in benchclass_result:
343
+ self.add_instance_results(block, instance_result, benchclass_summary, instance_summary)
344
+ for m in block.columns:
345
+ if m not in self.types or self.types[m] in {"None", "empty"}:
346
+ self.types[m] = block.columns[m]
347
+ # mixed measure
348
+ elif block.columns[m] not in {self.types[m], "None", "empty"}:
349
+ self.types[m] = "string"
350
+ if self.ref_sheet:
351
+ # mergeSheet
352
+ if self.type == "merge":
353
+ for instance_result in benchclass_result:
354
+ self.add_merged_instance_results(block, instance_result, instance_summary)
355
+ # classSheet
356
+ elif self.type == "class":
357
+ self.add_benchclass_summary(block, benchclass_result, benchclass_summary)
358
+ for m in block.columns:
359
+ if m not in self.types or self.types[m] in {"None", "empty"}:
360
+ self.types[m] = block.columns[m]
361
+
362
+ def add_instance_results(
363
+ self,
364
+ block: "SystemBlock",
365
+ instance_result: "result.InstanceResult",
366
+ benchclass_summary: dict[str, Any],
367
+ instance_summary: dict["result.InstanceResult", dict[str, Any]],
368
+ ) -> None:
369
+ """
370
+ Add instance results to SystemBlock and add values to summary if necessary.
371
+
372
+ Attributes:
373
+ block (SystemBlock): SystemBlock to which results are added.
374
+ instance_result (InstanceResult): InstanceResult.
375
+ benchclass_summary (dict[str, Any]): Summary of benchmark class.
376
+ instance_summary (dict[InstanceResult, dict[str, Any]]): Summary of instance results.
377
+ """
378
+ instance_summary[instance_result] = {}
379
+ for run in instance_result:
380
+ for name, value_type, value in run.iter(self.measures):
381
+ self.measures.setdefault(name, None)
382
+ instance_summary[instance_result].setdefault(name, False)
383
+ if value_type == "int":
384
+ value_type = "float"
385
+ elif value_type not in {"float", "None", "empty"}:
386
+ value_type = "string"
387
+ if self.ref_sheet is None:
388
+ if value_type == "float":
389
+ block.add_cell(
390
+ instance_result.instance.values["row"] + run.number - 1, name, value_type, float(value)
391
+ )
392
+ elif value_type in {"None", "empty"}:
393
+ block.add_cell(
394
+ instance_result.instance.values["row"] + run.number - 1, name, value_type, np.nan
395
+ )
396
+ else:
397
+ block.add_cell(instance_result.instance.values["row"] + run.number - 1, name, value_type, value)
398
+ elif value_type == "float" and self.ref_sheet.types.get(name, "") == "float":
399
+ instance_summary[instance_result][name] = True
400
+ if benchclass_summary.get(name) is None:
401
+ benchclass_summary[name] = (0.0, 0)
402
+ benchclass_summary[name] = (
403
+ float(value) + benchclass_summary[name][0],
404
+ 1 + benchclass_summary[name][1],
405
+ )
406
+ else:
407
+ if name not in benchclass_summary:
408
+ benchclass_summary[name] = None
409
+
410
+ def add_merged_instance_results(
411
+ self,
412
+ block: "SystemBlock",
413
+ instance_result: "result.InstanceResult",
414
+ instance_summary: dict["result.InstanceResult", dict[str, Any]],
415
+ ) -> None:
416
+ """
417
+ Add merged instance results to SystemBlock and add values to summary if necessary.
418
+
419
+ Attributes:
420
+ block (SystemBlock): SystemBlock to which results are added.
421
+ instance_result (InstanceResult): InstanceResult.
422
+ instance_summary (dict[result.InstanceResult, dict[str, Any]]): Summary of instance results.
423
+ """
424
+ for name, value in instance_summary[instance_result].items():
425
+ inst_val = instance_result.instance.values
426
+ # check if any run has a float value
427
+ if value:
428
+ # value just to signal non empty cell
429
+ block.add_cell(
430
+ (inst_val["row"] + inst_val["max_runs"]) // inst_val["max_runs"] - 1,
431
+ name,
432
+ "merged_runs",
433
+ {
434
+ "inst_start": inst_val["row"],
435
+ "inst_end": inst_val["row"] + inst_val["max_runs"] - 1,
436
+ "value": 1,
437
+ },
438
+ )
439
+ else:
440
+ block.add_cell(
441
+ (inst_val["row"] + inst_val["max_runs"]) // inst_val["max_runs"] - 1, name, "None", np.nan
442
+ )
443
+
444
+ def add_benchclass_summary(
445
+ self, block: "SystemBlock", benchclass_result: "result.ClassResult", benchclass_summary: dict[str, Any]
446
+ ) -> None:
447
+ """
448
+ Add benchmark class summary to SystemBlock.
449
+
450
+ Attributes:
451
+ block (SystemBlock): SystemBlock to which summary is added.
452
+ benchclass_result (ClassResult): ClassResult.
453
+ benchclass_summary (dict[str, Any]): Summary of benchmark class.
454
+ """
455
+ for name, value in benchclass_summary.items():
456
+ if value is not None:
457
+ temp_res = value[0] / value[1]
458
+ if name == "timeout":
459
+ temp_res = value[0]
460
+ block.add_cell(
461
+ benchclass_result.benchclass.values["row"],
462
+ name,
463
+ "classresult",
464
+ {
465
+ "inst_start": benchclass_result.benchclass.values["inst_start"],
466
+ "inst_end": benchclass_result.benchclass.values["inst_end"],
467
+ "value": temp_res,
468
+ },
469
+ )
470
+ else:
471
+ block.add_cell(benchclass_result.benchclass.values["row"], name, "None", np.nan)
472
+
473
+ def finish(self) -> None:
474
+ """
475
+ Finish XLSX content.
476
+ """
477
+ col = 1
478
+ # join results of different blocks
479
+ for block in sorted(self.system_blocks.values()):
480
+ self.content = self.content.join(block.content)
481
+ self.content = self.content.set_axis(list(range(len(self.content.columns))), axis=1)
482
+ self.content.at[0, col] = block.gen_name(len(self.machines) > 1)
483
+ col += len(block.columns)
484
+
485
+ # get columns used for summary calculations
486
+ # add formulas for results of classSheet
487
+ for column in self.content:
488
+ name = self.content.at[1, column]
489
+ if self.types.get(name, "") == "merged_runs":
490
+ for row in range(2, self.result_offset):
491
+ if isinstance(self.content.at[row, column], dict):
492
+ # value just to signal non empty cell
493
+ self.values.at[row, column] = self.content.at[row, column]["value"]
494
+ cell_range = (
495
+ f'(Instances!{get_cell_index(column, self.content.at[row, column]["inst_start"] + 2)}:'
496
+ f'Instances!{get_cell_index(column, self.content.at[row, column]["inst_end"] + 2)})'
497
+ )
498
+ self.content.at[row, column] = Formula(
499
+ f"SWITCH($A$2,"
500
+ f'"average", AVERAGE{cell_range},'
501
+ f'"median", MEDIAN{cell_range},'
502
+ f'"min", MIN{cell_range},'
503
+ f'"max", MAX{cell_range},'
504
+ f'"diff", MAX{cell_range}-MIN{cell_range}'
505
+ ")"
506
+ )
507
+ elif self.types.get(name, "") == "classresult":
508
+ for row in range(2, self.result_offset):
509
+ op = "SUM" if name == "timeout" else "AVERAGE"
510
+ if isinstance(self.content.at[row, column], dict):
511
+ self.values.at[row, column] = self.content.at[row, column]["value"]
512
+ self.content.at[row, column] = Formula(
513
+ f"{op}(Instances!{get_cell_index(column, self.content.at[row, column]['inst_start'] + 2)}:"
514
+ f"Instances!{get_cell_index(column, self.content.at[row, column]['inst_end'] + 2)})"
515
+ )
516
+ if self.types.get(name, "") in ["float", "classresult", "merged_runs"]:
517
+ self.float_occur.setdefault(name, set()).add(column)
518
+ # defragmentation (temporary workaround)
519
+ self.content = self.content.copy()
520
+ self.values = self.values.copy()
521
+
522
+ if self.ref_sheet is not None:
523
+ self.values = self.values.reindex(index=self.content.index, columns=self.content.columns)
524
+ self.values = self.values.combine_first(self.content)
525
+ else:
526
+ self.values = (
527
+ self.content.iloc[2 : self.result_offset - 1, 1:].combine_first(self.values).combine_first(self.content)
528
+ )
529
+
530
+ # defragmentation (temporary workaround)
531
+ self.content = self.content.copy()
532
+ self.values = self.values.copy()
533
+
534
+ # add summaries
535
+ self.add_row_summary(col)
536
+ self.add_col_summary()
537
+
538
+ # color cells
539
+ self.add_styles()
540
+
541
+ # replace all undefined cells with None (empty cell)
542
+ self.content = self.content.fillna(np.nan).replace(np.nan, None)
543
+
544
+ def add_row_summary(self, offset: int) -> None:
545
+ """
546
+ Add row summary (min, max, median).
547
+
548
+ Attributes:
549
+ offset (int): Column offset.
550
+ """
551
+ col = offset
552
+ for col_name in ["min", "median", "max"]:
553
+ block = SystemBlock(None, None)
554
+ block.offset = col
555
+ self.summary_refs[col_name] = {"col": col}
556
+ measures = sorted(self.float_occur.keys()) if not self.measures else list(self.measures.keys())
557
+ for measure in measures:
558
+ if measure in self.float_occur:
559
+ self.values.at[1, col] = measure
560
+ self._add_summary_formula(block, col_name, measure, self.float_occur, col)
561
+ self.summary_refs[col_name][measure] = (
562
+ col,
563
+ f"{get_cell_index(col, 2, True, True)}:"
564
+ f"{get_cell_index(col, self.result_offset - 1, True, True)}",
565
+ )
566
+ col += 1
567
+ self.content = self.content.join(block.content)
568
+ self.content = self.content.set_axis(list(range(len(self.content.columns))), axis=1)
569
+ self.content.at[0, block.offset] = col_name
570
+ self.values.at[0, block.offset] = col_name
571
+
572
+ # pylint: disable=too-many-positional-arguments
573
+ def _add_summary_formula(
574
+ self, block: "SystemBlock", operator: str, measure: str, float_occur: dict[str, Any], col: int
575
+ ) -> None:
576
+ """
577
+ Add row summary formula.
578
+
579
+ Attributes:
580
+ block (SystemBlock): SystemBlock to which summary is added.
581
+ operator (str): Summary operator.
582
+ measure (str): Name of the measure to be summarized.
583
+ float_occur (dict[str, Any]): Dict containing column references of float columns.
584
+ col (int): Current column index.
585
+ """
586
+ for row in range(self.result_offset - 2):
587
+ ref_range = ",".join(get_cell_index(col_ref, row + 2, True) for col_ref in sorted(float_occur[measure]))
588
+ values = np.array(self.values.loc[2 + row, sorted(float_occur[measure])], float)
589
+ if np.isnan(values).all():
590
+ self.values.at[2 + row, col] = np.nan
591
+ else:
592
+ # don't write formula if full row is nan
593
+ block.add_cell(row, measure, "formula", Formula(f"{operator.upper()}({ref_range})"))
594
+ self.values.at[2 + row, col] = getattr(np, "nan" + operator)(values)
595
+
596
+ def add_col_summary(self) -> None:
597
+ """
598
+ Add column summary if applicable to column type.
599
+ """
600
+
601
+ def _get_run_select(ref: str, runs: int, col_idx: int, abs_col: bool = True) -> str:
602
+ """
603
+ Get run dependent row selection formula.
604
+
605
+ Attributes:
606
+ ref (str): Row range reference
607
+ runs (int): Number of runs
608
+ col_idx (int): Current column index
609
+ abs_col (bool): Set '$' for new column reference
610
+ """
611
+ return (
612
+ f"CHOOSE({ref},"
613
+ + ",".join([f"ROW({get_cell_index(col_idx, 2 + i, abs_col, True)})" for i in range(runs)])
614
+ + ")"
615
+ )
616
+
617
+ def _get_run_filter(base_range: str, choose_rows: str) -> str:
618
+ """
619
+ Get formula for filtered rows by run.
620
+
621
+ Attributes:
622
+ base_range (str): Row range to filter
623
+ choose_rows (str): Run selection formula
624
+ """
625
+ return f"FILTER({base_range},MOD(ROW({base_range})-{choose_rows},{self.runs})=0)"
626
+
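+ # For example, with self.runs == 2 and the run-selection cell at $A$20 (a made-up
+ # address), _get_run_select("$A$20", 2, 3) yields "CHOOSE($A$20,ROW($D$3),ROW($D$4))",
+ # and _get_run_filter then keeps only the rows of the selected run within a range.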
627
+ run_select_cell = f"{get_cell_index(0, self.result_offset + 11, True, True)}"
628
+ for col in self.content:
629
+ name = self.content.at[1, col]
630
+ if self.types.get(name, "") in {"float", "classresult", "merged_runs"}:
631
+
632
+ # skip empty columns
633
+ values = np.array(self.values.loc[2 : self.result_offset - 1, col], dtype=float)
634
+ if np.isnan(values).all():
635
+ continue
636
+
637
+ ref_value = (
638
+ f"{get_cell_index(col, 2, False, True)}:{get_cell_index(col, self.result_offset - 1, False, True)}"
639
+ )
640
+ min_rows = self.summary_refs["min"][name][1]
641
+ med_rows = self.summary_refs["median"][name][1]
642
+ max_rows = self.summary_refs["max"][name][1]
643
+ summaries = [(0, ref_value, min_rows, med_rows, max_rows)]
644
+
645
+ # Add run summary formulas if applicable
646
+ if self.ref_sheet is None and self.runs is not None and self.runs > 1:
647
+ sel_runs = _get_run_select(run_select_cell, self.runs, col, False)
648
+ ref_runs = _get_run_filter(ref_value, sel_runs)
649
+ min_runs = _get_run_filter(
650
+ min_rows, _get_run_select(run_select_cell, self.runs, self.summary_refs["min"][name][0])
651
+ )
652
+ med_runs = _get_run_filter(
653
+ med_rows, _get_run_select(run_select_cell, self.runs, self.summary_refs["median"][name][0])
654
+ )
655
+ max_runs = _get_run_filter(
656
+ max_rows, _get_run_select(run_select_cell, self.runs, self.summary_refs["max"][name][0])
657
+ )
658
+ summaries.append((11, ref_runs, min_runs, med_runs, max_runs))
659
+
660
+ for offset, ref, min_ref, med_ref, max_ref in summaries:
661
+ # SUM
662
+ self.content.at[self.result_offset + offset + 1, col] = Formula(f"SUM({ref})")
663
+ # AVG
664
+ self.content.at[self.result_offset + offset + 2, col] = Formula(f"AVERAGE({ref})")
665
+ # DEV
666
+ self.content.at[self.result_offset + offset + 3, col] = Formula(f"STDEV({ref})")
667
+ if col < self.summary_refs["min"]["col"]:
668
+ with np.errstate(invalid="ignore"):
669
+ # DST
670
+ self.content.at[self.result_offset + offset + 4, col] = Formula(
671
+ f"SUMPRODUCT(--({ref}-{min_ref})^2)^0.5"
672
+ )
673
+ # BEST
674
+ self.content.at[self.result_offset + offset + 5, col] = Formula(
675
+ f"SUMPRODUCT(NOT(ISBLANK({ref}))*({ref}={min_ref}))"
676
+ )
677
+ # BETTER
678
+ self.content.at[self.result_offset + offset + 6, col] = Formula(
679
+ f"SUMPRODUCT(NOT(ISBLANK({ref}))*({ref}<{med_ref}))"
680
+ )
681
+ # blank values are counted as worse/worst
682
+ # WORSE
683
+ self.content.at[self.result_offset + offset + 7, col] = Formula(
684
+ f"SUMPRODUCT((NOT(ISBLANK({ref}))*({ref}>{med_ref}))+ISBLANK({ref}))"
685
+ )
686
+ # WORST
687
+ self.content.at[self.result_offset + offset + 8, col] = Formula(
688
+ f"SUMPRODUCT((NOT(ISBLANK({ref}))*({ref}={max_ref}))+ISBLANK({ref}))"
689
+ )
690
+ if self.type == "merge":
691
+ continue
692
+ # values
693
+ # SUM
694
+ self.values.at[self.result_offset + 1, col] = np.nansum(values)
695
+ # AVG
696
+ self.values.at[self.result_offset + 2, col] = np.nanmean(values)
697
+ # DEV
698
+ # catch warnings caused by missing values (nan)
699
+ with warnings.catch_warnings():
700
+ warnings.filterwarnings("ignore", "Degrees of freedom <= 0 for slice", RuntimeWarning)
701
+ self.values.at[self.result_offset + 3, col] = (
702
+ np.nanstd(values, ddof=1) if len(values) != 1 else np.nan
703
+ )
704
+ if col < self.summary_refs["min"]["col"]:
705
+ with np.errstate(invalid="ignore"):
706
+ # DST
707
+ self.values.at[self.result_offset + 4, col] = (
708
+ np.nansum(
709
+ (
710
+ values
711
+ - np.array(
712
+ self.values.loc[2 : self.result_offset - 1, self.summary_refs["min"][name][0]]
713
+ )
714
+ )
715
+ ** 2
716
+ )
717
+ ** 0.5
718
+ )
719
+ # BEST (values * -1, since higher better)
720
+ self.values.at[self.result_offset + 5, col] = -1 * np.nansum(
721
+ values
722
+ == np.array(self.values.loc[2 : self.result_offset - 1, self.summary_refs["min"][name][0]])
723
+ )
724
+ # BETTER (values * -1, since higher better)
725
+ self.values.at[self.result_offset + 6, col] = -1 * np.nansum(
726
+ values
727
+ < np.array(
728
+ self.values.loc[2 : self.result_offset - 1, self.summary_refs["median"][name][0]]
729
+ )
730
+ )
731
+ # WORSE
732
+ self.values.at[self.result_offset + 7, col] = np.nansum(
733
+ values
734
+ > np.array(
735
+ self.values.loc[2 : self.result_offset - 1, self.summary_refs["median"][name][0]]
736
+ )
737
+ ) + np.sum(np.isnan(values))
738
+ # WORST
739
+ self.values.at[self.result_offset + 8, col] = np.nansum(
740
+ values
741
+ == np.array(self.values.loc[2 : self.result_offset - 1, self.summary_refs["max"][name][0]])
742
+ ) + np.sum(np.isnan(values))
743
+
744
+ def add_styles(self) -> None:
745
+ """
746
+ Color float results and their summaries.
747
+ Get column formats.
748
+ """
749
+ # remove header
750
+ results = self.values.loc[2:, 1:]
751
+
752
+ # might be better to move to write_sheet in the future
753
+ for measure, func in self.measures.items():
754
+ if measure in self.float_occur:
755
+ cols = sorted(self.float_occur[measure])
756
+ if func == "t":
757
+ diff = 2
758
+ elif func == "to":
759
+ diff = 0
760
+ for c in cols:
761
+ self.formats[c] = "to"
762
+ else:
763
+ continue
764
+
765
+ # no coloring for merge sheet
766
+ if self.type == "merge":
767
+ continue
768
+
769
+ # filter empty rows
770
+ values_df = results.loc[:, cols].dropna(how="all")
771
+ rows = values_df.index
772
+
773
+ values = np.array(values_df.values, dtype=float)
774
+ min_values = np.reshape(np.nanmin(values, axis=1), (-1, 1))
775
+ median_values = np.reshape(np.nanmedian(values, axis=1), (-1, 1))
776
+ max_values = np.reshape(np.nanmax(values, axis=1), (-1, 1))
777
+ max_min_diff = (max_values - min_values) > diff
778
+ max_med_diff = (max_values - median_values) > diff
779
+
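+ # A cell is marked "best" if it equals the row minimum, lies strictly below the
+ # row median and the min/max spread exceeds `diff`; "worst" mirrors this with
+ # the row maximum and the median/max spread.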
780
+ self.content = (
781
+ self.content.loc[rows, cols]
782
+ .mask(
783
+ (values == min_values) & (values < median_values) & max_min_diff,
784
+ self.content.loc[rows].map(lambda x: (x, "best")),
785
+ )
786
+ .combine_first(self.content)
787
+ )
788
+ self.content = (
789
+ self.content.loc[rows, cols]
790
+ .mask(
791
+ (values == max_values) & (values > median_values) & max_med_diff,
792
+ self.content.loc[rows].map(lambda x: (x, "worst")),
793
+ )
794
+ .combine_first(self.content)
795
+ )
796
+
797
+ def export_values(self, file_name: str, metadata: dict[str, list[Any]]) -> None:
798
+ """
799
+ Export values to parquet file.
800
+
801
+ Attributes:
802
+ file_name (str): Name of the parquet file.
+ metadata (dict[str, list[Any]]): Additional metadata to store with the values.
803
+ """
804
+ # currently only inst sheet exported
805
+ if self.ref_sheet is not None:
806
+ return
807
+ # fill settings
808
+ self.values.iloc[0, :] = self.values.iloc[0, :].ffill()
809
+ # group values by measure
810
+ df = self.values.iloc[2:, [0]].reset_index(drop=True).astype("string")
811
+ df.columns = pd.MultiIndex.from_tuples([("", "instance")], names=["measure", "setting"])
812
+ for m, cols in self.float_occur.items():
813
+ nf = self.values.iloc[2:, sorted(cols)].reset_index(drop=True).astype("float64")
814
+ nf.columns = self.values.iloc[0, sorted(cols)].to_list()
815
+ nf.columns = pd.MultiIndex.from_product([[m], nf.columns], names=["measure", "setting"])
816
+ df = df.join(nf)
817
+ # metadata
818
+ # offset -2 (header) -1 (empty row)
819
+ metadict = {**{"offset": [self.result_offset - 3]}, **metadata}
820
+ metadf = pd.DataFrame({k: pd.Series(v) for k, v in metadict.items()})
821
+ metadf.columns = pd.MultiIndex.from_product([["_metadata"], metadf.columns], names=["measure", "setting"])
822
+ self.values = df.join(metadf)
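+ # The exported frame uses a two-level (measure, setting) column index: instance
+ # names under ("", "instance"), results under (measure, setting) and metadata
+ # under ("_metadata", ...).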
823
+ #! min,med,max no longer included
824
+ self.values.astype(str).to_parquet(file_name)
825
+
826
+ # pylint: disable=too-many-nested-blocks
827
+ def write_sheet(self, xlsxdoc: XLSXDoc) -> None:
828
+ """
829
+ Write sheet to XLSX document.
830
+
831
+ Attributes:
832
+ xlsxdoc (XLSXDoc): XLSX document.
833
+ """
834
+ if isinstance(xlsxdoc.workbook, Workbook):
835
+ sheet = xlsxdoc.workbook.add_worksheet(self.name)
836
+ measure_count = len(self.measures.keys())
837
+ for col in range(len(self.content.columns)):
838
+ num_format = xlsxdoc.num_formats.get(self.formats.get(col, "defaultNumber"), "0.00")
839
+ col_width = xlsxdoc.header_width
840
+ for row, cell in enumerate(list(self.content.iloc[:, col])):
841
+ val = cell
842
+ color: Optional[str] = None
843
+ if isinstance(cell, tuple):
844
+ val, color = cell
845
+ if isinstance(val, Formula):
846
+ val = str(val)
847
+ num_format = xlsxdoc.num_formats.get("formula", "0.00")
848
+ elif isinstance(val, str):
849
+ # header
850
+ if row == 0:
851
+ if measure_count > 0:
852
+ xlsxdoc.header_width = min(
853
+ xlsxdoc.max_col_width, max(80, cell_autofit_width(val) // measure_count)
854
+ )
855
+ else:
856
+ xlsxdoc.header_width = min(xlsxdoc.max_col_width, 80)
857
+ col_width = xlsxdoc.header_width
858
+ else:
859
+ col_width = min(xlsxdoc.max_col_width, max(col_width, cell_autofit_width(val)))
860
+ if isinstance(val, (int, float, str, bool)) or val is None:
861
+ if isinstance(color, str):
862
+ sheet.write(
863
+ row,
864
+ col,
865
+ val,
866
+ xlsxdoc.workbook.add_format(
867
+ {"bg_color": xlsxdoc.colors[color], "num_format": num_format}
868
+ ),
869
+ )
870
+ else:
871
+ sheet.write(row, col, val, xlsxdoc.workbook.add_format({"num_format": num_format}))
872
+ elif isinstance(val, DataValidation):
873
+ val.write(xlsxdoc, sheet, row, col)
874
+ sheet.set_column_pixels(col, col, col_width)
875
+ sheet.freeze_panes(2, 1)
876
+ else:
877
+ raise ValueError("Trying to write to uninitialized workbook.")
878
+
879
+
880
+ @dataclass(order=True, unsafe_hash=True)
881
+ class SystemBlock:
882
+ """
883
+ Dataframe containing results for system.
884
+
885
+ Attributes:
886
+ setting (Optional[Setting]): Benchmark setting.
887
+ machine (Optional[Machine]): Machine.
888
+ content (DataFrame): Results.
889
+ columns (dict[str, Any]): Dictionary of columns and their types.
890
+ offset (Optional[int]): Offset for final block position.
891
+ """
892
+
893
+ setting: Optional["result.Setting"]
894
+ machine: Optional["result.Machine"]
895
+ content: pd.DataFrame = field(default_factory=pd.DataFrame, compare=False)
896
+ columns: dict[str, Any] = field(default_factory=dict, compare=False)
897
+ offset: Optional[int] = field(default=None, compare=False)
898
+
899
+ def gen_name(self, add_machine: bool) -> str:
900
+ """
901
+ Generate name of the block.
902
+
903
+ Attributes:
904
+ add_machine (bool): Whether to include the machine name in the block name.
905
+ """
906
+ res: str = ""
907
+ if self.setting:
908
+ res = f"{self.setting.system.name}-{self.setting.system.version}/{self.setting.name}"
909
+ if add_machine and self.machine:
910
+ res += f" ({self.machine.name})"
911
+ return res
912
+
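+ # For example, a block for system "clingo" 5.6.2 with setting "default" on machine
+ # "hal" (all names made up) is labeled "clingo-5.6.2/default (hal)" when
+ # add_machine is True, and "clingo-5.6.2/default" otherwise.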
913
+ def add_cell(self, row: int, name: str, value_type: str, value: Any) -> None:
914
+ """
915
+ Add cell to dataframe.
916
+
917
+ Attributes:
918
+ row (int): Row of the new cell.
919
+ name (str): Name of the column of the new cell (in most cases the measure).
920
+ value_type (str): Data type of the new cell.
921
+ value (Any): Value of the new cell.
922
+ """
923
+ if name not in self.columns:
924
+ self.content.at[1, name] = name
925
+ self.columns[name] = value_type
926
+ # first non-empty occurrence of column
927
+ elif self.columns[name] in {"None", "empty"}:
928
+ self.columns[name] = value_type
929
+ # mixed system column
930
+ elif value_type not in {self.columns[name], "None", "empty"}:
931
+ self.columns[name] = "string"
932
+ # leave space for header and add new row if necessary
933
+ if row + 2 not in self.content.index:
934
+ self.content = self.content.reindex(self.content.index.tolist() + [row + 2])
935
+ self.content.at[row + 2, name] = value
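+ # For example, add_cell(0, "time", "float", 1.23) creates a "time" column whose
+ # header goes to content row 1 and stores 1.23 in content row 2 (data rows are
+ # shifted by 2 to leave room for the header rows).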