flopscope 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. benchmarks/__init__.py +1 -0
  2. benchmarks/__main__.py +6 -0
  3. benchmarks/_baseline.py +171 -0
  4. benchmarks/_bitwise.py +231 -0
  5. benchmarks/_complex.py +176 -0
  6. benchmarks/_contractions.py +291 -0
  7. benchmarks/_fft.py +198 -0
  8. benchmarks/_impl_urls.py +139 -0
  9. benchmarks/_linalg.py +197 -0
  10. benchmarks/_linalg_delegates.py +407 -0
  11. benchmarks/_metadata.py +141 -0
  12. benchmarks/_misc.py +653 -0
  13. benchmarks/_perf.py +321 -0
  14. benchmarks/_perm_group_calibration.py +175 -0
  15. benchmarks/_pointwise.py +372 -0
  16. benchmarks/_polynomial.py +193 -0
  17. benchmarks/_random.py +209 -0
  18. benchmarks/_reductions.py +136 -0
  19. benchmarks/_sorting.py +289 -0
  20. benchmarks/_stats.py +137 -0
  21. benchmarks/_window.py +92 -0
  22. benchmarks/accumulation/__init__.py +0 -0
  23. benchmarks/accumulation/bench_cost_compute.py +138 -0
  24. benchmarks/dashboard.py +312 -0
  25. benchmarks/runner.py +636 -0
  26. flopscope/__init__.py +273 -0
  27. flopscope/_accumulation/__init__.py +13 -0
  28. flopscope/_accumulation/_bipartite.py +121 -0
  29. flopscope/_accumulation/_burnside.py +51 -0
  30. flopscope/_accumulation/_cache.py +146 -0
  31. flopscope/_accumulation/_components.py +153 -0
  32. flopscope/_accumulation/_cost.py +1414 -0
  33. flopscope/_accumulation/_cost_descriptions.py +63 -0
  34. flopscope/_accumulation/_detection.py +318 -0
  35. flopscope/_accumulation/_ladder.py +191 -0
  36. flopscope/_accumulation/_output_orbit.py +104 -0
  37. flopscope/_accumulation/_partition.py +290 -0
  38. flopscope/_accumulation/_path_info.py +211 -0
  39. flopscope/_accumulation/_public.py +169 -0
  40. flopscope/_accumulation/_reduction.py +310 -0
  41. flopscope/_accumulation/_regimes.py +303 -0
  42. flopscope/_accumulation/_shape.py +33 -0
  43. flopscope/_accumulation/_wreath.py +209 -0
  44. flopscope/_budget.py +1027 -0
  45. flopscope/_config.py +118 -0
  46. flopscope/_counting_ops.py +451 -0
  47. flopscope/_display.py +478 -0
  48. flopscope/_docstrings.py +59 -0
  49. flopscope/_dtypes.py +20 -0
  50. flopscope/_einsum.py +717 -0
  51. flopscope/_errstate.py +25 -0
  52. flopscope/_flops.py +282 -0
  53. flopscope/_free_ops.py +2654 -0
  54. flopscope/_ndarray.py +1126 -0
  55. flopscope/_opt_einsum/LICENSE +21 -0
  56. flopscope/_opt_einsum/NOTICE +59 -0
  57. flopscope/_opt_einsum/__init__.py +209 -0
  58. flopscope/_opt_einsum/_contract.py +1478 -0
  59. flopscope/_opt_einsum/_helpers.py +164 -0
  60. flopscope/_opt_einsum/_hsluv.py +273 -0
  61. flopscope/_opt_einsum/_path_random.py +462 -0
  62. flopscope/_opt_einsum/_paths.py +1653 -0
  63. flopscope/_opt_einsum/_subgraph_symmetry.py +544 -0
  64. flopscope/_opt_einsum/_symmetry.py +140 -0
  65. flopscope/_opt_einsum/_typing.py +37 -0
  66. flopscope/_perm_group.py +717 -0
  67. flopscope/_pointwise.py +2522 -0
  68. flopscope/_polynomial.py +278 -0
  69. flopscope/_registry.py +3216 -0
  70. flopscope/_sorting_ops.py +571 -0
  71. flopscope/_symmetric.py +812 -0
  72. flopscope/_symmetry_transport.py +510 -0
  73. flopscope/_symmetry_utils.py +669 -0
  74. flopscope/_type_info.py +12 -0
  75. flopscope/_unwrap.py +70 -0
  76. flopscope/_validation.py +83 -0
  77. flopscope/_version_check.py +46 -0
  78. flopscope/_weights.py +195 -0
  79. flopscope/_window.py +177 -0
  80. flopscope/accounting.py +565 -0
  81. flopscope/data/default_weights.json +462 -0
  82. flopscope/data/weights.csv +509 -0
  83. flopscope/errors.py +197 -0
  84. flopscope/numpy/__init__.py +878 -0
  85. flopscope/numpy/fft/__init__.py +55 -0
  86. flopscope/numpy/fft/_free.py +51 -0
  87. flopscope/numpy/fft/_transforms.py +695 -0
  88. flopscope/numpy/linalg/__init__.py +105 -0
  89. flopscope/numpy/linalg/_aliases.py +126 -0
  90. flopscope/numpy/linalg/_compound.py +161 -0
  91. flopscope/numpy/linalg/_decompositions.py +353 -0
  92. flopscope/numpy/linalg/_properties.py +533 -0
  93. flopscope/numpy/linalg/_solvers.py +444 -0
  94. flopscope/numpy/linalg/_svd.py +122 -0
  95. flopscope/numpy/random/__init__.py +684 -0
  96. flopscope/numpy/random/_cost_formulas.py +115 -0
  97. flopscope/numpy/random/_counted_classes.py +241 -0
  98. flopscope/numpy/testing/__init__.py +13 -0
  99. flopscope/numpy/typing/__init__.py +30 -0
  100. flopscope/py.typed +0 -0
  101. flopscope/stats/__init__.py +84 -0
  102. flopscope/stats/_base.py +77 -0
  103. flopscope/stats/_cauchy.py +146 -0
  104. flopscope/stats/_erf.py +190 -0
  105. flopscope/stats/_expon.py +146 -0
  106. flopscope/stats/_laplace.py +150 -0
  107. flopscope/stats/_logistic.py +148 -0
  108. flopscope/stats/_lognorm.py +160 -0
  109. flopscope/stats/_ndtri.py +133 -0
  110. flopscope/stats/_norm.py +149 -0
  111. flopscope/stats/_truncnorm.py +186 -0
  112. flopscope/stats/_uniform.py +141 -0
  113. flopscope-0.2.0.dist-info/METADATA +23 -0
  114. flopscope-0.2.0.dist-info/RECORD +115 -0
  115. flopscope-0.2.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,372 @@
1
+ """Benchmark pointwise (element-wise) unary and binary operations.
2
+
3
+ All benchmarks pre-allocate output arrays and use ``out=`` to eliminate
4
+ memory allocation overhead from measurements, isolating pure compute cost.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import statistics
10
+
11
+ import numpy as np
12
+
13
+ from benchmarks._perf import measure_flops
14
+
15
+ UNARY_OPS: list[str] = [
16
+ "abs",
17
+ "negative",
18
+ "positive",
19
+ "exp",
20
+ "exp2",
21
+ "expm1",
22
+ "log",
23
+ "log2",
24
+ "log10",
25
+ "log1p",
26
+ "sqrt",
27
+ "cbrt",
28
+ "square",
29
+ "reciprocal",
30
+ "sin",
31
+ "cos",
32
+ "tan",
33
+ "arcsin",
34
+ "arccos",
35
+ "arctan",
36
+ "sinh",
37
+ "cosh",
38
+ "tanh",
39
+ "arcsinh",
40
+ "arccosh",
41
+ "arctanh",
42
+ "ceil",
43
+ "floor",
44
+ "trunc",
45
+ "rint",
46
+ "sign",
47
+ "signbit",
48
+ "fabs",
49
+ "deg2rad",
50
+ "rad2deg",
51
+ "degrees",
52
+ "radians",
53
+ "logical_not",
54
+ # --- added in Step 2.3 ---
55
+ "frexp",
56
+ "modf",
57
+ "sinc",
58
+ "i0",
59
+ "spacing",
60
+ "nan_to_num",
61
+ "isneginf",
62
+ "isposinf",
63
+ ]
64
+
65
+ BINARY_OPS: list[str] = [
66
+ "add",
67
+ "subtract",
68
+ "multiply",
69
+ "divide",
70
+ "true_divide",
71
+ "floor_divide",
72
+ "power",
73
+ "float_power",
74
+ "mod",
75
+ "remainder",
76
+ "fmod",
77
+ "maximum",
78
+ "minimum",
79
+ "fmax",
80
+ "fmin",
81
+ "greater",
82
+ "greater_equal",
83
+ "less",
84
+ "less_equal",
85
+ "equal",
86
+ "not_equal",
87
+ "logical_and",
88
+ "logical_or",
89
+ "logical_xor",
90
+ "arctan2",
91
+ "hypot",
92
+ "copysign",
93
+ "nextafter",
94
+ "logaddexp",
95
+ "logaddexp2",
96
+ "ldexp",
97
+ ]
98
+
99
+ # Special pointwise ops that don't follow the standard unary/binary pattern.
100
+ SPECIAL_OPS: list[str] = [
101
+ "isclose",
102
+ "heaviside",
103
+ "clip",
104
+ ]
105
+
106
+ # Ops whose output dtype is bool (need bool pre-allocation for out=)
107
+ _BOOL_UNARY = frozenset({"signbit", "logical_not", "isneginf", "isposinf"})
108
+ _BOOL_BINARY = frozenset(
109
+ {
110
+ "greater",
111
+ "greater_equal",
112
+ "less",
113
+ "less_equal",
114
+ "equal",
115
+ "not_equal",
116
+ "logical_and",
117
+ "logical_or",
118
+ "logical_xor",
119
+ }
120
+ )
121
+
122
+ # Ops that return tuples — benchmark without out= parameter.
123
+ _TUPLE_RETURN_OPS = frozenset({"frexp", "modf"})
124
+
125
+ # Ops that require positive input.
126
+ _POSITIVE_INPUT_OPS = frozenset({"i0"})
127
+
128
+ # Ops that benefit from NaN/inf values in input.
129
+ _NAN_INPUT_OPS = frozenset({"nan_to_num"})
130
+
131
+
132
def _make_inputs_unary(n: int, dtype: str) -> list[np.ndarray]:
    """Build three length-``n`` sample arrays for unary benchmarks.

    A single generator seeded with 42 is drawn from three times, in this
    order: standard normal, uniform on [0.01, 100), uniform on
    [-1000, 1000).  Each draw is then cast to ``dtype``.
    """
    gen = np.random.default_rng(42)
    # The draws must stay in this order: they share one generator, so
    # reordering would change every returned value.
    normal_draw = gen.standard_normal(n)
    positive_draw = gen.uniform(0.01, 100, size=n)
    wide_draw = gen.uniform(-1000, 1000, size=n)
    return [
        sample.astype(dtype)
        for sample in (normal_draw, positive_draw, wide_draw)
    ]
140
+
141
+
142
def _make_inputs_binary(n: int, dtype: str) -> list[tuple[np.ndarray, np.ndarray]]:
    """Build three ``(a, b)`` operand pairs for binary benchmarks.

    All six arrays come from one generator seeded with 42.  The pairs are
    drawn in order from a standard normal, a uniform on [0.01, 100) and a
    uniform on [-1000, 1000); within a pair ``a`` is drawn before ``b``.
    Every array has length ``n`` and is cast to ``dtype``.
    """
    gen = np.random.default_rng(42)

    def draw_pair(sample):
        # ``a`` is sampled (and cast) before ``b`` so the generator state
        # advances in a fixed order.
        first = sample().astype(dtype)
        second = sample().astype(dtype)
        return first, second

    normal_pair = draw_pair(lambda: gen.standard_normal(n))
    positive_pair = draw_pair(lambda: gen.uniform(0.01, 100, size=n))
    wide_pair = draw_pair(lambda: gen.uniform(-1000, 1000, size=n))
    return [normal_pair, positive_pair, wide_pair]
159
+
160
+
161
+ # sin/cos have a fast path for small inputs (near 0) that skips range
162
+ # reduction. Use uniform(-100, 100) instead of standard_normal for
163
+ # distribution 0 so all three distributions exercise the full code path.
164
+ _WIDE_INPUT_OPS = frozenset({"sin", "cos"})
165
+
166
+
167
def _unary_setup(n: int, dtype: str, op: str, dist_idx: int) -> str:
    """Return the setup snippet for benchmarking unary op ``op``.

    The snippet is a ``"; "``-separated sequence of statements that
    imports NumPy, creates the input ``x`` (length ``n``, cast to
    ``np.<dtype>``) from distribution number ``dist_idx`` and, unless the
    op is listed in ``_TUPLE_RETURN_OPS``, pre-allocates ``_out``.

    Parameters
    ----------
    n : int
        Number of elements in ``x``.
    dtype : str
        NumPy dtype name, spliced into the snippet as ``np.<dtype>``.
    op : str
        Operation name; selects the op-specific adjustments below.
    dist_idx : int
        Index into the three input distributions.

    Raises
    ------
    IndexError
        If ``dist_idx`` is outside the range of the three distributions.
    """
    # Ops in _WIDE_INPUT_OPS swap the standard-normal sample of
    # distribution 0 for a wider uniform one.
    if op in _WIDE_INPUT_OPS and dist_idx == 0:
        first_sampler = f"uniform(-100, 100, size={n})"
    else:
        first_sampler = f"standard_normal({n})"
    samplers = [
        first_sampler,
        f"uniform(0.01, 100, size={n})",
        f"uniform(-1000, 1000, size={n})",
    ]
    statements = [
        "import numpy as np",
        f"x = np.random.default_rng(42).{samplers[dist_idx]}.astype(np.{dtype})",
    ]

    # Ops flagged as needing positive input get the absolute value of x.
    if op in _POSITIVE_INPUT_OPS:
        statements.append("x = np.abs(x)")

    # For distribution 0 only, seed the head of x with NaN, +inf and -inf
    # so that ops in _NAN_INPUT_OPS have non-finite values to process.
    if op in _NAN_INPUT_OPS and dist_idx == 0:
        statements.extend(
            [
                f"x[:{n}//100] = np.nan",
                f"x[{n}//100:{n}//50] = np.inf",
                f"x[{n}//50:{n}//50+{n}//100] = -np.inf",
            ]
        )

    # Tuple-returning ops are benchmarked without out=, so no buffer.
    if op not in _TUPLE_RETURN_OPS:
        if op in _BOOL_UNARY:
            out_dtype = "bool"
        else:
            out_dtype = f"np.{dtype}"
        statements.append(f"_out = np.empty({n}, dtype={out_dtype})")

    return "; ".join(statements)
201
+
202
+
203
# Binary ufuncs whose second operand must be an integer array.  ``np.ldexp``
# only provides (float, int) loops, so a floating-point exponent array is
# rejected with a TypeError instead of being cast.
_INTEGER_SECOND_OPERAND_OPS = frozenset({"ldexp"})


def _binary_setup(n: int, dtype: str, op: str, dist_idx: int) -> str:
    """Build setup code for a binary op with pre-allocated output.

    The returned snippet imports NumPy, draws operands ``a`` and ``b``
    (length ``n``, cast to ``np.<dtype>``) from a generator seeded with 42
    using distribution number ``dist_idx``, and allocates ``_out``.

    Parameters
    ----------
    n : int
        Number of elements in each operand and in ``_out``.
    dtype : str
        NumPy dtype name, spliced into the snippet as ``np.<dtype>``.
    op : str
        Operation name.  Ops in ``_BOOL_BINARY`` get a ``bool`` output
        buffer; ops in ``_INTEGER_SECOND_OPERAND_OPS`` get ``b`` converted
        to ``np.int32``.
    dist_idx : int
        Index into the three input distributions.

    Returns
    -------
    str
        ``"; "``-separated Python statements.

    Raises
    ------
    IndexError
        If ``dist_idx`` is outside the range of the three distributions.
    """
    dists = [
        (
            f"rng = np.random.default_rng(42); "
            f"a = rng.standard_normal({n}).astype(np.{dtype}); "
            f"b = rng.standard_normal({n}).astype(np.{dtype})"
        ),
        (
            f"rng = np.random.default_rng(42); "
            f"a = rng.uniform(0.01, 100, size={n}).astype(np.{dtype}); "
            f"b = rng.uniform(0.01, 100, size={n}).astype(np.{dtype})"
        ),
        (
            f"rng = np.random.default_rng(42); "
            f"a = rng.uniform(-1000, 1000, size={n}).astype(np.{dtype}); "
            f"b = rng.uniform(-1000, 1000, size={n}).astype(np.{dtype})"
        ),
    ]
    setup = f"import numpy as np; {dists[dist_idx]}"

    # Without this conversion ``np.ldexp(a, b, out=_out)`` raises TypeError
    # for a float ``b`` and the op can never be measured.  int32 is used
    # because it corresponds to the C ``int`` loop ldexp provides.
    if op in _INTEGER_SECOND_OPERAND_OPS:
        setup += "; b = b.astype(np.int32)"

    out_dtype = "bool" if op in _BOOL_BINARY else f"np.{dtype}"
    return f"{setup}; _out = np.empty({n}, dtype={out_dtype})"
227
+
228
+
229
# Entries of UNARY_OPS that NumPy implements as plain functions rather than
# ufuncs.  Their signatures have no ``out=`` keyword, so passing one raises
# TypeError; they are benchmarked without a pre-allocated output.
_NO_OUT_UNARY_OPS = frozenset({"sinc", "i0", "nan_to_num"})


def _measure_distributions(
    build_setup, op: str, bench: str, n: int, dtype: str, repeats: int, distributions: int
) -> tuple[list[float], list[int]]:
    """Measure ``bench`` once per input distribution.

    ``build_setup`` is called as ``build_setup(n, dtype, op, dist_idx)`` and
    must return the setup snippet for that distribution.  Returns
    ``(alphas, raw_totals)``: the per-element values
    ``total_flops / (n * repeats)`` and the raw ``total_flops`` values, one
    entry per distribution that was measured successfully.
    """
    alphas: list[float] = []
    raw_totals: list[int] = []
    for dist_idx in range(distributions):
        setup = build_setup(n, dtype, op, dist_idx)
        try:
            result = measure_flops(setup, bench, repeats=repeats)
        except RuntimeError:
            # Best effort: a failed measurement drops only this distribution.
            continue
        alphas.append(result.total_flops / (n * repeats))
        raw_totals.append(result.total_flops)
    return alphas, raw_totals


def _pointwise_detail(
    category: str,
    benchmark_size: str,
    bench: str,
    n: int,
    repeats: int,
    raw_totals: list[int],
    alphas: list[float],
) -> dict:
    """Assemble the metadata record stored in ``details`` for one op."""
    return {
        "category": category,
        "measurement_mode": (
            "ufunc_unary" if category == "counted_unary" else "ufunc_binary"
        ),
        "analytical_formula": "numel(output)",
        "analytical_flops": n,
        "benchmark_size": benchmark_size,
        "bench_code": bench,
        "repeats": repeats,
        "perf_instructions_total": raw_totals,
        "distribution_alphas": alphas,
    }


def benchmark_pointwise(
    n: int = 10_000_000,
    dtype: str = "float64",
    repeats: int = 10,
    distributions: int = 3,
) -> tuple[dict[str, float], dict[str, dict]]:
    """Benchmark all pointwise ops, returning raw measurement per element.

    Unary and binary ufuncs are called with a pre-allocated output
    (``out=``) so that output allocation is not part of the measurement.
    Ops that cannot take ``out=`` are called without it: the tuple-returning
    ops in ``_TUPLE_RETURN_OPS``, the non-ufunc functions in
    ``_NO_OUT_UNARY_OPS`` and the special ops.

    Parameters
    ----------
    n : int
        Array size.
    dtype : str
        NumPy dtype string.
    repeats : int
        Number of repetitions per measurement.
    distributions : int
        Number of input distributions to measure (median is taken).  The
        setup helpers define three distributions, so larger values raise
        ``IndexError``.

    Returns
    -------
    tuple[dict[str, float], dict[str, dict]]
        A pair of (alphas, details). ``alphas`` maps op name to median
        measurement per element. ``details`` maps op name to a dict of
        raw benchmark metadata.  Ops for which every measurement raised
        ``RuntimeError`` are absent from both dicts.
    """
    results: dict[str, float] = {}
    details: dict[str, dict] = {}

    def record(op, category, benchmark_size, bench, build_setup):
        # Measure one op and store its median and metadata, if any
        # distribution produced a measurement.
        alphas, raw_totals = _measure_distributions(
            build_setup, op, bench, n, dtype, repeats, distributions
        )
        if not alphas:
            return
        results[op] = statistics.median(alphas)
        details[op] = _pointwise_detail(
            category, benchmark_size, bench, n, repeats, raw_totals, alphas
        )

    # --- Unary ops ---
    for op in UNARY_OPS:
        accepts_out = op not in _TUPLE_RETURN_OPS and op not in _NO_OUT_UNARY_OPS
        bench = f"np.{op}(x, out=_out)" if accepts_out else f"np.{op}(x)"
        record(op, "counted_unary", f"x: ({n},)", bench, _unary_setup)

    # --- Binary ops ---
    for op in BINARY_OPS:
        record(
            op,
            "counted_binary",
            f"a: ({n},), b: ({n},)",
            f"np.{op}(a, b, out=_out)",
            _binary_setup,
        )

    # --- Special ops (non-standard patterns) ---
    # op -> (setup builder, bench code, category, benchmark_size)
    special_cases = {
        # Binary comparison returning bool.
        "isclose": (
            _binary_setup,
            "np.isclose(a, b)",
            "counted_binary",
            f"a: ({n},), b: ({n},)",
        ),
        # Binary with scalar second argument.
        "heaviside": (
            _unary_setup,
            "np.heaviside(x, 0.5)",
            "counted_binary",
            f"x: ({n},), h=0.5",
        ),
        # Ternary: clip(x, min, max).
        "clip": (
            _unary_setup,
            "np.clip(x, -1.0, 1.0)",
            "counted_unary",
            f"x: ({n},), a_min=-1.0, a_max=1.0",
        ),
    }
    for op in SPECIAL_OPS:
        case = special_cases.get(op)
        if case is None:
            # No benchmark recipe for this op; nothing is recorded.
            continue
        build_setup, bench, category, benchmark_size = case
        record(op, category, benchmark_size, bench, build_setup)

    return results, details
@@ -0,0 +1,193 @@
1
+ """Benchmark polynomial operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import statistics
6
+
7
+ from benchmarks._perf import measure_flops
8
+
9
+ POLYNOMIAL_OPS: list[str] = [
10
+ "polyval",
11
+ "polyfit",
12
+ "polyadd",
13
+ "polysub",
14
+ "polymul",
15
+ "polydiv",
16
+ "polyder",
17
+ "polyint",
18
+ "poly",
19
+ "roots",
20
+ ]
21
+
22
+ _FORMULA_STRINGS: dict[str, str] = {
23
+ "polyval": "2 * n * degree (FMA=2)",
24
+ "polyfit": "2 * n * (degree+1)^2",
25
+ "roots": "degree^3",
26
+ "polymul": "(degree+1)^2",
27
+ "polydiv": "(degree+1)^2",
28
+ "polyadd": "degree + 1",
29
+ "polysub": "degree + 1",
30
+ "polyder": "degree + 1",
31
+ "polyint": "degree + 1",
32
+ "poly": "degree^2",
33
+ }
34
+
35
+
36
def _analytical_cost(op: str, n: int, degree: int) -> int:
    """Compute the analytical FLOP count charged for polynomial op ``op``.

    ``n`` is only used by ``polyval`` and ``polyfit``; every other op
    depends on ``degree`` alone.  The formulas are intended to mirror
    flopscope's runtime cost model, so the benchmark denominator and the
    budget deduction agree.

    Raises
    ------
    ValueError
        If ``op`` is not one of the known polynomial operations.
    """
    # Linear in the coefficient count (runtime charges len(c) = degree + 1).
    if op in ("polyadd", "polysub", "polyder", "polyint"):
        return degree + 1
    # Quadratic in the coefficient count.
    if op in ("polymul", "polydiv"):
        return (degree + 1) ** 2
    if op == "poly":
        return degree**2
    if op == "roots":
        return degree**3
    if op == "polyval":
        # FMA counted as 2 FLOPs (spec 2026-05-20): m*deg became 2*m*deg.
        return 2 * n * degree
    if op == "polyfit":
        return 2 * n * (degree + 1) ** 2
    raise ValueError(f"Unknown polynomial op: {op!r}")
60
+
61
+
62
def _polynomial_case(
    op: str, base_setup: str, n: int, dtype: str, degree: int
) -> tuple[str, str]:
    """Return the ``(setup, bench)`` code strings for one polynomial op."""
    if op == "polyval":
        setup = base_setup + f"; x = rng.standard_normal({n}).astype(np.{dtype})"
        return setup, "np.polyval(c, x)"
    if op == "polyfit":
        setup = (
            base_setup
            + f"; x = np.linspace(-1, 1, {n}).astype(np.{dtype})"
            + f"; y = np.polyval(c, x) + rng.standard_normal({n}).astype(np.{dtype}) * 0.01"
        )
        return setup, f"np.polyfit(x, y, {degree})"
    if op == "poly":
        setup = base_setup + f"; r = rng.standard_normal({degree}).astype(np.{dtype})"
        return setup, "np.poly(r)"
    if op in ("polyadd", "polysub", "polymul", "polydiv"):
        # Two-operand coefficient ops: a second polynomial ``d`` of the
        # same length as ``c``.
        setup = base_setup + f"; d = rng.standard_normal({degree + 1}).astype(np.{dtype})"
        return setup, f"np.{op}(c, d)"
    # roots, polyder, polyint (and any other op) act on ``c`` alone.
    return base_setup, f"np.{op}(c)"


def _polynomial_benchmark_size(op: str, n: int, degree: int) -> str:
    """Return the human-readable operand-shape description for one op."""
    if op == "polyval":
        return f"c: ({degree + 1},), x: ({n},)"
    if op == "polyfit":
        return f"x: ({n},), y: ({n},), degree={degree}"
    if op in ("polymul", "polydiv", "polyadd", "polysub"):
        return f"c: ({degree + 1},), d: ({degree + 1},)"
    if op in ("polyder", "polyint", "roots"):
        return f"c: ({degree + 1},)"
    if op == "poly":
        return f"r: ({degree},)"
    return f"n={n}, degree={degree}"


def benchmark_polynomial(
    n: int = 1_000_000,
    dtype: str = "float64",
    repeats: int = 10,
    degree: int = 100,
) -> tuple[dict[str, float], dict[str, dict]]:
    """Benchmark polynomial ops, returning raw measurement per element.

    Each op is normalized by its analytical FLOP cost from
    ``_analytical_cost(op, n, degree)`` so the returned value
    represents raw perf-counter FLOPs per analytical FLOP.  Every op is
    measured with three coefficient distributions (seeds 42, 43, 44) and
    the median is reported.

    Parameters
    ----------
    n : int
        Array size for polyval/polyfit.
    dtype : str
        NumPy dtype string.
    repeats : int
        Number of repetitions per measurement.
    degree : int
        Polynomial degree (higher = less overhead-dominated for coeff ops).

    Returns
    -------
    tuple[dict[str, float], dict[str, dict]]
        ``(alphas, details)`` where *alphas* maps op name to median alpha
        and *details* maps op name to a dict of per-op measurement metadata.
        An op is omitted when every measurement raised ``RuntimeError`` or
        when its normalisation denominator (analytical cost times
        ``repeats``) is zero, e.g. ``degree=0`` for polyval/roots/poly.
    """
    results: dict[str, float] = {}
    details: dict[str, dict] = {}

    # 3 distributions with varying coefficient magnitudes
    coeff_setups = [
        f"c = rng.standard_normal({degree + 1}).astype(np.{dtype})",
        f"c = (rng.standard_normal({degree + 1}) * 100).astype(np.{dtype})",
        f"c = (rng.standard_normal({degree + 1}) * 0.01).astype(np.{dtype})",
    ]

    for op in POLYNOMIAL_OPS:
        # The analytical cost does not depend on the coefficient
        # distribution, so compute it once per op.
        analytical = _analytical_cost(op, n, degree)
        denominator = analytical * repeats
        if denominator == 0:
            # The measurement cannot be normalised; skip the op rather than
            # fail with ZeroDivisionError after an expensive measurement.
            continue

        dist_values: list[float] = []
        perf_instructions: list[int] = []
        bench = ""

        for ci, c_setup in enumerate(coeff_setups):
            seed = 42 + ci
            base_setup = (
                f"import numpy as np; rng = np.random.default_rng({seed}); {c_setup}"
            )
            setup, bench = _polynomial_case(op, base_setup, n, dtype, degree)
            try:
                result = measure_flops(setup, bench, repeats=repeats)
            except RuntimeError:
                # Best effort: drop only this distribution.
                continue
            perf_instructions.append(result.total_flops)
            dist_values.append(result.total_flops / denominator)

        if not dist_values:
            continue

        results[op] = statistics.median(dist_values)
        details[op] = {
            "category": "counted_custom",
            "measurement_mode": "custom",
            "analytical_formula": _FORMULA_STRINGS.get(op, "n"),
            "analytical_flops": analytical,
            "benchmark_size": _polynomial_benchmark_size(op, n, degree),
            "bench_code": bench,
            "repeats": repeats,
            "perf_instructions_total": perf_instructions,
            "distribution_alphas": dist_values,
        }

    return results, details
+ return results, details