flopscope 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. benchmarks/__init__.py +1 -0
  2. benchmarks/__main__.py +6 -0
  3. benchmarks/_baseline.py +171 -0
  4. benchmarks/_bitwise.py +231 -0
  5. benchmarks/_complex.py +176 -0
  6. benchmarks/_contractions.py +291 -0
  7. benchmarks/_fft.py +198 -0
  8. benchmarks/_impl_urls.py +139 -0
  9. benchmarks/_linalg.py +197 -0
  10. benchmarks/_linalg_delegates.py +407 -0
  11. benchmarks/_metadata.py +141 -0
  12. benchmarks/_misc.py +653 -0
  13. benchmarks/_perf.py +321 -0
  14. benchmarks/_perm_group_calibration.py +175 -0
  15. benchmarks/_pointwise.py +372 -0
  16. benchmarks/_polynomial.py +193 -0
  17. benchmarks/_random.py +209 -0
  18. benchmarks/_reductions.py +136 -0
  19. benchmarks/_sorting.py +289 -0
  20. benchmarks/_stats.py +137 -0
  21. benchmarks/_window.py +92 -0
  22. benchmarks/accumulation/__init__.py +0 -0
  23. benchmarks/accumulation/bench_cost_compute.py +138 -0
  24. benchmarks/dashboard.py +312 -0
  25. benchmarks/runner.py +636 -0
  26. flopscope/__init__.py +273 -0
  27. flopscope/_accumulation/__init__.py +13 -0
  28. flopscope/_accumulation/_bipartite.py +121 -0
  29. flopscope/_accumulation/_burnside.py +51 -0
  30. flopscope/_accumulation/_cache.py +146 -0
  31. flopscope/_accumulation/_components.py +153 -0
  32. flopscope/_accumulation/_cost.py +1414 -0
  33. flopscope/_accumulation/_cost_descriptions.py +63 -0
  34. flopscope/_accumulation/_detection.py +318 -0
  35. flopscope/_accumulation/_ladder.py +191 -0
  36. flopscope/_accumulation/_output_orbit.py +104 -0
  37. flopscope/_accumulation/_partition.py +290 -0
  38. flopscope/_accumulation/_path_info.py +211 -0
  39. flopscope/_accumulation/_public.py +169 -0
  40. flopscope/_accumulation/_reduction.py +310 -0
  41. flopscope/_accumulation/_regimes.py +303 -0
  42. flopscope/_accumulation/_shape.py +33 -0
  43. flopscope/_accumulation/_wreath.py +209 -0
  44. flopscope/_budget.py +1027 -0
  45. flopscope/_config.py +118 -0
  46. flopscope/_counting_ops.py +451 -0
  47. flopscope/_display.py +478 -0
  48. flopscope/_docstrings.py +59 -0
  49. flopscope/_dtypes.py +20 -0
  50. flopscope/_einsum.py +717 -0
  51. flopscope/_errstate.py +25 -0
  52. flopscope/_flops.py +282 -0
  53. flopscope/_free_ops.py +2654 -0
  54. flopscope/_ndarray.py +1126 -0
  55. flopscope/_opt_einsum/LICENSE +21 -0
  56. flopscope/_opt_einsum/NOTICE +59 -0
  57. flopscope/_opt_einsum/__init__.py +209 -0
  58. flopscope/_opt_einsum/_contract.py +1478 -0
  59. flopscope/_opt_einsum/_helpers.py +164 -0
  60. flopscope/_opt_einsum/_hsluv.py +273 -0
  61. flopscope/_opt_einsum/_path_random.py +462 -0
  62. flopscope/_opt_einsum/_paths.py +1653 -0
  63. flopscope/_opt_einsum/_subgraph_symmetry.py +544 -0
  64. flopscope/_opt_einsum/_symmetry.py +140 -0
  65. flopscope/_opt_einsum/_typing.py +37 -0
  66. flopscope/_perm_group.py +717 -0
  67. flopscope/_pointwise.py +2522 -0
  68. flopscope/_polynomial.py +278 -0
  69. flopscope/_registry.py +3216 -0
  70. flopscope/_sorting_ops.py +571 -0
  71. flopscope/_symmetric.py +812 -0
  72. flopscope/_symmetry_transport.py +510 -0
  73. flopscope/_symmetry_utils.py +669 -0
  74. flopscope/_type_info.py +12 -0
  75. flopscope/_unwrap.py +70 -0
  76. flopscope/_validation.py +83 -0
  77. flopscope/_version_check.py +46 -0
  78. flopscope/_weights.py +195 -0
  79. flopscope/_window.py +177 -0
  80. flopscope/accounting.py +565 -0
  81. flopscope/data/default_weights.json +462 -0
  82. flopscope/data/weights.csv +509 -0
  83. flopscope/errors.py +197 -0
  84. flopscope/numpy/__init__.py +878 -0
  85. flopscope/numpy/fft/__init__.py +55 -0
  86. flopscope/numpy/fft/_free.py +51 -0
  87. flopscope/numpy/fft/_transforms.py +695 -0
  88. flopscope/numpy/linalg/__init__.py +105 -0
  89. flopscope/numpy/linalg/_aliases.py +126 -0
  90. flopscope/numpy/linalg/_compound.py +161 -0
  91. flopscope/numpy/linalg/_decompositions.py +353 -0
  92. flopscope/numpy/linalg/_properties.py +533 -0
  93. flopscope/numpy/linalg/_solvers.py +444 -0
  94. flopscope/numpy/linalg/_svd.py +122 -0
  95. flopscope/numpy/random/__init__.py +684 -0
  96. flopscope/numpy/random/_cost_formulas.py +115 -0
  97. flopscope/numpy/random/_counted_classes.py +241 -0
  98. flopscope/numpy/testing/__init__.py +13 -0
  99. flopscope/numpy/typing/__init__.py +30 -0
  100. flopscope/py.typed +0 -0
  101. flopscope/stats/__init__.py +84 -0
  102. flopscope/stats/_base.py +77 -0
  103. flopscope/stats/_cauchy.py +146 -0
  104. flopscope/stats/_erf.py +190 -0
  105. flopscope/stats/_expon.py +146 -0
  106. flopscope/stats/_laplace.py +150 -0
  107. flopscope/stats/_logistic.py +148 -0
  108. flopscope/stats/_lognorm.py +160 -0
  109. flopscope/stats/_ndtri.py +133 -0
  110. flopscope/stats/_norm.py +149 -0
  111. flopscope/stats/_truncnorm.py +186 -0
  112. flopscope/stats/_uniform.py +141 -0
  113. flopscope-0.2.0.dist-info/METADATA +23 -0
  114. flopscope-0.2.0.dist-info/RECORD +115 -0
  115. flopscope-0.2.0.dist-info/WHEEL +4 -0
benchmarks/_misc.py ADDED
@@ -0,0 +1,653 @@
1
+ """Benchmark miscellaneous custom-formula operations.
2
+
3
+ Each operation has its own analytical cost formula and benchmark setup.
4
+ Categories covered: element-wise comparison/conversion, differencing,
5
+ convolution/correlation, statistical, binning/histogram, interpolation,
6
+ and linear/generation ops.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import math
12
+ import statistics
13
+
14
+ from benchmarks._perf import measure_flops
15
+
16
# Every misc op benchmarked by this module, grouped by category.
# ``benchmark_misc`` iterates this list in order.
MISC_OPS: list[str] = [
    # Element-wise comparison/conversion
    "allclose", "array_equal", "array_equiv", "clip",
    # Differencing
    "diff", "ediff1d", "gradient", "unwrap",
    # Convolution/correlation
    "convolve", "correlate",
    # Statistical
    "corrcoef", "cov", "cross",
    # Binning/histogram
    "histogram", "histogram2d", "histogramdd", "histogram_bin_edges",
    "digitize", "bincount",
    # Interpolation
    "interp",
    # Linear/generation
    "trace", "trapezoid", "logspace", "geomspace", "vander",
]
50
+
51
+
52
+ _FORMULA_STRINGS: dict[str, str] = {
53
+ "allclose": "n",
54
+ "array_equal": "n",
55
+ "array_equiv": "n",
56
+ "clip": "n",
57
+ "diff": "n",
58
+ "ediff1d": "n",
59
+ "gradient": "n",
60
+ "unwrap": "7 * n",
61
+ "convolve": "2*n*k - n - k",
62
+ "correlate": "2*n*k - n - k",
63
+ "corrcoef": "f^2 * s",
64
+ "cov": "f^2 * s",
65
+ "cross": "15 * n",
66
+ "histogram": "n * ceil(log2(bins))",
67
+ "histogram2d": "n * 2 * ceil(log2(bins))",
68
+ "histogramdd": "n * ndim * ceil(log2(bins))",
69
+ "histogram_bin_edges": "n",
70
+ "digitize": "n * ceil(log2(bins))",
71
+ "bincount": "n",
72
+ "interp": "n * ceil(log2(xp))",
73
+ "trace": "min(m, n)",
74
+ "trapezoid": "n",
75
+ "logspace": "n",
76
+ "geomspace": "n",
77
+ "vander": "n * (degree - 1)",
78
+ }
79
+
80
+
81
+ def _analytical_cost(op: str, **kwargs: int) -> int:
82
+ """Return the analytical FLOP count for the benchmark configuration.
83
+
84
+ Parameters
85
+ ----------
86
+ op : str
87
+ Operation name.
88
+ **kwargs
89
+ Operation-specific size parameters: ``n``, ``k`` (kernel size),
90
+ ``f`` (features), ``s`` (samples), ``bins``, ``xp`` (interp knots),
91
+ ``degree``.
92
+
93
+ Returns
94
+ -------
95
+ int
96
+ Analytical FLOP count for the benchmark configuration.
97
+ """
98
+ n = kwargs.get("n", 10_000_000)
99
+
100
+ # --- Element-wise comparison/conversion (cost = n) ---
101
+ if op in ("allclose", "array_equal", "array_equiv", "clip"):
102
+ return n
103
+
104
+ # --- Differencing (cost = n) ---
105
+ if op in ("diff", "ediff1d", "gradient"):
106
+ return n
107
+
108
+ # --- Phase unwrapping (~7 ufunc passes, issue #69) ---
109
+ if op == "unwrap":
110
+ return 7 * n
111
+
112
+ # --- Convolution/correlation (cost = 2*n*k - n - k, FMA=1, issue #69) ---
113
+ if op in ("convolve", "correlate"):
114
+ k = kwargs.get("k", 1000)
115
+ return max(2 * n * k - n - k, 1)
116
+
117
+ # --- Statistical ---
118
+ if op in ("corrcoef", "cov"):
119
+ f = kwargs.get("f", 1000)
120
+ s = kwargs.get("s", 10000)
121
+ return f * f * s
122
+
123
+ if op == "cross":
124
+ # 5 ops per output element; benchmark shape (n, 3) → a.size = n*3 → 15*n
125
+ return 15 * n
126
+
127
+ # --- Binning/histogram ---
128
+ if op == "histogram":
129
+ bins = kwargs.get("bins", 100)
130
+ return n * math.ceil(math.log2(bins))
131
+
132
+ if op == "histogram2d":
133
+ bins = kwargs.get("bins", 100)
134
+ return n * (math.ceil(math.log2(bins)) + math.ceil(math.log2(bins)))
135
+
136
+ if op == "histogramdd":
137
+ bins = kwargs.get("bins", 50)
138
+ ndim = kwargs.get("ndim", 3)
139
+ return n * ndim * math.ceil(math.log2(bins))
140
+
141
+ if op == "histogram_bin_edges":
142
+ return n
143
+
144
+ if op == "digitize":
145
+ bins = kwargs.get("bins", 100)
146
+ return n * math.ceil(math.log2(bins))
147
+
148
+ if op == "bincount":
149
+ return n
150
+
151
+ # --- Interpolation ---
152
+ if op == "interp":
153
+ xp = kwargs.get("xp", 10000)
154
+ return n * math.ceil(math.log2(xp))
155
+
156
+ # --- Linear/generation ---
157
+ if op == "trace":
158
+ return n # cost = min(m, n); for square 1000x1000, n=1000
159
+
160
+ if op == "trapezoid":
161
+ return n
162
+
163
+ if op in ("logspace", "geomspace"):
164
+ return n
165
+
166
+ if op == "vander":
167
+ degree = kwargs.get("degree", 100)
168
+ return n * (degree - 1)
169
+
170
+ # Fallback
171
+ return n
172
+
173
+
174
def _get_op_config(op: str, dtype: str) -> dict:
    """Return the benchmark configuration for one misc op.

    Parameters
    ----------
    op : str
        Operation name.
    dtype : str
        NumPy dtype name; it is spliced into the generated setup code as
        ``np.<dtype>``.

    Returns
    -------
    dict
        A dict with three keys:

        ``setups`` (list[str])
            Three setup snippets, one per input distribution.
        ``bench`` (str | list[str])
            The statement to measure.  For ``logspace`` and ``geomspace``
            this is a list holding one statement per setup.
        ``analytical`` (int)
            Analytical FLOP count from ``_analytical_cost``.

    Raises
    ------
    ValueError
        If ``op`` has no benchmark configuration.
    """
    n_large = 10_000_000
    rng_seed_a = 42
    rng_seed_b = 43
    cast = f".astype(np.{dtype})"
    one_rng = f"import numpy as np; rng = np.random.default_rng({rng_seed_a})"

    def _draws(rng: str, shape: object) -> list[str]:
        """Return the three distribution draw expressions for *shape*."""
        return [
            f"{rng}.standard_normal({shape})",
            f"{rng}.uniform(0.01, 100, size={shape})",
            f"{rng}.uniform(-1000, 1000, size={shape})",
        ]

    def _three_setups_1arr(
        shape: object, extra: str = "", var: str = "x"
    ) -> list[str]:
        """Three distributions for a single array named *var*."""
        return [
            f"{one_rng}; {var} = {draw}{cast}{extra}"
            for draw in _draws("rng", shape)
        ]

    def _three_setups_2arr(
        shape: object,
        extra: str = "",
        names: tuple[str, str] = ("a", "b"),
        rngs: tuple[str, str] = ("rng_a", "rng_b"),
    ) -> list[str]:
        """Three distributions for two arrays, each drawn from its own RNG."""
        first_rng, second_rng = rngs
        first, second = names
        base = (
            f"import numpy as np; "
            f"{first_rng} = np.random.default_rng({rng_seed_a}); "
            f"{second_rng} = np.random.default_rng({rng_seed_b})"
        )
        return [
            f"{base}; {first} = {draw_1}{cast}; {second} = {draw_2}{cast}{extra}"
            for draw_1, draw_2 in zip(
                _draws(first_rng, shape), _draws(second_rng, shape)
            )
        ]

    def _config(setups: list[str], bench: object, **sizes: int) -> dict:
        """Assemble the result dict; *sizes* feed the analytical formula."""
        return {
            "setups": setups,
            "bench": bench,
            "analytical": _analytical_cost(op, **sizes),
        }

    # --- Element-wise comparison/conversion ---
    if op == "allclose":
        # Two similar arrays (b = a + small noise) so allclose scans all.
        setups = [
            (
                f"{one_rng}; a = rng.standard_normal({n_large}){cast}; "
                f"b = a + rng.standard_normal({n_large}){cast} * 1e-10"
            ),
            (
                f"{one_rng}; a = rng.uniform(0.01, 100, size={n_large}){cast}; "
                f"b = a + rng.uniform(-1e-10, 1e-10, size={n_large}){cast}"
            ),
            (
                f"{one_rng}; a = rng.uniform(-1000, 1000, size={n_large}){cast}; "
                f"b = a + rng.uniform(-1e-10, 1e-10, size={n_large}){cast}"
            ),
        ]
        return _config(setups, "np.allclose(a, b)", n=n_large)

    if op in ("array_equal", "array_equiv"):
        return _config(_three_setups_2arr(n_large), f"np.{op}(a, b)", n=n_large)

    if op == "clip":
        return _config(
            _three_setups_1arr(n_large), "np.clip(x, -1.0, 1.0)", n=n_large
        )

    # --- Differencing ---
    if op in ("diff", "ediff1d", "gradient", "unwrap"):
        return _config(_three_setups_1arr(n_large), f"np.{op}(x)", n=n_large)

    # --- Convolution/correlation ---
    if op in ("convolve", "correlate"):
        n_conv = 100_000
        k_conv = 1000
        # Signal and kernel are drawn from the same generator, in that order.
        setups = [
            f"{one_rng}; x = {signal}{cast}; k = {kernel}{cast}"
            for signal, kernel in zip(
                _draws("rng", n_conv), _draws("rng", k_conv)
            )
        ]
        return _config(
            setups, f"np.{op}(x, k, mode='full')", n=n_conv, k=k_conv
        )

    # --- Statistical ---
    if op in ("corrcoef", "cov"):
        f_feat = 1000
        s_samp = 10000
        setups = _three_setups_1arr(f"({f_feat}, {s_samp})")
        return _config(setups, f"np.{op}(x)", f=f_feat, s=s_samp)

    if op == "cross":
        n_cross = 1_000_000
        setups = _three_setups_2arr(f"({n_cross}, 3)")
        return _config(setups, "np.cross(a, b)", n=n_cross)

    # --- Binning/histogram ---
    if op == "histogram":
        return _config(
            _three_setups_1arr(n_large),
            "np.histogram(x, bins=100)",
            n=n_large,
            bins=100,
        )

    if op == "histogram2d":
        # histogram2d takes x and y; build them directly under those names
        # instead of renaming a/b by string substitution afterwards.
        setups = _three_setups_2arr(
            n_large, names=("x", "y"), rngs=("rng", "rng2")
        )
        return _config(
            setups, "np.histogram2d(x, y, bins=100)", n=n_large, bins=100
        )

    if op == "histogramdd":
        n_hdd = 1_000_000
        setups = _three_setups_1arr(f"({n_hdd}, 3)")
        return _config(
            setups, "np.histogramdd(x, bins=50)", n=n_hdd, bins=50, ndim=3
        )

    if op == "histogram_bin_edges":
        return _config(
            _three_setups_1arr(n_large),
            "np.histogram_bin_edges(x, bins=100)",
            n=n_large,
        )

    if op == "digitize":
        # Each distribution gets its own set of 100 bin edges.
        bin_edges = [
            "np.linspace(-3, 3, 100)",
            "np.linspace(0, 100, 100)",
            "np.linspace(-1000, 1000, 100)",
        ]
        setups = [
            f"{setup}; bins = {edges}"
            for setup, edges in zip(_three_setups_1arr(n_large), bin_edges)
        ]
        return _config(setups, "np.digitize(x, bins)", n=n_large, bins=100)

    if op == "bincount":
        # Integer input; ``dtype`` does not apply here.
        setups = [
            f"{one_rng}; x = rng.integers(0, 1000, size={n_large})",
            f"{one_rng}; x = rng.integers(0, 100, size={n_large})",
            f"{one_rng}; x = rng.integers(0, 10000, size={n_large})",
        ]
        return _config(setups, "np.bincount(x)", n=n_large)

    # --- Interpolation ---
    if op == "interp":
        n_interp = 10_000_000
        xp_size = 10000
        # xp is sorted and left uncast; fp and x are cast to ``dtype``.
        setups = [
            (
                f"{one_rng}; "
                f"xp = np.sort({knots}); "
                f"fp = {knots}{cast}; "
                f"x = {queries}{cast}"
            )
            for knots, queries in zip(
                _draws("rng", xp_size), _draws("rng", n_interp)
            )
        ]
        return _config(
            setups, "np.interp(x, xp, fp)", n=n_interp, xp=xp_size
        )

    # --- Linear/generation ---
    if op == "trace":
        # Use 10000x10000 to ensure analytical cost (10000) dominates
        # over subprocess overhead (~1000 FP instructions).
        n_trace = 10000
        setups = _three_setups_1arr(f"({n_trace}, {n_trace})", var="A")
        return _config(setups, "np.trace(A)", n=n_trace)

    if op == "trapezoid":
        # np.trapezoid was added in NumPy 2.0; older releases only provide
        # np.trapz.  Resolve whichever exists at setup time.
        setups = _three_setups_1arr(
            n_large,
            extra="; _trapfn = getattr(np, 'trapezoid', None) or np.trapz",
        )
        return _config(setups, "_trapfn(x)", n=n_large)

    if op == "logspace":
        # logspace generates arrays, not element-wise on existing data.
        # Use three different ranges as "distributions"; the bench code
        # therefore varies per setup and is returned as a list.
        benches = [
            f"np.logspace(0, 10, {n_large})",
            f"np.logspace(-5, 5, {n_large})",
            f"np.logspace(0, 100, {n_large})",
        ]
        return _config(["import numpy as np"] * 3, benches, n=n_large)

    if op == "geomspace":
        benches = [
            f"np.geomspace(1, 1000, {n_large})",
            f"np.geomspace(0.001, 1000, {n_large})",
            f"np.geomspace(1, 1e6, {n_large})",
        ]
        return _config(["import numpy as np"] * 3, benches, n=n_large)

    if op == "vander":
        n_vander = 10000
        degree = 100
        return _config(
            _three_setups_1arr(n_vander),
            f"np.vander(x, {degree})",
            n=n_vander,
            degree=degree,
        )

    raise ValueError(f"Unknown misc op: {op!r}")
538
+
539
+
540
+ def _benchmark_size_str(op: str) -> str:
541
+ """Return a human-readable benchmark size string for an op."""
542
+ n_large = 10_000_000
543
+ if op in ("convolve", "correlate"):
544
+ return "x: (100000,), k: (1000,)"
545
+ if op in ("corrcoef", "cov"):
546
+ return "x: (1000,10000)"
547
+ if op == "cross":
548
+ return "a: (1000000,3), b: (1000000,3)"
549
+ if op == "histogramdd":
550
+ return "x: (1000000,3), bins=50"
551
+ if op == "histogram":
552
+ return f"x: ({n_large},), bins=100"
553
+ if op == "histogram2d":
554
+ return f"x: ({n_large},), y: ({n_large},), bins=100"
555
+ if op == "histogram_bin_edges":
556
+ return f"x: ({n_large},), bins=100"
557
+ if op == "digitize":
558
+ return f"x: ({n_large},), bins: (100,)"
559
+ if op == "bincount":
560
+ return f"x: ({n_large},)"
561
+ if op == "interp":
562
+ return f"x: ({n_large},), xp: (10000,), fp: (10000,)"
563
+ if op == "trace":
564
+ return "A: (10000,10000)"
565
+ if op == "vander":
566
+ return "x: (10000,), degree=100"
567
+ if op == "clip":
568
+ return f"x: ({n_large},), a_min=-1.0, a_max=1.0"
569
+ if op in ("allclose", "array_equal", "array_equiv"):
570
+ return f"a: ({n_large},), b: ({n_large},)"
571
+ if op in ("diff", "ediff1d", "gradient", "unwrap"):
572
+ return f"x: ({n_large},)"
573
+ if op == "trapezoid":
574
+ return f"x: ({n_large},)"
575
+ if op in ("logspace", "geomspace"):
576
+ return f"output: ({n_large},)"
577
+ # Default: most ops use n_large
578
+ return f"x: ({n_large},)"
579
+
580
+
581
def benchmark_misc(
    dtype: str = "float64",
    repeats: int = 10,
) -> tuple[dict[str, float], dict[str, dict]]:
    """Measure alpha(op) = measured / analytical for every misc op.

    Each op in ``MISC_OPS`` is run once per setup returned by
    ``_get_op_config`` (three input distributions).  The per-setup ratio is
    ``total_flops / (analytical * repeats)`` and the reported alpha is the
    median of those ratios.

    Parameters
    ----------
    dtype : str
        NumPy dtype string (default ``"float64"``).
    repeats : int
        Number of repetitions per measurement.

    Returns
    -------
    tuple[dict[str, float], dict[str, dict]]
        ``(alphas, details)``.  ``alphas`` maps op name to its median
        alpha; ``details`` maps op name to a dict of raw benchmark
        metadata.  An op is absent from both when it has no configuration,
        a non-positive analytical cost, or no successful measurement.
    """
    alphas: dict[str, float] = {}
    metadata: dict[str, dict] = {}

    for op in MISC_OPS:
        try:
            cfg = _get_op_config(op, dtype)
        except ValueError:
            # No configuration for this op: skip it.
            continue

        setups = cfg["setups"]
        bench = cfg["bench"]
        analytical = cfg["analytical"]
        if analytical <= 0:
            continue

        # ``bench`` is either one statement shared by all setups or a list
        # holding one statement per setup.
        bench_per_setup = isinstance(bench, list)

        ratios: list[float] = []
        raw_totals: list[int] = []
        for idx, setup in enumerate(setups):
            code = bench[idx] if bench_per_setup else bench
            try:
                measured = measure_flops(setup, code, repeats=repeats)
            except RuntimeError:
                # A failed measurement drops this distribution only.
                continue
            ratios.append(measured.total_flops / (analytical * repeats))
            raw_totals.append(measured.total_flops)

        if not ratios:
            continue

        alphas[op] = statistics.median(ratios)
        metadata[op] = {
            "category": "counted_custom",
            "measurement_mode": "custom",
            "analytical_formula": _FORMULA_STRINGS.get(op, "n"),
            "analytical_flops": analytical,
            "benchmark_size": _benchmark_size_str(op),
            # When the bench code varies per setup, record the first one.
            "bench_code": bench[0] if bench_per_setup else bench,
            "repeats": repeats,
            "perf_instructions_total": raw_totals,
            "distribution_alphas": ratios,
        }

    return alphas, metadata