flopscope 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. benchmarks/__init__.py +1 -0
  2. benchmarks/__main__.py +6 -0
  3. benchmarks/_baseline.py +171 -0
  4. benchmarks/_bitwise.py +231 -0
  5. benchmarks/_complex.py +176 -0
  6. benchmarks/_contractions.py +291 -0
  7. benchmarks/_fft.py +198 -0
  8. benchmarks/_impl_urls.py +139 -0
  9. benchmarks/_linalg.py +197 -0
  10. benchmarks/_linalg_delegates.py +407 -0
  11. benchmarks/_metadata.py +141 -0
  12. benchmarks/_misc.py +653 -0
  13. benchmarks/_perf.py +321 -0
  14. benchmarks/_perm_group_calibration.py +175 -0
  15. benchmarks/_pointwise.py +372 -0
  16. benchmarks/_polynomial.py +193 -0
  17. benchmarks/_random.py +209 -0
  18. benchmarks/_reductions.py +136 -0
  19. benchmarks/_sorting.py +289 -0
  20. benchmarks/_stats.py +137 -0
  21. benchmarks/_window.py +92 -0
  22. benchmarks/accumulation/__init__.py +0 -0
  23. benchmarks/accumulation/bench_cost_compute.py +138 -0
  24. benchmarks/dashboard.py +312 -0
  25. benchmarks/runner.py +636 -0
  26. flopscope/__init__.py +273 -0
  27. flopscope/_accumulation/__init__.py +13 -0
  28. flopscope/_accumulation/_bipartite.py +121 -0
  29. flopscope/_accumulation/_burnside.py +51 -0
  30. flopscope/_accumulation/_cache.py +146 -0
  31. flopscope/_accumulation/_components.py +153 -0
  32. flopscope/_accumulation/_cost.py +1414 -0
  33. flopscope/_accumulation/_cost_descriptions.py +63 -0
  34. flopscope/_accumulation/_detection.py +318 -0
  35. flopscope/_accumulation/_ladder.py +191 -0
  36. flopscope/_accumulation/_output_orbit.py +104 -0
  37. flopscope/_accumulation/_partition.py +290 -0
  38. flopscope/_accumulation/_path_info.py +211 -0
  39. flopscope/_accumulation/_public.py +169 -0
  40. flopscope/_accumulation/_reduction.py +310 -0
  41. flopscope/_accumulation/_regimes.py +303 -0
  42. flopscope/_accumulation/_shape.py +33 -0
  43. flopscope/_accumulation/_wreath.py +209 -0
  44. flopscope/_budget.py +1027 -0
  45. flopscope/_config.py +118 -0
  46. flopscope/_counting_ops.py +451 -0
  47. flopscope/_display.py +478 -0
  48. flopscope/_docstrings.py +59 -0
  49. flopscope/_dtypes.py +20 -0
  50. flopscope/_einsum.py +717 -0
  51. flopscope/_errstate.py +25 -0
  52. flopscope/_flops.py +282 -0
  53. flopscope/_free_ops.py +2654 -0
  54. flopscope/_ndarray.py +1126 -0
  55. flopscope/_opt_einsum/LICENSE +21 -0
  56. flopscope/_opt_einsum/NOTICE +59 -0
  57. flopscope/_opt_einsum/__init__.py +209 -0
  58. flopscope/_opt_einsum/_contract.py +1478 -0
  59. flopscope/_opt_einsum/_helpers.py +164 -0
  60. flopscope/_opt_einsum/_hsluv.py +273 -0
  61. flopscope/_opt_einsum/_path_random.py +462 -0
  62. flopscope/_opt_einsum/_paths.py +1653 -0
  63. flopscope/_opt_einsum/_subgraph_symmetry.py +544 -0
  64. flopscope/_opt_einsum/_symmetry.py +140 -0
  65. flopscope/_opt_einsum/_typing.py +37 -0
  66. flopscope/_perm_group.py +717 -0
  67. flopscope/_pointwise.py +2522 -0
  68. flopscope/_polynomial.py +278 -0
  69. flopscope/_registry.py +3216 -0
  70. flopscope/_sorting_ops.py +571 -0
  71. flopscope/_symmetric.py +812 -0
  72. flopscope/_symmetry_transport.py +510 -0
  73. flopscope/_symmetry_utils.py +669 -0
  74. flopscope/_type_info.py +12 -0
  75. flopscope/_unwrap.py +70 -0
  76. flopscope/_validation.py +83 -0
  77. flopscope/_version_check.py +46 -0
  78. flopscope/_weights.py +195 -0
  79. flopscope/_window.py +177 -0
  80. flopscope/accounting.py +565 -0
  81. flopscope/data/default_weights.json +462 -0
  82. flopscope/data/weights.csv +509 -0
  83. flopscope/errors.py +197 -0
  84. flopscope/numpy/__init__.py +878 -0
  85. flopscope/numpy/fft/__init__.py +55 -0
  86. flopscope/numpy/fft/_free.py +51 -0
  87. flopscope/numpy/fft/_transforms.py +695 -0
  88. flopscope/numpy/linalg/__init__.py +105 -0
  89. flopscope/numpy/linalg/_aliases.py +126 -0
  90. flopscope/numpy/linalg/_compound.py +161 -0
  91. flopscope/numpy/linalg/_decompositions.py +353 -0
  92. flopscope/numpy/linalg/_properties.py +533 -0
  93. flopscope/numpy/linalg/_solvers.py +444 -0
  94. flopscope/numpy/linalg/_svd.py +122 -0
  95. flopscope/numpy/random/__init__.py +684 -0
  96. flopscope/numpy/random/_cost_formulas.py +115 -0
  97. flopscope/numpy/random/_counted_classes.py +241 -0
  98. flopscope/numpy/testing/__init__.py +13 -0
  99. flopscope/numpy/typing/__init__.py +30 -0
  100. flopscope/py.typed +0 -0
  101. flopscope/stats/__init__.py +84 -0
  102. flopscope/stats/_base.py +77 -0
  103. flopscope/stats/_cauchy.py +146 -0
  104. flopscope/stats/_erf.py +190 -0
  105. flopscope/stats/_expon.py +146 -0
  106. flopscope/stats/_laplace.py +150 -0
  107. flopscope/stats/_logistic.py +148 -0
  108. flopscope/stats/_lognorm.py +160 -0
  109. flopscope/stats/_ndtri.py +133 -0
  110. flopscope/stats/_norm.py +149 -0
  111. flopscope/stats/_truncnorm.py +186 -0
  112. flopscope/stats/_uniform.py +141 -0
  113. flopscope-0.2.0.dist-info/METADATA +23 -0
  114. flopscope-0.2.0.dist-info/RECORD +115 -0
  115. flopscope-0.2.0.dist-info/WHEEL +4 -0
benchmarks/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Benchmark suite for measuring empirical FLOP weights."""
benchmarks/__main__.py ADDED
@@ -0,0 +1,6 @@
1
# benchmarks/__main__.py
"""Allow running the benchmark suite as: python -m benchmarks"""

from benchmarks.runner import main

# No ``if __name__ == "__main__"`` guard: this module is the package entry
# point, so it hands control straight to the runner's ``main``.
main()
benchmarks/_baseline.py ADDED
@@ -0,0 +1,171 @@
1
+ """Baseline measurements for overhead-subtracted weight normalization.
2
+
3
+ Measures three baselines:
4
+
5
+ 1. **alpha(add)** — raw FP instructions per element for ``np.add`` (includes
6
+ ufunc overhead). Used to derive the binary ufunc overhead.
7
+ 2. **alpha(abs)** — raw FP instructions per element for ``np.abs``. Since abs
8
+ on float64 is a bitwise sign-bit clear (NOT an FP instruction), all
9
+ measured FP instructions are pure **unary ufunc overhead**.
10
+ 3. **Binary ufunc overhead** = ``alpha(add) - 1.0`` (since one add = exactly
11
+ one FP instruction; the rest is overhead).
12
+
13
+ The runner subtracts the appropriate overhead from each counted operation's
14
+ raw alpha before storing it as the weight::
15
+
16
+ weight(op) = max(alpha_raw(op) - overhead_for_category, 0.0)
17
+
18
+ Known analytical zero-FLOP operations are stored separately with
19
+ ``weight(op) = 0.0`` so the published artifacts surface them as free rather
20
+ than as unit-cost operations.
21
+
22
+ This replaces the old ``weight(op) = alpha(op) / alpha(add)`` formula which
23
+ penalized BLAS ops (that bypass the ufunc layer) with ufunc overhead they
24
+ don't have.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import statistics
30
+ from dataclasses import dataclass
31
+
32
+ from benchmarks._perf import measure_flops
33
+
34
+
35
@dataclass(frozen=True)
class BaselineResult:
    """Immutable pair of raw baseline alphas plus the overheads derived from them."""

    alpha_add: float
    """Raw alpha of ``np.add`` (FP instructions per element, overhead included)."""

    alpha_abs: float
    """Raw alpha of ``np.abs``; treated as pure unary ufunc overhead (abs is bitwise)."""

    @property
    def overhead_ufunc_unary(self) -> float:
        """Per-element overhead of a unary ufunc, i.e. the abs measurement."""
        return self.alpha_abs

    @property
    def overhead_ufunc_binary(self) -> float:
        """Per-element overhead of a binary ufunc.

        One add is taken to be exactly one FP instruction, so whatever
        ``alpha_add`` reports beyond 1.0 is overhead. Clamped at zero.
        """
        beyond_one_instruction = self.alpha_add - 1.0
        return max(beyond_one_instruction, 0.0)

    @property
    def overhead_ufunc_reduction(self) -> float:
        """Per-element overhead of a reduction (same value as the unary overhead)."""
        return self.overhead_ufunc_unary

    def overhead_for_mode(self, mode: str) -> float:
        """Look up the overhead to subtract for measurement mode *mode*.

        Only the three ``ufunc_*`` modes carry a non-zero overhead; ``blas``,
        ``linalg``, ``custom``, ``instructions`` and any unrecognised mode
        yield ``0.0``.
        """
        ufunc_overheads = {
            "ufunc_unary": self.overhead_ufunc_unary,
            "ufunc_binary": self.overhead_ufunc_binary,
            "ufunc_reduction": self.overhead_ufunc_reduction,
        }
        return ufunc_overheads.get(mode, 0.0)

    def to_dict(self) -> dict:
        """Return a plain-dict form of the baselines for weights.json metadata."""
        payload: dict = {}
        payload["alpha_add_raw"] = self.alpha_add
        payload["alpha_abs_raw"] = self.alpha_abs
        payload["overhead_ufunc_unary"] = self.overhead_ufunc_unary
        payload["overhead_ufunc_binary"] = self.overhead_ufunc_binary
        payload["overhead_ufunc_reduction"] = self.overhead_ufunc_reduction
        payload["normalization"] = (
            "subtract per-category ufunc overhead; known zero-FLOP ops use "
            "weight 0.0"
        )
        return payload
88
+
89
+
90
+ def _measure_alpha(setups: list[str], bench: str, n: int, repeats: int) -> float:
91
+ """Measure median alpha across distributions."""
92
+ dist_alphas = []
93
+ for setup in setups:
94
+ result = measure_flops(setup, bench, repeats=repeats)
95
+ dist_alphas.append(result.total_flops / (n * repeats))
96
+ return statistics.median(dist_alphas)
97
+
98
+
99
+ def _unary_setups(n: int, dtype: str) -> list[str]:
100
+ return [
101
+ f"x = np.random.default_rng(42).standard_normal({n}).astype(np.{dtype})",
102
+ (
103
+ f"rng = np.random.default_rng(42); "
104
+ f"x = rng.uniform(0.01, 100, size={n}).astype(np.{dtype})"
105
+ ),
106
+ (
107
+ f"rng = np.random.default_rng(42); "
108
+ f"x = rng.uniform(-1000, 1000, size={n}).astype(np.{dtype})"
109
+ ),
110
+ ]
111
+
112
+
113
+ def _binary_setups(n: int, dtype: str) -> list[str]:
114
+ return [
115
+ (
116
+ f"x = np.random.default_rng(42).standard_normal({n}).astype(np.{dtype}); "
117
+ f"y = np.random.default_rng(43).standard_normal({n}).astype(np.{dtype}); "
118
+ f"_out = np.empty({n}, dtype=np.{dtype})"
119
+ ),
120
+ (
121
+ f"rng = np.random.default_rng(42); "
122
+ f"x = rng.uniform(0.01, 100, size={n}).astype(np.{dtype}); "
123
+ f"y = rng.uniform(0.01, 100, size={n}).astype(np.{dtype}); "
124
+ f"_out = np.empty({n}, dtype=np.{dtype})"
125
+ ),
126
+ (
127
+ f"rng = np.random.default_rng(42); "
128
+ f"x = rng.uniform(-1000, 1000, size={n}).astype(np.{dtype}); "
129
+ f"y = rng.uniform(-1000, 1000, size={n}).astype(np.{dtype}); "
130
+ f"_out = np.empty({n}, dtype=np.{dtype})"
131
+ ),
132
+ ]
133
+
134
+
135
def measure_baseline(
    n: int = 10_000_000, dtype: str = "float64", repeats: int = 10
) -> float:
    """Return only alpha(add); kept for backwards compatibility.

    New code should call :func:`measure_baselines`, which returns the full
    :class:`BaselineResult` including the overhead measurements.
    """
    add_statement = "np.add(x, y, out=_out)"
    operand_setups = _binary_setups(n, dtype)
    return _measure_alpha(operand_setups, add_statement, n, repeats)
146
+
147
+
148
def measure_baselines(
    n: int = 10_000_000, dtype: str = "float64", repeats: int = 10
) -> BaselineResult:
    """Measure alpha(add) and alpha(abs) and print a short summary.

    Returns
    -------
    BaselineResult
        Holds alpha(add), alpha(abs) and the overheads derived from them.
    """
    add_alpha = _measure_alpha(
        _binary_setups(n, dtype), "np.add(x, y, out=_out)", n, repeats
    )
    abs_alpha = _measure_alpha(_unary_setups(n, dtype), "np.abs(x)", n, repeats)
    baselines = BaselineResult(alpha_add=add_alpha, alpha_abs=abs_alpha)

    summary_lines = (
        f" alpha(add) = {add_alpha:.4f}",
        f" alpha(abs) = {abs_alpha:.4f} (pure unary ufunc overhead)",
        " Derived overheads:",
        f" ufunc_unary: {baselines.overhead_ufunc_unary:.4f}",
        f" ufunc_binary: {baselines.overhead_ufunc_binary:.4f}",
        f" ufunc_reduction: {baselines.overhead_ufunc_reduction:.4f}",
        " blas/linalg: 0.0000",
    )
    for line in summary_lines:
        print(line)
    return baselines
benchmarks/_bitwise.py ADDED
@@ -0,0 +1,231 @@
1
+ """Benchmark bitwise and integer operations via ``instructions`` counter.
2
+
3
+ These ops operate on integers, so ``fp_arith_inst_retired`` perf counters
4
+ read 0. We use ``perf stat -e instructions`` (total retired instructions)
5
+ as the hardware-counter fallback — more stable and deterministic than
6
+ wall-clock timing. Falls back to timing if ``perf`` is unavailable.
7
+
8
+ Also includes ``isnat`` which operates on datetime64 arrays.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import statistics
14
+
15
+ from benchmarks._perf import measure_instructions
16
+
17
+ # --- Operation lists -------------------------------------------------------
18
+
19
+ UNARY_OPS: list[str] = [
20
+ "bitwise_not",
21
+ "bitwise_invert",
22
+ "bitwise_count",
23
+ "invert",
24
+ ]
25
+
26
+ BINARY_OPS: list[str] = [
27
+ "bitwise_and",
28
+ "bitwise_or",
29
+ "bitwise_xor",
30
+ "gcd",
31
+ "lcm",
32
+ ]
33
+
34
+ SHIFT_OPS: list[str] = [
35
+ "bitwise_left_shift",
36
+ "bitwise_right_shift",
37
+ "left_shift",
38
+ "right_shift",
39
+ ]
40
+
41
+ SPECIAL_OPS: list[str] = [
42
+ "isnat",
43
+ ]
44
+
45
+ BITWISE_OPS: list[str] = UNARY_OPS + BINARY_OPS + SHIFT_OPS + SPECIAL_OPS
46
+
47
+ # --- Analytical formula strings (all cost = n) ----------------------------
48
+
49
+ _FORMULA_STRINGS: dict[str, str] = dict.fromkeys(BITWISE_OPS, "n")
50
+
51
+
52
+ def _analytical_cost(op: str, n: int) -> int: # noqa: ARG001
53
+ """Return analytical FLOP cost for *op* on arrays of length *n*."""
54
+ return n
55
+
56
+
57
+ # --- Setup helpers ----------------------------------------------------------
58
+
59
+
60
+ def _unary_setup(n: int, dist_idx: int) -> str:
61
+ """Build setup code for a unary integer op."""
62
+ seeds = [42, 123, 7]
63
+ seed = seeds[dist_idx]
64
+ return (
65
+ f"import numpy as np; "
66
+ f"x = np.random.default_rng({seed}).integers(-1_000_000, 1_000_000, "
67
+ f"size={n}, dtype=np.int64)"
68
+ )
69
+
70
+
71
+ def _binary_setup(n: int, dist_idx: int) -> str:
72
+ """Build setup code for a binary integer op."""
73
+ seeds = [42, 123, 7]
74
+ seed = seeds[dist_idx]
75
+ return (
76
+ f"import numpy as np; "
77
+ f"rng = np.random.default_rng({seed}); "
78
+ f"a = rng.integers(-1_000_000, 1_000_000, size={n}, dtype=np.int64); "
79
+ f"b = rng.integers(-1_000_000, 1_000_000, size={n}, dtype=np.int64)"
80
+ )
81
+
82
+
83
+ def _gcd_lcm_setup(n: int, dist_idx: int) -> str:
84
+ """Build setup code for gcd/lcm (positive integers)."""
85
+ seeds = [42, 123, 7]
86
+ seed = seeds[dist_idx]
87
+ return (
88
+ f"import numpy as np; "
89
+ f"rng = np.random.default_rng({seed}); "
90
+ f"a = rng.integers(1, 1_000_000, size={n}, dtype=np.int64); "
91
+ f"b = rng.integers(1, 1_000_000, size={n}, dtype=np.int64)"
92
+ )
93
+
94
+
95
+ def _shift_setup(n: int, dist_idx: int) -> str:
96
+ """Build setup code for shift ops (second operand 0-10)."""
97
+ seeds = [42, 123, 7]
98
+ seed = seeds[dist_idx]
99
+ return (
100
+ f"import numpy as np; "
101
+ f"rng = np.random.default_rng({seed}); "
102
+ f"a = rng.integers(-1_000_000, 1_000_000, size={n}, dtype=np.int64); "
103
+ f"b = rng.integers(0, 11, size={n}, dtype=np.int64)"
104
+ )
105
+
106
+
107
+ def _isnat_setup(n: int, dist_idx: int) -> str:
108
+ """Build setup code for isnat (datetime64 input with some NaTs)."""
109
+ seeds = [42, 123, 7]
110
+ seeds[dist_idx]
111
+ # Create datetime64 array with ~1/3 NaT values
112
+ return (
113
+ f"import numpy as np; "
114
+ f"x = np.array(['2020-01-01', 'NaT', '2020-06-15'] * ({n} // 3), "
115
+ f"dtype='datetime64')"
116
+ )
117
+
118
+
119
+ # --- Main benchmark function -----------------------------------------------
120
+
121
+
122
def benchmark_bitwise(
    n: int = 10_000_000,
    dtype: str = "int64",
    repeats: int = 10,
) -> tuple[dict[str, float], dict[str, dict]]:
    """Benchmark bitwise/integer ops using the ``instructions`` counter.

    Every op is measured with ``measure_instructions`` over three input
    distributions (one per seed index 0-2), and the median per-element value
    is reported.

    Parameters
    ----------
    n : int
        Array size.
    dtype : str
        Ignored (integer ops always use int64, isnat uses datetime64). Kept
        for interface consistency with other benchmark modules.
    repeats : int
        Number of repetitions per measurement.

    Returns
    -------
    tuple[dict[str, float], dict[str, dict]]
        A pair of (alphas, details). ``alphas`` maps op name to the median,
        across distributions, of ``total_flops / (n * repeats)`` as reported
        by ``measure_instructions``. ``details`` maps op name to a dict of
        raw benchmark metadata. An op whose measurement raised
        ``RuntimeError`` for every distribution is absent from both dicts.
    """
    distributions = 3
    results: dict[str, float] = {}
    details: dict[str, dict] = {}

    def _bench_op(
        op: str, setup_fn, bench_code: str, category: str, size_desc: str
    ) -> None:
        """Benchmark a single op across distributions using instructions counter."""
        dist_values: list[float] = []
        dist_raw_totals: list[int] = []
        for di in range(distributions):
            setup = setup_fn(n, di)
            try:
                result = measure_instructions(setup, bench_code, repeats=repeats)
            except RuntimeError:
                # Best effort: a failed distribution is skipped, not fatal.
                continue
            dist_values.append(result.total_flops / (n * repeats))
            dist_raw_totals.append(result.total_flops)
        if not dist_values:
            return
        results[op] = statistics.median(dist_values)
        details[op] = {
            "category": category,
            "measurement_mode": "instructions",
            "analytical_formula": _FORMULA_STRINGS[op],
            "analytical_flops": n,
            "benchmark_size": size_desc,
            "bench_code": bench_code,
            "repeats": repeats,
            "perf_instructions_total": dist_raw_totals,
            "distribution_alphas": dist_values,
        }

    # --- Unary ops ---
    for op in UNARY_OPS:
        _bench_op(op, _unary_setup, f"np.{op}(x)", "instructions_unary", f"x: ({n},)")

    # --- Binary ops ---
    for op in BINARY_OPS:
        # gcd/lcm get strictly positive operands; the rest use signed values.
        setup_fn = _gcd_lcm_setup if op in ("gcd", "lcm") else _binary_setup
        _bench_op(
            op,
            setup_fn,
            f"np.{op}(a, b)",
            "instructions_binary",
            f"a: ({n},), b: ({n},)",
        )

    # --- Shift ops ---
    for op in SHIFT_OPS:
        _bench_op(
            op,
            _shift_setup,
            f"np.{op}(a, b)",
            "instructions_shift",
            f"a: ({n},), b: ({n},) (values 0-10)",
        )

    # --- Special ops ---
    # isnat: operates on datetime64 arrays; measured through the same helper
    # as every other op instead of a hand-copied loop.
    _bench_op(
        "isnat",
        _isnat_setup,
        "np.isnat(x)",
        "instructions_special",
        f"x: ({n},) datetime64 with NaTs",
    )

    return results, details
benchmarks/_complex.py ADDED
@@ -0,0 +1,176 @@
1
+ """Benchmark complex-number operations.
2
+
3
+ Most ops use perf mode with complex128 input (they DO retire FP instructions
4
+ on complex data). Two type-check ops (``iscomplexobj``, ``isrealobj``) use
5
+ the ``instructions`` counter because they inspect the dtype, not the array
6
+ elements (so ``fp_arith_inst_retired`` reads 0).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import statistics
12
+
13
+ from benchmarks._perf import measure_flops, measure_instructions
14
+
15
+ COMPLEX_OPS: list[str] = [
16
+ "angle",
17
+ "conj",
18
+ "conjugate",
19
+ "imag",
20
+ "real",
21
+ "real_if_close",
22
+ "iscomplex",
23
+ "isreal",
24
+ "sort_complex",
25
+ "iscomplexobj",
26
+ "isrealobj",
27
+ ]
28
+
29
+ # Ops that use the ``instructions`` counter instead of ``fp_arith_inst_retired``
30
+ # because they inspect the dtype, not the array elements.
31
+ _INSTRUCTIONS_OPS: frozenset[str] = frozenset({"iscomplexobj", "isrealobj"})
32
+
33
+ _FORMULA_STRINGS: dict[str, str] = {
34
+ "angle": "numel(output)",
35
+ "conj": "numel(output)",
36
+ "conjugate": "numel(output)",
37
+ "imag": "numel(output)",
38
+ "real": "numel(output)",
39
+ "real_if_close": "numel(output)",
40
+ "iscomplex": "numel(output)",
41
+ "isreal": "numel(output)",
42
+ "sort_complex": "numel(output)",
43
+ "iscomplexobj": "numel(output)",
44
+ "isrealobj": "numel(output)",
45
+ }
46
+
47
+ # Seeds for the 3 input distributions.
48
+ _DIST_SEEDS: list[tuple[int, int]] = [
49
+ (42, 43),
50
+ (100, 101),
51
+ (200, 201),
52
+ ]
53
+
54
+
55
+ def _complex_setup(n: int, seed_real: int, seed_imag: int) -> str:
56
+ """Build setup code that creates a complex128 array from two RNGs."""
57
+ return (
58
+ f"import numpy as np; "
59
+ f"x = np.random.default_rng({seed_real}).standard_normal({n}).astype(np.float64) "
60
+ f"+ 1j * np.random.default_rng({seed_imag}).standard_normal({n}).astype(np.float64)"
61
+ )
62
+
63
+
64
+ def _real_if_close_setup(n: int, dist_idx: int) -> str:
65
+ """Build setup for ``real_if_close``.
66
+
67
+ Distribution 0 has negligible imaginary parts (tests the "close to real"
68
+ path). Distributions 1 and 2 have substantial imaginary parts.
69
+ """
70
+ if dist_idx == 0:
71
+ # Negligible imaginary part — real_if_close may strip it.
72
+ return (
73
+ f"import numpy as np; "
74
+ f"x = np.random.default_rng(42).standard_normal({n}).astype(np.float64) "
75
+ f"+ 1j * np.random.default_rng(43).standard_normal({n}).astype(np.float64) * 1e-15"
76
+ )
77
+ seed_r, seed_i = _DIST_SEEDS[dist_idx]
78
+ return _complex_setup(n, seed_r, seed_i)
79
+
80
+
81
+ def _timing_setup(n: int) -> str:
82
+ """Build setup for type-check ops (``iscomplexobj`` / ``isrealobj``)."""
83
+ return (
84
+ f"import numpy as np; "
85
+ f"x = np.random.default_rng(42).standard_normal({n}) "
86
+ f"+ 1j * np.random.default_rng(43).standard_normal({n})"
87
+ )
88
+
89
+
90
+ def _bench_code(op: str) -> str:
91
+ """Return the benchmark statement for *op*."""
92
+ return f"np.{op}(x)"
93
+
94
+
95
def benchmark_complex(
    n: int = 10_000_000,
    dtype: str = "complex128",
    repeats: int = 10,
    distributions: int = 3,
) -> tuple[dict[str, float], dict[str, dict]]:
    """Benchmark complex-number ops, returning raw measurement per element.

    Parameters
    ----------
    n : int
        Array size (element count). ``sort_complex`` always uses 1_000_000
        regardless of this value.
    dtype : str
        NumPy dtype string (unused — always complex128, kept for API parity).
    repeats : int
        Number of repetitions per measurement.
    distributions : int
        Number of input distributions to measure (median is taken). Must not
        exceed the number of seed pairs in ``_DIST_SEEDS``.

    Returns
    -------
    tuple[dict[str, float], dict[str, dict]]
        ``(alphas, details)`` — *alphas* maps op name to median measurement
        per analytical FLOP; *details* maps op name to benchmark metadata.
        An op whose measurement raised ``RuntimeError`` for every
        distribution is absent from both dicts.

    Raises
    ------
    ValueError
        If *distributions* is larger than ``len(_DIST_SEEDS)``. Previously
        this surfaced as a bare ``IndexError`` only after the first three
        distributions had already been measured.
    """
    if distributions > len(_DIST_SEEDS):
        raise ValueError(
            f"distributions must be <= {len(_DIST_SEEDS)} "
            f"(one seed pair per distribution), got {distributions}"
        )

    results: dict[str, float] = {}
    details: dict[str, dict] = {}

    for op in COMPLEX_OPS:
        # --- Determine n for this op ---
        op_n = 1_000_000 if op == "sort_complex" else n

        # --- Choose measurement function ---
        use_instructions = op in _INSTRUCTIONS_OPS
        measure_fn = measure_instructions if use_instructions else measure_flops

        dist_values: list[float] = []
        dist_raw_totals: list[int] = []
        bench = _bench_code(op)

        for di in range(distributions):
            # --- Build setup code ---
            if use_instructions:
                # Type-check ops use the same input for every distribution.
                setup = _timing_setup(op_n)
            elif op == "real_if_close":
                setup = _real_if_close_setup(op_n, di)
            else:
                seed_r, seed_i = _DIST_SEEDS[di]
                setup = _complex_setup(op_n, seed_r, seed_i)

            try:
                result = measure_fn(setup, bench, repeats=repeats)
            except RuntimeError:
                # Best effort: a failed distribution is skipped, not fatal.
                continue

            # Analytical cost = numel(output) = op_n for all complex ops.
            dist_values.append(result.total_flops / (op_n * repeats))
            dist_raw_totals.append(result.total_flops)

        if not dist_values:
            continue

        results[op] = statistics.median(dist_values)

        if use_instructions:
            bm_size = f"x: ({op_n},) complex128 (instructions counter)"
            mm = "instructions"
        else:
            bm_size = f"x: ({op_n},) complex128"
            mm = "ufunc_unary"

        details[op] = {
            "category": "counted_complex",
            "measurement_mode": mm,
            "analytical_formula": _FORMULA_STRINGS[op],
            "analytical_flops": op_n,
            "benchmark_size": bm_size,
            "bench_code": bench,
            "repeats": repeats,
            "perf_instructions_total": dist_raw_totals,
            "distribution_alphas": dist_values,
        }

    return results, details