flopscope 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. benchmarks/__init__.py +1 -0
  2. benchmarks/__main__.py +6 -0
  3. benchmarks/_baseline.py +171 -0
  4. benchmarks/_bitwise.py +231 -0
  5. benchmarks/_complex.py +176 -0
  6. benchmarks/_contractions.py +291 -0
  7. benchmarks/_fft.py +198 -0
  8. benchmarks/_impl_urls.py +139 -0
  9. benchmarks/_linalg.py +197 -0
  10. benchmarks/_linalg_delegates.py +407 -0
  11. benchmarks/_metadata.py +141 -0
  12. benchmarks/_misc.py +653 -0
  13. benchmarks/_perf.py +321 -0
  14. benchmarks/_perm_group_calibration.py +175 -0
  15. benchmarks/_pointwise.py +372 -0
  16. benchmarks/_polynomial.py +193 -0
  17. benchmarks/_random.py +209 -0
  18. benchmarks/_reductions.py +136 -0
  19. benchmarks/_sorting.py +289 -0
  20. benchmarks/_stats.py +137 -0
  21. benchmarks/_window.py +92 -0
  22. benchmarks/accumulation/__init__.py +0 -0
  23. benchmarks/accumulation/bench_cost_compute.py +138 -0
  24. benchmarks/dashboard.py +312 -0
  25. benchmarks/runner.py +636 -0
  26. flopscope/__init__.py +273 -0
  27. flopscope/_accumulation/__init__.py +13 -0
  28. flopscope/_accumulation/_bipartite.py +121 -0
  29. flopscope/_accumulation/_burnside.py +51 -0
  30. flopscope/_accumulation/_cache.py +146 -0
  31. flopscope/_accumulation/_components.py +153 -0
  32. flopscope/_accumulation/_cost.py +1414 -0
  33. flopscope/_accumulation/_cost_descriptions.py +63 -0
  34. flopscope/_accumulation/_detection.py +318 -0
  35. flopscope/_accumulation/_ladder.py +191 -0
  36. flopscope/_accumulation/_output_orbit.py +104 -0
  37. flopscope/_accumulation/_partition.py +290 -0
  38. flopscope/_accumulation/_path_info.py +211 -0
  39. flopscope/_accumulation/_public.py +169 -0
  40. flopscope/_accumulation/_reduction.py +310 -0
  41. flopscope/_accumulation/_regimes.py +303 -0
  42. flopscope/_accumulation/_shape.py +33 -0
  43. flopscope/_accumulation/_wreath.py +209 -0
  44. flopscope/_budget.py +1027 -0
  45. flopscope/_config.py +118 -0
  46. flopscope/_counting_ops.py +451 -0
  47. flopscope/_display.py +478 -0
  48. flopscope/_docstrings.py +59 -0
  49. flopscope/_dtypes.py +20 -0
  50. flopscope/_einsum.py +717 -0
  51. flopscope/_errstate.py +25 -0
  52. flopscope/_flops.py +282 -0
  53. flopscope/_free_ops.py +2654 -0
  54. flopscope/_ndarray.py +1126 -0
  55. flopscope/_opt_einsum/LICENSE +21 -0
  56. flopscope/_opt_einsum/NOTICE +59 -0
  57. flopscope/_opt_einsum/__init__.py +209 -0
  58. flopscope/_opt_einsum/_contract.py +1478 -0
  59. flopscope/_opt_einsum/_helpers.py +164 -0
  60. flopscope/_opt_einsum/_hsluv.py +273 -0
  61. flopscope/_opt_einsum/_path_random.py +462 -0
  62. flopscope/_opt_einsum/_paths.py +1653 -0
  63. flopscope/_opt_einsum/_subgraph_symmetry.py +544 -0
  64. flopscope/_opt_einsum/_symmetry.py +140 -0
  65. flopscope/_opt_einsum/_typing.py +37 -0
  66. flopscope/_perm_group.py +717 -0
  67. flopscope/_pointwise.py +2522 -0
  68. flopscope/_polynomial.py +278 -0
  69. flopscope/_registry.py +3216 -0
  70. flopscope/_sorting_ops.py +571 -0
  71. flopscope/_symmetric.py +812 -0
  72. flopscope/_symmetry_transport.py +510 -0
  73. flopscope/_symmetry_utils.py +669 -0
  74. flopscope/_type_info.py +12 -0
  75. flopscope/_unwrap.py +70 -0
  76. flopscope/_validation.py +83 -0
  77. flopscope/_version_check.py +46 -0
  78. flopscope/_weights.py +195 -0
  79. flopscope/_window.py +177 -0
  80. flopscope/accounting.py +565 -0
  81. flopscope/data/default_weights.json +462 -0
  82. flopscope/data/weights.csv +509 -0
  83. flopscope/errors.py +197 -0
  84. flopscope/numpy/__init__.py +878 -0
  85. flopscope/numpy/fft/__init__.py +55 -0
  86. flopscope/numpy/fft/_free.py +51 -0
  87. flopscope/numpy/fft/_transforms.py +695 -0
  88. flopscope/numpy/linalg/__init__.py +105 -0
  89. flopscope/numpy/linalg/_aliases.py +126 -0
  90. flopscope/numpy/linalg/_compound.py +161 -0
  91. flopscope/numpy/linalg/_decompositions.py +353 -0
  92. flopscope/numpy/linalg/_properties.py +533 -0
  93. flopscope/numpy/linalg/_solvers.py +444 -0
  94. flopscope/numpy/linalg/_svd.py +122 -0
  95. flopscope/numpy/random/__init__.py +684 -0
  96. flopscope/numpy/random/_cost_formulas.py +115 -0
  97. flopscope/numpy/random/_counted_classes.py +241 -0
  98. flopscope/numpy/testing/__init__.py +13 -0
  99. flopscope/numpy/typing/__init__.py +30 -0
  100. flopscope/py.typed +0 -0
  101. flopscope/stats/__init__.py +84 -0
  102. flopscope/stats/_base.py +77 -0
  103. flopscope/stats/_cauchy.py +146 -0
  104. flopscope/stats/_erf.py +190 -0
  105. flopscope/stats/_expon.py +146 -0
  106. flopscope/stats/_laplace.py +150 -0
  107. flopscope/stats/_logistic.py +148 -0
  108. flopscope/stats/_lognorm.py +160 -0
  109. flopscope/stats/_ndtri.py +133 -0
  110. flopscope/stats/_norm.py +149 -0
  111. flopscope/stats/_truncnorm.py +186 -0
  112. flopscope/stats/_uniform.py +141 -0
  113. flopscope-0.2.0.dist-info/METADATA +23 -0
  114. flopscope-0.2.0.dist-info/RECORD +115 -0
  115. flopscope-0.2.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,291 @@
1
+ """Benchmark BLAS contraction operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import statistics
6
+
7
+ from benchmarks._perf import measure_flops
8
+
9
+ CONTRACTION_OPS: list[str] = [
10
+ "dot",
11
+ "matmul",
12
+ "inner",
13
+ "vdot",
14
+ "vecdot",
15
+ "outer",
16
+ "tensordot",
17
+ "kron",
18
+ "einsum",
19
+ ]
20
+
21
+ _FORMULA_STRINGS: dict[str, str] = {
22
+ "dot": "MNK",
23
+ "matmul": "MNK",
24
+ "inner": "N (a.size)",
25
+ "vdot": "N (a.size)",
26
+ "vecdot": "batch * K (output_size * contracted_axis)",
27
+ "outer": "M*N",
28
+ "tensordot": "product of free * contracted dims",
29
+ "kron": "numel(output)",
30
+ "einsum": "α/M model (FMA=2 textbook)",
31
+ }
32
+
33
+ _BENCHMARK_SIZE_STRINGS: dict[str, str] = {
34
+ "dot": "A: (512,512), B: (512,512)",
35
+ "matmul": "A: (512,512), B: (512,512)",
36
+ "inner": "a: (1000000,), b: (1000000,)",
37
+ "vdot": "a: (1000000,), b: (1000000,)",
38
+ "vecdot": "A: (1000,512), B: (1000,512)",
39
+ "outer": "a: (5000,), b: (5000,)",
40
+ "tensordot": "A: (64,64,64), B: (64,64,64), axes=1",
41
+ "kron": "A: (64,64), B: (64,64)",
42
+ "einsum": "A: (512,512), B: (512,512), subscripts='ij,jk->ik'",
43
+ }
44
+
45
+
46
+ def _analytical_cost(op: str, **kwargs: int) -> int:
47
+ """Return analytical FLOP count for the benchmark configuration.
48
+
49
+ Parameters
50
+ ----------
51
+ op : str
52
+ Operation name (e.g. ``"dot"``).
53
+ **kwargs : int
54
+ Shape parameters used by each formula.
55
+
56
+ Returns
57
+ -------
58
+ int
59
+ Analytical FLOP count.
60
+ """
61
+ costs: dict[str, int] = {
62
+ # dot: 2D matrix multiply A(512,512) @ B(512,512), FMA=2 textbook
63
+ "dot": 512 * 512 * 512,
64
+ # matmul: identical to dot for 2D
65
+ "matmul": 512 * 512 * 512,
66
+ # inner: dot product of two 1M-element vectors.
67
+ # Runtime charges a.size — matches flopscope's convention (FMA=2, but
68
+ # a.size is pointwise-shaped so the FMA off-by-one doesn't apply here).
69
+ "inner": 1_000_000,
70
+ # vdot: same as inner for 1D real inputs.
71
+ # Runtime charges a.size (FMA=2, pointwise-shaped — no off-by-one).
72
+ "vdot": 1_000_000,
73
+ # vecdot: batched dot product A(1000,512) . B(1000,512)
74
+ # Output (1000,) with contracted axis 512.
75
+ # Runtime charges result.size * contracted = 1000 * 512 (FMA=2 textbook).
76
+ "vecdot": 1000 * 512,
77
+ # outer: outer product of two 5000-element vectors
78
+ "outer": 5000 * 5000,
79
+ # tensordot: A(64,64,64) . B(64,64,64) axes=1 -> contract last of A with first of B
80
+ "tensordot": 64**5,
81
+ # kron: Kronecker product A(64,64) x B(64,64)
82
+ "kron": 64**4,
83
+ # einsum: 'ij,jk->ik' is matrix multiply (512,512)x(512,512), FMA=2 textbook
84
+ "einsum": 512 * 512 * 512,
85
+ }
86
+ return costs[op]
87
+
88
+
89
def _contraction_setups(lhs: str, rhs: str, shape: str, dtype: str) -> list[str]:
    """Build the three setup snippets that create operands ``lhs`` and ``rhs``.

    Each snippet seeds ``np.random.default_rng(42)`` and draws both operands
    from one distribution: standard normal, uniform on [0.01, 100), or uniform
    on [-1000, 1000). ``shape`` is spliced in verbatim as source text.
    """
    prelude = "import numpy as np; rng = np.random.default_rng(42); "
    draws = (
        f"rng.standard_normal({shape})",
        f"rng.uniform(0.01, 100, size={shape})",
        f"rng.uniform(-1000, 1000, size={shape})",
    )
    return [
        f"{prelude}{lhs} = {draw}.astype(np.{dtype}); {rhs} = {draw}.astype(np.{dtype})"
        for draw in draws
    ]


def benchmark_contractions(
    dtype: str = "float64",
    repeats: int = 10,
) -> tuple[dict[str, float], dict[str, dict]]:
    """Benchmark every op in ``CONTRACTION_OPS`` against its analytical cost.

    For each op, three input distributions are measured with
    ``measure_flops``; each measurement is divided by ``repeats`` and by the
    analytical FLOP count, and the median of those ratios is reported.

    In perf mode this is actual FP ops / analytical FLOPs (correction factor).
    In timing mode this is nanoseconds / analytical FLOPs (same units as
    pointwise -- the runner normalizes against baseline to get relative weights).

    Parameters
    ----------
    dtype : str
        NumPy dtype name spliced into the setup code as ``np.<dtype>``.
    repeats : int
        Number of repetitions per measurement.

    Returns
    -------
    tuple[dict[str, float], dict[str, dict]]
        ``(alphas, details)``. ``alphas`` maps op name to the median raw
        measurement per analytical FLOP. ``details`` maps op name to a dict
        of raw benchmark metadata. Ops for which every measurement failed
        are absent from both.
    """
    # op -> (first operand name, second operand name, shape literal, benchmarked expression)
    cases: dict[str, tuple[str, str, str, str]] = {
        "dot": ("A", "B", "(512, 512)", "np.dot(A, B)"),
        "matmul": ("A", "B", "(512, 512)", "np.matmul(A, B)"),
        "einsum": ("A", "B", "(512, 512)", "np.einsum('ij,jk->ik', A, B)"),
        # 1M elements: large enough for the BLAS dot kernel to dominate
        # per-call overhead (10K was too small and inflated alpha).
        "inner": ("a", "b", "1000000", "np.inner(a, b)"),
        "vdot": ("a", "b", "1000000", "np.vdot(a, b)"),
        "vecdot": ("A", "B", "(1000, 512)", "np.vecdot(A, B)"),
        "outer": ("a", "b", "5000", "np.outer(a, b)"),
        "tensordot": ("A", "B", "(64, 64, 64)", "np.tensordot(A, B, axes=1)"),
        "kron": ("A", "B", "(64, 64)", "np.kron(A, B)"),
    }

    alphas: dict[str, float] = {}
    details: dict[str, dict] = {}

    for op in CONTRACTION_OPS:
        case = cases.get(op)
        if case is None:
            continue  # pragma: no cover

        lhs, rhs, shape, bench = case
        setups = _contraction_setups(lhs, rhs, shape, dtype)
        analytical = _analytical_cost(op)

        # np.vecdot exists only on NumPy 2.x, so a missing attribute is
        # tolerated for that op in addition to measurement failures.
        if op == "vecdot":
            tolerated: tuple[type[BaseException], ...] = (RuntimeError, AttributeError)
        else:
            tolerated = (RuntimeError,)

        ratios: list[float] = []
        raw_totals: list[int] = []
        for setup in setups:
            try:
                result = measure_flops(setup, bench, repeats=repeats)
            except tolerated:
                continue
            per_call = result.total_flops / repeats
            ratios.append(per_call / analytical if analytical else 0.0)
            raw_totals.append(result.total_flops)

        if not ratios:
            continue

        alphas[op] = statistics.median(ratios)
        details[op] = {
            "category": "counted_custom",
            "measurement_mode": "blas",
            "analytical_formula": _FORMULA_STRINGS.get(op, ""),
            "analytical_flops": analytical,
            "benchmark_size": _BENCHMARK_SIZE_STRINGS.get(op, ""),
            "bench_code": bench,
            "repeats": repeats,
            "perf_instructions_total": raw_totals,
            "distribution_alphas": ratios,
        }

    return alphas, details
benchmarks/_fft.py ADDED
@@ -0,0 +1,198 @@
1
+ """Benchmark FFT operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import statistics
7
+
8
+ from benchmarks._perf import measure_flops
9
+
10
+ FFT_OPS: list[str] = [
11
+ "fft.fft",
12
+ "fft.ifft",
13
+ "fft.rfft",
14
+ "fft.irfft",
15
+ "fft.fft2",
16
+ "fft.ifft2",
17
+ "fft.rfft2",
18
+ "fft.irfft2",
19
+ "fft.fftn",
20
+ "fft.ifftn",
21
+ "fft.rfftn",
22
+ "fft.irfftn",
23
+ "fft.hfft",
24
+ "fft.ihfft",
25
+ ]
26
+
27
+ _RFFT_OPS = {
28
+ "fft.rfft",
29
+ "fft.irfft",
30
+ "fft.rfft2",
31
+ "fft.irfft2",
32
+ "fft.rfftn",
33
+ "fft.irfftn",
34
+ }
35
+
36
+ _FORMULA_STRINGS: dict[str, str] = {
37
+ "fft.fft": "5*n*ceil(log2(n))",
38
+ "fft.ifft": "5*n*ceil(log2(n))",
39
+ "fft.rfft": "5*(n/2)*ceil(log2(n))",
40
+ "fft.irfft": "5*(n/2)*ceil(log2(n))",
41
+ "fft.fft2": "5*n*ceil(log2(n))",
42
+ "fft.ifft2": "5*n*ceil(log2(n))",
43
+ "fft.rfft2": "5*(n/2)*ceil(log2(n))",
44
+ "fft.irfft2": "5*(n/2)*ceil(log2(n))",
45
+ "fft.fftn": "5*n*ceil(log2(n))",
46
+ "fft.ifftn": "5*n*ceil(log2(n))",
47
+ "fft.rfftn": "5*(n/2)*ceil(log2(n))",
48
+ "fft.irfftn": "5*(n/2)*ceil(log2(n))",
49
+ "fft.hfft": "5*n*ceil(log2(n))",
50
+ "fft.ihfft": "5*(n/2)*ceil(log2(n))",
51
+ }
52
+
53
+
54
+ def _ceil_log2(n: int) -> int:
55
+ """Return ceil(log2(n)), minimum 1."""
56
+ if n <= 1:
57
+ return 1
58
+ return math.ceil(math.log2(n))
59
+
60
+
61
def _analytical_cost(op_name: str, n: int) -> int:
    """Compute the analytical FLOP count ``5 * points * ceil(log2(n))``.

    Parameters
    ----------
    op_name : str
        Operation name (e.g. ``"fft.fft"``).
    n : int
        Input size.

    Returns
    -------
    int
        Analytical FLOP count. ``points`` is ``n // 2`` for ops listed in
        ``_RFFT_OPS`` and ``n`` for every other op; the logarithm is always
        taken of the full ``n``.
    """
    points = n // 2 if op_name in _RFFT_OPS else n
    return 5 * points * _ceil_log2(n)
80
+
81
+
82
def benchmark_fft(
    n: int = 2**20,
    dtype: str = "float64",
    repeats: int = 10,
) -> tuple[dict[str, float], dict[str, dict]]:
    """Benchmark every op in ``FFT_OPS`` against its analytical cost.

    For each op, three inputs are measured with ``measure_flops``; each
    measurement is divided by ``repeats`` and by the analytical FLOP count,
    and the median of those ratios is reported.

    Parameters
    ----------
    n : int
        Input size. 1D ops use a length-``n`` vector; 2D/nD ops use a
        ``isqrt(n) x isqrt(n)`` array.
    dtype : str
        NumPy dtype name spliced into the setup code as ``np.<dtype>``.
    repeats : int
        Number of repetitions per measurement.

    Returns
    -------
    tuple[dict[str, float], dict[str, dict]]
        ``(alphas, details)``. ``alphas`` maps op name to the median
        correction factor (measured / analytical). ``details`` maps op name
        to a dict of raw benchmark metadata. Ops for which every measurement
        failed are absent from both.
    """
    alphas: dict[str, float] = {}
    details: dict[str, dict] = {}

    # Side length of the square array used by the 2D/nD ops.
    side = math.isqrt(n)

    # Fragments shared by every generated setup snippet.
    prelude = "import numpy as np; "
    cast = f".astype(np.{dtype})"

    for op in FFT_OPS:
        short = op.split(".")[-1]

        # 2D ops carry a "2" in their short name; nD ops end in "n".
        if "2" in short or op.endswith("n"):
            shape_str = f"({side}, {side})"
            effective_n = side * side
            benchmark_size = f"x: ({side},{side})"
        else:
            shape_str = f"({n},)"
            effective_n = n
            benchmark_size = f"x: ({n},)"

        if short in ("irfft", "irfft2", "irfftn"):
            # Inverse real transforms take the output of the matching forward
            # real transform as input (irfft -> rfft, and so on).
            forward = short.replace("i", "", 1)
            finish = f"; x = np.fft.{forward}(_r)"
            setups = [
                f"{prelude}_r = np.random.default_rng(42).standard_normal({shape_str}){cast}{finish}",
                f"{prelude}_r = np.random.default_rng(43).uniform(-1, 1, size={shape_str}){cast}{finish}",
                f"{prelude}_r = np.random.default_rng(99).standard_normal({shape_str}){cast} * 100{finish}",
            ]
        elif short in ("ifft", "ifft2", "ifftn", "hfft"):
            # Inverse complex transforms and hfft are fed complex input.
            setups = [
                (
                    f"{prelude}x = np.random.default_rng(42).standard_normal({shape_str}){cast} "
                    f"+ 1j * np.random.default_rng(43).standard_normal({shape_str}){cast}"
                ),
                (
                    f"{prelude}t = np.linspace(0, 2*np.pi, {effective_n}).reshape({shape_str}){cast}; "
                    "x = np.sin(t) + 1j * np.cos(t)"
                ),
                (
                    f"{prelude}x = (np.random.default_rng(99).uniform(-1, 1, size={shape_str}){cast} "
                    f"+ 1j * np.random.default_rng(100).uniform(-1, 1, size={shape_str}){cast})"
                ),
            ]
        else:
            # Everything else (fft, rfft, fftn, rfftn, ihfft, ...) is fed real input.
            setups = [
                f"{prelude}x = np.random.default_rng(42).standard_normal({shape_str}){cast}",
                (
                    f"{prelude}t = np.linspace(0, 2*np.pi, {effective_n}).reshape({shape_str}){cast}; "
                    "x = np.sin(5*t) + 0.5*np.sin(13*t)"
                ),
                f"{prelude}x = np.random.default_rng(99).uniform(-1, 1, size={shape_str}){cast}",
            ]

        bench = f"np.{op}(x)"
        analytical = _analytical_cost(op, effective_n)

        ratios: list[float] = []
        raw_totals: list[int] = []
        for setup in setups:
            try:
                result = measure_flops(setup, bench, repeats=repeats)
            except RuntimeError:
                continue
            per_call = result.total_flops / repeats
            ratios.append(per_call / analytical if analytical else 0.0)
            raw_totals.append(result.total_flops)

        if not ratios:
            continue

        alphas[op] = statistics.median(ratios)
        details[op] = {
            "category": "counted_custom",
            "measurement_mode": "custom",
            "analytical_formula": _FORMULA_STRINGS.get(op, ""),
            "analytical_flops": analytical,
            "benchmark_size": benchmark_size,
            "bench_code": bench,
            "repeats": repeats,
            "perf_instructions_total": raw_totals,
            "distribution_alphas": ratios,
        }

    return alphas, details
@@ -0,0 +1,139 @@
1
+ """Map operation names to GitHub source code URLs.
2
+
3
+ For each operation in flopscope, finds where the FLOP cost is charged in the
4
+ source tree and constructs a GitHub permalink.
5
+
6
+ Strategy (tried in order for each op):
7
+ 1. Literal ``budget.deduct("op_name", ...)`` call — works for hand-written ops.
8
+ 2. Factory registration like ``sin = _counted_unary(_np.sin, "sin")`` — works
9
+ for pointwise ops created via factory helpers.
10
+ 3. Falls back to file-level URL if only the file is identified.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ import subprocess
17
+ from pathlib import Path
18
+
19
# Base of every generated permalink: the ``main`` branch blob view on GitHub.
REPO_URL = "https://github.com/AIcrowd/flopscope/blob/main"

# Package sources, located relative to this file as ``<two levels up>/src/flopscope``.
# NOTE(review): the published wheel lists ``benchmarks/`` and ``flopscope/`` as
# siblings with no ``src/`` directory, so this path presumably only resolves in
# a source checkout — confirm.
SRC_ROOT = Path(__file__).resolve().parent.parent.joinpath("src", "flopscope")
21
+
22
+
23
+ def _grep(pattern: str, path: str | None = None) -> list[tuple[str, int, str]]:
24
+ """Run grep -rn and return list of (abs_path, line_no, line_text)."""
25
+ target = path or str(SRC_ROOT)
26
+ try:
27
+ result = subprocess.run(
28
+ ["grep", "-rn", "-E", pattern, target],
29
+ capture_output=True,
30
+ text=True,
31
+ timeout=10,
32
+ )
33
+ except (subprocess.TimeoutExpired, FileNotFoundError):
34
+ return []
35
+
36
+ hits: list[tuple[str, int, str]] = []
37
+ if result.returncode == 0 and result.stdout.strip():
38
+ for line in result.stdout.strip().splitlines():
39
+ parts = line.split(":", 2)
40
+ if len(parts) >= 3:
41
+ try:
42
+ hits.append((parts[0], int(parts[1]), parts[2]))
43
+ except ValueError:
44
+ pass
45
+ return hits
46
+
47
+
48
def _to_rel_path(abs_path: str) -> str:
    """Convert an absolute path to a repo-relative path with ``/`` separators.

    The repository root is taken to be two levels above ``SRC_ROOT``. The
    result is later appended to a URL, so it is rendered with forward slashes
    regardless of platform.

    Parameters
    ----------
    abs_path : str
        Absolute path of a file.

    Returns
    -------
    str
        The path relative to the repository root, or ``abs_path`` unchanged
        when it does not lie under that root.
    """
    repo_root = SRC_ROOT.parent.parent  # up from src/flopscope
    try:
        return Path(abs_path).relative_to(repo_root).as_posix()
    except ValueError:
        return str(abs_path)
55
+
56
+
57
def _find_deduct_line(op_name: str) -> tuple[str, int | None]:
    """Locate the source line where the FLOP cost of *op_name* is charged.

    Three searches are tried in order, each with a double-quoted and then a
    single-quoted op name; the first hit wins:

    1. a direct ``deduct("op_name"`` call;
    2. a ``_counted_*(..., "op_name"`` factory registration, e.g.
       ``sin = _counted_unary(_np.sin, "sin")``;
    3. any occurrence of the quoted op name in a file under
       ``/src/flopscope/``, skipping ``__pycache__``, ``/data/``,
       ``_registry.py`` and ``_docstrings.py``.

    Returns ``(repo_relative_path, line_number)`` or ``("", None)``.
    """
    escaped = re.escape(op_name)
    quotes = ('"', "'")

    # Searches 1 and 2: the quoted name directly after a known call prefix.
    call_prefixes = (
        "deduct\\(",
        "_counted_\\w+\\([^)]*,\\s*",
    )
    for prefix in call_prefixes:
        for quote in quotes:
            hits = _grep(prefix + quote + escaped + quote)
            if hits:
                found_path, found_line, _ = hits[0]
                return (_to_rel_path(found_path), found_line)

    # Search 3: bare quoted name, for ops registered through loops or dicts.
    skipped_fragments = ("__pycache__", "/data/", "_registry.py", "_docstrings.py")
    for quote in quotes:
        for found_path, found_line, _ in _grep(quote + escaped + quote):
            if "/src/flopscope/" not in found_path:
                continue
            if any(fragment in found_path for fragment in skipped_fragments):
                continue
            return (_to_rel_path(found_path), found_line)

    return ("", None)
103
+
104
+
105
def map_op_to_url(op_name: str) -> str:
    """Build the GitHub URL of the runtime cost implementation of an operation.

    The URL is ``REPO_URL`` plus the repo-relative path found by
    ``_find_deduct_line``, with a ``#L<line>`` anchor when a line number is
    available. An empty string is returned if the source cannot be located.
    """
    rel_path, line_no = _find_deduct_line(op_name)
    if not rel_path:
        return ""
    anchor = "" if line_no is None else f"#L{line_no}"
    return f"{REPO_URL}/{rel_path}{anchor}"
117
+
118
+
119
def build_url_map(op_names: list[str] | None = None) -> dict[str, str]:
    """Build URL map for all given operation names.

    Parameters
    ----------
    op_names : list of str, optional
        If *None*, the names are the keys of the ``"weights"`` object in
        ``SRC_ROOT / "data" / "weights.json"``.

    Returns
    -------
    dict mapping operation name to GitHub URL (empty string if not found).

    Notes
    -----
    NOTE(review): the published wheel listing shows
    ``flopscope/data/default_weights.json`` and ``weights.csv`` but no
    ``weights.json`` — confirm the default file exists where this is run.
    """
    if op_names is None:
        import json

        weights_path = SRC_ROOT / "data" / "weights.json"
        # JSON is read as UTF-8 explicitly instead of relying on the platform
        # default encoding.
        with open(weights_path, encoding="utf-8") as f:
            data = json.load(f)
        op_names = list(data["weights"].keys())

    return {op: map_op_to_url(op) for op in op_names}