max-div 0.0.3__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- max_div/_cli.py +99 -0
- max_div/benchmark/__init__.py +2 -1
- max_div/benchmark/_formatting.py +218 -0
- max_div/benchmark/randint.py +104 -0
- max_div/benchmark/randint_constrained.py +355 -0
- max_div/constraints/__init__.py +2 -0
- max_div/constraints/_numba.py +110 -0
- max_div/constraints/constraint.py +10 -0
- max_div/constraints/constraints.py +47 -0
- max_div/internal/benchmarking/_micro_benchmark.py +48 -7
- max_div/internal/formatting/__init__.py +1 -0
- max_div/internal/formatting/_markdown.py +43 -0
- max_div/internal/math/__init__.py +1 -0
- max_div/internal/math/fast_log.py +167 -0
- max_div/internal/math/random.py +166 -0
- max_div/internal/math/select_k_minmax.py +250 -0
- max_div/sampling/__init__.py +1 -1
- max_div/sampling/con.py +350 -0
- max_div/sampling/uncon.py +269 -0
- {max_div-0.0.3.dist-info → max_div-0.1.1.dist-info}/METADATA +13 -8
- max_div-0.1.1.dist-info/RECORD +32 -0
- max_div-0.1.1.dist-info/entry_points.txt +2 -0
- max_div/benchmark/sample_int.py +0 -85
- max_div/internal/compat/__init__.py +0 -1
- max_div/internal/compat/_numba/__init__.py +0 -14
- max_div/internal/compat/_numba/_dummy_numba.py +0 -94
- max_div/internal/compat/_numba/_helpers.py +0 -14
- max_div/sampling/discrete.py +0 -176
- max_div-0.0.3.dist-info/RECORD +0 -23
- max_div-0.0.3.dist-info/entry_points.txt +0 -2
- {max_div-0.0.3.dist-info → max_div-0.1.1.dist-info}/WHEEL +0 -0
- {max_div-0.0.3.dist-info → max_div-0.1.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from tqdm import tqdm
|
|
6
|
+
|
|
7
|
+
from max_div.constraints._numba import _build_array_repr
|
|
8
|
+
from max_div.internal.benchmarking import BenchmarkResult, benchmark
|
|
9
|
+
from max_div.internal.formatting import md_multiline
|
|
10
|
+
from max_div.sampling import randint_numba
|
|
11
|
+
from max_div.sampling.con import Constraint, randint_constrained_numba
|
|
12
|
+
|
|
13
|
+
from ._formatting import (
|
|
14
|
+
BoldLabels,
|
|
15
|
+
CellContent,
|
|
16
|
+
FastestBenchmark,
|
|
17
|
+
HighestPercentage,
|
|
18
|
+
Percentage,
|
|
19
|
+
extend_table_with_aggregate_row,
|
|
20
|
+
format_as_markdown,
|
|
21
|
+
format_for_console,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# =================================================================================================
|
|
26
|
+
# Helper classes
|
|
27
|
+
# =================================================================================================
|
|
28
|
+
@dataclass
class _Scenario:
    # Container describing one benchmark scenario executed by `benchmark_randint_constrained`.

    # Short label such as "A1" or "B2"; printed in the scenario title, and its first letter
    # decides how the constraints are constructed ("A..." vs. anything else).
    letter: str
    # Human-readable summary that is printed above the results table.
    description: str
    n_k_cons_tuples: list[tuple[int, int, int]]  # list of (n, k, n_cons)-tuples
    # If True, a custom probability vector `p` (p[i] ~ 1+i) is passed to the samplers.
    use_p: bool
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# =================================================================================================
|
|
37
|
+
# Main benchmark function
|
|
38
|
+
# =================================================================================================
|
|
39
|
+
def benchmark_randint_constrained(speed: float = 0.0, markdown: bool = False) -> None:
    """
    Benchmark `randint_constrained_numba` (from `max_div.sampling.con`) against the
    unconstrained `randint_numba`, reporting both run time and how often the constraints
    end up satisfied.

    Scenarios, over different values of `k`, `n` & `n_cons` (# of constraints):

     * **SCENARIO A1**
        * every combination with `k` < `n` of
            * `n` in [10, 100, 1000]
            * `k` in [2, 4, 8, 16, 32, ..., 256]
        * constraints:
            * 10 non-overlapping constraints, each spanning exactly 1/10th of the `n`-range
            * min_count = floor(k/11)
            * max_count = ceil(k/9)
        * no `p` provided (uniform sampling)

     * **SCENARIO A2**
        * same as Scenario A1, but with `p` provided (with p[i] ~ 1+i)

     * **SCENARIO B1**
        * `n` = 1000
        * `k` = 100
        * `n_cons` in [2, 4, 8, 16, ..., 512]
        * each constraint spans a random 1% of the `n` range (=10 values)
        * min_count = floor(10 / n_cons)
        * max_count = ceil(100 / n_cons)
        * no `p` provided (uniform sampling)

     * **SCENARIO B2**
        * same as Scenario B1, but with `p` provided (with p[i] ~ 1+i)

    Results are printed to stdout, one table per scenario.

    :param speed: value in [0.0, 1.0] (default=0.0); 0.0=accurate but slow; 1.0=fast but less accurate
    :param markdown: If `True`, outputs the results as a Markdown table.
    """

    # --- build scenarios ---------------------------------
    grid_a = [
        (n, k, 10)
        for n in [10, 100, 1000]
        for k in [2**exp for exp in range(1, 9)]  # 2, 4, 8, ..., 256
        if k < n
    ]
    grid_b = [(1000, 100, n_cons) for n_cons in [2**exp for exp in range(1, 10)]]  # n_cons = 2, 4, 8, ..., 512

    scenario_specs = [
        (
            "A1",
            "Varying n & k with 10 non-overlapping constraints spanning equal portions of the n range (uniform sampling).",
            grid_a,
            False,
        ),
        (
            "A2",
            "Identical to Scenario A1, but with custom probabilities p provided, favoring larger values.",
            grid_a,
            True,
        ),
        (
            "B1",
            "Fixed n=1000 & k=100 with varying number of constraints spanning random 1% portions of the n range (uniform sampling).",
            grid_b,
            False,
        ),
        (
            "B2",
            "Identical to Scenario B1, but with custom probabilities p provided, favoring larger values.",
            grid_b,
            True,
        ),
    ]
    scenarios = [
        _Scenario(letter=label, description=text, n_k_cons_tuples=list(grid), use_p=with_p)
        for label, text, grid, with_p in scenario_specs
    ]

    # --- create headers (identical for every scenario) ----
    param_columns = ["k", "n", "n_cons"]
    sampler_names = ["randint_numba", "randint_constrained_numba"]
    metric_labels = ["(time)", "(accuracy %)"]
    if markdown:
        headers = [f"`{col}`" for col in param_columns] + [
            md_multiline([f"`{sampler}`", metric]) for sampler in sampler_names for metric in metric_labels
        ]
    else:
        headers = param_columns + [f"{sampler} {metric}" for sampler in sampler_names for metric in metric_labels]

    # --- benchmark all scenarios -------------------------
    print("Benchmarking `randint_constrained`...")
    print()
    for scenario in scenarios:
        print(f"## Scenario {scenario.letter}" if markdown else f"Scenario {scenario.letter}:")
        print()
        print(scenario.description)
        print()

        # --- benchmark scenario ----------------
        rows: list[list[CellContent]] = []
        equal_slices = scenario.letter.startswith("A")
        for n, k, n_cons in tqdm(scenario.n_k_cons_tuples, leave=False):
            # --- build constraints ---
            if equal_slices:
                # 10 consecutive, equally sized slices of the n-range
                width = n // 10
                cons = [
                    Constraint(
                        int_set=set(range(part * width, (part + 1) * width)),
                        min_count=math.floor(k / 11),
                        max_count=math.ceil(k / 9),
                    )
                    for part in range(10)
                ]
            else:
                # n_cons constraints, each a reproducible (seeded) random 1% subset of the n-range
                cons = [
                    Constraint(
                        int_set=set(
                            randint_numba(
                                n=np.int32(n),
                                k=np.int32(n // 100),  # 1% random samples from n
                                replace=False,
                                seed=np.int64(42 + con_idx),
                            )
                        ),
                        min_count=math.floor(10 / n_cons),
                        max_count=math.ceil(100 / n_cons),
                    )
                    for con_idx in range(n_cons)
                ]

            # --- optional sampling probabilities, p[i] ~ 1+i ---
            p = None
            if scenario.use_p:
                p = np.arange(1, n + 1, dtype=np.float32)
                p /= p.sum()

            # --- convert constraints to numpy format once ---
            con_values, con_indices = _build_array_repr(cons)

            # --- benchmark & determine precision ---
            # unconstrained sampler first (ignore_constraints=True), then the constrained one
            row: list[CellContent] = [str(k), str(n), str(n_cons)]
            for ignore_constraints in (True, False):
                row.append(_benchmark(n, k, con_values, con_indices, p, ignore_constraints, speed))
                row.append(_determine_precision(n, k, cons, con_values, con_indices, p, ignore_constraints, speed))
            rows.append(row)

        # --- show results -----------------------------------------
        rows = extend_table_with_aggregate_row(rows, agg="geomean", include_percentage=False)
        rows = extend_table_with_aggregate_row(rows, agg="mean", include_benchmark_result=False)
        if markdown:
            display_data = format_as_markdown(
                headers,
                rows,
                highlighters=[
                    FastestBenchmark(),
                    HighestPercentage(),
                    BoldLabels(),
                ],
            )
        else:
            display_data = format_for_console(headers, rows)

        for output_line in display_data:
            print(output_line)
        print()
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# =================================================================================================
|
|
228
|
+
# Internal helpers
|
|
229
|
+
# =================================================================================================
|
|
230
|
+
def _benchmark(
    n: int,
    k: int,
    con_values: np.ndarray[np.int32],
    con_indices: np.ndarray[np.int32],
    p: np.ndarray | None,
    ignore_constraints: bool,
    speed: float,
) -> BenchmarkResult:
    """
    Runs a benchmark and returns the BenchmarkResult.
      If ignore_constraints=True, benchmarks randint_numba.
      If ignore_constraints=False, benchmarks randint_constrained_numba.

    All argument conversions (int32 casts, float32 cast of `p`, the empty `p` placeholder and
    the seed) are performed once, outside the timed closure, so that only the sampler call
    itself is measured and both samplers are timed on equal footing.

    :param n: size of the range to sample from
    :param k: number of samples to draw
    :param con_values: constraint (min_count, max_count) array, as produced by `_build_array_repr`
    :param con_indices: constraint index array, as produced by `_build_array_repr`
    :param p: optional sampling probabilities; `None` means no probabilities are provided
    :param ignore_constraints: selects which sampler is benchmarked (see above)
    :param speed: 0.0 -> 50ms per run, 8 warmup runs, 25 benchmark runs;
                  1.0 -> 0.05ms per run, 3 warmup runs, 3 benchmark runs
    :return: BenchmarkResult of the selected sampler
    """
    n = np.int32(n)
    k = np.int32(k)
    p_float32 = None if p is None else p.astype(np.float32)

    if ignore_constraints:
        # Benchmark randint_numba
        if p_float32 is None:

            def benchmark_func():
                return randint_numba(n=n, k=k, replace=False)
        else:

            def benchmark_func():
                return randint_numba(n=n, k=k, replace=False, p=p_float32)
    else:
        # Benchmark randint_constrained_numba
        # When no probabilities are given, an empty float32 array is passed in their place.
        p_arg = np.zeros(0, dtype=np.float32) if p_float32 is None else p_float32
        seed = np.int64(0)

        def benchmark_func():
            return randint_constrained_numba(
                n=n,
                k=k,
                con_values=con_values,
                con_indices=con_indices,
                p=p_arg,
                seed=seed,
            )

    return benchmark(
        f=benchmark_func,
        t_per_run=0.05 / (1000.0**speed),
        n_warmup=int(8 - 5 * speed),
        n_benchmark=int(25 - 22 * speed),
        silent=True,
    )
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _determine_precision(
    n: int,
    k: int,
    cons: list[Constraint],
    con_values: np.ndarray[np.int32],
    con_indices: np.ndarray[np.int32],
    p: np.ndarray | None,
    ignore_constraints: bool,
    speed: float,
) -> Percentage:
    """
    Determines how often (%) the constraints are satisfied when sampling.
      If ignore_constraints=True, samples with randint_numba.
      If ignore_constraints=False, samples with randint_constrained_numba.

    Each run uses its run index as seed. A run counts as satisfied when, for every constraint,
    the number of sampled values inside `int_set` lies within [min_count, max_count].

    :param n: size of the range to sample from
    :param k: number of samples to draw
    :param cons: constraints to check the samples against
    :param con_values: constraint (min_count, max_count) array, as produced by `_build_array_repr`
    :param con_indices: constraint index array, as produced by `_build_array_repr`
    :param p: optional sampling probabilities; `None` means no probabilities are provided
    :param ignore_constraints: selects which sampler is used (see above)
    :param speed: controls the number of runs (1000 at speed=0, 2 at speed=1)
    :return: Percentage of runs in which all constraints were satisfied
    """

    # Calculate number of runs based on speed (1000 at speed=0, 2 at speed=1).
    # Clamp to >= 1 so that an out-of-range speed cannot lead to a division by zero below.
    n_runs = max(1, int(1000 * (0.002**speed)))

    # Loop-invariant argument conversions, done once instead of once per run.
    n32 = np.int32(n)
    k32 = np.int32(k)
    p_float32 = None if p is None else p.astype(np.float32)
    # When no probabilities are given, the constrained sampler receives an empty float32 array.
    p_constrained = np.zeros(0, dtype=np.float32) if p_float32 is None else p_float32

    satisfied_count = 0
    for run_idx in range(n_runs):
        # Run the appropriate function with seed equal to run index
        seed = np.int64(run_idx)
        if ignore_constraints:
            # Use randint_numba
            if p_float32 is None:
                result = randint_numba(n=n32, k=k32, replace=False, seed=seed)
            else:
                result = randint_numba(n=n32, k=k32, replace=False, p=p_float32, seed=seed)
        else:
            # Use randint_constrained_numba
            result = randint_constrained_numba(
                n=n32,
                k=k32,
                con_values=con_values,
                con_indices=con_indices,
                p=p_constrained,
                seed=seed,
            )

        # Check if all constraints are satisfied (all() stops at the first violated constraint)
        if all(
            constraint.min_count <= sum(1 for val in result if val in constraint.int_set) <= constraint.max_count
            for constraint in cons
        ):
            satisfied_count += 1

    return Percentage(frac=satisfied_count / n_runs, decimals=1)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import numba
|
|
2
|
+
import numpy as np
|
|
3
|
+
from numba.typed import List
|
|
4
|
+
|
|
5
|
+
from .constraint import Constraint
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# =================================================================================================
|
|
9
|
+
# Handlers for numpy-based constraint representation
|
|
10
|
+
# =================================================================================================
|
|
11
|
+
#
|
|
12
|
+
# Constraints:
|
|
13
|
+
# [
|
|
14
|
+
# Constraint(int_set={0,1,2,3,4}, min_count=2, max_count=3),
|
|
15
|
+
# Constraint(int_set={10,11,12,13}, min_count=0, max_count=7),
|
|
16
|
+
# Constraint(int_set={3,11}, min_count=2, max_count=2),
|
|
17
|
+
# ]
|
|
18
|
+
#
|
|
19
|
+
# Will be represented as 2 numpy arrays:
|
|
20
|
+
#
|
|
21
|
+
# con_values:
|
|
22
|
+
# np.array([
|
|
23
|
+
# [2, 3], # min_count, max_count for constraint 0
|
|
24
|
+
# [0, 7], # min_count, max_count for constraint 1
|
|
25
|
+
# [2, 2], # min_count, max_count for constraint 2
|
|
26
|
+
# ], dtype=np.int32)
|
|
27
|
+
#
|
|
28
|
+
# con_indices:
|
|
29
|
+
# -> Part 1 - first 2*n_cons values indicate start/end indices in the array for each constraint
|
|
30
|
+
# -> Part 2 - followed by concatenated indices from each constraint's int_set
|
|
31
|
+
#
|
|
32
|
+
# |-------- Part 1 ----------|----------- Part 2 ---------------|
|
|
33
|
+
# index: 0 1 2 3 4 5 6 10 11 14 15 16
|
|
34
|
+
#
|
|
35
|
+
# np.array([6, 11, 11, 15, 15,17, 0,1,2,3,4, 10,11,12,13, 3,11], dtype=np.int32)
|
|
36
|
+
#
|
|
37
|
+
# | | | ^^^^^^^^^ ^^^^^^^^^^^ ^^^^
|
|
38
|
+
# | | | | | |
|
|
39
|
+
# | | +-----> | | con 2 indices
|
|
40
|
+
# | +-------------> | con 1 indices
|
|
41
|
+
# +--------------------> con 0 indices
|
|
42
|
+
#
|
|
43
|
+
# =================================================================================================
|
|
44
|
+
def _build_array_repr(
    cons: list[Constraint],
) -> tuple[np.ndarray[np.int32], np.ndarray[np.int32]]:
    """
    Translate a list of Constraint objects into the two int32 arrays used by the numba code:
      - con_values:  2D array of shape (n_cons, 2); row i holds (min_count, max_count) of constraint i
      - con_indices: 1D array of length 2*n_cons + total number of indices over all constraints.
                     The first 2*n_cons entries are (start, end) positions, one pair per constraint,
                     pointing into the remainder of the same array, where the sorted members of each
                     constraint's int_set are stored back to back (end is exclusive).

    :param cons: list of Constraint objects
    :return: tuple of (con_values, con_indices)
    """

    # dimensions
    n_cons = len(cons)
    total_members = sum(len(con.int_set) for con in cons)

    # pre-allocate both arrays; every element is written below
    con_values = np.empty((n_cons, 2), dtype=np.int32)
    con_indices = np.empty((2 * n_cons) + total_members, dtype=np.int32)

    # fill both arrays in a single pass over the constraints
    cursor = 2 * n_cons  # next free position in the member section of con_indices
    for row, con in enumerate(cons):
        con_values[row] = (np.int32(con.min_count), np.int32(con.max_count))

        members = sorted(con.int_set)
        stop = cursor + len(members)
        con_indices[2 * row] = np.int32(cursor)
        con_indices[(2 * row) + 1] = np.int32(stop)
        con_indices[cursor:stop] = [np.int32(member) for member in members]
        cursor = stop

    return con_values, con_indices
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@numba.njit(numba.int32(numba.int32[:, :], numba.int32), inline="always")
def _np_con_min_value(con_values: np.ndarray[np.int32], i_con: np.int32) -> np.int32:
    """Return column 0 of row `i_con` of `con_values`, i.e. the min_count of the i-th constraint."""
    return con_values[i_con, 0]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@numba.njit(numba.int32(numba.int32[:, :], numba.int32), inline="always")
def _np_con_max_value(con_values: np.ndarray[np.int32], i_con: np.int32) -> np.int32:
    """Return column 1 of row `i_con` of `con_values`, i.e. the max_count of the i-th constraint."""
    return con_values[i_con, 1]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@numba.njit(numba.int32[:](numba.int32[:], numba.int32), inline="always")
def _np_con_indices(con_indices: np.ndarray[np.int32], i_con: np.int32) -> np.ndarray[np.int32]:
    """
    Return the indices array for the i-th constraint from con_indices array.

    The (start, end) positions of constraint `i_con` are read from entries 2*i_con and 2*i_con+1
    of `con_indices`; the result is the slice con_indices[start:end] of that same array.
    """
    start = con_indices[2 * i_con]
    end = con_indices[2 * i_con + 1]
    return con_indices[start:end]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@numba.njit(inline="always")
def _np_con_build_index_sets(
    con_indices: np.ndarray[np.int32],
    n_cons: np.int32,
) -> List[set[np.int32]]:
    """
    Build list of sets of indices for each constraint from con_indices array.

    :param con_indices: index array in the layout produced by `_build_array_repr`
    :param n_cons: number of constraints encoded in `con_indices`
    :return: numba typed List with one set per constraint, in constraint order
    """
    list_of_sets = List()
    # iterate with int32 values; presumably to match the int32 signature declared on _np_con_indices
    for i in np.arange(n_cons, dtype=np.int32):
        list_of_sets.append(set(_np_con_indices(con_indices, i)))
    return list_of_sets
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from numba.typed import List
|
|
5
|
+
|
|
6
|
+
from .constraint import Constraint
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Constraints:
    """Ordered collection of `Constraint` objects, built up incrementally via `add()`."""

    # -------------------------------------------------------------------------
    #  Constructor / initialization
    # -------------------------------------------------------------------------
    def __init__(self):
        # constraints in the order in which they were added
        self._cons: list[Constraint] = []

    # -------------------------------------------------------------------------
    #  API
    # -------------------------------------------------------------------------
    @property
    def n_cons(self) -> int:
        """Number of constraints added so far."""
        return len(self._cons)

    def add(self, indices: set[int], min_count: int, max_count: int) -> None:
        """
        Register one more constraint: of the integers in `indices`, at least `min_count`
        and at most `max_count` should be sampled.
        """
        new_constraint = Constraint(indices, min_count, max_count)
        self._cons.append(new_constraint)

    def all(self, deepcopy: bool = False) -> list[Constraint]:
        """
        Return the constraints that were added using `add()`.

        :param deepcopy: If True, a deep copy of the list and its contents is returned;
                         otherwise the internal list itself is returned (no copy).
        :return: list of Constraint objects.
        """
        if deepcopy:
            return copy.deepcopy(self._cons)
        return self._cons

    def to_numpy(self) -> tuple[np.ndarray[np.int32], np.ndarray[np.int32]]:
        """Return the (con_values, con_indices) numpy representation for the numba sampling functions."""
        # imported at call time rather than at module level, as in the original implementation
        from ._numba import _build_array_repr

        return _build_array_repr(self._cons)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from dataclasses import dataclass
|
|
2
|
-
from typing import Callable
|
|
4
|
+
from typing import Callable, Literal
|
|
3
5
|
|
|
4
6
|
import numpy as np
|
|
5
7
|
|
|
@@ -29,6 +31,44 @@ class BenchmarkResult:
|
|
|
29
31
|
s_perc = f"{50 * (self.t_sec_q_75 - self.t_sec_q_25) / self.t_sec_q_50:.1f}%"
|
|
30
32
|
return f"{s_median} ± {s_perc}"
|
|
31
33
|
|
|
34
|
+
def __str__(self) -> str:
    """Return `t_sec_with_uncertainty_str` as the string form of this result."""
    # NOTE(review): accessed without a call, so this is presumably a property - confirm.
    return self.t_sec_with_uncertainty_str
|
|
36
|
+
|
|
37
|
+
@classmethod
def aggregate(cls, results: list[BenchmarkResult], method: Literal["mean", "geomean", "sum"]) -> BenchmarkResult:
    """
    Combine several BenchmarkResult objects into one, aggregating the q25, q50 and q75 values
    independently of each other.

    :param results: List of BenchmarkResult objects to aggregate
    :param method: Aggregation method - "mean", "geomean" (geometric mean), or "sum"
    :return: Aggregated BenchmarkResult
    :raises ValueError: if `results` is empty or `method` is not one of the supported methods
    """
    if not results:
        raise ValueError("Cannot aggregate empty list of results")

    # Gather the three quantile series
    quantile_series = (
        [res.t_sec_q_25 for res in results],
        [res.t_sec_q_50 for res in results],
        [res.t_sec_q_75 for res in results],
    )

    # Select the reduction that is applied to each series
    reducers = {
        "mean": np.mean,
        "geomean": lambda values: np.exp(np.mean(np.log(values))),  # exp of the mean of logs
        "sum": np.sum,
    }
    reducer = reducers.get(method) if isinstance(method, str) else None
    if reducer is None:
        raise ValueError(f"Unknown aggregation method: {method}")

    agg_q25, agg_q50, agg_q75 = (reducer(series) for series in quantile_series)
    return BenchmarkResult(t_sec_q_25=agg_q25, t_sec_q_50=agg_q50, t_sec_q_75=agg_q75)
|
|
71
|
+
|
|
32
72
|
|
|
33
73
|
# =================================================================================================
|
|
34
74
|
# Main benchmarking function
|
|
@@ -36,7 +76,7 @@ class BenchmarkResult:
|
|
|
36
76
|
def benchmark(
|
|
37
77
|
f: Callable,
|
|
38
78
|
t_per_run: float = 0.1,
|
|
39
|
-
n_warmup: int =
|
|
79
|
+
n_warmup: int = 5,
|
|
40
80
|
n_benchmark: int = 30,
|
|
41
81
|
silent: bool = False,
|
|
42
82
|
) -> BenchmarkResult:
|
|
@@ -46,7 +86,7 @@ def benchmark(
|
|
|
46
86
|
:param f: (Callable) Function to benchmark. Should take no arguments.
|
|
47
87
|
:param t_per_run: (float, default=0.1) time in seconds we want to target per benchmarking run.
|
|
48
88
|
# of executions/run is adjusted to meet this target.
|
|
49
|
-
:param n_warmup: (int, default=
|
|
89
|
+
:param n_warmup: (int, default=5) Number of warmup runs to perform before benchmarking.
|
|
50
90
|
:param n_benchmark: (int, default=30) Number of benchmark runs to perform.
|
|
51
91
|
:param silent: (bool, default=False) If True, suppresses any output during benchmarking.
|
|
52
92
|
:return: Median estimate of duration/execution of `f` in seconds.
|
|
@@ -85,13 +125,14 @@ def benchmark(
|
|
|
85
125
|
if not silent:
|
|
86
126
|
print("w", end="")
|
|
87
127
|
|
|
88
|
-
# adjust n_executions
|
|
128
|
+
# adjust n_executions to bring t_tot closer to t_per_run
|
|
129
|
+
# NOTE: during warmup we adjust n_executions at a log-scale to reach t_per_run target at end of warmup
|
|
89
130
|
t_tot = timer_tot.t_elapsed_sec()
|
|
90
131
|
n_executions = round(
|
|
91
132
|
clip(
|
|
92
|
-
value=n_executions * (t_per_run / t_tot),
|
|
93
|
-
min_value=max(1.0, n_executions /
|
|
94
|
-
max_value=n_executions *
|
|
133
|
+
value=n_executions * (t_per_run / t_tot) ** min(1.0, (i + 1) / n_warmup),
|
|
134
|
+
min_value=max(1.0, n_executions / 100),
|
|
135
|
+
max_value=n_executions * 100,
|
|
95
136
|
)
|
|
96
137
|
)
|
|
97
138
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
def md_table(data: list[list[str]]) -> list[str]:
    """
    Creates a Markdown table from a 2D list of strings, where the first list represents the table headers.

    The number of columns is taken from the header row. Rows with fewer cells are padded with
    empty cells; cells beyond the number of header columns are ignored. Every column is
    left-aligned and padded to the width of its widest cell (at least 1 character).

    :param data: header row followed by the data rows; may be empty.
    :return: list of table lines (header, separator, data rows); empty list if `data` is empty.
    """
    # Nothing to render; also avoids indexing into an empty list below
    if not data:
        return []

    # Normalize all rows to exactly the number of header columns
    num_cols = len(data[0])
    rows = [list(row[:num_cols]) + [""] * (num_cols - len(row)) for row in data]

    # Calculate column widths based on all rows
    col_widths = [max(1, max(len(row[i]) for row in rows)) for i in range(num_cols)]  # max width of content and >=1

    def _format_line(cells: list[str]) -> str:
        return "| " + " | ".join(cells) + " |"

    # Header row, separator row, then the data rows
    result = [_format_line([rows[0][i].ljust(col_widths[i]) for i in range(num_cols)])]
    result.append(_format_line(["-" * width for width in col_widths]))
    for row in rows[1:]:
        result.append(_format_line([row[i].ljust(col_widths[i]) for i in range(num_cols)]))

    return result
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def md_multiline(lines: list[str]) -> str:
    """Merge several Markdown lines into one line, separated by html line breaks (`<br>`)."""
    line_break = "<br>"
    return line_break.join(lines)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def md_bold(text: str) -> str:
    """Wrap the given text in double asterisks, which renders as bold in Markdown."""
    marker = "**"
    return f"{marker}{text}{marker}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def md_italic(text: str) -> str:
    """Wrap the given text in single asterisks, which renders as italic in Markdown."""
    marker = "*"
    return f"{marker}{text}{marker}"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def md_colored(text: str, hex_color: str) -> str:
    """Wrap the given text in an HTML span that sets its color (hex_color="#rrggbb" or "#rgb")."""
    opening_tag = f'<span style="color:{hex_color}">'
    return f"{opening_tag}{text}</span>"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|