arbiter-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arbiter/__init__.py +3 -0
- arbiter/cli/__init__.py +0 -0
- arbiter/cli/app.py +699 -0
- arbiter/cli/display.py +381 -0
- arbiter/core/__init__.py +0 -0
- arbiter/core/benchmarks.py +804 -0
- arbiter/core/config.py +137 -0
- arbiter/core/discover.py +184 -0
- arbiter/core/judge.py +193 -0
- arbiter/core/leaderboard.py +197 -0
- arbiter/core/metrics.py +367 -0
- arbiter/core/providers/__init__.py +19 -0
- arbiter/core/providers/anthropic_provider.py +133 -0
- arbiter/core/providers/base.py +62 -0
- arbiter/core/providers/factory.py +79 -0
- arbiter/core/providers/google_provider.py +126 -0
- arbiter/core/providers/ollama.py +103 -0
- arbiter/core/providers/openai_provider.py +120 -0
- arbiter/core/runner.py +257 -0
- arbiter/core/swe/__init__.py +1 -0
- arbiter/core/swe/container.py +158 -0
- arbiter/core/swe/runner.py +220 -0
- arbiter/core/swe/sandbox.py +111 -0
- arbiter/core/swe/test_packs.py +548 -0
- arbiter/dashboard/__init__.py +0 -0
- arbiter/dashboard/frontend/dist/assets/index-1tkxJouQ.css +1 -0
- arbiter/dashboard/frontend/dist/assets/index-dHa4zmvw.js +298 -0
- arbiter/dashboard/frontend/dist/index.html +16 -0
- arbiter/dashboard/server.py +426 -0
- arbiter_cli-0.1.0.dist-info/METADATA +299 -0
- arbiter_cli-0.1.0.dist-info/RECORD +35 -0
- arbiter_cli-0.1.0.dist-info/WHEEL +5 -0
- arbiter_cli-0.1.0.dist-info/entry_points.txt +2 -0
- arbiter_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- arbiter_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Self-contained Python sandbox -- no Docker required.
|
|
2
|
+
|
|
3
|
+
Runs model-generated code in a subprocess with:
|
|
4
|
+
- Strict timeout (kills after N seconds)
|
|
5
|
+
- Temp directory as working dir and PYTHONPATH (note: this does not hide the host filesystem from the code)
|
|
6
|
+
- Memory limit via resource module (Unix only) -- NOTE: not currently applied by run_in_sandbox
|
|
7
|
+
|
|
8
|
+
This is the default execution mode. Docker is optional for full isolation.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import shutil
|
|
15
|
+
import subprocess
|
|
16
|
+
import sys
|
|
17
|
+
import tempfile
|
|
18
|
+
import time
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
from arbiter.core.swe.container import ContainerResult, _parse_pytest_output
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def run_in_sandbox(
    fix_code: str,
    test_code: str,
    timeout: int = 30,
) -> ContainerResult:
    """Run the model's fix and its test suite with pytest in a subprocess.

    Steps:
      1. Create a throwaway temp directory.
      2. Write ``fix.py`` and ``test_fix.py`` into it (always as UTF-8).
      3. Run ``python -m pytest`` with that directory as both the working
         directory and ``PYTHONPATH``.
      4. Parse pass/fail counts from the combined stdout + stderr.
      5. Remove the temp directory.

    No Docker needed. Ships with Arbiter.

    Args:
        fix_code: Source text written to ``fix.py``.
        test_code: Pytest source text written to ``test_fix.py``.
        timeout: Seconds the pytest subprocess may run before it is aborted.

    Returns:
        A ``ContainerResult``. For a completed run it carries the pytest exit
        code, the last 2000 characters of stdout, the last 1000 characters of
        stderr, the parsed test counts and the elapsed wall time. On timeout
        or on any other error ``exit_code`` is -1, all counts are 0 and
        ``stderr`` holds the reason.
    """
    tmpdir = tempfile.mkdtemp(prefix="arbiter-sandbox-")
    fix_path = Path(tmpdir) / "fix.py"
    test_path = Path(tmpdir) / "test_fix.py"

    try:
        # Python reads source files as UTF-8 by default, so write them as
        # UTF-8 explicitly instead of relying on the locale encoding.
        fix_path.write_text(fix_code, encoding="utf-8")
        test_path.write_text(test_code, encoding="utf-8")

        start = time.perf_counter()

        # NOTE(review): the child inherits the full parent environment, so any
        # secrets present in it are visible to the model-generated code.
        env = os.environ.copy()
        env["PYTHONDONTWRITEBYTECODE"] = "1"
        env["PYTHONPATH"] = tmpdir
        # Make the child emit UTF-8 so the decoding below matches.
        env["PYTHONIOENCODING"] = "utf-8"

        cmd = [
            sys.executable, "-m", "pytest",
            str(test_path),
            "-v", "--tb=short",
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Undecodable bytes printed by the code under test must not turn
            # a finished run into the generic failure result below.
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
            cwd=tmpdir,
            env=env,
        )

        elapsed = time.perf_counter() - start
        stdout = result.stdout
        stderr = result.stderr

        passed, failed, total = _parse_pytest_output(stdout + stderr)

        return ContainerResult(
            exit_code=result.returncode,
            # Keep only the tail of each stream to bound the result size.
            stdout=stdout[-2000:],
            stderr=stderr[-1000:],
            tests_passed=passed,
            tests_failed=failed,
            tests_total=total,
            duration_s=elapsed,
        )

    except subprocess.TimeoutExpired:
        return ContainerResult(
            exit_code=-1, stdout="", stderr=f"Timeout after {timeout}s",
            tests_passed=0, tests_failed=0, tests_total=0,
            duration_s=timeout,
        )
    except Exception as e:
        # Deliberate best-effort: any other failure is reported as a result
        # object rather than raised to the caller.
        return ContainerResult(
            exit_code=-1, stdout="", stderr=str(e),
            tests_passed=0, tests_failed=0, tests_total=0,
            duration_s=0,
        )
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def check_sandbox() -> bool:
    """Check if the sandbox can run (just needs Python + pytest).

    Returns:
        True when ``<sys.executable> -m pytest --version`` exits with status 0
        within 5 seconds. False when it exits non-zero, times out, or the
        interpreter cannot be launched at all.
    """
    # sys.executable may be empty or None when the interpreter path is unknown.
    if not sys.executable:
        return False
    try:
        # The output is never inspected, so it is captured as raw bytes; this
        # avoids a decode error on unexpected output.
        result = subprocess.run(
            [sys.executable, "-m", "pytest", "--version"],
            capture_output=True, timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers FileNotFoundError as well as PermissionError and
        # other launch failures, so this function always returns a bool.
        return False
    return result.returncode == 0
|
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
"""Built-in SWE test packs -- real bugs with real test suites.
|
|
2
|
+
|
|
3
|
+
Each test case contains:
|
|
4
|
+
- Buggy source code
|
|
5
|
+
- A bug report (what the model sees)
|
|
6
|
+
- A test suite (what verifies the fix)
|
|
7
|
+
- The correct fix (for reference scoring)
|
|
8
|
+
|
|
9
|
+
These run inside Docker containers with pytest.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class TestCase:
    """A single SWE-bench style test case.

    Bundles the bug report shown to the model, the broken source, the pytest
    suite that validates a fix, and a reference solution.
    """

    # The class name starts with "Test" and this module matches pytest's
    # default ``test_*.py`` file pattern, so pytest would try to collect it.
    # Opt out explicitly. Unannotated, so it is not a dataclass field.
    __test__ = False

    name: str            # human-readable title of the case
    category: str        # values used in this file: "bug_fix", "implementation", "algorithms"
    filename: str        # presumably the file name shown for the buggy code -- confirm against the runner
    issue: str           # bug report the model sees
    buggy_code: str      # the broken code
    test_code: str       # pytest test suite that validates the fix
    solution_code: str   # correct fix (for reference)
    expected_tests: int  # how many tests should pass
    # The three defaults below look Docker-oriented (image name, /workspace
    # path) -- NOTE(review): confirm how the container runner uses them.
    docker_image: str = "arbiter-swe-python"
    test_command: str = "python -m pytest /workspace/test_fix.py -v --tb=short"
    timeout: int = 30
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class TestPack:
    """A collection of related test cases."""

    # The class name starts with "Test" and this module matches pytest's
    # default ``test_*.py`` file pattern, so pytest would try to collect it.
    # Opt out explicitly. Unannotated, so it is not a dataclass field.
    __test__ = False

    name: str         # pack identifier, e.g. "python-bugs"
    description: str  # one-line summary of what the pack covers
    language: str     # language of the cases, e.g. "python"
    # default_factory gives every pack its own list instead of a shared one.
    cases: list[TestCase] = field(default_factory=list)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
43
|
+
# Python Bug Fixes
|
|
44
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
45
|
+
|
|
46
|
+
# Pack of Python bug-fix / re-implementation cases. Every test suite imports
# the code under test from a module named ``fix``.
PYTHON_BUGS = TestPack(
    name="python-bugs",
    description="Real Python bugs: off-by-one errors, logic flaws, edge cases",
    language="python",
    cases=[
        TestCase(
            name="Binary Search Off-by-One",
            category="bug_fix",
            filename="search.py",
            # The report must describe what the buggy code below really does:
            # it finds every present element, but never terminates for some
            # absent targets (``low = mid`` stops making progress).
            issue=(
                "binary_search hangs for some targets that are not in the list. "
                "For example, binary_search([1, 3, 5, 7, 9], 4) never returns "
                "instead of returning -1. The function enters an infinite loop for some inputs."
            ),
            buggy_code="""\
def binary_search(arr, target):
    low, high = 0, len(arr)
    while low < high:
        mid = (low + high) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            low = mid
        else:
            high = mid
    return -1
""",
            test_code="""\
from fix import binary_search

def test_find_existing():
    assert binary_search([1, 3, 5, 7, 9], 7) == 3

def test_find_first():
    assert binary_search([1, 3, 5, 7, 9], 1) == 0

def test_find_last():
    assert binary_search([1, 3, 5, 7, 9], 9) == 4

def test_not_found():
    assert binary_search([1, 3, 5, 7, 9], 4) == -1

def test_empty():
    assert binary_search([], 1) == -1

def test_single_element_found():
    assert binary_search([5], 5) == 0

def test_single_element_not_found():
    assert binary_search([5], 3) == -1
""",
            solution_code="""\
def binary_search(arr, target):
    low, high = 0, len(arr) - 1
    while low <= high:
        mid = (low + high) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            low = mid + 1
        else:
            high = mid - 1
    return -1
""",
            expected_tests=7,
        ),

        TestCase(
            name="LRU Cache Eviction Bug",
            category="bug_fix",
            filename="cache.py",
            issue=(
                "The LRU cache does not evict the least recently used item when full. "
                "After inserting 3 items into a cache with capacity 2, all 3 items "
                "remain. The cache should only hold the 2 most recently accessed items."
            ),
            buggy_code="""\
class LRUCache:
    def __init__(self, capacity):
        self.capacity = capacity
        self.cache = {}
        self.order = []

    def get(self, key):
        if key in self.cache:
            return self.cache[key]
        return -1

    def put(self, key, value):
        if key in self.cache:
            self.cache[key] = value
        else:
            self.cache[key] = value
            self.order.append(key)
""",
            test_code="""\
from fix import LRUCache

def test_basic_put_get():
    c = LRUCache(2)
    c.put("a", 1)
    c.put("b", 2)
    assert c.get("a") == 1
    assert c.get("b") == 2

def test_eviction():
    c = LRUCache(2)
    c.put("a", 1)
    c.put("b", 2)
    c.put("c", 3) # should evict "a"
    assert c.get("a") == -1
    assert c.get("c") == 3

def test_access_refreshes():
    c = LRUCache(2)
    c.put("a", 1)
    c.put("b", 2)
    c.get("a") # refresh "a"
    c.put("c", 3) # should evict "b" not "a"
    assert c.get("a") == 1
    assert c.get("b") == -1

def test_update_existing():
    c = LRUCache(2)
    c.put("a", 1)
    c.put("a", 10)
    assert c.get("a") == 10

def test_capacity_one():
    c = LRUCache(1)
    c.put("a", 1)
    c.put("b", 2)
    assert c.get("a") == -1
    assert c.get("b") == 2
""",
            solution_code="""\
from collections import OrderedDict

class LRUCache:
    def __init__(self, capacity):
        self.capacity = capacity
        self.cache = OrderedDict()

    def get(self, key):
        if key in self.cache:
            self.cache.move_to_end(key)
            return self.cache[key]
        return -1

    def put(self, key, value):
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            self.cache.popitem(last=False)
""",
            expected_tests=5,
        ),

        TestCase(
            name="Flatten Nested List Bug",
            category="bug_fix",
            filename="flatten.py",
            issue=(
                "flatten([[1, [2, 3]], [4, [5, [6]]]]) returns [1, [2, 3], 4, [5, [6]]] "
                "instead of [1, 2, 3, 4, 5, 6]. It only flattens one level deep "
                "instead of recursively flattening all nested lists."
            ),
            buggy_code="""\
def flatten(lst):
    result = []
    for item in lst:
        if isinstance(item, list):
            result.extend(item)
        else:
            result.append(item)
    return result
""",
            test_code="""\
from fix import flatten

def test_already_flat():
    assert flatten([1, 2, 3]) == [1, 2, 3]

def test_one_level():
    assert flatten([[1, 2], [3, 4]]) == [1, 2, 3, 4]

def test_deep_nested():
    assert flatten([[1, [2, 3]], [4, [5, [6]]]]) == [1, 2, 3, 4, 5, 6]

def test_empty():
    assert flatten([]) == []

def test_mixed():
    assert flatten([1, [2, [3]], 4]) == [1, 2, 3, 4]

def test_all_nested():
    assert flatten([[[1]], [[2]], [[3]]]) == [1, 2, 3]
""",
            solution_code="""\
def flatten(lst):
    result = []
    for item in lst:
        if isinstance(item, list):
            result.extend(flatten(item))
        else:
            result.append(item)
    return result
""",
            expected_tests=6,
        ),

        TestCase(
            name="Rate Limiter Time Window",
            category="implementation",
            filename="rate_limiter.py",
            issue=(
                "Implement a rate limiter. The current implementation counts all calls "
                "ever made instead of using a sliding time window. After max_calls "
                "have been made, it blocks forever instead of allowing calls once "
                "the window has passed."
            ),
            buggy_code="""\
import time

class RateLimiter:
    def __init__(self, max_calls, period_seconds):
        self.max_calls = max_calls
        self.period = period_seconds
        self.call_count = 0

    def allow(self):
        if self.call_count < self.max_calls:
            self.call_count += 1
            return True
        return False
""",
            test_code="""\
import time
from fix import RateLimiter

def test_allows_under_limit():
    rl = RateLimiter(3, 1.0)
    assert rl.allow() == True
    assert rl.allow() == True
    assert rl.allow() == True

def test_blocks_over_limit():
    rl = RateLimiter(2, 1.0)
    assert rl.allow() == True
    assert rl.allow() == True
    assert rl.allow() == False

def test_allows_after_window():
    rl = RateLimiter(1, 0.1)
    assert rl.allow() == True
    assert rl.allow() == False
    time.sleep(0.15)
    assert rl.allow() == True

def test_sliding_window():
    rl = RateLimiter(2, 0.2)
    assert rl.allow() == True
    time.sleep(0.1)
    assert rl.allow() == True
    assert rl.allow() == False
    time.sleep(0.15)
    assert rl.allow() == True
""",
            solution_code="""\
import time

class RateLimiter:
    def __init__(self, max_calls, period_seconds):
        self.max_calls = max_calls
        self.period = period_seconds
        self.calls = []

    def allow(self):
        now = time.time()
        self.calls = [t for t in self.calls if now - t < self.period]
        if len(self.calls) < self.max_calls:
            self.calls.append(now)
            return True
        return False
""",
            expected_tests=4,
        ),

        TestCase(
            name="Linked List Cycle Detection",
            category="implementation",
            filename="linked_list.py",
            issue=(
                "Implement Floyd's cycle detection for a linked list. "
                "The current has_cycle function uses a set to track visited nodes, "
                "which works but uses O(n) memory. Rewrite it to use O(1) memory "
                "with the tortoise and hare algorithm."
            ),
            buggy_code="""\
class Node:
    def __init__(self, val, next=None):
        self.val = val
        self.next = next

def has_cycle(head):
    visited = set()
    current = head
    while current:
        if id(current) in visited:
            return True
        visited.add(id(current))
        current = current.next
    return False
""",
            test_code="""\
from fix import Node, has_cycle

def test_no_cycle():
    head = Node(1, Node(2, Node(3)))
    assert has_cycle(head) == False

def test_with_cycle():
    a = Node(1)
    b = Node(2)
    c = Node(3)
    a.next = b
    b.next = c
    c.next = a
    assert has_cycle(a) == True

def test_empty():
    assert has_cycle(None) == False

def test_single_no_cycle():
    assert has_cycle(Node(1)) == False

def test_single_self_cycle():
    a = Node(1)
    a.next = a
    assert has_cycle(a) == True

def test_uses_constant_memory():
    # The function should NOT use a set/dict
    import inspect
    source = inspect.getsource(has_cycle)
    assert 'set()' not in source, "Should use O(1) memory, not a set"
    assert 'dict()' not in source, "Should use O(1) memory, not a dict"
    assert '{}' not in source, "Should use O(1) memory"
""",
            solution_code="""\
class Node:
    def __init__(self, val, next=None):
        self.val = val
        self.next = next

def has_cycle(head):
    slow = head
    fast = head
    while fast and fast.next:
        slow = slow.next
        fast = fast.next.next
        if slow is fast:
            return True
    return False
""",
            expected_tests=6,
        ),
    ],
)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
419
|
+
# Algorithm Challenges
|
|
420
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
421
|
+
|
|
422
|
+
# Each case is built first and the pack is assembled at the end. Every test
# suite imports the code under test from a module named ``fix``.
_two_sum_case = TestCase(
    name="Two Sum",
    category="algorithms",
    filename="two_sum.py",
    issue=(
        "Implement two_sum(nums, target) that returns indices of two numbers that add up to target. "
        "Must run in O(n) time, not O(n^2). "
        "The current brute force solution is too slow for large inputs."
    ),
    buggy_code="""\
def two_sum(nums, target):
    for i in range(len(nums)):
        for j in range(len(nums)):
            if i != j and nums[i] + nums[j] == target:
                return [i, j]
    return []
""",
    test_code="""\
from fix import two_sum

def test_basic():
    result = two_sum([2, 7, 11, 15], 9)
    assert sorted(result) == [0, 1]

def test_middle():
    result = two_sum([3, 2, 4], 6)
    assert sorted(result) == [1, 2]

def test_negative():
    result = two_sum([-1, -2, -3, -4, -5], -8)
    assert sorted(result) == [2, 4]

def test_not_found():
    assert two_sum([1, 2, 3], 100) == []

def test_uses_hash_map():
    import inspect
    source = inspect.getsource(two_sum)
    has_dict = 'dict' in source or '{}' in source or 'hash' in source.lower()
    no_nested_loop = source.count('for ') <= 1
    assert has_dict or no_nested_loop, "Should use O(n) approach with hash map"
""",
    solution_code="""\
def two_sum(nums, target):
    seen = {}
    for i, num in enumerate(nums):
        complement = target - num
        if complement in seen:
            return [seen[complement], i]
        seen[num] = i
    return []
""",
    expected_tests=5,
)

_merge_intervals_case = TestCase(
    name="Merge Intervals",
    category="algorithms",
    filename="intervals.py",
    issue=(
        "merge_intervals([[1,3],[2,6],[8,10],[15,18]]) should return [[1,6],[8,10],[15,18]] "
        "but currently returns the input unchanged. "
        "The function doesn't actually merge overlapping intervals."
    ),
    buggy_code="""\
def merge_intervals(intervals):
    if not intervals:
        return []
    intervals.sort(key=lambda x: x[0])
    merged = [intervals[0]]
    for i in range(1, len(intervals)):
        merged.append(intervals[i])
    return merged
""",
    test_code="""\
from fix import merge_intervals

def test_overlapping():
    assert merge_intervals([[1,3],[2,6],[8,10],[15,18]]) == [[1,6],[8,10],[15,18]]

def test_no_overlap():
    assert merge_intervals([[1,2],[3,4],[5,6]]) == [[1,2],[3,4],[5,6]]

def test_all_overlap():
    assert merge_intervals([[1,4],[2,3]]) == [[1,4]]

def test_empty():
    assert merge_intervals([]) == []

def test_single():
    assert merge_intervals([[1,5]]) == [[1,5]]

def test_touching():
    assert merge_intervals([[1,2],[2,3]]) == [[1,3]]
""",
    solution_code="""\
def merge_intervals(intervals):
    if not intervals:
        return []
    intervals.sort(key=lambda x: x[0])
    merged = [intervals[0]]
    for start, end in intervals[1:]:
        if start <= merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return merged
""",
    expected_tests=6,
)

ALGORITHMS = TestPack(
    name="algorithms",
    description="Classic algorithm problems with edge cases",
    language="python",
    cases=[_two_sum_case, _merge_intervals_case],
)
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
542
|
+
# All built-in packs
|
|
543
|
+
# ══════════════════════════════════════════════════════════════════════
|
|
544
|
+
|
|
545
|
+
# Every pack that ships with Arbiter, in display order.
BUILT_IN_PACKS = [PYTHON_BUGS, ALGORITHMS]

# Number of test cases (problems) across all built-in packs -- this counts
# TestCase entries, not the individual pytest tests inside each case.
TOTAL_SWE_TESTS = sum(len(pack.cases) for pack in BUILT_IN_PACKS)
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
*,:before,:after{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }::backdrop{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: 
;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }*,:before,:after{box-sizing:border-box;border-width:0;border-style:solid;border-color:#e5e7eb}:before,:after{--tw-content: ""}html,:host{line-height:1.5;-webkit-text-size-adjust:100%;-moz-tab-size:4;-o-tab-size:4;tab-size:4;font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-feature-settings:normal;font-variation-settings:normal;-webkit-tap-highlight-color:transparent}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:JetBrains Mono,Fira 
Code,monospace;font-feature-settings:normal;font-variation-settings:normal;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;letter-spacing:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dl,dd,h1,h2,h3,h4,h5,h6,hr,figure,p,pre{margin:0}fieldset{margin:0;padding:0}legend{padding:0}ol,ul,menu{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{opacity:1;color:#9ca3af}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button,[role=button]{cursor:pointer}:disabled{cursor:default}img,svg,video,canvas,audio,iframe,embed,object{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]:where(:not([hidden=until-found])){display:none}.pointer-events-none{pointer-events:none}.absolute{position:absolute}.relative{position:relative}.sticky{position:sticky}.inset-0{top:0;right:0;bottom:0;left:0}.inset-y-0{top:0;bottom:0}.left-0{left:0}.right-2{right:.5rem}.right-3{right:.75rem}.top-0{top:0}.top-0\.5{top:.125rem}.top-1\/2{top:50%}.top-2{top:.5rem}.z-10{z-index:10}.z-50{z-index
:50}.mx-auto{margin-left:auto;margin-right:auto}.mb-1{margin-bottom:.25rem}.mb-2{margin-bottom:.5rem}.mb-3{margin-bottom:.75rem}.mb-4{margin-bottom:1rem}.mb-5{margin-bottom:1.25rem}.ml-2{margin-left:.5rem}.ml-auto{margin-left:auto}.mt-0\.5{margin-top:.125rem}.mt-1{margin-top:.25rem}.mt-1\.5{margin-top:.375rem}.mt-12{margin-top:3rem}.mt-2{margin-top:.5rem}.mt-2\.5{margin-top:.625rem}.mt-3{margin-top:.75rem}.mt-4{margin-top:1rem}.block{display:block}.flex{display:flex}.inline-flex{display:inline-flex}.table{display:table}.grid{display:grid}.hidden{display:none}.h-1{height:.25rem}.h-1\.5{height:.375rem}.h-10{height:2.5rem}.h-12{height:3rem}.h-14{height:3.5rem}.h-2{height:.5rem}.h-2\.5{height:.625rem}.h-3{height:.75rem}.h-4{height:1rem}.h-6{height:1.5rem}.h-8{height:2rem}.h-9{height:2.25rem}.h-full{height:100%}.max-h-80{max-height:20rem}.min-h-\[400px\]{min-height:400px}.min-h-\[480px\]{min-height:480px}.min-h-screen{min-height:100vh}.w-1\.5{width:.375rem}.w-10{width:2.5rem}.w-12{width:3rem}.w-2{width:.5rem}.w-2\.5{width:.625rem}.w-28{width:7rem}.w-3{width:.75rem}.w-32{width:8rem}.w-4{width:1rem}.w-6{width:1.5rem}.w-7{width:1.75rem}.w-8{width:2rem}.w-9{width:2.25rem}.w-full{width:100%}.max-w-7xl{max-width:80rem}.flex-1{flex:1 1 0%}.shrink-0{flex-shrink:0}.-translate-y-1\/2{--tw-translate-y: -50%;transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}@keyframes pulse{50%{opacity:.5}}.animate-pulse{animation:pulse 2s cubic-bezier(.4,0,.6,1) infinite}@keyframes spin{to{transform:rotate(360deg)}}.animate-spin{animation:spin 1s linear 
infinite}.resize-none{resize:none}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.grid-cols-5{grid-template-columns:repeat(5,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-center{align-items:center}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-0\.5{gap:.125rem}.gap-1{gap:.25rem}.gap-1\.5{gap:.375rem}.gap-2{gap:.5rem}.gap-2\.5{gap:.625rem}.gap-3{gap:.75rem}.gap-4{gap:1rem}.gap-5{gap:1.25rem}.space-y-1>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.25rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.25rem * var(--tw-space-y-reverse))}.space-y-1\.5>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.375rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.375rem * var(--tw-space-y-reverse))}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.5rem * var(--tw-space-y-reverse))}.space-y-3>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.75rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.75rem * var(--tw-space-y-reverse))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1rem * var(--tw-space-y-reverse))}.space-y-5>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1.25rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1.25rem * var(--tw-space-y-reverse))}.space-y-6>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1.5rem * 
var(--tw-space-y-reverse))}.space-y-8>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(2rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(2rem * var(--tw-space-y-reverse))}.overflow-auto{overflow:auto}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.whitespace-pre-line{white-space:pre-line}.whitespace-pre-wrap{white-space:pre-wrap}.rounded{border-radius:.25rem}.rounded-2xl{border-radius:1rem}.rounded-full{border-radius:9999px}.rounded-lg{border-radius:.5rem}.rounded-md{border-radius:.375rem}.rounded-xl{border-radius:.75rem}.border{border-width:1px}.border-2{border-width:2px}.border-y{border-top-width:1px;border-bottom-width:1px}.border-b{border-bottom-width:1px}.border-r{border-right-width:1px}.border-t{border-top-width:1px}.border-\[var\(--border\)\]{border-color:var(--border)}.border-\[var\(--primary\)\]{border-color:var(--primary)}.border-\[var\(--secondary\)\]{border-color:var(--secondary)}.border-emerald-500\/15{border-color:#10b98126}.border-emerald-500\/20{border-color:#10b98133}.border-emerald-500\/30{border-color:#10b9814d}.border-red-500\/20{border-color:#ef444433}.border-white\/\[0\.02\]{border-color:#ffffff05}.border-t-transparent{border-top-color:transparent}.border-opacity-50{--tw-border-opacity: .5}.bg-\[var\(--bg\)\]{background-color:var(--bg)}.bg-\[var\(--primary\)\]{background-color:var(--primary)}.bg-\[var\(--surface\)\]{background-color:var(--surface)}.bg-\[var\(--surface-2\)\]{background-color:var(--surface-2)}.bg-\[var\(--surface-3\)\]{background-color:var(--surface-3)}.bg-amber-400{--tw-bg-opacity: 1;background-color:rgb(251 191 36 / var(--tw-bg-opacity, 1))}.bg-arbiter-bg{--tw-bg-opacity: 1;background-color:rgb(10 10 15 / var(--tw-bg-opacity, 1))}.bg-emerald-400{--tw-bg-opacity: 1;background-color:rgb(52 211 153 / var(--tw-bg-opacity, 
1))}.bg-emerald-500\/10{background-color:#10b9811a}.bg-emerald-500\/15{background-color:#10b98126}.bg-emerald-500\/5{background-color:#10b9810d}.bg-red-400{--tw-bg-opacity: 1;background-color:rgb(248 113 113 / var(--tw-bg-opacity, 1))}.bg-white{--tw-bg-opacity: 1;background-color:rgb(255 255 255 / var(--tw-bg-opacity, 1))}.bg-opacity-10{--tw-bg-opacity: .1}.bg-gradient-to-br{background-image:linear-gradient(to bottom right,var(--tw-gradient-stops))}.bg-gradient-to-r{background-image:linear-gradient(to right,var(--tw-gradient-stops))}.from-\[var\(--primary\)\]{--tw-gradient-from: var(--primary) var(--tw-gradient-from-position);--tw-gradient-to: rgb(255 255 255 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.from-\[var\(--secondary\)\]{--tw-gradient-from: var(--secondary) var(--tw-gradient-from-position);--tw-gradient-to: rgb(255 255 255 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.from-emerald-500\/20{--tw-gradient-from: rgb(16 185 129 / .2) var(--tw-gradient-from-position);--tw-gradient-to: rgb(16 185 129 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.via-transparent{--tw-gradient-to: rgb(0 0 0 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), transparent var(--tw-gradient-via-position), var(--tw-gradient-to)}.to-\[var\(--primary\)\]{--tw-gradient-to: var(--primary) var(--tw-gradient-to-position)}.to-\[var\(--secondary\)\]{--tw-gradient-to: var(--secondary) var(--tw-gradient-to-position)}.to-emerald-500\/20{--tw-gradient-to: rgb(16 185 129 / .2) 
var(--tw-gradient-to-position)}.p-1{padding:.25rem}.p-12{padding:3rem}.p-16{padding:4rem}.p-3{padding:.75rem}.p-4{padding:1rem}.p-6{padding:1.5rem}.p-8{padding:2rem}.px-1\.5{padding-left:.375rem;padding-right:.375rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-4{padding-left:1rem;padding-right:1rem}.px-5{padding-left:1.25rem;padding-right:1.25rem}.px-6{padding-left:1.5rem;padding-right:1.5rem}.py-0\.5{padding-top:.125rem;padding-bottom:.125rem}.py-1{padding-top:.25rem;padding-bottom:.25rem}.py-1\.5{padding-top:.375rem;padding-bottom:.375rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.py-2\.5{padding-top:.625rem;padding-bottom:.625rem}.py-3{padding-top:.75rem;padding-bottom:.75rem}.py-3\.5{padding-top:.875rem;padding-bottom:.875rem}.py-5{padding-top:1.25rem;padding-bottom:1.25rem}.py-6{padding-top:1.5rem;padding-bottom:1.5rem}.py-8{padding-top:2rem;padding-bottom:2rem}.pb-2{padding-bottom:.5rem}.pb-3{padding-bottom:.75rem}.pb-4{padding-bottom:1rem}.pb-5{padding-bottom:1.25rem}.pl-7{padding-left:1.75rem}.pr-4{padding-right:1rem}.pt-2\.5{padding-top:.625rem}.pt-4{padding-top:1rem}.pt-5{padding-top:1.25rem}.text-left{text-align:left}.text-center{text-align:center}.text-right{text-align:right}.font-mono{font-family:JetBrains Mono,Fira Code,monospace}.font-sans{font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color 
Emoji"}.text-2xl{font-size:1.5rem;line-height:2rem}.text-\[10px\]{font-size:10px}.text-\[11px\]{font-size:11px}.text-\[8px\]{font-size:8px}.text-\[9px\]{font-size:9px}.text-base{font-size:1rem;line-height:1.5rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-medium{font-weight:500}.font-normal{font-weight:400}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.capitalize{text-transform:capitalize}.leading-relaxed{line-height:1.625}.tracking-tight{letter-spacing:-.025em}.tracking-wider{letter-spacing:.05em}.tracking-widest{letter-spacing:.1em}.text-\[var\(--primary\)\]{color:var(--primary)}.text-\[var\(--secondary\)\]{color:var(--secondary)}.text-\[var\(--text\)\]{color:var(--text)}.text-\[var\(--text-dim\)\]{color:var(--text-dim)}.text-\[var\(--text-muted\)\]{color:var(--text-muted)}.text-\[var\(--warning\)\]{color:var(--warning)}.text-amber-400{--tw-text-opacity: 1;color:rgb(251 191 36 / var(--tw-text-opacity, 1))}.text-emerald-400{--tw-text-opacity: 1;color:rgb(52 211 153 / var(--tw-text-opacity, 1))}.text-red-400{--tw-text-opacity: 1;color:rgb(248 113 113 / var(--tw-text-opacity, 1))}.text-white{--tw-text-opacity: 1;color:rgb(255 255 255 / var(--tw-text-opacity, 1))}.antialiased{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.opacity-50{opacity:.5}.shadow-2xl{--tw-shadow: 0 25px 50px -12px rgb(0 0 0 / .25);--tw-shadow-colored: 0 25px 50px -12px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.shadow-lg{--tw-shadow: 0 10px 15px -3px rgb(0 0 0 / .1), 0 4px 6px -4px rgb(0 0 0 / .1);--tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.filter{filter:var(--tw-blur) var(--tw-brightness) 
var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.transition{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-all{transition-property:all;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-colors{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}:root{--bg: #06060c;--surface: #0f0f1a;--surface-2: #181828;--surface-3: #222238;--border: #1e1e35;--primary: #00d4ff;--secondary: #7c3aed;--accent: #10b981;--warning: #f59e0b;--error: #ef4444;--text: #e2e2f0;--text-dim: #6b6b8a;--text-muted: #3d3d5c}body{font-family:Inter,system-ui,-apple-system,sans-serif;background:var(--bg);color:var(--text);min-height:100vh;overflow-x:hidden}code,pre,.font-mono{font-family:JetBrains Mono,Fira Code,monospace}::-webkit-scrollbar{width:5px;height:5px}::-webkit-scrollbar-track{background:transparent}::-webkit-scrollbar-thumb{background:var(--border);border-radius:10px}::-webkit-scrollbar-thumb:hover{background:var(--text-muted)}.glass{background:#0f0f1ab3;-webkit-backdrop-filter:blur(12px);backdrop-filter:blur(12px);border:1px solid var(--border)}.glow-primary{box-shadow:0 0 20px #00d4ff26,0 0 40px #00d4ff0d}.glow-accent{box-shadow:0 0 20px #10b98126,0 0 40px #10b9810d}.glow-winner{box-shadow:0 0 30px #10b98133,0 0 60px #10b98114}@keyframes shimmer{0%{background-position:-200% 0}to{background-position:200% 0}}.shimmer{background:linear-gradient(90deg,var(--surface) 25%,var(--surface-2) 50%,var(--surface) 75%);background-size:200% 100%;animation:shimmer 2s ease-in-out 
infinite}.gradient-text{background:linear-gradient(135deg,var(--primary),var(--secondary));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text}.ambient-glow{position:fixed;width:600px;height:600px;border-radius:50%;filter:blur(150px);opacity:.03;pointer-events:none;z-index:0}.placeholder\:text-\[var\(--text-muted\)\]::-moz-placeholder{color:var(--text-muted)}.placeholder\:text-\[var\(--text-muted\)\]::placeholder{color:var(--text-muted)}.first\:text-right:first-child{text-align:right}.last\:border-0:last-child{border-width:0px}.last\:text-right:last-child{text-align:right}.hover\:bg-\[var\(--surface-2\)\]:hover{background-color:var(--surface-2)}.hover\:bg-white\/\[0\.01\]:hover{background-color:#ffffff03}.hover\:bg-white\/\[0\.02\]:hover{background-color:#ffffff05}.hover\:text-\[var\(--text\)\]:hover{color:var(--text)}.hover\:text-\[var\(--text-dim\)\]:hover{color:var(--text-dim)}.hover\:underline:hover{text-decoration-line:underline}.focus\:outline-none:focus{outline:2px solid transparent;outline-offset:2px}.disabled\:cursor-not-allowed:disabled{cursor:not-allowed}.disabled\:opacity-30:disabled{opacity:.3}@media(min-width:640px){.sm\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.sm\:grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}}@media(min-width:1024px){.lg\:grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.lg\:grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}}
|