cudag 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudag/__init__.py +334 -0
- cudag/annotation/__init__.py +77 -0
- cudag/annotation/codegen.py +648 -0
- cudag/annotation/config.py +545 -0
- cudag/annotation/loader.py +342 -0
- cudag/annotation/scaffold.py +121 -0
- cudag/annotation/transcription.py +296 -0
- cudag/cli/__init__.py +5 -0
- cudag/cli/main.py +315 -0
- cudag/cli/new.py +873 -0
- cudag/core/__init__.py +364 -0
- cudag/core/button.py +137 -0
- cudag/core/canvas.py +222 -0
- cudag/core/config.py +70 -0
- cudag/core/coords.py +233 -0
- cudag/core/data_grid.py +804 -0
- cudag/core/dataset.py +678 -0
- cudag/core/distribution.py +136 -0
- cudag/core/drawing.py +75 -0
- cudag/core/fonts.py +156 -0
- cudag/core/generator.py +163 -0
- cudag/core/grid.py +367 -0
- cudag/core/grounding_task.py +247 -0
- cudag/core/icon.py +207 -0
- cudag/core/iconlist_task.py +301 -0
- cudag/core/models.py +1251 -0
- cudag/core/random.py +130 -0
- cudag/core/renderer.py +190 -0
- cudag/core/screen.py +402 -0
- cudag/core/scroll_task.py +254 -0
- cudag/core/scrollable_grid.py +447 -0
- cudag/core/state.py +110 -0
- cudag/core/task.py +293 -0
- cudag/core/taskbar.py +350 -0
- cudag/core/text.py +212 -0
- cudag/core/utils.py +82 -0
- cudag/data/surnames.txt +5000 -0
- cudag/modal_apps/__init__.py +4 -0
- cudag/modal_apps/archive.py +103 -0
- cudag/modal_apps/extract.py +138 -0
- cudag/modal_apps/preprocess.py +529 -0
- cudag/modal_apps/upload.py +317 -0
- cudag/prompts/SYSTEM_PROMPT.txt +104 -0
- cudag/prompts/__init__.py +33 -0
- cudag/prompts/system.py +43 -0
- cudag/prompts/tools.py +382 -0
- cudag/py.typed +0 -0
- cudag/schemas/filesystem.json +90 -0
- cudag/schemas/test_record.schema.json +113 -0
- cudag/schemas/train_record.schema.json +90 -0
- cudag/server/__init__.py +21 -0
- cudag/server/app.py +232 -0
- cudag/server/services/__init__.py +9 -0
- cudag/server/services/generator.py +128 -0
- cudag/templates/scripts/archive.sh +35 -0
- cudag/templates/scripts/build.sh +13 -0
- cudag/templates/scripts/extract.sh +54 -0
- cudag/templates/scripts/generate.sh +116 -0
- cudag/templates/scripts/pre-commit.sh +44 -0
- cudag/templates/scripts/preprocess.sh +46 -0
- cudag/templates/scripts/upload.sh +63 -0
- cudag/templates/scripts/verify.py +428 -0
- cudag/validation/__init__.py +35 -0
- cudag/validation/validate.py +508 -0
- cudag-0.3.10.dist-info/METADATA +570 -0
- cudag-0.3.10.dist-info/RECORD +69 -0
- cudag-0.3.10.dist-info/WHEEL +4 -0
- cudag-0.3.10.dist-info/entry_points.txt +2 -0
- cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/core/screen.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""Rails-like DSL for screen definitions.
|
|
6
|
+
|
|
7
|
+
Simple, readable screen definitions with DSL functions.
|
|
8
|
+
|
|
9
|
+
Example:
|
|
10
|
+
class CalendarScreen(Screen):
|
|
11
|
+
name = "calendar"
|
|
12
|
+
base_image = "calendar.png"
|
|
13
|
+
size = (224, 208)
|
|
14
|
+
|
|
15
|
+
day_grid = grid((10, 50, 210, 150), rows=6, cols=7)
|
|
16
|
+
back_month = button((7, 192, 20, 12), label="Back Month")
|
|
17
|
+
scroll_area = scrollable((0, 0, 224, 208), step=100)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from abc import ABC, abstractmethod
|
|
23
|
+
from collections.abc import Sequence
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from dataclasses import field as dataclass_field
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Any, ClassVar
|
|
28
|
+
|
|
29
|
+
# =============================================================================
|
|
30
|
+
# Bounds - represents a rectangular area
|
|
31
|
+
# =============================================================================
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class Bounds:
    """Axis-aligned rectangle given by an origin (x, y) and a size.

    The rectangle is treated as half-open: ``right`` and ``bottom`` are the
    first coordinates *outside* the rectangle (see ``contains``).
    """

    x: int
    y: int
    width: int
    height: int

    @property
    def center(self) -> tuple[int, int]:
        """Midpoint of the rectangle (floor division on odd sizes)."""
        half_w = self.width // 2
        half_h = self.height // 2
        return (self.x + half_w, self.y + half_h)

    @property
    def right(self) -> int:
        """X coordinate of the right edge, i.e. ``x + width``."""
        return self.width + self.x

    @property
    def bottom(self) -> int:
        """Y coordinate of the bottom edge, i.e. ``y + height``."""
        return self.height + self.y

    def contains(self, point: tuple[int, int]) -> bool:
        """Return True when ``point`` lies inside the half-open rectangle.

        The left/top edges are inclusive, the right/bottom edges exclusive.
        """
        px, py = point
        # Guard clause: reject on the horizontal axis before looking at y.
        if not (self.x <= px < self.right):
            return False
        return self.y <= py < self.bottom

    @classmethod
    def from_tuple(cls, t: tuple[int, int, int, int]) -> Bounds:
        """Build a Bounds from an ``(x, y, width, height)`` tuple."""
        return cls(t[0], t[1], t[2], t[3])
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# =============================================================================
|
|
70
|
+
# Region Base Class
|
|
71
|
+
# =============================================================================
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class Region(ABC):
    """Abstract base for every interactive area declared on a screen.

    Attributes:
        bounds: Rectangle the region occupies.
        name: Identifier of the region; empty by default.
    """

    bounds: Bounds
    name: str = ""

    @abstractmethod
    def get_action_point(self, target: Any = None) -> tuple[int, int]:
        """Return the pixel coordinate at which an action should be performed.

        Args:
            target: Optional, subclass-specific selector for a sub-element.

        Returns:
            An ``(x, y)`` pixel coordinate.
        """
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# =============================================================================
|
|
88
|
+
# Region Types
|
|
89
|
+
# =============================================================================
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class ClickRegion(Region):
    """Plain clickable area; the click always lands on the bounds' center."""

    def get_action_point(self, target: Any = None) -> tuple[int, int]:
        """Return the center of ``bounds``; ``target`` is ignored."""
        click_point = self.bounds.center
        return click_point
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass
class ButtonRegion(ClickRegion):
    """Clickable button; adds descriptive fields on top of ClickRegion.

    Clicking behaviour is inherited unchanged from ClickRegion.
    """

    # Text associated with the button (empty when not provided).
    label: str = ""
    # Free-form description of the button (empty when not provided).
    description: str = ""
    # Two-component tolerance; presumably the allowed (x, y) click deviation
    # -- units are not established in this module, confirm with consumers.
    tolerance: tuple[int, int] = (5, 5)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass
class GridRegion(Region):
    """Grid of equally sized, clickable cells laid out inside ``bounds``.

    Cells can be addressed by ``(row, col)`` or by a flat row-major index.
    When ``cell_width`` / ``cell_height`` are left as ``None`` they are derived
    from the bounds after subtracting the gaps between neighbouring cells.
    """

    rows: int = 1
    cols: int = 1
    cell_width: int | None = None
    cell_height: int | None = None
    row_gap: int = 0
    col_gap: int = 0

    def __post_init__(self) -> None:
        """Derive any cell dimension that was not supplied explicitly."""
        if self.cell_width is None:
            # Gaps only exist between columns, hence ``cols - 1`` of them.
            horizontal_gaps = 0
            if self.cols > 1:
                horizontal_gaps = self.col_gap * (self.cols - 1)
            self.cell_width = (self.bounds.width - horizontal_gaps) // self.cols
        if self.cell_height is None:
            vertical_gaps = 0
            if self.rows > 1:
                vertical_gaps = self.row_gap * (self.rows - 1)
            self.cell_height = (self.bounds.height - vertical_gaps) // self.rows

    def get_action_point(self, target: tuple[int, int] | int | None = None) -> tuple[int, int]:
        """Return the center of a cell, or of the whole grid.

        Args:
            target: ``None`` for the grid center, an ``int`` for a flat
                row-major cell index, or a ``(row, col)`` pair.

        Returns:
            ``(x, y)`` pixel coordinate of the selected center.
        """
        if target is None:
            return self.bounds.center

        # A flat index is split row-major using the column count.
        row, col = divmod(target, self.cols) if isinstance(target, int) else target

        assert self.cell_width is not None
        assert self.cell_height is not None

        # Distance between the origins of two neighbouring cells.
        x_pitch = self.cell_width + self.col_gap
        y_pitch = self.cell_height + self.row_gap
        center_x = self.bounds.x + col * x_pitch + self.cell_width // 2
        center_y = self.bounds.y + row * y_pitch + self.cell_height // 2
        return (center_x, center_y)

    def cell_bounds(self, row: int, col: int) -> Bounds:
        """Return the rectangle occupied by the cell at ``(row, col)``."""
        assert self.cell_width is not None
        assert self.cell_height is not None

        left = self.bounds.x + col * (self.cell_width + self.col_gap)
        top = self.bounds.y + row * (self.cell_height + self.row_gap)
        return Bounds(x=left, y=top, width=self.cell_width, height=self.cell_height)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass
class ScrollRegion(Region):
    """Scrollable area; scroll actions are issued at the bounds' center."""

    # Magnitude returned by ``get_scroll_pixels``.
    scroll_step: int = 100
    # Orientation label; stored only, not read by the methods of this class.
    direction: str = "vertical"

    def get_action_point(self, target: Any = None) -> tuple[int, int]:
        """Return the center of ``bounds``; ``target`` is ignored."""
        anchor = self.bounds.center
        return anchor

    def get_scroll_pixels(self, direction: str = "down") -> int:
        """Return the signed scroll amount for ``direction``.

        ``"up"`` and ``"left"`` give ``-scroll_step``; every other value
        gives ``+scroll_step``.
        """
        return -self.scroll_step if direction in ("up", "left") else self.scroll_step
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass
class DropdownRegion(Region):
    """Dropdown/select field whose items are stacked below the field."""

    items: Sequence[str] = dataclass_field(default_factory=list)
    item_height: int = 20

    def get_action_point(self, target: str | int | None = None) -> tuple[int, int]:
        """Return the click point for the field itself or for one item.

        Args:
            target: ``None`` for the field, an item label, or an item index.

        Returns:
            The center of ``bounds`` when ``target`` is ``None`` or a label
            that is not in ``items``; otherwise the center of the selected
            item's row, with rows starting at ``bounds.bottom``.
        """
        if target is None:
            return self.bounds.center

        if isinstance(target, str):
            positions = [pos for pos, item in enumerate(self.items) if item == target]
            if not positions:
                # Unknown label: fall back to clicking the field itself.
                return self.bounds.center
            index = positions[0]
        else:
            index = target

        row_top = self.bounds.bottom + (index * self.item_height)
        return (self.bounds.center[0], row_top + self.item_height // 2)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# =============================================================================
|
|
195
|
+
# DSL Functions - the Rails-like interface
|
|
196
|
+
# =============================================================================
|
|
197
|
+
|
|
198
|
+
# (x, y, width, height) rectangle as accepted by the DSL helper functions in
# this module; each helper converts it with Bounds.from_tuple.
BoundsTuple = tuple[int, int, int, int]
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def region(bounds: BoundsTuple) -> ClickRegion:
    """Declare a plain clickable region.

    Args:
        bounds: ``(x, y, width, height)`` of the region.

    Example:
        header = region((0, 0, 100, 50))
    """
    rect = Bounds.from_tuple(bounds)
    return ClickRegion(bounds=rect)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def button(
    bounds: BoundsTuple,
    label: str = "",
    description: str = "",
    tolerance: tuple[int, int] = (5, 5),
) -> ButtonRegion:
    """Declare a button region.

    Args:
        bounds: ``(x, y, width, height)`` of the button.
        label: Stored on the region as ``label``.
        description: Stored on the region as ``description``.
        tolerance: Stored on the region as ``tolerance``.

    Example:
        back_month = button((7, 192, 20, 12), label="Back Month")
    """
    rect = Bounds.from_tuple(bounds)
    return ButtonRegion(bounds=rect, label=label, description=description, tolerance=tolerance)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def grid(
    bounds: BoundsTuple,
    rows: int = 1,
    cols: int = 1,
    cell_width: int | None = None,
    cell_height: int | None = None,
    row_gap: int = 0,
    col_gap: int = 0,
) -> GridRegion:
    """Declare a grid region.

    Args:
        bounds: ``(x, y, width, height)`` of the whole grid.
        rows: Number of rows.
        cols: Number of columns.
        cell_width: Cell width; ``None`` lets GridRegion derive it.
        cell_height: Cell height; ``None`` lets GridRegion derive it.
        row_gap: Gap between rows.
        col_gap: Gap between columns.

    Example:
        day_grid = grid((10, 50, 210, 150), rows=6, cols=7)
        data_grid = grid((0, 100, 800, 400), rows=10, cols=5, row_gap=2)
    """
    rect = Bounds.from_tuple(bounds)
    return GridRegion(
        bounds=rect,
        rows=rows,
        cols=cols,
        cell_width=cell_width,
        cell_height=cell_height,
        row_gap=row_gap,
        col_gap=col_gap,
    )
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def scrollable(
    bounds: BoundsTuple,
    step: int = 100,
    direction: str = "vertical",
) -> ScrollRegion:
    """Declare a scrollable region.

    Args:
        bounds: ``(x, y, width, height)`` of the scrollable area.
        step: Stored on the region as ``scroll_step``.
        direction: Stored on the region as ``direction``.

    Example:
        content = scrollable((0, 100, 800, 500), step=100)
    """
    rect = Bounds.from_tuple(bounds)
    return ScrollRegion(bounds=rect, scroll_step=step, direction=direction)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def dropdown(
    bounds: BoundsTuple,
    items: Sequence[str] | None = None,
    item_height: int = 20,
) -> DropdownRegion:
    """Declare a dropdown region.

    Args:
        bounds: ``(x, y, width, height)`` of the dropdown field.
        items: Item labels; ``None`` (or any empty value) becomes a new list.
        item_height: Height of one item row.

    Example:
        month_select = dropdown((100, 10, 80, 25), items=["Jan", "Feb", "Mar"])
    """
    rect = Bounds.from_tuple(bounds)
    # A fresh list per call keeps regions from sharing one default object.
    entries = items if items else []
    return DropdownRegion(bounds=rect, items=entries, item_height=item_height)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
# =============================================================================
|
|
290
|
+
# Screen Meta - configuration for screens
|
|
291
|
+
# =============================================================================
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class ScreenMeta:
    """Plain container for the metadata collected from a Screen class.

    ``name``, ``base_image`` and ``size`` have class-level defaults;
    ``task_types`` is created per instance in ``__init__``.
    """

    name: str = ""
    base_image: str | Path = ""
    size: tuple[int, int] = (1000, 1000)
    task_types: list[str]

    def __init__(self) -> None:
        """Give the instance its own, initially empty, task-type list."""
        self.task_types = list()
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
# =============================================================================
|
|
307
|
+
# Screen Base Class
|
|
308
|
+
# =============================================================================
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class ScreenBase(ABC):
    """Base class for Screen definitions.

    A Screen has many Tasks - this is the core 1:N relationship.
    Each Screen defines the UI layout and can declare which task types it supports.

    Regions are collected automatically when a subclass is defined. Regions
    declared on parent screens are inherited; an attribute redefined in a
    subclass replaces the parent's entry of the same name.

    Example:
        class CalendarScreen(Screen):
            name = "calendar"
            base_image = "calendar.png"
            size = (224, 208)

            # Regions
            day_grid = grid((10, 50, 210, 150), rows=6, cols=7)
            back_month = button((7, 192, 20, 12), label="Back")

            # Tasks that belong to this screen
            task_types = ["click-day", "click-month", "scroll-calendar"]
    """

    # Class-level attributes that can be set directly
    name: ClassVar[str] = ""
    base_image: ClassVar[str | Path] = ""
    size: ClassVar[tuple[int, int]] = (1000, 1000)
    task_types: ClassVar[list[str]] = []

    # Collected metadata
    _regions: ClassVar[dict[str, Region]] = {}
    _meta: ClassVar[ScreenMeta]

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Collect regions and metadata whenever a subclass is defined."""
        super().__init_subclass__(**kwargs)

        # Walk the MRO from the most generic base down to ``cls`` so regions
        # declared on parent screens are kept. Looking only at
        # ``cls.__dict__`` would drop them, making ``get_region`` fail for
        # attributes that are nevertheless reachable on the subclass.
        regions: dict[str, Region] = {}
        for klass in reversed(cls.__mro__):
            for attr_name, value in vars(klass).items():
                if isinstance(value, Region):
                    value.name = attr_name
                    regions[attr_name] = value
                else:
                    # A non-Region redefinition hides an inherited region.
                    regions.pop(attr_name, None)
        cls._regions = regions

        # Build meta from class attributes or inner Meta class
        cls._meta = ScreenMeta()
        meta_fields = ("name", "base_image", "size", "task_types")

        # Check for inner Meta class (Rails style)
        inner_meta = getattr(cls, "Meta", None)
        if inner_meta:
            for attr in meta_fields:
                if hasattr(inner_meta, attr):
                    setattr(cls._meta, attr, getattr(inner_meta, attr))

        # Also check class-level attributes (simpler style); these are applied
        # last and therefore win over the inner Meta class. Empty values
        # (None, "", []) are treated as "not set".
        for attr in meta_fields:
            val = cls.__dict__.get(attr)
            if val is not None and val != "" and val != []:
                setattr(cls._meta, attr, val)

        # Default name from class name
        if not cls._meta.name:
            cls._meta.name = cls.__name__.lower().replace("screen", "")

    @classmethod
    def get_region(cls, name: str) -> Region:
        """Get a region by name.

        Raises:
            KeyError: If no region with that name was collected.
        """
        if name not in cls._regions:
            raise KeyError(f"Region '{name}' not found in {cls.__name__}")
        return cls._regions[name]

    @classmethod
    def regions(cls) -> dict[str, Region]:
        """Get all regions as a shallow copy of the internal mapping."""
        return cls._regions.copy()

    @classmethod
    def meta(cls) -> ScreenMeta:
        """Get screen metadata."""
        return cls._meta

    @classmethod
    def get_task_types(cls) -> list[str]:
        """Get a new list of the task types that belong to this screen."""
        # list() instead of .copy() so a tuple-valued ``task_types`` works too.
        return list(cls._meta.task_types)

    @abstractmethod
    def render(self, state: Any) -> tuple[Any, dict[str, Any]]:
        """Render the screen with given state.

        Returns:
            A two-item tuple whose second item is a ``dict`` (per the
            annotation); the concrete types are up to the subclass.
        """
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# Alias for cleaner imports
|
|
402
|
+
# `Screen` is the name the docstring examples subclass, e.g.
# `class CalendarScreen(Screen)`; it is the same object as ScreenBase.
Screen = ScreenBase
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""Base class for scroll interaction tasks.
|
|
6
|
+
|
|
7
|
+
This module provides an abstract base class for scroll tasks, reducing
|
|
8
|
+
boilerplate code when implementing scroll-up/scroll-down tasks across
|
|
9
|
+
different generators.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from abc import abstractmethod
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from random import Random
|
|
17
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
18
|
+
|
|
19
|
+
from cudag.core.coords import normalize_coord
|
|
20
|
+
from cudag.core.task import BaseTask, TaskContext, TaskSample, TestCase
|
|
21
|
+
from cudag.prompts.tools import ToolCall
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from cudag.core.renderer import BaseRenderer
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ScrollTaskConfig:
    """Static settings that describe one scroll task.

    Bundling the settings in a dataclass lets several scroll tasks be
    declared that differ only in their configuration.

    Attributes:
        task_type: Unique task type identifier (e.g., "scroll-page-down")
        scroll_pixels: Number of pixels to scroll (positive=down, negative=up)
        direction: Human-readable direction ("up" or "down")
        prompt: Prompt text for the training sample
        tolerance: Default tolerance in RU units (x, y)

    Example:
        >>> config = ScrollTaskConfig(
        ...     task_type="scroll-page-down",
        ...     scroll_pixels=300,
        ...     direction="down",
        ...     prompt="Scroll down one page",
        ... )
    """

    # Required settings -- no defaults.
    task_type: str
    scroll_pixels: int
    direction: str
    prompt: str
    # Optional setting.
    tolerance: tuple[int, int] = (100, 6)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ScrollTaskBase(BaseTask):
    """Abstract base class for scroll direction tasks.

    Captures the pattern shared by scroll interaction tasks so concrete tasks
    do not have to re-implement ``generate_sample()``. A subclass only has to:

    1. Set the `config` class variable with a ScrollTaskConfig
    2. Implement `get_scroll_center()` to return the target coordinates
    3. Implement `generate_state()` to create the appropriate state

    The base class takes care of:
    - Rendering the image
    - Saving the image
    - Creating the tool call
    - Building the TaskSample with proper metadata
    - Creating test cases

    Example:
        class ScrollPageDownTask(ScrollTaskBase):
            config = ScrollTaskConfig(
                task_type="scroll-page-down",
                scroll_pixels=300,
                direction="down",
                prompt="Scroll down one page",
            )

            def get_scroll_center(self, metadata: dict) -> tuple[int, int]:
                return metadata["grid_center"]

            def generate_state(self, rng: Random):
                return MyState.generate_for_scroll(rng, "middle")
    """

    # Configuration for this scroll task. Must be set by subclass.
    config: ClassVar[ScrollTaskConfig]

    @property
    def task_type(self) -> str:
        """Task type identifier, read from ``config``."""
        return self.config.task_type

    @abstractmethod
    def get_scroll_center(self, metadata: dict[str, Any]) -> tuple[int, int]:
        """Return the pixel coordinates at which the scroll is performed.

        Args:
            metadata: Second value returned by ``self.renderer.render()``.

        Returns:
            (x, y) pixel coordinates for scroll center
        """

    @abstractmethod
    def generate_state(self, rng: Random) -> Any:
        """Create the state that will be rendered for this task.

        The state should represent a position from which the scroll makes
        sense, for example:
        - scroll-down tasks should generate states near the top/middle
        - scroll-up tasks should generate states near the middle/bottom

        Args:
            rng: Random number generator for reproducibility

        Returns:
            State object that is handed to ``self.renderer.render()``
        """

    def generate_sample(self, ctx: TaskContext) -> TaskSample:
        """Generate one training sample for this scroll task.

        Steps, in order: generate the state, render it, save the image, ask
        ``get_scroll_center()`` for the pixel target, normalize it against
        the image size and assemble the TaskSample.

        Args:
            ctx: Task context; ``ctx.rng`` seeds the state, and ``ctx`` is
                passed on to ``save_image`` and ``build_id``.

        Returns:
            TaskSample with image, prompt, and scroll tool call
        """
        cfg = self.config

        state = self.generate_state(ctx.rng)
        image, render_meta = self.renderer.render(state)

        image_path = self.save_image(image, ctx)
        center_px = self.get_scroll_center(render_meta)
        center_norm = normalize_coord(center_px, image.size)

        sample_id = self.build_id(ctx)
        scroll_call = ToolCall.scroll(center_norm, pixels=cfg.scroll_pixels)
        # Renderer metadata is spread last, so on a key collision its value
        # replaces the config-derived entry.
        sample_meta = {
            "task_type": cfg.task_type,
            "scroll_pixels": cfg.scroll_pixels,
            "scroll_direction": cfg.direction,
            "tolerance": list(cfg.tolerance),
            **render_meta,
        }

        return TaskSample(
            id=sample_id,
            image_path=image_path,
            human_prompt=cfg.prompt,
            tool_call=scroll_call,
            pixel_coords=center_px,
            metadata=sample_meta,
            image_size=image.size,
        )

    def generate_test(self, ctx: TaskContext) -> TestCase:
        """Generate a test case for this scroll task.

        A sample is generated first and its fields are then repackaged as a
        TestCase together with the configured tolerance.

        Args:
            ctx: Task context, forwarded to ``generate_sample``.

        Returns:
            TestCase ready for evaluation
        """
        base_sample = self.generate_sample(ctx)
        expected = base_sample.tool_call.to_dict()
        return TestCase(
            test_id=f"test_{base_sample.id}",
            screenshot=base_sample.image_path,
            prompt=base_sample.human_prompt,
            expected_action=expected,
            tolerance=self.config.tolerance,
            metadata=base_sample.metadata,
            pixel_coords=base_sample.pixel_coords,
        )
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def create_scroll_task_pair(
    base_task_type: str,
    scroll_pixels: int,
    up_prompt: str,
    down_prompt: str,
    tolerance: tuple[int, int] = (100, 6),
) -> tuple[type[ScrollTaskBase], type[ScrollTaskBase]]:
    """Factory function to create a pair of scroll up/down task classes.

    This is a convenience function for creating complementary scroll tasks
    that share the same configuration except for direction.

    The returned classes only carry the configuration: their
    ``get_scroll_center()`` and ``generate_state()`` raise
    ``NotImplementedError`` and must be overridden in a subclass before
    samples can be generated.

    Args:
        base_task_type: Base name for task types (e.g., "scroll-page");
            "-up" / "-down" is appended for the two classes.
        scroll_pixels: Number of pixels to scroll. Only the magnitude is
            used, so the "up" task always gets a negative amount and the
            "down" task a positive one, even if a negative value is passed.
        up_prompt: Prompt for scroll-up task
        down_prompt: Prompt for scroll-down task
        tolerance: Tolerance in RU units

    Returns:
        Tuple of (ScrollUpTask, ScrollDownTask) classes

    Example:
        >>> ScrollUp, ScrollDown = create_scroll_task_pair(
        ...     "scroll-page",
        ...     300,
        ...     "Scroll up one page",
        ...     "Scroll down one page",
        ... )
    """
    # Normalise to the magnitude: with a plain negation a negative argument
    # would make the task labelled "up" scroll down and vice versa.
    magnitude = abs(scroll_pixels)

    class _ScrollUpTask(ScrollTaskBase):
        config = ScrollTaskConfig(
            task_type=f"{base_task_type}-up",
            scroll_pixels=-magnitude,
            direction="up",
            prompt=up_prompt,
            tolerance=tolerance,
        )

        def get_scroll_center(self, metadata: dict[str, Any]) -> tuple[int, int]:
            raise NotImplementedError("Subclass must implement get_scroll_center()")

        def generate_state(self, rng: Random) -> Any:
            raise NotImplementedError("Subclass must implement generate_state()")

    class _ScrollDownTask(ScrollTaskBase):
        config = ScrollTaskConfig(
            task_type=f"{base_task_type}-down",
            scroll_pixels=magnitude,
            direction="down",
            prompt=down_prompt,
            tolerance=tolerance,
        )

        def get_scroll_center(self, metadata: dict[str, Any]) -> tuple[int, int]:
            raise NotImplementedError("Subclass must implement get_scroll_center()")

        def generate_state(self, rng: Random) -> Any:
            raise NotImplementedError("Subclass must implement generate_state()")

    return _ScrollUpTask, _ScrollDownTask
|