cudag 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudag/__init__.py +334 -0
- cudag/annotation/__init__.py +77 -0
- cudag/annotation/codegen.py +648 -0
- cudag/annotation/config.py +545 -0
- cudag/annotation/loader.py +342 -0
- cudag/annotation/scaffold.py +121 -0
- cudag/annotation/transcription.py +296 -0
- cudag/cli/__init__.py +5 -0
- cudag/cli/main.py +315 -0
- cudag/cli/new.py +873 -0
- cudag/core/__init__.py +364 -0
- cudag/core/button.py +137 -0
- cudag/core/canvas.py +222 -0
- cudag/core/config.py +70 -0
- cudag/core/coords.py +233 -0
- cudag/core/data_grid.py +804 -0
- cudag/core/dataset.py +678 -0
- cudag/core/distribution.py +136 -0
- cudag/core/drawing.py +75 -0
- cudag/core/fonts.py +156 -0
- cudag/core/generator.py +163 -0
- cudag/core/grid.py +367 -0
- cudag/core/grounding_task.py +247 -0
- cudag/core/icon.py +207 -0
- cudag/core/iconlist_task.py +301 -0
- cudag/core/models.py +1251 -0
- cudag/core/random.py +130 -0
- cudag/core/renderer.py +190 -0
- cudag/core/screen.py +402 -0
- cudag/core/scroll_task.py +254 -0
- cudag/core/scrollable_grid.py +447 -0
- cudag/core/state.py +110 -0
- cudag/core/task.py +293 -0
- cudag/core/taskbar.py +350 -0
- cudag/core/text.py +212 -0
- cudag/core/utils.py +82 -0
- cudag/data/surnames.txt +5000 -0
- cudag/modal_apps/__init__.py +4 -0
- cudag/modal_apps/archive.py +103 -0
- cudag/modal_apps/extract.py +138 -0
- cudag/modal_apps/preprocess.py +529 -0
- cudag/modal_apps/upload.py +317 -0
- cudag/prompts/SYSTEM_PROMPT.txt +104 -0
- cudag/prompts/__init__.py +33 -0
- cudag/prompts/system.py +43 -0
- cudag/prompts/tools.py +382 -0
- cudag/py.typed +0 -0
- cudag/schemas/filesystem.json +90 -0
- cudag/schemas/test_record.schema.json +113 -0
- cudag/schemas/train_record.schema.json +90 -0
- cudag/server/__init__.py +21 -0
- cudag/server/app.py +232 -0
- cudag/server/services/__init__.py +9 -0
- cudag/server/services/generator.py +128 -0
- cudag/templates/scripts/archive.sh +35 -0
- cudag/templates/scripts/build.sh +13 -0
- cudag/templates/scripts/extract.sh +54 -0
- cudag/templates/scripts/generate.sh +116 -0
- cudag/templates/scripts/pre-commit.sh +44 -0
- cudag/templates/scripts/preprocess.sh +46 -0
- cudag/templates/scripts/upload.sh +63 -0
- cudag/templates/scripts/verify.py +428 -0
- cudag/validation/__init__.py +35 -0
- cudag/validation/validate.py +508 -0
- cudag-0.3.10.dist-info/METADATA +570 -0
- cudag-0.3.10.dist-info/RECORD +69 -0
- cudag-0.3.10.dist-info/WHEEL +4 -0
- cudag-0.3.10.dist-info/entry_points.txt +2 -0
- cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
|
@@ -0,0 +1,545 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""Runtime annotation configuration for data-driven generators.

This module provides AnnotationConfig, which loads annotation.json at runtime
and provides structured access to elements, icons, tasks, tolerances, and
transcriptions.

Example:
    from pathlib import Path

    from cudag.annotation import AnnotationConfig

    # Load from assets/annotations folder
    config = AnnotationConfig.load(Path("assets/annotations"))

    # Access icons from an iconlist element
    for icon in config.get_icons("desktop"):
        print(f"{icon.label} at ({icon.center_x}, {icon.center_y})")

    # Get task templates
    for task in config.tasks:
        prompt = task.render_prompt(icon_label="Open Dental")

    # Access grid transcription data
    grid = config.get_element_by_label("patient-account")
    if grid and grid.transcription:
        for row in grid.transcription.rows:
            print([cell.text for cell in row.cells])
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import json
|
|
35
|
+
import re
|
|
36
|
+
from dataclasses import dataclass, field
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import Any
|
|
39
|
+
|
|
40
|
+
from cudag.annotation.transcription import (
|
|
41
|
+
ParsedTranscription,
|
|
42
|
+
parse_text_transcription,
|
|
43
|
+
parse_transcription,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class AnnotatedIcon:
    """One icon entry belonging to an iconlist element."""

    center_x: int
    """Horizontal position of the icon center (bbox-relative or absolute)."""

    center_y: int
    """Vertical position of the icon center (bbox-relative or absolute)."""

    label: str = ""
    """Name used for the icon in prompts (e.g., 'Open Dental', 'recycle bin')."""

    required: bool = False
    """When True, the icon is always included even if varyN is enabled."""

    icon_file_id: str = ""
    """Key that maps to the icon image file (e.g., 'od' -> icon-tb-od.png)."""

    # Details about the owning element (filled in after parsing)
    element_id: str = ""
    element_label: str = ""
    bbox_offset: tuple[int, int] = (0, 0)

    @property
    def absolute_center(self) -> tuple[int, int]:
        """Return the icon center shifted by the bbox offset, as (x, y)."""
        offset = self.bbox_offset
        abs_x = offset[0] + self.center_x
        abs_y = offset[1] + self.center_y
        return (abs_x, abs_y)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class AnnotatedElement:
    """A UI element parsed from the annotation, carrying all of its metadata."""

    id: str
    element_type: str
    bbox: tuple[int, int, int, int]  # x, y, width, height
    label: str = ""

    # Icon list properties
    icons: list[AnnotatedIcon] = field(default_factory=list)
    icon_width: int = 0
    icon_height: int = 0

    # VaryN feature: show random subset of icons
    vary_n: bool = False
    """When True, a random subset of the icons is shown rather than all."""

    random_order: bool = False
    """When True, the icon order is shuffled."""

    layout: str = ""
    """Layout style: 'stacked', 'sparse', 'random', or empty for default."""

    # Grid properties
    rows: int = 0
    """Row count for grid elements."""

    cols: int = 0
    """Column count for grid elements."""

    col_widths: list[float] = field(default_factory=list)
    """Relative widths of the columns (should sum to 1.0)."""

    row_heights: list[float] = field(default_factory=list)
    """Relative heights of the rows (should sum to 1.0)."""

    selectable_cell: bool = False
    """When True, individual grid cells can be selected."""

    first_row_header: bool = False
    """When True, the first row is a fixed header (doesn't scroll)."""

    last_col_scroll: bool = False
    """When True, the last column is reserved for the vertical scrollbar."""

    last_row_scroll: bool = False
    """When True, the last row is reserved for the horizontal scrollbar."""

    hide_grid_lines: bool = False
    """When True, grid lines between cells are not drawn."""

    show_grid_lines: bool = False
    """When True, grid lines between cells are drawn."""

    scrollable: bool = False
    """When True, the element supports scrolling."""

    # Tolerance from annotation (in pixels)
    tolerance_x: int = 0
    tolerance_y: int = 0

    # Mask properties
    mask_color: str | None = None

    # Loading element properties
    loading_image: str | None = None
    """Base64-encoded loading overlay image (data:image/png;base64,...)."""

    # Text properties
    h_align: str = "center"
    v_align: str = "center"

    # Grounding flag
    grounding: bool = False
    """When True, the element takes part in grounding tasks."""

    grounding_label: str = ""
    """Human-readable label for grounding tasks (e.g., 'Appts', '◀ Y')."""

    # Transcription data (from OCR annotations)
    ocr: bool = False
    """When True, the element carries OCR transcription data."""

    transcription_raw: str = ""
    """Unparsed transcription (HTML for grids, plain text for text elements)."""

    transcription: ParsedTranscription | None = None
    """Parsed table transcription for grid elements (None for non-grid elements)."""

    transcription_text: str = ""
    """Plain-text transcription for text elements (empty for grids)."""

    @property
    def center(self) -> tuple[int, int]:
        """Midpoint of the bounding box (half extents use floor division)."""
        left, top, width, height = self.bbox
        mid_x = left + width // 2
        mid_y = top + height // 2
        return (mid_x, mid_y)

    @property
    def tolerance(self) -> tuple[int, int]:
        """The pixel tolerance packed as an (x, y) tuple."""
        return (self.tolerance_x, self.tolerance_y)

    def get_required_icons(self) -> list[AnnotatedIcon]:
        """Return the icons whose ``required`` flag is set."""
        selected: list[AnnotatedIcon] = []
        for candidate in self.icons:
            if candidate.required:
                selected.append(candidate)
        return selected

    def get_optional_icons(self) -> list[AnnotatedIcon]:
        """Return the icons whose ``required`` flag is not set."""
        selected: list[AnnotatedIcon] = []
        for candidate in self.icons:
            if not candidate.required:
                selected.append(candidate)
        return selected

    @property
    def data_rows(self) -> int:
        """Rows left for data once header/scroll rows are subtracted (min 0)."""
        reserved = 0
        for flag in (self.first_row_header, self.last_row_scroll):
            if flag:
                reserved += 1
        return max(0, self.rows - reserved)

    @property
    def data_cols(self) -> int:
        """Columns left for data once the scroll column is subtracted (min 0)."""
        remaining = self.cols - 1 if self.last_col_scroll else self.cols
        return max(0, remaining)

    @property
    def has_transcription(self) -> bool:
        """Whether OCR is enabled and a raw transcription string is present."""
        enabled = self.ocr
        return bool(self.transcription_raw) if enabled else enabled

    @property
    def is_grid_with_data(self) -> bool:
        """Whether this is a grid element that has a parsed transcription."""
        if self.element_type == "grid":
            return self.transcription is not None
        return False

    def get_transcription_column(self, col_index: int) -> list[str]:
        """Return every value of one transcription column.

        Args:
            col_index: Column index (0-based)

        Returns:
            The column's cell texts, or an empty list when there is no
            parsed transcription
        """
        table = self.transcription
        return table.column(col_index) if table else []

    def get_transcription_sample(
        self, col_index: int, max_samples: int = 10
    ) -> list[str]:
        """Return sample values taken from one transcription column.

        Handy for seeding random generators with realistic example data.

        Args:
            col_index: Column index (0-based)
            max_samples: Upper bound on the number of samples returned

        Returns:
            Whatever the parsed transcription's ``sample_values`` yields, or
            an empty list when there is no parsed transcription
        """
        table = self.transcription
        return table.sample_values(col_index, max_samples) if table else []
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
@dataclass
class AnnotatedTask:
    """Task template from annotation."""

    id: str
    prompt_template: str
    """Prompt with placeholders like [icon_label]."""

    target_element_id: str
    action: str = "left_click"
    wait_time: float = 0.0
    """Wait time in seconds for wait actions."""

    task_type: str = ""
    """Task type identifier (e.g., 'dclick-desktop-icon', 'load-wait')."""

    def render_prompt(self, **kwargs: object) -> str:
        """Render the prompt template with substitutions.

        Every ``[key]`` placeholder whose key is passed as a keyword argument
        is replaced by ``str(value)``. Placeholders without a matching keyword
        are left untouched.

        All substitutions happen in a single pass over the template, so a
        substituted value that itself looks like a placeholder (for example
        an icon label of ``"[action]"``) is inserted literally and is never
        expanded a second time.

        Args:
            **kwargs: Substitutions like icon_label="Open Dental"

        Returns:
            Rendered prompt string
        """
        if not kwargs:
            return self.prompt_template

        # One alternation of the literal "[key]" tokens; re.escape keeps the
        # brackets (and anything unusual in a key) from being read as regex.
        pattern = "|".join(re.escape(f"[{key}]") for key in kwargs)

        # A function replacement is used so backslashes in values stay
        # literal instead of being treated as group references.
        return re.sub(
            pattern,
            lambda match: str(kwargs[match.group(0)[1:-1]]),
            self.prompt_template,
        )
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@dataclass
class AnnotationConfig:
    """Runtime configuration loaded from annotation.json.

    Provides structured access to annotated elements, icons, and tasks
    for data-driven generation.
    """

    screen_name: str
    image_size: tuple[int, int]
    elements: list[AnnotatedElement]
    tasks: list[AnnotatedTask]
    image_path: str = ""

    # Paths to annotation assets
    annotations_dir: Path | None = None

    @classmethod
    def load(cls, annotations_dir: Path | str) -> "AnnotationConfig":
        """Load annotation config from a directory.

        Args:
            annotations_dir: Path to annotations directory containing annotation.json

        Returns:
            Loaded AnnotationConfig instance, with ``annotations_dir`` set to
            the given directory

        Raises:
            FileNotFoundError: If annotation.json does not exist in the directory
        """
        annotations_dir = Path(annotations_dir)
        json_path = annotations_dir / "annotation.json"

        if not json_path.exists():
            raise FileNotFoundError(f"annotation.json not found in {annotations_dir}")

        # Read as UTF-8 explicitly: the platform default encoding is not
        # UTF-8 everywhere, and labels may contain non-ASCII text.
        with open(json_path, encoding="utf-8") as f:
            data = json.load(f)

        config = cls._parse_dict(data)
        config.annotations_dir = annotations_dir
        return config

    @classmethod
    def _parse_dict(cls, data: dict[str, Any]) -> "AnnotationConfig":
        """Build a config from the decoded annotation.json dictionary.

        Missing keys fall back to defaults: no elements, no tasks, screen name
        "untitled", image size 1920x1080 and an empty image path.
        """
        elements = [cls._parse_element(el) for el in data.get("elements", [])]
        tasks = [cls._parse_task(t) for t in data.get("tasks", [])]

        # "or" also covers an explicit null / empty value for imageSize.
        image_size = data.get("imageSize") or [1920, 1080]

        return cls(
            screen_name=data.get("screenName", "untitled"),
            image_size=(image_size[0], image_size[1]),
            elements=elements,
            tasks=tasks,
            image_path=data.get("imagePath", ""),
        )

    @classmethod
    def _parse_element(cls, el: dict[str, Any]) -> AnnotatedElement:
        """Parse a single element dictionary, including its icons.

        The element's transcription string, when non-empty, is parsed as a
        table for "grid" elements and as plain text for every other type.
        """
        # "or" also covers an explicit null bbox.
        bbox = el.get("bbox") or {}
        bbox_tuple = (
            bbox.get("x", 0),
            bbox.get("y", 0),
            bbox.get("width", 0),
            bbox.get("height", 0),
        )

        element_label = el.get("text", "")
        element_id = el.get("id", "")
        element_type = el.get("type", "button")

        # Parse icons if present (for iconlist type)
        # NOTE(review): each icon's element_id is read from the icon's own
        # "elementId" key, not from the parent element's id - confirm intended.
        icons: list[AnnotatedIcon] = [
            AnnotatedIcon(
                center_x=icon_data.get("centerX", 0),
                center_y=icon_data.get("centerY", 0),
                label=icon_data.get("label", ""),
                required=icon_data.get("required", False),
                icon_file_id=icon_data.get("iconFileId", ""),
                element_id=icon_data.get("elementId", ""),
                element_label=element_label,
                bbox_offset=(bbox_tuple[0], bbox_tuple[1]),
            )
            for icon_data in el.get("icons", [])
        ]

        # Parse transcription based on element type
        ocr_enabled = el.get("ocr", False)
        transcription_raw = el.get("transcription", "")
        parsed_transcription: ParsedTranscription | None = None
        transcription_text = ""

        if transcription_raw:
            if element_type == "grid":
                # Grid elements have HTML table transcriptions
                parsed_transcription = parse_transcription(transcription_raw)
            else:
                # Text and other elements have plain text transcriptions
                transcription_text = parse_text_transcription(transcription_raw)

        return AnnotatedElement(
            id=element_id,
            element_type=element_type,
            bbox=bbox_tuple,
            label=element_label,
            icons=icons,
            icon_width=el.get("iconWidth", 0),
            icon_height=el.get("iconHeight", 0),
            vary_n=el.get("varyN", False),
            random_order=el.get("randomOrder", False),
            layout=el.get("layout", ""),
            rows=el.get("rows", 0),
            cols=el.get("cols", 0),
            col_widths=el.get("colWidths", []),
            row_heights=el.get("rowHeights", []),
            selectable_cell=el.get("selectableCell", False),
            first_row_header=el.get("firstRowHeader", False),
            last_col_scroll=el.get("lastColScroll", False),
            last_row_scroll=el.get("lastRowScroll", False),
            hide_grid_lines=el.get("hideGridLines", False),
            show_grid_lines=el.get("showGridLines", False),
            scrollable=el.get("scrollable", False),
            tolerance_x=el.get("toleranceX", 0),
            tolerance_y=el.get("toleranceY", 0),
            mask_color=el.get("maskColor"),
            loading_image=el.get("loadingImage"),
            h_align=el.get("hAlign", "center"),
            v_align=el.get("vAlign", "center"),
            grounding=el.get("grounding", False),
            grounding_label=el.get("groundingLabel", ""),
            ocr=ocr_enabled,
            transcription_raw=transcription_raw,
            transcription=parsed_transcription,
            transcription_text=transcription_text,
        )

    @classmethod
    def _parse_task(cls, t: dict[str, Any]) -> AnnotatedTask:
        """Parse a single task dictionary; ``waitTime`` is coerced to float."""
        return AnnotatedTask(
            id=t.get("id", ""),
            prompt_template=t.get("prompt", ""),
            target_element_id=t.get("targetElementId", ""),
            action=t.get("action", "left_click"),
            wait_time=float(t.get("waitTime", 0)),
            task_type=t.get("taskType", ""),
        )

    def get_element(self, element_id: str) -> AnnotatedElement | None:
        """Get the first element with the given ID, or None."""
        return next((el for el in self.elements if el.id == element_id), None)

    def get_element_by_label(self, label: str) -> AnnotatedElement | None:
        """Get the first element with the given label (text field), or None."""
        return next((el for el in self.elements if el.label == label), None)

    def get_icons(self, element_label: str) -> list[AnnotatedIcon]:
        """Get all icons from an element by its label.

        Args:
            element_label: The 'text' field of the iconlist element (e.g., 'desktop', 'taskbar')

        Returns:
            The icon list of the first element with that label, or an empty
            list when no element matches
        """
        element = self.get_element_by_label(element_label)
        return element.icons if element is not None else []

    def get_icon_by_label(
        self, element_label: str, icon_label: str
    ) -> AnnotatedIcon | None:
        """Get a specific icon by element label and icon label.

        The icon label comparison is case-insensitive; the element label
        comparison is exact.

        Args:
            element_label: The element's text (e.g., 'desktop')
            icon_label: The icon's label (e.g., 'open dental')

        Returns:
            AnnotatedIcon or None if not found
        """
        wanted = icon_label.lower()
        return next(
            (
                icon
                for icon in self.get_icons(element_label)
                if icon.label.lower() == wanted
            ),
            None,
        )

    def get_labeled_icons(self, element_label: str) -> list[AnnotatedIcon]:
        """Get only icons that have labels (non-empty label field).

        Args:
            element_label: The element's text (e.g., 'desktop')

        Returns:
            List of AnnotatedIcon objects with non-empty labels
        """
        return [icon for icon in self.get_icons(element_label) if icon.label]

    def get_tasks_for_element(self, element_id: str) -> list[AnnotatedTask]:
        """Get all tasks targeting a specific element."""
        return [t for t in self.tasks if t.target_element_id == element_id]

    def get_loading_element(self) -> AnnotatedElement | None:
        """Get the first element of type "loading", or None."""
        return next(
            (el for el in self.elements if el.element_type == "loading"), None
        )

    def get_wait_task(self) -> AnnotatedTask | None:
        """Get the first task whose action is "wait", or None."""
        return next((task for task in self.tasks if task.action == "wait"), None)

    def get_click_tasks(self) -> list[AnnotatedTask]:
        """Get all tasks whose action is anything other than "wait"."""
        return [t for t in self.tasks if t.action != "wait"]

    def get_task_by_type(self, task_type: str) -> AnnotatedTask | None:
        """Get the first task with the given task_type, or None."""
        return next(
            (task for task in self.tasks if task.task_type == task_type), None
        )

    def get_tasks_by_type(self, task_type: str) -> list[AnnotatedTask]:
        """Get all tasks with a specific task_type."""
        return [t for t in self.tasks if t.task_type == task_type]

    @property
    def masked_image_path(self) -> Path | None:
        """Path to masked.png if annotations_dir is set, else None."""
        if self.annotations_dir:
            return self.annotations_dir / "masked.png"
        return None

    @property
    def original_image_path(self) -> Path | None:
        """Path to original.png if annotations_dir is set, else None."""
        if self.annotations_dir:
            return self.annotations_dir / "original.png"
        return None

    def to_snake_case(self, name: str) -> str:
        """Convert a name to a lower-case, underscore-separated identifier.

        Non-alphanumeric characters become underscores, a lower-to-upper case
        boundary gets an underscore, runs of underscores collapse to one, and
        a leading digit is prefixed with "el_". An empty result becomes
        "unnamed".
        """
        clean = re.sub(r"[^a-zA-Z0-9]", "_", name)
        snake = re.sub(r"([a-z])([A-Z])", r"\1_\2", clean).lower()
        snake = re.sub(r"_+", "_", snake)
        if snake and snake[0].isdigit():
            snake = "el_" + snake
        return snake or "unnamed"
|