cudag 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudag/__init__.py +334 -0
- cudag/annotation/__init__.py +77 -0
- cudag/annotation/codegen.py +648 -0
- cudag/annotation/config.py +545 -0
- cudag/annotation/loader.py +342 -0
- cudag/annotation/scaffold.py +121 -0
- cudag/annotation/transcription.py +296 -0
- cudag/cli/__init__.py +5 -0
- cudag/cli/main.py +315 -0
- cudag/cli/new.py +873 -0
- cudag/core/__init__.py +364 -0
- cudag/core/button.py +137 -0
- cudag/core/canvas.py +222 -0
- cudag/core/config.py +70 -0
- cudag/core/coords.py +233 -0
- cudag/core/data_grid.py +804 -0
- cudag/core/dataset.py +678 -0
- cudag/core/distribution.py +136 -0
- cudag/core/drawing.py +75 -0
- cudag/core/fonts.py +156 -0
- cudag/core/generator.py +163 -0
- cudag/core/grid.py +367 -0
- cudag/core/grounding_task.py +247 -0
- cudag/core/icon.py +207 -0
- cudag/core/iconlist_task.py +301 -0
- cudag/core/models.py +1251 -0
- cudag/core/random.py +130 -0
- cudag/core/renderer.py +190 -0
- cudag/core/screen.py +402 -0
- cudag/core/scroll_task.py +254 -0
- cudag/core/scrollable_grid.py +447 -0
- cudag/core/state.py +110 -0
- cudag/core/task.py +293 -0
- cudag/core/taskbar.py +350 -0
- cudag/core/text.py +212 -0
- cudag/core/utils.py +82 -0
- cudag/data/surnames.txt +5000 -0
- cudag/modal_apps/__init__.py +4 -0
- cudag/modal_apps/archive.py +103 -0
- cudag/modal_apps/extract.py +138 -0
- cudag/modal_apps/preprocess.py +529 -0
- cudag/modal_apps/upload.py +317 -0
- cudag/prompts/SYSTEM_PROMPT.txt +104 -0
- cudag/prompts/__init__.py +33 -0
- cudag/prompts/system.py +43 -0
- cudag/prompts/tools.py +382 -0
- cudag/py.typed +0 -0
- cudag/schemas/filesystem.json +90 -0
- cudag/schemas/test_record.schema.json +113 -0
- cudag/schemas/train_record.schema.json +90 -0
- cudag/server/__init__.py +21 -0
- cudag/server/app.py +232 -0
- cudag/server/services/__init__.py +9 -0
- cudag/server/services/generator.py +128 -0
- cudag/templates/scripts/archive.sh +35 -0
- cudag/templates/scripts/build.sh +13 -0
- cudag/templates/scripts/extract.sh +54 -0
- cudag/templates/scripts/generate.sh +116 -0
- cudag/templates/scripts/pre-commit.sh +44 -0
- cudag/templates/scripts/preprocess.sh +46 -0
- cudag/templates/scripts/upload.sh +63 -0
- cudag/templates/scripts/verify.py +428 -0
- cudag/validation/__init__.py +35 -0
- cudag/validation/validate.py +508 -0
- cudag-0.3.10.dist-info/METADATA +570 -0
- cudag-0.3.10.dist-info/RECORD +69 -0
- cudag-0.3.10.dist-info/WHEEL +4 -0
- cudag-0.3.10.dist-info/entry_points.txt +2 -0
- cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/cli/new.py
ADDED
|
@@ -0,0 +1,873 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""Project scaffolding for cudag new command."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import shutil
|
|
10
|
+
import stat
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from textwrap import dedent
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def create_project(name: str, parent_dir: Path) -> Path:
    """Create a new CUDAG project with scaffolding.

    The name is normalized into a hyphenated project name (used for the
    directory and package metadata) and an underscored module name (used by
    the writers to derive generated class names).

    Args:
        name: Project name (e.g., "appointment-picker")
        parent_dir: Directory to create project in

    Returns:
        Path to created project directory

    Raises:
        ValueError: If the name is empty after normalization, or if the
            derived module name is not a valid Python identifier.
    """
    # Normalize name
    project_name = name.strip().lower().replace(" ", "-").replace("_", "-")
    module_name = project_name.replace("-", "_")

    # An empty name would make project_dir equal parent_dir, so the scaffold
    # (and `git init`) would be written straight into the parent directory.
    if not project_name:
        raise ValueError("Project name must not be empty")

    # The writers build class names from module_name; anything that is not an
    # identifier (leading digit, dots, slashes, ...) yields uncompilable code.
    if not module_name.isidentifier():
        raise ValueError(
            f"Project name {name!r} does not map to a valid Python identifier "
            f"(got {module_name!r})"
        )

    project_dir = parent_dir / project_name
    project_dir.mkdir(parents=True, exist_ok=True)

    # Create directory structure
    for subdir in (
        "config",
        "tasks",
        "assets",
        "datasets",
        "models",
        "scripts",
        "modal_apps",
    ):
        (project_dir / subdir).mkdir(exist_ok=True)

    # Create files
    _write_pyproject(project_dir, project_name, module_name)
    _write_gitignore(project_dir)
    _write_screen(project_dir, module_name)
    _write_state(project_dir, module_name)
    _write_renderer(project_dir, module_name)
    _write_generator(project_dir, module_name)
    _write_models_init(project_dir, module_name)
    _write_tasks_init(project_dir)
    _write_example_task(project_dir, module_name)
    _write_dataset_config(project_dir, project_name)
    _write_readme(project_dir, project_name)
    _write_scripts(project_dir, module_name)
    _write_modal_apps(project_dir)
    _write_makefile(project_dir, module_name)
    _write_copyright(project_dir)
    # Git is initialized last so the initial commit contains every file above.
    _init_git(project_dir)

    return project_dir
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _write_pyproject(project_dir: Path, project_name: str, module_name: str) -> None:
    """Generate ``pyproject.toml`` in the project root.

    Args:
        project_dir: Root directory of the scaffolded project.
        project_name: Hyphenated name inserted into the ``[project]`` table.
        module_name: Accepted for parity with the other writers; this
            template does not use it.
    """
    # NOTE(review): the uv source points at "../../cudag" relative to the new
    # project - presumably a local checkout layout; confirm for other setups.
    toml_text = dedent(f'''\
        [build-system]
        requires = ["hatchling"]
        build-backend = "hatchling.build"

        [project]
        name = "{project_name}"
        version = "0.1.0"
        description = "CUDAG project for {project_name}"
        requires-python = ">=3.11"
        dependencies = [
            "cudag",
            "pillow>=10.0.0",
            "pyyaml>=6.0",
        ]

        [tool.uv.sources]
        cudag = {{ path = "../../cudag", editable = true }}

        [project.optional-dependencies]
        dev = [
            "ruff>=0.1.0",
            "mypy>=1.0.0",
            "types-PyYAML>=6.0.0",
        ]

        [tool.hatch.build.targets.wheel]
        packages = ["."]

        [tool.ruff]
        line-length = 100
        target-version = "py311"

        [tool.mypy]
        python_version = "3.11"
        strict = true
    ''')
    target = project_dir / "pyproject.toml"
    target.write_text(toml_text)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _write_gitignore(project_dir: Path) -> None:
    """Generate ``.gitignore`` in the project root.

    The file ignores Python build artefacts, the ``datasets/`` output
    directory and common editor folders.

    Args:
        project_dir: Root directory of the scaffolded project.
    """
    ignore_lines = (
        "# Python",
        "__pycache__/",
        "*.py[cod]",
        ".venv/",
        "*.egg-info/",
        "",
        "# Generated datasets",
        "datasets/",
        "",
        "# IDE",
        ".idea/",
        ".vscode/",
        "*.swp",
    )
    # Every line, including the last, is newline-terminated.
    text = "\n".join(ignore_lines) + "\n"
    (project_dir / ".gitignore").write_text(text)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _write_screen(project_dir: Path, module_name: str) -> None:
    """Write screen.py with example Screen class.

    The generated class is named ``<PascalCaseModule>Screen`` and its
    ``render`` stub raises ``NotImplementedError`` pointing at the matching
    ``<PascalCaseModule>Renderer`` class.

    Args:
        project_dir: Root directory of the scaffolded project.
        module_name: Underscored module name the class names are derived from.
    """
    prefix = "".join(word.title() for word in module_name.split("_"))
    class_name = prefix + "Screen"
    # Build the renderer name from the prefix rather than substituting inside
    # class_name: str.replace rewrites every "Screen" occurrence, so a module
    # such as "login_screen" would otherwise name a non-existent renderer.
    renderer_name = prefix + "Renderer"

    content = dedent(f'''\
        # Derivative works may be released by researchers,
        # but original files may not be redistributed or used beyond research purposes.

        """Screen definition for {module_name}."""

        from typing import Any, NoReturn

        from cudag.core import Screen

        # Uncomment to use these region types:
        # from cudag.core import Bounds, ButtonRegion, GridRegion, ClickRegion


        class {class_name}(Screen):
            """Define the screen layout and interactive regions.

            Edit this class to define your screen's regions:
            - ButtonRegion for clickable buttons
            - GridRegion for grid-like clickable areas
            - ScrollRegion for scrollable areas
            - DropdownRegion for dropdown menus
            """

            class Meta:
                name = "{module_name}"
                base_image = "assets/base.png"  # Your base screenshot
                size = (800, 600)  # Image dimensions

            # Example: Define a clickable grid region
            # grid = GridRegion(
            #     bounds=Bounds(x=100, y=100, width=400, height=300),
            #     rows=5,
            #     cols=4,
            # )

            # Example: Define a button
            # submit_button = ButtonRegion(
            #     bounds=Bounds(x=350, y=450, width=100, height=40),
            #     label="Submit",
            #     description="Submit the form",
            # )

            def render(self, state: Any) -> NoReturn:
                """Render is handled by the Renderer class."""
                raise NotImplementedError("Use {renderer_name} instead")
    ''')
    (project_dir / "screen.py").write_text(content)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _write_state(project_dir: Path, module_name: str) -> None:
    """Generate ``state.py`` containing a placeholder State dataclass.

    Args:
        project_dir: Root directory of the scaffolded project.
        module_name: Underscored module name; its PascalCase form plus
            ``State`` becomes the generated class name.
    """
    pascal_prefix = "".join(part.title() for part in module_name.split("_"))
    state_cls = f"{pascal_prefix}State"

    source_text = dedent(f'''\
        # Derivative works may be released by researchers,
        # but original files may not be redistributed or used beyond research purposes.

        """State definition for {module_name}."""

        from dataclasses import dataclass
        from cudag.core import BaseState


        @dataclass
        class {state_cls}(BaseState):
            """Dynamic data that populates the screen.

            Add fields for all the data needed to render your screen.
            """

            # Example fields - replace with your own:
            # selected_item: int = 0
            # items: list[str] = field(default_factory=list)

            pass  # Remove this when you add fields
    ''')
    destination = project_dir / "state.py"
    destination.write_text(source_text)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _write_renderer(project_dir: Path, module_name: str) -> None:
    """Generate ``renderer.py`` containing a skeleton Renderer class.

    The generated module imports the project's Screen and State classes and
    defines a ``BaseRenderer`` subclass with ``load_assets`` and ``render``
    stubs.

    Args:
        project_dir: Root directory of the scaffolded project.
        module_name: Underscored module name the three class names are
            derived from.
    """
    # All three class names share the same PascalCase prefix.
    pascal_prefix = "".join(part.title() for part in module_name.split("_"))
    screen_cls = f"{pascal_prefix}Screen"
    state_cls = f"{pascal_prefix}State"
    renderer_cls = f"{pascal_prefix}Renderer"

    source_text = dedent(f'''\
        # Derivative works may be released by researchers,
        # but original files may not be redistributed or used beyond research purposes.

        """Renderer for {module_name}."""

        from typing import Any

        from PIL import Image

        from cudag.core import BaseRenderer

        from screen import {screen_cls}
        from state import {state_cls}


        class {renderer_cls}(BaseRenderer[{state_cls}]):
            """Renders the {module_name} screen.

            Loads assets and generates images from state.
            """

            screen_class = {screen_cls}

            def load_assets(self) -> None:
                """Load fonts and other assets."""
                # Example:
                # from PIL import ImageFont
                # self.font = ImageFont.truetype(self.asset_path("fonts", "arial.ttf"), 12)
                pass

            def render(self, state: {state_cls}) -> tuple[Image.Image, dict[str, Any]]:
                """Render the screen with given state.

                Args:
                    state: Current screen state

                Returns:
                    (PIL Image, metadata dict)
                """
                # Load base image
                image = self.load_base_image()

                # TODO: Draw state onto image
                # Example:
                # draw = ImageDraw.Draw(image)
                # draw.text((100, 100), state.some_field, fill="black")

                # Build metadata
                metadata = self.build_metadata(state)

                return image, metadata
    ''')
    destination = project_dir / "renderer.py"
    destination.write_text(source_text)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _write_generator(project_dir: Path, module_name: str) -> None:
    """Generate ``generator.py``, the dataset-generation entry point.

    The generated script parses ``--config`` and ``--exp``, builds a dataset
    name of the form ``prefix[--researcher][--exp]--timestamp``, instantiates
    the project's renderer and ``ExampleTask`` and runs ``DatasetBuilder``.

    Args:
        project_dir: Root directory of the scaffolded project.
        module_name: Underscored module name; its PascalCase form plus
            ``Renderer`` is the renderer class the script imports.
    """
    pascal_prefix = "".join(part.title() for part in module_name.split("_"))
    renderer_cls = f"{pascal_prefix}Renderer"

    # Doubled braces and doubled backslashes below are escapes for this
    # f-string; the generated file receives single braces and "\n".
    source_text = dedent(f'''\
        # Derivative works may be released by researchers,
        # but original files may not be redistributed or used beyond research purposes.

        """Dataset generator for {module_name}.

        Usage:
            python generator.py
            python generator.py --config config/dataset.yaml
        """

        import argparse
        from datetime import datetime
        from pathlib import Path

        from cudag.core import DatasetBuilder, DatasetConfig

        from renderer import {renderer_cls}
        from tasks.example_task import ExampleTask


        def get_researcher_name() -> str | None:
            """Get researcher name from .researcher file if it exists."""
            researcher_file = Path(".researcher")
            if researcher_file.exists():
                content = researcher_file.read_text().strip()
                for line in content.split("\\n"):
                    if line.startswith("Name:"):
                        return line.split(":", 1)[1].strip().lower()
            return None


        def main() -> None:
            """Run dataset generation."""
            parser = argparse.ArgumentParser(description="Generate dataset")
            parser.add_argument(
                "--config",
                type=Path,
                default=Path("config/dataset.yaml"),
                help="Path to dataset config YAML",
            )
            parser.add_argument(
                "--exp",
                type=str,
                default=None,
                help="Experiment label to include in dataset name",
            )
            args = parser.parse_args()

            # Load config
            config = DatasetConfig.from_yaml(args.config)

            # Build dataset name: name--researcher--exp--timestamp
            researcher = get_researcher_name()
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            name_parts = [config.name_prefix]
            if researcher:
                name_parts.append(researcher)
            if args.exp:
                name_parts.append(args.exp)
            name_parts.append(timestamp)
            dataset_name = "--".join(name_parts)

            # Override output_dir with new naming
            config.output_dir = Path("datasets") / dataset_name

            print(f"Loaded config: {{config.name_prefix}}")
            print(f"Tasks: {{config.task_counts}}")

            # Initialize renderer
            renderer = {renderer_cls}(assets_dir=Path("assets"))
            renderer.load_assets()

            # Create tasks - add your tasks here
            tasks = [
                ExampleTask(config=config, renderer=renderer),
                # Add more tasks as needed
            ]

            # Build dataset
            builder = DatasetBuilder(config=config, tasks=tasks)
            output_dir = builder.build()

            # Build tests
            builder.build_tests()

            print(f"\\nDataset generated at: {{output_dir}}")


        if __name__ == "__main__":
            main()
    ''')
    destination = project_dir / "generator.py"
    destination.write_text(source_text)
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _write_models_init(project_dir: Path, module_name: str) -> None:
    """Generate ``models/__init__.py`` re-exporting the stock cudag models.

    The generated module re-exports ``Claim``, ``Patient``, ``Procedure`` and
    ``Provider`` from ``cudag.core`` and carries a commented-out example of a
    custom model.

    Args:
        project_dir: Root directory of the scaffolded project.
        module_name: Underscored module name, mentioned in the generated
            module docstring.
    """
    # "{{2}}" / "{{8}}" are f-string escapes; the generated regex example
    # contains literal "{2}" and "{8}".
    source_text = dedent(f'''\
        # Derivative works may be released by researchers,
        # but original files may not be redistributed or used beyond research purposes.

        """Domain models for {module_name}.

        Define your data types here (Patient, Provider, Claim, etc.)
        with field definitions for realistic data generation.
        """

        # Re-export common models for use in this project
        from cudag.core import Claim as Claim
        from cudag.core import Patient as Patient
        from cudag.core import Procedure as Procedure
        from cudag.core import Provider as Provider

        # Import types for custom model definitions:
        # from cudag.core import (
        #     Model,
        #     StringField,
        #     IntField,
        #     DateField,
        #     ChoiceField,
        #     MoneyField,
        # )

        # Example: Define a custom model
        # class MyCustomModel(Model):
        #     name = StringField(faker="full_name")
        #     account_number = StringField(pattern=r"[A-Z]{{2}}[0-9]{{8}}")
        #     status = ChoiceField(choices=["Active", "Pending", "Closed"])

        __all__ = [
            "Patient",
            "Provider",
            "Procedure",
            "Claim",
        ]
    ''')
    destination = project_dir / "models" / "__init__.py"
    destination.write_text(source_text)
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def _write_tasks_init(project_dir: Path) -> None:
    """Generate ``tasks/__init__.py`` with a header and an import hint.

    Args:
        project_dir: Root directory of the scaffolded project; its ``tasks``
            subdirectory must already exist.
    """
    init_lines = (
        "# Derivative works may be released by researchers,",
        "# but original files may not be redistributed or used beyond research purposes.",
        "",
        '"""Task definitions for this project."""',
        "",
        "# Import your tasks here:",
        "# from tasks.click_item import ClickItemTask",
    )
    # Every line, including the last, is newline-terminated.
    text = "\n".join(init_lines) + "\n"
    (project_dir / "tasks" / "__init__.py").write_text(text)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def _write_example_task(project_dir: Path, module_name: str) -> None:
    """Write an example task file.

    Generates ``tasks/example_task.py`` defining ``ExampleTask``, which
    renders one image and emits one or more samples depending on the sampled
    distribution type ("normal", "edge_case" or "adversarial").

    The generated module imports ``BaseRenderer`` from ``cudag.core`` because
    ``ExampleTask.__init__`` annotates its ``renderer`` parameter with that
    name; without the import the annotation is an undefined name for the
    linters the generated project runs.

    Args:
        project_dir: Root directory of the scaffolded project; its ``tasks``
            subdirectory must already exist.
        module_name: Underscored module name; its PascalCase form plus
            ``State`` is the state class the task instantiates.
    """
    state_class = "".join(word.title() for word in module_name.split("_")) + "State"

    # Doubled braces below are f-string escapes; the generated file receives
    # single braces (dict literals and the test_id f-string).
    content = dedent(f'''\
        # Derivative works may be released by researchers,
        # but original files may not be redistributed or used beyond research purposes.

        """Example task - demonstrates 1-image-to-many-samples pattern with distributions.

        Key insight: One rendered image can produce MULTIPLE training samples.
        This is more efficient than generating a new image for each sample.

        IMPORTANT: All coordinates in training data MUST be normalized RU (0-1000).
        Use normalize_coord() before passing to ToolCall.

        Distribution Pattern:
        - Configure distributions in dataset.yaml under task_distributions
        - Use ctx.config or DatasetConfig.sample_distribution_type() to select type
        - Generate samples according to the distribution (normal, edge_case, adversarial)
        """

        from cudag.core import BaseRenderer, BaseTask, DatasetConfig, TaskContext, TaskSample, TestCase
        from cudag.core.coords import normalize_coord
        from cudag.prompts.tools import ToolCall

        from state import {state_class}


        class ExampleTask(BaseTask):
            """Example task demonstrating 1:N image-to-samples pattern with distributions.

            One Screen can have many Tasks. Each Task:
            - Belongs to a Screen
            - Has a task_type identifier
            - Can generate multiple samples from one rendered image
            - Supports distribution-based sample generation

            Example use cases:
            - Same claim window -> "click code" + "click fee" + "scroll"
            - Same calendar -> "click day 1" + "click day 15"

            Distribution Example (in dataset.yaml):
                task_distributions:
                  example-click:
                    normal: 0.80      # 80% normal cases
                    edge_case: 0.15   # 15% edge cases
                    adversarial: 0.05 # 5% no valid target
            """

            task_type = "example-click"

            def __init__(self, config: dict | DatasetConfig, renderer: "BaseRenderer") -> None:
                super().__init__(config, renderer)
                # Store DatasetConfig for distribution sampling
                self._dataset_config: DatasetConfig | None = None
                if isinstance(config, DatasetConfig):
                    self._dataset_config = config

            def _get_distribution_type(self, ctx: TaskContext) -> str:
                """Sample distribution type from config, with defaults."""
                if self._dataset_config:
                    dist_type = self._dataset_config.sample_distribution_type(
                        self.task_type, ctx.rng
                    )
                    if dist_type:
                        return dist_type
                # Default distribution if not configured
                roll = ctx.rng.random()
                if roll < 0.80:
                    return "normal"
                elif roll < 0.95:
                    return "edge_case"
                else:
                    return "adversarial"

            def generate_samples(self, ctx: TaskContext) -> list[TaskSample]:
                """Generate MULTIPLE samples from ONE rendered image."""
                # 1. Determine distribution type for this sample
                dist_type = self._get_distribution_type(ctx)

                # 2. Create state and render ONCE
                state = {state_class}()
                image, metadata = self.renderer.render(state)
                image_path = self.save_image(image, ctx)

                samples = []

                # 3. Generate samples based on distribution type
                if dist_type == "adversarial":
                    # Adversarial: No valid target - model should respond with "answer"
                    samples.append(TaskSample(
                        id=self.build_id(ctx, "_adversarial"),
                        image_path=image_path,
                        human_prompt="Click the nonexistent item",
                        tool_call=ToolCall(
                            action="answer",
                            text="There is no such item on the screen."
                        ),
                        pixel_coords=(0, 0),
                        metadata={{
                            "task_type": self.task_type,
                            "distribution": dist_type,
                            "has_match": False,
                        }},
                        image_size=image.size,
                    ))
                else:
                    # Normal or edge_case: valid targets exist
                    # IMPORTANT: Always normalize pixel coords before ToolCall!
                    pixel_coords_1 = (400, 300)
                    norm_coords_1 = normalize_coord(pixel_coords_1, image.size)
                    samples.append(TaskSample(
                        id=self.build_id(ctx, "_target1"),
                        image_path=image_path,
                        human_prompt="Click the first item",
                        tool_call=ToolCall.left_click(norm_coords_1),  # NORMALIZED!
                        pixel_coords=pixel_coords_1,
                        metadata={{
                            "task_type": self.task_type,
                            "distribution": dist_type,
                            "has_match": True,
                            "target": "first",
                        }},
                        image_size=image.size,
                    ))

                    # For normal distribution, add more samples from same image
                    if dist_type == "normal":
                        pixel_coords_2 = (500, 400)
                        norm_coords_2 = normalize_coord(pixel_coords_2, image.size)
                        samples.append(TaskSample(
                            id=self.build_id(ctx, "_target2"),
                            image_path=image_path,
                            human_prompt="Click the second item",
                            tool_call=ToolCall.left_click(norm_coords_2),  # NORMALIZED!
                            pixel_coords=pixel_coords_2,
                            metadata={{
                                "task_type": self.task_type,
                                "distribution": dist_type,
                                "has_match": True,
                                "target": "second",
                            }},
                            image_size=image.size,
                        ))

                return samples

            def generate_sample(self, ctx: TaskContext) -> TaskSample:
                """Generate one training sample (fallback)."""
                return self.generate_samples(ctx)[0]

            def generate_tests(self, ctx: TaskContext) -> list[TestCase]:
                """Generate test cases from ONE rendered image."""
                samples = self.generate_samples(ctx)
                return [
                    TestCase(
                        test_id=f"test_{{ctx.index:04d}}_{{i}}",
                        screenshot=s.image_path,
                        prompt=s.human_prompt,
                        expected_action=s.tool_call.to_dict(),
                        tolerance=10,
                        metadata=s.metadata,
                        pixel_coords=s.pixel_coords,
                    )
                    for i, s in enumerate(samples)
                ]

            def generate_test(self, ctx: TaskContext) -> TestCase:
                """Generate one test case (fallback)."""
                return self.generate_tests(ctx)[0]
    ''')
    (project_dir / "tasks" / "example_task.py").write_text(content)
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def _write_dataset_config(project_dir: Path, project_name: str) -> None:
    """Generate ``config/dataset.yaml`` with default dataset settings.

    The file sets the name prefix, seed, task counts, train split, system
    prompt style, output image settings, test settings and annotation
    settings, plus a commented-out ``task_distributions`` example.

    Args:
        project_dir: Root directory of the scaffolded project; its ``config``
            subdirectory must already exist.
        project_name: Hyphenated project name used as ``name_prefix``.
    """
    yaml_text = dedent(f"""\
        # Dataset configuration for {project_name}

        name_prefix: {project_name}
        seed: 42

        # Task counts - how many samples of each type
        tasks:
          example-click: 100

        # Task distributions - distribution of sample types within each task
        # Each task can have its own distribution of subtypes.
        # The values should sum to 1.0 (100%).
        # task_distributions:
        #   example-click:
        #     normal: 0.80      # 80% normal cases
        #     edge_case: 0.15   # 15% edge cases
        #     adversarial: 0.05 # 5% adversarial (no match)

        # Train/test split
        splits:
          train: 0.8

        # System prompt style
        system_prompt: computer-use

        # Output settings
        output:
          image_format: png
          image_quality: 95

        # Test settings (held-out evaluation data)
        test:
          count: 20
          tolerance: 10

        # Annotation settings
        annotation:
          enabled: true
          per_type:
            example-click: 2
    """)
    destination = project_dir / "config" / "dataset.yaml"
    destination.write_text(yaml_text)
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _write_readme(project_dir: Path, project_name: str) -> None:
    """Write README.md.

    The "run directly" usage example names ``generator.py``, which is the
    entry-point script the scaffold creates.

    Args:
        project_dir: Root directory of the scaffolded project.
        project_name: Hyphenated project name used as the README title.
    """
    content = dedent(f"""\
        # {project_name}

        CUDAG project for generating training data.

        ## Setup

        ```bash
        pip install -e .
        ```

        ## Structure

        - `screen.py` - Screen definition (regions, layout)
        - `state.py` - State dataclass (dynamic data)
        - `renderer.py` - Image rendering logic
        - `models/` - Domain model definitions (Patient, Provider, etc.)
        - `tasks/` - Task implementations
        - `config/` - Dataset configurations
        - `assets/` - Base images, fonts, etc.

        ## Usage

        ```bash
        # Generate dataset
        cudag generate --config config/dataset.yaml

        # Or run directly
        python generator.py --config config/dataset.yaml
        ```

        ## Development

        1. Edit `screen.py` to define your UI regions
        2. Edit `state.py` to define your data model
        3. Edit `renderer.py` to implement image generation
        4. Add domain models in `models/` for data generation
        5. Add tasks in `tasks/` for each interaction type
        6. Configure dataset.yaml with sample counts
    """)
    # Explicit encoding: the title comes from user input and may be non-ASCII.
    (project_dir / "README.md").write_text(content, encoding="utf-8")
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def _get_templates_dir() -> Path:
    """Return the ``templates`` directory two levels above this file."""
    containing_dir = Path(__file__).parent
    package_dir = containing_dir.parent
    return package_dir / "templates"
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def _write_scripts(project_dir: Path, module_name: str) -> None:
    """Install the packaged shell-script templates into ``scripts/``.

    Only ``*.sh`` files directly inside ``templates/scripts`` are copied, and
    each copy gets the execute bit for user, group and others.

    Args:
        project_dir: Root directory of the scaffolded project; its ``scripts``
            subdirectory must already exist.
        module_name: Accepted for parity with the other writers; not used.
    """
    execute_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    source_dir = _get_templates_dir() / "scripts"
    target_dir = project_dir / "scripts"

    for template in source_dir.glob("*.sh"):
        installed = target_dir / template.name
        shutil.copy(template, installed)
        # Add execute permission on top of whatever mode the copy carried.
        current_mode = installed.stat().st_mode
        installed.chmod(current_mode | execute_bits)
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
def _write_modal_apps(project_dir: Path) -> None:
    """Copy modal_apps Python files into the project.

    Files are taken from ``templates/modal_apps`` when that directory exists.
    Otherwise the package's own ``modal_apps`` directory (a sibling of
    ``templates``) is used, so the project's ``modal_apps/`` is not left
    empty when no template copy is shipped.

    Args:
        project_dir: Root directory of the scaffolded project; its
            ``modal_apps`` subdirectory must already exist.
    """
    templates_dir = _get_templates_dir()
    source_dir = templates_dir / "modal_apps"
    if not source_dir.is_dir():
        # NOTE(review): assumes the packaged modal_apps modules are the
        # intended project templates - confirm against the build config.
        source_dir = templates_dir.parent / "modal_apps"

    target_dir = project_dir / "modal_apps"
    # Copy all Python files from the chosen source directory
    for py_file in source_dir.glob("*.py"):
        shutil.copy(py_file, target_dir / py_file.name)
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def _write_makefile(project_dir: Path, module_name: str) -> None:
    """Write Makefile for code quality and build tasks.

    The file is written as UTF-8 because the ``check`` recipe echoes a
    non-ASCII check mark, which the platform default encoding may not be
    able to represent.

    Args:
        project_dir: Root directory of the scaffolded project.
        module_name: Accepted for parity with the other writers; not used.
    """
    # Plain (non-f) string: nothing is interpolated, so `find -exec` can use
    # literal "{}". The "\t" escapes become the tabs make requires in recipes.
    content = dedent('''\
        # Derivative works may be released by researchers,
        # but original files may not be redistributed or used beyond research purposes.

        .PHONY: all check lint typecheck format clean install dev test build generate

        # Use venv Python if available, fallback to python3
        PYTHON := $(shell test -x .venv/bin/python && echo .venv/bin/python || echo python3)
        SRC_FILES := $(shell find . -name "*.py" -not -path "./.venv/*")

        # Default target
        all: check

        # Setup virtualenv and install dependencies
        install:
        \tpython3 -m venv .venv
        \t.venv/bin/pip install -e .

        # Install dev dependencies
        dev: install
        \t.venv/bin/pip install -e ".[dev]"
        \t.venv/bin/pip install radon

        # Run all quality checks
        check: lint typecheck
        \t@echo "✓ All checks passed!"

        # Linting with ruff
        lint:
        \t@echo "Running ruff..."
        \t$(PYTHON) -m ruff check $(SRC_FILES)
        \t$(PYTHON) -m ruff format --check $(SRC_FILES)

        # Type checking with mypy
        typecheck:
        \t@echo "Running mypy..."
        \t$(PYTHON) -m mypy $(SRC_FILES) --strict

        # Auto-format code
        format:
        \t@echo "Formatting code..."
        \t$(PYTHON) -m ruff format $(SRC_FILES)
        \t$(PYTHON) -m ruff check --fix $(SRC_FILES)

        # Clean build artifacts
        clean:
        \trm -rf build/ dist/ *.egg-info/
        \tfind . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
        \tfind . -type f -name "*.pyc" -delete 2>/dev/null || true

        # Generate dataset
        generate:
        \t./scripts/generate.sh --dry

        # Build and upload
        build:
        \t./scripts/build.sh
    ''')
    (project_dir / "Makefile").write_text(content, encoding="utf-8")
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
def _write_copyright(project_dir: Path) -> None:
    """Generate ``COPYRIGHT.txt`` in the project root.

    Args:
        project_dir: Root directory of the scaffolded project.
    """
    notice_lines = (
        "Copyright (c) 2025 Tylt LLC. All rights reserved.",
        "",
        "Derivative works may be released by researchers,",
        "but original files may not be redistributed or used beyond research purposes.",
    )
    # Every line, including the last, is newline-terminated.
    notice = "\n".join(notice_lines) + "\n"
    (project_dir / "COPYRIGHT.txt").write_text(notice)
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def _init_git(project_dir: Path) -> None:
    """Initialize git repository with pre-commit hook.

    ``.gitattributes`` is always written. When a ``git`` executable is on
    PATH and ``git init`` succeeds, a pre-commit hook delegating to
    ``scripts/pre-commit.sh`` is installed, all files are staged and an
    initial commit is made with hooks bypassed. If that commit fails it is
    retried once with a fallback author identity.

    A missing ``git`` or a failed ``git init`` is treated as "no repository":
    the function returns without raising, so an otherwise complete scaffold
    is not aborted at its last step.

    Args:
        project_dir: Root directory of the scaffolded project.
    """
    import subprocess

    # Write .gitattributes (independent of whether git is available)
    gitattributes = dedent('''\
        # Auto detect text files and perform LF normalization
        * text=auto

        # Python files
        *.py text diff=python

        # Shell scripts
        *.sh text eol=lf

        # Binary files
        *.png binary
        *.jpg binary
        *.jpeg binary
        *.gif binary
        *.ico binary
        *.ttf binary
        *.woff binary
        *.woff2 binary
    ''')
    (project_dir / ".gitattributes").write_text(gitattributes)

    # Without a git executable subprocess.run would raise FileNotFoundError.
    if shutil.which("git") is None:
        return

    # Initialize git; skip the hook and commit if the repository was not
    # created, rather than fabricating a partial .git directory.
    init_result = subprocess.run(["git", "init"], cwd=project_dir, capture_output=True)
    if init_result.returncode != 0:
        return

    # Create .git/hooks directory if needed
    hooks_dir = project_dir / ".git" / "hooks"
    hooks_dir.mkdir(parents=True, exist_ok=True)

    # Write pre-commit hook
    precommit_hook = dedent('''\
        #!/usr/bin/env bash
        # Pre-commit hook - runs code quality checks on staged files

        exec ./scripts/pre-commit.sh
    ''')
    hook_path = hooks_dir / "pre-commit"
    hook_path.write_text(precommit_hook)
    hook_path.chmod(hook_path.stat().st_mode | 0o755)

    # Stage all files
    subprocess.run(["git", "add", "."], cwd=project_dir, capture_output=True)

    commit_message = "Initial project scaffolding from cudag new"

    # Make initial commit (skip hooks since cudag isn't installed yet)
    result = subprocess.run(
        ["git", "commit", "--no-verify", "-m", commit_message],
        cwd=project_dir,
        capture_output=True,
    )

    # If commit failed (e.g. no author configured), retry with explicit author
    if result.returncode != 0:
        subprocess.run(
            [
                "git",
                "-c", "user.email=cudag@example.com",
                "-c", "user.name=CUDAG",
                "commit",
                "--no-verify",
                "-m",
                commit_message,
            ],
            cwd=project_dir,
            capture_output=True,
        )
|