cudag 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. cudag/__init__.py +334 -0
  2. cudag/annotation/__init__.py +77 -0
  3. cudag/annotation/codegen.py +648 -0
  4. cudag/annotation/config.py +545 -0
  5. cudag/annotation/loader.py +342 -0
  6. cudag/annotation/scaffold.py +121 -0
  7. cudag/annotation/transcription.py +296 -0
  8. cudag/cli/__init__.py +5 -0
  9. cudag/cli/main.py +315 -0
  10. cudag/cli/new.py +873 -0
  11. cudag/core/__init__.py +364 -0
  12. cudag/core/button.py +137 -0
  13. cudag/core/canvas.py +222 -0
  14. cudag/core/config.py +70 -0
  15. cudag/core/coords.py +233 -0
  16. cudag/core/data_grid.py +804 -0
  17. cudag/core/dataset.py +678 -0
  18. cudag/core/distribution.py +136 -0
  19. cudag/core/drawing.py +75 -0
  20. cudag/core/fonts.py +156 -0
  21. cudag/core/generator.py +163 -0
  22. cudag/core/grid.py +367 -0
  23. cudag/core/grounding_task.py +247 -0
  24. cudag/core/icon.py +207 -0
  25. cudag/core/iconlist_task.py +301 -0
  26. cudag/core/models.py +1251 -0
  27. cudag/core/random.py +130 -0
  28. cudag/core/renderer.py +190 -0
  29. cudag/core/screen.py +402 -0
  30. cudag/core/scroll_task.py +254 -0
  31. cudag/core/scrollable_grid.py +447 -0
  32. cudag/core/state.py +110 -0
  33. cudag/core/task.py +293 -0
  34. cudag/core/taskbar.py +350 -0
  35. cudag/core/text.py +212 -0
  36. cudag/core/utils.py +82 -0
  37. cudag/data/surnames.txt +5000 -0
  38. cudag/modal_apps/__init__.py +4 -0
  39. cudag/modal_apps/archive.py +103 -0
  40. cudag/modal_apps/extract.py +138 -0
  41. cudag/modal_apps/preprocess.py +529 -0
  42. cudag/modal_apps/upload.py +317 -0
  43. cudag/prompts/SYSTEM_PROMPT.txt +104 -0
  44. cudag/prompts/__init__.py +33 -0
  45. cudag/prompts/system.py +43 -0
  46. cudag/prompts/tools.py +382 -0
  47. cudag/py.typed +0 -0
  48. cudag/schemas/filesystem.json +90 -0
  49. cudag/schemas/test_record.schema.json +113 -0
  50. cudag/schemas/train_record.schema.json +90 -0
  51. cudag/server/__init__.py +21 -0
  52. cudag/server/app.py +232 -0
  53. cudag/server/services/__init__.py +9 -0
  54. cudag/server/services/generator.py +128 -0
  55. cudag/templates/scripts/archive.sh +35 -0
  56. cudag/templates/scripts/build.sh +13 -0
  57. cudag/templates/scripts/extract.sh +54 -0
  58. cudag/templates/scripts/generate.sh +116 -0
  59. cudag/templates/scripts/pre-commit.sh +44 -0
  60. cudag/templates/scripts/preprocess.sh +46 -0
  61. cudag/templates/scripts/upload.sh +63 -0
  62. cudag/templates/scripts/verify.py +428 -0
  63. cudag/validation/__init__.py +35 -0
  64. cudag/validation/validate.py +508 -0
  65. cudag-0.3.10.dist-info/METADATA +570 -0
  66. cudag-0.3.10.dist-info/RECORD +69 -0
  67. cudag-0.3.10.dist-info/WHEEL +4 -0
  68. cudag-0.3.10.dist-info/entry_points.txt +2 -0
  69. cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/cli/new.py ADDED
@@ -0,0 +1,873 @@
1
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
2
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
3
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
4
+
5
+ """Project scaffolding for cudag new command."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import shutil
10
+ import stat
11
+ from pathlib import Path
12
+ from textwrap import dedent
13
+
14
+
15
def create_project(name: str, parent_dir: Path) -> Path:
    """Scaffold a brand-new CUDAG project on disk.

    The given name is lower-cased and its spaces/underscores are turned into
    hyphens to form the directory name; the module name is that same string
    with hyphens mapped to underscores.

    Args:
        name: Project name (e.g., "appointment-picker")
        parent_dir: Directory to create project in

    Returns:
        Path to created project directory
    """
    project_name = name.lower().replace(" ", "-").replace("_", "-")
    module_name = project_name.replace("-", "_")

    project_dir = parent_dir / project_name
    project_dir.mkdir(parents=True, exist_ok=True)

    # Sub-directories every scaffolded project starts with.
    for subdir in (
        "config",
        "tasks",
        "assets",
        "datasets",
        "models",
        "scripts",
        "modal_apps",
    ):
        (project_dir / subdir).mkdir(exist_ok=True)

    # Project metadata.
    _write_pyproject(project_dir, project_name, module_name)
    _write_gitignore(project_dir)

    # Python sources.
    _write_screen(project_dir, module_name)
    _write_state(project_dir, module_name)
    _write_renderer(project_dir, module_name)
    _write_generator(project_dir, module_name)
    _write_models_init(project_dir, module_name)
    _write_tasks_init(project_dir)
    _write_example_task(project_dir, module_name)

    # Configuration, docs and tooling.
    _write_dataset_config(project_dir, project_name)
    _write_readme(project_dir, project_name)
    _write_scripts(project_dir, module_name)
    _write_modal_apps(project_dir)
    _write_makefile(project_dir, module_name)
    _write_copyright(project_dir)

    # Git goes last so the initial commit captures every file written above.
    _init_git(project_dir)

    return project_dir
60
+
61
+
62
def _write_pyproject(project_dir: Path, project_name: str, module_name: str) -> None:
    """Write pyproject.toml for the scaffolded project.

    Args:
        project_dir: Root directory of the project being created.
        project_name: Hyphenated project name, used for ``name`` and ``description``.
        module_name: Underscored module name (accepted for signature parity
            with the other writers; not used by this template).
    """
    content = dedent(f'''\
    [build-system]
    requires = ["hatchling"]
    build-backend = "hatchling.build"

    [project]
    name = "{project_name}"
    version = "0.1.0"
    description = "CUDAG project for {project_name}"
    requires-python = ">=3.11"
    dependencies = [
        "cudag",
        "pillow>=10.0.0",
        "pyyaml>=6.0",
    ]

    [tool.uv.sources]
    cudag = {{ path = "../../cudag", editable = true }}

    [project.optional-dependencies]
    dev = [
        "ruff>=0.1.0",
        "mypy>=1.0.0",
        "types-PyYAML>=6.0.0",
    ]

    [tool.hatch.build.targets.wheel]
    packages = ["."]

    [tool.ruff]
    line-length = 100
    target-version = "py311"

    [tool.mypy]
    python_version = "3.11"
    strict = true
    ''')
    # TOML documents must be UTF-8; do not depend on the platform locale,
    # since the project name comes from the user and may be non-ASCII.
    (project_dir / "pyproject.toml").write_text(content, encoding="utf-8")
102
+
103
+
104
def _write_gitignore(project_dir: Path) -> None:
    """Create the project's .gitignore (Python artefacts, datasets, IDE files)."""
    ignore_lines = (
        "# Python",
        "__pycache__/",
        "*.py[cod]",
        ".venv/",
        "*.egg-info/",
        "",
        "# Generated datasets",
        "datasets/",
        "",
        "# IDE",
        ".idea/",
        ".vscode/",
        "*.swp",
    )
    target = project_dir / ".gitignore"
    target.write_text("\n".join(ignore_lines) + "\n")
122
+
123
+
124
def _write_screen(project_dir: Path, module_name: str) -> None:
    """Write screen.py with an example Screen class.

    Args:
        project_dir: Root directory of the project being created.
        module_name: Underscored module name; its CamelCase form is the stem
            of the generated ``<Stem>Screen`` class.
    """
    base_name = "".join(word.title() for word in module_name.split("_"))
    class_name = base_name + "Screen"
    # Derive the renderer name from the stem rather than via
    # class_name.replace("Screen", "Renderer"): a module such as
    # "login_screen" would otherwise produce "LoginRendererRenderer", which
    # does not match the class written to renderer.py ("LoginScreenRenderer").
    renderer_name = base_name + "Renderer"

    content = dedent(f'''\
    # Derivative works may be released by researchers,
    # but original files may not be redistributed or used beyond research purposes.

    """Screen definition for {module_name}."""

    from typing import Any, NoReturn

    from cudag.core import Screen

    # Uncomment to use these region types:
    # from cudag.core import Bounds, ButtonRegion, GridRegion, ClickRegion


    class {class_name}(Screen):
        """Define the screen layout and interactive regions.

        Edit this class to define your screen's regions:
        - ButtonRegion for clickable buttons
        - GridRegion for grid-like clickable areas
        - ScrollRegion for scrollable areas
        - DropdownRegion for dropdown menus
        """

        class Meta:
            name = "{module_name}"
            base_image = "assets/base.png"  # Your base screenshot
            size = (800, 600)  # Image dimensions

        # Example: Define a clickable grid region
        # grid = GridRegion(
        #     bounds=Bounds(x=100, y=100, width=400, height=300),
        #     rows=5,
        #     cols=4,
        # )

        # Example: Define a button
        # submit_button = ButtonRegion(
        #     bounds=Bounds(x=350, y=450, width=100, height=40),
        #     label="Submit",
        #     description="Submit the form",
        # )

        def render(self, state: Any) -> NoReturn:
            """Render is handled by the Renderer class."""
            raise NotImplementedError("Use {renderer_name} instead")
    ''')
    # Python source is UTF-8 by default; write it that way on every platform.
    (project_dir / "screen.py").write_text(content, encoding="utf-8")
177
+
178
+
179
def _write_state(project_dir: Path, module_name: str) -> None:
    """Write state.py with an example State dataclass.

    Args:
        project_dir: Root directory of the project being created.
        module_name: Underscored module name; its CamelCase form is the stem
            of the generated ``<Stem>State`` class.
    """
    class_name = "".join(word.title() for word in module_name.split("_")) + "State"

    content = dedent(f'''\
    # Derivative works may be released by researchers,
    # but original files may not be redistributed or used beyond research purposes.

    """State definition for {module_name}."""

    from dataclasses import dataclass
    from cudag.core import BaseState


    @dataclass
    class {class_name}(BaseState):
        """Dynamic data that populates the screen.

        Add fields for all the data needed to render your screen.
        """

        # Example fields - replace with your own:
        # selected_item: int = 0
        # items: list[str] = field(default_factory=list)

        pass  # Remove this when you add fields
    ''')
    # The module name is user-derived and may be non-ASCII; Python expects
    # UTF-8 source, so do not fall back to the platform locale encoding.
    (project_dir / "state.py").write_text(content, encoding="utf-8")
207
+
208
+
209
def _write_renderer(project_dir: Path, module_name: str) -> None:
    """Write renderer.py with an example Renderer class.

    Args:
        project_dir: Root directory of the project being created.
        module_name: Underscored module name; its CamelCase form is the stem
            shared by the generated Screen/State/Renderer class names.
    """
    # Compute the CamelCase stem once so the three class names cannot drift.
    base_name = "".join(word.title() for word in module_name.split("_"))
    screen_class = base_name + "Screen"
    state_class = base_name + "State"
    renderer_class = base_name + "Renderer"

    content = dedent(f'''\
    # Derivative works may be released by researchers,
    # but original files may not be redistributed or used beyond research purposes.

    """Renderer for {module_name}."""

    from typing import Any

    from PIL import Image

    from cudag.core import BaseRenderer

    from screen import {screen_class}
    from state import {state_class}


    class {renderer_class}(BaseRenderer[{state_class}]):
        """Renders the {module_name} screen.

        Loads assets and generates images from state.
        """

        screen_class = {screen_class}

        def load_assets(self) -> None:
            """Load fonts and other assets."""
            # Example:
            # from PIL import ImageFont
            # self.font = ImageFont.truetype(self.asset_path("fonts", "arial.ttf"), 12)
            pass

        def render(self, state: {state_class}) -> tuple[Image.Image, dict[str, Any]]:
            """Render the screen with given state.

            Args:
                state: Current screen state

            Returns:
                (PIL Image, metadata dict)
            """
            # Load base image
            image = self.load_base_image()

            # TODO: Draw state onto image
            # Example:
            # draw = ImageDraw.Draw(image)
            # draw.text((100, 100), state.some_field, fill="black")

            # Build metadata
            metadata = self.build_metadata(state)

            return image, metadata
    ''')
    # Python expects UTF-8 source; do not rely on the platform locale.
    (project_dir / "renderer.py").write_text(content, encoding="utf-8")
269
+
270
+
271
def _write_generator(project_dir: Path, module_name: str) -> None:
    """Write generator.py, the project's entry point for dataset generation.

    Args:
        project_dir: Root directory of the project being created.
        module_name: Underscored module name; its CamelCase form names the
            ``<Stem>Renderer`` class the generated script imports.
    """
    renderer_class = "".join(word.title() for word in module_name.split("_")) + "Renderer"

    # NOTE: "\\n" and doubled braces below are escapes for *this* f-string;
    # the generated file contains "\n" and single braces.
    content = dedent(f'''\
    # Derivative works may be released by researchers,
    # but original files may not be redistributed or used beyond research purposes.

    """Dataset generator for {module_name}.

    Usage:
        python generator.py
        python generator.py --config config/dataset.yaml
    """

    import argparse
    from datetime import datetime
    from pathlib import Path

    from cudag.core import DatasetBuilder, DatasetConfig

    from renderer import {renderer_class}
    from tasks.example_task import ExampleTask


    def get_researcher_name() -> str | None:
        """Get researcher name from .researcher file if it exists."""
        researcher_file = Path(".researcher")
        if researcher_file.exists():
            content = researcher_file.read_text().strip()
            for line in content.split("\\n"):
                if line.startswith("Name:"):
                    return line.split(":", 1)[1].strip().lower()
        return None


    def main() -> None:
        """Run dataset generation."""
        parser = argparse.ArgumentParser(description="Generate dataset")
        parser.add_argument(
            "--config",
            type=Path,
            default=Path("config/dataset.yaml"),
            help="Path to dataset config YAML",
        )
        parser.add_argument(
            "--exp",
            type=str,
            default=None,
            help="Experiment label to include in dataset name",
        )
        args = parser.parse_args()

        # Load config
        config = DatasetConfig.from_yaml(args.config)

        # Build dataset name: name--researcher--exp--timestamp
        researcher = get_researcher_name()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        name_parts = [config.name_prefix]
        if researcher:
            name_parts.append(researcher)
        if args.exp:
            name_parts.append(args.exp)
        name_parts.append(timestamp)
        dataset_name = "--".join(name_parts)

        # Override output_dir with new naming
        config.output_dir = Path("datasets") / dataset_name

        print(f"Loaded config: {{config.name_prefix}}")
        print(f"Tasks: {{config.task_counts}}")

        # Initialize renderer
        renderer = {renderer_class}(assets_dir=Path("assets"))
        renderer.load_assets()

        # Create tasks - add your tasks here
        tasks = [
            ExampleTask(config=config, renderer=renderer),
            # Add more tasks as needed
        ]

        # Build dataset
        builder = DatasetBuilder(config=config, tasks=tasks)
        output_dir = builder.build()

        # Build tests
        builder.build_tests()

        print(f"\\nDataset generated at: {{output_dir}}")


    if __name__ == "__main__":
        main()
    ''')
    # Python expects UTF-8 source; do not rely on the platform locale.
    (project_dir / "generator.py").write_text(content, encoding="utf-8")
368
+
369
+
370
def _write_models_init(project_dir: Path, module_name: str) -> None:
    """Write models/__init__.py with re-exports and an example custom model.

    Args:
        project_dir: Root directory of the project being created.
        module_name: Underscored module name, mentioned in the module docstring.
    """
    # Doubled braces in the regex example are f-string escapes; the generated
    # file contains {2} and {8}.
    content = dedent(f'''\
    # Derivative works may be released by researchers,
    # but original files may not be redistributed or used beyond research purposes.

    """Domain models for {module_name}.

    Define your data types here (Patient, Provider, Claim, etc.)
    with field definitions for realistic data generation.
    """

    # Re-export common models for use in this project
    from cudag.core import Claim as Claim
    from cudag.core import Patient as Patient
    from cudag.core import Procedure as Procedure
    from cudag.core import Provider as Provider

    # Import types for custom model definitions:
    # from cudag.core import (
    #     Model,
    #     StringField,
    #     IntField,
    #     DateField,
    #     ChoiceField,
    #     MoneyField,
    # )

    # Example: Define a custom model
    # class MyCustomModel(Model):
    #     name = StringField(faker="full_name")
    #     account_number = StringField(pattern=r"[A-Z]{{2}}[0-9]{{8}}")
    #     status = ChoiceField(choices=["Active", "Pending", "Closed"])

    __all__ = [
        "Patient",
        "Provider",
        "Procedure",
        "Claim",
    ]
    ''')
    # Python expects UTF-8 source; do not rely on the platform locale.
    (project_dir / "models" / "__init__.py").write_text(content, encoding="utf-8")
412
+
413
+
414
def _write_tasks_init(project_dir: Path) -> None:
    """Create tasks/__init__.py containing a docstring and an import hint."""
    init_lines = (
        "# Derivative works may be released by researchers,",
        "# but original files may not be redistributed or used beyond research purposes.",
        "",
        '"""Task definitions for this project."""',
        "",
        "# Import your tasks here:",
        "# from tasks.click_item import ClickItemTask",
    )
    target = project_dir / "tasks" / "__init__.py"
    target.write_text("\n".join(init_lines) + "\n")
426
+
427
+
428
def _write_example_task(project_dir: Path, module_name: str) -> None:
    """Write tasks/example_task.py, a worked example of a multi-sample task.

    Args:
        project_dir: Root directory of the project being created.
        module_name: Underscored module name; its CamelCase form names the
            ``<Stem>State`` class the generated task imports.
    """
    state_class = "".join(word.title() for word in module_name.split("_")) + "State"

    # The generated module imports BaseRenderer because ExampleTask.__init__
    # names it in an annotation; without the import the scaffold fails its own
    # `ruff check` (undefined name) and mypy runs from the generated Makefile.
    content = dedent(f'''\
    # Derivative works may be released by researchers,
    # but original files may not be redistributed or used beyond research purposes.

    """Example task - demonstrates 1-image-to-many-samples pattern with distributions.

    Key insight: One rendered image can produce MULTIPLE training samples.
    This is more efficient than generating a new image for each sample.

    IMPORTANT: All coordinates in training data MUST be normalized RU (0-1000).
    Use normalize_coord() before passing to ToolCall.

    Distribution Pattern:
    - Configure distributions in dataset.yaml under task_distributions
    - Use ctx.config or DatasetConfig.sample_distribution_type() to select type
    - Generate samples according to the distribution (normal, edge_case, adversarial)
    """

    from cudag.core import BaseRenderer, BaseTask, DatasetConfig, TaskContext, TaskSample, TestCase
    from cudag.core.coords import normalize_coord
    from cudag.prompts.tools import ToolCall

    from state import {state_class}


    class ExampleTask(BaseTask):
        """Example task demonstrating 1:N image-to-samples pattern with distributions.

        One Screen can have many Tasks. Each Task:
        - Belongs to a Screen
        - Has a task_type identifier
        - Can generate multiple samples from one rendered image
        - Supports distribution-based sample generation

        Example use cases:
        - Same claim window -> "click code" + "click fee" + "scroll"
        - Same calendar -> "click day 1" + "click day 15"

        Distribution Example (in dataset.yaml):
            task_distributions:
              example-click:
                normal: 0.80  # 80% normal cases
                edge_case: 0.15  # 15% edge cases
                adversarial: 0.05  # 5% no valid target
        """

        task_type = "example-click"

        def __init__(self, config: dict | DatasetConfig, renderer: "BaseRenderer") -> None:
            super().__init__(config, renderer)
            # Store DatasetConfig for distribution sampling
            self._dataset_config: DatasetConfig | None = None
            if isinstance(config, DatasetConfig):
                self._dataset_config = config

        def _get_distribution_type(self, ctx: TaskContext) -> str:
            """Sample distribution type from config, with defaults."""
            if self._dataset_config:
                dist_type = self._dataset_config.sample_distribution_type(
                    self.task_type, ctx.rng
                )
                if dist_type:
                    return dist_type
            # Default distribution if not configured
            roll = ctx.rng.random()
            if roll < 0.80:
                return "normal"
            elif roll < 0.95:
                return "edge_case"
            else:
                return "adversarial"

        def generate_samples(self, ctx: TaskContext) -> list[TaskSample]:
            """Generate MULTIPLE samples from ONE rendered image."""
            # 1. Determine distribution type for this sample
            dist_type = self._get_distribution_type(ctx)

            # 2. Create state and render ONCE
            state = {state_class}()
            image, metadata = self.renderer.render(state)
            image_path = self.save_image(image, ctx)

            samples = []

            # 3. Generate samples based on distribution type
            if dist_type == "adversarial":
                # Adversarial: No valid target - model should respond with "answer"
                samples.append(TaskSample(
                    id=self.build_id(ctx, "_adversarial"),
                    image_path=image_path,
                    human_prompt="Click the nonexistent item",
                    tool_call=ToolCall(
                        action="answer",
                        text="There is no such item on the screen."
                    ),
                    pixel_coords=(0, 0),
                    metadata={{
                        "task_type": self.task_type,
                        "distribution": dist_type,
                        "has_match": False,
                    }},
                    image_size=image.size,
                ))
            else:
                # Normal or edge_case: valid targets exist
                # IMPORTANT: Always normalize pixel coords before ToolCall!
                pixel_coords_1 = (400, 300)
                norm_coords_1 = normalize_coord(pixel_coords_1, image.size)
                samples.append(TaskSample(
                    id=self.build_id(ctx, "_target1"),
                    image_path=image_path,
                    human_prompt="Click the first item",
                    tool_call=ToolCall.left_click(norm_coords_1),  # NORMALIZED!
                    pixel_coords=pixel_coords_1,
                    metadata={{
                        "task_type": self.task_type,
                        "distribution": dist_type,
                        "has_match": True,
                        "target": "first",
                    }},
                    image_size=image.size,
                ))

                # For normal distribution, add more samples from same image
                if dist_type == "normal":
                    pixel_coords_2 = (500, 400)
                    norm_coords_2 = normalize_coord(pixel_coords_2, image.size)
                    samples.append(TaskSample(
                        id=self.build_id(ctx, "_target2"),
                        image_path=image_path,
                        human_prompt="Click the second item",
                        tool_call=ToolCall.left_click(norm_coords_2),  # NORMALIZED!
                        pixel_coords=pixel_coords_2,
                        metadata={{
                            "task_type": self.task_type,
                            "distribution": dist_type,
                            "has_match": True,
                            "target": "second",
                        }},
                        image_size=image.size,
                    ))

            return samples

        def generate_sample(self, ctx: TaskContext) -> TaskSample:
            """Generate one training sample (fallback)."""
            return self.generate_samples(ctx)[0]

        def generate_tests(self, ctx: TaskContext) -> list[TestCase]:
            """Generate test cases from ONE rendered image."""
            samples = self.generate_samples(ctx)
            return [
                TestCase(
                    test_id=f"test_{{ctx.index:04d}}_{{i}}",
                    screenshot=s.image_path,
                    prompt=s.human_prompt,
                    expected_action=s.tool_call.to_dict(),
                    tolerance=10,
                    metadata=s.metadata,
                    pixel_coords=s.pixel_coords,
                )
                for i, s in enumerate(samples)
            ]

        def generate_test(self, ctx: TaskContext) -> TestCase:
            """Generate one test case (fallback)."""
            return self.generate_tests(ctx)[0]
    ''')
    # Python expects UTF-8 source; do not rely on the platform locale.
    (project_dir / "tasks" / "example_task.py").write_text(content, encoding="utf-8")
601
+
602
+
603
def _write_dataset_config(project_dir: Path, project_name: str) -> None:
    """Write config/dataset.yaml with default generation settings.

    Args:
        project_dir: Root directory of the project being created.
        project_name: Hyphenated project name, used as ``name_prefix``.
    """
    content = dedent(f"""\
    # Dataset configuration for {project_name}

    name_prefix: {project_name}
    seed: 42

    # Task counts - how many samples of each type
    tasks:
      example-click: 100

    # Task distributions - distribution of sample types within each task
    # Each task can have its own distribution of subtypes.
    # The values should sum to 1.0 (100%).
    # task_distributions:
    #   example-click:
    #     normal: 0.80  # 80% normal cases
    #     edge_case: 0.15  # 15% edge cases
    #     adversarial: 0.05  # 5% adversarial (no match)

    # Train/test split
    splits:
      train: 0.8

    # System prompt style
    system_prompt: computer-use

    # Output settings
    output:
      image_format: png
      image_quality: 95

    # Test settings (held-out evaluation data)
    test:
      count: 20
      tolerance: 10

    # Annotation settings
    annotation:
      enabled: true
      per_type:
        example-click: 2
    """)
    # Write UTF-8 explicitly: the project name is user-supplied and the
    # platform locale encoding is not guaranteed to be UTF-8.
    (project_dir / "config" / "dataset.yaml").write_text(content, encoding="utf-8")
648
+
649
+
650
def _write_readme(project_dir: Path, project_name: str) -> None:
    """Write README.md describing setup, layout and usage of the project.

    Args:
        project_dir: Root directory of the project being created.
        project_name: Hyphenated project name, used as the document title.
    """
    # The "run directly" example names generator.py, which is the file the
    # scaffold actually writes (the previous text pointed at a nonexistent
    # generate.py).
    content = dedent(f"""\
    # {project_name}

    CUDAG project for generating training data.

    ## Setup

    ```bash
    pip install -e .
    ```

    ## Structure

    - `screen.py` - Screen definition (regions, layout)
    - `state.py` - State dataclass (dynamic data)
    - `renderer.py` - Image rendering logic
    - `models/` - Domain model definitions (Patient, Provider, etc.)
    - `tasks/` - Task implementations
    - `config/` - Dataset configurations
    - `assets/` - Base images, fonts, etc.

    ## Usage

    ```bash
    # Generate dataset
    cudag generate --config config/dataset.yaml

    # Or run directly
    python generator.py --config config/dataset.yaml
    ```

    ## Development

    1. Edit `screen.py` to define your UI regions
    2. Edit `state.py` to define your data model
    3. Edit `renderer.py` to implement image generation
    4. Add domain models in `models/` for data generation
    5. Add tasks in `tasks/` for each interaction type
    6. Configure dataset.yaml with sample counts
    """)
    # Write UTF-8 explicitly: the title is user-supplied text.
    (project_dir / "README.md").write_text(content, encoding="utf-8")
693
+
694
+
695
def _get_templates_dir() -> Path:
    """Return the ``templates`` directory located one level above this module's directory."""
    module_dir = Path(__file__).parent
    package_dir = module_dir.parent
    return package_dir / "templates"
698
+
699
+
700
def _write_scripts(project_dir: Path, module_name: str) -> None:
    """Copy every ``*.sh`` template into the project's ``scripts`` directory.

    Each copied script gets the execute bit set for user, group and others.
    ``module_name`` is accepted for signature parity and is not used here.
    """
    source_dir = _get_templates_dir() / "scripts"
    target_dir = project_dir / "scripts"
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

    for template in source_dir.glob("*.sh"):
        copied = target_dir / template.name
        shutil.copy(template, copied)
        # Add the execute bits on top of whatever mode the copy ended up with.
        current_mode = copied.stat().st_mode
        copied.chmod(current_mode | exec_bits)
711
+
712
+
713
def _write_modal_apps(project_dir: Path) -> None:
    """Copy the Modal app modules into the project's ``modal_apps`` directory.

    ``templates/modal_apps`` is the preferred source. When that directory is
    not present, the ``modal_apps`` directory that sits next to ``templates``
    is used instead, so the scaffolded project does not silently end up with
    an empty ``modal_apps`` folder.

    Args:
        project_dir: Root directory of the project being created; its
            ``modal_apps`` sub-directory must already exist.
    """
    templates_dir = _get_templates_dir()
    source_dir = templates_dir / "modal_apps"
    if not source_dir.is_dir():
        # NOTE(review): the published wheel ships its Modal modules beside
        # templates/ rather than inside it - confirm this is the intended
        # fallback source before relying on it.
        source_dir = templates_dir.parent / "modal_apps"

    dest_dir = project_dir / "modal_apps"
    # glob() on a missing directory yields nothing, so this stays a no-op
    # when neither location exists; sorted() only makes copy order stable.
    for py_file in sorted(source_dir.glob("*.py")):
        shutil.copy(py_file, dest_dir / py_file.name)
722
+
723
+
724
def _write_makefile(project_dir: Path, module_name: str) -> None:
    """Write the project Makefile (install, lint, typecheck, format, build).

    Args:
        project_dir: Root directory of the project being created.
        module_name: Underscored module name (accepted for signature parity
            with the other writers; not used by this template).
    """
    # Recipe lines start with a "\t" escape so the generated file contains the
    # literal tabs make requires; dedent() only strips the shared 4-space
    # margin. "{{}}" is an f-string escape for find's "{}" placeholder.
    content = dedent(f'''\
    # Derivative works may be released by researchers,
    # but original files may not be redistributed or used beyond research purposes.

    .PHONY: all check lint typecheck format clean install dev test build generate

    # Use venv Python if available, fallback to python3
    PYTHON := $(shell test -x .venv/bin/python && echo .venv/bin/python || echo python3)
    SRC_FILES := $(shell find . -name "*.py" -not -path "./.venv/*")

    # Default target
    all: check

    # Setup virtualenv and install dependencies
    install:
    \tpython3 -m venv .venv
    \t.venv/bin/pip install -e .

    # Install dev dependencies
    dev: install
    \t.venv/bin/pip install -e ".[dev]"
    \t.venv/bin/pip install radon

    # Run all quality checks
    check: lint typecheck
    \t@echo "✓ All checks passed!"

    # Linting with ruff
    lint:
    \t@echo "Running ruff..."
    \t$(PYTHON) -m ruff check $(SRC_FILES)
    \t$(PYTHON) -m ruff format --check $(SRC_FILES)

    # Type checking with mypy
    typecheck:
    \t@echo "Running mypy..."
    \t$(PYTHON) -m mypy $(SRC_FILES) --strict

    # Auto-format code
    format:
    \t@echo "Formatting code..."
    \t$(PYTHON) -m ruff format $(SRC_FILES)
    \t$(PYTHON) -m ruff check --fix $(SRC_FILES)

    # Clean build artifacts
    clean:
    \trm -rf build/ dist/ *.egg-info/
    \tfind . -type d -name __pycache__ -exec rm -rf {{}} + 2>/dev/null || true
    \tfind . -type f -name "*.pyc" -delete 2>/dev/null || true

    # Generate dataset
    generate:
    \t./scripts/generate.sh --dry

    # Build and upload
    build:
    \t./scripts/build.sh
    ''')
    # The check mark above is non-ASCII: with a locale encoding such as
    # cp1252 an implicit-encoding write raises UnicodeEncodeError.
    (project_dir / "Makefile").write_text(content, encoding="utf-8")
785
+
786
+
787
def _write_copyright(project_dir: Path) -> None:
    """Create COPYRIGHT.txt holding the copyright line and the usage terms."""
    notice_lines = (
        "Copyright (c) 2025 Tylt LLC. All rights reserved.",
        "",
        "Derivative works may be released by researchers,",
        "but original files may not be redistributed or used beyond research purposes.",
    )
    target = project_dir / "COPYRIGHT.txt"
    target.write_text("\n".join(notice_lines) + "\n")
796
+
797
+
798
def _init_git(project_dir: Path) -> None:
    """Initialize a git repository with a pre-commit hook and an initial commit.

    Steps: write ``.gitattributes``, run ``git init``, install a
    ``pre-commit`` hook that delegates to ``scripts/pre-commit.sh``, stage
    everything and commit (retrying once with an explicit identity if the
    first commit fails).

    Git setup is best-effort: when the ``git`` executable is not available or
    ``git init`` fails, the function returns after writing ``.gitattributes``
    instead of raising or hand-creating a partial ``.git`` directory.

    Args:
        project_dir: Root directory of the project being created.
    """
    import subprocess

    # .gitattributes is a plain file and useful even without a repository,
    # so it is written before any git command runs.
    gitattributes = dedent('''\
    # Auto detect text files and perform LF normalization
    * text=auto

    # Python files
    *.py text diff=python

    # Shell scripts
    *.sh text eol=lf

    # Binary files
    *.png binary
    *.jpg binary
    *.jpeg binary
    *.gif binary
    *.ico binary
    *.ttf binary
    *.woff binary
    *.woff2 binary
    ''')
    (project_dir / ".gitattributes").write_text(gitattributes, encoding="utf-8")

    # Without a git executable subprocess.run would raise FileNotFoundError
    # and abort scaffolding after all files were already written.
    if shutil.which("git") is None:
        return

    init_result = subprocess.run(["git", "init"], cwd=project_dir, capture_output=True)
    if init_result.returncode != 0:
        # Do not fabricate .git/hooks inside a repository that was never created.
        return

    # Create .git/hooks directory if needed
    hooks_dir = project_dir / ".git" / "hooks"
    hooks_dir.mkdir(parents=True, exist_ok=True)

    # Write pre-commit hook
    precommit_hook = dedent('''\
    #!/usr/bin/env bash
    # Pre-commit hook - runs code quality checks on staged files

    exec ./scripts/pre-commit.sh
    ''')
    hook_path = hooks_dir / "pre-commit"
    hook_path.write_text(precommit_hook, encoding="utf-8")
    hook_path.chmod(hook_path.stat().st_mode | 0o755)

    # Stage all files
    subprocess.run(["git", "add", "."], cwd=project_dir, capture_output=True)

    # --no-verify skips the pre-commit hook: cudag is not installed in the
    # new project yet, so the hook's checks cannot run.
    commit_args = [
        "commit",
        "--no-verify",
        "-m",
        "Initial project scaffolding from cudag new",
    ]
    result = subprocess.run(["git", *commit_args], cwd=project_dir, capture_output=True)

    # The usual failure is a missing author identity; retry with an explicit one.
    if result.returncode != 0:
        subprocess.run(
            [
                "git",
                "-c", "user.email=cudag@example.com",
                "-c", "user.name=CUDAG",
                *commit_args,
            ],
            cwd=project_dir,
            capture_output=True,
        )