cudag 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. cudag/__init__.py +334 -0
  2. cudag/annotation/__init__.py +77 -0
  3. cudag/annotation/codegen.py +648 -0
  4. cudag/annotation/config.py +545 -0
  5. cudag/annotation/loader.py +342 -0
  6. cudag/annotation/scaffold.py +121 -0
  7. cudag/annotation/transcription.py +296 -0
  8. cudag/cli/__init__.py +5 -0
  9. cudag/cli/main.py +315 -0
  10. cudag/cli/new.py +873 -0
  11. cudag/core/__init__.py +364 -0
  12. cudag/core/button.py +137 -0
  13. cudag/core/canvas.py +222 -0
  14. cudag/core/config.py +70 -0
  15. cudag/core/coords.py +233 -0
  16. cudag/core/data_grid.py +804 -0
  17. cudag/core/dataset.py +678 -0
  18. cudag/core/distribution.py +136 -0
  19. cudag/core/drawing.py +75 -0
  20. cudag/core/fonts.py +156 -0
  21. cudag/core/generator.py +163 -0
  22. cudag/core/grid.py +367 -0
  23. cudag/core/grounding_task.py +247 -0
  24. cudag/core/icon.py +207 -0
  25. cudag/core/iconlist_task.py +301 -0
  26. cudag/core/models.py +1251 -0
  27. cudag/core/random.py +130 -0
  28. cudag/core/renderer.py +190 -0
  29. cudag/core/screen.py +402 -0
  30. cudag/core/scroll_task.py +254 -0
  31. cudag/core/scrollable_grid.py +447 -0
  32. cudag/core/state.py +110 -0
  33. cudag/core/task.py +293 -0
  34. cudag/core/taskbar.py +350 -0
  35. cudag/core/text.py +212 -0
  36. cudag/core/utils.py +82 -0
  37. cudag/data/surnames.txt +5000 -0
  38. cudag/modal_apps/__init__.py +4 -0
  39. cudag/modal_apps/archive.py +103 -0
  40. cudag/modal_apps/extract.py +138 -0
  41. cudag/modal_apps/preprocess.py +529 -0
  42. cudag/modal_apps/upload.py +317 -0
  43. cudag/prompts/SYSTEM_PROMPT.txt +104 -0
  44. cudag/prompts/__init__.py +33 -0
  45. cudag/prompts/system.py +43 -0
  46. cudag/prompts/tools.py +382 -0
  47. cudag/py.typed +0 -0
  48. cudag/schemas/filesystem.json +90 -0
  49. cudag/schemas/test_record.schema.json +113 -0
  50. cudag/schemas/train_record.schema.json +90 -0
  51. cudag/server/__init__.py +21 -0
  52. cudag/server/app.py +232 -0
  53. cudag/server/services/__init__.py +9 -0
  54. cudag/server/services/generator.py +128 -0
  55. cudag/templates/scripts/archive.sh +35 -0
  56. cudag/templates/scripts/build.sh +13 -0
  57. cudag/templates/scripts/extract.sh +54 -0
  58. cudag/templates/scripts/generate.sh +116 -0
  59. cudag/templates/scripts/pre-commit.sh +44 -0
  60. cudag/templates/scripts/preprocess.sh +46 -0
  61. cudag/templates/scripts/upload.sh +63 -0
  62. cudag/templates/scripts/verify.py +428 -0
  63. cudag/validation/__init__.py +35 -0
  64. cudag/validation/validate.py +508 -0
  65. cudag-0.3.10.dist-info/METADATA +570 -0
  66. cudag-0.3.10.dist-info/RECORD +69 -0
  67. cudag-0.3.10.dist-info/WHEEL +4 -0
  68. cudag-0.3.10.dist-info/entry_points.txt +2 -0
  69. cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/prompts/tools.py ADDED
@@ -0,0 +1,382 @@
1
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
2
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
3
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
4
+
5
+ """Computer use tool definition and tool_call formatting.
6
+
7
+ This module codifies the canonical tool_call format used in VLM training datasets.
8
+ All tool calls must use this format for consistency across generators.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import re
15
+ from dataclasses import dataclass, field
16
+ from typing import Any, Literal
17
+
18
# Valid actions for computer_use tool.
# This is a typing.Literal alias for static type checking, not a runtime
# collection; the same names are the keys of ACTION_DESCRIPTIONS below.
TOOL_ACTIONS = Literal[
    "key",
    "type",
    "mouse_move",
    "left_click",
    "left_click_drag",
    "right_click",
    "middle_click",
    "double_click",
    "triple_click",
    "scroll",
    "hscroll",
    "wait",
    "terminate",
    "answer",
]

# Action descriptions for system prompt.
# Maps each action name to a one-line description. Insertion order matters:
# COMPUTER_USE_TOOL iterates this dict to build the `action` description text
# and its `enum` list, and validate_tool_call treats the keys as the set of
# valid actions.
ACTION_DESCRIPTIONS: dict[str, str] = {
    "key": "Press keys in order, release in reverse.",
    "type": "Type a string of text.",
    "mouse_move": "Move the cursor to (x, y).",
    "left_click": "Left click at (x, y).",
    "left_click_drag": "Click and drag from current to (x, y).",
    "right_click": "Right click at (x, y).",
    "middle_click": "Middle click at (x, y).",
    "double_click": "Double-click at (x, y).",
    "triple_click": "Triple-click at (x, y) (simulated as double-click).",
    "scroll": "Scroll the mouse wheel.",
    "hscroll": "Horizontal scroll.",
    "wait": "Wait N seconds.",
    "terminate": "End the task with a status.",
    "answer": "Answer a question.",
}

# Actions that require coordinate parameter.
# validate_tool_call reports an error when one of these actions has no
# coordinate set.
COORDINATE_ACTIONS: set[str] = {
    "mouse_move",
    "left_click",
    "left_click_drag",
    "right_click",
    "middle_click",
    "double_click",
    "triple_click",
    "scroll",
    "hscroll",
}

# Actions that require specific parameters.
# Values are ToolCall attribute names that must not be None for the action.
# Note that scroll/hscroll list "coordinate" here and are also members of
# COORDINATE_ACTIONS above.
ACTION_REQUIRED_PARAMS: dict[str, list[str]] = {
    "key": ["keys"],
    "type": ["text"],
    "scroll": ["coordinate", "pixels"],
    "hscroll": ["coordinate", "pixels"],
    "wait": ["time"],
    "terminate": ["status"],
}
76
+
77
# Canonical computer_use tool definition (JSON schema).
# Built once at import time from ACTION_DESCRIPTIONS, so later mutation of that
# dict is not reflected here.
COMPUTER_USE_TOOL: dict[str, Any] = {
    "type": "function",
    "function": {
        "name_for_human": "computer_use",
        "name": "computer_use",
        "description": "Perform computer actions",
        "parameters": {
            "properties": {
                "action": {
                    # One "* `name`: description" bullet per action, in
                    # ACTION_DESCRIPTIONS order, joined with newlines.
                    "description": "\n".join(
                        f"* `{action}`: {desc}" for action, desc in ACTION_DESCRIPTIONS.items()
                    ),
                    # Allowed values follow the same key order as the bullets.
                    "enum": list(ACTION_DESCRIPTIONS.keys()),
                    "type": "string",
                },
                "keys": {
                    "description": "Required only by `action=key`.",
                    "type": "array",
                },
                "text": {
                    "description": "Required only by `action=type`.",
                    "type": "string",
                },
                "coordinate": {
                    "description": "Mouse coordinates (1000x1000 normalized).",
                    "type": "array",
                },
                "pixels": {
                    "description": "The amount of scrolling.",
                    "type": "number",
                },
                "time": {
                    "description": "The seconds to wait.",
                    "type": "number",
                },
                "status": {
                    "description": "The status of the task.",
                    "type": "string",
                    "enum": ["success", "failure"],
                },
            },
            # Only `action` is unconditionally required by the schema; the
            # per-action requirements are expressed in the descriptions above.
            "required": ["action"],
            "type": "object",
        },
        "args_format": "Format the arguments as a JSON object.",
    },
}
125
+
126
+
127
@dataclass
class BboxCall:
    """Represents a get_bbox tool call for element grounding.

    This is the canonical format for bounding box detection in VLMGen datasets.
    Used for "grounding" task types that identify element locations.
    """

    bbox_2d: tuple[int, int, int, int]
    """Bounding box coordinates [x1, y1, x2, y2] in RU (0-1000)."""

    label: str | None = None
    """Optional human-readable label of the element being located."""

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        Returns:
            ``{"name": "get_bbox", "arguments": {...}}`` where the arguments
            hold ``bbox_2d`` as a list and, when the label is non-empty,
            ``label``.
        """
        args: dict[str, Any] = {"bbox_2d": list(self.bbox_2d)}
        # A falsy label (None or "") is omitted from the serialized form.
        if self.label:
            args["label"] = self.label
        return {
            "name": "get_bbox",
            "arguments": args,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> BboxCall:
        """Create from dictionary.

        Args:
            data: Dict shaped like the output of :meth:`to_dict`.

        Returns:
            BboxCall built from ``data["arguments"]``. A missing ``bbox_2d``
            falls back to ``(0, 0, 0, 0)``.

        Raises:
            ValueError: If the tool name is not ``get_bbox``, if ``arguments``
                is present but not an object, or if ``bbox_2d`` is not a
                4-element list/tuple.
        """
        if data.get("name") != "get_bbox":
            raise ValueError(f"Expected get_bbox tool, got: {data.get('name')}")

        args = data.get("arguments")
        if args is None:
            # Covers both a missing key and an explicit JSON null.
            args = {}
        elif not isinstance(args, dict):
            raise ValueError(
                f"Expected get_bbox arguments to be an object, got: {type(args).__name__}"
            )

        bbox = args.get("bbox_2d", [0, 0, 0, 0])
        # Reject strings and wrong-length sequences instead of silently
        # building a BboxCall whose bbox_2d violates the declared 4-tuple.
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            raise ValueError(f"bbox_2d must be [x1, y1, x2, y2], got: {bbox!r}")

        x1, y1, x2, y2 = bbox
        return cls(
            bbox_2d=(x1, y1, x2, y2),
            label=args.get("label"),
        )

    @classmethod
    def create(
        cls, bbox_2d: tuple[int, int, int, int], label: str | None = None
    ) -> BboxCall:
        """Create a get_bbox tool call.

        Args:
            bbox_2d: Bounding box [x1, y1, x2, y2] in RU units (0-1000)
            label: Optional human-readable label of the element (e.g., "Appts")

        Returns:
            BboxCall instance
        """
        return cls(bbox_2d=bbox_2d, label=label)
179
+
180
+
181
@dataclass
class ToolCall:
    """Represents a computer_use tool call.

    This is the canonical format for all tool calls in VLMGen datasets.

    Only fields that are not None are serialized. ``extra`` carries any
    argument keys outside the known set so they survive a
    ``from_dict`` / ``to_dict`` round trip.
    """

    action: str
    coordinate: tuple[int, int] | None = None
    pixels: int | None = None
    keys: list[str] | None = None
    text: str | None = None
    time: float | None = None
    status: str | None = None
    extra: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        Returns:
            ``{"name": "computer_use", "arguments": {...}}``. Arguments always
            contain ``action``; every other known field is included only when
            it is not None.
        """
        args: dict[str, Any] = {"action": self.action}

        if self.coordinate is not None:
            args["coordinate"] = list(self.coordinate)
        if self.pixels is not None:
            args["pixels"] = self.pixels
        if self.keys is not None:
            args["keys"] = self.keys
        if self.text is not None:
            args["text"] = self.text
        if self.time is not None:
            args["time"] = self.time
        if self.status is not None:
            args["status"] = self.status

        # Include any extra fields. Applied last, so an extra key that
        # collides with a known field name overrides it.
        args.update(self.extra)

        return {"name": "computer_use", "arguments": args}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ToolCall:
        """Create from dictionary.

        Args:
            data: Dict shaped like the output of :meth:`to_dict`.

        Returns:
            ToolCall populated from ``data["arguments"]``; unknown argument
            keys are collected into ``extra``.

        Raises:
            ValueError: If the tool name is not ``computer_use``, if
                ``arguments`` is not an object, or if ``coordinate`` is
                present but is not a list/tuple.
            KeyError: If ``arguments`` has no ``action`` key.
        """
        if data.get("name") != "computer_use":
            raise ValueError(f"Expected computer_use tool, got: {data.get('name')}")

        args = data.get("arguments", {})
        # Model output can put anything here; fail with ValueError (which
        # parse_tool_call handles) rather than an AttributeError on `.get`.
        if not isinstance(args, dict):
            raise ValueError(
                f"Expected computer_use arguments to be an object, got: {type(args).__name__}"
            )

        raw_coord = args.get("coordinate")
        coordinate: tuple[Any, ...] | None
        if not raw_coord:
            # Missing, null, or empty coordinate all mean "no coordinate".
            coordinate = None
        elif isinstance(raw_coord, (list, tuple)):
            coordinate = tuple(raw_coord)
        else:
            # A scalar would raise TypeError in tuple(); a str/dict would be
            # silently turned into a tuple of characters/keys.
            raise ValueError(f"coordinate must be a list [x, y], got: {raw_coord!r}")

        # Extract known fields; everything else is preserved in `extra`.
        known_fields = {"action", "coordinate", "pixels", "keys", "text", "time", "status"}
        extra = {k: v for k, v in args.items() if k not in known_fields}

        return cls(
            action=args["action"],
            coordinate=coordinate,  # type: ignore[arg-type]
            pixels=args.get("pixels"),
            keys=args.get("keys"),
            text=args.get("text"),
            time=args.get("time"),
            status=args.get("status"),
            extra=extra,
        )

    @classmethod
    def left_click(cls, coordinate: tuple[int, int]) -> ToolCall:
        """Create a left_click tool call."""
        return cls(action="left_click", coordinate=coordinate)

    @classmethod
    def double_click(cls, coordinate: tuple[int, int]) -> ToolCall:
        """Create a double_click tool call."""
        return cls(action="double_click", coordinate=coordinate)

    @classmethod
    def right_click(cls, coordinate: tuple[int, int]) -> ToolCall:
        """Create a right_click tool call."""
        return cls(action="right_click", coordinate=coordinate)

    @classmethod
    def scroll(cls, coordinate: tuple[int, int], pixels: int) -> ToolCall:
        """Create a scroll tool call. Negative pixels = scroll up."""
        return cls(action="scroll", coordinate=coordinate, pixels=pixels)

    @classmethod
    def key_press(cls, keys: list[str]) -> ToolCall:
        """Create a key press tool call."""
        return cls(action="key", keys=keys)

    @classmethod
    def type_text(cls, text: str) -> ToolCall:
        """Create a type tool call."""
        return cls(action="type", text=text)

    @classmethod
    def wait(cls, seconds: float) -> ToolCall:
        """Create a wait tool call."""
        return cls(action="wait", time=seconds)

    @classmethod
    def terminate(cls, status: str = "success") -> ToolCall:
        """Create a terminate tool call."""
        return cls(action="terminate", status=status)
282
+
283
+
284
def format_tool_call(tool_call: ToolCall | BboxCall | dict[str, Any]) -> str:
    """Render a tool call as JSON wrapped in ``<tool_call>`` tags.

    This is the canonical output format for GPT responses in training data.

    Args:
        tool_call: A ToolCall or BboxCall (serialized through its
            ``to_dict``), or an already-built dict with {name, arguments}
            which is dumped as-is.

    Returns:
        Three lines: the opening tag, the single-line JSON, the closing tag:
            <tool_call>
            {"name": "computer_use", "arguments": {...}}
            </tool_call>

        or for bounding box:
            <tool_call>
            {"name": "get_bbox", "arguments": {"bbox_2d": [...], "label": "..."}}
            </tool_call>
    """
    payload = (
        tool_call.to_dict()
        if isinstance(tool_call, (ToolCall, BboxCall))
        else tool_call
    )
    return "\n".join(("<tool_call>", json.dumps(payload), "</tool_call>"))
310
+
311
+
312
# Regex pattern for parsing tool calls.
# Captures the JSON object between <tool_call> and </tool_call> in the named
# group "json". The lazy `.*?` must still be followed by `}`, optional
# whitespace and the closing tag, so nested braces inside the object are
# included in the capture. DOTALL lets the JSON span multiple lines;
# IGNORECASE makes the tag match case-insensitive.
TOOL_CALL_PATTERN = re.compile(
    r"<tool_call>\s*(?P<json>\{.*?\})\s*</tool_call>",
    re.DOTALL | re.IGNORECASE,
)
317
+
318
+
319
def parse_tool_call(text: str) -> ToolCall | None:
    """Parse a tool call from model output text.

    Only the first ``<tool_call>...</tool_call>`` span is considered, and only
    ``computer_use`` calls are returned; any other tool name (for example
    ``get_bbox``) yields None because ``ToolCall.from_dict`` rejects it.

    Args:
        text: Model output containing <tool_call>...</tool_call>

    Returns:
        Parsed ToolCall, or None if no tool call is found or the payload is
        malformed in any way (invalid JSON, wrong tool name, missing action,
        or wrongly typed arguments).
    """
    match = TOOL_CALL_PATTERN.search(text)
    if match is None:
        return None

    try:
        data = json.loads(match.group("json"))
        return ToolCall.from_dict(data)
    except (json.JSONDecodeError, ValueError, KeyError, TypeError, AttributeError):
        # TypeError/AttributeError cover structurally wrong payloads such as
        # `"arguments": "text"` or `"coordinate": 5`; model output is
        # untrusted, so these must produce None rather than propagate.
        return None
337
+
338
+
339
def validate_tool_call(tool_call: ToolCall) -> list[str]:
    """Validate a tool call and return list of errors.

    Validation never raises on malformed field values; every problem is
    reported as a message so callers can treat the result uniformly.

    Args:
        tool_call: ToolCall to validate

    Returns:
        List of error messages (empty if valid). An invalid action
        short-circuits: it is the only error reported.
    """
    errors: list[str] = []
    action = tool_call.action

    # Check action is valid. The isinstance guard keeps an unhashable value
    # (e.g. a list taken from malformed JSON) from raising in the dict lookup.
    if not isinstance(action, str) or action not in ACTION_DESCRIPTIONS:
        errors.append(f"Invalid action: {action}")
        return errors  # Can't validate further without valid action

    coordinate = tool_call.coordinate
    needs_coordinate = action in COORDINATE_ACTIONS

    if coordinate is None:
        # Check coordinate is provided for coordinate-requiring actions
        if needs_coordinate:
            errors.append(f"Action '{action}' requires coordinate")
    else:
        # Check the shape first so a wrong-length or non-numeric coordinate is
        # reported instead of raising during unpacking or comparison.
        well_formed = (
            isinstance(coordinate, (list, tuple))
            and len(coordinate) == 2
            and all(
                isinstance(value, (int, float)) and not isinstance(value, bool)
                for value in coordinate
            )
        )
        if not well_formed:
            errors.append(f"Invalid coordinate (expected two numbers): {coordinate!r}")
        else:
            # Check coordinate values are in valid range
            x, y = coordinate
            if not (0 <= x <= 1000 and 0 <= y <= 1000):
                errors.append(f"Coordinate out of range [0, 1000]: ({x}, {y})")

    # Check required parameters
    for param in ACTION_REQUIRED_PARAMS.get(action, []):
        # scroll/hscroll list "coordinate" as required and are also
        # coordinate actions; the check above already reported it, so skip
        # it here to avoid a duplicate message.
        if param == "coordinate" and needs_coordinate:
            continue
        if getattr(tool_call, param, None) is None:
            errors.append(f"Action '{action}' requires '{param}'")

    # Validate scroll pixels
    if action in ("scroll", "hscroll") and tool_call.pixels is not None:
        if not isinstance(tool_call.pixels, (int, float)):
            errors.append(f"Invalid pixels value: {tool_call.pixels}")

    # Validate terminate status
    if action == "terminate" and tool_call.status is not None:
        if tool_call.status not in ("success", "failure"):
            errors.append(f"Invalid terminate status: {tool_call.status}")

    return errors
cudag/py.typed ADDED
File without changes
cudag/schemas/filesystem.json ADDED
@@ -0,0 +1,90 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://claimhawk.com/schemas/cudag/filesystem.json",
4
+ "title": "CUDAG Dataset Filesystem Structure",
5
+ "description": "Defines the required filesystem structure for CUDAG datasets",
6
+ "filesystem": {
7
+ "root": {
8
+ "description": "Dataset root directory (e.g., datasets/calendar--mike--20251203_123456)",
9
+ "required_files": [
10
+ {
11
+ "name": "config.json",
12
+ "description": "Generation configuration and metadata"
13
+ },
14
+ {
15
+ "name": "data.jsonl",
16
+ "description": "All training samples (JSONL format)"
17
+ },
18
+ {
19
+ "name": "train.jsonl",
20
+ "description": "Training split samples (JSONL format)"
21
+ },
22
+ {
23
+ "name": "val.jsonl",
24
+ "description": "Validation split samples (JSONL format)"
25
+ }
26
+ ],
27
+ "optional_files": [
28
+ {
29
+ "name": "held_out.jsonl",
30
+ "description": "Held-out samples for evaluation (JSONL format)"
31
+ }
32
+ ],
33
+ "required_directories": [
34
+ {
35
+ "name": "images",
36
+ "description": "Training images directory",
37
+ "contents": "*.jpg or *.png files referenced by train.jsonl/val.jsonl"
38
+ },
39
+ {
40
+ "name": "test",
41
+ "description": "Test cases directory"
42
+ }
43
+ ]
44
+ },
45
+ "test": {
46
+ "description": "Test directory structure (dataset_root/test/)",
47
+ "required_files": [
48
+ {
49
+ "name": "test.json",
50
+ "description": "Test case definitions (JSON array)"
51
+ }
52
+ ],
53
+ "required_directories": [
54
+ {
55
+ "name": "images",
56
+ "description": "Test screenshot images",
57
+ "contents": "*.png files referenced by test.json"
58
+ }
59
+ ],
60
+ "optional_directories": [
61
+ {
62
+ "name": "annotated",
63
+ "description": "Annotated test images with crosshairs and tool call output",
64
+ "contents": "*_annotated.png files"
65
+ }
66
+ ]
67
+ }
68
+ },
69
+ "naming_conventions": {
70
+ "dataset_directory": {
71
+ "pattern": "{prefix}--{user}--{timestamp}",
72
+ "example": "calendar--mike--20251203_123456"
73
+ },
74
+ "training_images": {
75
+ "pattern": "{dataset_name}_{index:05d}.{ext}",
76
+ "example": "calendar_00001.jpg"
77
+ },
78
+ "test_images": {
79
+ "pattern": "{prefix}_{index:05d}.png",
80
+ "example": "test_00001.png"
81
+ }
82
+ },
83
+ "constraints": [
84
+ "All image paths in JSONL/JSON files must be relative to their parent directory",
85
+ "Training images are relative to dataset root (e.g., 'images/sample.jpg')",
86
+ "Test screenshots are relative to test directory (e.g., 'images/test.png')",
87
+ "Coordinates in training data use RU (Resolution Units) normalized to [0, 1000]",
88
+ "Test tolerance values are in RU units"
89
+ ]
90
+ }
cudag/schemas/test_record.schema.json ADDED
@@ -0,0 +1,113 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://claimhawk.com/schemas/cudag/test_record.schema.json",
4
+ "title": "CUDAG Test Record",
5
+ "description": "Schema for test case records in test/test.json",
6
+ "type": "object",
7
+ "required": ["test_id", "screenshot", "prompt", "expected_action", "tolerance", "metadata"],
8
+ "properties": {
9
+ "test_id": {
10
+ "type": "string",
11
+ "description": "Unique identifier for this test case",
12
+ "pattern": "^[a-zA-Z0-9_-]+$"
13
+ },
14
+ "screenshot": {
15
+ "type": "string",
16
+ "description": "Relative path to screenshot from test directory (e.g., 'images/test_00001.png')",
17
+ "pattern": "^images/[^/]+\\.(jpg|jpeg|png)$"
18
+ },
19
+ "prompt": {
20
+ "type": "string",
21
+ "description": "Human instruction prompt",
22
+ "minLength": 1
23
+ },
24
+ "expected_action": {
25
+ "type": "object",
26
+ "required": ["name", "arguments"],
27
+ "properties": {
28
+ "name": {
29
+ "type": "string",
30
+ "description": "Tool name",
31
+ "const": "computer_use"
32
+ },
33
+ "arguments": {
34
+ "type": "object",
35
+ "required": ["action"],
36
+ "properties": {
37
+ "action": {
38
+ "type": "string",
39
+ "description": "Action type",
40
+ "enum": ["left_click", "right_click", "double_click", "scroll", "type", "key"]
41
+ },
42
+ "coordinate": {
43
+ "type": "array",
44
+ "description": "Click coordinates [x, y] in RU units (0-1000)",
45
+ "items": {
46
+ "type": "integer",
47
+ "minimum": 0,
48
+ "maximum": 1000
49
+ },
50
+ "minItems": 2,
51
+ "maxItems": 2
52
+ },
53
+ "text": {
54
+ "type": "string",
55
+ "description": "Text to type (for type action)"
56
+ },
57
+ "direction": {
58
+ "type": "string",
59
+ "description": "Scroll direction",
60
+ "enum": ["up", "down", "left", "right"]
61
+ },
62
+ "amount": {
63
+ "type": "integer",
64
+ "description": "Scroll amount in pixels"
65
+ }
66
+ },
67
+ "additionalProperties": false
68
+ }
69
+ },
70
+ "additionalProperties": false
71
+ },
72
+ "tolerance": {
73
+ "type": "array",
74
+ "description": "Allowed coordinate tolerance [tol_x, tol_y] in RU units",
75
+ "items": {
76
+ "type": "integer",
77
+ "minimum": 0
78
+ },
79
+ "minItems": 2,
80
+ "maxItems": 2
81
+ },
82
+ "metadata": {
83
+ "type": "object",
84
+ "required": ["task_type"],
85
+ "properties": {
86
+ "task_type": {
87
+ "type": "string",
88
+ "description": "Task type identifier"
89
+ },
90
+ "real_coords": {
91
+ "type": ["array", "null"],
92
+ "description": "Original pixel coordinates [x, y]",
93
+ "items": {
94
+ "type": "integer"
95
+ },
96
+ "minItems": 2,
97
+ "maxItems": 2
98
+ },
99
+ "image_size": {
100
+ "type": "array",
101
+ "description": "Image dimensions [width, height]",
102
+ "items": {
103
+ "type": "integer"
104
+ },
105
+ "minItems": 2,
106
+ "maxItems": 2
107
+ }
108
+ },
109
+ "additionalProperties": true
110
+ }
111
+ },
112
+ "additionalProperties": false
113
+ }
cudag/schemas/train_record.schema.json ADDED
@@ -0,0 +1,90 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://claimhawk.com/schemas/cudag/train_record.schema.json",
4
+ "title": "CUDAG Training Record",
5
+ "description": "Schema for training/validation records in data.jsonl, train.jsonl, val.jsonl",
6
+ "type": "object",
7
+ "required": ["id", "image", "conversations", "metadata"],
8
+ "properties": {
9
+ "id": {
10
+ "type": "string",
11
+ "description": "Unique identifier for this sample",
12
+ "pattern": "^[a-zA-Z0-9_-]+$"
13
+ },
14
+ "image": {
15
+ "type": "string",
16
+ "description": "Relative path to image file from dataset root (e.g., 'images/sample_00001.jpg')",
17
+ "pattern": "^images/[^/]+\\.(jpg|jpeg|png)$"
18
+ },
19
+ "conversations": {
20
+ "type": "array",
21
+ "description": "Human-GPT conversation turns",
22
+ "minItems": 2,
23
+ "maxItems": 2,
24
+ "items": [
25
+ {
26
+ "type": "object",
27
+ "required": ["from", "value"],
28
+ "properties": {
29
+ "from": {
30
+ "type": "string",
31
+ "const": "human"
32
+ },
33
+ "value": {
34
+ "type": "string",
35
+ "description": "Human prompt, must start with <image>\\n",
36
+ "pattern": "^<image>\\n.+"
37
+ }
38
+ },
39
+ "additionalProperties": false
40
+ },
41
+ {
42
+ "type": "object",
43
+ "required": ["from", "value"],
44
+ "properties": {
45
+ "from": {
46
+ "type": "string",
47
+ "const": "gpt"
48
+ },
49
+ "value": {
50
+ "type": "string",
51
+ "description": "GPT response in <tool_call> format",
52
+ "pattern": "^<tool_call>"
53
+ }
54
+ },
55
+ "additionalProperties": false
56
+ }
57
+ ]
58
+ },
59
+ "metadata": {
60
+ "type": "object",
61
+ "required": ["task_type", "real_coords"],
62
+ "properties": {
63
+ "task_type": {
64
+ "type": "string",
65
+ "description": "Task type identifier (e.g., 'click-day', 'click-appointment')"
66
+ },
67
+ "real_coords": {
68
+ "type": "array",
69
+ "description": "Original pixel coordinates [x, y]",
70
+ "items": {
71
+ "type": "integer"
72
+ },
73
+ "minItems": 2,
74
+ "maxItems": 2
75
+ },
76
+ "tolerance": {
77
+ "type": "array",
78
+ "description": "Coordinate tolerance [tol_x, tol_y] in RU units",
79
+ "items": {
80
+ "type": "integer"
81
+ },
82
+ "minItems": 2,
83
+ "maxItems": 2
84
+ }
85
+ },
86
+ "additionalProperties": true
87
+ }
88
+ },
89
+ "additionalProperties": false
90
+ }