cudag-0.3.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. cudag/__init__.py +334 -0
  2. cudag/annotation/__init__.py +77 -0
  3. cudag/annotation/codegen.py +648 -0
  4. cudag/annotation/config.py +545 -0
  5. cudag/annotation/loader.py +342 -0
  6. cudag/annotation/scaffold.py +121 -0
  7. cudag/annotation/transcription.py +296 -0
  8. cudag/cli/__init__.py +5 -0
  9. cudag/cli/main.py +315 -0
  10. cudag/cli/new.py +873 -0
  11. cudag/core/__init__.py +364 -0
  12. cudag/core/button.py +137 -0
  13. cudag/core/canvas.py +222 -0
  14. cudag/core/config.py +70 -0
  15. cudag/core/coords.py +233 -0
  16. cudag/core/data_grid.py +804 -0
  17. cudag/core/dataset.py +678 -0
  18. cudag/core/distribution.py +136 -0
  19. cudag/core/drawing.py +75 -0
  20. cudag/core/fonts.py +156 -0
  21. cudag/core/generator.py +163 -0
  22. cudag/core/grid.py +367 -0
  23. cudag/core/grounding_task.py +247 -0
  24. cudag/core/icon.py +207 -0
  25. cudag/core/iconlist_task.py +301 -0
  26. cudag/core/models.py +1251 -0
  27. cudag/core/random.py +130 -0
  28. cudag/core/renderer.py +190 -0
  29. cudag/core/screen.py +402 -0
  30. cudag/core/scroll_task.py +254 -0
  31. cudag/core/scrollable_grid.py +447 -0
  32. cudag/core/state.py +110 -0
  33. cudag/core/task.py +293 -0
  34. cudag/core/taskbar.py +350 -0
  35. cudag/core/text.py +212 -0
  36. cudag/core/utils.py +82 -0
  37. cudag/data/surnames.txt +5000 -0
  38. cudag/modal_apps/__init__.py +4 -0
  39. cudag/modal_apps/archive.py +103 -0
  40. cudag/modal_apps/extract.py +138 -0
  41. cudag/modal_apps/preprocess.py +529 -0
  42. cudag/modal_apps/upload.py +317 -0
  43. cudag/prompts/SYSTEM_PROMPT.txt +104 -0
  44. cudag/prompts/__init__.py +33 -0
  45. cudag/prompts/system.py +43 -0
  46. cudag/prompts/tools.py +382 -0
  47. cudag/py.typed +0 -0
  48. cudag/schemas/filesystem.json +90 -0
  49. cudag/schemas/test_record.schema.json +113 -0
  50. cudag/schemas/train_record.schema.json +90 -0
  51. cudag/server/__init__.py +21 -0
  52. cudag/server/app.py +232 -0
  53. cudag/server/services/__init__.py +9 -0
  54. cudag/server/services/generator.py +128 -0
  55. cudag/templates/scripts/archive.sh +35 -0
  56. cudag/templates/scripts/build.sh +13 -0
  57. cudag/templates/scripts/extract.sh +54 -0
  58. cudag/templates/scripts/generate.sh +116 -0
  59. cudag/templates/scripts/pre-commit.sh +44 -0
  60. cudag/templates/scripts/preprocess.sh +46 -0
  61. cudag/templates/scripts/upload.sh +63 -0
  62. cudag/templates/scripts/verify.py +428 -0
  63. cudag/validation/__init__.py +35 -0
  64. cudag/validation/validate.py +508 -0
  65. cudag-0.3.10.dist-info/METADATA +570 -0
  66. cudag-0.3.10.dist-info/RECORD +69 -0
  67. cudag-0.3.10.dist-info/WHEEL +4 -0
  68. cudag-0.3.10.dist-info/entry_points.txt +2 -0
  69. cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/templates/scripts/preprocess.sh
@@ -0,0 +1,46 @@
+ #!/usr/bin/env bash
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
+ # Derivative works may be released by researchers,
+ # but original files may not be redistributed or used beyond research purposes.
+ #
+ # DO NOT EDIT THIS FILE - Generated by cudag framework
+
+ # Pipeline: generate.sh -> upload.sh -> extract.sh -> preprocess.sh
+ #
+ # Usage:
+ #   ./scripts/preprocess.sh --dataset-name <NAME>
+
+ set -euo pipefail
+
+ DATASET_NAME=""
+
+ while [[ $# -gt 0 ]]; do
+   case "$1" in
+     --dataset-name)
+       DATASET_NAME="${2:-}"
+       shift 2
+       ;;
+     *)
+       shift
+       ;;
+   esac
+ done
+
+ if [[ -z "$DATASET_NAME" ]]; then
+   echo "Error: --dataset-name <NAME> is required"
+   exit 1
+ fi
+
+ echo "========================================"
+ echo "STAGE 4: Preprocess Dataset"
+ echo "========================================"
+ echo ""
+ echo "Dataset: $DATASET_NAME"
+ echo ""
+
+ # Find cudag's preprocess.py location and run via Modal
+ CUDAG_PATH=$(uvx --with cudag python -c "import cudag.modal_apps.preprocess as p; print(p.__file__)")
+ uvx modal run --detach "$CUDAG_PATH" --dataset-name "$DATASET_NAME"
+
+ echo ""
+ echo "Preprocessing job started for: $DATASET_NAME"
cudag/templates/scripts/upload.sh
@@ -0,0 +1,63 @@
+ #!/usr/bin/env bash
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
+ # Derivative works may be released by researchers,
+ # but original files may not be redistributed or used beyond research purposes.
+ #
+ # DO NOT EDIT THIS FILE - Generated by cudag framework
+
+ # Usage:
+ #   ./scripts/upload.sh [dataset_dir]        # Upload to Modal volume
+ #   ./scripts/upload.sh --dry [dataset_dir]  # Dry run, show what would be uploaded
+
+ set -euo pipefail
+
+ DRY_RUN=false
+ DATASET_DIR=""
+
+ # Parse args
+ for arg in "$@"; do
+   if [[ "$arg" == "--dry" ]]; then
+     DRY_RUN=true
+   elif [[ -z "$DATASET_DIR" && ! "$arg" =~ ^-- ]]; then
+     DATASET_DIR="$arg"
+   fi
+ done
+
+ echo "========================================"
+ echo "STAGE 2: Upload Dataset"
+ echo "========================================"
+ echo ""
+
+ if [[ -z "$DATASET_DIR" ]]; then
+   # Find most recent dataset
+   DATASET_DIR=$(ls -td datasets/*/ 2>/dev/null | head -1)
+   if [[ -z "$DATASET_DIR" ]]; then
+     echo "No dataset directory found. Specify path or run generate.sh first."
+     exit 1
+   fi
+ fi
+
+ DATASET_NAME=$(basename "$DATASET_DIR")
+ echo "Dataset: $DATASET_NAME"
+ echo "Path: $DATASET_DIR"
+ echo ""
+
+ if [[ "$DRY_RUN" == "true" ]]; then
+   echo "[DRY RUN] Would upload: $DATASET_DIR"
+   exit 0
+ fi
+
+ # Upload via Modal
+ echo "Uploading to Modal volume..."
+ uvx --with cudag python -m cudag.modal_apps.upload "$DATASET_DIR"
+
+ echo ""
+ echo "Upload complete: $DATASET_NAME"
+
+ echo ""
+ echo "========================================"
+ echo "Auto-starting extraction..."
+ echo "========================================"
+ echo ""
+
+ exec ./scripts/extract.sh --dataset-name "$DATASET_NAME"
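The `ls -td datasets/*/ | head -1` fallback selects the most recently modified dataset directory; verify.py below ships the same selection as `find_latest_dataset()`. A self-contained Python sketch of that shared logic, assuming only the `datasets/` layout these scripts use:

from pathlib import Path


def latest_dataset(base: Path = Path("datasets")) -> Path | None:
    """Return the most recently modified dataset directory, or None."""
    if not base.exists():
        return None
    dirs = [p for p in base.iterdir() if p.is_dir()]
    return max(dirs, key=lambda p: p.stat().st_mtime, default=None)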
cudag/templates/scripts/verify.py
@@ -0,0 +1,428 @@
+ #!/usr/bin/env python3
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
+
+ """Interactive dataset verification and configuration tool.
+
+ Usage:
+     python scripts/verify.py --config config/dataset.yaml
+     python scripts/verify.py --existing datasets/my-dataset
+
+ Allows interactive modification of:
+ - Task counts (training samples per task type)
+ - Test distribution (tests per task type)
+ - Train/val split ratio
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import shutil
+ import subprocess
+ import sys
+ from collections import Counter
+ from pathlib import Path
+
+ import yaml
+
+
+ def load_config(config_path: Path) -> dict:
+     """Load dataset configuration from YAML."""
+     with open(config_path) as f:
+         return yaml.safe_load(f)
+
+
+ def save_config(config_path: Path, config: dict) -> None:
+     """Save dataset configuration to YAML."""
+     with open(config_path, "w") as f:
+         yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+
+
+ def analyze_dataset(dataset_dir: Path) -> dict:
+     """Analyze a generated dataset and return stats."""
+     stats = {
+         "dataset_dir": str(dataset_dir),
+         "training": {},
+         "tests": {},
+         "images": 0,
+     }
+
+     # Count training samples by task type
+     data_path = dataset_dir / "data.jsonl"
+     if data_path.exists():
+         task_counts: Counter[str] = Counter()
+         with open(data_path) as f:
+             for line in f:
+                 record = json.loads(line)
+                 task_type = record.get("metadata", {}).get("task_type", "unknown")
+                 task_counts[task_type] += 1
+         stats["training"] = dict(task_counts)
+
+     # Count training images
+     images_dir = dataset_dir / "images"
+     if images_dir.exists():
+         stats["images"] = len(list(images_dir.glob("*")))
+
+     # Count tests by task type
+     test_json = dataset_dir / "test" / "test.json"
+     if test_json.exists():
+         with open(test_json) as f:
+             tests = json.load(f)
+         test_counts: Counter[str] = Counter()
+         for test in tests:
+             task_type = test.get("metadata", {}).get("task_type", "unknown")
+             test_counts[task_type] += 1
+         stats["tests"] = dict(test_counts)
+
+     return stats
+
+
+ def print_stats(stats: dict, config: dict | None = None) -> None:
+     """Print dataset statistics in a readable format."""
+     print("\n" + "=" * 60)
+     print("DATASET STATISTICS")
+     print("=" * 60)
+
+     if config:
+         print(f"\nConfig: {config.get('name_prefix', 'unknown')}")
+         print(f"Seed: {config.get('seed', 'N/A')}")
+
+     print(f"\nDataset: {stats['dataset_dir']}")
+     print(f"Total images: {stats['images']}")
+
+     # Training samples
+     print("\n--- TRAINING SAMPLES ---")
+     training = stats.get("training", {})
+     total_training = sum(training.values())
+     print(f"Total: {total_training}")
+
+     if training:
+         max_name_len = max(len(name) for name in training.keys())
+         for task_type, count in sorted(training.items()):
+             pct = (count / total_training * 100) if total_training > 0 else 0
+             bar = "#" * int(pct / 2)
+             print(f"  {task_type:<{max_name_len}} : {count:>5} ({pct:5.1f}%) {bar}")
+
+     # Test samples
+     print("\n--- TEST SAMPLES ---")
+     tests = stats.get("tests", {})
+     total_tests = sum(tests.values())
+     print(f"Total: {total_tests}")
+
+     if tests:
+         max_name_len = max(len(name) for name in tests.keys())
+         for task_type, count in sorted(tests.items()):
+             pct = (count / total_tests * 100) if total_tests > 0 else 0
+             print(f"  {task_type:<{max_name_len}} : {count:>3} ({pct:5.1f}%)")
+
+     print("=" * 60)
+
+
+ def prompt_yes_no(question: str, default: bool = True) -> bool:
+     """Prompt user for yes/no answer."""
+     suffix = " [Y/n]: " if default else " [y/N]: "
+     while True:
+         response = input(question + suffix).strip().lower()
+         if not response:
+             return default
+         if response in ("y", "yes"):
+             return True
+         if response in ("n", "no"):
+             return False
+         print("Please answer 'y' or 'n'")
+
+
+ def prompt_int(question: str, default: int | None = None) -> int:
+     """Prompt user for integer input."""
+     suffix = f" [{default}]: " if default is not None else ": "
+     while True:
+         response = input(question + suffix).strip()
+         if not response and default is not None:
+             return default
+         try:
+             return int(response)
+         except ValueError:
+             print("Please enter a valid integer")
+
+
+ def edit_task_counts(config: dict) -> tuple[dict, bool]:
+     """Interactive editor for task counts. Returns (config, changed)."""
+     tasks = config.get("tasks", {})
+
+     print("\n--- EDIT TASK COUNTS ---")
+     print("Enter new count for each task type (press Enter to keep current):\n")
+
+     new_tasks = {}
+     for task_type, current_count in tasks.items():
+         new_count = prompt_int(f"  {task_type}", default=current_count)
+         new_tasks[task_type] = new_count
+
+     # Check for new tasks
+     if prompt_yes_no("\nAdd a new task type?", default=False):
+         task_type = input("  Task type name: ").strip()
+         count = prompt_int(f"  {task_type} count", default=100)
+         new_tasks[task_type] = count
+
+     changed = new_tasks != tasks
+     config["tasks"] = new_tasks
+     return config, changed
+
+
+ def calc_auto_test_distribution(task_types: list[str], total: int) -> dict[str, int]:
+     """Calculate auto-distribution: 3 each for scroll/click, rest for select."""
+     dist: dict[str, int] = {}
+     simple_tasks = [t for t in task_types if t.startswith("scroll-") or t.startswith("click-")]
+     select_tasks = [t for t in task_types if t.startswith("select-")]
+
+     simple_per_task = 3
+     simple_total = len(simple_tasks) * simple_per_task
+     remaining = max(0, total - simple_total)
+
+     for task_type in simple_tasks:
+         dist[task_type] = simple_per_task
+
+     if select_tasks and remaining > 0:
+         per_select = remaining // len(select_tasks)
+         remainder = remaining % len(select_tasks)
+         for i, task_type in enumerate(select_tasks):
+             dist[task_type] = per_select + (1 if i < remainder else 0)
+     else:
+         for task_type in select_tasks:
+             dist[task_type] = 0
+
+     return dist
+
+
+ def edit_test_distribution(config: dict) -> tuple[dict, bool]:
+     """Interactive editor for test distribution. Returns (config, changed)."""
+     test_config = config.get("test", {})
+     total_tests = test_config.get("count", 100)
+     current_dist = test_config.get("distribution", {})
+     task_types = list(config.get("tasks", {}).keys())
+
+     print("\n--- EDIT TEST DISTRIBUTION ---")
+     new_total = prompt_int("Total test count", default=total_tests)
+
+     # Calculate auto values for any tasks not explicitly set
+     auto_dist = calc_auto_test_distribution(task_types, new_total)
+
+     print("\nPer-task test counts (press Enter to keep current):")
+     new_dist: dict[str, int] = {}
+     for task_type in task_types:
+         # Use explicit value if set, otherwise show auto-calculated
+         current = current_dist.get(task_type, auto_dist.get(task_type, 0))
+         new_count = prompt_int(f"  {task_type}", default=current)
+         new_dist[task_type] = new_count
+
+     # Check if anything changed
+     changed = (new_total != total_tests) or (new_dist != current_dist)
+
+     test_config["count"] = new_total
+     test_config["distribution"] = new_dist
+
+     config["test"] = test_config
+     return config, changed
+
+
+ def run_generator(config_path: Path, verbose: bool = False) -> Path | None:
+     """Run the generator and return the generated dataset path."""
+     import os
+     import threading
+     import time
+
+     print("\n" + "-" * 40)
+     print("Running generator (this may take a few minutes)...")
+     print("-" * 40 + "\n", flush=True)
+
+     env = os.environ.copy()
+     env["CUDAG_FROM_SCRIPT"] = "1"
+     cmd = ["uv", "run", "python", "generator.py", "--config", str(config_path)]
+
+     if verbose:
+         # Stream output directly
+         result = subprocess.run(cmd, env=env)
+     else:
+         # Spinner for progress indication
+         stop_spinner = threading.Event()
+
+         def spinner() -> None:
+             chars = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"
+             i = 0
+             while not stop_spinner.is_set():
+                 print(f"\r{chars[i % len(chars)]} Generating...", end="", flush=True)
+                 time.sleep(0.1)
+                 i += 1
+             print("\r" + " " * 20 + "\r", end="", flush=True)
+
+         spinner_thread = threading.Thread(target=spinner)
+         spinner_thread.start()
+
+         result = subprocess.run(cmd, capture_output=True, text=True, env=env)
+
+         stop_spinner.set()
+         spinner_thread.join()
+
+         if result.returncode == 0:
+             print(result.stdout)
+         else:
+             print(result.stderr)
+
+     if result.returncode != 0:
+         print("ERROR: Generator failed!")
+         return None
+
+     # Find the generated dataset
+     datasets_dir = Path("datasets")
+     if datasets_dir.exists():
+         datasets = sorted(datasets_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True)
+         if datasets:
+             return datasets[0]
+
+     return None
+
+
+ def find_latest_dataset() -> Path | None:
+     """Find the most recently generated dataset."""
+     datasets_dir = Path("datasets")
+     if not datasets_dir.exists():
+         return None
+     datasets = sorted(datasets_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True)
+     return datasets[0] if datasets else None
+
+
+ def interactive_loop(
+     config_path: Path, existing_dataset: Path | None = None, verbose: bool = False
+ ) -> None:
+     """Main interactive loop for dataset verification."""
+     config = load_config(config_path)
+
+     # Use provided dataset or find latest
+     if existing_dataset and existing_dataset.exists():
+         dataset_dir = existing_dataset
+         needs_generation = False
+         print(f"Using existing dataset: {dataset_dir}")
+     else:
+         dataset_dir = find_latest_dataset()
+         needs_generation = dataset_dir is None
+
+     while True:
+         if needs_generation:
+             # Run generator
+             dataset_dir = run_generator(config_path, verbose=verbose)
+             if dataset_dir is None:
+                 print("Failed to generate dataset. Please fix errors and try again.")
+                 if not prompt_yes_no("Retry?"):
+                     break
+                 continue
+             needs_generation = False
+
+         # Analyze and show stats
+         stats = analyze_dataset(dataset_dir)
+         print_stats(stats, config)
+
+         # Ask for approval
+         print("\nOptions:")
+         print("  [a] Approve - dataset looks good")
+         print("  [t] Modify task counts")
+         print("  [d] Modify test distribution")
+         print("  [r] Regenerate with same config")
+         print("  [q] Quit without approving")
+
+         choice = input("\nChoice [a/t/d/r/q]: ").strip().lower()
+
+         if choice == "a":
+             print(f"\nDataset approved: {dataset_dir}")
+             print("Ready for upload with: ./scripts/upload.sh " + str(dataset_dir))
+             break
+
+         elif choice == "t":
+             config, changed = edit_task_counts(config)
+             if changed:
+                 save_config(config_path, config)
+                 print(f"\nUpdated config saved to {config_path}")
+                 # Delete old dataset before regenerating
+                 if dataset_dir.exists():
+                     shutil.rmtree(dataset_dir)
+                 needs_generation = True
+             else:
+                 print("\nNo changes made.")
+
+         elif choice == "d":
+             config, changed = edit_test_distribution(config)
+             if changed:
+                 save_config(config_path, config)
+                 print(f"\nUpdated config saved to {config_path}")
+                 if dataset_dir.exists():
+                     shutil.rmtree(dataset_dir)
+                 needs_generation = True
+             else:
+                 print("\nNo changes made.")
+
+         elif choice == "r":
+             if dataset_dir.exists():
+                 shutil.rmtree(dataset_dir)
+             needs_generation = True
+
+         elif choice == "q":
+             print("\nExiting without approval.")
+             # Clean up generated dataset
+             if dataset_dir.exists() and prompt_yes_no("Delete generated dataset?", default=False):
+                 shutil.rmtree(dataset_dir)
+             break
+
+         else:
+             print("Invalid choice, please try again.")
+
+
+ def main() -> None:
+     """Main entry point."""
+     parser = argparse.ArgumentParser(
+         description="Interactive dataset verification and configuration tool"
+     )
+     parser.add_argument(
+         "--config",
+         "-c",
+         type=Path,
+         help="Path to dataset config YAML",
+     )
+     parser.add_argument(
+         "--existing",
+         "-e",
+         type=Path,
+         help="Path to existing dataset to verify (skips generation)",
+     )
+     parser.add_argument(
+         "--verbose",
+         "-v",
+         action="store_true",
+         help="Stream generator output instead of showing spinner",
+     )
+     args = parser.parse_args()
+
+     # Determine config path
+     config_path = args.config
+     if not config_path:
+         config_path = Path("config/dataset.prod.yaml")
+         if not config_path.exists():
+             config_path = Path("config/dataset.yaml")
+
+     if not config_path.exists():
+         print(f"ERROR: Config not found: {config_path}")
+         parser.print_help()
+         sys.exit(1)
+
+     if args.existing:
+         # Verify existing dataset
+         if not args.existing.exists():
+             print(f"ERROR: Dataset not found: {args.existing}")
+             sys.exit(1)
+         interactive_loop(config_path, existing_dataset=args.existing, verbose=args.verbose)
+     else:
+         # Default: generate and verify
+         interactive_loop(config_path, verbose=args.verbose)
+
+
+ if __name__ == "__main__":
+     main()
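verify.py never spells out the config it edits, but its accessors pin down the shape: `name_prefix` and `seed` at the top level, a `tasks` mapping of task type to training-sample count, and a `test` block with `count` plus a per-task `distribution`. A hypothetical example of what `load_config()` would return for such a file; every task name and number below is illustrative, not taken from the package:

# Hypothetical parsed config/dataset.yaml, shaped by the keys verify.py reads.
example_config = {
    "name_prefix": "my-dataset",
    "seed": 42,
    "tasks": {
        # training samples per task type
        "click-button": 500,
        "scroll-list": 500,
        "select-row": 1000,
    },
    "test": {
        "count": 100,
        # calc_auto_test_distribution(task_types, 100) would yield exactly
        # this: 3 each for the click-*/scroll-* tasks (6 total), and the
        # remaining 94 assigned to the single select-* task.
        "distribution": {
            "click-button": 3,
            "scroll-list": 3,
            "select-row": 94,
        },
    },
}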
cudag/validation/__init__.py
@@ -0,0 +1,35 @@
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
+
+ """Dataset validation module for CUDAG.
+
+ This module provides validation for CUDAG datasets to ensure they conform
+ to the expected filesystem structure and data schemas.
+
+ Example:
+     from cudag.validation import validate_dataset
+
+     errors = validate_dataset(Path("datasets/my-dataset"))
+     if errors:
+         for error in errors:
+             print(f"ERROR: {error}")
+ """
+
+ from cudag.validation.validate import (
+     ValidationError,
+     validate_dataset,
+     validate_filesystem,
+     validate_image_paths,
+     validate_test_records,
+     validate_training_records,
+ )
+
+ __all__ = [
+     "ValidationError",
+     "validate_dataset",
+     "validate_filesystem",
+     "validate_image_paths",
+     "validate_test_records",
+     "validate_training_records",
+ ]
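The docstring example translates directly into a pre-upload gate. A minimal sketch using only the exports above; the dataset path and the exit-code convention are illustrative assumptions, not part of the package:

import sys
from pathlib import Path

from cudag.validation import validate_dataset

# Validate a generated dataset and fail loudly before uploading it.
errors = validate_dataset(Path("datasets/my-dataset"))
for error in errors:
    print(f"ERROR: {error}")
sys.exit(1 if errors else 0)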