cudag 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. cudag/__init__.py +334 -0
  2. cudag/annotation/__init__.py +77 -0
  3. cudag/annotation/codegen.py +648 -0
  4. cudag/annotation/config.py +545 -0
  5. cudag/annotation/loader.py +342 -0
  6. cudag/annotation/scaffold.py +121 -0
  7. cudag/annotation/transcription.py +296 -0
  8. cudag/cli/__init__.py +5 -0
  9. cudag/cli/main.py +315 -0
  10. cudag/cli/new.py +873 -0
  11. cudag/core/__init__.py +364 -0
  12. cudag/core/button.py +137 -0
  13. cudag/core/canvas.py +222 -0
  14. cudag/core/config.py +70 -0
  15. cudag/core/coords.py +233 -0
  16. cudag/core/data_grid.py +804 -0
  17. cudag/core/dataset.py +678 -0
  18. cudag/core/distribution.py +136 -0
  19. cudag/core/drawing.py +75 -0
  20. cudag/core/fonts.py +156 -0
  21. cudag/core/generator.py +163 -0
  22. cudag/core/grid.py +367 -0
  23. cudag/core/grounding_task.py +247 -0
  24. cudag/core/icon.py +207 -0
  25. cudag/core/iconlist_task.py +301 -0
  26. cudag/core/models.py +1251 -0
  27. cudag/core/random.py +130 -0
  28. cudag/core/renderer.py +190 -0
  29. cudag/core/screen.py +402 -0
  30. cudag/core/scroll_task.py +254 -0
  31. cudag/core/scrollable_grid.py +447 -0
  32. cudag/core/state.py +110 -0
  33. cudag/core/task.py +293 -0
  34. cudag/core/taskbar.py +350 -0
  35. cudag/core/text.py +212 -0
  36. cudag/core/utils.py +82 -0
  37. cudag/data/surnames.txt +5000 -0
  38. cudag/modal_apps/__init__.py +4 -0
  39. cudag/modal_apps/archive.py +103 -0
  40. cudag/modal_apps/extract.py +138 -0
  41. cudag/modal_apps/preprocess.py +529 -0
  42. cudag/modal_apps/upload.py +317 -0
  43. cudag/prompts/SYSTEM_PROMPT.txt +104 -0
  44. cudag/prompts/__init__.py +33 -0
  45. cudag/prompts/system.py +43 -0
  46. cudag/prompts/tools.py +382 -0
  47. cudag/py.typed +0 -0
  48. cudag/schemas/filesystem.json +90 -0
  49. cudag/schemas/test_record.schema.json +113 -0
  50. cudag/schemas/train_record.schema.json +90 -0
  51. cudag/server/__init__.py +21 -0
  52. cudag/server/app.py +232 -0
  53. cudag/server/services/__init__.py +9 -0
  54. cudag/server/services/generator.py +128 -0
  55. cudag/templates/scripts/archive.sh +35 -0
  56. cudag/templates/scripts/build.sh +13 -0
  57. cudag/templates/scripts/extract.sh +54 -0
  58. cudag/templates/scripts/generate.sh +116 -0
  59. cudag/templates/scripts/pre-commit.sh +44 -0
  60. cudag/templates/scripts/preprocess.sh +46 -0
  61. cudag/templates/scripts/upload.sh +63 -0
  62. cudag/templates/scripts/verify.py +428 -0
  63. cudag/validation/__init__.py +35 -0
  64. cudag/validation/validate.py +508 -0
  65. cudag-0.3.10.dist-info/METADATA +570 -0
  66. cudag-0.3.10.dist-info/RECORD +69 -0
  67. cudag-0.3.10.dist-info/WHEEL +4 -0
  68. cudag-0.3.10.dist-info/entry_points.txt +2 -0
  69. cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
@@ -0,0 +1,21 @@
1
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
2
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
3
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
4
+
5
+ """CUDAG Server - FastAPI server for annotation-to-generator workflow.
6
+
7
+ This module provides a local HTTP server that the Annotator UI can call
8
+ to generate CUDAG projects from annotations without using the terminal.
9
+
10
+ Start the server:
11
+ cudag serve --port 8420
12
+
13
+ The server exposes:
14
+ GET /health - Health check
15
+ POST /api/v1/generate - Generate project from annotation
16
+ GET /api/v1/status/{job_id} - Check generation progress
17
+ """
18
+
19
+ from cudag.server.app import create_app, run_server
20
+
21
+ __all__ = ["create_app", "run_server"]
cudag/server/app.py ADDED
@@ -0,0 +1,232 @@
1
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
2
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
3
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
4
+
5
+ """FastAPI application for CUDAG server."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import uuid
11
+ from contextlib import asynccontextmanager
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+ from pydantic import BaseModel, Field
18
+
19
+ from cudag import __version__
20
+ from cudag.server.services.generator import GeneratorService
21
+
22
+ # In-memory job storage (for MVP - would use Redis/DB in production)
23
+ _jobs: dict[str, dict[str, Any]] = {}
24
+
25
+
26
class GenerateOptions(BaseModel):
    """Options controlling how a project is scaffolded and generated.

    The constraints below reject values that cannot produce a usable
    project: an empty ``project_name`` would resolve to the output directory
    itself, and a non-positive ``num_samples`` is passed verbatim to the
    generator script and reported as the job's ``total``.
    """

    # Used as the directory name created under ``output_dir``.
    project_name: str = Field(
        ...,
        min_length=1,
        description="Name for the generated project",
    )
    # ``None`` means the endpoint falls back to ``~/cudag-projects``.
    output_dir: str | None = Field(
        None,
        description="Output directory (default: ~/cudag-projects)",
    )
    # Forwarded to ``generator.py --samples``; must be at least 1.
    num_samples: int = Field(
        1000,
        ge=1,
        description="Number of samples per task",
    )
    # When true the endpoint schedules generation as a background job.
    generate_immediately: bool = Field(
        True,
        description="Run generation after scaffolding",
    )
33
+
34
+
35
class GenerateRequest(BaseModel):
    """Payload accepted by ``POST /api/v1/generate``.

    Image fields are transported as base64 text; only ``annotation``,
    ``original_image`` and ``options`` are mandatory.
    """

    # Required: the raw annotation document.
    annotation: dict = Field(
        default=...,
        description="Full annotation.json data",
    )
    # Required: the unmodified screenshot.
    original_image: str = Field(
        default=...,
        description="Base64 encoded original image",
    )
    # Optional: screenshot with dynamic regions masked out.
    masked_image: str | None = Field(
        default=None,
        description="Base64 encoded masked image",
    )
    # Optional: icon name -> base64 image.
    icons: dict[str, str] | None = Field(
        default=None,
        description="Map of icon names to base64 images",
    )
    # Required: scaffolding / generation options.
    options: GenerateOptions
43
+
44
+
45
class GenerateResponse(BaseModel):
    """Result returned by ``POST /api/v1/generate``.

    ``status`` is the only mandatory field; the endpoint sets it to
    ``"error"``, ``"generating"`` or ``"success"`` and fills in whichever of
    the optional fields apply.
    """

    status: str
    # Absolute path of the scaffolded project, when scaffolding succeeded.
    project_path: str | None = Field(default=None)
    # Paths of the files written by the scaffolder.
    files_created: list[str] | None = Field(default=None)
    # Identifier to poll on the status endpoint when generation was started.
    job_id: str | None = Field(default=None)
    # Human-readable failure reason when ``status`` is ``"error"``.
    error: str | None = Field(default=None)
53
+
54
+
55
class StatusResponse(BaseModel):
    """Snapshot of a generation job returned by the status endpoint."""

    # Value last reported by the generator's progress callback.
    progress: int = Field(default=...)
    # Requested number of samples for the job.
    total: int = Field(default=...)
    # Free-text description of what the job is doing right now.
    current_task: str | None = Field(default=None)
    # True once the job finished, successfully or not.
    done: bool = Field(default=...)
    # Failure message; ``None`` while running or after success.
    error: str | None = Field(default=None)
63
+
64
+
65
class HealthResponse(BaseModel):
    """Body of the ``GET /health`` reply."""

    # Liveness indicator; the endpoint reports ``"healthy"``.
    status: str = Field(default=...)
    # Installed cudag package version.
    version: str = Field(default=...)
70
+
71
+
72
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifespan.

    There is no startup work. On shutdown the in-memory job registry is
    emptied. The cleanup sits in ``finally`` so it also runs when the server
    is torn down by an exception or cancellation thrown into the generator
    at the ``yield`` point, not only on a clean exit.

    Args:
        app: The FastAPI application being served (unused).
    """
    try:
        yield
    finally:
        # Shutdown - cleanup jobs
        _jobs.clear()
79
+
80
+
81
def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Registers CORS for the local Annotator UI origins and three routes:
    ``GET /health``, ``POST /api/v1/generate`` and
    ``GET /api/v1/status/{job_id}``.

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI(
        title="CUDAG Server",
        description="Generate CUDAG projects from annotations",
        version=__version__,
        lifespan=lifespan,
    )

    # Configure CORS for local development
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    @app.get("/health", response_model=HealthResponse)
    async def health_check() -> HealthResponse:
        """Check server health."""
        return HealthResponse(status="healthy", version=__version__)

    @app.post("/api/v1/generate", response_model=GenerateResponse)
    async def generate_project(
        request: GenerateRequest,
        background_tasks: BackgroundTasks,
    ) -> GenerateResponse:
        """Generate a CUDAG project from annotation data.

        Failures are reported in the response body (``status="error"``)
        rather than as HTTP errors, so the caller always receives a
        ``GenerateResponse``.
        """
        try:
            service = GeneratorService()
            options = request.options

            # Validate annotation
            validation_error = service.validate_annotation(request.annotation)
            if validation_error:
                return GenerateResponse(status="error", error=validation_error)

            # The project name is joined onto the output directory, so it
            # must be exactly one path component. Values such as "../x" or
            # "a/b" would otherwise place the project outside (or deeper
            # inside) the requested output directory.
            project_name = options.project_name
            if (
                not project_name
                or project_name in {".", ".."}
                or "\\" in project_name
                or Path(project_name).name != project_name
            ):
                return GenerateResponse(
                    status="error",
                    error="project_name must be a single directory name "
                    "without path separators",
                )

            # Determine output directory
            output_dir = Path(options.output_dir or "~/cudag-projects").expanduser()
            project_dir = output_dir / project_name

            # Scaffold the project
            files_created = service.scaffold_project(
                annotation=request.annotation,
                original_image=request.original_image,
                masked_image=request.masked_image,
                icons=request.icons,
                project_dir=project_dir,
            )

            if not options.generate_immediately:
                return GenerateResponse(
                    status="success",
                    project_path=str(project_dir),
                    files_created=files_created,
                )

            # Register the job before scheduling it so a status request that
            # arrives immediately after this response can already find it.
            job_id = str(uuid.uuid4())
            _jobs[job_id] = {
                "progress": 0,
                "total": options.num_samples,
                "current_task": "Starting generation...",
                "done": False,
                "error": None,
                "project_dir": str(project_dir),
            }

            background_tasks.add_task(
                _run_generation,
                job_id=job_id,
                project_dir=project_dir,
                num_samples=options.num_samples,
            )

            return GenerateResponse(
                status="generating",
                project_path=str(project_dir),
                files_created=files_created,
                job_id=job_id,
            )

        except Exception as e:
            # Deliberate catch-all at the request boundary: the UI expects an
            # error payload instead of a 500 response.
            return GenerateResponse(status="error", error=str(e))

    @app.get("/api/v1/status/{job_id}", response_model=StatusResponse)
    async def get_status(job_id: str) -> StatusResponse:
        """Get the status of a generation job.

        Raises:
            HTTPException: 404 when ``job_id`` is unknown.
        """
        # Single lookup so the membership check and the read cannot disagree.
        job = _jobs.get(job_id)
        if job is None:
            raise HTTPException(status_code=404, detail="Job not found")

        return StatusResponse(
            progress=job["progress"],
            total=job["total"],
            current_task=job.get("current_task"),
            done=job["done"],
            error=job.get("error"),
        )

    return app
182
+
183
+
184
async def _run_generation(
    job_id: str,
    project_dir: Path,
    num_samples: int,
) -> None:
    """Execute dataset generation for a job and mirror its state in ``_jobs``.

    The blocking generator call is pushed onto a worker thread. Every update
    is skipped when the job has disappeared from the registry in the
    meantime.

    Args:
        job_id: Key of the job entry in ``_jobs``.
        project_dir: Scaffolded project to generate for.
        num_samples: Number of samples requested.
    """

    def patch_job(**changes: Any) -> None:
        # Apply the changes only if the job is still registered.
        record = _jobs.get(job_id)
        if record is not None:
            record.update(changes)

    def on_progress(progress: int, task: str) -> None:
        patch_job(progress=progress, current_task=task)

    try:
        service = GeneratorService()
        await asyncio.to_thread(
            service.run_generation,
            project_dir=project_dir,
            num_samples=num_samples,
            progress_callback=on_progress,
        )
    except Exception as e:
        patch_job(done=True, error=str(e))
    else:
        patch_job(done=True, current_task="Generation complete")
214
+
215
+
216
def run_server(host: str = "127.0.0.1", port: int = 8420, reload: bool = False) -> None:
    """Start uvicorn serving the CUDAG application.

    The app is referenced by import string together with ``factory=True``,
    so uvicorn calls ``create_app`` itself.

    Args:
        host: Host to bind to
        port: Port to listen on
        reload: Enable auto-reload for development
    """
    # Imported lazily so importing this module does not require uvicorn.
    from uvicorn import run as serve

    app_factory = "cudag.server.app:create_app"
    serve(app_factory, factory=True, host=host, port=port, reload=reload)
@@ -0,0 +1,9 @@
1
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
2
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
3
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
4
+
5
+ """Server services for CUDAG."""
6
+
7
+ from cudag.server.services.generator import GeneratorService
8
+
9
+ __all__ = ["GeneratorService"]
@@ -0,0 +1,128 @@
1
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
2
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
3
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
4
+
5
+ """Generator service for creating CUDAG projects from annotations."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import base64
10
+ import re
11
+ import subprocess
12
+ import sys
13
+ from pathlib import Path
14
+ from typing import Any, Callable
15
+
16
+ from cudag.annotation.loader import AnnotationLoader, ParsedAnnotation
17
+ from cudag.annotation.scaffold import scaffold_generator
18
+
19
+
20
+ class GeneratorService:
21
+ """Service for generating CUDAG projects from annotation data."""
22
+
23
+ def __init__(self) -> None:
24
+ self.loader = AnnotationLoader()
25
+
26
+ def validate_annotation(self, annotation: dict[str, Any]) -> str | None:
27
+ """Validate annotation data.
28
+
29
+ Args:
30
+ annotation: Raw annotation dictionary
31
+
32
+ Returns:
33
+ Error message if invalid, None if valid
34
+ """
35
+ required_fields = ["screenName", "imageSize", "elements"]
36
+ for field in required_fields:
37
+ if field not in annotation:
38
+ return f"Missing required field: {field}"
39
+
40
+ if not isinstance(annotation["elements"], list):
41
+ return "elements must be a list"
42
+
43
+ if not isinstance(annotation["imageSize"], list) or len(annotation["imageSize"]) != 2:
44
+ return "imageSize must be a [width, height] array"
45
+
46
+ return None
47
+
48
+ def scaffold_project(
49
+ self,
50
+ annotation: dict[str, Any],
51
+ original_image: str,
52
+ masked_image: str | None,
53
+ icons: dict[str, str] | None,
54
+ project_dir: Path,
55
+ ) -> list[str]:
56
+ """Scaffold a CUDAG project from annotation data.
57
+
58
+ Args:
59
+ annotation: Full annotation.json data
60
+ original_image: Base64 encoded original image
61
+ masked_image: Base64 encoded masked image (optional)
62
+ icons: Map of icon names to base64 images (optional)
63
+ project_dir: Directory to create project in
64
+
65
+ Returns:
66
+ List of created file paths (relative to project_dir)
67
+ """
68
+ # Parse annotation
69
+ parsed = self.loader.parse_dict(annotation)
70
+
71
+ # Decode images
72
+ original_bytes = base64.b64decode(original_image)
73
+ masked_bytes = base64.b64decode(masked_image) if masked_image else None
74
+ icon_bytes = {
75
+ name: base64.b64decode(data)
76
+ for name, data in (icons or {}).items()
77
+ }
78
+
79
+ # Scaffold the project
80
+ created_files = scaffold_generator(
81
+ name=parsed.screen_name,
82
+ annotation=parsed,
83
+ output_dir=project_dir.parent,
84
+ original_image=original_bytes,
85
+ masked_image=masked_bytes,
86
+ icons=icon_bytes,
87
+ )
88
+
89
+ return [str(f.relative_to(project_dir)) for f in created_files]
90
+
91
+ def run_generation(
92
+ self,
93
+ project_dir: Path,
94
+ num_samples: int,
95
+ progress_callback: Callable[[int, str], None] | None = None,
96
+ ) -> None:
97
+ """Run dataset generation for a scaffolded project.
98
+
99
+ Args:
100
+ project_dir: Path to the project directory
101
+ num_samples: Number of samples to generate per task
102
+ progress_callback: Callback for progress updates (progress, task_name)
103
+ """
104
+ generator_script = project_dir / "generator.py"
105
+ if not generator_script.exists():
106
+ raise FileNotFoundError(f"Generator script not found: {generator_script}")
107
+
108
+ # Run the generator
109
+ if progress_callback:
110
+ progress_callback(0, "Starting generator...")
111
+
112
+ result = subprocess.run(
113
+ [
114
+ sys.executable,
115
+ str(generator_script),
116
+ "--samples",
117
+ str(num_samples),
118
+ ],
119
+ cwd=project_dir,
120
+ capture_output=True,
121
+ text=True,
122
+ )
123
+
124
+ if result.returncode != 0:
125
+ raise RuntimeError(f"Generation failed: {result.stderr}")
126
+
127
+ if progress_callback:
128
+ progress_callback(num_samples, "Generation complete")
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env bash
2
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
3
+ # Derivative works may be released by researchers,
4
+ # but original files may not be redistributed or used beyond research purposes.
5
+ #
6
+ # DO NOT EDIT THIS FILE - Generated by cudag framework
7
+
8
+ # Archive a dataset directory into a tar.gz file
9
+ #
10
+ # Usage:
11
+ # ./scripts/archive.sh [dataset_dir]
12
+
13
set -euo pipefail

DATASET_DIR="${1:-}"

if [[ -z "$DATASET_DIR" ]]; then
    # Find most recent dataset.
    # `|| true` is required: with `pipefail`, a failing `ls` (no datasets/*/
    # match) makes the assignment fail and `set -e` would abort the script
    # before the message below could be printed.
    DATASET_DIR=$(ls -td datasets/*/ 2>/dev/null | head -1 || true)
    if [[ -z "$DATASET_DIR" ]]; then
        echo "No dataset directory found. Specify path or run generate.sh first."
        exit 1
    fi
fi

# Fail with a clear message instead of letting tar report a missing member.
if [[ ! -d "$DATASET_DIR" ]]; then
    echo "Dataset directory not found: $DATASET_DIR" >&2
    exit 1
fi

DATASET_NAME=$(basename "$DATASET_DIR")
ARCHIVE_NAME="datasets/${DATASET_NAME}.tar.gz"

echo "Archiving: $DATASET_DIR"
echo "Output: $ARCHIVE_NAME"

# The archive always lands in ./datasets, which may not exist yet when the
# dataset directory was given explicitly and lives elsewhere.
mkdir -p datasets

# -C switches to the dataset's parent so the archive holds only the dataset
# directory itself, not the leading path components.
tar -czvf "$ARCHIVE_NAME" -C "$(dirname "$DATASET_DIR")" "$DATASET_NAME"

echo ""
echo "Archive created: $ARCHIVE_NAME"
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env bash
2
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
3
+ # Derivative works may be released by researchers,
4
+ # but original files may not be redistributed or used beyond research purposes.
5
+ #
6
+ # DO NOT EDIT THIS FILE - Generated by cudag framework
7
+
8
set -euo pipefail

# Work from the repository root; outside a git checkout fall back to the
# current directory.
if ! repo_root="$(git rev-parse --show-toplevel 2>/dev/null)"; then
    repo_root="$(pwd)"
fi
cd "$repo_root"

# A build is a full lint/type-check run over every Python file.
checks_script="$repo_root/scripts/pre-commit.sh"
"$checks_script" --all
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env bash
2
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
3
+ # Derivative works may be released by researchers,
4
+ # but original files may not be redistributed or used beyond research purposes.
5
+ #
6
+ # DO NOT EDIT THIS FILE - Generated by cudag framework
7
+
8
+ # Pipeline: generate.sh -> upload.sh -> extract.sh -> preprocess.sh
9
+ #
10
+ # Usage:
11
+ # ./scripts/extract.sh --dataset-name <NAME>
12
+
13
set -euo pipefail

DATASET_NAME=""

# Parse arguments; anything other than --dataset-name is ignored.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --dataset-name)
            # Guard the value: with a bare trailing flag `shift 2` fails and,
            # under `set -e`, would end the script without any message.
            if [[ $# -lt 2 ]]; then
                echo "Error: --dataset-name <NAME> is required"
                exit 1
            fi
            DATASET_NAME="$2"
            shift 2
            ;;
        *)
            shift
            ;;
    esac
done

if [[ -z "$DATASET_NAME" ]]; then
    echo "Error: --dataset-name <NAME> is required"
    exit 1
fi

echo "========================================"
echo "STAGE 3: Extract Dataset"
echo "========================================"
echo ""
echo "Dataset: $DATASET_NAME"
echo ""

# Find cudag's extract.py location and run via Modal
CUDAG_PATH=$(uvx --with cudag python -c "import cudag.modal_apps.extract as e; print(e.__file__)")
uvx modal run "$CUDAG_PATH" --dataset-name "$DATASET_NAME"

echo ""
echo "Extraction complete for: $DATASET_NAME"

echo ""
echo "========================================"
echo "Auto-starting preprocessing..."
echo "========================================"
echo ""

# Hand over to the next pipeline stage; exec replaces this process so the
# stage's exit status becomes the script's exit status.
exec ./scripts/preprocess.sh --dataset-name "$DATASET_NAME"
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env bash
2
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
3
+ # Derivative works may be released by researchers,
4
+ # but original files may not be redistributed or used beyond research purposes.
5
+ #
6
+ # DO NOT EDIT THIS FILE - Generated by cudag framework
7
+
8
+ # Usage:
9
+ # ./scripts/generate.sh [options] # Generate and auto-upload
10
+ # ./scripts/generate.sh --dry [options] # Generate only, no upload
11
+ # ./scripts/generate.sh --verify # Generate with interactive verification
12
+ # ./scripts/generate.sh --verify --verbose # Verify with streaming output
13
+
14
set -euo pipefail

DRY_RUN=false
VERIFY_MODE=false
VERBOSE=false
EXTRA_ARGS=()

# Parse args - extract --dry, --verify, --verbose, pass everything else through
for arg in "$@"; do
    if [[ "$arg" == "--dry" ]]; then
        DRY_RUN=true
    elif [[ "$arg" == "--verify" ]]; then
        VERIFY_MODE=true
        DRY_RUN=true  # verify implies dry run
    elif [[ "$arg" == "--verbose" ]] || [[ "$arg" == "-v" ]]; then
        VERBOSE=true
    else
        EXTRA_ARGS+=("$arg")
    fi
done

# If verify mode, delegate to verify.py which handles the loop
if [[ "$VERIFY_MODE" == "true" ]]; then
    echo "========================================"
    echo "Interactive Dataset Verification"
    echo "========================================"
    echo ""

    # Use prod config by default; an explicit "--config <path>" among the
    # pass-through arguments overrides it.
    CONFIG_PATH="config/dataset.prod.yaml"
    for i in "${!EXTRA_ARGS[@]}"; do
        if [[ "${EXTRA_ARGS[$i]}" == "--config" ]] && [[ $((i+1)) -lt ${#EXTRA_ARGS[@]} ]]; then
            CONFIG_PATH="${EXTRA_ARGS[$((i+1))]}"
            break
        fi
    done

    VERIFY_ARGS=(--config "$CONFIG_PATH")
    if [[ "$VERBOSE" == "true" ]]; then
        VERIFY_ARGS+=(--verbose)
    fi

    exec uv run python scripts/verify.py "${VERIFY_ARGS[@]}"
fi

echo "========================================"
echo "STAGE 1: Generate Dataset"
echo "========================================"
echo ""

if [[ "$DRY_RUN" == "true" ]]; then
    echo "[DRY RUN] Will generate but NOT upload"
    echo ""
fi

# Run the dataset generation using uv run (uses pyproject.toml dependencies)
# Set env var so generator.py knows it was called from this script
export CUDAG_FROM_SCRIPT=1

# Expanding an empty array trips `set -u` on older bash releases, hence the
# explicit branch instead of an unconditional "${EXTRA_ARGS[@]}".
run_generator() {
    if [[ ${#EXTRA_ARGS[@]} -gt 0 ]]; then
        uv run python generator.py "${EXTRA_ARGS[@]}"
    else
        uv run python generator.py
    fi
}

# Test the command directly: under `set -e` a separate `$?` check after the
# call is never reached on failure, so the message below would never print.
if ! run_generator; then
    echo ""
    echo "Dataset generation failed"
    exit 1
fi

# Find the most recently created dataset directory.
# `|| true` keeps a failing `ls` (no match, with pipefail) from aborting the
# script before the "not found" message.
LATEST_DATASET=$(ls -td datasets/*/ 2>/dev/null | head -1 || true)

if [[ -z "$LATEST_DATASET" ]]; then
    echo ""
    echo "No dataset directory found"
    exit 1
fi

DATASET_NAME=$(basename "$LATEST_DATASET")
echo ""
echo "Generated dataset: $DATASET_NAME"

if [[ "$DRY_RUN" == "true" ]]; then
    echo ""
    echo "[DRY RUN] Skipping upload"
    echo ""
    echo "To verify interactively:"
    echo " ./scripts/generate.sh --verify"
    echo ""
    echo "To upload manually:"
    echo " ./scripts/upload.sh datasets/$DATASET_NAME"
    exit 0
fi

echo ""
echo "========================================"
echo "Auto-starting upload..."
echo "========================================"
echo ""

exec ./scripts/upload.sh "datasets/$DATASET_NAME"
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env bash
2
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
3
+ # Derivative works may be released by researchers,
4
+ # but original files may not be redistributed or used beyond research purposes.
5
+ #
6
+ # DO NOT EDIT THIS FILE - Generated by cudag framework
7
+
8
set -euo pipefail

mode="${1:-staged}"

if [ "$mode" = "--help" ] || [ "$mode" = "-h" ]; then
    echo "Usage: $0 [--all]" >&2
    echo " --all Run checks against all tracked Python files instead of staged ones." >&2
    exit 0
fi

repo_root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
cd "$repo_root"

# Print the Python files to check, one per line.
list_targets() {
    if [ "$mode" = "--all" ]; then
        git ls-files -- '*.py' 2>/dev/null || find . -name '*.py' -type f
    else
        git diff --cached --name-only --diff-filter=ACM -- '*.py' 2>/dev/null || true
    fi
}

if [ "$mode" = "--all" ]; then
    scope_label="all Python files"
else
    scope_label="staged Python files"
fi

# Collect the targets into an array so paths containing spaces stay intact.
# Piping the list through plain `xargs` would split such paths into several
# arguments and could break one mypy run into multiple partial invocations.
py_targets=()
while IFS= read -r target; do
    if [ -n "$target" ]; then
        py_targets+=("$target")
    fi
done < <(list_targets)

if [ "${#py_targets[@]}" -eq 0 ]; then
    echo "No ${scope_label}. Skipping lint/type checks."
    exit 0
fi

# Use uv run to execute in the project's venv with dev dependencies (Python 3.12)
UV_RUN=(uv run --python 3.12 --extra dev)

echo "Running ruff (lexical checks) on ${scope_label}..."
"${UV_RUN[@]}" ruff check "${py_targets[@]}"

echo "Running mypy (syntax & types) on ${scope_label}..."
"${UV_RUN[@]}" mypy "${py_targets[@]}"

echo ""
echo "✓ All checks passed!"