cudag 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudag/__init__.py +334 -0
- cudag/annotation/__init__.py +77 -0
- cudag/annotation/codegen.py +648 -0
- cudag/annotation/config.py +545 -0
- cudag/annotation/loader.py +342 -0
- cudag/annotation/scaffold.py +121 -0
- cudag/annotation/transcription.py +296 -0
- cudag/cli/__init__.py +5 -0
- cudag/cli/main.py +315 -0
- cudag/cli/new.py +873 -0
- cudag/core/__init__.py +364 -0
- cudag/core/button.py +137 -0
- cudag/core/canvas.py +222 -0
- cudag/core/config.py +70 -0
- cudag/core/coords.py +233 -0
- cudag/core/data_grid.py +804 -0
- cudag/core/dataset.py +678 -0
- cudag/core/distribution.py +136 -0
- cudag/core/drawing.py +75 -0
- cudag/core/fonts.py +156 -0
- cudag/core/generator.py +163 -0
- cudag/core/grid.py +367 -0
- cudag/core/grounding_task.py +247 -0
- cudag/core/icon.py +207 -0
- cudag/core/iconlist_task.py +301 -0
- cudag/core/models.py +1251 -0
- cudag/core/random.py +130 -0
- cudag/core/renderer.py +190 -0
- cudag/core/screen.py +402 -0
- cudag/core/scroll_task.py +254 -0
- cudag/core/scrollable_grid.py +447 -0
- cudag/core/state.py +110 -0
- cudag/core/task.py +293 -0
- cudag/core/taskbar.py +350 -0
- cudag/core/text.py +212 -0
- cudag/core/utils.py +82 -0
- cudag/data/surnames.txt +5000 -0
- cudag/modal_apps/__init__.py +4 -0
- cudag/modal_apps/archive.py +103 -0
- cudag/modal_apps/extract.py +138 -0
- cudag/modal_apps/preprocess.py +529 -0
- cudag/modal_apps/upload.py +317 -0
- cudag/prompts/SYSTEM_PROMPT.txt +104 -0
- cudag/prompts/__init__.py +33 -0
- cudag/prompts/system.py +43 -0
- cudag/prompts/tools.py +382 -0
- cudag/py.typed +0 -0
- cudag/schemas/filesystem.json +90 -0
- cudag/schemas/test_record.schema.json +113 -0
- cudag/schemas/train_record.schema.json +90 -0
- cudag/server/__init__.py +21 -0
- cudag/server/app.py +232 -0
- cudag/server/services/__init__.py +9 -0
- cudag/server/services/generator.py +128 -0
- cudag/templates/scripts/archive.sh +35 -0
- cudag/templates/scripts/build.sh +13 -0
- cudag/templates/scripts/extract.sh +54 -0
- cudag/templates/scripts/generate.sh +116 -0
- cudag/templates/scripts/pre-commit.sh +44 -0
- cudag/templates/scripts/preprocess.sh +46 -0
- cudag/templates/scripts/upload.sh +63 -0
- cudag/templates/scripts/verify.py +428 -0
- cudag/validation/__init__.py +35 -0
- cudag/validation/validate.py +508 -0
- cudag-0.3.10.dist-info/METADATA +570 -0
- cudag-0.3.10.dist-info/RECORD +69 -0
- cudag-0.3.10.dist-info/WHEEL +4 -0
- cudag-0.3.10.dist-info/entry_points.txt +2 -0
- cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/server/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""CUDAG Server - FastAPI server for annotation-to-generator workflow.
|
|
6
|
+
|
|
7
|
+
This module provides a local HTTP server that the Annotator UI can call
|
|
8
|
+
to generate CUDAG projects from annotations without using the terminal.
|
|
9
|
+
|
|
10
|
+
Start the server:
|
|
11
|
+
cudag serve --port 8420
|
|
12
|
+
|
|
13
|
+
The server exposes:
|
|
14
|
+
GET /health - Health check
|
|
15
|
+
POST /api/v1/generate - Generate project from annotation
|
|
16
|
+
GET /api/v1/status/{job_id} - Check generation progress
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from cudag.server.app import create_app, run_server
|
|
20
|
+
|
|
21
|
+
# Public API of the server package: the app factory and the server launcher.
__all__ = ["create_app", "run_server"]
|
cudag/server/app.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""FastAPI application for CUDAG server."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import uuid
|
|
11
|
+
from contextlib import asynccontextmanager
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
|
16
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
18
|
+
|
|
19
|
+
from cudag import __version__
|
|
20
|
+
from cudag.server.services.generator import GeneratorService
|
|
21
|
+
|
|
22
|
+
# In-memory job storage (for MVP - would use Redis/DB in production)
|
|
23
|
+
# Maps job_id (a uuid4 string) to a mutable job record. Records are created by
# the generate endpoint with the keys "progress", "total", "current_task",
# "done", "error" and "project_dir", and are updated in place by the background
# generation task. The dict lives only in this process and is cleared on shutdown.
_jobs: dict[str, dict[str, Any]] = {}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GenerateOptions(BaseModel):
    """Caller-supplied settings controlling where and how a project is generated."""

    # Required. Joined onto the output directory to form the project path.
    project_name: str = Field(default=..., description="Name for the generated project")
    # Optional parent directory; the endpoint falls back to ~/cudag-projects.
    output_dir: str | None = Field(
        default=None,
        description="Output directory (default: ~/cudag-projects)",
    )
    # Forwarded to the generator and reported as the job's "total".
    num_samples: int = Field(default=1000, description="Number of samples per task")
    # When true, the endpoint queues a background generation job after scaffolding.
    generate_immediately: bool = Field(
        default=True,
        description="Run generation after scaffolding",
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class GenerateRequest(BaseModel):
    """JSON body accepted by the ``POST /api/v1/generate`` endpoint."""

    # Raw annotation document; it is validated by the generator service.
    annotation: dict = Field(default=..., description="Full annotation.json data")
    original_image: str = Field(
        default=...,
        description="Base64 encoded original image",
    )
    masked_image: str | None = Field(
        default=None,
        description="Base64 encoded masked image",
    )
    icons: dict[str, str] | None = Field(
        default=None,
        description="Map of icon names to base64 images",
    )
    # Required nested settings object (no default).
    options: GenerateOptions
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class GenerateResponse(BaseModel):
    """Result returned by the generate endpoint.

    Only ``status`` is always present; the remaining fields default to ``None``
    and are filled in depending on the outcome.
    """

    # The endpoint emits "error", "generating" or "success".
    status: str
    # Project directory as a string; set on non-error responses.
    project_path: str | None = None
    # Paths reported by the scaffolding step; set on non-error responses.
    files_created: list[str] | None = None
    # Identifier to poll via the status endpoint; set when generation was queued.
    job_id: str | None = None
    # Human-readable failure description; set when status is "error".
    error: str | None = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class StatusResponse(BaseModel):
    """Snapshot of a background generation job, as served by the status endpoint."""

    # Most recent progress value reported by the generation callback.
    progress: int
    # Target value for ``progress`` (the requested number of samples).
    total: int
    # Free-text description of what the job is currently doing.
    current_task: str | None = None
    # True once the job has finished, whether it succeeded or failed.
    done: bool
    # Failure message when the job ended with an exception.
    error: str | None = None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class HealthResponse(BaseModel):
    """Body returned by the ``GET /health`` endpoint."""

    # The health endpoint reports "healthy".
    status: str
    # Installed cudag package version.
    version: str
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: nothing to do on startup, drop jobs on shutdown.

    Args:
        app: The application being started (unused).
    """
    # Control is handed to the running application here.
    yield
    # Reached on shutdown: forget every tracked job.
    _jobs.clear()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Registers three routes:

    * ``GET /health`` - liveness probe that also reports the package version.
    * ``POST /api/v1/generate`` - scaffold a project from an annotation and,
      optionally, queue dataset generation as a background job.
    * ``GET /api/v1/status/{job_id}`` - poll a queued generation job.

    Returns:
        The configured ``FastAPI`` instance.
    """
    app = FastAPI(
        title="CUDAG Server",
        description="Generate CUDAG projects from annotations",
        version=__version__,
        lifespan=lifespan,
    )

    # Configure CORS for local development
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    @app.get("/health", response_model=HealthResponse)
    async def health_check() -> HealthResponse:
        """Check server health."""
        return HealthResponse(status="healthy", version=__version__)

    @app.post("/api/v1/generate", response_model=GenerateResponse)
    async def generate_project(
        request: GenerateRequest,
        background_tasks: BackgroundTasks,
    ) -> GenerateResponse:
        """Generate a CUDAG project from annotation data.

        Failures are reported in-band: the response has ``status="error"``
        and a message in ``error`` rather than an HTTP error status.
        """
        try:
            service = GeneratorService()
            options = request.options

            # Validate annotation
            validation_error = service.validate_annotation(request.annotation)
            if validation_error:
                return GenerateResponse(status="error", error=validation_error)

            # The project name becomes a directory directly under output_dir.
            # Reject empty names, "."/"..", absolute paths and anything with a
            # path separator, all of which would place the project elsewhere.
            project_name = options.project_name
            if (
                not project_name.strip()
                or project_name in {".", ".."}
                or Path(project_name).name != project_name
            ):
                return GenerateResponse(
                    status="error",
                    error="project_name must be a single directory name without path separators",
                )

            # Determine output directory
            output_dir = Path(options.output_dir or "~/cudag-projects").expanduser()
            project_dir = output_dir / project_name

            # Scaffolding decodes images and writes files synchronously; run it
            # in a worker thread so the event loop keeps serving other requests.
            files_created = await asyncio.to_thread(
                service.scaffold_project,
                annotation=request.annotation,
                original_image=request.original_image,
                masked_image=request.masked_image,
                icons=request.icons,
                project_dir=project_dir,
            )

            # If generate_immediately, run generation in background
            if options.generate_immediately:
                job_id = str(uuid.uuid4())
                _jobs[job_id] = {
                    "progress": 0,
                    "total": options.num_samples,
                    "current_task": "Starting generation...",
                    "done": False,
                    "error": None,
                    "project_dir": str(project_dir),
                }

                background_tasks.add_task(
                    _run_generation,
                    job_id=job_id,
                    project_dir=project_dir,
                    num_samples=options.num_samples,
                )

                return GenerateResponse(
                    status="generating",
                    project_path=str(project_dir),
                    files_created=files_created,
                    job_id=job_id,
                )

            return GenerateResponse(
                status="success",
                project_path=str(project_dir),
                files_created=files_created,
            )

        except Exception as e:
            # Endpoint boundary: surface any failure to the client as an error payload.
            return GenerateResponse(status="error", error=str(e))

    @app.get("/api/v1/status/{job_id}", response_model=StatusResponse)
    async def get_status(job_id: str) -> StatusResponse:
        """Get the status of a generation job.

        Raises:
            HTTPException: 404 when ``job_id`` is not a known job.
        """
        if job_id not in _jobs:
            raise HTTPException(status_code=404, detail="Job not found")

        job = _jobs[job_id]
        return StatusResponse(
            progress=job["progress"],
            total=job["total"],
            current_task=job.get("current_task"),
            done=job["done"],
            error=job.get("error"),
        )

    return app
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
async def _run_generation(
    job_id: str,
    project_dir: Path,
    num_samples: int,
) -> None:
    """Background task: run dataset generation and mirror its state into ``_jobs``.

    The blocking generator call is executed in a worker thread. Any exception
    is recorded on the job record instead of being raised.

    Args:
        job_id: Key of the job record to update.
        project_dir: Scaffolded project directory to generate from.
        num_samples: Number of samples passed to the generator.
    """

    def record(**fields: Any) -> None:
        # The record may be gone (the registry is cleared on shutdown);
        # in that case the update is silently skipped.
        job = _jobs.get(job_id)
        if job is not None:
            job.update(fields)

    def on_progress(progress: int, task: str) -> None:
        record(progress=progress, current_task=task)

    try:
        await asyncio.to_thread(
            GeneratorService().run_generation,
            project_dir=project_dir,
            num_samples=num_samples,
            progress_callback=on_progress,
        )
    except Exception as exc:
        record(done=True, error=str(exc))
    else:
        record(done=True, current_task="Generation complete")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def run_server(host: str = "127.0.0.1", port: int = 8420, reload: bool = False) -> None:
    """Start uvicorn serving the CUDAG application.

    Args:
        host: Interface address to bind.
        port: TCP port to listen on.
        reload: Turn on uvicorn's auto-reload (development aid).
    """
    # Imported here so uvicorn is only loaded when the server is actually started.
    import uvicorn

    # Import string for the app factory; factory=True makes uvicorn call
    # create_app() itself.
    app_factory = "cudag.server.app:create_app"
    uvicorn.run(app_factory, host=host, port=port, reload=reload, factory=True)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""Server services for CUDAG."""
|
|
6
|
+
|
|
7
|
+
from cudag.server.services.generator import GeneratorService
|
|
8
|
+
|
|
9
|
+
# Public API of the services package.
__all__ = ["GeneratorService"]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""Generator service for creating CUDAG projects from annotations."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Callable
|
|
15
|
+
|
|
16
|
+
from cudag.annotation.loader import AnnotationLoader, ParsedAnnotation
|
|
17
|
+
from cudag.annotation.scaffold import scaffold_generator
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GeneratorService:
    """Service for generating CUDAG projects from annotation data.

    Offers three steps used by the HTTP server: validating a raw annotation
    dict, scaffolding a project on disk from it, and running the scaffolded
    project's ``generator.py`` in a subprocess.
    """

    def __init__(self) -> None:
        self.loader = AnnotationLoader()

    def validate_annotation(self, annotation: dict[str, Any]) -> str | None:
        """Validate annotation data.

        Checks that the required top-level fields are present, that
        ``elements`` is a list, and that ``imageSize`` is a two-item list of
        positive numbers.

        Args:
            annotation: Raw annotation dictionary

        Returns:
            Error message if invalid, None if valid
        """
        for field in ("screenName", "imageSize", "elements"):
            if field not in annotation:
                return f"Missing required field: {field}"

        if not isinstance(annotation["elements"], list):
            return "elements must be a list"

        image_size = annotation["imageSize"]
        if not isinstance(image_size, list) or len(image_size) != 2:
            return "imageSize must be a [width, height] array"

        for dimension in image_size:
            # bool is a subclass of int, so exclude it explicitly. `not > 0`
            # (rather than `<= 0`) also rejects NaN.
            if (
                isinstance(dimension, bool)
                or not isinstance(dimension, (int, float))
                or not dimension > 0
            ):
                return "imageSize must contain positive numeric width and height"

        return None

    @staticmethod
    def _decode_base64(data: str) -> bytes:
        """Decode a base64 string, accepting an optional data-URL wrapper.

        ``base64.b64decode`` discards characters outside the base64 alphabet
        by default, so a ``data:<mime>;base64,<payload>`` string would decode
        to garbage. Only the payload after the first comma is decoded here.
        """
        if data[:5].lower() == "data:":
            _header, separator, payload = data.partition(",")
            if separator:
                data = payload
        return base64.b64decode(data)

    def scaffold_project(
        self,
        annotation: dict[str, Any],
        original_image: str,
        masked_image: str | None,
        icons: dict[str, str] | None,
        project_dir: Path,
    ) -> list[str]:
        """Scaffold a CUDAG project from annotation data.

        Args:
            annotation: Full annotation.json data
            original_image: Base64 encoded original image (raw base64 or a data URL)
            masked_image: Base64 encoded masked image (optional)
            icons: Map of icon names to base64 images (optional)
            project_dir: Directory to create project in

        Returns:
            List of created file paths (relative to project_dir)

        Raises:
            binascii.Error: If an image payload has invalid base64 padding.
            ValueError: If a created file is not located under ``project_dir``.
        """
        # Parse annotation
        parsed = self.loader.parse_dict(annotation)

        # Decode images
        original_bytes = self._decode_base64(original_image)
        masked_bytes = self._decode_base64(masked_image) if masked_image else None
        icon_bytes = {
            name: self._decode_base64(data)
            for name, data in (icons or {}).items()
        }

        # Scaffold the project
        # NOTE(review): the project is named after parsed.screen_name while the
        # returned paths are made relative to project_dir (whose last component
        # is the caller's project name). If scaffold_generator derives its
        # directory from `name`, differing names would make relative_to() raise
        # ValueError - confirm against scaffold_generator.
        created_files = scaffold_generator(
            name=parsed.screen_name,
            annotation=parsed,
            output_dir=project_dir.parent,
            original_image=original_bytes,
            masked_image=masked_bytes,
            icons=icon_bytes,
        )

        return [str(f.relative_to(project_dir)) for f in created_files]

    def run_generation(
        self,
        project_dir: Path,
        num_samples: int,
        progress_callback: Callable[[int, str], None] | None = None,
    ) -> None:
        """Run dataset generation for a scaffolded project.

        Executes ``generator.py`` from ``project_dir`` with the current Python
        interpreter and waits for it to finish. The callback is invoked once
        before the run (progress 0) and once after success (progress
        ``num_samples``); no intermediate progress is reported.

        Args:
            project_dir: Path to the project directory
            num_samples: Number of samples to generate per task
            progress_callback: Callback for progress updates (progress, task_name)

        Raises:
            FileNotFoundError: If ``generator.py`` does not exist in ``project_dir``.
            RuntimeError: If the generator exits with a non-zero status.
        """
        generator_script = project_dir / "generator.py"
        if not generator_script.exists():
            raise FileNotFoundError(f"Generator script not found: {generator_script}")

        # Run the generator
        if progress_callback:
            progress_callback(0, "Starting generator...")

        # Argument list (no shell). Output is captured so stderr can be
        # included in the error message on failure.
        result = subprocess.run(
            [
                sys.executable,
                str(generator_script),
                "--samples",
                str(num_samples),
            ],
            cwd=project_dir,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            raise RuntimeError(f"Generation failed: {result.stderr}")

        if progress_callback:
            progress_callback(num_samples, "Generation complete")
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
3
|
+
# Derivative works may be released by researchers,
|
|
4
|
+
# but original files may not be redistributed or used beyond research purposes.
|
|
5
|
+
#
|
|
6
|
+
# DO NOT EDIT THIS FILE - Generated by cudag framework
|
|
7
|
+
|
|
8
|
+
# Archive a dataset directory into a tar.gz file
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# ./scripts/archive.sh [dataset_dir]
|
|
12
|
+
|
|
13
|
+
set -euo pipefail

DATASET_DIR="${1:-}"

if [[ -z "$DATASET_DIR" ]]; then
    # No path given: fall back to the most recently modified dataset directory.
    # `|| true` is required: with `set -e` and `pipefail`, a failing `ls`
    # (no datasets yet) or a SIGPIPE from `head` would abort the script here,
    # before the message below could be printed.
    DATASET_DIR=$(ls -td datasets/*/ 2>/dev/null | head -1 || true)
    if [[ -z "$DATASET_DIR" ]]; then
        echo "No dataset directory found. Specify path or run generate.sh first."
        exit 1
    fi
fi

# Fail with a clear message instead of a tar error when the path is wrong.
if [[ ! -d "$DATASET_DIR" ]]; then
    echo "Dataset directory not found: $DATASET_DIR"
    exit 1
fi

DATASET_NAME=$(basename "$DATASET_DIR")
ARCHIVE_NAME="datasets/${DATASET_NAME}.tar.gz"

echo "Archiving: $DATASET_DIR"
echo "Output: $ARCHIVE_NAME"

# The archive is always written under ./datasets, even when the dataset itself
# lives elsewhere, so make sure that directory exists.
mkdir -p "$(dirname "$ARCHIVE_NAME")"

# -C switches to the dataset's parent so the archive contains just "<name>/...".
tar -czvf "$ARCHIVE_NAME" -C "$(dirname "$DATASET_DIR")" "$DATASET_NAME"

echo ""
echo "Archive created: $ARCHIVE_NAME"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
3
|
+
# Derivative works may be released by researchers,
|
|
4
|
+
# but original files may not be redistributed or used beyond research purposes.
|
|
5
|
+
#
|
|
6
|
+
# DO NOT EDIT THIS FILE - Generated by cudag framework
|
|
7
|
+
|
|
8
|
+
set -euo pipefail

# Resolve the project root: the git top-level when inside a repository,
# otherwise the current directory.
if ! repo_root="$(git rev-parse --show-toplevel 2>/dev/null)"; then
    repo_root="$(pwd)"
fi

cd "$repo_root"

# "Building" means running the full lint/type-check suite over all files.
"$repo_root/scripts/pre-commit.sh" --all
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
3
|
+
# Derivative works may be released by researchers,
|
|
4
|
+
# but original files may not be redistributed or used beyond research purposes.
|
|
5
|
+
#
|
|
6
|
+
# DO NOT EDIT THIS FILE - Generated by cudag framework
|
|
7
|
+
|
|
8
|
+
# Pipeline: generate.sh -> upload.sh -> extract.sh -> preprocess.sh
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# ./scripts/extract.sh --dataset-name <NAME>
|
|
12
|
+
|
|
13
|
+
set -euo pipefail

DATASET_NAME=""

# Parse arguments. Unrecognised arguments are skipped.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --dataset-name)
            # Check for the value before shifting: `shift 2` with only one
            # argument left fails, and under `set -e` that would end the
            # script silently before the error message is printed.
            if [[ $# -lt 2 ]]; then
                echo "Error: --dataset-name <NAME> is required"
                exit 1
            fi
            DATASET_NAME="$2"
            shift 2
            ;;
        *)
            shift
            ;;
    esac
done

if [[ -z "$DATASET_NAME" ]]; then
    echo "Error: --dataset-name <NAME> is required"
    exit 1
fi

echo "========================================"
echo "STAGE 3: Extract Dataset"
echo "========================================"
echo ""
echo "Dataset: $DATASET_NAME"
echo ""

# Find cudag's extract.py location and run via Modal
CUDAG_PATH=$(uvx --with cudag python -c "import cudag.modal_apps.extract as e; print(e.__file__)")
uvx modal run "$CUDAG_PATH" --dataset-name "$DATASET_NAME"

echo ""
echo "Extraction complete for: $DATASET_NAME"

echo ""
echo "========================================"
echo "Auto-starting preprocessing..."
echo "========================================"
echo ""

# Hand over to the next pipeline stage; exec replaces this shell process.
exec ./scripts/preprocess.sh --dataset-name "$DATASET_NAME"
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
3
|
+
# Derivative works may be released by researchers,
|
|
4
|
+
# but original files may not be redistributed or used beyond research purposes.
|
|
5
|
+
#
|
|
6
|
+
# DO NOT EDIT THIS FILE - Generated by cudag framework
|
|
7
|
+
|
|
8
|
+
# Usage:
|
|
9
|
+
# ./scripts/generate.sh [options] # Generate and auto-upload
|
|
10
|
+
# ./scripts/generate.sh --dry [options] # Generate only, no upload
|
|
11
|
+
# ./scripts/generate.sh --verify # Generate with interactive verification
|
|
12
|
+
# ./scripts/generate.sh --verify --verbose # Verify with streaming output
|
|
13
|
+
|
|
14
|
+
set -euo pipefail

DRY_RUN=false
VERIFY_MODE=false
VERBOSE=false
EXTRA_ARGS=()

# Parse args - extract --dry, --verify, --verbose, pass everything else through
for arg in "$@"; do
    if [[ "$arg" == "--dry" ]]; then
        DRY_RUN=true
    elif [[ "$arg" == "--verify" ]]; then
        VERIFY_MODE=true
        DRY_RUN=true  # verify implies dry run
    elif [[ "$arg" == "--verbose" ]] || [[ "$arg" == "-v" ]]; then
        VERBOSE=true
    else
        EXTRA_ARGS+=("$arg")
    fi
done

# If verify mode, delegate to verify.py which handles the loop
if [[ "$VERIFY_MODE" == "true" ]]; then
    echo "========================================"
    echo "Interactive Dataset Verification"
    echo "========================================"
    echo ""

    # Use prod config by default; a "--config <path>" pair among the
    # pass-through arguments overrides it.
    CONFIG_PATH="config/dataset.prod.yaml"
    for i in "${!EXTRA_ARGS[@]}"; do
        if [[ "${EXTRA_ARGS[$i]}" == "--config" ]] && [[ $((i+1)) -lt ${#EXTRA_ARGS[@]} ]]; then
            CONFIG_PATH="${EXTRA_ARGS[$((i+1))]}"
            break
        fi
    done

    VERIFY_ARGS=(--config "$CONFIG_PATH")
    if [[ "$VERBOSE" == "true" ]]; then
        VERIFY_ARGS+=(--verbose)
    fi

    exec uv run python scripts/verify.py "${VERIFY_ARGS[@]}"
fi

echo "========================================"
echo "STAGE 1: Generate Dataset"
echo "========================================"
echo ""

if [[ "$DRY_RUN" == "true" ]]; then
    echo "[DRY RUN] Will generate but NOT upload"
    echo ""
fi

# Run the dataset generation using uv run (uses pyproject.toml dependencies)
# Set env var so generator.py knows it was called from this script
export CUDAG_FROM_SCRIPT=1

# Build the command as an array; the pass-through args are appended only when
# present so an empty array is never expanded under `set -u`.
GENERATOR_CMD=(uv run python generator.py)
if [[ ${#EXTRA_ARGS[@]} -gt 0 ]]; then
    GENERATOR_CMD+=("${EXTRA_ARGS[@]}")
fi

# Test the command directly in the `if`: under `set -e` a separate `$?` check
# afterwards is never reached, because the script exits as soon as the
# command fails.
if ! "${GENERATOR_CMD[@]}"; then
    echo ""
    echo "Dataset generation failed"
    exit 1
fi

# Find the most recently created dataset directory.
# `|| true` keeps `set -e` + `pipefail` from aborting on a failing `ls`
# (no datasets) before the message below can be printed.
LATEST_DATASET=$(ls -td datasets/*/ 2>/dev/null | head -1 || true)

if [[ -z "$LATEST_DATASET" ]]; then
    echo ""
    echo "No dataset directory found"
    exit 1
fi

DATASET_NAME=$(basename "$LATEST_DATASET")
echo ""
echo "Generated dataset: $DATASET_NAME"

if [[ "$DRY_RUN" == "true" ]]; then
    echo ""
    echo "[DRY RUN] Skipping upload"
    echo ""
    echo "To verify interactively:"
    echo " ./scripts/generate.sh --verify"
    echo ""
    echo "To upload manually:"
    echo " ./scripts/upload.sh datasets/$DATASET_NAME"
    exit 0
fi

echo ""
echo "========================================"
echo "Auto-starting upload..."
echo "========================================"
echo ""

# Hand over to the next pipeline stage; exec replaces this shell process.
exec ./scripts/upload.sh "datasets/$DATASET_NAME"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
3
|
+
# Derivative works may be released by researchers,
|
|
4
|
+
# but original files may not be redistributed or used beyond research purposes.
|
|
5
|
+
#
|
|
6
|
+
# DO NOT EDIT THIS FILE - Generated by cudag framework
|
|
7
|
+
|
|
8
|
+
set -euo pipefail

# First argument selects the scope; anything other than --all (or a help
# flag) means "staged files only".
mode="${1:-staged}"

case "$mode" in
    --help | -h)
        echo "Usage: $0 [--all]" >&2
        echo " --all Run checks against all tracked Python files instead of staged ones." >&2
        exit 0
        ;;
esac

# Work from the project root: the git top-level when available, else the cwd.
if ! repo_root="$(git rev-parse --show-toplevel 2>/dev/null)"; then
    repo_root="$(pwd)"
fi
cd "$repo_root"

# Collect the Python files to check, one path per line.
case "$mode" in
    --all)
        scope_label="all Python files"
        # Falls back to a filesystem search when git cannot list files.
        py_targets=$(git ls-files -- '*.py' 2>/dev/null || find . -name '*.py' -type f)
        ;;
    *)
        scope_label="staged Python files"
        # Added/copied/modified files in the index; empty when git fails.
        py_targets=$(git diff --cached --name-only --diff-filter=ACM -- '*.py' 2>/dev/null || true)
        ;;
esac

if [[ -z "$py_targets" ]]; then
    echo "No ${scope_label}. Skipping lint/type checks."
    exit 0
fi

# Use uv run to execute in the project's venv with dev dependencies (Python 3.12)
uv_run=(uv run --python 3.12 --extra dev)

echo "Running ruff (lexical checks) on ${scope_label}..."
xargs "${uv_run[@]}" ruff check <<< "$py_targets"

echo "Running mypy (syntax & types) on ${scope_label}..."
xargs "${uv_run[@]}" mypy <<< "$py_targets"

echo ""
echo "✓ All checks passed!"
|