cudag-0.3.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. cudag/__init__.py +334 -0
  2. cudag/annotation/__init__.py +77 -0
  3. cudag/annotation/codegen.py +648 -0
  4. cudag/annotation/config.py +545 -0
  5. cudag/annotation/loader.py +342 -0
  6. cudag/annotation/scaffold.py +121 -0
  7. cudag/annotation/transcription.py +296 -0
  8. cudag/cli/__init__.py +5 -0
  9. cudag/cli/main.py +315 -0
  10. cudag/cli/new.py +873 -0
  11. cudag/core/__init__.py +364 -0
  12. cudag/core/button.py +137 -0
  13. cudag/core/canvas.py +222 -0
  14. cudag/core/config.py +70 -0
  15. cudag/core/coords.py +233 -0
  16. cudag/core/data_grid.py +804 -0
  17. cudag/core/dataset.py +678 -0
  18. cudag/core/distribution.py +136 -0
  19. cudag/core/drawing.py +75 -0
  20. cudag/core/fonts.py +156 -0
  21. cudag/core/generator.py +163 -0
  22. cudag/core/grid.py +367 -0
  23. cudag/core/grounding_task.py +247 -0
  24. cudag/core/icon.py +207 -0
  25. cudag/core/iconlist_task.py +301 -0
  26. cudag/core/models.py +1251 -0
  27. cudag/core/random.py +130 -0
  28. cudag/core/renderer.py +190 -0
  29. cudag/core/screen.py +402 -0
  30. cudag/core/scroll_task.py +254 -0
  31. cudag/core/scrollable_grid.py +447 -0
  32. cudag/core/state.py +110 -0
  33. cudag/core/task.py +293 -0
  34. cudag/core/taskbar.py +350 -0
  35. cudag/core/text.py +212 -0
  36. cudag/core/utils.py +82 -0
  37. cudag/data/surnames.txt +5000 -0
  38. cudag/modal_apps/__init__.py +4 -0
  39. cudag/modal_apps/archive.py +103 -0
  40. cudag/modal_apps/extract.py +138 -0
  41. cudag/modal_apps/preprocess.py +529 -0
  42. cudag/modal_apps/upload.py +317 -0
  43. cudag/prompts/SYSTEM_PROMPT.txt +104 -0
  44. cudag/prompts/__init__.py +33 -0
  45. cudag/prompts/system.py +43 -0
  46. cudag/prompts/tools.py +382 -0
  47. cudag/py.typed +0 -0
  48. cudag/schemas/filesystem.json +90 -0
  49. cudag/schemas/test_record.schema.json +113 -0
  50. cudag/schemas/train_record.schema.json +90 -0
  51. cudag/server/__init__.py +21 -0
  52. cudag/server/app.py +232 -0
  53. cudag/server/services/__init__.py +9 -0
  54. cudag/server/services/generator.py +128 -0
  55. cudag/templates/scripts/archive.sh +35 -0
  56. cudag/templates/scripts/build.sh +13 -0
  57. cudag/templates/scripts/extract.sh +54 -0
  58. cudag/templates/scripts/generate.sh +116 -0
  59. cudag/templates/scripts/pre-commit.sh +44 -0
  60. cudag/templates/scripts/preprocess.sh +46 -0
  61. cudag/templates/scripts/upload.sh +63 -0
  62. cudag/templates/scripts/verify.py +428 -0
  63. cudag/validation/__init__.py +35 -0
  64. cudag/validation/validate.py +508 -0
  65. cudag-0.3.10.dist-info/METADATA +570 -0
  66. cudag-0.3.10.dist-info/RECORD +69 -0
  67. cudag-0.3.10.dist-info/WHEEL +4 -0
  68. cudag-0.3.10.dist-info/entry_points.txt +2 -0
  69. cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/modal_apps/__init__.py
@@ -0,0 +1,4 @@
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
+
cudag/modal_apps/archive.py
@@ -0,0 +1,103 @@
+ #!/usr/bin/env python3
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
+
+ """Archive datasets from Modal volume to archive volume.
+
+ Compresses the raw dataset into a .tgz archive.
+
+ Volume structure:
+     claimhawk-archives/
+         datasets/[ds-name].tgz
+         loras/[ds-name]/[run-name].tgz
+ """
+ from __future__ import annotations
+
+ import sys
+
+ import modal
+
+ # =============================================================================
+ # CENTRALIZED CONFIGURATION
+ # =============================================================================
+ # Volume names are loaded from config/adapters.yaml via the SDK.
+ # Users can customize these by editing the YAML file.
+
+ try:
+     from sdk.modal_compat import get_volume_name
+     TRAINING_VOLUME = get_volume_name("lora_training")
+     ARCHIVE_VOLUME = get_volume_name("archives")
+ except ImportError:
+     # Fallback when SDK not available
+     TRAINING_VOLUME = "claimhawk-lora-training"
+     ARCHIVE_VOLUME = "claimhawk-archives"
+
+
+ def _get_generator_name() -> str:
+     """Extract generator name from --ds-name arg for dynamic app naming."""
+     for i, arg in enumerate(sys.argv):
+         if arg == "--ds-name" and i + 1 < len(sys.argv):
+             ds_name = sys.argv[i + 1]
+             return ds_name.split("-")[0] if ds_name else "cudag"
+     return "cudag"
+
+
+ app = modal.App(f"{_get_generator_name()}-archive")
+ training_vol = modal.Volume.from_name(TRAINING_VOLUME, create_if_missing=True)
+ archive_vol = modal.Volume.from_name(ARCHIVE_VOLUME, create_if_missing=True)
+
+
+ @app.function(
+     volumes={
+         "/training": training_vol,
+         "/archive": archive_vol,
+     },
+     timeout=1800,  # 30 min for large datasets
+ )
+ def archive_dataset(ds_name: str) -> str:
+     """Archive a dataset to the archive volume.
+
+     Reads from:
+         /training/datasets/[ds_name]/
+
+     Writes to:
+         /archive/datasets/[ds_name].tgz
+     """
+     import tarfile
+     from pathlib import Path
+
+     dataset_path = Path(f"/training/datasets/{ds_name}")
+     archive_dir = Path("/archive/datasets")
+     archive_path = archive_dir / f"{ds_name}.tgz"
+
+     # Verify source path exists
+     if not dataset_path.exists():
+         raise FileNotFoundError(f"Dataset not found: {dataset_path}")
+
+     # Create archive directory
+     archive_dir.mkdir(parents=True, exist_ok=True)
+
+     print(f"Creating archive: {ds_name}.tgz")
+     print(f"  Dataset: {dataset_path}")
+
+     # Create tar.gz archive
+     with tarfile.open(archive_path, "w:gz") as tar:
+         tar.add(dataset_path, arcname=ds_name)
+
+     # Get archive size
+     size_mb = archive_path.stat().st_size / (1024 * 1024)
+     print(f"Archive size: {size_mb:.1f} MB")
+
+     # Commit to volume
+     archive_vol.commit()
+
+     print(f"Archived to: /archive/datasets/{ds_name}.tgz")
+     return str(archive_path)
+
+
+ @app.local_entrypoint()
+ def main(ds_name: str) -> None:
+     """Archive a dataset."""
+     result = archive_dataset.remote(ds_name)
+     print(f"Archive complete: {result}")
cudag/modal_apps/extract.py
@@ -0,0 +1,138 @@
+ #!/usr/bin/env python3
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
+
+ """Modal function to extract uploaded dataset archives (single or chunked) on a volume.
+
+ Pipeline: upload_dataset -> modal_extract -> preprocess
+ """
+ from __future__ import annotations
+
+ import sys
+
+ import modal
+
+ # =============================================================================
+ # CENTRALIZED CONFIGURATION
+ # =============================================================================
+ # Volume names are loaded from config/adapters.yaml via the SDK.
+ # Users can customize these by editing the YAML file.
+
+ try:
+     from sdk.modal_compat import get_volume_name
+     DEFAULT_VOLUME = get_volume_name("lora_training")
+ except ImportError:
+     # Fallback when SDK not available
+     DEFAULT_VOLUME = "claimhawk-lora-training"
+
+
+ def _get_generator_name() -> str:
+     """Extract generator name from --dataset-name arg for dynamic app naming."""
+     for i, arg in enumerate(sys.argv):
+         if arg == "--dataset-name" and i + 1 < len(sys.argv):
+             ds_name = sys.argv[i + 1]
+             return ds_name.split("-")[0] if ds_name else "cudag"
+     return "cudag"
+
+
+ app = modal.App(f"{_get_generator_name()}-extract")
+ VOLUME = modal.Volume.from_name(DEFAULT_VOLUME, create_if_missing=True)
+
+
+ @app.function(volumes={"/data": VOLUME}, timeout=600)
+ def extract(ds_name: str) -> str:
+     """Extract a tarball (single or chunked) on the Modal volume."""
+     import json
+     import shutil
+     import tarfile
+     from pathlib import Path
+
+     datasets_dir = Path("/data/datasets")
+     datasets_dir.mkdir(parents=True, exist_ok=True)
+
+     chunks_dir = datasets_dir / f"{ds_name}_chunks"
+     legacy_archive = datasets_dir / f"{ds_name}.tar.gz"
+     extract_dir = datasets_dir
+
+     # Check for chunked upload first
+     if chunks_dir.exists():
+         manifest_path = chunks_dir / f"{ds_name}.manifest.json"
+
+         if not manifest_path.exists():
+             raise FileNotFoundError(f"Manifest not found: {manifest_path}")
+
+         with open(manifest_path) as f:
+             manifest = json.load(f)
+
+         num_chunks = manifest["num_chunks"]
+         print(f"Reassembling {num_chunks} chunks...")
+
+         # Reassemble the archive from chunks
+         reassembled_path = datasets_dir / f"{ds_name}.tar.gz"
+
+         # Handle single-chunk case (archive wasn't split)
+         if num_chunks == 1:
+             # Find the single chunk (could be .tar.gz or other naming)
+             chunk_names = list(manifest["chunks"].keys())
+             if chunk_names:
+                 chunk_path = chunks_dir / chunk_names[0]
+                 if chunk_path.exists():
+                     print(f"  Moving single chunk {chunk_path.name}")
+                     shutil.copy2(chunk_path, reassembled_path)
+                 else:
+                     raise FileNotFoundError(f"Chunk not found: {chunk_path}")
+             else:
+                 raise FileNotFoundError("No chunks found in manifest")
+         else:
+             with open(reassembled_path, "wb") as outfile:
+                 for i in range(num_chunks):
+                     # Try to find the chunk with different naming patterns
+                     chunk_path = None
+                     for name in manifest["chunks"]:
+                         if f"part{i:03d}" in name:
+                             chunk_path = chunks_dir / name
+                             break
+
+                     if chunk_path is None or not chunk_path.exists():
+                         raise FileNotFoundError(f"Chunk {i} not found")
+
+                     print(f"  Adding {chunk_path.name}")
+                     with open(chunk_path, "rb") as chunk:
+                         outfile.write(chunk.read())
+
+         # Extract the reassembled archive
+         print("Extracting archive...")
+         with tarfile.open(reassembled_path, "r:gz") as tar:
+             tar.extractall(path=extract_dir, filter="data")
+
+         # Cleanup: remove chunks directory and reassembled archive
+         shutil.rmtree(chunks_dir)
+         reassembled_path.unlink()
+         VOLUME.commit()
+         print(f"Extracted {ds_name} to /data/datasets/{ds_name}")
+
+     # Fall back to legacy single-file archive
+     elif legacy_archive.exists():
+         print("Extracting single archive...")
+         with tarfile.open(legacy_archive, "r:gz") as tar:
+             tar.extractall(path=extract_dir, filter="data")
+
+         legacy_archive.unlink()
+         VOLUME.commit()
+         print(f"Extracted {ds_name} to /data/datasets/{ds_name}")
+
+     else:
+         raise FileNotFoundError(
+             f"No archive found for {ds_name}. "
+             f"Checked: {chunks_dir} and {legacy_archive}"
+         )
+
+     return ds_name
+
+
+ @app.local_entrypoint()
+ def main(dataset_name: str) -> None:
+     """Entry point for modal run command."""
+     result = extract.remote(dataset_name)
+     print(f"Extraction complete: {result}")