datumhub-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datum/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,188 @@
1
+ """datum cache — manage the local dataset cache."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ from pathlib import Path
7
+ from typing import List, NamedTuple
8
+
9
+ import typer
10
+ from rich import box
11
+ from rich.table import Table
12
+
13
+ from datum.console import console, err_console
14
+ from datum.state import OutputFormat, state
15
+
16
+ cache_app = typer.Typer(help="Manage the local dataset cache.")
17
+
18
+
19
def get_cache_root() -> Path:
    """Return the root directory of the local dataset cache."""
    home_relative = Path("~") / ".datum" / "cache"
    return home_relative.expanduser()
21
+
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Helpers
25
+ # ---------------------------------------------------------------------------
26
+
27
+
28
class CacheEntry(NamedTuple):
    """One cached dataset version on disk."""

    dataset_id: str  # publisher.namespace.dataset
    version: str
    files: List[Path]

    @property
    def size(self) -> int:
        """Total size in bytes of every file in this entry (stat'd on access)."""
        total = 0
        for path in self.files:
            total += path.stat().st_size
        return total
36
+
37
+
38
def _scan_cache(root: Path) -> List[CacheEntry]:
    """Walk cache root and return one entry per dataset version.

    The cache layout is ``<root>/<publisher>/<namespace>/<dataset>/<version>/``.
    Non-directories at any level are ignored.  Replaces four nested
    sorted-iterdir loops with a single glob pass; sorting by path components
    reproduces the original per-level ordering exactly.
    """
    entries: List[CacheEntry] = []
    if not root.exists():
        return entries
    # glob() only descends through directories, so stray files at the
    # publisher/namespace/dataset levels are skipped automatically; only the
    # final (version) component needs an explicit is_dir() filter.
    version_dirs = sorted(
        (p for p in root.glob("*/*/*/*") if p.is_dir()),
        key=lambda p: p.parts,
    )
    for ver_dir in version_dirs:
        ds_dir = ver_dir.parent
        ns_dir = ds_dir.parent
        pub_dir = ns_dir.parent
        files = [f for f in ver_dir.iterdir() if f.is_file()]
        dataset_id = f"{pub_dir.name}.{ns_dir.name}.{ds_dir.name}"
        entries.append(CacheEntry(dataset_id, ver_dir.name, files))
    return entries
59
+
60
+
61
+ def _fmt_size(n: int) -> str:
62
+ for unit in ("B", "KB", "MB", "GB"):
63
+ if n < 1024:
64
+ return f"{n} {unit}" if unit == "B" else f"{n:.1f} {unit}"
65
+ n /= 1024
66
+ return f"{n:.1f} TB"
67
+
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # Subcommands
71
+ # ---------------------------------------------------------------------------
72
+
73
+
74
@cache_app.command("list")
def cache_list() -> None:
    """List all cached datasets.

    Honours the global output state: emits a JSON array with --output json,
    nothing with --quiet, otherwise a rich summary table.
    Fixes: drops the unused ``total_files`` local and calls
    ``get_cache_root()`` once instead of three times.
    """
    output_fmt = state.output
    quiet = state.quiet
    cache_root = get_cache_root()

    entries = _scan_cache(cache_root)

    if output_fmt == OutputFormat.json:
        import json

        payload = [
            {
                "id": e.dataset_id,
                "version": e.version,
                "files": [str(f) for f in e.files],
                "size": e.size,
            }
            for e in entries
        ]
        print(json.dumps(payload, indent=2))
        return

    if quiet:
        return

    if not entries:
        console.print()
        console.print(" [muted]Cache is empty.[/muted]")
        console.print(f" [muted]{cache_root}[/muted]")
        console.print()
        return

    # Header line: version count, aggregate size, cache location.
    total_size = sum(e.size for e in entries)
    console.print()
    console.print(
        f" [bold]{len(entries)}[/bold] cached version(s) "
        f"[muted]·[/muted] {_fmt_size(total_size)} "
        f"[muted]·[/muted] {cache_root}\n"
    )

    table = Table(box=box.SIMPLE, show_header=True, header_style="bold white")
    table.add_column("Dataset", style="identifier", min_width=30)
    table.add_column("Version", min_width=9)
    table.add_column("Files", justify="right", min_width=5)
    table.add_column("Size", justify="right", min_width=8)

    for entry in entries:
        table.add_row(
            entry.dataset_id,
            entry.version,
            str(len(entry.files)),
            _fmt_size(entry.size),
        )

    console.print(table)
    console.print()
132
+
133
+
134
@cache_app.command("size")
def cache_size() -> None:
    """Show total disk usage of the local cache."""
    entries = _scan_cache(get_cache_root())
    total_size = sum(entry.size for entry in entries)
    total_files = sum(len(entry.files) for entry in entries)

    # Machine-readable output short-circuits the rich rendering below.
    if state.output == OutputFormat.json:
        import json

        print(json.dumps({"size_bytes": total_size, "files": total_files}, indent=2))
        return

    if state.quiet:
        return

    console.print()
    console.print(f" [bold]Cache:[/bold] {get_cache_root()}")
    console.print(
        f" [bold]Total:[/bold] {_fmt_size(total_size)} "
        f"[muted]({total_files} file(s))[/muted]"
    )
    console.print()
156
+
157
+
158
@cache_app.command("clear")
def cache_clear(
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
) -> None:
    """Remove all cached datasets.

    Prompts for confirmation unless --yes is given.  Fix: this command now
    honours the global --output json and --quiet flags, consistent with
    ``cache list`` and ``cache size`` (previously it always printed rich
    output regardless of the requested format).
    """
    output_fmt = state.output
    quiet = state.quiet
    root = get_cache_root()
    entries = _scan_cache(root)

    if not entries:
        if output_fmt == OutputFormat.json:
            import json

            print(json.dumps({"cleared": False, "size_bytes": 0, "files": 0}, indent=2))
        elif not quiet:
            console.print()
            console.print(" [muted]Cache is already empty.[/muted]")
            console.print()
        return

    total_size = sum(e.size for e in entries)
    total_files = sum(len(e.files) for e in entries)

    if not yes:
        confirmed = typer.confirm(
            f" Clear {_fmt_size(total_size)} ({total_files} file(s)) from cache?",
            default=False,
        )
        if not confirmed:
            if not quiet:
                console.print(" [muted]Aborted.[/muted]")
            return

    # Best-effort removal: a partially cleared cache is simply re-scanned
    # next time, so errors during deletion are ignored.
    shutil.rmtree(root, ignore_errors=True)

    if output_fmt == OutputFormat.json:
        import json

        print(
            json.dumps(
                {"cleared": True, "size_bytes": total_size, "files": total_files},
                indent=2,
            )
        )
    elif not quiet:
        console.print()
        console.print(
            f" [success]✓[/success] Cache cleared "
            f"[muted]({_fmt_size(total_size)} freed)[/muted]"
        )
        console.print()
@@ -0,0 +1,233 @@
1
+ """datum check — validate a datapackage.json file."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import typer
11
+ from pydantic import ValidationError
12
+ from rich.panel import Panel
13
+ from rich.table import Table
14
+
15
+ from datum.console import console, err_console
16
+ from datum.models import DataPackage
17
+ from datum.state import OutputFormat, state
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Helpers
22
+ # ---------------------------------------------------------------------------
23
+
24
+
25
+ def _pydantic_errors(exc: ValidationError) -> list[dict[str, str]]:
26
+ """Flatten Pydantic v2 errors into simple field/message dicts."""
27
+ out: list[dict[str, str]] = []
28
+ for err in exc.errors():
29
+ loc_parts = []
30
+ for part in err["loc"]:
31
+ if isinstance(part, int):
32
+ loc_parts.append(f"[{part}]")
33
+ else:
34
+ loc_parts.append(str(part))
35
+ field = ".".join(loc_parts).replace(".[", "[")
36
+ out.append({"field": field, "message": err["msg"]})
37
+ return out
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Command
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
def cmd_check(
    file: Path = typer.Argument(
        Path("datapackage.json"),
        help="Path to the datapackage.json to validate",
        show_default=True,
    ),
) -> None:
    """
    Validate a datapackage.json file.

    Checks that the file exists, is valid JSON, and conforms to the
    Datum datapackage schema. Exits with code 0 on success, 1 on
    validation errors, and 2 on file or parse errors.

    Use [bold]--output json[/bold] for machine-readable output:

        datum check --output json | jq .
    """
    quiet = state.quiet
    as_json = state.output == OutputFormat.json

    # Step 1: the file must exist at all (exit 2 otherwise).
    if not file.exists():
        if as_json:
            _emit_json(
                valid=False,
                file=str(file),
                errors=[{"field": "file", "message": f"File not found: {file}"}],
            )
        else:
            err_console.print(
                f"\n[error]✗[/error] File not found: [bold]{file}[/bold]\n\n"
                "Run [bold]datum init[/bold] to create a datapackage.json, "
                "or pass a path: [bold]datum check path/to/datapackage.json[/bold]"
            )
        raise typer.Exit(code=2)

    # Step 2: the file must parse as JSON (exit 2 otherwise).
    try:
        raw: dict[str, Any] = json.loads(file.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        if as_json:
            _emit_json(
                valid=False,
                file=str(file),
                errors=[{"field": "json", "message": f"Invalid JSON: {exc}"}],
            )
        else:
            err_console.print(
                f"\n[error]✗[/error] [bold]{file}[/bold] is not valid JSON.\n\n"
                f"  {exc}\n"
            )
        raise typer.Exit(code=2)

    # Step 3: the document must conform to the schema (exit 1 otherwise).
    try:
        pkg = DataPackage.model_validate(raw)
    except ValidationError as exc:
        flattened = _pydantic_errors(exc)
        if as_json:
            _emit_json(valid=False, file=str(file), errors=flattened)
        else:
            _print_failure(file=file, errors=flattened, quiet=quiet)
        raise typer.Exit(code=1)

    # Step 4: success (exit 0).
    if as_json:
        _emit_json(valid=True, file=str(file), errors=[], package=pkg.to_dict())
    else:
        _print_success(file=file, pkg=pkg, quiet=quiet)
125
+
126
+
127
+ # ---------------------------------------------------------------------------
128
+ # Output renderers
129
+ # ---------------------------------------------------------------------------
130
+
131
+
132
+ def _emit_json(
133
+ *,
134
+ valid: bool,
135
+ file: str,
136
+ errors: list[dict[str, str]],
137
+ package: dict | None = None,
138
+ ) -> None:
139
+ payload: dict[str, Any] = {
140
+ "valid": valid,
141
+ "file": file,
142
+ "errors": errors,
143
+ }
144
+ if package is not None:
145
+ payload["package"] = package
146
+ print(json.dumps(payload, indent=2, ensure_ascii=False))
147
+
148
+
149
def _print_success(file: Path, pkg: DataPackage, quiet: bool) -> None:
    """Render a green success panel plus a summary of the validated package.

    Prints nothing in quiet mode.
    """
    if quiet:
        return

    console.print()
    console.print(
        Panel(
            f"[success]✓ Valid[/success] [muted]·[/muted] [bold]{file}[/bold]",
            border_style="green",
            padding=(0, 2),
        )
    )

    # Key/value summary of the package metadata.
    summary = Table(show_header=False, box=None, padding=(0, 2, 0, 0))
    summary.add_column("key", style="key", min_width=14)
    summary.add_column("value")

    format_tags = " ".join(
        f"[muted]{src.format.upper()}[/muted]" for src in pkg.sources
    )
    summary.add_row("Identifier", f"[identifier]{pkg.id}[/identifier]")
    summary.add_row("Version", pkg.version)
    summary.add_row("Title", pkg.title)
    summary.add_row("Publisher", pkg.publisher.name)
    summary.add_row("Sources", f"{len(pkg.sources)} file(s) " + format_tags)
    if pkg.license:
        summary.add_row("License", pkg.license)
    if pkg.tags:
        summary.add_row("Tags", ", ".join(pkg.tags))

    # Checksum coverage: warn when some or all sources lack one.
    with_checksum = sum(1 for src in pkg.sources if src.checksum)
    if with_checksum == 0:
        summary.add_row(
            "Checksums",
            "[warning]None — add sha256 checksums to your sources for integrity verification[/warning]",
        )
    elif with_checksum < len(pkg.sources):
        missing = len(pkg.sources) - with_checksum
        summary.add_row(
            "Checksums",
            f"[warning]{missing} source(s) missing a checksum[/warning]",
        )
    else:
        summary.add_row("Checksums", "[success]✓ All sources have checksums[/success]")

    console.print(summary)
    console.print()
201
+
202
+
203
def _print_failure(file: Path, errors: list[dict[str, str]], quiet: bool) -> None:
    """Render a red failure panel and one table row per validation error.

    In quiet mode nothing is printed (the exit code alone signals failure);
    the early return also avoids building a panel and table that were
    previously constructed and then discarded.
    """
    if quiet:
        return

    n = len(errors)

    console.print()
    console.print(
        Panel(
            f"[error]✗ Invalid[/error] [muted]·[/muted] [bold]{file}[/bold] "
            f"[muted]·[/muted] [error]{n} error{'s' if n != 1 else ''}[/error]",
            border_style="red",
            padding=(0, 2),
        )
    )

    table = Table(show_header=False, box=None, padding=(0, 2, 0, 0))
    table.add_column("field", style="key", min_width=20)
    table.add_column("message")

    for err in errors:
        table.add_row(
            f"[error]✗[/error] {err['field']}",
            err["message"],
        )

    console.print(table)
    console.print()
    console.print(
        " Fix the errors above and run [bold]datum check[/bold] again."
    )
    console.print()
@@ -0,0 +1,171 @@
1
+ """datum config — manage local Datum configuration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Optional
8
+
9
+ import typer
10
+ from rich import box
11
+ from rich.table import Table
12
+
13
+ from datum.console import console, err_console
14
+ from datum.state import OutputFormat, state
15
+
16
+ config_app = typer.Typer(help="Manage local Datum configuration.")
17
+
18
+ # Keys with descriptions shown in `datum config list`
19
+ KNOWN_KEYS = {
20
+ "registry": "Default registry URL or local path",
21
+ "output": "Default output format (table | json | plain)",
22
+ }
23
+
24
+
25
def get_config_path() -> Path:
    """Location of the user-level Datum config file."""
    return Path("~", ".datum", "config.json").expanduser()
27
+
28
+
29
def load_config() -> dict:
    """Read the config file, returning {} if missing or unreadable.

    A corrupt or unreadable file is treated the same as a missing one so a
    bad config never breaks the CLI.  Fix: the former bare
    ``except Exception`` is narrowed to the failures this operation can
    actually produce — ``OSError`` (I/O) and ``ValueError`` (which covers
    ``json.JSONDecodeError``) — so genuine bugs surface instead of being
    silently swallowed.
    """
    p = get_config_path()
    if not p.exists():
        return {}
    try:
        return json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return {}
37
+
38
+
39
def save_config(cfg: dict) -> None:
    """Write *cfg* to the config file, creating parent directories as needed."""
    path = get_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(cfg, indent=2, ensure_ascii=False)
    path.write_text(serialized, encoding="utf-8")
43
+
44
+
45
# Keep private aliases for backwards compat within this module
# (subcommands below refer to the loaders by these underscore names).
_load = load_config
_save = save_config
48
+
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Subcommands
52
+ # ---------------------------------------------------------------------------
53
+
54
+
55
@config_app.command("get")
def config_get(
    key: str = typer.Argument(..., help="Configuration key"),
) -> None:
    """Print the value of a configuration key."""
    # Shares the single-key renderer with `config show <key>`.
    _show_one(key)
61
+
62
+
63
@config_app.command("set")
def config_set(
    key: str = typer.Argument(..., help="Configuration key"),
    value: str = typer.Argument(..., help="Configuration value"),
) -> None:
    """Set a configuration value."""
    cfg = _load()
    cfg[key] = value
    _save(cfg)

    # Confirm the write in the requested output format.
    if state.output == OutputFormat.json:
        print(json.dumps({"key": key, "value": value}, indent=2))
    elif not state.quiet:
        console.print(f" [success]✓[/success] [bold]{key}[/bold] = {value}")
80
+
81
+
82
@config_app.command("show")
def config_show(
    key: Optional[str] = typer.Argument(None, help="Key to show (omit to show all)"),
) -> None:
    """Show one key or all configuration values."""
    if key is None:
        _show_all()
    else:
        _show_one(key)
90
+
91
+
92
@config_app.command("list")
def config_list() -> None:
    """List all configuration values."""
    # Identical to `config show` with no key argument.
    _show_all()
96
+
97
+
98
def _show_one(key: str) -> None:
    """Print a single config value; exit with code 1 if the key is unset."""
    as_json = state.output == OutputFormat.json
    cfg = _load()

    try:
        value = cfg[key]
    except KeyError:
        # Unset key: report it in the requested format, then fail.
        if as_json:
            print(json.dumps({"key": key, "value": None}, indent=2))
        else:
            err_console.print(f"\n[error]✗[/error] Key [bold]{key}[/bold] is not set.\n")
        raise typer.Exit(code=1)

    if as_json:
        print(json.dumps({"key": key, "value": value}, indent=2))
    else:
        console.print(value)
114
+
115
+
116
def _show_all() -> None:
    """Render every config key/value, honouring --output and --quiet."""
    cfg = _load()

    if state.output == OutputFormat.json:
        print(json.dumps(cfg, indent=2, ensure_ascii=False))
        return

    if state.quiet:
        return

    console.print()

    if not cfg:
        console.print(" [muted]No configuration set.[/muted]")
        console.print(f" [muted]{get_config_path()}[/muted]")
        console.print()
        return

    table = Table(box=box.SIMPLE, show_header=True, header_style="bold white")
    table.add_column("Key", style="key", min_width=16)
    table.add_column("Value", min_width=20)
    table.add_column("Description", style="muted")

    # Stable alphabetical listing; unknown keys get an empty description.
    for cfg_key in sorted(cfg):
        table.add_row(cfg_key, str(cfg[cfg_key]), KNOWN_KEYS.get(cfg_key, ""))

    console.print(table)
    console.print()
147
+
148
+
149
@config_app.command("unset")
def config_unset(
    key: str = typer.Argument(..., help="Configuration key to remove"),
) -> None:
    """Remove a configuration key."""
    as_json = state.output == OutputFormat.json
    cfg = _load()

    if key not in cfg:
        # Nothing to remove: report and exit non-zero.
        if as_json:
            print(json.dumps({"key": key, "removed": False}, indent=2))
        else:
            err_console.print(f"\n[error]✗[/error] Key [bold]{key}[/bold] is not set.\n")
        raise typer.Exit(code=1)

    cfg.pop(key)
    _save(cfg)

    if as_json:
        print(json.dumps({"key": key, "removed": True}, indent=2))
    elif not state.quiet:
        console.print(f" [success]✓[/success] [bold]{key}[/bold] removed")