atdata 0.2.2b1__py3-none-any.whl → 0.3.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. atdata/.gitignore +1 -0
  2. atdata/__init__.py +31 -1
  3. atdata/_cid.py +29 -35
  4. atdata/_exceptions.py +168 -0
  5. atdata/_helpers.py +33 -17
  6. atdata/_hf_api.py +109 -59
  7. atdata/_logging.py +70 -0
  8. atdata/_protocols.py +74 -132
  9. atdata/_schema_codec.py +38 -41
  10. atdata/_sources.py +57 -64
  11. atdata/_stub_manager.py +31 -26
  12. atdata/_type_utils.py +47 -7
  13. atdata/atmosphere/__init__.py +31 -24
  14. atdata/atmosphere/_types.py +11 -11
  15. atdata/atmosphere/client.py +11 -8
  16. atdata/atmosphere/lens.py +27 -30
  17. atdata/atmosphere/records.py +34 -39
  18. atdata/atmosphere/schema.py +35 -31
  19. atdata/atmosphere/store.py +16 -20
  20. atdata/cli/__init__.py +163 -168
  21. atdata/cli/diagnose.py +12 -8
  22. atdata/cli/inspect.py +69 -0
  23. atdata/cli/local.py +5 -2
  24. atdata/cli/preview.py +63 -0
  25. atdata/cli/schema.py +109 -0
  26. atdata/dataset.py +678 -533
  27. atdata/lens.py +85 -83
  28. atdata/local/__init__.py +71 -0
  29. atdata/local/_entry.py +157 -0
  30. atdata/local/_index.py +940 -0
  31. atdata/local/_repo_legacy.py +218 -0
  32. atdata/local/_s3.py +349 -0
  33. atdata/local/_schema.py +380 -0
  34. atdata/manifest/__init__.py +28 -0
  35. atdata/manifest/_aggregates.py +156 -0
  36. atdata/manifest/_builder.py +163 -0
  37. atdata/manifest/_fields.py +154 -0
  38. atdata/manifest/_manifest.py +146 -0
  39. atdata/manifest/_query.py +150 -0
  40. atdata/manifest/_writer.py +74 -0
  41. atdata/promote.py +20 -24
  42. atdata/providers/__init__.py +25 -0
  43. atdata/providers/_base.py +140 -0
  44. atdata/providers/_factory.py +69 -0
  45. atdata/providers/_postgres.py +214 -0
  46. atdata/providers/_redis.py +171 -0
  47. atdata/providers/_sqlite.py +191 -0
  48. atdata/repository.py +323 -0
  49. atdata/testing.py +337 -0
  50. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/METADATA +5 -1
  51. atdata-0.3.0b1.dist-info/RECORD +54 -0
  52. atdata/local.py +0 -1707
  53. atdata-0.2.2b1.dist-info/RECORD +0 -28
  54. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/WHEEL +0 -0
  55. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/entry_points.txt +0 -0
  56. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -19,10 +19,12 @@ from ._types import (
19
19
 
20
20
  # Import for type checking only to avoid circular imports
21
21
  from typing import TYPE_CHECKING
22
+
22
23
  if TYPE_CHECKING:
23
- from ..dataset import PackableSample, Dataset
24
+ from ..dataset import Dataset
25
+ from .._protocols import Packable
24
26
 
25
- ST = TypeVar("ST", bound="PackableSample")
27
+ ST = TypeVar("ST", bound="Packable")
26
28
 
27
29
 
28
30
  class DatasetPublisher:
@@ -31,21 +33,19 @@ class DatasetPublisher:
31
33
  This class creates dataset records that reference a schema and point to
32
34
  external storage (WebDataset URLs) or ATProto blobs.
33
35
 
34
- Example:
35
- ::
36
-
37
- >>> dataset = atdata.Dataset[MySample]("s3://bucket/data-{000000..000009}.tar")
38
- >>>
39
- >>> client = AtmosphereClient()
40
- >>> client.login("handle", "password")
41
- >>>
42
- >>> publisher = DatasetPublisher(client)
43
- >>> uri = publisher.publish(
44
- ... dataset,
45
- ... name="My Training Data",
46
- ... description="Training data for my model",
47
- ... tags=["computer-vision", "training"],
48
- ... )
36
+ Examples:
37
+ >>> dataset = atdata.Dataset[MySample]("s3://bucket/data-{000000..000009}.tar")
38
+ >>>
39
+ >>> client = AtmosphereClient()
40
+ >>> client.login("handle", "password")
41
+ >>>
42
+ >>> publisher = DatasetPublisher(client)
43
+ >>> uri = publisher.publish(
44
+ ... dataset,
45
+ ... name="My Training Data",
46
+ ... description="Training data for my model",
47
+ ... tags=["computer-vision", "training"],
48
+ ... )
49
49
  """
50
50
 
51
51
  def __init__(self, client: AtmosphereClient):
@@ -267,19 +267,17 @@ class DatasetLoader:
267
267
  from them. Note that loading a dataset requires having the corresponding
268
268
  Python class for the sample type.
269
269
 
270
- Example:
271
- ::
272
-
273
- >>> client = AtmosphereClient()
274
- >>> loader = DatasetLoader(client)
275
- >>>
276
- >>> # List available datasets
277
- >>> datasets = loader.list()
278
- >>> for ds in datasets:
279
- ... print(ds["name"], ds["schemaRef"])
280
- >>>
281
- >>> # Get a specific dataset record
282
- >>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.record/xyz")
270
+ Examples:
271
+ >>> client = AtmosphereClient()
272
+ >>> loader = DatasetLoader(client)
273
+ >>>
274
+ >>> # List available datasets
275
+ >>> datasets = loader.list()
276
+ >>> for ds in datasets:
277
+ ... print(ds["name"], ds["schemaRef"])
278
+ >>>
279
+ >>> # Get a specific dataset record
280
+ >>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.record/xyz")
283
281
  """
284
282
 
285
283
  def __init__(self, client: AtmosphereClient):
@@ -398,8 +396,7 @@ class DatasetLoader:
398
396
  return storage.get("blobs", [])
399
397
  elif "storageExternal" in storage_type:
400
398
  raise ValueError(
401
- "Dataset uses external URL storage, not blobs. "
402
- "Use get_urls() instead."
399
+ "Dataset uses external URL storage, not blobs. Use get_urls() instead."
403
400
  )
404
401
  else:
405
402
  raise ValueError(f"Unknown storage type: {storage_type}")
@@ -478,13 +475,11 @@ class DatasetLoader:
478
475
  Raises:
479
476
  ValueError: If no storage URLs can be resolved.
480
477
 
481
- Example:
482
- ::
483
-
484
- >>> loader = DatasetLoader(client)
485
- >>> dataset = loader.to_dataset(uri, MySampleType)
486
- >>> for batch in dataset.shuffled(batch_size=32):
487
- ... process(batch)
478
+ Examples:
479
+ >>> loader = DatasetLoader(client)
480
+ >>> dataset = loader.to_dataset(uri, MySampleType)
481
+ >>> for batch in dataset.shuffled(batch_size=32):
482
+ ... process(batch)
488
483
  """
489
484
  # Import here to avoid circular import
490
485
  from ..dataset import Dataset
@@ -17,7 +17,6 @@ from ._types import (
17
17
  LEXICON_NAMESPACE,
18
18
  )
19
19
  from .._type_utils import (
20
- numpy_dtype_to_string,
21
20
  unwrap_optional,
22
21
  is_ndarray_type,
23
22
  extract_ndarray_dtype,
@@ -25,10 +24,11 @@ from .._type_utils import (
25
24
 
26
25
  # Import for type checking only to avoid circular imports
27
26
  from typing import TYPE_CHECKING
27
+
28
28
  if TYPE_CHECKING:
29
- from ..dataset import PackableSample
29
+ from .._protocols import Packable
30
30
 
31
- ST = TypeVar("ST", bound="PackableSample")
31
+ ST = TypeVar("ST", bound="Packable")
32
32
 
33
33
 
34
34
  class SchemaPublisher:
@@ -37,21 +37,19 @@ class SchemaPublisher:
37
37
  This class introspects a PackableSample class to extract its field
38
38
  definitions and publishes them as an ATProto schema record.
39
39
 
40
- Example:
41
- ::
42
-
43
- >>> @atdata.packable
44
- ... class MySample:
45
- ... image: NDArray
46
- ... label: str
47
- ...
48
- >>> client = AtmosphereClient()
49
- >>> client.login("handle", "password")
50
- >>>
51
- >>> publisher = SchemaPublisher(client)
52
- >>> uri = publisher.publish(MySample, version="1.0.0")
53
- >>> print(uri)
54
- at://did:plc:.../ac.foundation.dataset.sampleSchema/...
40
+ Examples:
41
+ >>> @atdata.packable
42
+ ... class MySample:
43
+ ... image: NDArray
44
+ ... label: str
45
+ ...
46
+ >>> client = AtmosphereClient()
47
+ >>> client.login("handle", "password")
48
+ >>>
49
+ >>> publisher = SchemaPublisher(client)
50
+ >>> uri = publisher.publish(MySample, version="1.0.0")
51
+ >>> print(uri)
52
+ at://did:plc:.../ac.foundation.dataset.sampleSchema/...
55
53
  """
56
54
 
57
55
  def __init__(self, client: AtmosphereClient):
@@ -90,7 +88,9 @@ class SchemaPublisher:
90
88
  TypeError: If a field type is not supported.
91
89
  """
92
90
  if not is_dataclass(sample_type):
93
- raise ValueError(f"{sample_type.__name__} must be a dataclass (use @packable)")
91
+ raise ValueError(
92
+ f"{sample_type.__name__} must be a dataclass (use @packable)"
93
+ )
94
94
 
95
95
  # Build the schema record
96
96
  schema_record = self._build_schema_record(
@@ -155,12 +155,18 @@ class SchemaPublisher:
155
155
  return FieldType(kind="primitive", primitive="bytes")
156
156
 
157
157
  if is_ndarray_type(python_type):
158
- return FieldType(kind="ndarray", dtype=extract_ndarray_dtype(python_type), shape=None)
158
+ return FieldType(
159
+ kind="ndarray", dtype=extract_ndarray_dtype(python_type), shape=None
160
+ )
159
161
 
160
162
  origin = get_origin(python_type)
161
163
  if origin is list:
162
164
  args = get_args(python_type)
163
- items = self._python_type_to_field_type(args[0]) if args else FieldType(kind="primitive", primitive="str")
165
+ items = (
166
+ self._python_type_to_field_type(args[0])
167
+ if args
168
+ else FieldType(kind="primitive", primitive="str")
169
+ )
164
170
  return FieldType(kind="array", items=items)
165
171
 
166
172
  if is_dataclass(python_type):
@@ -178,16 +184,14 @@ class SchemaLoader:
178
184
  This class fetches schema records from ATProto and can list available
179
185
  schemas from a repository.
180
186
 
181
- Example:
182
- ::
183
-
184
- >>> client = AtmosphereClient()
185
- >>> client.login("handle", "password")
186
- >>>
187
- >>> loader = SchemaLoader(client)
188
- >>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.sampleSchema/...")
189
- >>> print(schema["name"])
190
- 'MySample'
187
+ Examples:
188
+ >>> client = AtmosphereClient()
189
+ >>> client.login("handle", "password")
190
+ >>>
191
+ >>> loader = SchemaLoader(client)
192
+ >>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.sampleSchema/...")
193
+ >>> print(schema["name"])
194
+ 'MySample'
191
195
  """
192
196
 
193
197
  def __init__(self, client: AtmosphereClient):
@@ -6,23 +6,20 @@ protocol that stores dataset shards as ATProto blobs in a Personal Data Server.
6
6
  This enables fully decentralized dataset storage where both metadata (records)
7
7
  and data (blobs) live on the AT Protocol network.
8
8
 
9
- Example:
10
- ::
11
-
12
- >>> from atdata.atmosphere import AtmosphereClient, PDSBlobStore
13
- >>>
14
- >>> client = AtmosphereClient()
15
- >>> client.login("handle.bsky.social", "app-password")
16
- >>>
17
- >>> store = PDSBlobStore(client)
18
- >>> urls = store.write_shards(dataset, prefix="mnist/v1")
19
- >>> print(urls)
20
- ['at://did:plc:.../blob/bafyrei...', ...]
9
+ Examples:
10
+ >>> from atdata.atmosphere import AtmosphereClient, PDSBlobStore
11
+ >>>
12
+ >>> client = AtmosphereClient()
13
+ >>> client.login("handle.bsky.social", "app-password")
14
+ >>>
15
+ >>> store = PDSBlobStore(client)
16
+ >>> urls = store.write_shards(dataset, prefix="mnist/v1")
17
+ >>> print(urls)
18
+ ['at://did:plc:.../blob/bafyrei...', ...]
21
19
  """
22
20
 
23
21
  from __future__ import annotations
24
22
 
25
- import io
26
23
  import tempfile
27
24
  from dataclasses import dataclass
28
25
  from typing import TYPE_CHECKING, Any
@@ -31,6 +28,7 @@ import webdataset as wds
31
28
 
32
29
  if TYPE_CHECKING:
33
30
  from ..dataset import Dataset
31
+ from .._sources import BlobSource
34
32
  from .client import AtmosphereClient
35
33
 
36
34
 
@@ -48,13 +46,11 @@ class PDSBlobStore:
48
46
  Attributes:
49
47
  client: Authenticated AtmosphereClient instance.
50
48
 
51
- Example:
52
- ::
53
-
54
- >>> store = PDSBlobStore(client)
55
- >>> urls = store.write_shards(dataset, prefix="training/v1")
56
- >>> # Returns AT URIs like:
57
- >>> # ['at://did:plc:abc/blob/bafyrei...', ...]
49
+ Examples:
50
+ >>> store = PDSBlobStore(client)
51
+ >>> urls = store.write_shards(dataset, prefix="training/v1")
52
+ >>> # Returns AT URIs like:
53
+ >>> # ['at://did:plc:abc/blob/bafyrei...', ...]
58
54
  """
59
55
 
60
56
  client: "AtmosphereClient"
atdata/cli/__init__.py CHANGED
@@ -1,212 +1,207 @@
1
1
  """Command-line interface for atdata.
2
2
 
3
- This module provides CLI commands for managing local development infrastructure
4
- and diagnosing configuration issues.
3
+ This module provides CLI commands for managing local development infrastructure,
4
+ inspecting datasets, and diagnosing configuration issues.
5
5
 
6
6
  Commands:
7
- atdata local up Start Redis and MinIO containers for local development
8
- atdata local down Stop local development containers
9
- atdata diagnose Check Redis configuration and connectivity
10
- atdata version Show version information
11
-
12
- Example:
13
- $ atdata local up
14
- Starting Redis on port 6379...
15
- Starting MinIO on port 9000...
16
- Local infrastructure ready.
17
-
18
- $ atdata diagnose
19
- Checking Redis configuration...
20
- ✓ Redis connected
21
- ✓ Persistence enabled (AOF)
22
- ✓ Memory policy: noeviction
7
+ atdata local up Start Redis and MinIO containers for local development
8
+ atdata local down Stop local development containers
9
+ atdata local status Show status of local infrastructure
10
+ atdata diagnose Check Redis configuration and connectivity
11
+ atdata inspect Show dataset summary information
12
+ atdata schema show Display dataset schema
13
+ atdata schema diff Compare two dataset schemas
14
+ atdata preview Preview first N samples of a dataset
15
+ atdata version Show version information
23
16
  """
24
17
 
25
- import argparse
26
18
  import sys
27
- from typing import Sequence
28
19
 
20
+ import typer
29
21
 
30
- def main(argv: Sequence[str] | None = None) -> int:
31
- """Main entry point for the atdata CLI.
22
+ # ---------------------------------------------------------------------------
23
+ # App hierarchy
24
+ # ---------------------------------------------------------------------------
32
25
 
33
- Args:
34
- argv: Command-line arguments. If None, uses sys.argv[1:].
26
+ app = typer.Typer(
27
+ name="atdata",
28
+ help="A loose federation of distributed, typed datasets.",
29
+ add_completion=False,
30
+ no_args_is_help=True,
31
+ )
35
32
 
36
- Returns:
37
- Exit code (0 for success, non-zero for errors).
38
- """
39
- parser = argparse.ArgumentParser(
40
- prog="atdata",
41
- description="A loose federation of distributed, typed datasets",
42
- formatter_class=argparse.RawDescriptionHelpFormatter,
43
- )
44
- parser.add_argument(
45
- "--version", "-v",
46
- action="store_true",
47
- help="Show version information",
48
- )
33
+ local_app = typer.Typer(
34
+ name="local",
35
+ help="Manage local development infrastructure.",
36
+ no_args_is_help=True,
37
+ )
38
+ app.add_typer(local_app, name="local")
49
39
 
50
- subparsers = parser.add_subparsers(dest="command", help="Available commands")
40
+ schema_app = typer.Typer(
41
+ name="schema",
42
+ help="Show or compare dataset schemas.",
43
+ no_args_is_help=True,
44
+ )
45
+ app.add_typer(schema_app, name="schema")
51
46
 
52
- # 'local' command group
53
- local_parser = subparsers.add_parser(
54
- "local",
55
- help="Manage local development infrastructure",
56
- )
57
- local_subparsers = local_parser.add_subparsers(
58
- dest="local_command",
59
- help="Local infrastructure commands",
60
- )
61
47
 
62
- # 'local up' command
63
- up_parser = local_subparsers.add_parser(
64
- "up",
65
- help="Start Redis and MinIO containers",
66
- )
67
- up_parser.add_argument(
68
- "--redis-port",
69
- type=int,
70
- default=6379,
71
- help="Redis port (default: 6379)",
72
- )
73
- up_parser.add_argument(
74
- "--minio-port",
75
- type=int,
76
- default=9000,
77
- help="MinIO API port (default: 9000)",
78
- )
79
- up_parser.add_argument(
80
- "--minio-console-port",
81
- type=int,
82
- default=9001,
83
- help="MinIO console port (default: 9001)",
84
- )
85
- up_parser.add_argument(
86
- "--detach", "-d",
87
- action="store_true",
88
- default=True,
89
- help="Run containers in detached mode (default: True)",
90
- )
48
+ # ---------------------------------------------------------------------------
49
+ # Top-level commands
50
+ # ---------------------------------------------------------------------------
91
51
 
92
- # 'local down' command
93
- down_parser = local_subparsers.add_parser(
94
- "down",
95
- help="Stop local development containers",
96
- )
97
- down_parser.add_argument(
98
- "--volumes", "-v",
99
- action="store_true",
100
- help="Also remove volumes (deletes all data)",
101
- )
102
52
 
103
- # 'local status' command
104
- local_subparsers.add_parser(
105
- "status",
106
- help="Show status of local infrastructure",
107
- )
53
+ @app.command()
54
+ def version() -> None:
55
+ """Show version information."""
56
+ try:
57
+ from atdata import __version__
108
58
 
109
- # 'diagnose' command
110
- diagnose_parser = subparsers.add_parser(
111
- "diagnose",
112
- help="Diagnose Redis configuration and connectivity",
113
- )
114
- diagnose_parser.add_argument(
115
- "--host",
116
- default="localhost",
117
- help="Redis host (default: localhost)",
118
- )
119
- diagnose_parser.add_argument(
120
- "--port",
121
- type=int,
122
- default=6379,
123
- help="Redis port (default: 6379)",
124
- )
59
+ ver = __version__
60
+ except ImportError:
61
+ from importlib.metadata import version as pkg_version
125
62
 
126
- # 'version' command (alternative to --version flag)
127
- subparsers.add_parser(
128
- "version",
129
- help="Show version information",
130
- )
63
+ ver = pkg_version("atdata")
131
64
 
132
- args = parser.parse_args(argv)
133
-
134
- # Handle --version flag
135
- if args.version or args.command == "version":
136
- return _cmd_version()
137
-
138
- # Handle 'local' commands
139
- if args.command == "local":
140
- if args.local_command == "up":
141
- return _cmd_local_up(
142
- redis_port=args.redis_port,
143
- minio_port=args.minio_port,
144
- minio_console_port=args.minio_console_port,
145
- detach=args.detach,
146
- )
147
- elif args.local_command == "down":
148
- return _cmd_local_down(remove_volumes=args.volumes)
149
- elif args.local_command == "status":
150
- return _cmd_local_status()
151
- else:
152
- local_parser.print_help()
153
- return 1
65
+ print(f"atdata {ver}")
154
66
 
155
- # Handle 'diagnose' command
156
- if args.command == "diagnose":
157
- return _cmd_diagnose(host=args.host, port=args.port)
158
67
 
159
- # No command given
160
- parser.print_help()
161
- return 0
68
+ @app.command()
69
+ def inspect(
70
+ url: str = typer.Argument(help="Dataset URL, local path, or atmosphere URI"),
71
+ ) -> None:
72
+ """Show dataset summary (sample count, schema, shards)."""
73
+ from .inspect import inspect_dataset
162
74
 
75
+ code = inspect_dataset(url=url)
76
+ raise typer.Exit(code=code)
77
+
78
+
79
+ @app.command()
80
+ def preview(
81
+ url: str = typer.Argument(help="Dataset URL, local path, or atmosphere URI"),
82
+ limit: int = typer.Option(5, help="Number of samples to preview."),
83
+ ) -> None:
84
+ """Preview first N samples of a dataset."""
85
+ from .preview import preview_dataset
86
+
87
+ code = preview_dataset(url=url, limit=limit)
88
+ raise typer.Exit(code=code)
89
+
90
+
91
+ @app.command()
92
+ def diagnose(
93
+ host: str = typer.Option("localhost", help="Redis host."),
94
+ port: int = typer.Option(6379, help="Redis port."),
95
+ ) -> None:
96
+ """Diagnose Redis configuration and connectivity."""
97
+ from .diagnose import diagnose_redis
98
+
99
+ code = diagnose_redis(host=host, port=port)
100
+ raise typer.Exit(code=code)
163
101
 
164
- def _cmd_version() -> int:
165
- """Show version information."""
166
- try:
167
- from atdata import __version__
168
- version = __version__
169
- except ImportError:
170
- # Fallback to package metadata
171
- from importlib.metadata import version as pkg_version
172
- version = pkg_version("atdata")
173
102
 
174
- print(f"atdata {version}")
175
- return 0
103
+ # ---------------------------------------------------------------------------
104
+ # local sub-commands
105
+ # ---------------------------------------------------------------------------
176
106
 
177
107
 
178
- def _cmd_local_up(
179
- redis_port: int,
180
- minio_port: int,
181
- minio_console_port: int,
182
- detach: bool,
183
- ) -> int:
184
- """Start local development infrastructure."""
108
+ @local_app.command()
109
+ def up(
110
+ redis_port: int = typer.Option(6379, help="Redis port."),
111
+ minio_port: int = typer.Option(9000, help="MinIO API port."),
112
+ minio_console_port: int = typer.Option(9001, help="MinIO console port."),
113
+ detach: bool = typer.Option(
114
+ True, "--detach", "-d", help="Run containers in detached mode."
115
+ ),
116
+ ) -> None:
117
+ """Start Redis and MinIO containers."""
185
118
  from .local import local_up
186
- return local_up(
119
+
120
+ code = local_up(
187
121
  redis_port=redis_port,
188
122
  minio_port=minio_port,
189
123
  minio_console_port=minio_console_port,
190
124
  detach=detach,
191
125
  )
126
+ raise typer.Exit(code=code)
192
127
 
193
128
 
194
- def _cmd_local_down(remove_volumes: bool) -> int:
195
- """Stop local development infrastructure."""
129
+ @local_app.command()
130
+ def down(
131
+ volumes: bool = typer.Option(
132
+ False, "--volumes", "-v", help="Also remove volumes (deletes all data)."
133
+ ),
134
+ ) -> None:
135
+ """Stop local development containers."""
196
136
  from .local import local_down
197
- return local_down(remove_volumes=remove_volumes)
137
+
138
+ code = local_down(remove_volumes=volumes)
139
+ raise typer.Exit(code=code)
198
140
 
199
141
 
200
- def _cmd_local_status() -> int:
142
+ @local_app.command()
143
+ def status() -> None:
201
144
  """Show status of local infrastructure."""
202
145
  from .local import local_status
203
- return local_status()
204
146
 
147
+ code = local_status()
148
+ raise typer.Exit(code=code)
205
149
 
206
- def _cmd_diagnose(host: str, port: int) -> int:
207
- """Diagnose Redis configuration."""
208
- from .diagnose import diagnose_redis
209
- return diagnose_redis(host=host, port=port)
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # schema sub-commands
153
+ # ---------------------------------------------------------------------------
154
+
155
+
156
+ @schema_app.command("show")
157
+ def schema_show(
158
+ dataset_ref: str = typer.Argument(
159
+ help="Dataset URL, local path, or index reference."
160
+ ),
161
+ ) -> None:
162
+ """Display dataset schema."""
163
+ from .schema import schema_show as _schema_show
164
+
165
+ code = _schema_show(dataset_ref=dataset_ref)
166
+ raise typer.Exit(code=code)
167
+
168
+
169
+ @schema_app.command("diff")
170
+ def schema_diff(
171
+ url_a: str = typer.Argument(help="First dataset URL."),
172
+ url_b: str = typer.Argument(help="Second dataset URL."),
173
+ ) -> None:
174
+ """Compare two dataset schemas."""
175
+ from .schema import schema_diff as _schema_diff
176
+
177
+ code = _schema_diff(url_a=url_a, url_b=url_b)
178
+ raise typer.Exit(code=code)
179
+
180
+
181
+ # ---------------------------------------------------------------------------
182
+ # Entrypoint
183
+ # ---------------------------------------------------------------------------
184
+
185
+
186
+ def main(argv: list[str] | None = None) -> int:
187
+ """Main entry point for the atdata CLI.
188
+
189
+ Args:
190
+ argv: Command-line arguments. If None, uses sys.argv[1:].
191
+
192
+ Returns:
193
+ Exit code (0 for success, non-zero for errors).
194
+ """
195
+ try:
196
+ if argv is not None:
197
+ app(args=argv, standalone_mode=False)
198
+ else:
199
+ app(standalone_mode=False)
200
+ return 0
201
+ except SystemExit as exc:
202
+ return exc.code if isinstance(exc.code, int) else 0
203
+ except Exception:
204
+ return 1
210
205
 
211
206
 
212
207
  if __name__ == "__main__":