atdata 0.2.2b1-py3-none-any.whl → 0.3.0b1-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- atdata/.gitignore +1 -0
- atdata/__init__.py +31 -1
- atdata/_cid.py +29 -35
- atdata/_exceptions.py +168 -0
- atdata/_helpers.py +33 -17
- atdata/_hf_api.py +109 -59
- atdata/_logging.py +70 -0
- atdata/_protocols.py +74 -132
- atdata/_schema_codec.py +38 -41
- atdata/_sources.py +57 -64
- atdata/_stub_manager.py +31 -26
- atdata/_type_utils.py +47 -7
- atdata/atmosphere/__init__.py +31 -24
- atdata/atmosphere/_types.py +11 -11
- atdata/atmosphere/client.py +11 -8
- atdata/atmosphere/lens.py +27 -30
- atdata/atmosphere/records.py +34 -39
- atdata/atmosphere/schema.py +35 -31
- atdata/atmosphere/store.py +16 -20
- atdata/cli/__init__.py +163 -168
- atdata/cli/diagnose.py +12 -8
- atdata/cli/inspect.py +69 -0
- atdata/cli/local.py +5 -2
- atdata/cli/preview.py +63 -0
- atdata/cli/schema.py +109 -0
- atdata/dataset.py +678 -533
- atdata/lens.py +85 -83
- atdata/local/__init__.py +71 -0
- atdata/local/_entry.py +157 -0
- atdata/local/_index.py +940 -0
- atdata/local/_repo_legacy.py +218 -0
- atdata/local/_s3.py +349 -0
- atdata/local/_schema.py +380 -0
- atdata/manifest/__init__.py +28 -0
- atdata/manifest/_aggregates.py +156 -0
- atdata/manifest/_builder.py +163 -0
- atdata/manifest/_fields.py +154 -0
- atdata/manifest/_manifest.py +146 -0
- atdata/manifest/_query.py +150 -0
- atdata/manifest/_writer.py +74 -0
- atdata/promote.py +20 -24
- atdata/providers/__init__.py +25 -0
- atdata/providers/_base.py +140 -0
- atdata/providers/_factory.py +69 -0
- atdata/providers/_postgres.py +214 -0
- atdata/providers/_redis.py +171 -0
- atdata/providers/_sqlite.py +191 -0
- atdata/repository.py +323 -0
- atdata/testing.py +337 -0
- {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/METADATA +5 -1
- atdata-0.3.0b1.dist-info/RECORD +54 -0
- atdata/local.py +0 -1707
- atdata-0.2.2b1.dist-info/RECORD +0 -28
- {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/WHEEL +0 -0
- {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/entry_points.txt +0 -0
- {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/licenses/LICENSE +0 -0
atdata/atmosphere/records.py
CHANGED
@@ -19,10 +19,12 @@ from ._types import (
 
 # Import for type checking only to avoid circular imports
 from typing import TYPE_CHECKING
+
 if TYPE_CHECKING:
-    from ..dataset import
+    from ..dataset import Dataset
+    from .._protocols import Packable
 
-ST = TypeVar("ST", bound="
+ST = TypeVar("ST", bound="Packable")
 
 
 class DatasetPublisher:
@@ -31,21 +33,19 @@ class DatasetPublisher:
     This class creates dataset records that reference a schema and point to
     external storage (WebDataset URLs) or ATProto blobs.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-        ...     tags=["computer-vision", "training"],
-        ... )
+    Examples:
+        >>> dataset = atdata.Dataset[MySample]("s3://bucket/data-{000000..000009}.tar")
+        >>>
+        >>> client = AtmosphereClient()
+        >>> client.login("handle", "password")
+        >>>
+        >>> publisher = DatasetPublisher(client)
+        >>> uri = publisher.publish(
+        ...     dataset,
+        ...     name="My Training Data",
+        ...     description="Training data for my model",
+        ...     tags=["computer-vision", "training"],
+        ... )
     """
 
     def __init__(self, client: AtmosphereClient):
@@ -267,19 +267,17 @@ class DatasetLoader:
     from them. Note that loading a dataset requires having the corresponding
     Python class for the sample type.
 
-
-
-
-
-
-
-
-
-
-
-
-        >>> # Get a specific dataset record
-        >>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.record/xyz")
+    Examples:
+        >>> client = AtmosphereClient()
+        >>> loader = DatasetLoader(client)
+        >>>
+        >>> # List available datasets
+        >>> datasets = loader.list()
+        >>> for ds in datasets:
+        ...     print(ds["name"], ds["schemaRef"])
+        >>>
+        >>> # Get a specific dataset record
+        >>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.record/xyz")
     """
 
     def __init__(self, client: AtmosphereClient):
@@ -398,8 +396,7 @@ class DatasetLoader:
             return storage.get("blobs", [])
         elif "storageExternal" in storage_type:
             raise ValueError(
-                "Dataset uses external URL storage, not blobs. "
-                "Use get_urls() instead."
+                "Dataset uses external URL storage, not blobs. Use get_urls() instead."
             )
         else:
             raise ValueError(f"Unknown storage type: {storage_type}")
@@ -478,13 +475,11 @@ class DatasetLoader:
         Raises:
             ValueError: If no storage URLs can be resolved.
 
-
-
-
-
-
-            >>> for batch in dataset.shuffled(batch_size=32):
-            ...     process(batch)
+        Examples:
+            >>> loader = DatasetLoader(client)
+            >>> dataset = loader.to_dataset(uri, MySampleType)
+            >>> for batch in dataset.shuffled(batch_size=32):
+            ...     process(batch)
         """
         # Import here to avoid circular import
        from ..dataset import Dataset
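The records.py change above narrows the generic bound from the concrete dataset class to the Packable protocol, with the heavy import kept under TYPE_CHECKING so the runtime import graph stays acyclic. A minimal sketch of that pattern follows; the Packable body and the module names used here are illustrative stand-ins, and the real protocol in atdata._protocols may declare different methods.

# Sketch of the TYPE_CHECKING + string-bound TypeVar pattern used above.
from typing import TYPE_CHECKING, Protocol, TypeVar, runtime_checkable

if TYPE_CHECKING:
    # Seen only by type checkers, so it cannot create a circular import at runtime.
    from mypackage.dataset import Dataset  # hypothetical heavy module

@runtime_checkable
class Packable(Protocol):
    """Illustrative stand-in: anything that can serialize itself to a dict."""

    def pack(self) -> dict: ...

# The bound is a string, so it is resolved lazily by the type checker
# and never evaluated at import time.
ST = TypeVar("ST", bound="Packable")

def describe(sample: "Packable") -> str:
    # Any object that structurally provides pack() satisfies the protocol.
    return f"packed keys: {sorted(sample.pack())}"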
atdata/atmosphere/schema.py
CHANGED
@@ -17,7 +17,6 @@ from ._types import (
     LEXICON_NAMESPACE,
 )
 from .._type_utils import (
-    numpy_dtype_to_string,
     unwrap_optional,
     is_ndarray_type,
     extract_ndarray_dtype,
@@ -25,10 +24,11 @@ from .._type_utils import (
 
 # Import for type checking only to avoid circular imports
 from typing import TYPE_CHECKING
+
 if TYPE_CHECKING:
-    from ..
+    from .._protocols import Packable
 
-ST = TypeVar("ST", bound="
+ST = TypeVar("ST", bound="Packable")
 
 
 class SchemaPublisher:
@@ -37,21 +37,19 @@ class SchemaPublisher:
     This class introspects a PackableSample class to extract its field
     definitions and publishes them as an ATProto schema record.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-        >>> print(uri)
-        at://did:plc:.../ac.foundation.dataset.sampleSchema/...
+    Examples:
+        >>> @atdata.packable
+        ... class MySample:
+        ...     image: NDArray
+        ...     label: str
+        ...
+        >>> client = AtmosphereClient()
+        >>> client.login("handle", "password")
+        >>>
+        >>> publisher = SchemaPublisher(client)
+        >>> uri = publisher.publish(MySample, version="1.0.0")
+        >>> print(uri)
+        at://did:plc:.../ac.foundation.dataset.sampleSchema/...
     """
 
     def __init__(self, client: AtmosphereClient):
@@ -90,7 +88,9 @@ class SchemaPublisher:
             TypeError: If a field type is not supported.
         """
         if not is_dataclass(sample_type):
-            raise ValueError(
+            raise ValueError(
+                f"{sample_type.__name__} must be a dataclass (use @packable)"
+            )
 
         # Build the schema record
         schema_record = self._build_schema_record(
@@ -155,12 +155,18 @@ class SchemaPublisher:
             return FieldType(kind="primitive", primitive="bytes")
 
         if is_ndarray_type(python_type):
-            return FieldType(
+            return FieldType(
+                kind="ndarray", dtype=extract_ndarray_dtype(python_type), shape=None
+            )
 
         origin = get_origin(python_type)
         if origin is list:
             args = get_args(python_type)
-            items =
+            items = (
+                self._python_type_to_field_type(args[0])
+                if args
+                else FieldType(kind="primitive", primitive="str")
+            )
             return FieldType(kind="array", items=items)
 
         if is_dataclass(python_type):
@@ -178,16 +184,14 @@ class SchemaLoader:
     This class fetches schema records from ATProto and can list available
     schemas from a repository.
 
-
-
-
-
-
-
-
-
-        >>> print(schema["name"])
-        'MySample'
+    Examples:
+        >>> client = AtmosphereClient()
+        >>> client.login("handle", "password")
+        >>>
+        >>> loader = SchemaLoader(client)
+        >>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.sampleSchema/...")
+        >>> print(schema["name"])
+        'MySample'
     """
 
     def __init__(self, client: AtmosphereClient):
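The list-handling hunk in _python_type_to_field_type guards with `if args` because an unparameterized List annotation yields no type arguments. A small standalone sketch of the typing introspection involved; describe_annotation and its printed labels are illustrative and not part of atdata.

from typing import List, get_args, get_origin

def describe_annotation(tp) -> str:
    origin = get_origin(tp)
    if origin is list:
        args = get_args(tp)
        # A bare typing.List has no arguments, hence the fallback branch.
        inner = describe_annotation(args[0]) if args else "str (fallback)"
        return f"array of {inner}"
    return getattr(tp, "__name__", str(tp))

print(describe_annotation(list[int]))           # array of int
print(describe_annotation(List))                # array of str (fallback)
print(describe_annotation(list[list[float]]))   # array of array of float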
atdata/atmosphere/store.py
CHANGED
@@ -6,23 +6,20 @@ protocol that stores dataset shards as ATProto blobs in a Personal Data Server.
 This enables fully decentralized dataset storage where both metadata (records)
 and data (blobs) live on the AT Protocol network.
 
-
-
-
-
-
-
-
-
-
-
-    >>> print(urls)
-    ['at://did:plc:.../blob/bafyrei...', ...]
+Examples:
+    >>> from atdata.atmosphere import AtmosphereClient, PDSBlobStore
+    >>>
+    >>> client = AtmosphereClient()
+    >>> client.login("handle.bsky.social", "app-password")
+    >>>
+    >>> store = PDSBlobStore(client)
+    >>> urls = store.write_shards(dataset, prefix="mnist/v1")
+    >>> print(urls)
+    ['at://did:plc:.../blob/bafyrei...', ...]
 """
 
 from __future__ import annotations
 
-import io
 import tempfile
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
@@ -31,6 +28,7 @@ import webdataset as wds
 
 if TYPE_CHECKING:
     from ..dataset import Dataset
+    from .._sources import BlobSource
     from .client import AtmosphereClient
 
 
@@ -48,13 +46,11 @@ class PDSBlobStore:
     Attributes:
         client: Authenticated AtmosphereClient instance.
 
-
-
-
-
-
-        >>> # Returns AT URIs like:
-        >>> # ['at://did:plc:abc/blob/bafyrei...', ...]
+    Examples:
+        >>> store = PDSBlobStore(client)
+        >>> urls = store.write_shards(dataset, prefix="training/v1")
+        >>> # Returns AT URIs like:
+        >>> # ['at://did:plc:abc/blob/bafyrei...', ...]
     """
 
     client: "AtmosphereClient"
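store.py keeps its tempfile and webdataset imports because shards are staged locally before being uploaded as blobs. A hedged sketch of one way such staging can look; stage_shards and its defaults are illustrative, and the real write_shards() in atdata may work differently.

# Illustrative only: stage WebDataset shards in a temp directory so each
# resulting .tar can then be uploaded as a PDS blob.
import tempfile
from pathlib import Path

import webdataset as wds

def stage_shards(samples, maxcount: int = 1000) -> list[Path]:
    tmpdir = Path(tempfile.mkdtemp(prefix="atdata-shards-"))
    pattern = str(tmpdir / "shard-%06d.tar")
    with wds.ShardWriter(pattern, maxcount=maxcount) as sink:
        for i, sample in enumerate(samples):
            # One record per sample, keyed by __key__; the "json" field is
            # serialized by webdataset's default encoders.
            sink.write({"__key__": f"{i:06d}", "json": sample})
    return sorted(tmpdir.glob("shard-*.tar"))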
atdata/cli/__init__.py
CHANGED
@@ -1,212 +1,207 @@
 """Command-line interface for atdata.
 
-This module provides CLI commands for managing local development infrastructure
-and diagnosing configuration issues.
+This module provides CLI commands for managing local development infrastructure,
+inspecting datasets, and diagnosing configuration issues.
 
 Commands:
-    atdata local up
-    atdata local down
-    atdata
-    atdata
-
-
-
-
-
-        Local infrastructure ready.
-
-        $ atdata diagnose
-        Checking Redis configuration...
-        ✓ Redis connected
-        ✓ Persistence enabled (AOF)
-        ✓ Memory policy: noeviction
+    atdata local up       Start Redis and MinIO containers for local development
+    atdata local down     Stop local development containers
+    atdata local status   Show status of local infrastructure
+    atdata diagnose       Check Redis configuration and connectivity
+    atdata inspect        Show dataset summary information
+    atdata schema show    Display dataset schema
+    atdata schema diff    Compare two dataset schemas
+    atdata preview        Preview first N samples of a dataset
+    atdata version        Show version information
 """
 
-import argparse
 import sys
-from typing import Sequence
 
+import typer
 
-
-
+# ---------------------------------------------------------------------------
+# App hierarchy
+# ---------------------------------------------------------------------------
 
-
-
+app = typer.Typer(
+    name="atdata",
+    help="A loose federation of distributed, typed datasets.",
+    add_completion=False,
+    no_args_is_help=True,
+)
 
-
-
-    ""
-
-
-
-    formatter_class=argparse.RawDescriptionHelpFormatter,
-)
-parser.add_argument(
-    "--version", "-v",
-    action="store_true",
-    help="Show version information",
-)
+local_app = typer.Typer(
+    name="local",
+    help="Manage local development infrastructure.",
+    no_args_is_help=True,
+)
+app.add_typer(local_app, name="local")
 
-
+schema_app = typer.Typer(
+    name="schema",
+    help="Show or compare dataset schemas.",
+    no_args_is_help=True,
+)
+app.add_typer(schema_app, name="schema")
 
-# 'local' command group
-local_parser = subparsers.add_parser(
-    "local",
-    help="Manage local development infrastructure",
-)
-local_subparsers = local_parser.add_subparsers(
-    dest="local_command",
-    help="Local infrastructure commands",
-)
 
-
-
-
-    help="Start Redis and MinIO containers",
-)
-up_parser.add_argument(
-    "--redis-port",
-    type=int,
-    default=6379,
-    help="Redis port (default: 6379)",
-)
-up_parser.add_argument(
-    "--minio-port",
-    type=int,
-    default=9000,
-    help="MinIO API port (default: 9000)",
-)
-up_parser.add_argument(
-    "--minio-console-port",
-    type=int,
-    default=9001,
-    help="MinIO console port (default: 9001)",
-)
-up_parser.add_argument(
-    "--detach", "-d",
-    action="store_true",
-    default=True,
-    help="Run containers in detached mode (default: True)",
-)
+# ---------------------------------------------------------------------------
+# Top-level commands
+# ---------------------------------------------------------------------------
 
-# 'local down' command
-down_parser = local_subparsers.add_parser(
-    "down",
-    help="Stop local development containers",
-)
-down_parser.add_argument(
-    "--volumes", "-v",
-    action="store_true",
-    help="Also remove volumes (deletes all data)",
-)
 
-
-
-
-
-
+@app.command()
+def version() -> None:
+    """Show version information."""
+    try:
+        from atdata import __version__
 
-
-
-
-    help="Diagnose Redis configuration and connectivity",
-)
-diagnose_parser.add_argument(
-    "--host",
-    default="localhost",
-    help="Redis host (default: localhost)",
-)
-diagnose_parser.add_argument(
-    "--port",
-    type=int,
-    default=6379,
-    help="Redis port (default: 6379)",
-)
+        ver = __version__
+    except ImportError:
+        from importlib.metadata import version as pkg_version
 
-
-subparsers.add_parser(
-    "version",
-    help="Show version information",
-)
+        ver = pkg_version("atdata")
 
-
-
-    # Handle --version flag
-    if args.version or args.command == "version":
-        return _cmd_version()
-
-    # Handle 'local' commands
-    if args.command == "local":
-        if args.local_command == "up":
-            return _cmd_local_up(
-                redis_port=args.redis_port,
-                minio_port=args.minio_port,
-                minio_console_port=args.minio_console_port,
-                detach=args.detach,
-            )
-        elif args.local_command == "down":
-            return _cmd_local_down(remove_volumes=args.volumes)
-        elif args.local_command == "status":
-            return _cmd_local_status()
-        else:
-            local_parser.print_help()
-            return 1
+    print(f"atdata {ver}")
 
-    # Handle 'diagnose' command
-    if args.command == "diagnose":
-        return _cmd_diagnose(host=args.host, port=args.port)
 
-
-
-
+@app.command()
+def inspect(
+    url: str = typer.Argument(help="Dataset URL, local path, or atmosphere URI"),
+) -> None:
+    """Show dataset summary (sample count, schema, shards)."""
+    from .inspect import inspect_dataset
 
+    code = inspect_dataset(url=url)
+    raise typer.Exit(code=code)
+
+
+@app.command()
+def preview(
+    url: str = typer.Argument(help="Dataset URL, local path, or atmosphere URI"),
+    limit: int = typer.Option(5, help="Number of samples to preview."),
+) -> None:
+    """Preview first N samples of a dataset."""
+    from .preview import preview_dataset
+
+    code = preview_dataset(url=url, limit=limit)
+    raise typer.Exit(code=code)
+
+
+@app.command()
+def diagnose(
+    host: str = typer.Option("localhost", help="Redis host."),
+    port: int = typer.Option(6379, help="Redis port."),
+) -> None:
+    """Diagnose Redis configuration and connectivity."""
+    from .diagnose import diagnose_redis
+
+    code = diagnose_redis(host=host, port=port)
+    raise typer.Exit(code=code)
 
-def _cmd_version() -> int:
-    """Show version information."""
-    try:
-        from atdata import __version__
-        version = __version__
-    except ImportError:
-        # Fallback to package metadata
-        from importlib.metadata import version as pkg_version
-        version = pkg_version("atdata")
 
-
-
+# ---------------------------------------------------------------------------
+# local sub-commands
+# ---------------------------------------------------------------------------
 
 
-
-
-
-
-
-
-
+@local_app.command()
+def up(
+    redis_port: int = typer.Option(6379, help="Redis port."),
+    minio_port: int = typer.Option(9000, help="MinIO API port."),
+    minio_console_port: int = typer.Option(9001, help="MinIO console port."),
+    detach: bool = typer.Option(
+        True, "--detach", "-d", help="Run containers in detached mode."
+    ),
+) -> None:
+    """Start Redis and MinIO containers."""
     from .local import local_up
-
+
+    code = local_up(
         redis_port=redis_port,
         minio_port=minio_port,
        minio_console_port=minio_console_port,
         detach=detach,
     )
+    raise typer.Exit(code=code)
 
 
-
-
+@local_app.command()
+def down(
+    volumes: bool = typer.Option(
+        False, "--volumes", "-v", help="Also remove volumes (deletes all data)."
+    ),
+) -> None:
+    """Stop local development containers."""
     from .local import local_down
-
+
+    code = local_down(remove_volumes=volumes)
+    raise typer.Exit(code=code)
 
 
-
+@local_app.command()
+def status() -> None:
     """Show status of local infrastructure."""
     from .local import local_status
-    return local_status()
 
+    code = local_status()
+    raise typer.Exit(code=code)
 
-
-
-
-
+
+# ---------------------------------------------------------------------------
+# schema sub-commands
+# ---------------------------------------------------------------------------
+
+
+@schema_app.command("show")
+def schema_show(
+    dataset_ref: str = typer.Argument(
+        help="Dataset URL, local path, or index reference."
+    ),
+) -> None:
+    """Display dataset schema."""
+    from .schema import schema_show as _schema_show
+
+    code = _schema_show(dataset_ref=dataset_ref)
+    raise typer.Exit(code=code)
+
+
+@schema_app.command("diff")
+def schema_diff(
+    url_a: str = typer.Argument(help="First dataset URL."),
+    url_b: str = typer.Argument(help="Second dataset URL."),
+) -> None:
+    """Compare two dataset schemas."""
+    from .schema import schema_diff as _schema_diff
+
+    code = _schema_diff(url_a=url_a, url_b=url_b)
+    raise typer.Exit(code=code)
+
+
+# ---------------------------------------------------------------------------
+# Entrypoint
+# ---------------------------------------------------------------------------
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Main entry point for the atdata CLI.
+
+    Args:
+        argv: Command-line arguments. If None, uses sys.argv[1:].
+
+    Returns:
+        Exit code (0 for success, non-zero for errors).
+    """
+    try:
+        if argv is not None:
+            app(args=argv, standalone_mode=False)
+        else:
+            app(standalone_mode=False)
+        return 0
+    except SystemExit as exc:
+        return exc.code if isinstance(exc.code, int) else 0
+    except Exception:
+        return 1
 
 
 if __name__ == "__main__":