atdata 0.3.0b1-py3-none-any.whl → 0.3.2b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. atdata/__init__.py +11 -0
  2. atdata/_cid.py +0 -21
  3. atdata/_helpers.py +12 -0
  4. atdata/_hf_api.py +46 -1
  5. atdata/_logging.py +43 -0
  6. atdata/_protocols.py +81 -182
  7. atdata/_schema_codec.py +2 -2
  8. atdata/_sources.py +24 -4
  9. atdata/_stub_manager.py +5 -25
  10. atdata/atmosphere/__init__.py +60 -21
  11. atdata/atmosphere/_lexicon_types.py +595 -0
  12. atdata/atmosphere/_types.py +73 -245
  13. atdata/atmosphere/client.py +64 -12
  14. atdata/atmosphere/lens.py +60 -53
  15. atdata/atmosphere/records.py +291 -100
  16. atdata/atmosphere/schema.py +91 -65
  17. atdata/atmosphere/store.py +68 -66
  18. atdata/cli/__init__.py +16 -16
  19. atdata/cli/diagnose.py +2 -2
  20. atdata/cli/{local.py → infra.py} +10 -10
  21. atdata/dataset.py +266 -47
  22. atdata/index/__init__.py +54 -0
  23. atdata/{local → index}/_entry.py +6 -2
  24. atdata/{local → index}/_index.py +617 -72
  25. atdata/{local → index}/_schema.py +5 -5
  26. atdata/lexicons/__init__.py +127 -0
  27. atdata/lexicons/ac.foundation.dataset.arrayFormat.json +16 -0
  28. atdata/lexicons/ac.foundation.dataset.getLatestSchema.json +78 -0
  29. atdata/lexicons/ac.foundation.dataset.lens.json +101 -0
  30. atdata/lexicons/ac.foundation.dataset.record.json +117 -0
  31. atdata/lexicons/ac.foundation.dataset.schema.json +107 -0
  32. atdata/lexicons/ac.foundation.dataset.schemaType.json +16 -0
  33. atdata/lexicons/ac.foundation.dataset.storageBlobs.json +46 -0
  34. atdata/lexicons/ac.foundation.dataset.storageExternal.json +25 -0
  35. atdata/lexicons/ac.foundation.dataset.storageHttp.json +45 -0
  36. atdata/lexicons/ac.foundation.dataset.storageS3.json +61 -0
  37. atdata/lexicons/ndarray_shim.json +16 -0
  38. atdata/local/__init__.py +12 -13
  39. atdata/local/_repo_legacy.py +3 -3
  40. atdata/manifest/__init__.py +4 -0
  41. atdata/manifest/_proxy.py +321 -0
  42. atdata/promote.py +14 -10
  43. atdata/repository.py +66 -16
  44. atdata/stores/__init__.py +23 -0
  45. atdata/stores/_disk.py +131 -0
  46. atdata/{local → stores}/_s3.py +134 -112
  47. atdata/testing.py +12 -8
  48. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/METADATA +2 -2
  49. atdata-0.3.2b1.dist-info/RECORD +71 -0
  50. atdata-0.3.0b1.dist-info/RECORD +0 -54
  51. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/WHEEL +0 -0
  52. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/entry_points.txt +0 -0
  53. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/licenses/LICENSE +0 -0
atdata/atmosphere/schema.py CHANGED
@@ -1,25 +1,19 @@
 """Schema publishing and loading for ATProto.
 
 This module provides classes for publishing PackableSample schemas to ATProto
-and loading them back. Schemas are published as ``ac.foundation.dataset.sampleSchema``
+and loading them back. Schemas are published as ``ac.foundation.dataset.schema``
 records.
 """
 
 from dataclasses import fields, is_dataclass
 from typing import Type, TypeVar, Optional, get_type_hints, get_origin, get_args
 
-from .client import AtmosphereClient
-from ._types import (
-    AtUri,
-    SchemaRecord,
-    FieldDef,
-    FieldType,
-    LEXICON_NAMESPACE,
-)
+from .client import Atmosphere
+from ._types import AtUri, LEXICON_NAMESPACE
+from ._lexicon_types import LexSchemaRecord, JsonSchemaFormat
 from .._type_utils import (
     unwrap_optional,
     is_ndarray_type,
-    extract_ndarray_dtype,
 )
 
 # Import for type checking only to avoid circular imports
@@ -43,20 +37,19 @@ class SchemaPublisher:
         ...     image: NDArray
         ...     label: str
         ...
-        >>> client = AtmosphereClient()
-        >>> client.login("handle", "password")
+        >>> atmo = Atmosphere.login("handle", "password")
         >>>
-        >>> publisher = SchemaPublisher(client)
+        >>> publisher = SchemaPublisher(atmo)
         >>> uri = publisher.publish(MySample, version="1.0.0")
         >>> print(uri)
-        at://did:plc:.../ac.foundation.dataset.sampleSchema/...
+        at://did:plc:.../ac.foundation.dataset.schema/...
     """
 
-    def __init__(self, client: AtmosphereClient):
+    def __init__(self, client: Atmosphere):
         """Initialize the schema publisher.
 
         Args:
-            client: Authenticated AtmosphereClient instance.
+            client: Authenticated Atmosphere instance.
         """
         self.client = client
 
@@ -87,27 +80,32 @@ class SchemaPublisher:
             ValueError: If sample_type is not a dataclass or client is not authenticated.
             TypeError: If a field type is not supported.
         """
+        from atdata._logging import log_operation
+
         if not is_dataclass(sample_type):
             raise ValueError(
                 f"{sample_type.__name__} must be a dataclass (use @packable)"
             )
 
-        # Build the schema record
-        schema_record = self._build_schema_record(
-            sample_type,
-            name=name,
-            version=version,
-            description=description,
-            metadata=metadata,
-        )
+        with log_operation(
+            "SchemaPublisher.publish", schema=sample_type.__name__, version=version
+        ):
+            # Build the schema record
+            schema_record = self._build_schema_record(
+                sample_type,
+                name=name,
+                version=version,
+                description=description,
+                metadata=metadata,
+            )
 
-        # Publish to ATProto
-        return self.client.create_record(
-            collection=f"{LEXICON_NAMESPACE}.sampleSchema",
-            record=schema_record.to_record(),
-            rkey=rkey,
-            validate=False,  # PDS doesn't know our lexicon
-        )
+            # Publish to ATProto
+            return self.client.create_record(
+                collection=f"{LEXICON_NAMESPACE}.schema",
+                record=schema_record.to_record(),
+                rkey=rkey,
+                validate=False,  # PDS doesn't know our lexicon
+            )
 
     def _build_schema_record(
         self,
@@ -117,57 +115,74 @@ class SchemaPublisher:
         version: str,
         description: Optional[str],
         metadata: Optional[dict],
-    ) -> SchemaRecord:
-        """Build a SchemaRecord from a PackableSample class."""
-        field_defs = []
+    ) -> LexSchemaRecord:
+        """Build a LexSchemaRecord from a PackableSample class."""
         type_hints = get_type_hints(sample_type)
+        properties: dict[str, dict] = {}
+        required_fields: list[str] = []
+        has_ndarray = False
 
         for f in fields(sample_type):
             field_type = type_hints.get(f.name, f.type)
-            field_def = self._field_to_def(f.name, field_type)
-            field_defs.append(field_def)
-
-        return SchemaRecord(
+            field_type, is_optional = unwrap_optional(field_type)
+            prop = self._python_type_to_json_schema(field_type)
+            properties[f.name] = prop
+            if not is_optional:
+                required_fields.append(f.name)
+            if is_ndarray_type(field_type):
+                has_ndarray = True
+
+        schema_body = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": properties,
+        }
+        if required_fields:
+            schema_body["required"] = required_fields
+
+        array_format_versions = None
+        if has_ndarray:
+            array_format_versions = {"ndarrayBytes": "1.0.0"}
+
+        return LexSchemaRecord(
             name=name or sample_type.__name__,
             version=version,
+            schema_type="jsonSchema",
+            schema=JsonSchemaFormat(
+                schema_body=schema_body,
+                array_format_versions=array_format_versions,
+            ),
             description=description,
-            fields=field_defs,
             metadata=metadata,
         )
 
-    def _field_to_def(self, name: str, python_type) -> FieldDef:
-        """Convert a Python field to a FieldDef."""
-        python_type, is_optional = unwrap_optional(python_type)
-        field_type = self._python_type_to_field_type(python_type)
-        return FieldDef(name=name, field_type=field_type, optional=is_optional)
-
-    def _python_type_to_field_type(self, python_type) -> FieldType:
-        """Map a Python type to a FieldType."""
+    def _python_type_to_json_schema(self, python_type) -> dict:
+        """Map a Python type to a JSON Schema property definition."""
         if python_type is str:
-            return FieldType(kind="primitive", primitive="str")
+            return {"type": "string"}
         if python_type is int:
-            return FieldType(kind="primitive", primitive="int")
+            return {"type": "integer"}
         if python_type is float:
-            return FieldType(kind="primitive", primitive="float")
+            return {"type": "number"}
        if python_type is bool:
-            return FieldType(kind="primitive", primitive="bool")
+            return {"type": "boolean"}
         if python_type is bytes:
-            return FieldType(kind="primitive", primitive="bytes")
+            return {"type": "string", "format": "byte", "contentEncoding": "base64"}
 
         if is_ndarray_type(python_type):
-            return FieldType(
-                kind="ndarray", dtype=extract_ndarray_dtype(python_type), shape=None
-            )
+            return {
+                "$ref": "https://foundation.ac/schemas/atdata-ndarray-bytes/1.0.0#/$defs/ndarray"
+            }
 
         origin = get_origin(python_type)
         if origin is list:
             args = get_args(python_type)
             items = (
-                self._python_type_to_field_type(args[0])
+                self._python_type_to_json_schema(args[0])
                 if args
-                else FieldType(kind="primitive", primitive="str")
+                else {"type": "string"}
             )
-            return FieldType(kind="array", items=items)
+            return {"type": "array", "items": items}
 
         if is_dataclass(python_type):
             raise TypeError(
@@ -185,20 +200,19 @@ class SchemaLoader:
     schemas from a repository.
 
     Examples:
-        >>> client = AtmosphereClient()
-        >>> client.login("handle", "password")
+        >>> atmo = Atmosphere.login("handle", "password")
         >>>
-        >>> loader = SchemaLoader(client)
-        >>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.sampleSchema/...")
+        >>> loader = SchemaLoader(atmo)
+        >>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.schema/...")
         >>> print(schema["name"])
         'MySample'
     """
 
-    def __init__(self, client: AtmosphereClient):
+    def __init__(self, client: Atmosphere):
         """Initialize the schema loader.
 
         Args:
-            client: AtmosphereClient instance (authentication optional for reads).
+            client: Atmosphere instance (authentication optional for reads).
         """
         self.client = client
 
@@ -217,7 +231,7 @@ class SchemaLoader:
         """
         record = self.client.get_record(uri)
 
-        expected_type = f"{LEXICON_NAMESPACE}.sampleSchema"
+        expected_type = f"{LEXICON_NAMESPACE}.schema"
         if record.get("$type") != expected_type:
             raise ValueError(
                 f"Record at {uri} is not a schema record. "
@@ -226,6 +240,18 @@ class SchemaLoader:
 
         return record
 
+    def get_typed(self, uri: str | AtUri) -> LexSchemaRecord:
+        """Fetch a schema record and return as a typed object.
+
+        Args:
+            uri: The AT URI of the schema record.
+
+        Returns:
+            LexSchemaRecord instance.
+        """
+        record = self.get(uri)
+        return LexSchemaRecord.from_record(record)
+
     def list_all(
         self,
         repo: Optional[str] = None,
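To make the new schema encoding concrete, here is a minimal sketch of the JSON Schema body that `_build_schema_record` should now produce for a small sample class. The class and field names below are hypothetical, and a plain `bytes` field stands in for an ndarray field so the example stays self-contained:

    # Sketch only: mirrors the property mapping in _python_type_to_json_schema.
    # MySample is hypothetical; a real NDArray field would instead map to the
    # atdata-ndarray-bytes $ref shown in the diff above.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class MySample:
        image: bytes
        label: str
        weight: Optional[float] = None

    expected_schema_body = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {
            "image": {"type": "string", "format": "byte", "contentEncoding": "base64"},
            "label": {"type": "string"},
            "weight": {"type": "number"},
        },
        "required": ["image", "label"],  # Optional[...] fields are not required
    }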
atdata/atmosphere/store.py CHANGED
@@ -7,12 +7,11 @@ This enables fully decentralized dataset storage where both metadata (records)
 and data (blobs) live on the AT Protocol network.
 
 Examples:
-    >>> from atdata.atmosphere import AtmosphereClient, PDSBlobStore
+    >>> from atdata.atmosphere import Atmosphere, PDSBlobStore
     >>>
-    >>> client = AtmosphereClient()
-    >>> client.login("handle.bsky.social", "app-password")
+    >>> atmo = Atmosphere.login("handle.bsky.social", "app-password")
     >>>
-    >>> store = PDSBlobStore(client)
+    >>> store = PDSBlobStore(atmo)
     >>> urls = store.write_shards(dataset, prefix="mnist/v1")
     >>> print(urls)
     ['at://did:plc:.../blob/bafyrei...', ...]
@@ -20,16 +19,38 @@ Examples:
 
 from __future__ import annotations
 
-import tempfile
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
-import webdataset as wds
+#: Maximum size in bytes for a single PDS blob upload (50 MB).
+PDS_BLOB_LIMIT_BYTES: int = 50_000_000
+
+#: Maximum total dataset size in bytes for atmosphere uploads (1 GB).
+PDS_TOTAL_DATASET_LIMIT_BYTES: int = 1_000_000_000
 
 if TYPE_CHECKING:
     from ..dataset import Dataset
     from .._sources import BlobSource
-    from .client import AtmosphereClient
+    from .client import Atmosphere
+
+
+class ShardUploadResult(list):
+    """Return type for ``PDSBlobStore.write_shards()``.
+
+    Extends ``list[str]`` (AT URIs) so it satisfies the ``AbstractDataStore``
+    protocol, while also carrying the raw blob reference dicts needed to
+    create ``storageBlobs`` records.
+
+    Attributes:
+        blob_refs: Blob reference dicts as returned by
+            ``Atmosphere.upload_blob()``.
+    """
+
+    blob_refs: list[dict]
+
+    def __init__(self, urls: list[str], blob_refs: list[dict]) -> None:
+        super().__init__(urls)
+        self.blob_refs = blob_refs
 
 
 @dataclass
@@ -44,7 +65,7 @@ class PDSBlobStore:
     to HTTP URLs for streaming.
 
     Attributes:
-        client: Authenticated AtmosphereClient instance.
+        client: Authenticated Atmosphere instance.
 
     Examples:
         >>> store = PDSBlobStore(client)
@@ -53,85 +74,61 @@ class PDSBlobStore:
         >>> # ['at://did:plc:abc/blob/bafyrei...', ...]
     """
 
-    client: "AtmosphereClient"
+    client: "Atmosphere"
 
     def write_shards(
         self,
         ds: "Dataset",
         *,
         prefix: str,
-        maxcount: int = 10000,
-        maxsize: float = 3e9,
         **kwargs: Any,
-    ) -> list[str]:
-        """Write dataset shards as PDS blobs.
+    ) -> "ShardUploadResult":
+        """Upload existing dataset shards as PDS blobs.
 
-        Creates tar archives from the dataset and uploads each as a blob
-        to the authenticated user's PDS.
+        Reads the tar archives already written to disk by the caller and
+        uploads each as a blob to the authenticated user's PDS. This
+        avoids re-serializing samples that have already been written.
 
         Args:
-            ds: The Dataset to write.
-            prefix: Logical path prefix for naming (used in shard names only).
-            maxcount: Maximum samples per shard (default: 10000).
-            maxsize: Maximum shard size in bytes (default: 3GB, PDS limit).
-            **kwargs: Additional args passed to wds.ShardWriter.
+            ds: The Dataset whose shards to upload.
+            prefix: Logical path prefix (unused, kept for protocol compat).
+            **kwargs: Unused, kept for protocol compatibility.
 
         Returns:
-            List of AT URIs for the written blobs, in format:
-            ``at://{did}/blob/{cid}``
+            A ``ShardUploadResult`` (behaves as ``list[str]`` of AT URIs)
+            with a ``blob_refs`` attribute containing the raw blob reference
+            dicts needed for ``storageBlobs`` records.
 
         Raises:
             ValueError: If not authenticated.
-            RuntimeError: If no shards were written.
-
-        Note:
-            PDS blobs have size limits (typically 50MB-5GB depending on PDS).
-            Adjust maxcount/maxsize to stay within limits.
+            RuntimeError: If no shards are found on the dataset.
         """
         if not self.client.did:
             raise ValueError("Client must be authenticated to upload blobs")
 
         did = self.client.did
         blob_urls: list[str] = []
+        blob_refs: list[dict] = []
+
+        shard_paths = ds.list_shards()
+        if not shard_paths:
+            raise RuntimeError("No shards to upload")
+
+        for shard_url in shard_paths:
+            with open(shard_url, "rb") as f:
+                shard_data = f.read()
+
+            blob_ref = self.client.upload_blob(
+                shard_data,
+                mime_type="application/x-tar",
+            )
+
+            blob_refs.append(blob_ref)
+            cid = blob_ref["ref"]["$link"]
+            at_uri = f"at://{did}/blob/{cid}"
+            blob_urls.append(at_uri)
 
-        # Write shards to temp files, upload each as blob
-        with tempfile.TemporaryDirectory() as temp_dir:
-            shard_pattern = f"{temp_dir}/shard-%06d.tar"
-            written_files: list[str] = []
-
-            # Track written files via custom post callback
-            def track_file(fname: str) -> None:
-                written_files.append(fname)
-
-            with wds.writer.ShardWriter(
-                shard_pattern,
-                maxcount=maxcount,
-                maxsize=maxsize,
-                post=track_file,
-                **kwargs,
-            ) as sink:
-                for sample in ds.ordered(batch_size=None):
-                    sink.write(sample.as_wds)
-
-            if not written_files:
-                raise RuntimeError("No shards written")
-
-            # Upload each shard as a blob
-            for shard_path in written_files:
-                with open(shard_path, "rb") as f:
-                    shard_data = f.read()
-
-                blob_ref = self.client.upload_blob(
-                    shard_data,
-                    mime_type="application/x-tar",
-                )
-
-                # Extract CID from blob reference
-                cid = blob_ref["ref"]["$link"]
-                at_uri = f"at://{did}/blob/{cid}"
-                blob_urls.append(at_uri)
-
-        return blob_urls
+        return ShardUploadResult(blob_urls, blob_refs)
 
     def read_url(self, url: str) -> str:
         """Resolve an AT URI blob reference to an HTTP URL.
@@ -201,4 +198,9 @@ class PDSBlobStore:
         return BlobSource(blob_refs=blob_refs)
 
 
-__all__ = ["PDSBlobStore"]
+__all__ = [
+    "PDS_BLOB_LIMIT_BYTES",
+    "PDS_TOTAL_DATASET_LIMIT_BYTES",
+    "PDSBlobStore",
+    "ShardUploadResult",
+]
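For orientation, a hedged usage sketch of the reworked `write_shards()` return type; the handle, app password, and `ds` dataset below are placeholders, and the import path follows the module docstring above:

    # Sketch only: ShardUploadResult behaves as a plain list of AT URIs while
    # also exposing the raw blob refs needed to build a storageBlobs record.
    from atdata.atmosphere import Atmosphere, PDSBlobStore

    atmo = Atmosphere.login("handle.bsky.social", "app-password")  # placeholder credentials
    store = PDSBlobStore(atmo)

    result = store.write_shards(ds, prefix="mnist/v1")  # ds: a Dataset with shards already on disk

    urls: list[str] = list(result)            # ['at://did:plc:.../blob/bafyrei...', ...]
    blob_refs: list[dict] = result.blob_refs  # raw refs for a storageBlobs record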
atdata/cli/__init__.py CHANGED
@@ -1,12 +1,12 @@
 """Command-line interface for atdata.
 
-This module provides CLI commands for managing local development infrastructure,
+This module provides CLI commands for managing development infrastructure,
 inspecting datasets, and diagnosing configuration issues.
 
 Commands:
-    atdata local up      Start Redis and MinIO containers for local development
-    atdata local down    Stop local development containers
-    atdata local status  Show status of local infrastructure
+    atdata infra up      Start Redis and MinIO containers for development
+    atdata infra down    Stop development containers
+    atdata infra status  Show status of infrastructure
     atdata diagnose      Check Redis configuration and connectivity
     atdata inspect       Show dataset summary information
     atdata schema show   Display dataset schema
@@ -30,12 +30,12 @@ app = typer.Typer(
     no_args_is_help=True,
 )
 
-local_app = typer.Typer(
-    name="local",
-    help="Manage local development infrastructure.",
+infra_app = typer.Typer(
+    name="infra",
+    help="Manage development infrastructure.",
     no_args_is_help=True,
 )
-app.add_typer(local_app, name="local")
+app.add_typer(infra_app, name="infra")
 
 schema_app = typer.Typer(
     name="schema",
@@ -101,11 +101,11 @@ def diagnose(
 
 
 # ---------------------------------------------------------------------------
-# local sub-commands
+# infra sub-commands
 # ---------------------------------------------------------------------------
 
 
-@local_app.command()
+@infra_app.command()
 def up(
     redis_port: int = typer.Option(6379, help="Redis port."),
     minio_port: int = typer.Option(9000, help="MinIO API port."),
@@ -115,7 +115,7 @@ def up(
     ),
 ) -> None:
     """Start Redis and MinIO containers."""
-    from .local import local_up
+    from .infra import local_up
 
     code = local_up(
         redis_port=redis_port,
@@ -126,23 +126,23 @@ def up(
     raise typer.Exit(code=code)
 
 
-@local_app.command()
+@infra_app.command()
 def down(
     volumes: bool = typer.Option(
         False, "--volumes", "-v", help="Also remove volumes (deletes all data)."
     ),
 ) -> None:
     """Stop local development containers."""
-    from .local import local_down
+    from .infra import local_down
 
     code = local_down(remove_volumes=volumes)
     raise typer.Exit(code=code)
 
 
-@local_app.command()
+@infra_app.command()
 def status() -> None:
-    """Show status of local infrastructure."""
-    from .local import local_status
+    """Show status of infrastructure."""
+    from .infra import local_status
 
     code = local_status()
     raise typer.Exit(code=code)
atdata/cli/diagnose.py CHANGED
@@ -51,7 +51,7 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
         _print_status("Connection", False, str(e))
         print()
         print("Cannot connect to Redis. Make sure Redis is running:")
-        print("  atdata local up")
+        print("  atdata infra up")
         return 1
 
     # Check Redis version
@@ -162,7 +162,7 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
         print("  maxmemory-policy noeviction")
         print()
         print("  # Or use atdata's preconfigured local setup:")
-        print("  atdata local up")
+        print("  atdata infra up")
         return 1
     else:
         print("All checks passed. Redis is properly configured for atdata.")
atdata/cli/{local.py → infra.py} RENAMED
@@ -1,6 +1,6 @@
-"""Local infrastructure management for atdata.
+"""Infrastructure management for atdata.
 
-This module provides commands to start and stop local development infrastructure:
+This module provides commands to start and stop development infrastructure:
 - Redis: For index storage and metadata
 - MinIO: S3-compatible object storage for dataset files
 
@@ -179,7 +179,7 @@ def local_up(
     if not _check_docker():
         return 1
 
-    print("Starting atdata local infrastructure...")
+    print("Starting atdata infrastructure...")
 
     compose_content = _get_compose_file(redis_port, minio_port, minio_console_port)
     command = ["up"]
@@ -202,7 +202,7 @@
 
     # Show status
     print()
-    print("Local infrastructure started:")
+    print("Infrastructure started:")
     print(f"  Redis:          localhost:{redis_port}")
     print(f"  MinIO API:      http://localhost:{minio_port}")
     print(f"  MinIO Console:  http://localhost:{minio_console_port}")
@@ -210,7 +210,7 @@ def local_up(
     print("MinIO credentials: minioadmin / minioadmin")
     print()
     print("Example usage:")
-    print("  from atdata.local import Index, S3DataStore")
+    print("  from atdata.stores import S3DataStore")
     print("  ")
     print("  store = S3DataStore.from_credentials({")
     print(f"      'AWS_ENDPOINT': 'http://localhost:{minio_port}',")
@@ -234,7 +234,7 @@ def local_down(remove_volumes: bool = False) -> int:
     if not _check_docker():
         return 1
 
-    print("Stopping atdata local infrastructure...")
+    print("Stopping atdata infrastructure...")
 
     # Use default ports for compose file (actual ports don't matter for down)
     compose_content = _get_compose_file(6379, 9000, 9001)
@@ -252,7 +252,7 @@ def local_down(remove_volumes: bool = False) -> int:
         print(f"Error: {e}", file=sys.stderr)
         return 1
 
-    print("Local infrastructure stopped.")
+    print("Infrastructure stopped.")
     return 0
 
 
@@ -268,16 +268,16 @@ def local_status() -> int:
     redis_running = _container_running(REDIS_CONTAINER)
     minio_running = _container_running(MINIO_CONTAINER)
 
-    print("atdata local infrastructure status:")
+    print("atdata infrastructure status:")
     print()
     print(f"  Redis ({REDIS_CONTAINER}): {'running' if redis_running else 'stopped'}")
     print(f"  MinIO ({MINIO_CONTAINER}): {'running' if minio_running else 'stopped'}")
 
     if redis_running or minio_running:
         print()
-        print("To stop: atdata local down")
+        print("To stop: atdata infra down")
     else:
         print()
-        print("To start: atdata local up")
+        print("To start: atdata infra up")
 
     return 0