atdata 0.2.2b1-py3-none-any.whl → 0.2.3b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,6 @@ from ._types import (
     LEXICON_NAMESPACE,
 )
 from .._type_utils import (
-    numpy_dtype_to_string,
     unwrap_optional,
     is_ndarray_type,
     extract_ndarray_dtype,
@@ -25,6 +24,7 @@ from .._type_utils import (
 
 # Import for type checking only to avoid circular imports
 from typing import TYPE_CHECKING
+
 if TYPE_CHECKING:
     from ..dataset import PackableSample
 
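
Note on the pattern above: guarding the PackableSample import with typing.TYPE_CHECKING keeps an annotation-only dependency out of the runtime import graph, which is what breaks the circular import between this module and ..dataset. A minimal self-contained sketch of the same pattern (mypkg.dataset is an illustrative module name, not part of atdata):

    from __future__ import annotations  # annotations become lazily evaluated strings

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Executed by type checkers only; skipped at runtime, so a module
        # that imports this one back does not create an import cycle.
        from mypkg.dataset import PackableSample

    def describe(sample: PackableSample) -> str:
        # At runtime the annotation is just a string; no import is needed.
        return type(sample).__name__
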
@@ -37,21 +37,19 @@ class SchemaPublisher:
     This class introspects a PackableSample class to extract its field
     definitions and publishes them as an ATProto schema record.
 
-    Example:
-        ::
-
-            >>> @atdata.packable
-            ... class MySample:
-            ...     image: NDArray
-            ...     label: str
-            ...
-            >>> client = AtmosphereClient()
-            >>> client.login("handle", "password")
-            >>>
-            >>> publisher = SchemaPublisher(client)
-            >>> uri = publisher.publish(MySample, version="1.0.0")
-            >>> print(uri)
-            at://did:plc:.../ac.foundation.dataset.sampleSchema/...
+    Examples:
+        >>> @atdata.packable
+        ... class MySample:
+        ...     image: NDArray
+        ...     label: str
+        ...
+        >>> client = AtmosphereClient()
+        >>> client.login("handle", "password")
+        >>>
+        >>> publisher = SchemaPublisher(client)
+        >>> uri = publisher.publish(MySample, version="1.0.0")
+        >>> print(uri)
+        at://did:plc:.../ac.foundation.dataset.sampleSchema/...
     """
 
     def __init__(self, client: AtmosphereClient):
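
The Examples: heading used above follows the Google docstring convention: a named section with an indented doctest block, which tools such as Sphinx's napoleon extension render as code without reST's explicit "::" literal-block marker. A minimal sketch of the shape, independent of atdata:

    def add(a: int, b: int) -> int:
        """Add two integers.

        Examples:
            >>> add(2, 3)
            5
        """
        return a + b
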
@@ -90,7 +88,9 @@ class SchemaPublisher:
             TypeError: If a field type is not supported.
         """
         if not is_dataclass(sample_type):
-            raise ValueError(f"{sample_type.__name__} must be a dataclass (use @packable)")
+            raise ValueError(
+                f"{sample_type.__name__} must be a dataclass (use @packable)"
+            )
 
         # Build the schema record
         schema_record = self._build_schema_record(
@@ -155,12 +155,18 @@ class SchemaPublisher:
             return FieldType(kind="primitive", primitive="bytes")
 
         if is_ndarray_type(python_type):
-            return FieldType(kind="ndarray", dtype=extract_ndarray_dtype(python_type), shape=None)
+            return FieldType(
+                kind="ndarray", dtype=extract_ndarray_dtype(python_type), shape=None
+            )
 
         origin = get_origin(python_type)
         if origin is list:
             args = get_args(python_type)
-            items = self._python_type_to_field_type(args[0]) if args else FieldType(kind="primitive", primitive="str")
+            items = (
+                self._python_type_to_field_type(args[0])
+                if args
+                else FieldType(kind="primitive", primitive="str")
+            )
             return FieldType(kind="array", items=items)
 
         if is_dataclass(python_type):
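
For orientation, the branches above form one arm of a recursive annotation-to-schema mapping: primitives map directly, list[X] recurses on the element type (defaulting to string items for a bare list), and nested dataclasses map to nested records. A standalone sketch of that dispatch, using plain dicts in place of atdata's FieldType:

    from dataclasses import is_dataclass
    from typing import get_args, get_origin

    def to_field(tp) -> dict:
        # Primitive leaf types map directly.
        if tp in (str, int, float, bool, bytes):
            return {"kind": "primitive", "primitive": tp.__name__}
        # list[X] recurses on X; a bare `list` falls back to str items.
        if get_origin(tp) is list:
            args = get_args(tp)
            items = to_field(args[0]) if args else {"kind": "primitive", "primitive": "str"}
            return {"kind": "array", "items": items}
        # A nested dataclass would become a nested schema record.
        if is_dataclass(tp):
            return {"kind": "object", "name": tp.__name__}
        raise TypeError(f"unsupported annotation: {tp!r}")

    print(to_field(list[int]))
    # {'kind': 'array', 'items': {'kind': 'primitive', 'primitive': 'int'}}
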
@@ -178,16 +184,14 @@ class SchemaLoader:
     This class fetches schema records from ATProto and can list available
     schemas from a repository.
 
-    Example:
-        ::
-
-            >>> client = AtmosphereClient()
-            >>> client.login("handle", "password")
-            >>>
-            >>> loader = SchemaLoader(client)
-            >>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.sampleSchema/...")
-            >>> print(schema["name"])
-            'MySample'
+    Examples:
+        >>> client = AtmosphereClient()
+        >>> client.login("handle", "password")
+        >>>
+        >>> loader = SchemaLoader(client)
+        >>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.sampleSchema/...")
+        >>> print(schema["name"])
+        'MySample'
     """
 
     def __init__(self, client: AtmosphereClient):
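
Because these Examples: blocks are doctest-formatted, they can double as lightweight tests. A minimal runner sketch (mymodule is a hypothetical module holding such docstrings; examples that talk to a live PDS would need fakes before this is useful):

    import doctest

    import mymodule  # hypothetical module whose docstrings contain >>> examples

    results = doctest.testmod(mymodule, verbose=False)
    print(f"{results.attempted} examples run, {results.failed} failed")
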
@@ -6,23 +6,20 @@ protocol that stores dataset shards as ATProto blobs in a Personal Data Server.
 This enables fully decentralized dataset storage where both metadata (records)
 and data (blobs) live on the AT Protocol network.
 
-Example:
-    ::
-
-        >>> from atdata.atmosphere import AtmosphereClient, PDSBlobStore
-        >>>
-        >>> client = AtmosphereClient()
-        >>> client.login("handle.bsky.social", "app-password")
-        >>>
-        >>> store = PDSBlobStore(client)
-        >>> urls = store.write_shards(dataset, prefix="mnist/v1")
-        >>> print(urls)
-        ['at://did:plc:.../blob/bafyrei...', ...]
+Examples:
+    >>> from atdata.atmosphere import AtmosphereClient, PDSBlobStore
+    >>>
+    >>> client = AtmosphereClient()
+    >>> client.login("handle.bsky.social", "app-password")
+    >>>
+    >>> store = PDSBlobStore(client)
+    >>> urls = store.write_shards(dataset, prefix="mnist/v1")
+    >>> print(urls)
+    ['at://did:plc:.../blob/bafyrei...', ...]
 """
 
 from __future__ import annotations
 
-import io
 import tempfile
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
@@ -31,6 +28,7 @@ import webdataset as wds
 
 if TYPE_CHECKING:
     from ..dataset import Dataset
+    from .._sources import BlobSource
     from .client import AtmosphereClient
 
 
@@ -48,13 +46,11 @@ class PDSBlobStore:
     Attributes:
         client: Authenticated AtmosphereClient instance.
 
-    Example:
-        ::
-
-            >>> store = PDSBlobStore(client)
-            >>> urls = store.write_shards(dataset, prefix="training/v1")
-            >>> # Returns AT URIs like:
-            >>> # ['at://did:plc:abc/blob/bafyrei...', ...]
+    Examples:
+        >>> store = PDSBlobStore(client)
+        >>> urls = store.write_shards(dataset, prefix="training/v1")
+        >>> # Returns AT URIs like:
+        >>> # ['at://did:plc:abc/blob/bafyrei...', ...]
     """
 
     client: "AtmosphereClient"
atdata/cli/__init__.py CHANGED
@@ -42,7 +42,8 @@ def main(argv: Sequence[str] | None = None) -> int:
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
     parser.add_argument(
-        "--version", "-v",
+        "--version",
+        "-v",
         action="store_true",
         help="Show version information",
     )
@@ -83,7 +84,8 @@ def main(argv: Sequence[str] | None = None) -> int:
         help="MinIO console port (default: 9001)",
     )
     up_parser.add_argument(
-        "--detach", "-d",
+        "--detach",
+        "-d",
         action="store_true",
         default=True,
         help="Run containers in detached mode (default: True)",
@@ -95,7 +97,8 @@ def main(argv: Sequence[str] | None = None) -> int:
         help="Stop local development containers",
     )
     down_parser.add_argument(
-        "--volumes", "-v",
+        "--volumes",
+        "-v",
        action="store_true",
         help="Also remove volumes (deletes all data)",
     )
@@ -165,10 +168,12 @@ def _cmd_version() -> int:
     """Show version information."""
     try:
         from atdata import __version__
+
         version = __version__
     except ImportError:
         # Fallback to package metadata
         from importlib.metadata import version as pkg_version
+
         version = pkg_version("atdata")
 
     print(f"atdata {version}")
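
The version lookup above follows a common two-step pattern: prefer the __version__ attribute the package itself exposes, then fall back to the installed distribution's metadata. A self-contained sketch of the same idea (the distribution name "atdata" comes from the diff; the helper name is ours):

    from importlib.metadata import PackageNotFoundError, version as pkg_version

    def get_version(dist: str = "atdata") -> str:
        try:
            module = __import__(dist)
            return module.__version__  # present if the package defines it
        except (ImportError, AttributeError):
            pass
        try:
            return pkg_version(dist)  # metadata of the installed distribution
        except PackageNotFoundError:
            return "unknown"
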
@@ -183,6 +188,7 @@ def _cmd_local_up(
 ) -> int:
     """Start local development infrastructure."""
     from .local import local_up
+
     return local_up(
         redis_port=redis_port,
         minio_port=minio_port,
@@ -194,18 +200,21 @@ def _cmd_local_up(
 def _cmd_local_down(remove_volumes: bool) -> int:
     """Stop local development infrastructure."""
     from .local import local_down
+
     return local_down(remove_volumes=remove_volumes)
 
 
 def _cmd_local_status() -> int:
     """Show status of local infrastructure."""
     from .local import local_status
+
     return local_status()
 
 
 def _cmd_diagnose(host: str, port: int) -> int:
     """Diagnose Redis configuration."""
     from .diagnose import diagnose_redis
+
     return diagnose_redis(host=host, port=port)
 
 
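
Each _cmd_* helper above imports its implementation inside the function body rather than at module top level; this keeps argument parsing and --help fast, because a subcommand's dependencies load only when that subcommand actually runs. A sketch of the idea (module and function names are illustrative):

    def _cmd_train() -> int:
        # Deferred import: the heavy dependency is only paid for when `train` runs.
        from .train import run_training

        return run_training()
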
atdata/cli/diagnose.py CHANGED
@@ -5,7 +5,6 @@ and other infrastructure components.
 """
 
 import sys
-from typing import Any
 
 
 def _print_status(label: str, ok: bool, detail: str = "") -> None:
@@ -41,6 +40,7 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
     # Try to connect
     try:
         from redis import Redis
+
         redis = Redis(host=host, port=port, socket_connect_timeout=5)
         redis.ping()
         _print_status("Connection", True, "connected")
@@ -70,7 +70,7 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
         _print_status(
             "AOF Persistence",
             aof_ok,
-            "enabled" if aof_ok else "DISABLED - data may be lost on restart!"
+            "enabled" if aof_ok else "DISABLED - data may be lost on restart!",
         )
         if not aof_ok:
             issues_found = True
@@ -85,7 +85,7 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
         _print_status(
             "RDB Persistence",
             rdb_ok,
-            f"configured ({save_config})" if rdb_ok else "DISABLED"
+            f"configured ({save_config})" if rdb_ok else "DISABLED",
         )
         # RDB disabled is only a warning if AOF is enabled
     except Exception as e:
@@ -95,7 +95,13 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
     try:
         policy = redis.config_get("maxmemory-policy").get("maxmemory-policy", "unknown")
         # Safe policies that won't evict index data
-        safe_policies = {"noeviction", "volatile-lru", "volatile-lfu", "volatile-ttl", "volatile-random"}
+        safe_policies = {
+            "noeviction",
+            "volatile-lru",
+            "volatile-lfu",
+            "volatile-ttl",
+            "volatile-random",
+        }
         policy_ok = policy in safe_policies
 
         if policy_ok:
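
For context: maxmemory-policy controls whether Redis evicts keys under memory pressure. The allkeys-* policies may silently drop index data, while noeviction and the volatile-* family only touch keys that carry a TTL, which is why the set above is treated as safe. A standalone probe with redis-py (host and port are placeholders):

    from redis import Redis

    r = Redis(host="localhost", port=6379, socket_connect_timeout=5)
    # CONFIG GET returns a dict, e.g. {"maxmemory-policy": "noeviction"}
    policy = r.config_get("maxmemory-policy").get("maxmemory-policy", "unknown")
    if policy == "noeviction" or policy.startswith("volatile-"):
        print(f"ok: {policy}")
    else:
        print(f"warning: {policy} can evict keys that have no TTL set")
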
@@ -104,7 +110,7 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
             _print_status(
                 "Memory Policy",
                 False,
-                f"{policy} - may evict index data! Use 'noeviction' or 'volatile-*'"
+                f"{policy} - may evict index data! Use 'noeviction' or 'volatile-*'",
             )
             issues_found = True
     except Exception as e:
@@ -141,9 +147,7 @@ def diagnose_redis(host: str = "localhost", port: int = 6379) -> int:
         for key in redis.scan_iter(match="LocalSchema:*", count=100):
             schema_count += 1
         _print_status(
-            "atdata Keys",
-            True,
-            f"{dataset_count} datasets, {schema_count} schemas"
+            "atdata Keys", True, f"{dataset_count} datasets, {schema_count} schemas"
         )
     except Exception as e:
         _print_status("atdata Keys", False, f"check failed: {e}")
atdata/cli/local.py CHANGED
@@ -144,7 +144,9 @@ def _run_compose(
         elif shutil.which("docker-compose"):
             base_cmd = ["docker-compose"]
         else:
-            raise RuntimeError("Neither 'docker compose' nor 'docker-compose' available")
+            raise RuntimeError(
+                "Neither 'docker compose' nor 'docker-compose' available"
+            )
     else:
         raise RuntimeError("Docker not found")
 
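
The discovery chain in _run_compose prefers the Compose v2 plugin (docker compose), falls back to the legacy standalone docker-compose binary, and raises if neither is present. A simplified standalone sketch (the probe step is an assumption; the diff does not show how v2 is detected):

    import shutil
    import subprocess

    def compose_base_cmd() -> list[str]:
        if not shutil.which("docker"):
            raise RuntimeError("Docker not found")
        # `docker compose version` exits 0 only when the v2 plugin is installed.
        probe = subprocess.run(["docker", "compose", "version"], capture_output=True)
        if probe.returncode == 0:
            return ["docker", "compose"]
        if shutil.which("docker-compose"):
            return ["docker-compose"]
        raise RuntimeError("Neither 'docker compose' nor 'docker-compose' available")
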
@@ -195,6 +197,7 @@ def local_up(
 
     # Wait a moment for containers to be healthy
     import time
+
     time.sleep(2)
 
     # Show status