atdata 0.2.2b1-py3-none-any.whl → 0.3.0b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. atdata/.gitignore +1 -0
  2. atdata/__init__.py +31 -1
  3. atdata/_cid.py +29 -35
  4. atdata/_exceptions.py +168 -0
  5. atdata/_helpers.py +33 -17
  6. atdata/_hf_api.py +109 -59
  7. atdata/_logging.py +70 -0
  8. atdata/_protocols.py +74 -132
  9. atdata/_schema_codec.py +38 -41
  10. atdata/_sources.py +57 -64
  11. atdata/_stub_manager.py +31 -26
  12. atdata/_type_utils.py +47 -7
  13. atdata/atmosphere/__init__.py +31 -24
  14. atdata/atmosphere/_types.py +11 -11
  15. atdata/atmosphere/client.py +11 -8
  16. atdata/atmosphere/lens.py +27 -30
  17. atdata/atmosphere/records.py +34 -39
  18. atdata/atmosphere/schema.py +35 -31
  19. atdata/atmosphere/store.py +16 -20
  20. atdata/cli/__init__.py +163 -168
  21. atdata/cli/diagnose.py +12 -8
  22. atdata/cli/inspect.py +69 -0
  23. atdata/cli/local.py +5 -2
  24. atdata/cli/preview.py +63 -0
  25. atdata/cli/schema.py +109 -0
  26. atdata/dataset.py +678 -533
  27. atdata/lens.py +85 -83
  28. atdata/local/__init__.py +71 -0
  29. atdata/local/_entry.py +157 -0
  30. atdata/local/_index.py +940 -0
  31. atdata/local/_repo_legacy.py +218 -0
  32. atdata/local/_s3.py +349 -0
  33. atdata/local/_schema.py +380 -0
  34. atdata/manifest/__init__.py +28 -0
  35. atdata/manifest/_aggregates.py +156 -0
  36. atdata/manifest/_builder.py +163 -0
  37. atdata/manifest/_fields.py +154 -0
  38. atdata/manifest/_manifest.py +146 -0
  39. atdata/manifest/_query.py +150 -0
  40. atdata/manifest/_writer.py +74 -0
  41. atdata/promote.py +20 -24
  42. atdata/providers/__init__.py +25 -0
  43. atdata/providers/_base.py +140 -0
  44. atdata/providers/_factory.py +69 -0
  45. atdata/providers/_postgres.py +214 -0
  46. atdata/providers/_redis.py +171 -0
  47. atdata/providers/_sqlite.py +191 -0
  48. atdata/repository.py +323 -0
  49. atdata/testing.py +337 -0
  50. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/METADATA +5 -1
  51. atdata-0.3.0b1.dist-info/RECORD +54 -0
  52. atdata/local.py +0 -1707
  53. atdata-0.2.2b1.dist-info/RECORD +0 -28
  54. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/WHEEL +0 -0
  55. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/entry_points.txt +0 -0
  56. {atdata-0.2.2b1.dist-info → atdata-0.3.0b1.dist-info}/licenses/LICENSE +0 -0
atdata/_sources.py CHANGED
@@ -13,22 +13,20 @@ The key insight is that WebDataset's tar_file_expander only needs
  By providing streams directly, we can support private repos, custom
  endpoints, and future backends like ATProto blobs.

- Example:
- ::
-
- >>> # Standard URL (uses WebDataset's gopen)
- >>> source = URLSource("https://example.com/data-{000..009}.tar")
- >>> ds = Dataset[MySample](source)
- >>>
- >>> # Private S3 with credentials
- >>> source = S3Source(
- ... bucket="my-bucket",
- ... keys=["train/shard-000.tar", "train/shard-001.tar"],
- ... endpoint="https://my-r2.cloudflarestorage.com",
- ... access_key="...",
- ... secret_key="...",
- ... )
- >>> ds = Dataset[MySample](source)
+ Examples:
+ >>> # Standard URL (uses WebDataset's gopen)
+ >>> source = URLSource("https://example.com/data-{000..009}.tar")
+ >>> ds = Dataset[MySample](source)
+ >>>
+ >>> # Private S3 with credentials
+ >>> source = S3Source(
+ ... bucket="my-bucket",
+ ... keys=["train/shard-000.tar", "train/shard-001.tar"],
+ ... endpoint="https://my-r2.cloudflarestorage.com",
+ ... access_key="...",
+ ... secret_key="...",
+ ... )
+ >>> ds = Dataset[MySample](source)
  """

  from __future__ import annotations
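Note: the stream-based design described above implies a small source protocol, namely any object exposing a ``shards`` iterable of ``(shard_id, stream)`` pairs can feed a Dataset. A minimal sketch of a custom source for local tar shards (``LocalTarSource`` is hypothetical, not part of this package):

    from dataclasses import dataclass
    from typing import IO, Iterator

    @dataclass
    class LocalTarSource:
        paths: list[str]

        @property
        def shards(self) -> Iterator[tuple[str, IO[bytes]]]:
            for path in self.paths:
                # tar_file_expander only needs a readable byte stream
                yield path, open(path, "rb")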
@@ -54,12 +52,10 @@ class URLSource:
  Attributes:
  url: URL or brace pattern for the shards.

- Example:
- ::
-
- >>> source = URLSource("https://example.com/train-{000..009}.tar")
- >>> for shard_id, stream in source.shards:
- ... print(f"Streaming {shard_id}")
+ Examples:
+ >>> source = URLSource("https://example.com/train-{000..009}.tar")
+ >>> for shard_id, stream in source.shards:
+ ... print(f"Streaming {shard_id}")
  """

  url: str
@@ -131,18 +127,16 @@ class S3Source:
  secret_key: Optional AWS secret access key.
  region: Optional AWS region (defaults to us-east-1).

- Example:
- ::
-
- >>> source = S3Source(
- ... bucket="my-datasets",
- ... keys=["train/shard-000.tar", "train/shard-001.tar"],
- ... endpoint="https://abc123.r2.cloudflarestorage.com",
- ... access_key="AKIAIOSFODNN7EXAMPLE",
- ... secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
- ... )
- >>> for shard_id, stream in source.shards:
- ... process(stream)
+ Examples:
+ >>> source = S3Source(
+ ... bucket="my-datasets",
+ ... keys=["train/shard-000.tar", "train/shard-001.tar"],
+ ... endpoint="https://abc123.r2.cloudflarestorage.com",
+ ... access_key="AKIAIOSFODNN7EXAMPLE",
+ ... secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ ... )
+ >>> for shard_id, stream in source.shards:
+ ... process(stream)
  """

  bucket: str
@@ -173,7 +167,9 @@ class S3Source:
  client_kwargs["region_name"] = self.region
  elif not self.endpoint:
  # Default region for AWS S3
- client_kwargs["region_name"] = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
+ client_kwargs["region_name"] = os.environ.get(
+ "AWS_DEFAULT_REGION", "us-east-1"
+ )

  self._client = boto3.client("s3", **client_kwargs)
  return self._client
@@ -225,7 +221,7 @@ class S3Source:
  if not shard_id.startswith(f"s3://{self.bucket}/"):
  raise KeyError(f"Shard not in this bucket: {shard_id}")

- key = shard_id[len(f"s3://{self.bucket}/"):]
+ key = shard_id[len(f"s3://{self.bucket}/") :]
  client = self._get_client()
  response = client.get_object(Bucket=self.bucket, Key=key)
  return response["Body"]
@@ -258,13 +254,11 @@ class S3Source:
  Raises:
  ValueError: If URLs are not valid s3:// URLs or span multiple buckets.

- Example:
- ::
-
- >>> source = S3Source.from_urls(
- ... ["s3://my-bucket/train-000.tar", "s3://my-bucket/train-001.tar"],
- ... endpoint="https://r2.example.com",
- ... )
+ Examples:
+ >>> source = S3Source.from_urls(
+ ... ["s3://my-bucket/train-000.tar", "s3://my-bucket/train-001.tar"],
+ ... endpoint="https://r2.example.com",
+ ... )
  """
  if not urls:
  raise ValueError("urls cannot be empty")
@@ -317,15 +311,13 @@ class S3Source:
  Returns:
  Configured S3Source.

- Example:
- ::
-
- >>> creds = {
- ... "AWS_ACCESS_KEY_ID": "...",
- ... "AWS_SECRET_ACCESS_KEY": "...",
- ... "AWS_ENDPOINT": "https://r2.example.com",
- ... }
- >>> source = S3Source.from_credentials(creds, "my-bucket", ["data.tar"])
+ Examples:
+ >>> creds = {
+ ... "AWS_ACCESS_KEY_ID": "...",
+ ... "AWS_SECRET_ACCESS_KEY": "...",
+ ... "AWS_ENDPOINT": "https://r2.example.com",
+ ... }
+ >>> source = S3Source.from_credentials(creds, "my-bucket", ["data.tar"])
  """
  return cls(
  bucket=bucket,
@@ -352,22 +344,22 @@ class BlobSource:
  blob_refs: List of blob reference dicts with 'did' and 'cid' keys.
  pds_endpoint: Optional PDS endpoint URL. If not provided, resolved from DID.

- Example:
- ::
-
- >>> source = BlobSource(
- ... blob_refs=[
- ... {"did": "did:plc:abc123", "cid": "bafyrei..."},
- ... {"did": "did:plc:abc123", "cid": "bafyrei..."},
- ... ],
- ... )
- >>> for shard_id, stream in source.shards:
- ... process(stream)
+ Examples:
+ >>> source = BlobSource(
+ ... blob_refs=[
+ ... {"did": "did:plc:abc123", "cid": "bafyrei..."},
+ ... {"did": "did:plc:abc123", "cid": "bafyrei..."},
+ ... ],
+ ... )
+ >>> for shard_id, stream in source.shards:
+ ... process(stream)
  """

  blob_refs: list[dict[str, str]]
  pds_endpoint: str | None = None
- _endpoint_cache: dict[str, str] = field(default_factory=dict, repr=False, compare=False)
+ _endpoint_cache: dict[str, str] = field(
+ default_factory=dict, repr=False, compare=False
+ )

  def _resolve_pds_endpoint(self, did: str) -> str:
  """Resolve PDS endpoint for a DID, with caching."""
@@ -459,6 +451,7 @@ class BlobSource:
  url = self._get_blob_url(did, cid)

  import requests
+
  response = requests.get(url, stream=True, timeout=60)
  response.raise_for_status()
  return response.raw
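Note: returning ``response.raw`` keeps the shard streaming, so bytes are never buffered in full. A hedged sketch of the URL that ``_get_blob_url`` presumably assembles, using ATProto's standard ``com.atproto.sync.getBlob`` XRPC endpoint (``fetch_blob_stream`` is illustrative, not package API):

    import requests

    def fetch_blob_stream(pds_endpoint: str, did: str, cid: str):
        url = f"{pds_endpoint}/xrpc/com.atproto.sync.getBlob?did={did}&cid={cid}"
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()
        return response.raw  # file-like; consumable as a tar stream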
atdata/_stub_manager.py CHANGED
@@ -8,20 +8,18 @@ Unlike simple .pyi stubs, the generated modules are actual Python code that
  can be imported at runtime. This allows ``decode_schema`` to return properly
  typed classes that work with both static type checkers and runtime.

- Example:
- ::
-
- >>> from atdata.local import Index
- >>>
- >>> # Enable auto-stub generation
- >>> index = Index(auto_stubs=True)
- >>>
- >>> # Modules are generated automatically on decode_schema
- >>> MyType = index.decode_schema("atdata://local/sampleSchema/MySample@1.0.0")
- >>> # MyType is now properly typed for IDE autocomplete!
- >>>
- >>> # Get the stub directory path for IDE configuration
- >>> print(f"Add to IDE: {index.stub_dir}")
+ Examples:
+ >>> from atdata.local import Index
+ >>>
+ >>> # Enable auto-stub generation
+ >>> index = Index(auto_stubs=True)
+ >>>
+ >>> # Modules are generated automatically on decode_schema
+ >>> MyType = index.decode_schema("atdata://local/sampleSchema/MySample@1.0.0")
+ >>> # MyType is now properly typed for IDE autocomplete!
+ >>>
+ >>> # Get the stub directory path for IDE configuration
+ >>> print(f"Add to IDE: {index.stub_dir}")
  """

  from pathlib import Path
@@ -101,14 +99,12 @@ class StubManager:
  Args:
  stub_dir: Directory to write module files. Defaults to ``~/.atdata/stubs/``.

- Example:
- ::
-
- >>> manager = StubManager()
- >>> schema_dict = {"name": "MySample", "version": "1.0.0", "fields": [...]}
- >>> SampleClass = manager.ensure_module(schema_dict)
- >>> print(manager.stub_dir)
- /Users/you/.atdata/stubs
+ Examples:
+ >>> manager = StubManager()
+ >>> schema_dict = {"name": "MySample", "version": "1.0.0", "fields": [...]}
+ >>> SampleClass = manager.ensure_module(schema_dict)
+ >>> print(manager.stub_dir)
+ /Users/you/.atdata/stubs
  """

  def __init__(self, stub_dir: Optional[Union[str, Path]] = None):
@@ -157,7 +153,9 @@ class StubManager:
  """Alias for _module_filename for backwards compatibility."""
  return self._module_filename(name, version)

- def _module_path(self, name: str, version: str, authority: str = DEFAULT_AUTHORITY) -> Path:
+ def _module_path(
+ self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
+ ) -> Path:
  """Get full path to module file for a schema.

  Args:
@@ -170,7 +168,9 @@ class StubManager:
  """
  return self._stub_dir / authority / self._module_filename(name, version)

- def _stub_path(self, name: str, version: str, authority: str = DEFAULT_AUTHORITY) -> Path:
+ def _stub_path(
+ self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
+ ) -> Path:
  """Alias for _module_path for backwards compatibility."""
  return self._module_path(name, version, authority)

@@ -211,7 +211,9 @@ class StubManager:
  authority_dir.mkdir(parents=True, exist_ok=True)
  init_path = authority_dir / "__init__.py"
  if not init_path.exists():
- init_path.write_text(f'"""Auto-generated schema modules for {authority}."""\n')
+ init_path.write_text(
+ f'"""Auto-generated schema modules for {authority}."""\n'
+ )

  def _write_module_atomic(self, path: Path, content: str, authority: str) -> None:
  """Write module file atomically using temp file and rename.
@@ -359,7 +361,9 @@ class StubManager:

  return cls

- def _import_class_from_module(self, module_path: Path, class_name: str) -> Optional[Type]:
+ def _import_class_from_module(
+ self, module_path: Path, class_name: str
+ ) -> Optional[Type]:
  """Import a class from a generated module file.

  Uses importlib to dynamically load the module and extract the class.
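Note: the importlib pattern referenced here is what makes generated modules importable without touching ``sys.path``. A hedged sketch (``load_class`` is illustrative, not the actual method body):

    import importlib.util
    from pathlib import Path
    from typing import Optional, Type

    def load_class(module_path: Path, class_name: str) -> Optional[Type]:
        spec = importlib.util.spec_from_file_location(module_path.stem, module_path)
        if spec is None or spec.loader is None:
            return None
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)  # executes the generated module file
        return getattr(module, class_name, None)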
@@ -399,6 +403,7 @@ class StubManager:
  def _print_ide_hint(self) -> None:
  """Print a one-time hint about IDE configuration."""
  import sys as _sys
+
  print(
  f"\n[atdata] Generated schema module in: {self._stub_dir}\n"
  f"[atdata] For IDE support, add this path to your type checker:\n"
atdata/_type_utils.py CHANGED
@@ -9,15 +9,29 @@ from typing import Any, get_origin, get_args, Union

  # Mapping from numpy dtype strings to schema dtype names
  NUMPY_DTYPE_MAP = {
- "float16": "float16", "float32": "float32", "float64": "float64",
- "int8": "int8", "int16": "int16", "int32": "int32", "int64": "int64",
- "uint8": "uint8", "uint16": "uint16", "uint32": "uint32", "uint64": "uint64",
- "bool": "bool", "complex64": "complex64", "complex128": "complex128",
+ "float16": "float16",
+ "float32": "float32",
+ "float64": "float64",
+ "int8": "int8",
+ "int16": "int16",
+ "int32": "int32",
+ "int64": "int64",
+ "uint8": "uint8",
+ "uint16": "uint16",
+ "uint32": "uint32",
+ "uint64": "uint64",
+ "bool": "bool",
+ "complex64": "complex64",
+ "complex128": "complex128",
  }

  # Mapping from Python primitive types to schema type names
  PRIMITIVE_TYPE_MAP = {
- str: "str", int: "int", float: "float", bool: "bool", bytes: "bytes",
+ str: "str",
+ int: "int",
+ float: "float",
+ bool: "bool",
+ bytes: "bytes",
  }

@@ -31,9 +45,13 @@ def numpy_dtype_to_string(dtype: Any) -> str:
  Schema dtype string (e.g., "float32", "int64"). Defaults to "float32".
  """
  dtype_str = str(dtype)
- for key, value in NUMPY_DTYPE_MAP.items():
+ # Exact match first (handles "float32", "int64", etc.)
+ if dtype_str in NUMPY_DTYPE_MAP:
+ return NUMPY_DTYPE_MAP[dtype_str]
+ # Substring match, longest keys first to avoid "int8" matching "uint8"
+ for key in sorted(NUMPY_DTYPE_MAP, key=len, reverse=True):
  if key in dtype_str:
- return value
+ return NUMPY_DTYPE_MAP[key]
  return "float32"

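Note: the reordering fixes a real matching bug. With insertion-order substring matching, "int8" is found inside "uint8", so uint8 arrays were mapped to int8. A standalone illustration using a subset of the map:

    dtype_str = "uint8"
    keys = ["float16", "int8", "uint8"]  # insertion order, as in the old loop
    assert next(k for k in keys if k in dtype_str) == "int8"  # the old bug
    ordered = sorted(keys, key=len, reverse=True)  # longest first, as in the fix
    assert next(k for k in ordered if k in dtype_str) == "uint8"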
@@ -88,3 +106,25 @@ def extract_ndarray_dtype(python_type: Any) -> str:
  if dtype_arg is not None:
  return numpy_dtype_to_string(dtype_arg)
  return "float32"
+
+
+ def parse_semver(version: str) -> tuple[int, int, int]:
+ """Parse a semantic version string into a comparable tuple.
+
+ Args:
+ version: A ``"major.minor.patch"`` version string.
+
+ Returns:
+ Tuple of (major, minor, patch) integers.
+
+ Raises:
+ ValueError: If the version string is not valid semver.
+
+ Examples:
+ >>> parse_semver("1.2.3")
+ (1, 2, 3)
+ """
+ parts = version.split(".")
+ if len(parts) != 3:
+ raise ValueError(f"Invalid semver: {version}")
+ return int(parts[0]), int(parts[1]), int(parts[2])
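Note: returning a tuple is what makes versions order correctly; lexicographic string comparison gets multi-digit components wrong:

    assert parse_semver("1.10.0") > parse_semver("1.9.2")  # (1, 10, 0) > (1, 9, 2)
    assert "1.10.0" < "1.9.2"  # plain string comparison orders these backwards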
atdata/atmosphere/__init__.py CHANGED
@@ -15,16 +15,14 @@ The ATProto integration is additive - existing atdata functionality continues
  to work unchanged. These features are opt-in for users who want to publish
  or discover datasets on the ATProto network.

- Example:
- ::
-
- >>> from atdata.atmosphere import AtmosphereClient, SchemaPublisher
- >>>
- >>> client = AtmosphereClient()
- >>> client.login("handle.bsky.social", "app-password")
- >>>
- >>> publisher = SchemaPublisher(client)
- >>> schema_uri = publisher.publish(MySampleType, version="1.0.0")
+ Examples:
+ >>> from atdata.atmosphere import AtmosphereClient, SchemaPublisher
+ >>>
+ >>> client = AtmosphereClient()
+ >>> client.login("handle.bsky.social", "app-password")
+ >>>
+ >>> publisher = SchemaPublisher(client)
+ >>> schema_uri = publisher.publish(MySampleType, version="1.0.0")

  Note:
  This module requires the ``atproto`` package to be installed::
@@ -86,6 +84,7 @@ class AtmosphereIndexEntry:
  def metadata(self) -> Optional[dict]:
  """Metadata from the record, if any."""
  import msgpack
+
  metadata_bytes = self._record.get("metadata")
  if metadata_bytes is None:
  return None
@@ -100,25 +99,25 @@
  class AtmosphereIndex:
  """ATProto index implementing AbstractIndex protocol.

+ .. deprecated::
+ Use ``atdata.Index(atmosphere=client)`` instead. ``AtmosphereIndex``
+ is retained for backwards compatibility and will be removed in a
+ future release.
+
  Wraps SchemaPublisher/Loader and DatasetPublisher/Loader to provide
- a unified interface compatible with LocalIndex.
+ a unified interface compatible with Index.

  Optionally accepts a ``PDSBlobStore`` for writing dataset shards as
  ATProto blobs, enabling fully decentralized dataset storage.

- Example:
- ::
-
- >>> client = AtmosphereClient()
- >>> client.login("handle.bsky.social", "app-password")
- >>>
- >>> # Without blob storage (external URLs only)
- >>> index = AtmosphereIndex(client)
- >>>
- >>> # With PDS blob storage
- >>> store = PDSBlobStore(client)
- >>> index = AtmosphereIndex(client, data_store=store)
- >>> entry = index.insert_dataset(dataset, name="my-data")
+ Examples:
+ >>> # Preferred: use unified Index
+ >>> from atdata.local import Index
+ >>> from atdata.atmosphere import AtmosphereClient
+ >>> index = Index(atmosphere=client)
+ >>>
+ >>> # Legacy (deprecated)
+ >>> index = AtmosphereIndex(client)
  """

  def __init__(
@@ -134,6 +133,14 @@ class AtmosphereIndex:
  data_store: Optional PDSBlobStore for writing shards as blobs.
  If provided, insert_dataset will upload shards to PDS.
  """
+ import warnings
+
+ warnings.warn(
+ "AtmosphereIndex is deprecated. Use atdata.Index(atmosphere=client) "
+ "instead for unified index access.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
  self.client = client
  self._schema_publisher = SchemaPublisher(client)
  self._schema_loader = SchemaLoader(client)
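Note: ``stacklevel=2`` makes the warning point at the caller's constructor call site rather than this module. A hedged sketch of how downstream code might pin the new behavior in a test (``client`` stands in for an authenticated AtmosphereClient):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        index = AtmosphereIndex(client)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)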
atdata/atmosphere/_types.py CHANGED
@@ -19,16 +19,14 @@ class AtUri:

  AT URIs follow the format: at://<authority>/<collection>/<rkey>

- Example:
- ::
-
- >>> uri = AtUri.parse("at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz")
- >>> uri.authority
- 'did:plc:abc123'
- >>> uri.collection
- 'ac.foundation.dataset.sampleSchema'
- >>> uri.rkey
- 'xyz'
+ Examples:
+ >>> uri = AtUri.parse("at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz")
+ >>> uri.authority
+ 'did:plc:abc123'
+ >>> uri.collection
+ 'ac.foundation.dataset.sampleSchema'
+ >>> uri.rkey
+ 'xyz'
  """

  authority: str
@@ -58,7 +56,9 @@ class AtUri:

  parts = uri[5:].split("/")
  if len(parts) < 3:
- raise ValueError(f"Invalid AT URI: expected authority/collection/rkey: {uri}")
+ raise ValueError(
+ f"Invalid AT URI: expected authority/collection/rkey: {uri}"
+ )

  return cls(
  authority=parts[0],
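Note: the parse is a plain prefix strip and split. Everything after the five-character ``at://`` scheme is divided on ``/``, and fewer than three parts is rejected:

    uri = "at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz"
    parts = uri[5:].split("/")
    assert parts == ["did:plc:abc123", "ac.foundation.dataset.sampleSchema", "xyz"]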
atdata/atmosphere/client.py CHANGED
@@ -18,6 +18,7 @@ def _get_atproto_client_class():
  if _atproto_client_class is None:
  try:
  from atproto import Client
+
  _atproto_client_class = Client
  except ImportError as e:
  raise ImportError(
@@ -33,13 +34,11 @@ class AtmosphereClient:
  This class wraps the atproto SDK client and provides higher-level methods
  for working with atdata records (schemas, datasets, lenses).

- Example:
- ::
-
- >>> client = AtmosphereClient()
- >>> client.login("alice.bsky.social", "app-password")
- >>> print(client.did)
- 'did:plc:...'
+ Examples:
+ >>> client = AtmosphereClient()
+ >>> client.login("alice.bsky.social", "app-password")
+ >>> print(client.did)
+ 'did:plc:...'

  Note:
  The password should be an app-specific password, not your main account
@@ -327,7 +326,11 @@ class AtmosphereClient:
  # Convert to dict format suitable for embedding in records
  return {
  "$type": "blob",
- "ref": {"$link": blob_ref.ref.link if hasattr(blob_ref.ref, "link") else str(blob_ref.ref)},
+ "ref": {
+ "$link": blob_ref.ref.link
+ if hasattr(blob_ref.ref, "link")
+ else str(blob_ref.ref)
+ },
  "mimeType": blob_ref.mime_type,
  "size": blob_ref.size,
  }
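Note: the reshaped return value matches ATProto's record-embedded blob convention. A hypothetical result (CID, MIME type, and size are illustrative):

    {
        "$type": "blob",
        "ref": {"$link": "bafyreib2rxk3rh6kzwq..."},
        "mimeType": "application/x-tar",
        "size": 1048576,
    }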
atdata/atmosphere/lens.py CHANGED
@@ -21,6 +21,7 @@ from ._types import (

  # Import for type checking only
  from typing import TYPE_CHECKING
+
  if TYPE_CHECKING:
  from ..lens import Lens

@@ -31,26 +32,24 @@ class LensPublisher:
  This class creates lens records that reference source and target schemas
  and point to the transformation code in a git repository.

- Example:
- ::
-
- >>> @atdata.lens
- ... def my_lens(source: SourceType) -> TargetType:
- ... return TargetType(field=source.other_field)
- >>>
- >>> client = AtmosphereClient()
- >>> client.login("handle", "password")
- >>>
- >>> publisher = LensPublisher(client)
- >>> uri = publisher.publish(
- ... name="my_lens",
- ... source_schema_uri="at://did:plc:abc/ac.foundation.dataset.sampleSchema/source",
- ... target_schema_uri="at://did:plc:abc/ac.foundation.dataset.sampleSchema/target",
- ... code_repository="https://github.com/user/repo",
- ... code_commit="abc123def456",
- ... getter_path="mymodule.lenses:my_lens",
- ... putter_path="mymodule.lenses:my_lens_putter",
- ... )
+ Examples:
+ >>> @atdata.lens
+ ... def my_lens(source: SourceType) -> TargetType:
+ ... return TargetType(field=source.other_field)
+ >>>
+ >>> client = AtmosphereClient()
+ >>> client.login("handle", "password")
+ >>>
+ >>> publisher = LensPublisher(client)
+ >>> uri = publisher.publish(
+ ... name="my_lens",
+ ... source_schema_uri="at://did:plc:abc/ac.foundation.dataset.sampleSchema/source",
+ ... target_schema_uri="at://did:plc:abc/ac.foundation.dataset.sampleSchema/target",
+ ... code_repository="https://github.com/user/repo",
+ ... code_commit="abc123def456",
+ ... getter_path="mymodule.lenses:my_lens",
+ ... putter_path="mymodule.lenses:my_lens_putter",
+ ... )

  Security Note:
  Lens code is stored as references to git repositories rather than
@@ -195,16 +194,14 @@ class LensLoader:
  using a lens requires installing the referenced code and importing
  it manually.

- Example:
- ::
-
- >>> client = AtmosphereClient()
- >>> loader = LensLoader(client)
- >>>
- >>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.lens/xyz")
- >>> print(record["name"])
- >>> print(record["sourceSchema"])
- >>> print(record.get("getterCode", {}).get("repository"))
+ Examples:
+ >>> client = AtmosphereClient()
+ >>> loader = LensLoader(client)
+ >>>
+ >>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.lens/xyz")
+ >>> print(record["name"])
+ >>> print(record["sourceSchema"])
+ >>> print(record.get("getterCode", {}).get("repository"))
  """

  def __init__(self, client: AtmosphereClient):