atdata 0.2.0a1__py3-none-any.whl → 0.2.3b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atdata/__init__.py +43 -10
- atdata/_cid.py +144 -0
- atdata/_helpers.py +7 -5
- atdata/_hf_api.py +690 -0
- atdata/_protocols.py +504 -0
- atdata/_schema_codec.py +438 -0
- atdata/_sources.py +508 -0
- atdata/_stub_manager.py +534 -0
- atdata/_type_utils.py +104 -0
- atdata/atmosphere/__init__.py +269 -1
- atdata/atmosphere/_types.py +4 -2
- atdata/atmosphere/client.py +146 -3
- atdata/atmosphere/lens.py +4 -3
- atdata/atmosphere/records.py +168 -7
- atdata/atmosphere/schema.py +29 -82
- atdata/atmosphere/store.py +204 -0
- atdata/cli/__init__.py +222 -0
- atdata/cli/diagnose.py +169 -0
- atdata/cli/local.py +283 -0
- atdata/dataset.py +615 -257
- atdata/lens.py +53 -54
- atdata/local.py +1456 -228
- atdata/promote.py +195 -0
- {atdata-0.2.0a1.dist-info → atdata-0.2.3b1.dist-info}/METADATA +106 -14
- atdata-0.2.3b1.dist-info/RECORD +28 -0
- atdata-0.2.0a1.dist-info/RECORD +0 -16
- {atdata-0.2.0a1.dist-info → atdata-0.2.3b1.dist-info}/WHEEL +0 -0
- {atdata-0.2.0a1.dist-info → atdata-0.2.3b1.dist-info}/entry_points.txt +0 -0
- {atdata-0.2.0a1.dist-info → atdata-0.2.3b1.dist-info}/licenses/LICENSE +0 -0
atdata/atmosphere/__init__.py
CHANGED
|
@@ -15,7 +15,7 @@ The ATProto integration is additive - existing atdata functionality continues
|
|
|
15
15
|
to work unchanged. These features are opt-in for users who want to publish
|
|
16
16
|
or discover datasets on the ATProto network.
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
Examples:
|
|
19
19
|
>>> from atdata.atmosphere import AtmosphereClient, SchemaPublisher
|
|
20
20
|
>>>
|
|
21
21
|
>>> client = AtmosphereClient()
|
|
@@ -30,10 +30,13 @@ Note:
|
|
|
30
30
|
pip install atproto
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
|
+
from typing import Iterator, Optional, Type, TYPE_CHECKING
|
|
34
|
+
|
|
33
35
|
from .client import AtmosphereClient
|
|
34
36
|
from .schema import SchemaPublisher, SchemaLoader
|
|
35
37
|
from .records import DatasetPublisher, DatasetLoader
|
|
36
38
|
from .lens import LensPublisher, LensLoader
|
|
39
|
+
from .store import PDSBlobStore
|
|
37
40
|
from ._types import (
|
|
38
41
|
AtUri,
|
|
39
42
|
SchemaRecord,
|
|
@@ -41,9 +44,274 @@ from ._types import (
|
|
|
41
44
|
LensRecord,
|
|
42
45
|
)
|
|
43
46
|
|
|
47
|
+
if TYPE_CHECKING:
|
|
48
|
+
from ..dataset import Dataset
|
|
49
|
+
from .._protocols import Packable
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class AtmosphereIndexEntry:
    """Entry wrapper for ATProto dataset records implementing IndexEntry protocol.

    A read-only view over a raw record dictionary. Every property is computed
    from that dictionary on each access; nothing is cached or copied.

    Attributes:
        _uri: AT URI of the record.
        _record: Raw record dictionary.
    """

    def __init__(self, uri: str, record: dict):
        """Store the AT URI and the raw record without validating either.

        Args:
            uri: AT URI of the record.
            record: Raw record dictionary.
        """
        self._uri = uri
        self._record = record

    def __repr__(self) -> str:
        """Return a short debugging representation (URI and name only)."""
        return f"{type(self).__name__}(uri={self._uri!r}, name={self.name!r})"

    @property
    def name(self) -> str:
        """Human-readable dataset name ("" when the record has no "name")."""
        return self._record.get("name", "")

    @property
    def schema_ref(self) -> str:
        """AT URI of the schema record ("" when "schemaRef" is absent)."""
        return self._record.get("schemaRef", "")

    @property
    def data_urls(self) -> list[str]:
        """WebDataset URLs from external storage.

        Returns an empty list when the record has no storage section or when
        its storage "$type" does not contain "storageExternal".
        """
        # `or {}` / `or ""` also cover keys that are present but null, which
        # `.get(key, default)` alone would hand back as None.
        storage = self._record.get("storage") or {}
        storage_type = storage.get("$type") or ""
        if "storageExternal" in storage_type:
            return storage.get("urls") or []
        return []

    @property
    def metadata(self) -> Optional[dict]:
        """Metadata from the record, if any.

        The "metadata" field is decoded with msgpack; None is returned when
        the field is absent.
        """
        metadata_bytes = self._record.get("metadata")
        if metadata_bytes is None:
            return None

        # Imported only once there is something to decode, so entries without
        # metadata do not require msgpack to be importable.
        import msgpack

        return msgpack.unpackb(metadata_bytes, raw=False)

    @property
    def uri(self) -> str:
        """AT URI of this record."""
        return self._uri
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class AtmosphereIndex:
    """ATProto index implementing AbstractIndex protocol.

    Bundles a SchemaPublisher/SchemaLoader pair and a
    DatasetPublisher/DatasetLoader pair behind one object so it can be used
    through the same interface as LocalIndex.

    A ``PDSBlobStore`` may be supplied at construction time and is exposed
    through :attr:`data_store`.

    NOTE(review): apart from the ``data_store`` property, no method of this
    class reads the stored blob store -- confirm where shard upload is meant
    to happen.

    Examples:
        >>> client = AtmosphereClient()
        >>> client.login("handle.bsky.social", "app-password")
        >>>
        >>> # Without blob storage (external URLs only)
        >>> index = AtmosphereIndex(client)
        >>>
        >>> # With PDS blob storage
        >>> store = PDSBlobStore(client)
        >>> index = AtmosphereIndex(client, data_store=store)
        >>> entry = index.insert_dataset(dataset, name="my-data")
    """

    def __init__(
        self,
        client: AtmosphereClient,
        *,
        data_store: Optional[PDSBlobStore] = None,
    ):
        """Initialize the atmosphere index.

        Args:
            client: AtmosphereClient instance shared by every
                publisher/loader created here.
            data_store: Optional PDSBlobStore, kept as-is and returned by
                the ``data_store`` property.
        """
        self.client = client
        self._data_store = data_store
        # All four collaborators are built from the same client.
        self._schema_publisher = SchemaPublisher(client)
        self._schema_loader = SchemaLoader(client)
        self._dataset_publisher = DatasetPublisher(client)
        self._dataset_loader = DatasetLoader(client)

    @property
    def data_store(self) -> Optional[PDSBlobStore]:
        """The PDS blob store passed at construction, or None."""
        return self._data_store

    @staticmethod
    def _entry_from_listing(rec: dict) -> AtmosphereIndexEntry:
        """Wrap one listing item in an AtmosphereIndexEntry.

        The item's "uri" ("" if missing) becomes the entry URI; its "value"
        becomes the record, falling back to the item itself when there is
        no "value" key.
        """
        return AtmosphereIndexEntry(rec.get("uri", ""), rec.get("value", rec))

    # Dataset operations

    def insert_dataset(
        self,
        ds: "Dataset",
        *,
        name: str,
        schema_ref: Optional[str] = None,
        **kwargs,
    ) -> AtmosphereIndexEntry:
        """Insert a dataset into ATProto.

        Publishes through the dataset publisher, then reads the new record
        back through the dataset loader to build the returned entry.

        Args:
            ds: The Dataset to publish.
            name: Human-readable name.
            schema_ref: Optional schema AT URI. When None, the publisher is
                asked to auto-publish the schema.
            **kwargs: ``description``, ``tags`` and ``license`` are
                forwarded (as None when absent); other keys are ignored.

        Returns:
            AtmosphereIndexEntry for the inserted dataset.
        """
        optional_fields = {
            key: kwargs.get(key) for key in ("description", "tags", "license")
        }
        uri = self._dataset_publisher.publish(
            ds,
            name=name,
            schema_uri=schema_ref,
            auto_publish_schema=(schema_ref is None),
            **optional_fields,
        )
        record = self._dataset_loader.get(uri)
        return AtmosphereIndexEntry(str(uri), record)

    def get_dataset(self, ref: str) -> AtmosphereIndexEntry:
        """Get a dataset by AT URI.

        Args:
            ref: AT URI of the dataset record.

        Returns:
            AtmosphereIndexEntry for the dataset.

        Raises:
            ValueError: If record is not a dataset.
        """
        return AtmosphereIndexEntry(ref, self._dataset_loader.get(ref))

    @property
    def datasets(self) -> Iterator[AtmosphereIndexEntry]:
        """Lazily iterate over all dataset entries (AbstractIndex protocol).

        Calls the dataset loader's ``list_all()`` with no arguments; the
        call happens when iteration starts, not on property access.

        Yields:
            AtmosphereIndexEntry for each dataset.
        """
        for listing in self._dataset_loader.list_all():
            yield self._entry_from_listing(listing)

    def list_datasets(self, repo: Optional[str] = None) -> list[AtmosphereIndexEntry]:
        """Get all dataset entries as a materialized list (AbstractIndex protocol).

        Args:
            repo: DID of repository, forwarded to the loader's ``list_all``.

        Returns:
            List of AtmosphereIndexEntry for each dataset.
        """
        listings = self._dataset_loader.list_all(repo=repo)
        return [self._entry_from_listing(listing) for listing in listings]

    # Schema operations

    def publish_schema(
        self,
        sample_type: "Type[Packable]",
        *,
        version: str = "1.0.0",
        **kwargs,
    ) -> str:
        """Publish a schema to ATProto.

        Args:
            sample_type: A Packable type (PackableSample subclass or
                @packable-decorated).
            version: Semantic version string.
            **kwargs: ``description`` and ``metadata`` are forwarded (as
                None when absent); other keys are ignored.

        Returns:
            AT URI of the schema record, as a string.
        """
        optional_fields = {key: kwargs.get(key) for key in ("description", "metadata")}
        published = self._schema_publisher.publish(
            sample_type,
            version=version,
            **optional_fields,
        )
        return str(published)

    def get_schema(self, ref: str) -> dict:
        """Get a schema record by AT URI.

        Args:
            ref: AT URI of the schema record.

        Returns:
            Schema record dictionary.

        Raises:
            ValueError: If record is not a schema.
        """
        return self._schema_loader.get(ref)

    @property
    def schemas(self) -> Iterator[dict]:
        """Lazily iterate over all schema records (AbstractIndex protocol).

        Calls the schema loader's ``list_all()`` with no arguments when
        iteration starts.

        Yields:
            Each item's "value", or the item itself when it has no "value".
        """
        for listing in self._schema_loader.list_all():
            yield listing.get("value", listing)

    def list_schemas(self, repo: Optional[str] = None) -> list[dict]:
        """Get all schema records as a materialized list (AbstractIndex protocol).

        Args:
            repo: DID of repository, forwarded to the loader's ``list_all``.

        Returns:
            List of schema records as dictionaries.
        """
        listings = self._schema_loader.list_all(repo=repo)
        return [listing.get("value", listing) for listing in listings]

    def decode_schema(self, ref: str) -> "Type[Packable]":
        """Reconstruct a Python type from a schema record.

        Args:
            ref: AT URI of the schema record.

        Returns:
            Dynamically generated Packable type.

        Raises:
            ValueError: If schema cannot be decoded.
        """
        # Local import, as in the rest of this package's lazy-loading style.
        from .._schema_codec import schema_to_type

        return schema_to_type(self.get_schema(ref))
|
|
305
|
+
|
|
306
|
+
|
|
44
307
|
__all__ = [
|
|
45
308
|
# Client
|
|
46
309
|
"AtmosphereClient",
|
|
310
|
+
# Storage
|
|
311
|
+
"PDSBlobStore",
|
|
312
|
+
# Unified index (AbstractIndex protocol)
|
|
313
|
+
"AtmosphereIndex",
|
|
314
|
+
"AtmosphereIndexEntry",
|
|
47
315
|
# Schema operations
|
|
48
316
|
"SchemaPublisher",
|
|
49
317
|
"SchemaLoader",
|
atdata/atmosphere/_types.py
CHANGED
|
@@ -19,7 +19,7 @@ class AtUri:
|
|
|
19
19
|
|
|
20
20
|
AT URIs follow the format: at://<authority>/<collection>/<rkey>
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
Examples:
|
|
23
23
|
>>> uri = AtUri.parse("at://did:plc:abc123/ac.foundation.dataset.sampleSchema/xyz")
|
|
24
24
|
>>> uri.authority
|
|
25
25
|
'did:plc:abc123'
|
|
@@ -56,7 +56,9 @@ class AtUri:
|
|
|
56
56
|
|
|
57
57
|
parts = uri[5:].split("/")
|
|
58
58
|
if len(parts) < 3:
|
|
59
|
-
raise ValueError(
|
|
59
|
+
raise ValueError(
|
|
60
|
+
f"Invalid AT URI: expected authority/collection/rkey: {uri}"
|
|
61
|
+
)
|
|
60
62
|
|
|
61
63
|
return cls(
|
|
62
64
|
authority=parts[0],
|
atdata/atmosphere/client.py
CHANGED
|
@@ -18,6 +18,7 @@ def _get_atproto_client_class():
|
|
|
18
18
|
if _atproto_client_class is None:
|
|
19
19
|
try:
|
|
20
20
|
from atproto import Client
|
|
21
|
+
|
|
21
22
|
_atproto_client_class = Client
|
|
22
23
|
except ImportError as e:
|
|
23
24
|
raise ImportError(
|
|
@@ -33,7 +34,7 @@ class AtmosphereClient:
|
|
|
33
34
|
This class wraps the atproto SDK client and provides higher-level methods
|
|
34
35
|
for working with atdata records (schemas, datasets, lenses).
|
|
35
36
|
|
|
36
|
-
|
|
37
|
+
Examples:
|
|
37
38
|
>>> client = AtmosphereClient()
|
|
38
39
|
>>> client.login("alice.bsky.social", "app-password")
|
|
39
40
|
>>> print(client.did)
|
|
@@ -254,7 +255,18 @@ class AtmosphereClient:
|
|
|
254
255
|
}
|
|
255
256
|
)
|
|
256
257
|
|
|
257
|
-
|
|
258
|
+
# Convert ATProto model to dict if needed
|
|
259
|
+
value = response.value
|
|
260
|
+
# DotDict and similar ATProto models have to_dict()
|
|
261
|
+
if hasattr(value, "to_dict") and callable(value.to_dict):
|
|
262
|
+
return value.to_dict()
|
|
263
|
+
elif isinstance(value, dict):
|
|
264
|
+
return dict(value)
|
|
265
|
+
elif hasattr(value, "model_dump") and callable(value.model_dump):
|
|
266
|
+
return value.model_dump()
|
|
267
|
+
elif hasattr(value, "__dict__"):
|
|
268
|
+
return dict(value.__dict__)
|
|
269
|
+
return value
|
|
258
270
|
|
|
259
271
|
def delete_record(
|
|
260
272
|
self,
|
|
@@ -287,6 +299,123 @@ class AtmosphereClient:
|
|
|
287
299
|
|
|
288
300
|
self._client.com.atproto.repo.delete_record(data=data)
|
|
289
301
|
|
|
302
|
+
def upload_blob(
    self,
    data: bytes,
    mime_type: str = "application/octet-stream",
) -> dict:
    """Upload binary data as a blob to the PDS.

    Args:
        data: Binary data to upload.
        mime_type: MIME type of the data. Accepted for reference only: it
            is not passed to the underlying client call, and the returned
            'mimeType' is whatever the upload response reports.

    Returns:
        A blob reference dict with keys: '$type', 'ref', 'mimeType', 'size'.
        This can be embedded directly in record fields.

    Raises:
        ValueError: If not authenticated.
        atproto.exceptions.AtProtocolError: If upload fails.
    """
    self._ensure_authenticated()

    uploaded = self._client.upload_blob(data).blob

    # The reference may expose the CID via `.link`; otherwise fall back to
    # its string form.
    cid_ref = uploaded.ref
    if hasattr(cid_ref, "link"):
        link = cid_ref.link
    else:
        link = str(cid_ref)

    # Dict shape suitable for embedding in records.
    blob_dict = {"$type": "blob", "ref": {"$link": link}}
    blob_dict["mimeType"] = uploaded.mime_type
    blob_dict["size"] = uploaded.size
    return blob_dict
|
|
337
|
+
|
|
338
|
+
def get_blob(
    self,
    did: str,
    cid: str,
    *,
    timeout: Optional[float] = 30.0,
) -> bytes:
    """Download a blob from a PDS.

    This resolves the PDS endpoint for the DID and fetches the blob
    directly from that PDS via ``com.atproto.sync.getBlob``.

    Args:
        did: The DID of the repository containing the blob.
        cid: The CID of the blob.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            server cannot block the caller forever. Pass None to wait
            indefinitely (the previous behaviour).

    Returns:
        The blob data as bytes.

    Raises:
        ValueError: If PDS endpoint cannot be resolved.
        requests.HTTPError: If the PDS answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    # Resolve first: if this fails there is nothing to download.
    pds_endpoint = self._resolve_pds_endpoint(did)
    if not pds_endpoint:
        raise ValueError(f"Could not resolve PDS endpoint for {did}")

    import requests

    # Fetch blob from PDS; `params` lets requests do the query escaping.
    url = f"{pds_endpoint}/xrpc/com.atproto.sync.getBlob"
    response = requests.get(
        url,
        params={"did": did, "cid": cid},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.content
|
|
371
|
+
|
|
372
|
+
def _resolve_pds_endpoint(self, did: str) -> Optional[str]:
|
|
373
|
+
"""Resolve the PDS endpoint for a DID.
|
|
374
|
+
|
|
375
|
+
Args:
|
|
376
|
+
did: The DID to resolve.
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
The PDS service endpoint URL, or None if not found.
|
|
380
|
+
"""
|
|
381
|
+
import requests
|
|
382
|
+
|
|
383
|
+
# For did:plc, query the PLC directory
|
|
384
|
+
if did.startswith("did:plc:"):
|
|
385
|
+
try:
|
|
386
|
+
response = requests.get(f"https://plc.directory/{did}")
|
|
387
|
+
response.raise_for_status()
|
|
388
|
+
did_doc = response.json()
|
|
389
|
+
|
|
390
|
+
for service in did_doc.get("service", []):
|
|
391
|
+
if service.get("type") == "AtprotoPersonalDataServer":
|
|
392
|
+
return service.get("serviceEndpoint")
|
|
393
|
+
except requests.RequestException:
|
|
394
|
+
return None
|
|
395
|
+
|
|
396
|
+
# For did:web, would need different resolution (not implemented)
|
|
397
|
+
return None
|
|
398
|
+
|
|
399
|
+
def get_blob_url(self, did: str, cid: str) -> str:
    """Get the direct URL for fetching a blob.

    This is useful for passing to WebDataset or other HTTP clients.

    Args:
        did: The DID of the repository containing the blob.
        cid: The CID of the blob.

    Returns:
        The full ``com.atproto.sync.getBlob`` URL on the resolved PDS, with
        ``did`` and ``cid`` percent-encoded as query values.

    Raises:
        ValueError: If PDS endpoint cannot be resolved.
    """
    from urllib.parse import quote

    pds_endpoint = self._resolve_pds_endpoint(did)
    if not pds_endpoint:
        raise ValueError(f"Could not resolve PDS endpoint for {did}")

    # Escape the values so characters such as "&", "=", "#" or "%" cannot
    # alter the query string. ":" is kept literal, so the URL for an
    # ordinary DID/CID is byte-identical to the unescaped form.
    did_q = quote(did, safe=":")
    cid_q = quote(cid, safe=":")
    return f"{pds_endpoint}/xrpc/com.atproto.sync.getBlob?did={did_q}&cid={cid_q}"
|
|
418
|
+
|
|
290
419
|
def list_records(
|
|
291
420
|
self,
|
|
292
421
|
collection: str,
|
|
@@ -324,7 +453,21 @@ class AtmosphereClient:
|
|
|
324
453
|
}
|
|
325
454
|
)
|
|
326
455
|
|
|
327
|
-
|
|
456
|
+
# Convert ATProto models to dicts if needed
|
|
457
|
+
records = []
|
|
458
|
+
for r in response.records:
|
|
459
|
+
value = r.value
|
|
460
|
+
# DotDict and similar ATProto models have to_dict()
|
|
461
|
+
if hasattr(value, "to_dict") and callable(value.to_dict):
|
|
462
|
+
records.append(value.to_dict())
|
|
463
|
+
elif isinstance(value, dict):
|
|
464
|
+
records.append(dict(value))
|
|
465
|
+
elif hasattr(value, "model_dump") and callable(value.model_dump):
|
|
466
|
+
records.append(value.model_dump())
|
|
467
|
+
elif hasattr(value, "__dict__"):
|
|
468
|
+
records.append(dict(value.__dict__))
|
|
469
|
+
else:
|
|
470
|
+
records.append(value)
|
|
328
471
|
return records, response.cursor
|
|
329
472
|
|
|
330
473
|
# Convenience methods for atdata collections
|
atdata/atmosphere/lens.py
CHANGED
|
@@ -9,7 +9,7 @@ Note:
|
|
|
9
9
|
implementations.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
from typing import Optional
|
|
12
|
+
from typing import Optional
|
|
13
13
|
|
|
14
14
|
from .client import AtmosphereClient
|
|
15
15
|
from ._types import (
|
|
@@ -21,6 +21,7 @@ from ._types import (
|
|
|
21
21
|
|
|
22
22
|
# Import for type checking only
|
|
23
23
|
from typing import TYPE_CHECKING
|
|
24
|
+
|
|
24
25
|
if TYPE_CHECKING:
|
|
25
26
|
from ..lens import Lens
|
|
26
27
|
|
|
@@ -31,7 +32,7 @@ class LensPublisher:
|
|
|
31
32
|
This class creates lens records that reference source and target schemas
|
|
32
33
|
and point to the transformation code in a git repository.
|
|
33
34
|
|
|
34
|
-
|
|
35
|
+
Examples:
|
|
35
36
|
>>> @atdata.lens
|
|
36
37
|
... def my_lens(source: SourceType) -> TargetType:
|
|
37
38
|
... return TargetType(field=source.other_field)
|
|
@@ -193,7 +194,7 @@ class LensLoader:
|
|
|
193
194
|
using a lens requires installing the referenced code and importing
|
|
194
195
|
it manually.
|
|
195
196
|
|
|
196
|
-
|
|
197
|
+
Examples:
|
|
197
198
|
>>> client = AtmosphereClient()
|
|
198
199
|
>>> loader = LensLoader(client)
|
|
199
200
|
>>>
|