atdata 0.2.3b1__py3-none-any.whl → 0.3.1b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atdata/.gitignore +1 -0
- atdata/__init__.py +39 -0
- atdata/_cid.py +0 -21
- atdata/_exceptions.py +168 -0
- atdata/_helpers.py +41 -15
- atdata/_hf_api.py +95 -11
- atdata/_logging.py +70 -0
- atdata/_protocols.py +77 -238
- atdata/_schema_codec.py +7 -6
- atdata/_stub_manager.py +5 -25
- atdata/_type_utils.py +28 -2
- atdata/atmosphere/__init__.py +31 -20
- atdata/atmosphere/_types.py +4 -4
- atdata/atmosphere/client.py +64 -12
- atdata/atmosphere/lens.py +11 -12
- atdata/atmosphere/records.py +12 -12
- atdata/atmosphere/schema.py +16 -18
- atdata/atmosphere/store.py +6 -7
- atdata/cli/__init__.py +161 -175
- atdata/cli/diagnose.py +2 -2
- atdata/cli/{local.py → infra.py} +11 -11
- atdata/cli/inspect.py +69 -0
- atdata/cli/preview.py +63 -0
- atdata/cli/schema.py +109 -0
- atdata/dataset.py +583 -328
- atdata/index/__init__.py +54 -0
- atdata/index/_entry.py +157 -0
- atdata/index/_index.py +1198 -0
- atdata/index/_schema.py +380 -0
- atdata/lens.py +9 -2
- atdata/lexicons/__init__.py +121 -0
- atdata/lexicons/ac.foundation.dataset.arrayFormat.json +16 -0
- atdata/lexicons/ac.foundation.dataset.getLatestSchema.json +78 -0
- atdata/lexicons/ac.foundation.dataset.lens.json +99 -0
- atdata/lexicons/ac.foundation.dataset.record.json +96 -0
- atdata/lexicons/ac.foundation.dataset.schema.json +107 -0
- atdata/lexicons/ac.foundation.dataset.schemaType.json +16 -0
- atdata/lexicons/ac.foundation.dataset.storageBlobs.json +24 -0
- atdata/lexicons/ac.foundation.dataset.storageExternal.json +25 -0
- atdata/lexicons/ndarray_shim.json +16 -0
- atdata/local/__init__.py +70 -0
- atdata/local/_repo_legacy.py +218 -0
- atdata/manifest/__init__.py +28 -0
- atdata/manifest/_aggregates.py +156 -0
- atdata/manifest/_builder.py +163 -0
- atdata/manifest/_fields.py +154 -0
- atdata/manifest/_manifest.py +146 -0
- atdata/manifest/_query.py +150 -0
- atdata/manifest/_writer.py +74 -0
- atdata/promote.py +18 -14
- atdata/providers/__init__.py +25 -0
- atdata/providers/_base.py +140 -0
- atdata/providers/_factory.py +69 -0
- atdata/providers/_postgres.py +214 -0
- atdata/providers/_redis.py +171 -0
- atdata/providers/_sqlite.py +191 -0
- atdata/repository.py +323 -0
- atdata/stores/__init__.py +23 -0
- atdata/stores/_disk.py +123 -0
- atdata/stores/_s3.py +349 -0
- atdata/testing.py +341 -0
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/METADATA +5 -2
- atdata-0.3.1b1.dist-info/RECORD +67 -0
- atdata/local.py +0 -1720
- atdata-0.2.3b1.dist-info/RECORD +0 -28
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/WHEEL +0 -0
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/entry_points.txt +0 -0
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/licenses/LICENSE +0 -0
atdata/atmosphere/__init__.py
CHANGED
|
@@ -6,7 +6,7 @@ network.
|
|
|
6
6
|
|
|
7
7
|
Key components:
|
|
8
8
|
|
|
9
|
-
- ``
|
|
9
|
+
- ``Atmosphere``: Authentication and session management for ATProto
|
|
10
10
|
- ``SchemaPublisher``: Publish PackableSample schemas as ATProto records
|
|
11
11
|
- ``DatasetPublisher``: Publish dataset index records with WebDataset URLs
|
|
12
12
|
- ``LensPublisher``: Publish lens transformation records
|
|
@@ -16,13 +16,10 @@ to work unchanged. These features are opt-in for users who want to publish
|
|
|
16
16
|
or discover datasets on the ATProto network.
|
|
17
17
|
|
|
18
18
|
Examples:
|
|
19
|
-
>>> from atdata.atmosphere import
|
|
19
|
+
>>> from atdata.atmosphere import Atmosphere
|
|
20
20
|
>>>
|
|
21
|
-
>>>
|
|
22
|
-
>>>
|
|
23
|
-
>>>
|
|
24
|
-
>>> publisher = SchemaPublisher(client)
|
|
25
|
-
>>> schema_uri = publisher.publish(MySampleType, version="1.0.0")
|
|
21
|
+
>>> atmo = Atmosphere.login("handle.bsky.social", "app-password")
|
|
22
|
+
>>> index = Index(atmosphere=atmo)
|
|
26
23
|
|
|
27
24
|
Note:
|
|
28
25
|
This module requires the ``atproto`` package to be installed::
|
|
@@ -32,7 +29,7 @@ Note:
|
|
|
32
29
|
|
|
33
30
|
from typing import Iterator, Optional, Type, TYPE_CHECKING
|
|
34
31
|
|
|
35
|
-
from .client import AtmosphereClient
|
|
32
|
+
from .client import Atmosphere
|
|
36
33
|
from .schema import SchemaPublisher, SchemaLoader
|
|
37
34
|
from .records import DatasetPublisher, DatasetLoader
|
|
38
35
|
from .lens import LensPublisher, LensLoader
|
|
@@ -99,38 +96,48 @@ class AtmosphereIndexEntry:
|
|
|
99
96
|
class AtmosphereIndex:
|
|
100
97
|
"""ATProto index implementing AbstractIndex protocol.
|
|
101
98
|
|
|
99
|
+
.. deprecated::
|
|
100
|
+
Use ``atdata.Index(atmosphere=client)`` instead. ``AtmosphereIndex``
|
|
101
|
+
is retained for backwards compatibility and will be removed in a
|
|
102
|
+
future release.
|
|
103
|
+
|
|
102
104
|
Wraps SchemaPublisher/Loader and DatasetPublisher/Loader to provide
|
|
103
|
-
a unified interface compatible with
|
|
105
|
+
a unified interface compatible with Index.
|
|
104
106
|
|
|
105
107
|
Optionally accepts a ``PDSBlobStore`` for writing dataset shards as
|
|
106
108
|
ATProto blobs, enabling fully decentralized dataset storage.
|
|
107
109
|
|
|
108
110
|
Examples:
|
|
109
|
-
>>>
|
|
110
|
-
>>>
|
|
111
|
+
>>> # Preferred: use unified Index
|
|
112
|
+
>>> from atdata.local import Index
|
|
113
|
+
>>> from atdata.atmosphere import AtmosphereClient
|
|
114
|
+
>>> index = Index(atmosphere=client)
|
|
111
115
|
>>>
|
|
112
|
-
>>> #
|
|
116
|
+
>>> # Legacy (deprecated)
|
|
113
117
|
>>> index = AtmosphereIndex(client)
|
|
114
|
-
>>>
|
|
115
|
-
>>> # With PDS blob storage
|
|
116
|
-
>>> store = PDSBlobStore(client)
|
|
117
|
-
>>> index = AtmosphereIndex(client, data_store=store)
|
|
118
|
-
>>> entry = index.insert_dataset(dataset, name="my-data")
|
|
119
118
|
"""
|
|
120
119
|
|
|
121
120
|
def __init__(
|
|
122
121
|
self,
|
|
123
|
-
client:
|
|
122
|
+
client: Atmosphere,
|
|
124
123
|
*,
|
|
125
124
|
data_store: Optional[PDSBlobStore] = None,
|
|
126
125
|
):
|
|
127
126
|
"""Initialize the atmosphere index.
|
|
128
127
|
|
|
129
128
|
Args:
|
|
130
|
-
client: Authenticated
|
|
129
|
+
client: Authenticated Atmosphere instance.
|
|
131
130
|
data_store: Optional PDSBlobStore for writing shards as blobs.
|
|
132
131
|
If provided, insert_dataset will upload shards to PDS.
|
|
133
132
|
"""
|
|
133
|
+
import warnings
|
|
134
|
+
|
|
135
|
+
warnings.warn(
|
|
136
|
+
"AtmosphereIndex is deprecated. Use atdata.Index(atmosphere=client) "
|
|
137
|
+
"instead for unified index access.",
|
|
138
|
+
DeprecationWarning,
|
|
139
|
+
stacklevel=2,
|
|
140
|
+
)
|
|
134
141
|
self.client = client
|
|
135
142
|
self._schema_publisher = SchemaPublisher(client)
|
|
136
143
|
self._schema_loader = SchemaLoader(client)
|
|
@@ -304,9 +311,13 @@ class AtmosphereIndex:
|
|
|
304
311
|
return schema_to_type(schema)
|
|
305
312
|
|
|
306
313
|
|
|
314
|
+
# Deprecated alias for backward compatibility
|
|
315
|
+
AtmosphereClient = Atmosphere
|
|
316
|
+
|
|
307
317
|
__all__ = [
|
|
308
318
|
# Client
|
|
309
|
-
"
|
|
319
|
+
"Atmosphere",
|
|
320
|
+
"AtmosphereClient", # deprecated alias
|
|
310
321
|
# Storage
|
|
311
322
|
"PDSBlobStore",
|
|
312
323
|
# Unified index (AbstractIndex protocol)
|
atdata/atmosphere/_types.py
CHANGED
|
@@ -20,11 +20,11 @@ class AtUri:
|
|
|
20
20
|
AT URIs follow the format: at://<authority>/<collection>/<rkey>
|
|
21
21
|
|
|
22
22
|
Examples:
|
|
23
|
-
>>> uri = AtUri.parse("at://did:plc:abc123/ac.foundation.dataset.
|
|
23
|
+
>>> uri = AtUri.parse("at://did:plc:abc123/ac.foundation.dataset.schema/xyz")
|
|
24
24
|
>>> uri.authority
|
|
25
25
|
'did:plc:abc123'
|
|
26
26
|
>>> uri.collection
|
|
27
|
-
'ac.foundation.dataset.
|
|
27
|
+
'ac.foundation.dataset.schema'
|
|
28
28
|
>>> uri.rkey
|
|
29
29
|
'xyz'
|
|
30
30
|
"""
|
|
@@ -119,7 +119,7 @@ class FieldDef:
|
|
|
119
119
|
class SchemaRecord:
|
|
120
120
|
"""ATProto record for a PackableSample schema.
|
|
121
121
|
|
|
122
|
-
Maps to the ``ac.foundation.dataset.
|
|
122
|
+
Maps to the ``ac.foundation.dataset.schema`` Lexicon.
|
|
123
123
|
"""
|
|
124
124
|
|
|
125
125
|
name: str
|
|
@@ -143,7 +143,7 @@ class SchemaRecord:
|
|
|
143
143
|
def to_record(self) -> dict:
|
|
144
144
|
"""Convert to ATProto record dict for publishing."""
|
|
145
145
|
record = {
|
|
146
|
-
"$type": f"{LEXICON_NAMESPACE}.
|
|
146
|
+
"$type": f"{LEXICON_NAMESPACE}.schema",
|
|
147
147
|
"name": self.name,
|
|
148
148
|
"version": self.version,
|
|
149
149
|
"fields": [self._field_to_dict(f) for f in self.fields],
|
atdata/atmosphere/client.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""ATProto client wrapper for atdata.
|
|
2
2
|
|
|
3
|
-
This module provides the ``
|
|
3
|
+
This module provides the ``Atmosphere`` class which wraps the atproto SDK
|
|
4
4
|
client with atdata-specific helpers for publishing and querying records.
|
|
5
5
|
"""
|
|
6
6
|
|
|
@@ -28,16 +28,15 @@ def _get_atproto_client_class():
|
|
|
28
28
|
return _atproto_client_class
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
class AtmosphereClient:
|
|
31
|
+
class Atmosphere:
|
|
32
32
|
"""ATProto client wrapper for atdata operations.
|
|
33
33
|
|
|
34
34
|
This class wraps the atproto SDK client and provides higher-level methods
|
|
35
35
|
for working with atdata records (schemas, datasets, lenses).
|
|
36
36
|
|
|
37
37
|
Examples:
|
|
38
|
-
>>>
|
|
39
|
-
>>>
|
|
40
|
-
>>> print(client.did)
|
|
38
|
+
>>> atmo = Atmosphere.login("alice.bsky.social", "app-password")
|
|
39
|
+
>>> print(atmo.did)
|
|
41
40
|
'did:plc:...'
|
|
42
41
|
|
|
43
42
|
Note:
|
|
@@ -65,7 +64,63 @@ class AtmosphereClient:
|
|
|
65
64
|
|
|
66
65
|
self._session: Optional[dict] = None
|
|
67
66
|
|
|
68
|
-
|
|
67
|
+
@classmethod
|
|
68
|
+
def login(
|
|
69
|
+
cls,
|
|
70
|
+
handle: str,
|
|
71
|
+
password: str,
|
|
72
|
+
*,
|
|
73
|
+
base_url: Optional[str] = None,
|
|
74
|
+
) -> "Atmosphere":
|
|
75
|
+
"""Create an authenticated Atmosphere client.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
handle: Your Bluesky handle (e.g., 'alice.bsky.social').
|
|
79
|
+
password: App-specific password (not your main password).
|
|
80
|
+
base_url: Optional PDS base URL. Defaults to bsky.social.
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
An authenticated Atmosphere instance.
|
|
84
|
+
|
|
85
|
+
Raises:
|
|
86
|
+
atproto.exceptions.AtProtocolError: If authentication fails.
|
|
87
|
+
|
|
88
|
+
Examples:
|
|
89
|
+
>>> atmo = Atmosphere.login("alice.bsky.social", "app-password")
|
|
90
|
+
>>> index = Index(atmosphere=atmo)
|
|
91
|
+
"""
|
|
92
|
+
instance = cls(base_url=base_url)
|
|
93
|
+
instance._login(handle, password)
|
|
94
|
+
return instance
|
|
95
|
+
|
|
96
|
+
@classmethod
|
|
97
|
+
def from_session(
|
|
98
|
+
cls,
|
|
99
|
+
session_string: str,
|
|
100
|
+
*,
|
|
101
|
+
base_url: Optional[str] = None,
|
|
102
|
+
) -> "Atmosphere":
|
|
103
|
+
"""Create an Atmosphere client from an exported session string.
|
|
104
|
+
|
|
105
|
+
This allows reusing a session without re-authenticating, which helps
|
|
106
|
+
avoid rate limits on session creation.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
session_string: Session string from ``export_session()``.
|
|
110
|
+
base_url: Optional PDS base URL. Defaults to bsky.social.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
An authenticated Atmosphere instance.
|
|
114
|
+
|
|
115
|
+
Examples:
|
|
116
|
+
>>> session = atmo.export_session()
|
|
117
|
+
>>> atmo2 = Atmosphere.from_session(session)
|
|
118
|
+
"""
|
|
119
|
+
instance = cls(base_url=base_url)
|
|
120
|
+
instance._login_with_session(session_string)
|
|
121
|
+
return instance
|
|
122
|
+
|
|
123
|
+
def _login(self, handle: str, password: str) -> None:
|
|
69
124
|
"""Authenticate with the ATProto PDS.
|
|
70
125
|
|
|
71
126
|
Args:
|
|
@@ -81,12 +136,9 @@ class AtmosphereClient:
|
|
|
81
136
|
"handle": profile.handle,
|
|
82
137
|
}
|
|
83
138
|
|
|
84
|
-
def
|
|
139
|
+
def _login_with_session(self, session_string: str) -> None:
|
|
85
140
|
"""Authenticate using an exported session string.
|
|
86
141
|
|
|
87
|
-
This allows reusing a session without re-authenticating, which helps
|
|
88
|
-
avoid rate limits on session creation.
|
|
89
|
-
|
|
90
142
|
Args:
|
|
91
143
|
session_string: Session string from ``export_session()``.
|
|
92
144
|
"""
|
|
@@ -161,7 +213,7 @@ class AtmosphereClient:
|
|
|
161
213
|
|
|
162
214
|
Args:
|
|
163
215
|
collection: The NSID of the record collection
|
|
164
|
-
(e.g., 'ac.foundation.dataset.
|
|
216
|
+
(e.g., 'ac.foundation.dataset.schema').
|
|
165
217
|
record: The record data. Must include a '$type' field.
|
|
166
218
|
rkey: Optional explicit record key. If not provided, a TID is generated.
|
|
167
219
|
validate: Whether to validate against the Lexicon schema. Set to False
|
|
@@ -487,7 +539,7 @@ class AtmosphereClient:
|
|
|
487
539
|
List of schema records.
|
|
488
540
|
"""
|
|
489
541
|
records, _ = self.list_records(
|
|
490
|
-
f"{LEXICON_NAMESPACE}.
|
|
542
|
+
f"{LEXICON_NAMESPACE}.schema",
|
|
491
543
|
repo=repo,
|
|
492
544
|
limit=limit,
|
|
493
545
|
)
|
atdata/atmosphere/lens.py
CHANGED
|
@@ -11,7 +11,7 @@ Note:
|
|
|
11
11
|
|
|
12
12
|
from typing import Optional
|
|
13
13
|
|
|
14
|
-
from .client import AtmosphereClient
|
|
14
|
+
from .client import Atmosphere
|
|
15
15
|
from ._types import (
|
|
16
16
|
AtUri,
|
|
17
17
|
LensRecord,
|
|
@@ -37,14 +37,13 @@ class LensPublisher:
|
|
|
37
37
|
... def my_lens(source: SourceType) -> TargetType:
|
|
38
38
|
... return TargetType(field=source.other_field)
|
|
39
39
|
>>>
|
|
40
|
-
>>>
|
|
41
|
-
>>> client.login("handle", "password")
|
|
40
|
+
>>> atmo = Atmosphere.login("handle", "password")
|
|
42
41
|
>>>
|
|
43
|
-
>>> publisher = LensPublisher(
|
|
42
|
+
>>> publisher = LensPublisher(atmo)
|
|
44
43
|
>>> uri = publisher.publish(
|
|
45
44
|
... name="my_lens",
|
|
46
|
-
... source_schema_uri="at://did:plc:abc/ac.foundation.dataset.
|
|
47
|
-
... target_schema_uri="at://did:plc:abc/ac.foundation.dataset.
|
|
45
|
+
... source_schema_uri="at://did:plc:abc/ac.foundation.dataset.schema/source",
|
|
46
|
+
... target_schema_uri="at://did:plc:abc/ac.foundation.dataset.schema/target",
|
|
48
47
|
... code_repository="https://github.com/user/repo",
|
|
49
48
|
... code_commit="abc123def456",
|
|
50
49
|
... getter_path="mymodule.lenses:my_lens",
|
|
@@ -57,11 +56,11 @@ class LensPublisher:
|
|
|
57
56
|
records. Users must manually install and trust lens implementations.
|
|
58
57
|
"""
|
|
59
58
|
|
|
60
|
-
def __init__(self, client:
|
|
59
|
+
def __init__(self, client: Atmosphere):
|
|
61
60
|
"""Initialize the lens publisher.
|
|
62
61
|
|
|
63
62
|
Args:
|
|
64
|
-
client: Authenticated
|
|
63
|
+
client: Authenticated Atmosphere instance.
|
|
65
64
|
"""
|
|
66
65
|
self.client = client
|
|
67
66
|
|
|
@@ -195,8 +194,8 @@ class LensLoader:
|
|
|
195
194
|
it manually.
|
|
196
195
|
|
|
197
196
|
Examples:
|
|
198
|
-
>>>
|
|
199
|
-
>>> loader = LensLoader(
|
|
197
|
+
>>> atmo = Atmosphere.login("handle", "password")
|
|
198
|
+
>>> loader = LensLoader(atmo)
|
|
200
199
|
>>>
|
|
201
200
|
>>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.lens/xyz")
|
|
202
201
|
>>> print(record["name"])
|
|
@@ -204,11 +203,11 @@ class LensLoader:
|
|
|
204
203
|
>>> print(record.get("getterCode", {}).get("repository"))
|
|
205
204
|
"""
|
|
206
205
|
|
|
207
|
-
def __init__(self, client:
|
|
206
|
+
def __init__(self, client: Atmosphere):
|
|
208
207
|
"""Initialize the lens loader.
|
|
209
208
|
|
|
210
209
|
Args:
|
|
211
|
-
client:
|
|
210
|
+
client: Atmosphere instance.
|
|
212
211
|
"""
|
|
213
212
|
self.client = client
|
|
214
213
|
|
atdata/atmosphere/records.py
CHANGED
|
@@ -8,7 +8,7 @@ and loading them back. Dataset records are published as
|
|
|
8
8
|
from typing import Type, TypeVar, Optional
|
|
9
9
|
import msgpack
|
|
10
10
|
|
|
11
|
-
from .client import AtmosphereClient
|
|
11
|
+
from .client import Atmosphere
|
|
12
12
|
from .schema import SchemaPublisher
|
|
13
13
|
from ._types import (
|
|
14
14
|
AtUri,
|
|
@@ -21,9 +21,10 @@ from ._types import (
|
|
|
21
21
|
from typing import TYPE_CHECKING
|
|
22
22
|
|
|
23
23
|
if TYPE_CHECKING:
|
|
24
|
-
from ..dataset import
|
|
24
|
+
from ..dataset import Dataset
|
|
25
|
+
from .._protocols import Packable
|
|
25
26
|
|
|
26
|
-
ST = TypeVar("ST", bound="
|
|
27
|
+
ST = TypeVar("ST", bound="Packable")
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class DatasetPublisher:
|
|
@@ -35,10 +36,9 @@ class DatasetPublisher:
|
|
|
35
36
|
Examples:
|
|
36
37
|
>>> dataset = atdata.Dataset[MySample]("s3://bucket/data-{000000..000009}.tar")
|
|
37
38
|
>>>
|
|
38
|
-
>>>
|
|
39
|
-
>>> client.login("handle", "password")
|
|
39
|
+
>>> atmo = Atmosphere.login("handle", "password")
|
|
40
40
|
>>>
|
|
41
|
-
>>> publisher = DatasetPublisher(
|
|
41
|
+
>>> publisher = DatasetPublisher(atmo)
|
|
42
42
|
>>> uri = publisher.publish(
|
|
43
43
|
... dataset,
|
|
44
44
|
... name="My Training Data",
|
|
@@ -47,11 +47,11 @@ class DatasetPublisher:
|
|
|
47
47
|
... )
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
|
-
def __init__(self, client:
|
|
50
|
+
def __init__(self, client: Atmosphere):
|
|
51
51
|
"""Initialize the dataset publisher.
|
|
52
52
|
|
|
53
53
|
Args:
|
|
54
|
-
client: Authenticated
|
|
54
|
+
client: Authenticated Atmosphere instance.
|
|
55
55
|
"""
|
|
56
56
|
self.client = client
|
|
57
57
|
self._schema_publisher = SchemaPublisher(client)
|
|
@@ -267,8 +267,8 @@ class DatasetLoader:
|
|
|
267
267
|
Python class for the sample type.
|
|
268
268
|
|
|
269
269
|
Examples:
|
|
270
|
-
>>>
|
|
271
|
-
>>> loader = DatasetLoader(
|
|
270
|
+
>>> atmo = Atmosphere.login("handle", "password")
|
|
271
|
+
>>> loader = DatasetLoader(atmo)
|
|
272
272
|
>>>
|
|
273
273
|
>>> # List available datasets
|
|
274
274
|
>>> datasets = loader.list()
|
|
@@ -279,11 +279,11 @@ class DatasetLoader:
|
|
|
279
279
|
>>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.record/xyz")
|
|
280
280
|
"""
|
|
281
281
|
|
|
282
|
-
def __init__(self, client:
|
|
282
|
+
def __init__(self, client: Atmosphere):
|
|
283
283
|
"""Initialize the dataset loader.
|
|
284
284
|
|
|
285
285
|
Args:
|
|
286
|
-
client:
|
|
286
|
+
client: Atmosphere instance.
|
|
287
287
|
"""
|
|
288
288
|
self.client = client
|
|
289
289
|
|
atdata/atmosphere/schema.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
"""Schema publishing and loading for ATProto.
|
|
2
2
|
|
|
3
3
|
This module provides classes for publishing PackableSample schemas to ATProto
|
|
4
|
-
and loading them back. Schemas are published as ``ac.foundation.dataset.
|
|
4
|
+
and loading them back. Schemas are published as ``ac.foundation.dataset.schema``
|
|
5
5
|
records.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
from dataclasses import fields, is_dataclass
|
|
9
9
|
from typing import Type, TypeVar, Optional, get_type_hints, get_origin, get_args
|
|
10
10
|
|
|
11
|
-
from .client import AtmosphereClient
|
|
11
|
+
from .client import Atmosphere
|
|
12
12
|
from ._types import (
|
|
13
13
|
AtUri,
|
|
14
14
|
SchemaRecord,
|
|
@@ -26,9 +26,9 @@ from .._type_utils import (
|
|
|
26
26
|
from typing import TYPE_CHECKING
|
|
27
27
|
|
|
28
28
|
if TYPE_CHECKING:
|
|
29
|
-
from ..
|
|
29
|
+
from .._protocols import Packable
|
|
30
30
|
|
|
31
|
-
ST = TypeVar("ST", bound="
|
|
31
|
+
ST = TypeVar("ST", bound="Packable")
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
class SchemaPublisher:
|
|
@@ -43,20 +43,19 @@ class SchemaPublisher:
|
|
|
43
43
|
... image: NDArray
|
|
44
44
|
... label: str
|
|
45
45
|
...
|
|
46
|
-
>>>
|
|
47
|
-
>>> client.login("handle", "password")
|
|
46
|
+
>>> atmo = Atmosphere.login("handle", "password")
|
|
48
47
|
>>>
|
|
49
|
-
>>> publisher = SchemaPublisher(
|
|
48
|
+
>>> publisher = SchemaPublisher(atmo)
|
|
50
49
|
>>> uri = publisher.publish(MySample, version="1.0.0")
|
|
51
50
|
>>> print(uri)
|
|
52
|
-
at://did:plc:.../ac.foundation.dataset.
|
|
51
|
+
at://did:plc:.../ac.foundation.dataset.schema/...
|
|
53
52
|
"""
|
|
54
53
|
|
|
55
|
-
def __init__(self, client:
|
|
54
|
+
def __init__(self, client: Atmosphere):
|
|
56
55
|
"""Initialize the schema publisher.
|
|
57
56
|
|
|
58
57
|
Args:
|
|
59
|
-
client: Authenticated
|
|
58
|
+
client: Authenticated Atmosphere instance.
|
|
60
59
|
"""
|
|
61
60
|
self.client = client
|
|
62
61
|
|
|
@@ -103,7 +102,7 @@ class SchemaPublisher:
|
|
|
103
102
|
|
|
104
103
|
# Publish to ATProto
|
|
105
104
|
return self.client.create_record(
|
|
106
|
-
collection=f"{LEXICON_NAMESPACE}.
|
|
105
|
+
collection=f"{LEXICON_NAMESPACE}.schema",
|
|
107
106
|
record=schema_record.to_record(),
|
|
108
107
|
rkey=rkey,
|
|
109
108
|
validate=False, # PDS doesn't know our lexicon
|
|
@@ -185,20 +184,19 @@ class SchemaLoader:
|
|
|
185
184
|
schemas from a repository.
|
|
186
185
|
|
|
187
186
|
Examples:
|
|
188
|
-
>>>
|
|
189
|
-
>>> client.login("handle", "password")
|
|
187
|
+
>>> atmo = Atmosphere.login("handle", "password")
|
|
190
188
|
>>>
|
|
191
|
-
>>> loader = SchemaLoader(
|
|
192
|
-
>>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.
|
|
189
|
+
>>> loader = SchemaLoader(atmo)
|
|
190
|
+
>>> schema = loader.get("at://did:plc:.../ac.foundation.dataset.schema/...")
|
|
193
191
|
>>> print(schema["name"])
|
|
194
192
|
'MySample'
|
|
195
193
|
"""
|
|
196
194
|
|
|
197
|
-
def __init__(self, client:
|
|
195
|
+
def __init__(self, client: Atmosphere):
|
|
198
196
|
"""Initialize the schema loader.
|
|
199
197
|
|
|
200
198
|
Args:
|
|
201
|
-
client:
|
|
199
|
+
client: Atmosphere instance (authentication optional for reads).
|
|
202
200
|
"""
|
|
203
201
|
self.client = client
|
|
204
202
|
|
|
@@ -217,7 +215,7 @@ class SchemaLoader:
|
|
|
217
215
|
"""
|
|
218
216
|
record = self.client.get_record(uri)
|
|
219
217
|
|
|
220
|
-
expected_type = f"{LEXICON_NAMESPACE}.
|
|
218
|
+
expected_type = f"{LEXICON_NAMESPACE}.schema"
|
|
221
219
|
if record.get("$type") != expected_type:
|
|
222
220
|
raise ValueError(
|
|
223
221
|
f"Record at {uri} is not a schema record. "
|
atdata/atmosphere/store.py
CHANGED
|
@@ -7,12 +7,11 @@ This enables fully decentralized dataset storage where both metadata (records)
|
|
|
7
7
|
and data (blobs) live on the AT Protocol network.
|
|
8
8
|
|
|
9
9
|
Examples:
|
|
10
|
-
>>> from atdata.atmosphere import
|
|
10
|
+
>>> from atdata.atmosphere import Atmosphere, PDSBlobStore
|
|
11
11
|
>>>
|
|
12
|
-
>>>
|
|
13
|
-
>>> client.login("handle.bsky.social", "app-password")
|
|
12
|
+
>>> atmo = Atmosphere.login("handle.bsky.social", "app-password")
|
|
14
13
|
>>>
|
|
15
|
-
>>> store = PDSBlobStore(
|
|
14
|
+
>>> store = PDSBlobStore(atmo)
|
|
16
15
|
>>> urls = store.write_shards(dataset, prefix="mnist/v1")
|
|
17
16
|
>>> print(urls)
|
|
18
17
|
['at://did:plc:.../blob/bafyrei...', ...]
|
|
@@ -29,7 +28,7 @@ import webdataset as wds
|
|
|
29
28
|
if TYPE_CHECKING:
|
|
30
29
|
from ..dataset import Dataset
|
|
31
30
|
from .._sources import BlobSource
|
|
32
|
-
from .client import AtmosphereClient
|
|
31
|
+
from .client import Atmosphere
|
|
33
32
|
|
|
34
33
|
|
|
35
34
|
@dataclass
|
|
@@ -44,7 +43,7 @@ class PDSBlobStore:
|
|
|
44
43
|
to HTTP URLs for streaming.
|
|
45
44
|
|
|
46
45
|
Attributes:
|
|
47
|
-
client: Authenticated
|
|
46
|
+
client: Authenticated Atmosphere instance.
|
|
48
47
|
|
|
49
48
|
Examples:
|
|
50
49
|
>>> store = PDSBlobStore(client)
|
|
@@ -53,7 +52,7 @@ class PDSBlobStore:
|
|
|
53
52
|
>>> # ['at://did:plc:abc/blob/bafyrei...', ...]
|
|
54
53
|
"""
|
|
55
54
|
|
|
56
|
-
client: "
|
|
55
|
+
client: "Atmosphere"
|
|
57
56
|
|
|
58
57
|
def write_shards(
|
|
59
58
|
self,
|