atdata 0.3.0b1__py3-none-any.whl → 0.3.2b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atdata/__init__.py +11 -0
- atdata/_cid.py +0 -21
- atdata/_helpers.py +12 -0
- atdata/_hf_api.py +46 -1
- atdata/_logging.py +43 -0
- atdata/_protocols.py +81 -182
- atdata/_schema_codec.py +2 -2
- atdata/_sources.py +24 -4
- atdata/_stub_manager.py +5 -25
- atdata/atmosphere/__init__.py +60 -21
- atdata/atmosphere/_lexicon_types.py +595 -0
- atdata/atmosphere/_types.py +73 -245
- atdata/atmosphere/client.py +64 -12
- atdata/atmosphere/lens.py +60 -53
- atdata/atmosphere/records.py +291 -100
- atdata/atmosphere/schema.py +91 -65
- atdata/atmosphere/store.py +68 -66
- atdata/cli/__init__.py +16 -16
- atdata/cli/diagnose.py +2 -2
- atdata/cli/{local.py → infra.py} +10 -10
- atdata/dataset.py +266 -47
- atdata/index/__init__.py +54 -0
- atdata/{local → index}/_entry.py +6 -2
- atdata/{local → index}/_index.py +617 -72
- atdata/{local → index}/_schema.py +5 -5
- atdata/lexicons/__init__.py +127 -0
- atdata/lexicons/ac.foundation.dataset.arrayFormat.json +16 -0
- atdata/lexicons/ac.foundation.dataset.getLatestSchema.json +78 -0
- atdata/lexicons/ac.foundation.dataset.lens.json +101 -0
- atdata/lexicons/ac.foundation.dataset.record.json +117 -0
- atdata/lexicons/ac.foundation.dataset.schema.json +107 -0
- atdata/lexicons/ac.foundation.dataset.schemaType.json +16 -0
- atdata/lexicons/ac.foundation.dataset.storageBlobs.json +46 -0
- atdata/lexicons/ac.foundation.dataset.storageExternal.json +25 -0
- atdata/lexicons/ac.foundation.dataset.storageHttp.json +45 -0
- atdata/lexicons/ac.foundation.dataset.storageS3.json +61 -0
- atdata/lexicons/ndarray_shim.json +16 -0
- atdata/local/__init__.py +12 -13
- atdata/local/_repo_legacy.py +3 -3
- atdata/manifest/__init__.py +4 -0
- atdata/manifest/_proxy.py +321 -0
- atdata/promote.py +14 -10
- atdata/repository.py +66 -16
- atdata/stores/__init__.py +23 -0
- atdata/stores/_disk.py +131 -0
- atdata/{local → stores}/_s3.py +134 -112
- atdata/testing.py +12 -8
- {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/METADATA +2 -2
- atdata-0.3.2b1.dist-info/RECORD +71 -0
- atdata-0.3.0b1.dist-info/RECORD +0 -54
- {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/WHEEL +0 -0
- {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/entry_points.txt +0 -0
- {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/licenses/LICENSE +0 -0
atdata/_sources.py
CHANGED
|
@@ -64,10 +64,20 @@ class URLSource:
|
|
|
64
64
|
"""Expand brace pattern and return list of shard URLs."""
|
|
65
65
|
return list(braceexpand.braceexpand(self.url))
|
|
66
66
|
|
|
67
|
-
# Legacy alias for backwards compatibility
|
|
68
67
|
@property
|
|
69
68
|
def shard_list(self) -> list[str]:
|
|
70
|
-
"""Expand brace pattern and return list of shard URLs."""
|
|
69
|
+
"""Expand brace pattern and return list of shard URLs.
|
|
70
|
+
|
|
71
|
+
.. deprecated::
|
|
72
|
+
Use :meth:`list_shards` instead.
|
|
73
|
+
"""
|
|
74
|
+
import warnings
|
|
75
|
+
|
|
76
|
+
warnings.warn(
|
|
77
|
+
"shard_list is deprecated, use list_shards()",
|
|
78
|
+
DeprecationWarning,
|
|
79
|
+
stacklevel=2,
|
|
80
|
+
)
|
|
71
81
|
return self.list_shards()
|
|
72
82
|
|
|
73
83
|
@property
|
|
@@ -178,10 +188,20 @@ class S3Source:
|
|
|
178
188
|
"""Return list of S3 URIs for the shards."""
|
|
179
189
|
return [f"s3://{self.bucket}/{key}" for key in self.keys]
|
|
180
190
|
|
|
181
|
-
# Legacy alias for backwards compatibility
|
|
182
191
|
@property
|
|
183
192
|
def shard_list(self) -> list[str]:
|
|
184
|
-
"""Return list of S3 URIs for the shards."""
|
|
193
|
+
"""Return list of S3 URIs for the shards.
|
|
194
|
+
|
|
195
|
+
.. deprecated::
|
|
196
|
+
Use :meth:`list_shards` instead.
|
|
197
|
+
"""
|
|
198
|
+
import warnings
|
|
199
|
+
|
|
200
|
+
warnings.warn(
|
|
201
|
+
"shard_list is deprecated, use list_shards()",
|
|
202
|
+
DeprecationWarning,
|
|
203
|
+
stacklevel=2,
|
|
204
|
+
)
|
|
185
205
|
return self.list_shards()
|
|
186
206
|
|
|
187
207
|
@property
|
atdata/_stub_manager.py
CHANGED
|
@@ -15,7 +15,7 @@ Examples:
|
|
|
15
15
|
>>> index = Index(auto_stubs=True)
|
|
16
16
|
>>>
|
|
17
17
|
>>> # Modules are generated automatically on decode_schema
|
|
18
|
-
>>> MyType = index.decode_schema("atdata://local/
|
|
18
|
+
>>> MyType = index.decode_schema("atdata://local/schema/MySample@1.0.0")
|
|
19
19
|
>>> # MyType is now properly typed for IDE autocomplete!
|
|
20
20
|
>>>
|
|
21
21
|
>>> # Get the stub directory path for IDE configuration
|
|
@@ -51,8 +51,8 @@ def _extract_authority(schema_ref: Optional[str]) -> str:
|
|
|
51
51
|
"""Extract authority from a schema reference URI.
|
|
52
52
|
|
|
53
53
|
Args:
|
|
54
|
-
schema_ref: Schema ref like "atdata://local/
|
|
55
|
-
or "atdata://alice.bsky.social/
|
|
54
|
+
schema_ref: Schema ref like "atdata://local/schema/Name@1.0.0"
|
|
55
|
+
or "atdata://alice.bsky.social/schema/Name@1.0.0"
|
|
56
56
|
|
|
57
57
|
Returns:
|
|
58
58
|
Authority string (e.g., "local", "alice.bsky.social", "did_plc_xxx").
|
|
@@ -149,10 +149,6 @@ class StubManager:
|
|
|
149
149
|
safe_version = version.replace(".", "_")
|
|
150
150
|
return f"{name}_{safe_version}.py"
|
|
151
151
|
|
|
152
|
-
def _stub_filename(self, name: str, version: str) -> str:
|
|
153
|
-
"""Alias for _module_filename for backwards compatibility."""
|
|
154
|
-
return self._module_filename(name, version)
|
|
155
|
-
|
|
156
152
|
def _module_path(
|
|
157
153
|
self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
|
|
158
154
|
) -> Path:
|
|
@@ -168,12 +164,6 @@ class StubManager:
|
|
|
168
164
|
"""
|
|
169
165
|
return self._stub_dir / authority / self._module_filename(name, version)
|
|
170
166
|
|
|
171
|
-
def _stub_path(
|
|
172
|
-
self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
|
|
173
|
-
) -> Path:
|
|
174
|
-
"""Alias for _module_path for backwards compatibility."""
|
|
175
|
-
return self._module_path(name, version, authority)
|
|
176
|
-
|
|
177
167
|
def _module_is_current(self, path: Path, version: str) -> bool:
|
|
178
168
|
"""Check if an existing module file matches the expected version.
|
|
179
169
|
|
|
@@ -200,10 +190,6 @@ class StubManager:
|
|
|
200
190
|
except (OSError, IOError):
|
|
201
191
|
return False
|
|
202
192
|
|
|
203
|
-
def _stub_is_current(self, path: Path, version: str) -> bool:
|
|
204
|
-
"""Alias for _module_is_current for backwards compatibility."""
|
|
205
|
-
return self._module_is_current(path, version)
|
|
206
|
-
|
|
207
193
|
def _ensure_authority_package(self, authority: str) -> None:
|
|
208
194
|
"""Ensure authority subdirectory exists with __init__.py."""
|
|
209
195
|
self._ensure_dir_exists()
|
|
@@ -261,12 +247,6 @@ class StubManager:
|
|
|
261
247
|
pass # Temp file cleanup failed, re-raising original error
|
|
262
248
|
raise
|
|
263
249
|
|
|
264
|
-
def _write_stub_atomic(self, path: Path, content: str) -> None:
|
|
265
|
-
"""Legacy method - extracts authority from path and calls _write_module_atomic."""
|
|
266
|
-
# Extract authority from path (parent directory name)
|
|
267
|
-
authority = path.parent.name
|
|
268
|
-
self._write_module_atomic(path, content, authority)
|
|
269
|
-
|
|
270
250
|
def ensure_stub(self, schema: dict) -> Optional[Path]:
|
|
271
251
|
"""Ensure a module file exists for the given schema.
|
|
272
252
|
|
|
@@ -426,7 +406,7 @@ class StubManager:
|
|
|
426
406
|
Returns:
|
|
427
407
|
Path if stub exists, None otherwise
|
|
428
408
|
"""
|
|
429
|
-
path = self._stub_path(name, version, authority)
|
|
409
|
+
path = self._module_path(name, version, authority)
|
|
430
410
|
return path if path.exists() else None
|
|
431
411
|
|
|
432
412
|
def list_stubs(self, authority: Optional[str] = None) -> list[Path]:
|
|
@@ -513,7 +493,7 @@ class StubManager:
|
|
|
513
493
|
Returns:
|
|
514
494
|
True if file was removed, False if it didn't exist
|
|
515
495
|
"""
|
|
516
|
-
path = self._stub_path(name, version, authority)
|
|
496
|
+
path = self._module_path(name, version, authority)
|
|
517
497
|
if path.exists():
|
|
518
498
|
try:
|
|
519
499
|
path.unlink()
|
atdata/atmosphere/__init__.py
CHANGED
|
@@ -6,7 +6,7 @@ network.
|
|
|
6
6
|
|
|
7
7
|
Key components:
|
|
8
8
|
|
|
9
|
-
- ``
|
|
9
|
+
- ``Atmosphere``: Authentication and session management for ATProto
|
|
10
10
|
- ``SchemaPublisher``: Publish PackableSample schemas as ATProto records
|
|
11
11
|
- ``DatasetPublisher``: Publish dataset index records with WebDataset URLs
|
|
12
12
|
- ``LensPublisher``: Publish lens transformation records
|
|
@@ -16,13 +16,10 @@ to work unchanged. These features are opt-in for users who want to publish
|
|
|
16
16
|
or discover datasets on the ATProto network.
|
|
17
17
|
|
|
18
18
|
Examples:
|
|
19
|
-
>>> from atdata.atmosphere import
|
|
19
|
+
>>> from atdata.atmosphere import Atmosphere
|
|
20
20
|
>>>
|
|
21
|
-
>>>
|
|
22
|
-
>>>
|
|
23
|
-
>>>
|
|
24
|
-
>>> publisher = SchemaPublisher(client)
|
|
25
|
-
>>> schema_uri = publisher.publish(MySampleType, version="1.0.0")
|
|
21
|
+
>>> atmo = Atmosphere.login("handle.bsky.social", "app-password")
|
|
22
|
+
>>> index = Index(atmosphere=atmo)
|
|
26
23
|
|
|
27
24
|
Note:
|
|
28
25
|
This module requires the ``atproto`` package to be installed::
|
|
@@ -32,16 +29,28 @@ Note:
|
|
|
32
29
|
|
|
33
30
|
from typing import Iterator, Optional, Type, TYPE_CHECKING
|
|
34
31
|
|
|
35
|
-
from .client import AtmosphereClient
|
|
32
|
+
from .client import Atmosphere
|
|
36
33
|
from .schema import SchemaPublisher, SchemaLoader
|
|
37
34
|
from .records import DatasetPublisher, DatasetLoader
|
|
38
35
|
from .lens import LensPublisher, LensLoader
|
|
39
36
|
from .store import PDSBlobStore
|
|
40
|
-
from ._types import
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
37
|
+
from ._types import AtUri, LEXICON_NAMESPACE
|
|
38
|
+
from ._lexicon_types import (
|
|
39
|
+
LexSchemaRecord,
|
|
40
|
+
LexDatasetRecord,
|
|
41
|
+
LexLensRecord,
|
|
42
|
+
LexCodeReference,
|
|
43
|
+
JsonSchemaFormat,
|
|
44
|
+
StorageHttp,
|
|
45
|
+
StorageS3,
|
|
46
|
+
StorageBlobs,
|
|
47
|
+
ShardChecksum,
|
|
48
|
+
HttpShardEntry,
|
|
49
|
+
S3ShardEntry,
|
|
50
|
+
BlobEntry,
|
|
51
|
+
DatasetSize,
|
|
52
|
+
StorageUnion,
|
|
53
|
+
storage_from_record,
|
|
45
54
|
)
|
|
46
55
|
|
|
47
56
|
if TYPE_CHECKING:
|
|
@@ -73,11 +82,23 @@ class AtmosphereIndexEntry:
|
|
|
73
82
|
|
|
74
83
|
@property
|
|
75
84
|
def data_urls(self) -> list[str]:
|
|
76
|
-
"""WebDataset URLs from
|
|
85
|
+
"""WebDataset URLs from storage.
|
|
86
|
+
|
|
87
|
+
Handles storageHttp (shard URLs), storageS3 (s3:// URLs),
|
|
88
|
+
storageExternal (legacy), and storageBlobs (PDS blob URLs).
|
|
89
|
+
"""
|
|
77
90
|
storage = self._record.get("storage", {})
|
|
78
91
|
storage_type = storage.get("$type", "")
|
|
92
|
+
if "storageHttp" in storage_type:
|
|
93
|
+
return [s["url"] for s in storage.get("shards", [])]
|
|
94
|
+
if "storageS3" in storage_type:
|
|
95
|
+
bucket = storage.get("bucket", "")
|
|
96
|
+
return [f"s3://{bucket}/{s['key']}" for s in storage.get("shards", [])]
|
|
79
97
|
if "storageExternal" in storage_type:
|
|
80
98
|
return storage.get("urls", [])
|
|
99
|
+
if "storageBlobs" in storage_type:
|
|
100
|
+
# Blob URLs must be resolved via PDS; return empty for now
|
|
101
|
+
return []
|
|
81
102
|
return []
|
|
82
103
|
|
|
83
104
|
@property
|
|
@@ -122,14 +143,14 @@ class AtmosphereIndex:
|
|
|
122
143
|
|
|
123
144
|
def __init__(
|
|
124
145
|
self,
|
|
125
|
-
client: AtmosphereClient,
|
|
146
|
+
client: Atmosphere,
|
|
126
147
|
*,
|
|
127
148
|
data_store: Optional[PDSBlobStore] = None,
|
|
128
149
|
):
|
|
129
150
|
"""Initialize the atmosphere index.
|
|
130
151
|
|
|
131
152
|
Args:
|
|
132
|
-
client: Authenticated AtmosphereClient instance.
|
|
153
|
+
client: Authenticated Atmosphere instance.
|
|
133
154
|
data_store: Optional PDSBlobStore for writing shards as blobs.
|
|
134
155
|
If provided, insert_dataset will upload shards to PDS.
|
|
135
156
|
"""
|
|
@@ -314,9 +335,13 @@ class AtmosphereIndex:
|
|
|
314
335
|
return schema_to_type(schema)
|
|
315
336
|
|
|
316
337
|
|
|
338
|
+
# Deprecated alias for backward compatibility
|
|
339
|
+
AtmosphereClient = Atmosphere
|
|
340
|
+
|
|
317
341
|
__all__ = [
|
|
318
342
|
# Client
|
|
319
|
-
"AtmosphereClient",
|
|
343
|
+
"Atmosphere",
|
|
344
|
+
"AtmosphereClient", # deprecated alias
|
|
320
345
|
# Storage
|
|
321
346
|
"PDSBlobStore",
|
|
322
347
|
# Unified index (AbstractIndex protocol)
|
|
@@ -331,9 +356,23 @@ __all__ = [
|
|
|
331
356
|
# Lens operations
|
|
332
357
|
"LensPublisher",
|
|
333
358
|
"LensLoader",
|
|
334
|
-
#
|
|
359
|
+
# Core types
|
|
335
360
|
"AtUri",
|
|
336
|
-
"
|
|
337
|
-
|
|
338
|
-
"
|
|
361
|
+
"LEXICON_NAMESPACE",
|
|
362
|
+
# Lexicon-mirror types (Tier 1)
|
|
363
|
+
"LexSchemaRecord",
|
|
364
|
+
"LexDatasetRecord",
|
|
365
|
+
"LexLensRecord",
|
|
366
|
+
"LexCodeReference",
|
|
367
|
+
"JsonSchemaFormat",
|
|
368
|
+
"StorageHttp",
|
|
369
|
+
"StorageS3",
|
|
370
|
+
"StorageBlobs",
|
|
371
|
+
"StorageUnion",
|
|
372
|
+
"storage_from_record",
|
|
373
|
+
"ShardChecksum",
|
|
374
|
+
"HttpShardEntry",
|
|
375
|
+
"S3ShardEntry",
|
|
376
|
+
"BlobEntry",
|
|
377
|
+
"DatasetSize",
|
|
339
378
|
]
|