atdata 0.3.0b1__py3-none-any.whl → 0.3.2b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. atdata/__init__.py +11 -0
  2. atdata/_cid.py +0 -21
  3. atdata/_helpers.py +12 -0
  4. atdata/_hf_api.py +46 -1
  5. atdata/_logging.py +43 -0
  6. atdata/_protocols.py +81 -182
  7. atdata/_schema_codec.py +2 -2
  8. atdata/_sources.py +24 -4
  9. atdata/_stub_manager.py +5 -25
  10. atdata/atmosphere/__init__.py +60 -21
  11. atdata/atmosphere/_lexicon_types.py +595 -0
  12. atdata/atmosphere/_types.py +73 -245
  13. atdata/atmosphere/client.py +64 -12
  14. atdata/atmosphere/lens.py +60 -53
  15. atdata/atmosphere/records.py +291 -100
  16. atdata/atmosphere/schema.py +91 -65
  17. atdata/atmosphere/store.py +68 -66
  18. atdata/cli/__init__.py +16 -16
  19. atdata/cli/diagnose.py +2 -2
  20. atdata/cli/{local.py → infra.py} +10 -10
  21. atdata/dataset.py +266 -47
  22. atdata/index/__init__.py +54 -0
  23. atdata/{local → index}/_entry.py +6 -2
  24. atdata/{local → index}/_index.py +617 -72
  25. atdata/{local → index}/_schema.py +5 -5
  26. atdata/lexicons/__init__.py +127 -0
  27. atdata/lexicons/ac.foundation.dataset.arrayFormat.json +16 -0
  28. atdata/lexicons/ac.foundation.dataset.getLatestSchema.json +78 -0
  29. atdata/lexicons/ac.foundation.dataset.lens.json +101 -0
  30. atdata/lexicons/ac.foundation.dataset.record.json +117 -0
  31. atdata/lexicons/ac.foundation.dataset.schema.json +107 -0
  32. atdata/lexicons/ac.foundation.dataset.schemaType.json +16 -0
  33. atdata/lexicons/ac.foundation.dataset.storageBlobs.json +46 -0
  34. atdata/lexicons/ac.foundation.dataset.storageExternal.json +25 -0
  35. atdata/lexicons/ac.foundation.dataset.storageHttp.json +45 -0
  36. atdata/lexicons/ac.foundation.dataset.storageS3.json +61 -0
  37. atdata/lexicons/ndarray_shim.json +16 -0
  38. atdata/local/__init__.py +12 -13
  39. atdata/local/_repo_legacy.py +3 -3
  40. atdata/manifest/__init__.py +4 -0
  41. atdata/manifest/_proxy.py +321 -0
  42. atdata/promote.py +14 -10
  43. atdata/repository.py +66 -16
  44. atdata/stores/__init__.py +23 -0
  45. atdata/stores/_disk.py +131 -0
  46. atdata/{local → stores}/_s3.py +134 -112
  47. atdata/testing.py +12 -8
  48. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/METADATA +2 -2
  49. atdata-0.3.2b1.dist-info/RECORD +71 -0
  50. atdata-0.3.0b1.dist-info/RECORD +0 -54
  51. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/WHEEL +0 -0
  52. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/entry_points.txt +0 -0
  53. {atdata-0.3.0b1.dist-info → atdata-0.3.2b1.dist-info}/licenses/LICENSE +0 -0
atdata/_sources.py CHANGED
@@ -64,10 +64,20 @@ class URLSource:
64
64
  """Expand brace pattern and return list of shard URLs."""
65
65
  return list(braceexpand.braceexpand(self.url))
66
66
 
67
- # Legacy alias for backwards compatibility
68
67
  @property
69
68
  def shard_list(self) -> list[str]:
70
- """Expand brace pattern and return list of shard URLs (deprecated, use list_shards())."""
69
+ """Expand brace pattern and return list of shard URLs.
70
+
71
+ .. deprecated::
72
+ Use :meth:`list_shards` instead.
73
+ """
74
+ import warnings
75
+
76
+ warnings.warn(
77
+ "shard_list is deprecated, use list_shards()",
78
+ DeprecationWarning,
79
+ stacklevel=2,
80
+ )
71
81
  return self.list_shards()
72
82
 
73
83
  @property
@@ -178,10 +188,20 @@ class S3Source:
178
188
  """Return list of S3 URIs for the shards."""
179
189
  return [f"s3://{self.bucket}/{key}" for key in self.keys]
180
190
 
181
- # Legacy alias for backwards compatibility
182
191
  @property
183
192
  def shard_list(self) -> list[str]:
184
- """Return list of S3 URIs for the shards (deprecated, use list_shards())."""
193
+ """Return list of S3 URIs for the shards.
194
+
195
+ .. deprecated::
196
+ Use :meth:`list_shards` instead.
197
+ """
198
+ import warnings
199
+
200
+ warnings.warn(
201
+ "shard_list is deprecated, use list_shards()",
202
+ DeprecationWarning,
203
+ stacklevel=2,
204
+ )
185
205
  return self.list_shards()
186
206
 
187
207
  @property
atdata/_stub_manager.py CHANGED
@@ -15,7 +15,7 @@ Examples:
15
15
  >>> index = Index(auto_stubs=True)
16
16
  >>>
17
17
  >>> # Modules are generated automatically on decode_schema
18
- >>> MyType = index.decode_schema("atdata://local/sampleSchema/MySample@1.0.0")
18
+ >>> MyType = index.decode_schema("atdata://local/schema/MySample@1.0.0")
19
19
  >>> # MyType is now properly typed for IDE autocomplete!
20
20
  >>>
21
21
  >>> # Get the stub directory path for IDE configuration
@@ -51,8 +51,8 @@ def _extract_authority(schema_ref: Optional[str]) -> str:
51
51
  """Extract authority from a schema reference URI.
52
52
 
53
53
  Args:
54
- schema_ref: Schema ref like "atdata://local/sampleSchema/Name@1.0.0"
55
- or "atdata://alice.bsky.social/sampleSchema/Name@1.0.0"
54
+ schema_ref: Schema ref like "atdata://local/schema/Name@1.0.0"
55
+ or "atdata://alice.bsky.social/schema/Name@1.0.0"
56
56
 
57
57
  Returns:
58
58
  Authority string (e.g., "local", "alice.bsky.social", "did_plc_xxx").
@@ -149,10 +149,6 @@ class StubManager:
149
149
  safe_version = version.replace(".", "_")
150
150
  return f"{name}_{safe_version}.py"
151
151
 
152
- def _stub_filename(self, name: str, version: str) -> str:
153
- """Alias for _module_filename for backwards compatibility."""
154
- return self._module_filename(name, version)
155
-
156
152
  def _module_path(
157
153
  self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
158
154
  ) -> Path:
@@ -168,12 +164,6 @@ class StubManager:
168
164
  """
169
165
  return self._stub_dir / authority / self._module_filename(name, version)
170
166
 
171
- def _stub_path(
172
- self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
173
- ) -> Path:
174
- """Alias for _module_path for backwards compatibility."""
175
- return self._module_path(name, version, authority)
176
-
177
167
  def _module_is_current(self, path: Path, version: str) -> bool:
178
168
  """Check if an existing module file matches the expected version.
179
169
 
@@ -200,10 +190,6 @@ class StubManager:
200
190
  except (OSError, IOError):
201
191
  return False
202
192
 
203
- def _stub_is_current(self, path: Path, version: str) -> bool:
204
- """Alias for _module_is_current for backwards compatibility."""
205
- return self._module_is_current(path, version)
206
-
207
193
  def _ensure_authority_package(self, authority: str) -> None:
208
194
  """Ensure authority subdirectory exists with __init__.py."""
209
195
  self._ensure_dir_exists()
@@ -261,12 +247,6 @@ class StubManager:
261
247
  pass # Temp file cleanup failed, re-raising original error
262
248
  raise
263
249
 
264
- def _write_stub_atomic(self, path: Path, content: str) -> None:
265
- """Legacy method - extracts authority from path and calls _write_module_atomic."""
266
- # Extract authority from path (parent directory name)
267
- authority = path.parent.name
268
- self._write_module_atomic(path, content, authority)
269
-
270
250
  def ensure_stub(self, schema: dict) -> Optional[Path]:
271
251
  """Ensure a module file exists for the given schema.
272
252
 
@@ -426,7 +406,7 @@ class StubManager:
426
406
  Returns:
427
407
  Path if stub exists, None otherwise
428
408
  """
429
- path = self._stub_path(name, version, authority)
409
+ path = self._module_path(name, version, authority)
430
410
  return path if path.exists() else None
431
411
 
432
412
  def list_stubs(self, authority: Optional[str] = None) -> list[Path]:
@@ -513,7 +493,7 @@ class StubManager:
513
493
  Returns:
514
494
  True if file was removed, False if it didn't exist
515
495
  """
516
- path = self._stub_path(name, version, authority)
496
+ path = self._module_path(name, version, authority)
517
497
  if path.exists():
518
498
  try:
519
499
  path.unlink()
atdata/atmosphere/__init__.py CHANGED
@@ -6,7 +6,7 @@ network.
6
6
 
7
7
  Key components:
8
8
 
9
- - ``AtmosphereClient``: Authentication and session management for ATProto
9
+ - ``Atmosphere``: Authentication and session management for ATProto
10
10
  - ``SchemaPublisher``: Publish PackableSample schemas as ATProto records
11
11
  - ``DatasetPublisher``: Publish dataset index records with WebDataset URLs
12
12
  - ``LensPublisher``: Publish lens transformation records
@@ -16,13 +16,10 @@ to work unchanged. These features are opt-in for users who want to publish
16
16
  or discover datasets on the ATProto network.
17
17
 
18
18
  Examples:
19
- >>> from atdata.atmosphere import AtmosphereClient, SchemaPublisher
19
+ >>> from atdata.atmosphere import Atmosphere
20
20
  >>>
21
- >>> client = AtmosphereClient()
22
- >>> client.login("handle.bsky.social", "app-password")
23
- >>>
24
- >>> publisher = SchemaPublisher(client)
25
- >>> schema_uri = publisher.publish(MySampleType, version="1.0.0")
21
+ >>> atmo = Atmosphere.login("handle.bsky.social", "app-password")
22
+ >>> index = Index(atmosphere=atmo)
26
23
 
27
24
  Note:
28
25
  This module requires the ``atproto`` package to be installed::
@@ -32,16 +29,28 @@ Note:
32
29
 
33
30
  from typing import Iterator, Optional, Type, TYPE_CHECKING
34
31
 
35
- from .client import AtmosphereClient
32
+ from .client import Atmosphere
36
33
  from .schema import SchemaPublisher, SchemaLoader
37
34
  from .records import DatasetPublisher, DatasetLoader
38
35
  from .lens import LensPublisher, LensLoader
39
36
  from .store import PDSBlobStore
40
- from ._types import (
41
- AtUri,
42
- SchemaRecord,
43
- DatasetRecord,
44
- LensRecord,
37
+ from ._types import AtUri, LEXICON_NAMESPACE
38
+ from ._lexicon_types import (
39
+ LexSchemaRecord,
40
+ LexDatasetRecord,
41
+ LexLensRecord,
42
+ LexCodeReference,
43
+ JsonSchemaFormat,
44
+ StorageHttp,
45
+ StorageS3,
46
+ StorageBlobs,
47
+ ShardChecksum,
48
+ HttpShardEntry,
49
+ S3ShardEntry,
50
+ BlobEntry,
51
+ DatasetSize,
52
+ StorageUnion,
53
+ storage_from_record,
45
54
  )
46
55
 
47
56
  if TYPE_CHECKING:
@@ -73,11 +82,23 @@ class AtmosphereIndexEntry:
73
82
 
74
83
  @property
75
84
  def data_urls(self) -> list[str]:
76
- """WebDataset URLs from external storage."""
85
+ """WebDataset URLs from storage.
86
+
87
+ Handles storageHttp (shard URLs), storageS3 (s3:// URLs),
88
+ storageExternal (legacy), and storageBlobs (PDS blob URLs).
89
+ """
77
90
  storage = self._record.get("storage", {})
78
91
  storage_type = storage.get("$type", "")
92
+ if "storageHttp" in storage_type:
93
+ return [s["url"] for s in storage.get("shards", [])]
94
+ if "storageS3" in storage_type:
95
+ bucket = storage.get("bucket", "")
96
+ return [f"s3://{bucket}/{s['key']}" for s in storage.get("shards", [])]
79
97
  if "storageExternal" in storage_type:
80
98
  return storage.get("urls", [])
99
+ if "storageBlobs" in storage_type:
100
+ # Blob URLs must be resolved via PDS; return empty for now
101
+ return []
81
102
  return []
82
103
 
83
104
  @property
@@ -122,14 +143,14 @@ class AtmosphereIndex:
122
143
 
123
144
  def __init__(
124
145
  self,
125
- client: AtmosphereClient,
146
+ client: Atmosphere,
126
147
  *,
127
148
  data_store: Optional[PDSBlobStore] = None,
128
149
  ):
129
150
  """Initialize the atmosphere index.
130
151
 
131
152
  Args:
132
- client: Authenticated AtmosphereClient instance.
153
+ client: Authenticated Atmosphere instance.
133
154
  data_store: Optional PDSBlobStore for writing shards as blobs.
134
155
  If provided, insert_dataset will upload shards to PDS.
135
156
  """
@@ -314,9 +335,13 @@ class AtmosphereIndex:
314
335
  return schema_to_type(schema)
315
336
 
316
337
 
338
+ # Deprecated alias for backward compatibility
339
+ AtmosphereClient = Atmosphere
340
+
317
341
  __all__ = [
318
342
  # Client
319
- "AtmosphereClient",
343
+ "Atmosphere",
344
+ "AtmosphereClient", # deprecated alias
320
345
  # Storage
321
346
  "PDSBlobStore",
322
347
  # Unified index (AbstractIndex protocol)
@@ -331,9 +356,23 @@ __all__ = [
331
356
  # Lens operations
332
357
  "LensPublisher",
333
358
  "LensLoader",
334
- # Types
359
+ # Core types
335
360
  "AtUri",
336
- "SchemaRecord",
337
- "DatasetRecord",
338
- "LensRecord",
361
+ "LEXICON_NAMESPACE",
362
+ # Lexicon-mirror types (Tier 1)
363
+ "LexSchemaRecord",
364
+ "LexDatasetRecord",
365
+ "LexLensRecord",
366
+ "LexCodeReference",
367
+ "JsonSchemaFormat",
368
+ "StorageHttp",
369
+ "StorageS3",
370
+ "StorageBlobs",
371
+ "StorageUnion",
372
+ "storage_from_record",
373
+ "ShardChecksum",
374
+ "HttpShardEntry",
375
+ "S3ShardEntry",
376
+ "BlobEntry",
377
+ "DatasetSize",
339
378
  ]