atdata-0.3.0b1-py3-none-any.whl → atdata-0.3.1b1-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (46)
  1. atdata/__init__.py +9 -0
  2. atdata/_cid.py +0 -21
  3. atdata/_helpers.py +12 -0
  4. atdata/_hf_api.py +33 -1
  5. atdata/_protocols.py +64 -182
  6. atdata/_schema_codec.py +2 -2
  7. atdata/_stub_manager.py +5 -25
  8. atdata/atmosphere/__init__.py +12 -11
  9. atdata/atmosphere/_types.py +4 -4
  10. atdata/atmosphere/client.py +64 -12
  11. atdata/atmosphere/lens.py +11 -12
  12. atdata/atmosphere/records.py +9 -10
  13. atdata/atmosphere/schema.py +14 -16
  14. atdata/atmosphere/store.py +6 -7
  15. atdata/cli/__init__.py +16 -16
  16. atdata/cli/diagnose.py +2 -2
  17. atdata/cli/{local.py → infra.py} +10 -10
  18. atdata/dataset.py +155 -2
  19. atdata/index/__init__.py +54 -0
  20. atdata/{local → index}/_index.py +322 -64
  21. atdata/{local → index}/_schema.py +5 -5
  22. atdata/lexicons/__init__.py +121 -0
  23. atdata/lexicons/ac.foundation.dataset.arrayFormat.json +16 -0
  24. atdata/lexicons/ac.foundation.dataset.getLatestSchema.json +78 -0
  25. atdata/lexicons/ac.foundation.dataset.lens.json +99 -0
  26. atdata/lexicons/ac.foundation.dataset.record.json +96 -0
  27. atdata/lexicons/ac.foundation.dataset.schema.json +107 -0
  28. atdata/lexicons/ac.foundation.dataset.schemaType.json +16 -0
  29. atdata/lexicons/ac.foundation.dataset.storageBlobs.json +24 -0
  30. atdata/lexicons/ac.foundation.dataset.storageExternal.json +25 -0
  31. atdata/lexicons/ndarray_shim.json +16 -0
  32. atdata/local/__init__.py +12 -13
  33. atdata/local/_repo_legacy.py +3 -3
  34. atdata/promote.py +14 -10
  35. atdata/repository.py +7 -7
  36. atdata/stores/__init__.py +23 -0
  37. atdata/stores/_disk.py +123 -0
  38. atdata/testing.py +12 -8
  39. {atdata-0.3.0b1.dist-info → atdata-0.3.1b1.dist-info}/METADATA +2 -2
  40. atdata-0.3.1b1.dist-info/RECORD +67 -0
  41. atdata-0.3.0b1.dist-info/RECORD +0 -54
  42. /atdata/{local → index}/_entry.py +0 -0
  43. /atdata/{local → stores}/_s3.py +0 -0
  44. {atdata-0.3.0b1.dist-info → atdata-0.3.1b1.dist-info}/WHEEL +0 -0
  45. {atdata-0.3.0b1.dist-info → atdata-0.3.1b1.dist-info}/entry_points.txt +0 -0
  46. {atdata-0.3.0b1.dist-info → atdata-0.3.1b1.dist-info}/licenses/LICENSE +0 -0
atdata/__init__.py CHANGED
@@ -44,6 +44,7 @@ from .dataset import (
     SampleBatch as SampleBatch,
     Dataset as Dataset,
     packable as packable,
+    write_samples as write_samples,
 )

 from .lens import (
@@ -96,6 +97,14 @@ from .repository import (
     create_repository as create_repository,
 )

+from .index import (
+    Index as Index,
+)
+
+from .stores import (
+    LocalDiskStore as LocalDiskStore,
+)
+
 from ._cid import (
     generate_cid as generate_cid,
     verify_cid as verify_cid,
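The new top-level exports surface the index and local-disk storage layers directly from `atdata`. A minimal sketch of how they might fit together, assuming an `Index()` default constructor and a path argument for `LocalDiskStore` (neither signature is shown in this diff):

    import atdata

    # Hypothetical wiring; constructor arguments are assumptions, not shown here.
    store = atdata.LocalDiskStore("/tmp/atdata-shards")
    index = atdata.Index()

    samples = [...]  # iterable of @packable samples
    # Per AbstractIndex.write (see _protocols.py below), this serializes the
    # samples to WebDataset tar shards and records an index entry in one step.
    entry = index.write(samples, name="my-dataset")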
atdata/_cid.py CHANGED
@@ -116,29 +116,8 @@ def verify_cid(cid: str, data: Any) -> bool:
     return cid == expected_cid


-def parse_cid(cid: str) -> dict:
-    """Parse a CID string into its components.
-
-    Args:
-        cid: CID string to parse.
-
-    Returns:
-        Dictionary with 'version', 'codec', and 'hash' keys.
-        The 'hash' value is itself a dict with 'code', 'size', and 'digest'.
-
-    Examples:
-        >>> info = parse_cid('bafyrei...')
-        >>> info['version']
-        1
-        >>> info['codec']
-        113  # 0x71 = dag-cbor
-    """
-    return libipld.decode_cid(cid)
-
-
 __all__ = [
     "generate_cid",
     "generate_cid_from_bytes",
     "verify_cid",
-    "parse_cid",
 ]
atdata/_helpers.py CHANGED
@@ -65,10 +65,22 @@ def bytes_to_array(b: bytes) -> np.ndarray:
         return np.load(BytesIO(b), allow_pickle=True)

     # Compact format: dtype_len(1B) + dtype_str + ndim(1B) + shape(ndim×8B) + data
+    if len(b) < 2:
+        raise ValueError(f"Array buffer too short ({len(b)} bytes): need at least 2")
     dlen = b[0]
+    min_header = 2 + dlen  # dtype_len + dtype_str + ndim
+    if len(b) < min_header:
+        raise ValueError(
+            f"Array buffer too short ({len(b)} bytes): need at least {min_header} for header"
+        )
     dtype = np.dtype(b[1 : 1 + dlen].decode())
     ndim = b[1 + dlen]
     offset = 2 + dlen
+    min_with_shape = offset + ndim * 8
+    if len(b) < min_with_shape:
+        raise ValueError(
+            f"Array buffer too short ({len(b)} bytes): need at least {min_with_shape} for shape"
+        )
     shape = struct.unpack_from(f"<{ndim}q", b, offset)
     offset += ndim * 8
     return np.frombuffer(b, dtype=dtype, offset=offset).reshape(shape).copy()
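For context, the compact layout the decoder expects is: dtype_len (1 byte), the dtype string, ndim (1 byte), ndim little-endian int64 shape entries, then the raw array bytes. A small illustrative construction of that layout, so the new length checks are easier to follow (the packing shown here is a standalone sketch, not necessarily the library's own encoder):

    import struct
    import numpy as np

    arr = np.arange(6, dtype=np.float32).reshape(2, 3)
    dtype_str = arr.dtype.str.encode()                  # e.g. b'<f4'
    buf = (
        bytes([len(dtype_str)]) + dtype_str             # dtype_len + dtype_str
        + bytes([arr.ndim])                             # ndim
        + struct.pack(f"<{arr.ndim}q", *arr.shape)      # shape as int64s
        + arr.tobytes()                                 # raw data
    )
    # bytes_to_array(buf) reverses this; with the added checks, a truncated
    # buffer such as buf[:3] now raises ValueError instead of an opaque
    # struct.error or numpy failure.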
atdata/_hf_api.py CHANGED
@@ -32,6 +32,7 @@ import re
 import threading
 from pathlib import Path
 from typing import (
+    Any,
     TYPE_CHECKING,
     Generic,
     Mapping,
@@ -65,7 +66,7 @@ def get_default_index() -> "Index":  # noqa: F821
     """Get or create the module-level default Index.

     The default Index uses Redis for local storage (backwards-compatible
-    default) and an anonymous AtmosphereClient for read-only public data
+    default) and an anonymous Atmosphere for read-only public data
     resolution.

     The default is created lazily on first access and cached for the
@@ -189,6 +190,37 @@ class DatasetDict(Generic[ST], dict):
         """
         return {name: len(ds.list_shards()) for name, ds in self.items()}

+    # Methods proxied to the sole Dataset when only one split exists.
+    _DATASET_METHODS = frozenset(
+        {
+            "ordered",
+            "shuffled",
+            "as_type",
+            "list_shards",
+            "head",
+        }
+    )
+
+    def __getattr__(self, name: str) -> Any:
+        """Proxy common Dataset methods when this dict has exactly one split.
+
+        When a ``DatasetDict`` contains a single split, calling iteration
+        methods like ``.ordered()`` or ``.shuffled()`` is forwarded to the
+        contained ``Dataset`` for convenience. Multi-split dicts raise
+        ``AttributeError`` with a hint to select a split explicitly.
+        """
+        if name in self._DATASET_METHODS:
+            if len(self) == 1:
+                return getattr(next(iter(self.values())), name)
+            splits = ", ".join(f"'{k}'" for k in self.keys())
+            raise AttributeError(
+                f"'{type(self).__name__}' has {len(self)} splits ({splits}). "
+                f"Select one first, e.g. ds_dict['{next(iter(self.keys()))}'].{name}()"
+            )
+        raise AttributeError(
+            f"'{type(self).__name__}' object has no attribute '{name}'"
+        )
+

 ##
 # Path resolution utilities
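The single-split proxy lets a one-split dataset dict behave like the underlying Dataset. A hedged usage sketch (the loading call and split name are illustrative assumptions, not taken from this diff):

    # Assuming a DatasetDict with a single 'train' split:
    ds_dict = atdata.load_dataset("some-org/some-dataset")

    for sample in ds_dict.ordered():    # forwarded to ds_dict['train'].ordered()
        ...

    # With two or more splits the proxy raises AttributeError and suggests
    # selecting a split explicitly, e.g. ds_dict['train'].ordered().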
atdata/_protocols.py CHANGED
@@ -1,37 +1,25 @@
 """Protocol definitions for atdata index and storage abstractions.

-This module defines the abstract protocols that enable interchangeable
-index backends (local Redis vs ATProto PDS) and data stores (S3 vs PDS blobs).
-
-The key insight is that both local and atmosphere implementations solve the
-same problem: indexed dataset storage with external data URLs. These protocols
-formalize that common interface.
-
-Note:
-    Protocol methods use ``...`` (Ellipsis) as the body per PEP 544. This is
-    the standard Python syntax for Protocol definitions - these are interface
-    specifications, not stub implementations. Concrete classes (Index,
-    AtmosphereIndex, etc.) provide the actual implementations.
+Defines the abstract protocols that enable interchangeable index backends
+(local SQLite/Redis vs ATProto PDS) and data stores (S3, local disk, PDS blobs).

 Protocols:
-    Packable: Structural interface for packable sample types (lens compatibility)
+    Packable: Structural interface for packable sample types
     IndexEntry: Common interface for dataset index entries
     AbstractIndex: Protocol for index operations (schemas, datasets, lenses)
     AbstractDataStore: Protocol for data storage operations
+    DataSource: Protocol for streaming shard data

 Examples:
     >>> def process_datasets(index: AbstractIndex) -> None:
     ...     for entry in index.list_datasets():
     ...         print(f"{entry.name}: {entry.data_urls}")
-    ...
-    >>> # Works with either Index or AtmosphereIndex
-    >>> process_datasets(local_index)
-    >>> process_datasets(atmosphere_index)
 """

 from typing import (
     IO,
     Any,
+    Iterable,
     Iterator,
     Optional,
     Protocol,
@@ -115,7 +103,7 @@ class IndexEntry(Protocol):
         """Schema reference string.

         Local: ``local://schemas/{module.Class}@{version}``
-        Atmosphere: ``at://did:plc:.../ac.foundation.dataset.sampleSchema/...``
+        Atmosphere: ``at://did:plc:.../ac.foundation.dataset.schema/...``
         """
         ...

@@ -137,32 +125,16 @@


 class AbstractIndex(Protocol):
-    """Protocol for index operations - implemented by Index and AtmosphereIndex.
-
-    This protocol defines the common interface for managing dataset metadata:
-    - Publishing and retrieving schemas
-    - Inserting and listing datasets
-    - (Future) Publishing and retrieving lenses
-
-    A single index can hold datasets of many different sample types. The sample
-    type is tracked via schema references, not as a generic parameter on the index.
+    """Protocol for index operations implemented by Index and AtmosphereIndex.

-    Optional Extensions:
-        Some index implementations support additional features:
-        - ``data_store``: An AbstractDataStore for reading/writing dataset shards.
-          If present, ``load_dataset`` will use it for S3 credential resolution.
+    Manages dataset metadata: publishing/retrieving schemas, inserting/listing
+    datasets. A single index holds datasets of many sample types, tracked via
+    schema references.

     Examples:
         >>> def publish_and_list(index: AbstractIndex) -> None:
-        ...     # Publish schemas for different types
-        ...     schema1 = index.publish_schema(ImageSample, version="1.0.0")
-        ...     schema2 = index.publish_schema(TextSample, version="1.0.0")
-        ...
-        ...     # Insert datasets of different types
+        ...     index.publish_schema(ImageSample, version="1.0.0")
         ...     index.insert_dataset(image_ds, name="images")
-        ...     index.insert_dataset(text_ds, name="texts")
-        ...
-        ...     # List all datasets (mixed types)
         ...     for entry in index.list_datasets():
         ...         print(f"{entry.name} -> {entry.schema_ref}")
     """
@@ -171,55 +143,58 @@ class AbstractIndex(Protocol):
     def data_store(self) -> Optional["AbstractDataStore"]:
         """Optional data store for reading/writing shards.

-        If present, ``load_dataset`` will use it for credential resolution
-        (e.g., S3 credentials from S3DataStore).
-
-        Returns:
-            AbstractDataStore instance, or None if this index doesn't have
-            an associated data store.
-
-        Note:
-            Not all index implementations provide a data_store. Use
-            ``hasattr(index, 'data_store') and index.data_store is not None``
-            for safe access.
+        If present, ``load_dataset`` uses it for credential resolution.
+        Not all implementations provide a data_store; check with
+        ``getattr(index, 'data_store', None)``.
         """
         ...

     # Dataset operations

-    def insert_dataset(
+    def write(
         self,
-        ds: "Dataset",
+        samples: Iterable,
         *,
         name: str,
         schema_ref: Optional[str] = None,
         **kwargs,
     ) -> IndexEntry:
-        """Insert a dataset into the index.
+        """Write samples and create an index entry in one step.

-        The sample type is inferred from ``ds.sample_type``. If schema_ref is not
-        provided, the schema may be auto-published based on the sample type.
+        Serializes samples to WebDataset tar files, stores them via the
+        appropriate backend, and creates an index entry.

         Args:
-            ds: The Dataset to register in the index (any sample type).
-            name: Human-readable name for the dataset.
-            schema_ref: Optional explicit schema reference. If not provided,
-                the schema may be auto-published or inferred from ds.sample_type.
-            **kwargs: Additional backend-specific options.
+            samples: Iterable of Packable samples. Must be non-empty.
+            name: Dataset name, optionally prefixed with target backend.
+            schema_ref: Optional schema reference.
+            **kwargs: Backend-specific options (maxcount, description, etc.).

         Returns:
-            IndexEntry for the inserted dataset.
+            IndexEntry for the created dataset.
         """
         ...

-    def get_dataset(self, ref: str) -> IndexEntry:
-        """Get a dataset entry by name or reference.
+    def insert_dataset(
+        self,
+        ds: "Dataset",
+        *,
+        name: str,
+        schema_ref: Optional[str] = None,
+        **kwargs,
+    ) -> IndexEntry:
+        """Register an existing dataset in the index.

         Args:
-            ref: Dataset name, path, or full reference string.
+            ds: The Dataset to register.
+            name: Human-readable name.
+            schema_ref: Explicit schema ref; auto-published if ``None``.
+            **kwargs: Backend-specific options.
+        """
+        ...

-        Returns:
-            IndexEntry for the dataset.
+    def get_dataset(self, ref: str) -> IndexEntry:
+        """Get a dataset entry by name or reference.

         Raises:
             KeyError: If dataset not found.
@@ -242,33 +217,19 @@
     ) -> str:
         """Publish a schema for a sample type.

-        The sample_type is accepted as ``type`` rather than ``Type[Packable]`` to
-        support ``@packable``-decorated classes, which satisfy the Packable protocol
-        at runtime but cannot be statically verified by type checkers.
-
         Args:
-            sample_type: A Packable type (PackableSample subclass or @packable-decorated).
-                Validated at runtime via the @runtime_checkable Packable protocol.
-            version: Semantic version string for the schema.
-            **kwargs: Additional backend-specific options.
+            sample_type: A Packable type (``@packable``-decorated or subclass).
+            version: Semantic version string.
+            **kwargs: Backend-specific options.

         Returns:
-            Schema reference string:
-            - Local: 'local://schemas/{module.Class}@{version}'
-            - Atmosphere: 'at://did:plc:.../ac.foundation.dataset.sampleSchema/...'
+            Schema reference string (``local://...`` or ``at://...``).
         """
         ...

     def get_schema(self, ref: str) -> dict:
         """Get a schema record by reference.

-        Args:
-            ref: Schema reference string (local:// or at://).
-
-        Returns:
-            Schema record as a dictionary with fields like 'name', 'version',
-            'fields', etc.
-
         Raises:
             KeyError: If schema not found.
         """
@@ -280,30 +241,15 @@
     def list_schemas(self) -> list[dict]: ...

     def decode_schema(self, ref: str) -> Type[Packable]:
-        """Reconstruct a Python Packable type from a stored schema.
-
-        This method enables loading datasets without knowing the sample type
-        ahead of time. The index retrieves the schema record and dynamically
-        generates a Packable class matching the schema definition.
-
-        Args:
-            ref: Schema reference string (local:// or at://).
-
-        Returns:
-            A dynamically generated Packable class with fields matching
-            the schema definition. The class can be used with
-            ``Dataset[T]`` to load and iterate over samples.
+        """Reconstruct a Packable type from a stored schema.

         Raises:
             KeyError: If schema not found.
-            ValueError: If schema cannot be decoded (unsupported field types).
+            ValueError: If schema has unsupported field types.

         Examples:
-            >>> entry = index.get_dataset("my-dataset")
             >>> SampleType = index.decode_schema(entry.schema_ref)
             >>> ds = Dataset[SampleType](entry.data_urls[0])
-            >>> for sample in ds.ordered():
-            ...     print(sample)  # sample is instance of SampleType
         """
         ...

@@ -313,21 +259,14 @@


 class AbstractDataStore(Protocol):
-    """Protocol for data storage operations.
-
-    This protocol abstracts over different storage backends for dataset data:
-    - S3DataStore: S3-compatible object storage
-    - PDSBlobStore: ATProto PDS blob storage (future)
+    """Protocol for data storage backends (S3, local disk, PDS blobs).

-    The separation of index (metadata) from data store (actual files) allows
-    flexible deployment: local index with S3 storage, atmosphere index with
-    S3 storage, or atmosphere index with PDS blobs.
+    Separates index (metadata) from data store (shard files), enabling
+    flexible deployment combinations.

     Examples:
         >>> store = S3DataStore(credentials, bucket="my-bucket")
         >>> urls = store.write_shards(dataset, prefix="training/v1")
-        >>> print(urls)
-        ['s3://my-bucket/training/v1/shard-000000.tar', ...]
     """

     def write_shards(
@@ -341,28 +280,16 @@ class AbstractDataStore(Protocol):

         Args:
             ds: The Dataset to write.
-            prefix: Path prefix for the shards (e.g., 'datasets/mnist/v1').
-            **kwargs: Backend-specific options (e.g., maxcount for shard size).
+            prefix: Path prefix (e.g., ``'datasets/mnist/v1'``).
+            **kwargs: Backend-specific options (``maxcount``, ``maxsize``, etc.).

         Returns:
-            List of URLs for the written shards, suitable for use with
-            WebDataset or atdata.Dataset().
+            List of shard URLs suitable for ``atdata.Dataset()``.
         """
         ...

     def read_url(self, url: str) -> str:
-        """Resolve a storage URL for reading.
-
-        Some storage backends may need to transform URLs (e.g., signing S3 URLs
-        or resolving blob references). This method returns a URL that can be
-        used directly with WebDataset.
-
-        Args:
-            url: Storage URL to resolve.
-
-        Returns:
-            WebDataset-compatible URL for reading.
-        """
+        """Resolve a storage URL for reading (e.g., sign S3 URLs)."""
         ...

     def supports_streaming(self) -> bool: ...
@@ -374,77 +301,32 @@

 @runtime_checkable
 class DataSource(Protocol):
-    """Protocol for data sources that provide streams to Dataset.
-
-    A DataSource abstracts over different ways of accessing dataset shards:
-    - URLSource: Standard WebDataset-compatible URLs (http, https, pipe, gs, etc.)
-    - S3Source: S3-compatible storage with explicit credentials
-    - BlobSource: ATProto blob references (future)
+    """Protocol for data sources that stream shard data to Dataset.

-    The key method is ``shards()``, which yields (identifier, stream) pairs.
-    These are fed directly to WebDataset's tar_file_expander, bypassing URL
-    resolution entirely. This enables:
-    - Private S3 repos with credentials
-    - Custom endpoints (Cloudflare R2, MinIO)
-    - ATProto blob streaming
-    - Any other source that can provide file-like objects
+    Implementations (URLSource, S3Source, BlobSource) yield
+    ``(identifier, stream)`` pairs fed to WebDataset's tar expander,
+    bypassing URL resolution. This enables private S3, custom endpoints,
+    and ATProto blob streaming.

     Examples:
-        >>> source = S3Source(
-        ...     bucket="my-bucket",
-        ...     keys=["data-000.tar", "data-001.tar"],
-        ...     endpoint="https://r2.example.com",
-        ...     credentials=creds,
-        ... )
+        >>> source = S3Source(bucket="my-bucket", keys=["data-000.tar"])
         >>> ds = Dataset[MySample](source)
-        >>> for sample in ds.ordered():
-        ...     print(sample)
     """

     @property
     def shards(self) -> Iterator[tuple[str, IO[bytes]]]:
-        """Lazily yield (identifier, stream) pairs for each shard.
-
-        The identifier is used for error messages and __url__ metadata.
-        The stream must be a file-like object that can be read by tarfile.
-
-        Yields:
-            Tuple of (shard_identifier, file_like_stream).
-
-        Examples:
-            >>> for shard_id, stream in source.shards:
-            ...     print(f"Processing {shard_id}")
-            ...     data = stream.read()
-        """
+        """Lazily yield ``(shard_id, stream)`` pairs for each shard."""
         ...

     def list_shards(self) -> list[str]:
-        """Get list of shard identifiers without opening streams.
-
-        Used for metadata queries like counting shards without actually
-        streaming data. Implementations should return identifiers that
-        match what shards would yield.
-
-        Returns:
-            List of shard identifier strings.
-        """
+        """Shard identifiers without opening streams."""
         ...

     def open_shard(self, shard_id: str) -> IO[bytes]:
-        """Open a single shard by its identifier.
-
-        This method enables random access to individual shards, which is
-        required for PyTorch DataLoader worker splitting. Each worker opens
-        only its assigned shards rather than iterating all shards.
-
-        Args:
-            shard_id: Shard identifier from list_shards().
-
-        Returns:
-            File-like stream for reading the shard.
+        """Open a single shard for random access (e.g., DataLoader splitting).

         Raises:
-            KeyError: If shard_id is not in list_shards().
+            KeyError: If *shard_id* is not in ``list_shards()``.
         """
         ...

atdata/_schema_codec.py CHANGED
@@ -284,7 +284,7 @@ def generate_stub(schema: dict) -> str:
         String content for a .pyi stub file.

     Examples:
-        >>> schema = index.get_schema("atdata://local/sampleSchema/MySample@1.0.0")
+        >>> schema = index.get_schema("atdata://local/schema/MySample@1.0.0")
         >>> stub_content = generate_stub(schema.to_dict())
         >>> # Save to a stubs directory configured in your IDE
         >>> with open("stubs/my_sample.pyi", "w") as f:
@@ -360,7 +360,7 @@ def generate_module(schema: dict) -> str:
         String content for a .py module file.

     Examples:
-        >>> schema = index.get_schema("atdata://local/sampleSchema/MySample@1.0.0")
+        >>> schema = index.get_schema("atdata://local/schema/MySample@1.0.0")
         >>> module_content = generate_module(schema.to_dict())
         >>> # The module can be imported after being saved
     """
atdata/_stub_manager.py CHANGED
@@ -15,7 +15,7 @@ Examples:
     >>> index = Index(auto_stubs=True)
     >>>
     >>> # Modules are generated automatically on decode_schema
-    >>> MyType = index.decode_schema("atdata://local/sampleSchema/MySample@1.0.0")
+    >>> MyType = index.decode_schema("atdata://local/schema/MySample@1.0.0")
     >>> # MyType is now properly typed for IDE autocomplete!
     >>>
     >>> # Get the stub directory path for IDE configuration
@@ -51,8 +51,8 @@ def _extract_authority(schema_ref: Optional[str]) -> str:
     """Extract authority from a schema reference URI.

     Args:
-        schema_ref: Schema ref like "atdata://local/sampleSchema/Name@1.0.0"
-            or "atdata://alice.bsky.social/sampleSchema/Name@1.0.0"
+        schema_ref: Schema ref like "atdata://local/schema/Name@1.0.0"
+            or "atdata://alice.bsky.social/schema/Name@1.0.0"

     Returns:
         Authority string (e.g., "local", "alice.bsky.social", "did_plc_xxx").
@@ -149,10 +149,6 @@ class StubManager:
         safe_version = version.replace(".", "_")
         return f"{name}_{safe_version}.py"

-    def _stub_filename(self, name: str, version: str) -> str:
-        """Alias for _module_filename for backwards compatibility."""
-        return self._module_filename(name, version)
-
     def _module_path(
         self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
     ) -> Path:
@@ -168,12 +164,6 @@
         """
         return self._stub_dir / authority / self._module_filename(name, version)

-    def _stub_path(
-        self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
-    ) -> Path:
-        """Alias for _module_path for backwards compatibility."""
-        return self._module_path(name, version, authority)
-
     def _module_is_current(self, path: Path, version: str) -> bool:
         """Check if an existing module file matches the expected version.

@@ -200,10 +190,6 @@
         except (OSError, IOError):
             return False

-    def _stub_is_current(self, path: Path, version: str) -> bool:
-        """Alias for _module_is_current for backwards compatibility."""
-        return self._module_is_current(path, version)
-
     def _ensure_authority_package(self, authority: str) -> None:
         """Ensure authority subdirectory exists with __init__.py."""
         self._ensure_dir_exists()
@@ -261,12 +247,6 @@
                 pass  # Temp file cleanup failed, re-raising original error
             raise

-    def _write_stub_atomic(self, path: Path, content: str) -> None:
-        """Legacy method - extracts authority from path and calls _write_module_atomic."""
-        # Extract authority from path (parent directory name)
-        authority = path.parent.name
-        self._write_module_atomic(path, content, authority)
-
     def ensure_stub(self, schema: dict) -> Optional[Path]:
         """Ensure a module file exists for the given schema.

@@ -426,7 +406,7 @@
         Returns:
             Path if stub exists, None otherwise
         """
-        path = self._stub_path(name, version, authority)
+        path = self._module_path(name, version, authority)
         return path if path.exists() else None

     def list_stubs(self, authority: Optional[str] = None) -> list[Path]:
@@ -513,7 +493,7 @@
         Returns:
             True if file was removed, False if it didn't exist
         """
-        path = self._stub_path(name, version, authority)
+        path = self._module_path(name, version, authority)
         if path.exists():
             try:
                 path.unlink()