atdata 0.2.3b1-py3-none-any.whl → 0.3.1b1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atdata/.gitignore +1 -0
- atdata/__init__.py +39 -0
- atdata/_cid.py +0 -21
- atdata/_exceptions.py +168 -0
- atdata/_helpers.py +41 -15
- atdata/_hf_api.py +95 -11
- atdata/_logging.py +70 -0
- atdata/_protocols.py +77 -238
- atdata/_schema_codec.py +7 -6
- atdata/_stub_manager.py +5 -25
- atdata/_type_utils.py +28 -2
- atdata/atmosphere/__init__.py +31 -20
- atdata/atmosphere/_types.py +4 -4
- atdata/atmosphere/client.py +64 -12
- atdata/atmosphere/lens.py +11 -12
- atdata/atmosphere/records.py +12 -12
- atdata/atmosphere/schema.py +16 -18
- atdata/atmosphere/store.py +6 -7
- atdata/cli/__init__.py +161 -175
- atdata/cli/diagnose.py +2 -2
- atdata/cli/{local.py → infra.py} +11 -11
- atdata/cli/inspect.py +69 -0
- atdata/cli/preview.py +63 -0
- atdata/cli/schema.py +109 -0
- atdata/dataset.py +583 -328
- atdata/index/__init__.py +54 -0
- atdata/index/_entry.py +157 -0
- atdata/index/_index.py +1198 -0
- atdata/index/_schema.py +380 -0
- atdata/lens.py +9 -2
- atdata/lexicons/__init__.py +121 -0
- atdata/lexicons/ac.foundation.dataset.arrayFormat.json +16 -0
- atdata/lexicons/ac.foundation.dataset.getLatestSchema.json +78 -0
- atdata/lexicons/ac.foundation.dataset.lens.json +99 -0
- atdata/lexicons/ac.foundation.dataset.record.json +96 -0
- atdata/lexicons/ac.foundation.dataset.schema.json +107 -0
- atdata/lexicons/ac.foundation.dataset.schemaType.json +16 -0
- atdata/lexicons/ac.foundation.dataset.storageBlobs.json +24 -0
- atdata/lexicons/ac.foundation.dataset.storageExternal.json +25 -0
- atdata/lexicons/ndarray_shim.json +16 -0
- atdata/local/__init__.py +70 -0
- atdata/local/_repo_legacy.py +218 -0
- atdata/manifest/__init__.py +28 -0
- atdata/manifest/_aggregates.py +156 -0
- atdata/manifest/_builder.py +163 -0
- atdata/manifest/_fields.py +154 -0
- atdata/manifest/_manifest.py +146 -0
- atdata/manifest/_query.py +150 -0
- atdata/manifest/_writer.py +74 -0
- atdata/promote.py +18 -14
- atdata/providers/__init__.py +25 -0
- atdata/providers/_base.py +140 -0
- atdata/providers/_factory.py +69 -0
- atdata/providers/_postgres.py +214 -0
- atdata/providers/_redis.py +171 -0
- atdata/providers/_sqlite.py +191 -0
- atdata/repository.py +323 -0
- atdata/stores/__init__.py +23 -0
- atdata/stores/_disk.py +123 -0
- atdata/stores/_s3.py +349 -0
- atdata/testing.py +341 -0
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/METADATA +5 -2
- atdata-0.3.1b1.dist-info/RECORD +67 -0
- atdata/local.py +0 -1720
- atdata-0.2.3b1.dist-info/RECORD +0 -28
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/WHEEL +0 -0
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/entry_points.txt +0 -0
- {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/licenses/LICENSE +0 -0
atdata/_protocols.py
CHANGED
@@ -1,37 +1,25 @@
 """Protocol definitions for atdata index and storage abstractions.
 
-
-
-
-The key insight is that both local and atmosphere implementations solve the
-same problem: indexed dataset storage with external data URLs. These protocols
-formalize that common interface.
-
-Note:
-    Protocol methods use ``...`` (Ellipsis) as the body per PEP 544. This is
-    the standard Python syntax for Protocol definitions - these are interface
-    specifications, not stub implementations. Concrete classes (LocalIndex,
-    AtmosphereIndex, etc.) provide the actual implementations.
+Defines the abstract protocols that enable interchangeable index backends
+(local SQLite/Redis vs ATProto PDS) and data stores (S3, local disk, PDS blobs).
 
 Protocols:
-    Packable: Structural interface for packable sample types
+    Packable: Structural interface for packable sample types
     IndexEntry: Common interface for dataset index entries
     AbstractIndex: Protocol for index operations (schemas, datasets, lenses)
     AbstractDataStore: Protocol for data storage operations
+    DataSource: Protocol for streaming shard data
 
 Examples:
     >>> def process_datasets(index: AbstractIndex) -> None:
     ...     for entry in index.list_datasets():
     ...         print(f"{entry.name}: {entry.data_urls}")
-    ...
-    >>> # Works with either LocalIndex or AtmosphereIndex
-    >>> process_datasets(local_index)
-    >>> process_datasets(atmosphere_index)
 """
 
 from typing import (
     IO,
     Any,
+    Iterable,
     Iterator,
     Optional,
     Protocol,

@@ -77,24 +65,16 @@ class Packable(Protocol):
     """
 
     @classmethod
-    def from_data(cls, data: dict[str, Any]) -> "Packable":
-        """Create instance from unpacked msgpack data dictionary."""
-        ...
+    def from_data(cls, data: dict[str, Any]) -> "Packable": ...
 
     @classmethod
-    def from_bytes(cls, bs: bytes) -> "Packable":
-        """Create instance from raw msgpack bytes."""
-        ...
+    def from_bytes(cls, bs: bytes) -> "Packable": ...
 
     @property
-    def packed(self) -> bytes:
-        """Pack this sample's data into msgpack bytes."""
-        ...
+    def packed(self) -> bytes: ...
 
     @property
-    def as_wds(self) -> dict[str, Any]:
-        """WebDataset-compatible representation with __key__ and msgpack."""
-        ...
+    def as_wds(self) -> dict[str, Any]: ...
 
 
 ##

@@ -116,16 +96,14 @@ class IndexEntry(Protocol):
     """
 
     @property
-    def name(self) -> str:
-        """Human-readable dataset name."""
-        ...
+    def name(self) -> str: ...
 
     @property
     def schema_ref(self) -> str:
-        """
+        """Schema reference string.
 
-
-
+        Local: ``local://schemas/{module.Class}@{version}``
+        Atmosphere: ``at://did:plc:.../ac.foundation.dataset.schema/...``
         """
         ...
 

@@ -139,9 +117,7 @@ class IndexEntry(Protocol):
         ...
 
     @property
-    def metadata(self) -> Optional[dict]:
-        """Arbitrary metadata dictionary, or None if not set."""
-        ...
+    def metadata(self) -> Optional[dict]: ...
 
 
 ##

@@ -149,32 +125,16 @@ class IndexEntry(Protocol):
 
 
 class AbstractIndex(Protocol):
-    """Protocol for index operations
-
-    This protocol defines the common interface for managing dataset metadata:
-    - Publishing and retrieving schemas
-    - Inserting and listing datasets
-    - (Future) Publishing and retrieving lenses
+    """Protocol for index operations — implemented by Index and AtmosphereIndex.
 
-
-
-
-    Optional Extensions:
-        Some index implementations support additional features:
-        - ``data_store``: An AbstractDataStore for reading/writing dataset shards.
-          If present, ``load_dataset`` will use it for S3 credential resolution.
+    Manages dataset metadata: publishing/retrieving schemas, inserting/listing
+    datasets. A single index holds datasets of many sample types, tracked via
+    schema references.
 
     Examples:
         >>> def publish_and_list(index: AbstractIndex) -> None:
-        ...
-        ...     schema1 = index.publish_schema(ImageSample, version="1.0.0")
-        ...     schema2 = index.publish_schema(TextSample, version="1.0.0")
-        ...
-        ...     # Insert datasets of different types
+        ...     index.publish_schema(ImageSample, version="1.0.0")
        ...     index.insert_dataset(image_ds, name="images")
-        ...     index.insert_dataset(text_ds, name="texts")
-        ...
-        ...     # List all datasets (mixed types)
         ...     for entry in index.list_datasets():
         ...         print(f"{entry.name} -> {entry.schema_ref}")
     """

@@ -183,55 +143,58 @@ class AbstractIndex(Protocol):
     def data_store(self) -> Optional["AbstractDataStore"]:
         """Optional data store for reading/writing shards.
 
-        If present, ``load_dataset``
-
-
-        Returns:
-            AbstractDataStore instance, or None if this index doesn't have
-            an associated data store.
-
-        Note:
-            Not all index implementations provide a data_store. Use
-            ``hasattr(index, 'data_store') and index.data_store is not None``
-            for safe access.
+        If present, ``load_dataset`` uses it for credential resolution.
+        Not all implementations provide a data_store; check with
+        ``getattr(index, 'data_store', None)``.
         """
         ...
 
     # Dataset operations
 
-    def
+    def write(
         self,
-
+        samples: Iterable,
         *,
         name: str,
         schema_ref: Optional[str] = None,
         **kwargs,
     ) -> IndexEntry:
-        """
+        """Write samples and create an index entry in one step.
 
-
-
+        Serializes samples to WebDataset tar files, stores them via the
+        appropriate backend, and creates an index entry.
 
         Args:
-
-            name:
-            schema_ref: Optional
-
-            **kwargs: Additional backend-specific options.
+            samples: Iterable of Packable samples. Must be non-empty.
+            name: Dataset name, optionally prefixed with target backend.
+            schema_ref: Optional schema reference.
+            **kwargs: Backend-specific options (maxcount, description, etc.).
 
         Returns:
-            IndexEntry for the
+            IndexEntry for the created dataset.
         """
         ...
 
-    def
-
+    def insert_dataset(
+        self,
+        ds: "Dataset",
+        *,
+        name: str,
+        schema_ref: Optional[str] = None,
+        **kwargs,
+    ) -> IndexEntry:
+        """Register an existing dataset in the index.
 
         Args:
-
+            ds: The Dataset to register.
+            name: Human-readable name.
+            schema_ref: Explicit schema ref; auto-published if ``None``.
+            **kwargs: Backend-specific options.
+        """
+        ...
 
-
-
+    def get_dataset(self, ref: str) -> IndexEntry:
+        """Get a dataset entry by name or reference.
 
         Raises:
             KeyError: If dataset not found.

@@ -239,21 +202,9 @@
         ...
 
     @property
-    def datasets(self) -> Iterator[IndexEntry]:
-        """Lazily iterate over all dataset entries in this index.
-
-        Yields:
-            IndexEntry for each dataset (may be of different sample types).
-        """
-        ...
-
-    def list_datasets(self) -> list[IndexEntry]:
-        """Get all dataset entries as a materialized list.
+    def datasets(self) -> Iterator[IndexEntry]: ...
 
-
-            List of IndexEntry for each dataset.
-        """
-        ...
+    def list_datasets(self) -> list[IndexEntry]: ...
 
     # Schema operations
 

@@ -266,80 +217,39 @@ class AbstractIndex(Protocol):
     ) -> str:
         """Publish a schema for a sample type.
 
-        The sample_type is accepted as ``type`` rather than ``Type[Packable]`` to
-        support ``@packable``-decorated classes, which satisfy the Packable protocol
-        at runtime but cannot be statically verified by type checkers.
-
         Args:
-            sample_type: A Packable type (
-
-
-            **kwargs: Additional backend-specific options.
+            sample_type: A Packable type (``@packable``-decorated or subclass).
+            version: Semantic version string.
+            **kwargs: Backend-specific options.
 
         Returns:
-            Schema reference string
-            - Local: 'local://schemas/{module.Class}@{version}'
-            - Atmosphere: 'at://did:plc:.../ac.foundation.dataset.sampleSchema/...'
+            Schema reference string (``local://...`` or ``at://...``).
         """
         ...
 
     def get_schema(self, ref: str) -> dict:
         """Get a schema record by reference.
 
-        Args:
-            ref: Schema reference string (local:// or at://).
-
-        Returns:
-            Schema record as a dictionary with fields like 'name', 'version',
-            'fields', etc.
-
         Raises:
             KeyError: If schema not found.
         """
         ...
 
     @property
-    def schemas(self) -> Iterator[dict]:
-        """Lazily iterate over all schema records in this index.
-
-        Yields:
-            Schema records as dictionaries.
-        """
-        ...
+    def schemas(self) -> Iterator[dict]: ...
 
-    def list_schemas(self) -> list[dict]:
-        """Get all schema records as a materialized list.
-
-        Returns:
-            List of schema records as dictionaries.
-        """
-        ...
+    def list_schemas(self) -> list[dict]: ...
 
     def decode_schema(self, ref: str) -> Type[Packable]:
-        """Reconstruct a
-
-        This method enables loading datasets without knowing the sample type
-        ahead of time. The index retrieves the schema record and dynamically
-        generates a Packable class matching the schema definition.
-
-        Args:
-            ref: Schema reference string (local:// or at://).
-
-        Returns:
-            A dynamically generated Packable class with fields matching
-            the schema definition. The class can be used with
-            ``Dataset[T]`` to load and iterate over samples.
+        """Reconstruct a Packable type from a stored schema.
 
         Raises:
             KeyError: If schema not found.
-            ValueError: If schema
+            ValueError: If schema has unsupported field types.
 
         Examples:
-            >>> entry = index.get_dataset("my-dataset")
             >>> SampleType = index.decode_schema(entry.schema_ref)
             >>> ds = Dataset[SampleType](entry.data_urls[0])
-            >>> for sample in ds.ordered():
-            ...     print(sample)  # sample is instance of SampleType
         """
         ...
 

@@ -349,21 +259,14 @@ class AbstractIndex(Protocol):
 
 
 class AbstractDataStore(Protocol):
-    """Protocol for data storage
-
-    This protocol abstracts over different storage backends for dataset data:
-    - S3DataStore: S3-compatible object storage
-    - PDSBlobStore: ATProto PDS blob storage (future)
+    """Protocol for data storage backends (S3, local disk, PDS blobs).
 
-
-    flexible deployment
-    S3 storage, or atmosphere index with PDS blobs.
+    Separates index (metadata) from data store (shard files), enabling
+    flexible deployment combinations.
 
     Examples:
         >>> store = S3DataStore(credentials, bucket="my-bucket")
         >>> urls = store.write_shards(dataset, prefix="training/v1")
-        >>> print(urls)
-        ['s3://my-bucket/training/v1/shard-000000.tar', ...]
     """
 
     def write_shards(

@@ -377,38 +280,19 @@ class AbstractDataStore(Protocol):
 
         Args:
             ds: The Dataset to write.
-            prefix: Path prefix
-            **kwargs: Backend-specific options (
+            prefix: Path prefix (e.g., ``'datasets/mnist/v1'``).
+            **kwargs: Backend-specific options (``maxcount``, ``maxsize``, etc.).
 
         Returns:
-            List of URLs
-            WebDataset or atdata.Dataset().
+            List of shard URLs suitable for ``atdata.Dataset()``.
         """
         ...
 
     def read_url(self, url: str) -> str:
-        """Resolve a storage URL for reading.
-
-        Some storage backends may need to transform URLs (e.g., signing S3 URLs
-        or resolving blob references). This method returns a URL that can be
-        used directly with WebDataset.
-
-        Args:
-            url: Storage URL to resolve.
-
-        Returns:
-            WebDataset-compatible URL for reading.
-        """
+        """Resolve a storage URL for reading (e.g., sign S3 URLs)."""
         ...
 
-    def supports_streaming(self) -> bool:
-        """Whether this store supports streaming reads.
-
-        Returns:
-            True if the store supports efficient streaming (like S3),
-            False if data must be fully downloaded first.
-        """
-        ...
+    def supports_streaming(self) -> bool: ...
 
 
 ##

@@ -417,77 +301,32 @@ class AbstractDataStore(Protocol):
 
 @runtime_checkable
 class DataSource(Protocol):
-    """Protocol for data sources that
+    """Protocol for data sources that stream shard data to Dataset.
 
-
-
-
-
-
-    The key method is ``shards()``, which yields (identifier, stream) pairs.
-    These are fed directly to WebDataset's tar_file_expander, bypassing URL
-    resolution entirely. This enables:
-    - Private S3 repos with credentials
-    - Custom endpoints (Cloudflare R2, MinIO)
-    - ATProto blob streaming
-    - Any other source that can provide file-like objects
+    Implementations (URLSource, S3Source, BlobSource) yield
+    ``(identifier, stream)`` pairs fed to WebDataset's tar expander,
+    bypassing URL resolution. This enables private S3, custom endpoints,
+    and ATProto blob streaming.
 
     Examples:
-        >>> source = S3Source(
-        ...     bucket="my-bucket",
-        ...     keys=["data-000.tar", "data-001.tar"],
-        ...     endpoint="https://r2.example.com",
-        ...     credentials=creds,
-        ... )
+        >>> source = S3Source(bucket="my-bucket", keys=["data-000.tar"])
         >>> ds = Dataset[MySample](source)
-        >>> for sample in ds.ordered():
-        ...     print(sample)
     """
 
     @property
     def shards(self) -> Iterator[tuple[str, IO[bytes]]]:
-        """Lazily yield (
-
-        The identifier is used for error messages and __url__ metadata.
-        The stream must be a file-like object that can be read by tarfile.
-
-        Yields:
-            Tuple of (shard_identifier, file_like_stream).
-
-        Examples:
-            >>> for shard_id, stream in source.shards:
-            ...     print(f"Processing {shard_id}")
-            ...     data = stream.read()
-        """
+        """Lazily yield ``(shard_id, stream)`` pairs for each shard."""
         ...
 
     def list_shards(self) -> list[str]:
-        """
-
-        Used for metadata queries like counting shards without actually
-        streaming data. Implementations should return identifiers that
-        match what shards would yield.
-
-        Returns:
-            List of shard identifier strings.
-        """
+        """Shard identifiers without opening streams."""
         ...
 
     def open_shard(self, shard_id: str) -> IO[bytes]:
-        """Open a single shard
-
-        This method enables random access to individual shards, which is
-        required for PyTorch DataLoader worker splitting. Each worker opens
-        only its assigned shards rather than iterating all shards.
-
-        Args:
-            shard_id: Shard identifier from shard_list.
-
-        Returns:
-            File-like stream for reading the shard.
+        """Open a single shard for random access (e.g., DataLoader splitting).
 
         Raises:
-            KeyError: If shard_id is not in
+            KeyError: If *shard_id* is not in ``list_shards()``.
        """
        ...
atdata/_schema_codec.py
CHANGED
@@ -28,13 +28,14 @@ import hashlib
 
 from numpy.typing import NDArray
 
-# Import PackableSample for inheritance
+# Import PackableSample for inheritance in dynamic class generation
 from .dataset import PackableSample
+from ._protocols import Packable
 
 
 # Type cache to avoid regenerating identical types
 # Uses insertion order (Python 3.7+) for simple FIFO eviction
-_type_cache: dict[str, Type[
+_type_cache: dict[str, Type[Packable]] = {}
 _TYPE_CACHE_MAX_SIZE = 256
 
 

@@ -130,7 +131,7 @@ def schema_to_type(
     schema: dict,
     *,
     use_cache: bool = True,
-) -> Type[
+) -> Type[Packable]:
     """Generate a PackableSample subclass from a schema record.
 
     This function dynamically creates a dataclass that inherits from PackableSample,

@@ -283,7 +284,7 @@ def generate_stub(schema: dict) -> str:
         String content for a .pyi stub file.
 
     Examples:
-        >>> schema = index.get_schema("atdata://local/
+        >>> schema = index.get_schema("atdata://local/schema/MySample@1.0.0")
         >>> stub_content = generate_stub(schema.to_dict())
         >>> # Save to a stubs directory configured in your IDE
         >>> with open("stubs/my_sample.pyi", "w") as f:

@@ -359,7 +360,7 @@ def generate_module(schema: dict) -> str:
         String content for a .py module file.
 
     Examples:
-        >>> schema = index.get_schema("atdata://local/
+        >>> schema = index.get_schema("atdata://local/schema/MySample@1.0.0")
         >>> module_content = generate_module(schema.to_dict())
         >>> # The module can be imported after being saved
     """

@@ -420,7 +421,7 @@ def clear_type_cache() -> None:
     _type_cache.clear()
 
 
-def get_cached_types() -> dict[str, Type[
+def get_cached_types() -> dict[str, Type[Packable]]:
     """Get a copy of the current type cache.
 
     Returns:
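
The type-cache comments above describe FIFO eviction via dict insertion order, capped at _TYPE_CACHE_MAX_SIZE = 256. A self-contained sketch of that pattern (the cache-insertion helper itself is not shown in this diff):

    from typing import Any

    _type_cache: dict[str, Any] = {}
    _TYPE_CACHE_MAX_SIZE = 256

    def _cache_put(key: str, value: Any) -> None:
        # Dicts preserve insertion order (Python 3.7+), so the first key
        # returned by iter() is the oldest entry; pop it when the cap is hit.
        if len(_type_cache) >= _TYPE_CACHE_MAX_SIZE:
            _type_cache.pop(next(iter(_type_cache)))
        _type_cache[key] = value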
atdata/_stub_manager.py
CHANGED
@@ -15,7 +15,7 @@ Examples:
     >>> index = Index(auto_stubs=True)
     >>>
     >>> # Modules are generated automatically on decode_schema
-    >>> MyType = index.decode_schema("atdata://local/
+    >>> MyType = index.decode_schema("atdata://local/schema/MySample@1.0.0")
     >>> # MyType is now properly typed for IDE autocomplete!
     >>>
     >>> # Get the stub directory path for IDE configuration

@@ -51,8 +51,8 @@ def _extract_authority(schema_ref: Optional[str]) -> str:
     """Extract authority from a schema reference URI.
 
     Args:
-        schema_ref: Schema ref like "atdata://local/
-            or "atdata://alice.bsky.social/
+        schema_ref: Schema ref like "atdata://local/schema/Name@1.0.0"
+            or "atdata://alice.bsky.social/schema/Name@1.0.0"
 
     Returns:
         Authority string (e.g., "local", "alice.bsky.social", "did_plc_xxx").

@@ -149,10 +149,6 @@ class StubManager:
         safe_version = version.replace(".", "_")
         return f"{name}_{safe_version}.py"
 
-    def _stub_filename(self, name: str, version: str) -> str:
-        """Alias for _module_filename for backwards compatibility."""
-        return self._module_filename(name, version)
-
     def _module_path(
         self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
     ) -> Path:

@@ -168,12 +164,6 @@ class StubManager:
         """
         return self._stub_dir / authority / self._module_filename(name, version)
 
-    def _stub_path(
-        self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
-    ) -> Path:
-        """Alias for _module_path for backwards compatibility."""
-        return self._module_path(name, version, authority)
-
     def _module_is_current(self, path: Path, version: str) -> bool:
         """Check if an existing module file matches the expected version.
 

@@ -200,10 +190,6 @@ class StubManager:
         except (OSError, IOError):
             return False
 
-    def _stub_is_current(self, path: Path, version: str) -> bool:
-        """Alias for _module_is_current for backwards compatibility."""
-        return self._module_is_current(path, version)
-
     def _ensure_authority_package(self, authority: str) -> None:
         """Ensure authority subdirectory exists with __init__.py."""
         self._ensure_dir_exists()

@@ -261,12 +247,6 @@ class StubManager:
                 pass  # Temp file cleanup failed, re-raising original error
             raise
 
-    def _write_stub_atomic(self, path: Path, content: str) -> None:
-        """Legacy method - extracts authority from path and calls _write_module_atomic."""
-        # Extract authority from path (parent directory name)
-        authority = path.parent.name
-        self._write_module_atomic(path, content, authority)
-
     def ensure_stub(self, schema: dict) -> Optional[Path]:
         """Ensure a module file exists for the given schema.
 

@@ -426,7 +406,7 @@ class StubManager:
         Returns:
             Path if stub exists, None otherwise
         """
-        path = self.
+        path = self._module_path(name, version, authority)
         return path if path.exists() else None
 
     def list_stubs(self, authority: Optional[str] = None) -> list[Path]:

@@ -513,7 +493,7 @@ class StubManager:
         Returns:
             True if file was removed, False if it didn't exist
         """
-        path = self.
+        path = self._module_path(name, version, authority)
         if path.exists():
             try:
                 path.unlink()
atdata/_type_utils.py
CHANGED
@@ -45,9 +45,13 @@ def numpy_dtype_to_string(dtype: Any) -> str:
         Schema dtype string (e.g., "float32", "int64"). Defaults to "float32".
     """
     dtype_str = str(dtype)
-
+    # Exact match first (handles "float32", "int64", etc.)
+    if dtype_str in NUMPY_DTYPE_MAP:
+        return NUMPY_DTYPE_MAP[dtype_str]
+    # Substring match, longest keys first to avoid "int8" matching "uint8"
+    for key in sorted(NUMPY_DTYPE_MAP, key=len, reverse=True):
         if key in dtype_str:
-            return
+            return NUMPY_DTYPE_MAP[key]
     return "float32"
 
 

@@ -102,3 +106,25 @@ def extract_ndarray_dtype(python_type: Any) -> str:
     if dtype_arg is not None:
         return numpy_dtype_to_string(dtype_arg)
     return "float32"
+
+
+def parse_semver(version: str) -> tuple[int, int, int]:
+    """Parse a semantic version string into a comparable tuple.
+
+    Args:
+        version: A ``"major.minor.patch"`` version string.
+
+    Returns:
+        Tuple of (major, minor, patch) integers.
+
+    Raises:
+        ValueError: If the version string is not valid semver.
+
+    Examples:
+        >>> parse_semver("1.2.3")
+        (1, 2, 3)
+    """
+    parts = version.split(".")
+    if len(parts) != 3:
+        raise ValueError(f"Invalid semver: {version}")
+    return int(parts[0]), int(parts[1]), int(parts[2])