PyPI - atdata - Versions diffs - 0.1.1a3__tar.gz → 0.1.2a3__tar.gz - Mend

atdata 0.1.1a3__tar.gz → 0.1.2a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

{atdata-0.1.1a3 → atdata-0.1.2a3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atdata
-Version: 0.1.1a3
+Version: 0.1.2a3
 Summary: A loose federation of distributed, typed datasets
 Author-email: Maxine Levesque <hello@maxine.science>
 License-File: LICENSE

{atdata-0.1.1a3 → atdata-0.1.2a3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "atdata"
-version = "0.1.1a3"
+version = "0.1.2a3"
 description = "A loose federation of distributed, typed datasets"
 readme = "README.md"
 authors = [

{atdata-0.1.1a3 → atdata-0.1.2a3}/src/atdata/__init__.py RENAMED Viewed

@@ -7,6 +7,7 @@ from .dataset import (
     PackableSample,
     SampleBatch,
     Dataset,
+    packable,
 )

{atdata-0.1.1a3 → atdata-0.1.2a3}/src/atdata/dataset.py RENAMED Viewed

@@ -5,6 +5,7 @@
 import webdataset as wds
+import functools
 from dataclasses import dataclass
 import uuid
@@ -96,7 +97,8 @@ def _make_packable( x ):
 class PackableSample( ABC ):
     """A sample that can be packed and unpacked with msgpack"""
-    def __post_init__( self ):
+    def _ensure_good( self ):
+        """TODO Stupid kludge because of __post_init__ nonsense for wrapped classes"""
         # Auto-convert known types when annotated
         for var_name, var_type in vars( self.__class__ )['__annotations__'].items():
@@ -120,12 +122,17 @@ class PackableSample( ABC ):
                 elif isinstance( var_cur_value, bytes ):
                     setattr( self, var_name, eh.bytes_to_array( var_cur_value ) )
+    def __post_init__( self ):
+        self._ensure_good()
     ##
     @classmethod
     def from_data( cls, data: MsgpackRawSample ) -> Self:
         """Create a sample instance from unpacked msgpack data"""
-        return cls( **data )
+        ret = cls( **data )
+        ret._ensure_good()
+        return ret
     @classmethod
     def from_bytes( cls, bs: bytes ) -> Self:
@@ -415,4 +422,42 @@ class Dataset( Generic[ST] ):
     #     This default implementation simply creates a list one sample at a time
     #     """
     #     assert cls.batch_class is not None, 'No batch class specified'
-    #     return cls.batch_class( **batch )
+    #     return cls.batch_class( **batch )
+##
+# Shortcut decorators
+# def packable( cls ):
+#     """TODO"""
+#     def decorator( cls ):
+#         # Create a new class dynamically
+#         # The new class inherits from the new_parent_class first, then the original cls
+#         new_bases = (PackableSample,) + cls.__bases__
+#         new_cls = type(cls.__name__, new_bases, dict(cls.__dict__))
+#         # Optionally, update __module__ and __qualname__ for better introspection
+#         new_cls.__module__ = cls.__module__
+#         new_cls.__qualname__ = cls.__qualname__
+#         return new_cls
+#     return decorator
+def packable( cls ):
+    """TODO"""
+    ##
+    as_dataclass = dataclass( cls )
+    class as_packable( PackableSample, as_dataclass ):
+        def __post_init__( self ):
+            return PackableSample.__post_init__( self )
+    as_packable.__name__ = cls.__name__
+    as_packable.__annotations__ = cls.__annotations__
+    ##
+    return as_packable

{atdata-0.1.1a3 → atdata-0.1.2a3}/tests/test_dataset.py RENAMED Viewed

@@ -39,6 +39,17 @@ class NumpyTestSample( atdata.PackableSample ):
     label: int
     image: NDArray
+@atdata.packable
+class BasicTestSampleDecorated:
+    name: str
+    position: int
+    value: float
+@atdata.packable
+class NumpyTestSampleDecorated:
+    label: int
+    image: NDArray
 test_cases = [
     {
         'SampleType': BasicTestSample,
@@ -51,13 +62,31 @@ test_cases = [
     },
     {
         'SampleType': NumpyTestSample,
-        'sample_data':
+        'sample_data':
         {
             'label': 9_001,
             'image': np.random.randn( 1024, 1024 ),
         },
         'sample_wds_stem': 'numpy_test',
     },
+    {
+        'SampleType': BasicTestSampleDecorated,
+        'sample_data': {
+            'name': 'Hello, world!',
+            'position': 42,
+            'value': 1024.768,
+        },
+        'sample_wds_stem': 'basic_test_decorated',
+    },
+    {
+        'SampleType': NumpyTestSampleDecorated,
+        'sample_data':
+        {
+            'label': 9_001,
+            'image': np.random.randn( 1024, 1024 ),
+        },
+        'sample_wds_stem': 'numpy_test_decorated',
+    },
 ]
@@ -89,6 +118,35 @@ def test_create_sample(
 #
+# def test_decorator_syntax():
+#     """Test use of decorator syntax for sample types"""
+#     @atdata.packable
+#     class BasicTestSampleDecorated:
+#         name: str
+#         position: int
+#         value: float
+#     @atdata.packable
+#     class NumpyTestSampleDecorated:
+#         label: int
+#         image: NDArray
+#     ##
+#     test_create_sample( BasicTestSampleDecorated, {
+#         'name': 'Hello, world!',
+#         'position': 42,
+#         'value': 1024.768,
+#     } )
+#     test_create_sample( NumpyTestSampleDecorated, {
+#         'label': 9_001,
+#         'image': np.random.randn( 1024, 1024 ),
+#     } )
+#
 @pytest.mark.parametrize(
     ('SampleType', 'sample_data', 'sample_wds_stem'),
     [ (case['SampleType'], case['sample_data'], case['sample_wds_stem'])
@@ -109,7 +167,6 @@ def test_wds(
     batch_size = 4
     n_iterate = 10
     ## Write sharded dataset
     file_pattern = (
@@ -140,7 +197,7 @@ def test_wds(
     iterations_run = 0
     for i_iterate, cur_sample in enumerate( dataset.ordered( batch_size = None ) ):
         assert isinstance( cur_sample, SampleType ), \
             f'Single sample for {SampleType} written to `wds` is of wrong type'
@@ -152,7 +209,7 @@ def test_wds(
             else:
                 is_correct = getattr( cur_sample, k ) == v
             assert is_correct, \
-                f'{SampleType}: Incorrect sample value found for {k}'
+                f'{SampleType}: Incorrect sample value found for {k} - {type( getattr( cur_sample, k ) )}'
         iterations_run += 1
         if iterations_run >= n_iterate:
@@ -166,7 +223,6 @@ def test_wds(
     start_id = f'{0:06d}'
     end_id = f'{9:06d}'
     first_filename = file_pattern.format( shard_id = '{' + start_id + '..' + end_id + '}' )
-    print( first_filename )
     dataset = atdata.Dataset[SampleType]( first_filename )
     iterations_run = 0
@@ -241,7 +297,6 @@ def test_wds(
     start_id = f'{0:06d}'
     end_id = f'{9:06d}'
     first_filename = file_pattern.format( shard_id = '{' + start_id + '..' + end_id + '}' )
-    print( first_filename )
     dataset = atdata.Dataset[SampleType]( first_filename )
     iterations_run = 0

{atdata-0.1.1a3 → atdata-0.1.2a3}/.github/workflows/uv-publish-pypi.yml RENAMED Viewed

File without changes

{atdata-0.1.1a3 → atdata-0.1.2a3}/.github/workflows/uv-test.yml RENAMED Viewed

File without changes

{atdata-0.1.1a3 → atdata-0.1.2a3}/.gitignore RENAMED Viewed

File without changes

{atdata-0.1.1a3 → atdata-0.1.2a3}/.python-version RENAMED Viewed

File without changes

{atdata-0.1.1a3 → atdata-0.1.2a3}/LICENSE RENAMED Viewed

File without changes

{atdata-0.1.1a3 → atdata-0.1.2a3}/README.md RENAMED Viewed

File without changes

{atdata-0.1.1a3 → atdata-0.1.2a3}/src/atdata/_helpers.py RENAMED Viewed

File without changes

atdata 0.1.1a3__tar.gz → 0.1.2a3__tar.gz

atdata 0.1.1a3__tar.gz → 0.1.2a3__tar.gz