atdata 0.1.1a1__py3-none-any.whl → 0.1.1a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atdata/__init__.py CHANGED
@@ -1,2 +1,13 @@
1
- def main() -> None:
2
- print("Hello from ekumen!")
1
+ """A loose federation of distributed, typed datasets"""
2
+
3
+ ##
4
+ # Expose components
5
+
6
+ from .dataset import (
7
+ PackableSample,
8
+ SampleBatch,
9
+ Dataset,
10
+ )
11
+
12
+
13
+ #
atdata/_helpers.py CHANGED
@@ -1,30 +1,22 @@
1
- """Assorted helper methods for `ekumen`"""
1
+ """Assorted helper methods for `atdata`"""
2
2
 
3
3
  ##
4
4
  # Imports
5
5
 
6
6
  from io import BytesIO
7
- import ormsgpack as omp
8
7
 
9
8
  import numpy as np
10
9
 
11
10
 
12
11
  ##
13
- #
14
12
 
15
- def pack_instance( x ) -> bytes:
16
- return omp.packb( x )
17
-
18
- def unpack( bs: bytes ):
19
- return omp.unpackb( bs )
20
-
21
- ##
22
-
23
- def array_to_bytes(x: np.ndarray) -> bytes:
13
+ def array_to_bytes( x: np.ndarray ) -> bytes:
14
+ """Convert `numpy` array to a format suitable for packing"""
24
15
  np_bytes = BytesIO()
25
- np.save(np_bytes, x, allow_pickle=True)
16
+ np.save( np_bytes, x, allow_pickle = True )
26
17
  return np_bytes.getvalue()
27
18
 
28
- def bytes_to_array(b: bytes) -> np.ndarray:
29
- np_bytes = BytesIO(b)
30
- return np.load(np_bytes, allow_pickle=True)
19
+ def bytes_to_array( b: bytes ) -> np.ndarray:
20
+ """Convert packed bytes back to a `numpy` array"""
21
+ np_bytes = BytesIO( b )
22
+ return np.load( np_bytes, allow_pickle = True )
atdata/dataset.py CHANGED
@@ -57,38 +57,38 @@ DT = TypeVar( 'DT' )
57
57
 
58
58
  MsgpackRawSample: TypeAlias = Dict[str, Any]
59
59
 
60
- @dataclass
61
- class ArrayBytes:
62
- """Annotates bytes that should be interpreted as the raw contents of a
63
- numpy NDArray"""
60
+ # @dataclass
61
+ # class ArrayBytes:
62
+ # """Annotates bytes that should be interpreted as the raw contents of a
63
+ # numpy NDArray"""
64
64
 
65
- raw_bytes: bytes
66
- """The raw bytes of the corresponding NDArray"""
67
-
68
- def __init__( self,
69
- array: Optional[ArrayLike] = None,
70
- raw: Optional[bytes] = None,
71
- ):
72
- """TODO"""
73
-
74
- if array is not None:
75
- array = np.array( array )
76
- self.raw_bytes = eh.array_to_bytes( array )
65
+ # raw_bytes: bytes
66
+ # """The raw bytes of the corresponding NDArray"""
67
+
68
+ # def __init__( self,
69
+ # array: Optional[ArrayLike] = None,
70
+ # raw: Optional[bytes] = None,
71
+ # ):
72
+ # """TODO"""
73
+
74
+ # if array is not None:
75
+ # array = np.array( array )
76
+ # self.raw_bytes = eh.array_to_bytes( array )
77
77
 
78
- elif raw is not None:
79
- self.raw_bytes = raw
78
+ # elif raw is not None:
79
+ # self.raw_bytes = raw
80
80
 
81
- else:
82
- raise ValueError( 'Must provide either `array` or `raw` bytes' )
81
+ # else:
82
+ # raise ValueError( 'Must provide either `array` or `raw` bytes' )
83
83
 
84
- @property
85
- def to_numpy( self ) -> NDArray:
86
- """Return the `raw_bytes` data as an NDArray"""
87
- return eh.bytes_to_array( self.raw_bytes )
84
+ # @property
85
+ # def to_numpy( self ) -> NDArray:
86
+ # """Return the `raw_bytes` data as an NDArray"""
87
+ # return eh.bytes_to_array( self.raw_bytes )
88
88
 
89
89
  def _make_packable( x ):
90
- if isinstance( x, ArrayBytes ):
91
- return x.raw_bytes
90
+ # if isinstance( x, ArrayBytes ):
91
+ # return x.raw_bytes
92
92
  if isinstance( x, np.ndarray ):
93
93
  return eh.array_to_bytes( x )
94
94
  return x
@@ -114,8 +114,8 @@ class PackableSample( ABC ):
114
114
  # we're good!
115
115
  pass
116
116
 
117
- elif isinstance( var_cur_value, ArrayBytes ):
118
- setattr( self, var_name, var_cur_value.to_numpy )
117
+ # elif isinstance( var_cur_value, ArrayBytes ):
118
+ # setattr( self, var_name, var_cur_value.to_numpy )
119
119
 
120
120
  elif isinstance( var_cur_value, bytes ):
121
121
  setattr( self, var_name, eh.bytes_to_array( var_cur_value ) )
@@ -172,7 +172,7 @@ def _batch_aggregate( xs: Sequence ):
172
172
 
173
173
  return list( xs )
174
174
 
175
- class SamlpeBatch( Generic[DT] ):
175
+ class SampleBatch( Generic[DT] ):
176
176
 
177
177
  def __init__( self, samples: Sequence[DT] ):
178
178
  """TODO"""
@@ -233,7 +233,7 @@ class Dataset( Generic[ST] ):
233
233
  def batch_type( self ) -> Type:
234
234
  """The type of a batch built from `sample_class`"""
235
235
  # return self.__orig_class__.__args__[1]
236
- return SamlpeBatch[self.sample_type]
236
+ return SampleBatch[self.sample_type]
237
237
 
238
238
 
239
239
  # _schema_registry_sample: dict[str, Type]
@@ -396,7 +396,7 @@ class Dataset( Generic[ST] ):
396
396
  value = sample,
397
397
  )
398
398
 
399
- def wrap_batch( self, batch: WDSRawBatch ) -> SamlpeBatch[ST]:
399
+ def wrap_batch( self, batch: WDSRawBatch ) -> SampleBatch[ST]:
400
400
  """Wrap a `batch` of samples into the appropriate dataset-specific type
401
401
 
402
402
  This default implementation simply creates a list one sample at a time
@@ -405,7 +405,7 @@ class Dataset( Generic[ST] ):
405
405
  assert 'msgpack' in batch
406
406
  batch_unpacked = [ self.sample_type.from_bytes( bs )
407
407
  for bs in batch['msgpack'] ]
408
- return SamlpeBatch[self.sample_type]( batch_unpacked )
408
+ return SampleBatch[self.sample_type]( batch_unpacked )
409
409
 
410
410
 
411
411
  # # @classmethod
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atdata
3
- Version: 0.1.1a1
3
+ Version: 0.1.1a3
4
4
  Summary: A loose federation of distributed, typed datasets
5
5
  Author-email: Maxine Levesque <hello@maxine.science>
6
6
  License-File: LICENSE
@@ -11,5 +11,5 @@ Requires-Dist: ormsgpack>=1.11.0
11
11
  Requires-Dist: webdataset>=1.0.2
12
12
  Description-Content-Type: text/markdown
13
13
 
14
- # ekumen
14
+ # atdata
15
15
  A loose federation of distributed, typed datasets
@@ -0,0 +1,8 @@
1
+ atdata/__init__.py,sha256=VwlLoJkqsXf1XWSzsGGIiuxUluIihR2vHOtYao3w0EY,159
2
+ atdata/_helpers.py,sha256=R63JhXewAKZYnZ9Th7R6yZh0IOUPYGBsth3FpRUMD-U,503
3
+ atdata/dataset.py,sha256=tuph87zUZmSLPIXrHvPxSxdrAknLT_4gKAB4kHTqiZ8,12210
4
+ atdata-0.1.1a3.dist-info/METADATA,sha256=gKjbubI4HoLafCG5p4lsE-1_QMPMaHWZ48TLcIiWKO8,434
5
+ atdata-0.1.1a3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ atdata-0.1.1a3.dist-info/entry_points.txt,sha256=6-iQr1veSTq-ac94bLyfcyGHprrZWevPEd12BWX37tQ,39
7
+ atdata-0.1.1a3.dist-info/licenses/LICENSE,sha256=Pz2eACSxkhsGfW9_iN60pgy-enjnbGTj8df8O3ebnQQ,16726
8
+ atdata-0.1.1a3.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ atdata = atdata:main
@@ -1,8 +0,0 @@
1
- atdata/__init__.py,sha256=yN07kW_3UcMlYZrM_Jrpy6DMCzTp9kvu2ICcU7n1-5w,52
2
- atdata/_helpers.py,sha256=CjIvLruNOhHRl1Arse5SahGTmI0Et3BoNqsWC9b8noE,515
3
- atdata/dataset.py,sha256=mvmCYtL6wD9961qq4lprZSkone56ubTKp3vDgEnWdPI,12158
4
- atdata-0.1.1a1.dist-info/METADATA,sha256=9-r5DmpN26ZNNMCp21wtOrk1RxTiWCsZ8rVSd-uig6E,434
5
- atdata-0.1.1a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- atdata-0.1.1a1.dist-info/entry_points.txt,sha256=KuQtj4ZAwWLSyJUxhpQEHYfwSG-0ZXuj5hcZ1uAgGRQ,39
7
- atdata-0.1.1a1.dist-info/licenses/LICENSE,sha256=Pz2eACSxkhsGfW9_iN60pgy-enjnbGTj8df8O3ebnQQ,16726
8
- atdata-0.1.1a1.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- ekumen = atdata:main