atdata 0.1.1a1__py3-none-any.whl → 0.1.1a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atdata/__init__.py +13 -2
- atdata/_helpers.py +8 -16
- atdata/dataset.py +32 -32
- {atdata-0.1.1a1.dist-info → atdata-0.1.1a3.dist-info}/METADATA +2 -2
- atdata-0.1.1a3.dist-info/RECORD +8 -0
- atdata-0.1.1a3.dist-info/entry_points.txt +2 -0
- atdata-0.1.1a1.dist-info/RECORD +0 -8
- atdata-0.1.1a1.dist-info/entry_points.txt +0 -2
- {atdata-0.1.1a1.dist-info → atdata-0.1.1a3.dist-info}/WHEEL +0 -0
- {atdata-0.1.1a1.dist-info → atdata-0.1.1a3.dist-info}/licenses/LICENSE +0 -0
atdata/__init__.py
CHANGED
atdata/_helpers.py
CHANGED
|
@@ -1,30 +1,22 @@
|
|
|
1
|
-
"""Assorted helper methods for `
|
|
1
|
+
"""Assorted helper methods for `atdata`"""
|
|
2
2
|
|
|
3
3
|
##
|
|
4
4
|
# Imports
|
|
5
5
|
|
|
6
6
|
from io import BytesIO
|
|
7
|
-
import ormsgpack as omp
|
|
8
7
|
|
|
9
8
|
import numpy as np
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
##
|
|
13
|
-
#
|
|
14
12
|
|
|
15
|
-
def
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def unpack( bs: bytes ):
|
|
19
|
-
return omp.unpackb( bs )
|
|
20
|
-
|
|
21
|
-
##
|
|
22
|
-
|
|
23
|
-
def array_to_bytes(x: np.ndarray) -> bytes:
|
|
13
|
+
def array_to_bytes( x: np.ndarray ) -> bytes:
|
|
14
|
+
"""Convert `numpy` array to a format suitable for packing"""
|
|
24
15
|
np_bytes = BytesIO()
|
|
25
|
-
np.save(np_bytes, x, allow_pickle=True)
|
|
16
|
+
np.save( np_bytes, x, allow_pickle = True )
|
|
26
17
|
return np_bytes.getvalue()
|
|
27
18
|
|
|
28
|
-
def bytes_to_array(b: bytes) -> np.ndarray:
|
|
29
|
-
|
|
30
|
-
|
|
19
|
+
def bytes_to_array( b: bytes ) -> np.ndarray:
|
|
20
|
+
"""Convert packed bytes back to a `numpy` array"""
|
|
21
|
+
np_bytes = BytesIO( b )
|
|
22
|
+
return np.load( np_bytes, allow_pickle = True )
|
atdata/dataset.py
CHANGED
|
@@ -57,38 +57,38 @@ DT = TypeVar( 'DT' )
|
|
|
57
57
|
|
|
58
58
|
MsgpackRawSample: TypeAlias = Dict[str, Any]
|
|
59
59
|
|
|
60
|
-
@dataclass
|
|
61
|
-
class ArrayBytes:
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
# @dataclass
|
|
61
|
+
# class ArrayBytes:
|
|
62
|
+
# """Annotates bytes that should be interpreted as the raw contents of a
|
|
63
|
+
# numpy NDArray"""
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
65
|
+
# raw_bytes: bytes
|
|
66
|
+
# """The raw bytes of the corresponding NDArray"""
|
|
67
|
+
|
|
68
|
+
# def __init__( self,
|
|
69
|
+
# array: Optional[ArrayLike] = None,
|
|
70
|
+
# raw: Optional[bytes] = None,
|
|
71
|
+
# ):
|
|
72
|
+
# """TODO"""
|
|
73
|
+
|
|
74
|
+
# if array is not None:
|
|
75
|
+
# array = np.array( array )
|
|
76
|
+
# self.raw_bytes = eh.array_to_bytes( array )
|
|
77
77
|
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
# elif raw is not None:
|
|
79
|
+
# self.raw_bytes = raw
|
|
80
80
|
|
|
81
|
-
|
|
82
|
-
|
|
81
|
+
# else:
|
|
82
|
+
# raise ValueError( 'Must provide either `array` or `raw` bytes' )
|
|
83
83
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
84
|
+
# @property
|
|
85
|
+
# def to_numpy( self ) -> NDArray:
|
|
86
|
+
# """Return the `raw_bytes` data as an NDArray"""
|
|
87
|
+
# return eh.bytes_to_array( self.raw_bytes )
|
|
88
88
|
|
|
89
89
|
def _make_packable( x ):
|
|
90
|
-
if isinstance( x, ArrayBytes ):
|
|
91
|
-
|
|
90
|
+
# if isinstance( x, ArrayBytes ):
|
|
91
|
+
# return x.raw_bytes
|
|
92
92
|
if isinstance( x, np.ndarray ):
|
|
93
93
|
return eh.array_to_bytes( x )
|
|
94
94
|
return x
|
|
@@ -114,8 +114,8 @@ class PackableSample( ABC ):
|
|
|
114
114
|
# we're good!
|
|
115
115
|
pass
|
|
116
116
|
|
|
117
|
-
elif isinstance( var_cur_value, ArrayBytes ):
|
|
118
|
-
|
|
117
|
+
# elif isinstance( var_cur_value, ArrayBytes ):
|
|
118
|
+
# setattr( self, var_name, var_cur_value.to_numpy )
|
|
119
119
|
|
|
120
120
|
elif isinstance( var_cur_value, bytes ):
|
|
121
121
|
setattr( self, var_name, eh.bytes_to_array( var_cur_value ) )
|
|
@@ -172,7 +172,7 @@ def _batch_aggregate( xs: Sequence ):
|
|
|
172
172
|
|
|
173
173
|
return list( xs )
|
|
174
174
|
|
|
175
|
-
class
|
|
175
|
+
class SampleBatch( Generic[DT] ):
|
|
176
176
|
|
|
177
177
|
def __init__( self, samples: Sequence[DT] ):
|
|
178
178
|
"""TODO"""
|
|
@@ -233,7 +233,7 @@ class Dataset( Generic[ST] ):
|
|
|
233
233
|
def batch_type( self ) -> Type:
|
|
234
234
|
"""The type of a batch built from `sample_class`"""
|
|
235
235
|
# return self.__orig_class__.__args__[1]
|
|
236
|
-
return
|
|
236
|
+
return SampleBatch[self.sample_type]
|
|
237
237
|
|
|
238
238
|
|
|
239
239
|
# _schema_registry_sample: dict[str, Type]
|
|
@@ -396,7 +396,7 @@ class Dataset( Generic[ST] ):
|
|
|
396
396
|
value = sample,
|
|
397
397
|
)
|
|
398
398
|
|
|
399
|
-
def wrap_batch( self, batch: WDSRawBatch ) ->
|
|
399
|
+
def wrap_batch( self, batch: WDSRawBatch ) -> SampleBatch[ST]:
|
|
400
400
|
"""Wrap a `batch` of samples into the appropriate dataset-specific type
|
|
401
401
|
|
|
402
402
|
This default implementation simply creates a list one sample at a time
|
|
@@ -405,7 +405,7 @@ class Dataset( Generic[ST] ):
|
|
|
405
405
|
assert 'msgpack' in batch
|
|
406
406
|
batch_unpacked = [ self.sample_type.from_bytes( bs )
|
|
407
407
|
for bs in batch['msgpack'] ]
|
|
408
|
-
return
|
|
408
|
+
return SampleBatch[self.sample_type]( batch_unpacked )
|
|
409
409
|
|
|
410
410
|
|
|
411
411
|
# # @classmethod
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: atdata
|
|
3
|
-
Version: 0.1.1a1
|
|
3
|
+
Version: 0.1.1a3
|
|
4
4
|
Summary: A loose federation of distributed, typed datasets
|
|
5
5
|
Author-email: Maxine Levesque <hello@maxine.science>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -11,5 +11,5 @@ Requires-Dist: ormsgpack>=1.11.0
|
|
|
11
11
|
Requires-Dist: webdataset>=1.0.2
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
|
|
14
|
-
#
|
|
14
|
+
# atdata
|
|
15
15
|
A loose federation of distributed, typed datasets
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
atdata/__init__.py,sha256=VwlLoJkqsXf1XWSzsGGIiuxUluIihR2vHOtYao3w0EY,159
|
|
2
|
+
atdata/_helpers.py,sha256=R63JhXewAKZYnZ9Th7R6yZh0IOUPYGBsth3FpRUMD-U,503
|
|
3
|
+
atdata/dataset.py,sha256=tuph87zUZmSLPIXrHvPxSxdrAknLT_4gKAB4kHTqiZ8,12210
|
|
4
|
+
atdata-0.1.1a3.dist-info/METADATA,sha256=gKjbubI4HoLafCG5p4lsE-1_QMPMaHWZ48TLcIiWKO8,434
|
|
5
|
+
atdata-0.1.1a3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
+
atdata-0.1.1a3.dist-info/entry_points.txt,sha256=6-iQr1veSTq-ac94bLyfcyGHprrZWevPEd12BWX37tQ,39
|
|
7
|
+
atdata-0.1.1a3.dist-info/licenses/LICENSE,sha256=Pz2eACSxkhsGfW9_iN60pgy-enjnbGTj8df8O3ebnQQ,16726
|
|
8
|
+
atdata-0.1.1a3.dist-info/RECORD,,
|
atdata-0.1.1a1.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
atdata/__init__.py,sha256=yN07kW_3UcMlYZrM_Jrpy6DMCzTp9kvu2ICcU7n1-5w,52
|
|
2
|
-
atdata/_helpers.py,sha256=CjIvLruNOhHRl1Arse5SahGTmI0Et3BoNqsWC9b8noE,515
|
|
3
|
-
atdata/dataset.py,sha256=mvmCYtL6wD9961qq4lprZSkone56ubTKp3vDgEnWdPI,12158
|
|
4
|
-
atdata-0.1.1a1.dist-info/METADATA,sha256=9-r5DmpN26ZNNMCp21wtOrk1RxTiWCsZ8rVSd-uig6E,434
|
|
5
|
-
atdata-0.1.1a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
-
atdata-0.1.1a1.dist-info/entry_points.txt,sha256=KuQtj4ZAwWLSyJUxhpQEHYfwSG-0ZXuj5hcZ1uAgGRQ,39
|
|
7
|
-
atdata-0.1.1a1.dist-info/licenses/LICENSE,sha256=Pz2eACSxkhsGfW9_iN60pgy-enjnbGTj8df8O3ebnQQ,16726
|
|
8
|
-
atdata-0.1.1a1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|