atdata 0.1.3b4__py3-none-any.whl → 0.2.2b1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- atdata/__init__.py +44 -8
- atdata/_cid.py +150 -0
- atdata/_hf_api.py +692 -0
- atdata/_protocols.py +519 -0
- atdata/_schema_codec.py +442 -0
- atdata/_sources.py +515 -0
- atdata/_stub_manager.py +529 -0
- atdata/_type_utils.py +90 -0
- atdata/atmosphere/__init__.py +332 -0
- atdata/atmosphere/_types.py +331 -0
- atdata/atmosphere/client.py +533 -0
- atdata/atmosphere/lens.py +284 -0
- atdata/atmosphere/records.py +509 -0
- atdata/atmosphere/schema.py +239 -0
- atdata/atmosphere/store.py +208 -0
- atdata/cli/__init__.py +213 -0
- atdata/cli/diagnose.py +165 -0
- atdata/cli/local.py +280 -0
- atdata/dataset.py +510 -324
- atdata/lens.py +63 -112
- atdata/local.py +1707 -0
- atdata/promote.py +199 -0
- atdata-0.2.2b1.dist-info/METADATA +272 -0
- atdata-0.2.2b1.dist-info/RECORD +28 -0
- {atdata-0.1.3b4.dist-info → atdata-0.2.2b1.dist-info}/WHEEL +1 -1
- atdata-0.1.3b4.dist-info/METADATA +0 -172
- atdata-0.1.3b4.dist-info/RECORD +0 -9
- {atdata-0.1.3b4.dist-info → atdata-0.2.2b1.dist-info}/entry_points.txt +0 -0
- {atdata-0.1.3b4.dist-info → atdata-0.2.2b1.dist-info}/licenses/LICENSE +0 -0
atdata/lens.py
CHANGED
|
@@ -15,27 +15,29 @@ Lenses support the functional programming concept of composable, well-behaved
|
|
|
15
15
|
transformations that satisfy lens laws (GetPut and PutGet).
|
|
16
16
|
|
|
17
17
|
Example:
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
18
|
+
::
|
|
19
|
+
|
|
20
|
+
>>> @packable
|
|
21
|
+
... class FullData:
|
|
22
|
+
... name: str
|
|
23
|
+
... age: int
|
|
24
|
+
... embedding: NDArray
|
|
25
|
+
...
|
|
26
|
+
>>> @packable
|
|
27
|
+
... class NameOnly:
|
|
28
|
+
... name: str
|
|
29
|
+
...
|
|
30
|
+
>>> @lens
|
|
31
|
+
... def name_view(full: FullData) -> NameOnly:
|
|
32
|
+
... return NameOnly(name=full.name)
|
|
33
|
+
...
|
|
34
|
+
>>> @name_view.putter
|
|
35
|
+
... def name_view_put(view: NameOnly, source: FullData) -> FullData:
|
|
36
|
+
... return FullData(name=view.name, age=source.age,
|
|
37
|
+
... embedding=source.embedding)
|
|
38
|
+
...
|
|
39
|
+
>>> ds = Dataset[FullData]("data.tar")
|
|
40
|
+
>>> ds_names = ds.as_type(NameOnly) # Uses registered lens
|
|
39
41
|
"""
|
|
40
42
|
|
|
41
43
|
##
|
|
@@ -60,6 +62,8 @@ from typing import (
|
|
|
60
62
|
if TYPE_CHECKING:
|
|
61
63
|
from .dataset import PackableSample
|
|
62
64
|
|
|
65
|
+
from ._protocols import Packable
|
|
66
|
+
|
|
63
67
|
|
|
64
68
|
##
|
|
65
69
|
# Typing helpers
|
|
@@ -67,8 +71,8 @@ if TYPE_CHECKING:
|
|
|
67
71
|
DatasetType: TypeAlias = Type['PackableSample']
|
|
68
72
|
LensSignature: TypeAlias = Tuple[DatasetType, DatasetType]
|
|
69
73
|
|
|
70
|
-
S = TypeVar( 'S', bound =
|
|
71
|
-
V = TypeVar( 'V', bound =
|
|
74
|
+
S = TypeVar( 'S', bound = Packable )
|
|
75
|
+
V = TypeVar( 'V', bound = Packable )
|
|
72
76
|
type LensGetter[S, V] = Callable[[S], V]
|
|
73
77
|
type LensPutter[S, V] = Callable[[V, S], S]
|
|
74
78
|
|
|
@@ -84,19 +88,22 @@ class Lens( Generic[S, V] ):
|
|
|
84
88
|
and an optional putter that transforms ``(V, S) -> S``, enabling updates to
|
|
85
89
|
the view to be reflected back in the source.
|
|
86
90
|
|
|
87
|
-
|
|
91
|
+
Parameters:
|
|
88
92
|
S: The source type, must derive from ``PackableSample``.
|
|
89
93
|
V: The view type, must derive from ``PackableSample``.
|
|
90
94
|
|
|
91
95
|
Example:
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
96
|
+
::
|
|
97
|
+
|
|
98
|
+
>>> @lens
|
|
99
|
+
... def name_lens(full: FullData) -> NameOnly:
|
|
100
|
+
... return NameOnly(name=full.name)
|
|
101
|
+
...
|
|
102
|
+
>>> @name_lens.putter
|
|
103
|
+
... def name_lens_put(view: NameOnly, source: FullData) -> FullData:
|
|
104
|
+
... return FullData(name=view.name, age=source.age, embedding=source.embedding)
|
|
99
105
|
"""
|
|
106
|
+
# TODO: The docstring above uses "Parameters:" where "Type Parameters:" is intended; this is a temporary workaround for `quartodoc` auto-generation bugs.
|
|
100
107
|
|
|
101
108
|
def __init__( self, get: LensGetter[S, V],
|
|
102
109
|
put: Optional[LensPutter[S, V]] = None
|
|
@@ -113,8 +120,7 @@ class Lens( Generic[S, V] ):
|
|
|
113
120
|
trivial putter is used that ignores updates to the view.
|
|
114
121
|
|
|
115
122
|
Raises:
|
|
116
|
-
|
|
117
|
-
parameter.
|
|
123
|
+
ValueError: If the getter function doesn't have exactly one parameter.
|
|
118
124
|
"""
|
|
119
125
|
##
|
|
120
126
|
|
|
@@ -122,14 +128,17 @@ class Lens( Generic[S, V] ):
|
|
|
122
128
|
|
|
123
129
|
sig = inspect.signature( get )
|
|
124
130
|
input_types = list( sig.parameters.values() )
|
|
125
|
-
|
|
126
|
-
|
|
131
|
+
if len(input_types) != 1:
|
|
132
|
+
raise ValueError(
|
|
133
|
+
f"Lens getter must have exactly one parameter, got {len(input_types)}: "
|
|
134
|
+
f"{[p.name for p in input_types]}"
|
|
135
|
+
)
|
|
127
136
|
|
|
128
137
|
# Update function details for this object as returned by annotation
|
|
129
138
|
functools.update_wrapper( self, get )
|
|
130
139
|
|
|
131
|
-
self.source_type: Type[
|
|
132
|
-
self.view_type: Type[
|
|
140
|
+
self.source_type: Type[Packable] = input_types[0].annotation
|
|
141
|
+
self.view_type: Type[Packable] = sig.return_annotation
|
|
133
142
|
|
|
134
143
|
# Store the getter
|
|
135
144
|
self._getter = get
|
|
@@ -155,9 +164,11 @@ class Lens( Generic[S, V] ):
|
|
|
155
164
|
The putter function, allowing this to be used as a decorator.
|
|
156
165
|
|
|
157
166
|
Example:
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
167
|
+
::
|
|
168
|
+
|
|
169
|
+
>>> @my_lens.putter
|
|
170
|
+
... def my_lens_put(view: ViewType, source: SourceType) -> SourceType:
|
|
171
|
+
... return SourceType(...)
|
|
161
172
|
"""
|
|
162
173
|
##
|
|
163
174
|
self._putter = put
|
|
@@ -188,35 +199,10 @@ class Lens( Generic[S, V] ):
|
|
|
188
199
|
"""
|
|
189
200
|
return self( s )
|
|
190
201
|
|
|
191
|
-
# Convenience to enable calling the lens as its getter
|
|
192
|
-
|
|
193
202
|
def __call__( self, s: S ) -> V:
|
|
194
|
-
"""Apply the lens transformation (same as ``get()``).
|
|
195
|
-
|
|
196
|
-
Args:
|
|
197
|
-
s: The source sample of type ``S``.
|
|
198
|
-
|
|
199
|
-
Returns:
|
|
200
|
-
A view of the source as type ``V``.
|
|
201
|
-
"""
|
|
203
|
+
"""Apply the lens transformation (same as ``get()``)."""
|
|
202
204
|
return self._getter( s )
|
|
203
205
|
|
|
204
|
-
# TODO Figure out how to properly parameterize this
|
|
205
|
-
# def _lens_factory[S, V]( register: bool = True ):
|
|
206
|
-
# """Register the annotated function `f` as the getter of a sample lens"""
|
|
207
|
-
|
|
208
|
-
# # The actual lens decorator taking a lens getter function to a lens object
|
|
209
|
-
# def _decorator( f: LensGetter[S, V] ) -> Lens[S, V]:
|
|
210
|
-
# ret = Lens[S, V]( f )
|
|
211
|
-
# if register:
|
|
212
|
-
# _network.register( ret )
|
|
213
|
-
# return ret
|
|
214
|
-
|
|
215
|
-
# # Return the lens decorator
|
|
216
|
-
# return _decorator
|
|
217
|
-
|
|
218
|
-
# # For convenience
|
|
219
|
-
# lens = _lens_factory
|
|
220
206
|
|
|
221
207
|
def lens( f: LensGetter[S, V] ) -> Lens[S, V]:
|
|
222
208
|
"""Decorator to create and register a lens transformation.
|
|
@@ -233,25 +219,21 @@ def lens( f: LensGetter[S, V] ) -> Lens[S, V]:
|
|
|
233
219
|
or decorated with ``@lens_name.putter`` to add a putter function.
|
|
234
220
|
|
|
235
221
|
Example:
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
222
|
+
::
|
|
223
|
+
|
|
224
|
+
>>> @lens
|
|
225
|
+
... def extract_name(full: FullData) -> NameOnly:
|
|
226
|
+
... return NameOnly(name=full.name)
|
|
227
|
+
...
|
|
228
|
+
>>> @extract_name.putter
|
|
229
|
+
... def extract_name_put(view: NameOnly, source: FullData) -> FullData:
|
|
230
|
+
... return FullData(name=view.name, age=source.age, embedding=source.embedding)
|
|
243
231
|
"""
|
|
244
232
|
ret = Lens[S, V]( f )
|
|
245
233
|
_network.register( ret )
|
|
246
234
|
return ret
|
|
247
235
|
|
|
248
236
|
|
|
249
|
-
##
|
|
250
|
-
# Global registry of used lenses
|
|
251
|
-
|
|
252
|
-
# _registered_lenses: Dict[LensSignature, Lens] = dict()
|
|
253
|
-
# """TODO"""
|
|
254
|
-
|
|
255
237
|
class LensNetwork:
|
|
256
238
|
"""Global registry for lens transformations between sample types.
|
|
257
239
|
|
|
@@ -292,18 +274,6 @@ class LensNetwork:
|
|
|
292
274
|
If a lens already exists for the same type pair, it will be
|
|
293
275
|
overwritten.
|
|
294
276
|
"""
|
|
295
|
-
|
|
296
|
-
# sig = inspect.signature( _lens.get )
|
|
297
|
-
# input_types = list( sig.parameters.values() )
|
|
298
|
-
# assert len( input_types ) == 1, \
|
|
299
|
-
# 'Wrong number of input args for lens: should only have one'
|
|
300
|
-
|
|
301
|
-
# input_type = input_types[0].annotation
|
|
302
|
-
# print( input_type )
|
|
303
|
-
# output_type = sig.return_annotation
|
|
304
|
-
|
|
305
|
-
# self._registry[input_type, output_type] = _lens
|
|
306
|
-
# print( _lens.source_type )
|
|
307
277
|
self._registry[_lens.source_type, _lens.view_type] = _lens
|
|
308
278
|
|
|
309
279
|
def transform( self, source: DatasetType, view: DatasetType ) -> Lens:
|
|
@@ -323,8 +293,6 @@ class LensNetwork:
|
|
|
323
293
|
Currently only supports direct transformations. Compositional
|
|
324
294
|
transformations (chaining multiple lenses) are not yet implemented.
|
|
325
295
|
"""
|
|
326
|
-
|
|
327
|
-
# TODO Handle compositional closure
|
|
328
296
|
ret = self._registry.get( (source, view), None )
|
|
329
297
|
if ret is None:
|
|
330
298
|
raise ValueError( f'No registered lens from source {source} to view {view}' )
|
|
@@ -332,22 +300,5 @@ class LensNetwork:
|
|
|
332
300
|
return ret
|
|
333
301
|
|
|
334
302
|
|
|
335
|
-
#
|
|
336
|
-
_network = LensNetwork()
|
|
337
|
-
|
|
338
|
-
# def lens( f: LensPutter ) -> Lens:
|
|
339
|
-
# """Register the annotated function `f` as a sample lens"""
|
|
340
|
-
# ##
|
|
341
|
-
|
|
342
|
-
# sig = inspect.signature( f )
|
|
343
|
-
|
|
344
|
-
# input_types = list( sig.parameters.values() )
|
|
345
|
-
# output_type = sig.return_annotation
|
|
346
|
-
|
|
347
|
-
# _registered_lenses[]
|
|
348
|
-
|
|
349
|
-
# f.lens = Lens(
|
|
350
|
-
|
|
351
|
-
# )
|
|
352
|
-
|
|
353
|
-
# return f
|
|
303
|
+
# Global singleton registry instance
|
|
304
|
+
_network = LensNetwork()
|