atdata 0.2.2b1__py3-none-any.whl → 0.2.3b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atdata/__init__.py +1 -1
- atdata/_cid.py +29 -35
- atdata/_helpers.py +7 -5
- atdata/_hf_api.py +48 -50
- atdata/_protocols.py +56 -71
- atdata/_schema_codec.py +33 -37
- atdata/_sources.py +57 -64
- atdata/_stub_manager.py +31 -26
- atdata/_type_utils.py +19 -5
- atdata/atmosphere/__init__.py +20 -23
- atdata/atmosphere/_types.py +11 -11
- atdata/atmosphere/client.py +11 -8
- atdata/atmosphere/lens.py +27 -30
- atdata/atmosphere/records.py +31 -37
- atdata/atmosphere/schema.py +33 -29
- atdata/atmosphere/store.py +16 -20
- atdata/cli/__init__.py +12 -3
- atdata/cli/diagnose.py +12 -8
- atdata/cli/local.py +4 -1
- atdata/dataset.py +284 -241
- atdata/lens.py +77 -82
- atdata/local.py +182 -169
- atdata/promote.py +18 -22
- {atdata-0.2.2b1.dist-info → atdata-0.2.3b1.dist-info}/METADATA +2 -1
- atdata-0.2.3b1.dist-info/RECORD +28 -0
- atdata-0.2.2b1.dist-info/RECORD +0 -28
- {atdata-0.2.2b1.dist-info → atdata-0.2.3b1.dist-info}/WHEEL +0 -0
- {atdata-0.2.2b1.dist-info → atdata-0.2.3b1.dist-info}/entry_points.txt +0 -0
- {atdata-0.2.2b1.dist-info → atdata-0.2.3b1.dist-info}/licenses/LICENSE +0 -0
atdata/lens.py
CHANGED
|
@@ -14,30 +14,28 @@ Key components:
|
|
|
14
14
|
Lenses support the functional programming concept of composable, well-behaved
|
|
15
15
|
transformations that satisfy lens laws (GetPut and PutGet).
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
>>> ds = Dataset[FullData]("data.tar")
|
|
40
|
-
>>> ds_names = ds.as_type(NameOnly) # Uses registered lens
|
|
17
|
+
Examples:
|
|
18
|
+
>>> @packable
|
|
19
|
+
... class FullData:
|
|
20
|
+
... name: str
|
|
21
|
+
... age: int
|
|
22
|
+
... embedding: NDArray
|
|
23
|
+
...
|
|
24
|
+
>>> @packable
|
|
25
|
+
... class NameOnly:
|
|
26
|
+
... name: str
|
|
27
|
+
...
|
|
28
|
+
>>> @lens
|
|
29
|
+
... def name_view(full: FullData) -> NameOnly:
|
|
30
|
+
... return NameOnly(name=full.name)
|
|
31
|
+
...
|
|
32
|
+
>>> @name_view.putter
|
|
33
|
+
... def name_view_put(view: NameOnly, source: FullData) -> FullData:
|
|
34
|
+
... return FullData(name=view.name, age=source.age,
|
|
35
|
+
... embedding=source.embedding)
|
|
36
|
+
...
|
|
37
|
+
>>> ds = Dataset[FullData]("data.tar")
|
|
38
|
+
>>> ds_names = ds.as_type(NameOnly) # Uses registered lens
|
|
41
39
|
"""
|
|
42
40
|
|
|
43
41
|
##
|
|
@@ -56,7 +54,7 @@ from typing import (
|
|
|
56
54
|
Optional,
|
|
57
55
|
Generic,
|
|
58
56
|
#
|
|
59
|
-
TYPE_CHECKING
|
|
57
|
+
TYPE_CHECKING,
|
|
60
58
|
)
|
|
61
59
|
|
|
62
60
|
if TYPE_CHECKING:
|
|
@@ -68,11 +66,11 @@ from ._protocols import Packable
|
|
|
68
66
|
##
|
|
69
67
|
# Typing helpers
|
|
70
68
|
|
|
71
|
-
DatasetType: TypeAlias = Type[
|
|
69
|
+
DatasetType: TypeAlias = Type["PackableSample"]
|
|
72
70
|
LensSignature: TypeAlias = Tuple[DatasetType, DatasetType]
|
|
73
71
|
|
|
74
|
-
S = TypeVar(
|
|
75
|
-
V = TypeVar(
|
|
72
|
+
S = TypeVar("S", bound=Packable)
|
|
73
|
+
V = TypeVar("V", bound=Packable)
|
|
76
74
|
type LensGetter[S, V] = Callable[[S], V]
|
|
77
75
|
type LensPutter[S, V] = Callable[[V, S], S]
|
|
78
76
|
|
|
@@ -80,7 +78,8 @@ type LensPutter[S, V] = Callable[[V, S], S]
|
|
|
80
78
|
##
|
|
81
79
|
# Shortcut decorators
|
|
82
80
|
|
|
83
|
-
|
|
81
|
+
|
|
82
|
+
class Lens(Generic[S, V]):
|
|
84
83
|
"""A bidirectional transformation between two sample types.
|
|
85
84
|
|
|
86
85
|
A lens provides a way to view and update data of type ``S`` (source) as if
|
|
@@ -92,22 +91,21 @@ class Lens( Generic[S, V] ):
|
|
|
92
91
|
S: The source type, must derive from ``PackableSample``.
|
|
93
92
|
V: The view type, must derive from ``PackableSample``.
|
|
94
93
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
... def name_lens_put(view: NameOnly, source: FullData) -> FullData:
|
|
104
|
-
... return FullData(name=view.name, age=source.age)
|
|
94
|
+
Examples:
|
|
95
|
+
>>> @lens
|
|
96
|
+
... def name_lens(full: FullData) -> NameOnly:
|
|
97
|
+
... return NameOnly(name=full.name)
|
|
98
|
+
...
|
|
99
|
+
>>> @name_lens.putter
|
|
100
|
+
... def name_lens_put(view: NameOnly, source: FullData) -> FullData:
|
|
101
|
+
... return FullData(name=view.name, age=source.age)
|
|
105
102
|
"""
|
|
103
|
+
|
|
106
104
|
# TODO The above has a line for "Parameters:" that should be "Type Parameters:"; this is a temporary fix for `quartodoc` auto-generation bugs.
|
|
107
105
|
|
|
108
|
-
def __init__(
|
|
109
|
-
|
|
110
|
-
|
|
106
|
+
def __init__(
|
|
107
|
+
self, get: LensGetter[S, V], put: Optional[LensPutter[S, V]] = None
|
|
108
|
+
) -> None:
|
|
111
109
|
"""Initialize a lens with a getter and optional putter function.
|
|
112
110
|
|
|
113
111
|
Args:
|
|
@@ -126,8 +124,8 @@ class Lens( Generic[S, V] ):
|
|
|
126
124
|
|
|
127
125
|
# Check argument validity
|
|
128
126
|
|
|
129
|
-
sig = inspect.signature(
|
|
130
|
-
input_types = list(
|
|
127
|
+
sig = inspect.signature(get)
|
|
128
|
+
input_types = list(sig.parameters.values())
|
|
131
129
|
if len(input_types) != 1:
|
|
132
130
|
raise ValueError(
|
|
133
131
|
f"Lens getter must have exactly one parameter, got {len(input_types)}: "
|
|
@@ -135,7 +133,7 @@ class Lens( Generic[S, V] ):
|
|
|
135
133
|
)
|
|
136
134
|
|
|
137
135
|
# Update function details for this object as returned by annotation
|
|
138
|
-
functools.update_wrapper(
|
|
136
|
+
functools.update_wrapper(self, get)
|
|
139
137
|
|
|
140
138
|
self.source_type: Type[Packable] = input_types[0].annotation
|
|
141
139
|
self.view_type: Type[Packable] = sig.return_annotation
|
|
@@ -146,14 +144,15 @@ class Lens( Generic[S, V] ):
|
|
|
146
144
|
# Determine and store the putter
|
|
147
145
|
if put is None:
|
|
148
146
|
# Trivial putter does not update the source
|
|
149
|
-
def _trivial_put(
|
|
147
|
+
def _trivial_put(v: V, s: S) -> S:
|
|
150
148
|
return s
|
|
149
|
+
|
|
151
150
|
put = _trivial_put
|
|
152
151
|
self._putter = put
|
|
153
|
-
|
|
152
|
+
|
|
154
153
|
#
|
|
155
154
|
|
|
156
|
-
def putter(
|
|
155
|
+
def putter(self, put: LensPutter[S, V]) -> LensPutter[S, V]:
|
|
157
156
|
"""Decorator to register a putter function for this lens.
|
|
158
157
|
|
|
159
158
|
Args:
|
|
@@ -163,20 +162,18 @@ class Lens( Generic[S, V] ):
|
|
|
163
162
|
Returns:
|
|
164
163
|
The putter function, allowing this to be used as a decorator.
|
|
165
164
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
... def my_lens_put(view: ViewType, source: SourceType) -> SourceType:
|
|
171
|
-
... return SourceType(...)
|
|
165
|
+
Examples:
|
|
166
|
+
>>> @my_lens.putter
|
|
167
|
+
... def my_lens_put(view: ViewType, source: SourceType) -> SourceType:
|
|
168
|
+
... return SourceType(field=view.field, other=source.other)
|
|
172
169
|
"""
|
|
173
170
|
##
|
|
174
171
|
self._putter = put
|
|
175
172
|
return put
|
|
176
|
-
|
|
173
|
+
|
|
177
174
|
# Methods to actually execute transformations
|
|
178
175
|
|
|
179
|
-
def put(
|
|
176
|
+
def put(self, v: V, s: S) -> S:
|
|
180
177
|
"""Update the source based on a modified view.
|
|
181
178
|
|
|
182
179
|
Args:
|
|
@@ -186,9 +183,9 @@ class Lens( Generic[S, V] ):
|
|
|
186
183
|
Returns:
|
|
187
184
|
An updated source of type ``S`` that reflects changes from the view.
|
|
188
185
|
"""
|
|
189
|
-
return self._putter(
|
|
186
|
+
return self._putter(v, s)
|
|
190
187
|
|
|
191
|
-
def get(
|
|
188
|
+
def get(self, s: S) -> V:
|
|
192
189
|
"""Transform the source into the view type.
|
|
193
190
|
|
|
194
191
|
Args:
|
|
@@ -197,14 +194,14 @@ class Lens( Generic[S, V] ):
|
|
|
197
194
|
Returns:
|
|
198
195
|
A view of the source as type ``V``.
|
|
199
196
|
"""
|
|
200
|
-
return self(
|
|
197
|
+
return self(s)
|
|
201
198
|
|
|
202
|
-
def __call__(
|
|
199
|
+
def __call__(self, s: S) -> V:
|
|
203
200
|
"""Apply the lens transformation (same as ``get()``)."""
|
|
204
|
-
return self._getter(
|
|
201
|
+
return self._getter(s)
|
|
205
202
|
|
|
206
203
|
|
|
207
|
-
def lens(
|
|
204
|
+
def lens(f: LensGetter[S, V]) -> Lens[S, V]:
|
|
208
205
|
"""Decorator to create and register a lens transformation.
|
|
209
206
|
|
|
210
207
|
This decorator converts a getter function into a ``Lens`` object and
|
|
@@ -218,19 +215,17 @@ def lens( f: LensGetter[S, V] ) -> Lens[S, V]:
|
|
|
218
215
|
A ``Lens[S, V]`` object that can be called to apply the transformation
|
|
219
216
|
or decorated with ``@lens_name.putter`` to add a putter function.
|
|
220
217
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
... def extract_name_put(view: NameOnly, source: FullData) -> FullData:
|
|
230
|
-
... return FullData(name=view.name, age=source.age)
|
|
218
|
+
Examples:
|
|
219
|
+
>>> @lens
|
|
220
|
+
... def extract_name(full: FullData) -> NameOnly:
|
|
221
|
+
... return NameOnly(name=full.name)
|
|
222
|
+
...
|
|
223
|
+
>>> @extract_name.putter
|
|
224
|
+
... def extract_name_put(view: NameOnly, source: FullData) -> FullData:
|
|
225
|
+
... return FullData(name=view.name, age=source.age)
|
|
231
226
|
"""
|
|
232
|
-
ret = Lens[S, V](
|
|
233
|
-
_network.register(
|
|
227
|
+
ret = Lens[S, V](f)
|
|
228
|
+
_network.register(ret)
|
|
234
229
|
return ret
|
|
235
230
|
|
|
236
231
|
|
|
@@ -259,11 +254,11 @@ class LensNetwork:
|
|
|
259
254
|
|
|
260
255
|
def __init__(self):
|
|
261
256
|
"""Initialize the lens registry (only on first instantiation)."""
|
|
262
|
-
if not hasattr(self,
|
|
257
|
+
if not hasattr(self, "_initialized"): # Check if already initialized
|
|
263
258
|
self._registry: Dict[LensSignature, Lens] = dict()
|
|
264
259
|
self._initialized = True
|
|
265
|
-
|
|
266
|
-
def register(
|
|
260
|
+
|
|
261
|
+
def register(self, _lens: Lens):
|
|
267
262
|
"""Register a lens as the canonical transformation between two types.
|
|
268
263
|
|
|
269
264
|
Args:
|
|
@@ -275,8 +270,8 @@ class LensNetwork:
|
|
|
275
270
|
overwritten.
|
|
276
271
|
"""
|
|
277
272
|
self._registry[_lens.source_type, _lens.view_type] = _lens
|
|
278
|
-
|
|
279
|
-
def transform(
|
|
273
|
+
|
|
274
|
+
def transform(self, source: DatasetType, view: DatasetType) -> Lens:
|
|
280
275
|
"""Look up the lens transformation between two sample types.
|
|
281
276
|
|
|
282
277
|
Args:
|
|
@@ -293,12 +288,12 @@ class LensNetwork:
|
|
|
293
288
|
Currently only supports direct transformations. Compositional
|
|
294
289
|
transformations (chaining multiple lenses) are not yet implemented.
|
|
295
290
|
"""
|
|
296
|
-
ret = self._registry.get(
|
|
291
|
+
ret = self._registry.get((source, view), None)
|
|
297
292
|
if ret is None:
|
|
298
|
-
raise ValueError(
|
|
293
|
+
raise ValueError(f"No registered lens from source {source} to view {view}")
|
|
299
294
|
|
|
300
295
|
return ret
|
|
301
296
|
|
|
302
297
|
|
|
303
298
|
# Global singleton registry instance
|
|
304
|
-
_network = LensNetwork()
|
|
299
|
+
_network = LensNetwork()
|