datamaestro 1.0.4__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamaestro-1.0.4 → datamaestro-1.0.6}/.readthedocs.yml +4 -1
- {datamaestro-1.0.4 → datamaestro-1.0.6}/PKG-INFO +1 -1
- datamaestro-1.0.6/docs/requirements.txt +5 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/api/index.md +2 -0
- datamaestro-1.0.6/docs/source/api/records.rst +59 -0
- datamaestro-1.0.6/src/datamaestro/record.py +312 -0
- datamaestro-1.0.6/src/datamaestro/test/test_record.py +151 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/version.py +2 -2
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro.egg-info/PKG-INFO +1 -1
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro.egg-info/SOURCES.txt +1 -0
- datamaestro-1.0.4/docs/requirements.txt +0 -2
- datamaestro-1.0.4/src/datamaestro/record.py +0 -196
- datamaestro-1.0.4/src/datamaestro/test/test_record.py +0 -81
- {datamaestro-1.0.4 → datamaestro-1.0.6}/.coverage +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/.github/workflows/pytest.yml +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/.github/workflows/python-publish.yml +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/.gitignore +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/.pre-commit-config.yaml +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/CHANGELOG.md +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/LICENSE +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/MANIFEST.in +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/README.md +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/TODO.md +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/Makefile +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/make.bat +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/api/data.md +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/api/download.rst +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/conf.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/datasets.rst +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/developping.md +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/index.md +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/docs/source/style.css +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/mkdocs.yml +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/pyproject.toml +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/pytest.ini +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/requirements-dev.txt +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/requirements.txt +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/schema.yaml +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/setup.cfg +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/setup.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/__init__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/__main__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/annotations/__init__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/annotations/agreement.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/commands/__init__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/commands/mainstyle.css +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/commands/site.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/context.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/data/__init__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/data/csv.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/data/huggingface.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/data/ml.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/data/tensor.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/definitions.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/__init__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/archive.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/huggingface.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/links.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/manual.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/multiple.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/single.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/sync.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/download/todo.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/registry.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/search.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/settings.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/sphinx.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/stream/__init__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/stream/compress.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/stream/lines.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/templates/dataset.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/test/__init__.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/test/checks.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/test/conftest.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/test/test_annotations.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/test/test_download_handlers.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro/utils.py +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro.egg-info/dependency_links.txt +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro.egg-info/entry_points.txt +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro.egg-info/not-zip-safe +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro.egg-info/requires.txt +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/src/datamaestro.egg-info/top_level.txt +0 -0
- {datamaestro-1.0.4 → datamaestro-1.0.6}/tox.ini +0 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
Records
|
|
2
|
+
=======
|
|
3
|
+
|
|
4
|
+
Records can hold arbitrary information. They are quite useful when processing data, since
|
|
5
|
+
information can be easily added to a record.
|
|
6
|
+
|
|
7
|
+
.. code-block:: python
|
|
8
|
+
|
|
9
|
+
@define
|
|
10
|
+
class AItem(Item):
|
|
11
|
+
a: int
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@define
|
|
15
|
+
class A1Item(AItem):
|
|
16
|
+
a1: int
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@define
|
|
20
|
+
class BItem(Item):
|
|
21
|
+
b: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@define
|
|
25
|
+
class CItem(Item):
|
|
26
|
+
c: int
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@recordtypes(A1Item)
|
|
30
|
+
class ARecord(Record):
|
|
31
|
+
...
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@recordtypes(BItem)
|
|
35
|
+
class ABRecord(ARecord):
|
|
36
|
+
...
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
record = ABRecord(A1Item(1, 2), BItem(2))
|
|
40
|
+
print(record[AItem].a) # 1
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
record = record.update(BItem(3))
|
|
44
|
+
print(record[BItem].b) # 3
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
.. autoclass:: datamaestro.record.Item
|
|
48
|
+
:members:
|
|
49
|
+
|
|
50
|
+
.. autoclass:: datamaestro.record.Record
|
|
51
|
+
:members: update, has, get, from_types, from_record
|
|
52
|
+
|
|
53
|
+
.. autofunction:: datamaestro.record.recordtypes
|
|
54
|
+
|
|
55
|
+
.. autoclass:: datamaestro.record.RecordTypesCache
|
|
56
|
+
:members: __init__, update
|
|
57
|
+
|
|
58
|
+
.. autoclass:: datamaestro.record.SingleRecordTypeCache
|
|
59
|
+
:members: __init__, update
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import ClassVar, Type, TypeVar, Dict, List, Union, Optional, FrozenSet
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Item:
    """Base class for all item types"""

    @classmethod
    def __get_base__(cls: Type) -> Type:
        """Get the most generic superclass for this type of item

        The result is the last class of the MRO that derives from
        :class:`Item` without being :class:`Item` itself; ``cls`` is returned
        when no such class exists (i.e. for :class:`Item`).

        :return: The class under which items of this type are indexed
        """
        # Only trust a cache entry stored on this very class. Going through
        # ``getattr`` would also return the entry of a parent class, so a
        # single call to ``Item.__get_base__()`` would make every subclass
        # without its own entry report ``Item`` as its base.
        if (base := cls.__dict__.get("__base__cache__")) is not None:
            return base

        base = cls
        for supercls in cls.__mro__:
            if issubclass(supercls, Item) and supercls is not Item:
                base = supercls
        setattr(cls, "__base__cache__", base)
        return base
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
T = TypeVar("T", bound=Item)
|
|
23
|
+
Items = Dict[Type[T], T]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Record:
    """Associate types with entries

    A record is a composition of items; each item base class is unique.
    """

    items: Items

    def __init__(self, *items: Union[Items, T], override=False):
        """Build a record from items, or from a ready-made dictionary

        :param items: Either the items of the record, or one single dictionary
            mapping each item base class to its item
        :param override: When true, an item replaces a previous item sharing
            the same base class instead of raising an error
        :raises RuntimeError: If two items share a base class and ``override``
            is false
        :raises KeyError: If the items do not match the record item types
        """
        self.items = {}

        if len(items) == 1 and isinstance(items[0], dict):
            # The dictionary is stored as is: no copy is made
            self.items = items[0]
        else:
            for entry in items:
                base = entry.__get_base__()
                if not override and base in self.items:
                    raise RuntimeError(
                        f"The item type {base} ({entry.__class__})"
                        " is already in the record"
                    )
                self.items[base] = entry

        self.validate()

    @classmethod
    def from_record(cls, record: "Record", *items: T, override=True):
        """Build from another record

        :param record: The record whose items are reused
        :param items: Extra items; each one replaces the item of ``record``
            sharing the same base class
        :param override: Not used by this method (kept for compatibility)
        :return: A new record of type ``cls``
        """
        return cls({**record.items, **{item.__get_base__(): item for item in items}})

    def __str__(self):
        return (
            "{"
            + ", ".join(f"{key}: {value}" for key, value in self.items.items())
            + "}"
        )

    def __reduce__(self):
        """Pickle the record as an instance of its true class

        Records whose class was created on the fly are rebuilt as instances
        of ``__trueclass__``.
        """
        cls = self.__class__
        if cls.__trueclass__ is None:
            return (cls.__new__, (cls,), {"items": self.items})

        return (
            cls.__new__,
            (cls.__trueclass__,),
            {"items": self.items, "itemtypes": self.itemtypes},
        )

    def __setstate__(self, state):
        self.items = state["items"]
        # Marks the instance as coming from a pickle (see is_pickled)
        self.itemtypes = None

    def validate(self, cls: Type["Record"] = None):
        """Validate the record

        Nothing is checked when the class declares no item type.

        :param cls: The record class to validate against, defaults to the
            class of this record
        :raises KeyError: If an expected item type is missing, or if the
            record holds items that are not registered in ``cls``
        """
        cls = cls if cls is not None else self.__class__

        if cls.itemtypes:
            for itemtype in cls.itemtypes:
                try:
                    self.__getitem__(itemtype)
                except KeyError:
                    raise KeyError(f"Item of type {itemtype} is missing")

            if len(self.items) != len(cls.itemtypes):
                unregistered = [
                    item
                    for item in self.items.values()
                    if all(
                        not issubclass(item.__get_base__(), itemtype)
                        for itemtype in cls.itemtypes
                    )
                ]
                raise KeyError(
                    f"The record {cls} contains unregistered items: {unregistered}"
                )

    def get(self, key: Type[T]) -> Optional[T]:
        """Get a given item or None if it does not exist"""
        try:
            return self[key]
        except KeyError:
            return None

    def has(self, key: Type[T]) -> bool:
        """Returns True if the record has the given item type"""
        return key.__get_base__() in self.items

    def __getitem__(self, key: Type[T]) -> T:
        """Get an item given its type

        :raises KeyError: If no item shares the base class of ``key``, or if
            the stored item is not an instance of ``key``
        """
        base = key.__get_base__()
        entry = self.items[base]

        # Check if this matches the expected class
        if not isinstance(entry, key):
            raise KeyError(f"No entry with type {key}")
        return entry

    def is_pickled(self):
        """Returns True when ``itemtypes`` is None, as set by ``__setstate__``"""
        return self.itemtypes is None

    def update(self, *items: T) -> "Record":
        """Update some items

        :param items: The new items, each replacing the item sharing its base
            class (if any)
        :return: A new record of the same class; this record is left untouched
        """
        # Create our new dictionary
        item_dict = {**self.items}
        for item in items:
            item_dict[item.__get_base__()] = item

        return self.__class__(item_dict)

    # --- Class methods and variables

    itemtypes: ClassVar[Optional[FrozenSet[Type[T]]]] = frozenset()
    """For specific records, this is the list of types. The value is null when
    no validation is used (e.g. pickled records created on the fly)"""

    __trueclass__: ClassVar[Optional[Type["Record"]]] = None
    """The last class in the type hierarchy corresponding to an actual type,
    i.e. not created on the fly (only defined when the record is pickled)"""

    @classmethod
    def has_type(cls, itemtype: Type[T]):
        """Returns True if one of the record item types derives from ``itemtype``"""
        return any(issubclass(cls_itemtype, itemtype) for cls_itemtype in cls.itemtypes)

    @classmethod
    def _subclass(cls, *itemtypes: Type[T]):
        """Merge the item types of this class with new ones

        A new item type replaces the current one sharing its base class;
        otherwise it is added.

        :return: The frozen set of merged item types
        """
        cls_itemtypes = [x for x in getattr(cls, "itemtypes", [])]
        mapping = {
            itemtype.__get_base__(): ix for ix, itemtype in enumerate(cls_itemtypes)
        }

        for itemtype in itemtypes:
            if (ix := mapping.get(itemtype.__get_base__(), -1)) >= 0:
                cls_itemtypes[ix] = itemtype
            else:
                cls_itemtypes.append(itemtype)

        return frozenset(cls_itemtypes)

    @classmethod
    def from_types(cls, name: str, *itemtypes: Type[T], module: str = None):
        """Construct a new sub-record type

        :param name: The name of the subrecord
        :param itemtypes: The item types added to (or refining) those of ``cls``
        :param module: The module name, defaults to None
        :return: A new Record type
        """
        extra_dict = {}
        if module:
            extra_dict["__module__"] = module

        return type(
            name,
            (cls,),
            {
                **extra_dict,
                # _subclass already returns a frozenset
                "itemtypes": cls._subclass(*itemtypes),
                "__trueclass__": cls.__trueclass__ or cls,
            },
        )

    __RECORD_TYPES_CACHE__: Dict[frozenset, Type["Record"]] = {}

    @staticmethod
    def fromitemtypes(itemtypes: FrozenSet[Type[T]]):
        """Returns the record type associated with a set of item types

        Types are created on first use and cached, so that the same set of
        item types always gives the same record type.

        :param itemtypes: The item types of the record type
        :return: A Record sub-class
        """
        if recordtype := Record.__RECORD_TYPES_CACHE__.get(itemtypes, None):
            return recordtype

        # Sort the names: the iteration order of a set of classes is not
        # guaranteed to be stable, and the class name should not depend on it
        name = "_".join(sorted(itemtype.__name__ for itemtype in itemtypes))
        recordtype = Record.from_types(name, *itemtypes)
        Record.__RECORD_TYPES_CACHE__[itemtypes] = recordtype
        return recordtype
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def recordtypes(*types: Type[T]):
    """Adds types for a new record class

    The decorated class gets the item types of its Record base class, merged
    with ``types``.

    :param types: The item types to add to (or refine in) the record class
    :return: A class decorator
    :raises ValueError: (when decorating) if the class does not have exactly
        one Record base class
    """

    def decorate(cls: Type[Record]):
        record_bases = [base for base in cls.__bases__ if issubclass(base, Record)]
        if len(record_bases) != 1:
            raise ValueError(
                f"{cls} must have exactly one Record base class"
                f" (found {len(record_bases)})"
            )
        (base_cls,) = record_bases

        setattr(cls, "itemtypes", base_cls._subclass(*types))
        return cls

    return decorate
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class RecordTypesCacheBase:
    """Holds what is needed to derive new record types: a base name, the
    item types to add and an optional module name"""

    def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
        self._name = name
        self._itemtypes = itemtypes
        self._module = module

    def _compute(self, record_type: Type[Record]):
        """Derive a new record type from `record_type` with the extra item types"""
        derived_name = f"{self._name}_{record_type.__name__}"
        return record_type.from_types(
            derived_name, *self._itemtypes, module=self._module
        )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class RecordTypesCache(RecordTypesCacheBase):
    """Class to use when new record types need to be created on the fly by
    adding new items"""

    def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
        """Creates a new cache

        :param name: Base name for new record types
        :param module: The module name for new types, defaults to None
        """
        super().__init__(name, *itemtypes, module=module)
        self._cache: Dict[Type[Record], Type[Record]] = {}
        # Set once the pickled-record warning has been emitted
        self._warning = False

    def __call__(self, record_type: Type[Record]):
        """Returns the record type derived from `record_type`

        The derived type is computed on first use and cached afterwards.
        """
        if (updated_type := self._cache.get(record_type, None)) is None:
            self._cache[record_type] = updated_type = self._compute(record_type)
        return updated_type

    def update(self, record: Record, *items: Item, cls=None):
        """Update the record with the given items

        :param record: The record to which we add items
        :param cls: The class of the record, useful if the record has been
            pickled, defaults to None
        :return: A new record with the extra items
        """
        if cls is None:
            cls = record.__class__
            if record.is_pickled():
                # Warn only once per cache; the flag was never set before,
                # so the warning was logged for every single record
                if not self._warning:
                    logging.warning(
                        "Updating unpickled records is not recommended"
                        " (speed issues): use the pickle record class as the cls input"
                    )
                    self._warning = True

                # The record type must be inferred from the items for every
                # pickled record, whether or not the warning was logged
                itemtypes = frozenset(type(item) for item in record.items.values())
                cls = Record.fromitemtypes(itemtypes)
        else:
            assert (
                record.is_pickled()
            ), "cls can be used only when the record as been pickled"

        return self(cls)(*record.items.values(), *items, override=True)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class SingleRecordTypeCache(RecordTypesCacheBase):
    """Creates, on the fly, one record type extended with new items

    The first record type seen decides the cached type: every later call
    returns it, whatever the input record type is (this is not checked)."""

    def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
        """Creates a new cache

        :param name: Base name for the new record type
        :param module: The module name for the new type, defaults to None
        """
        super().__init__(name, *itemtypes, module=module)
        self._cache: Optional[Type[Record]] = None

    def __call__(self, record_type: Type[Record]):
        """Returns the cached record type, computing it on the first call"""
        if self._cache is not None:
            return self._cache

        self._cache = self._compute(record_type)
        return self._cache

    def update(self, record: Record, *items: Item, cls=None):
        """Returns a new record made of the record items and the given ones

        :param record: The record to which we add items
        :param cls: The class of the record, useful if the record has been
            pickled, defaults to None
        :return: A new record with the extra items
        """
        if self._cache is None:
            if cls is not None:
                assert (
                    record.is_pickled()
                ), "cls can be used only when the record as been pickled"
            else:
                # No class given: build it from the types of the items
                observed = frozenset(type(item) for item in record.items.values())
                cls = Record.fromitemtypes(observed)

        target_type = self(cls)
        return target_type(*record.items.values(), *items, override=True)
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import pickle
|
|
2
|
+
from datamaestro.record import (
|
|
3
|
+
Record,
|
|
4
|
+
Item,
|
|
5
|
+
RecordTypesCache,
|
|
6
|
+
recordtypes,
|
|
7
|
+
SingleRecordTypeCache,
|
|
8
|
+
)
|
|
9
|
+
from attrs import define
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@define
|
|
14
|
+
class AItem(Item):
|
|
15
|
+
a: int
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@define
|
|
19
|
+
class A1Item(AItem):
|
|
20
|
+
a1: int
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@define
|
|
24
|
+
class BItem(Item):
|
|
25
|
+
b: int
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@define
|
|
29
|
+
class B1Item(BItem):
|
|
30
|
+
b1: int
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@define
|
|
34
|
+
class CItem(Item):
|
|
35
|
+
c: int
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@recordtypes(A1Item)
|
|
39
|
+
class BaseRecord(Record):
|
|
40
|
+
...
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@recordtypes(BItem)
|
|
44
|
+
class MyRecord(BaseRecord):
|
|
45
|
+
...
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@recordtypes(CItem)
|
|
49
|
+
class MyRecord2(MyRecord):
|
|
50
|
+
pass
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_record_simple():
|
|
54
|
+
a = A1Item(1, 2)
|
|
55
|
+
b = BItem(4)
|
|
56
|
+
r = MyRecord(a, b)
|
|
57
|
+
assert r[AItem] is a
|
|
58
|
+
assert r[A1Item] is a
|
|
59
|
+
assert r[BItem] is b
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_record_missing_init():
|
|
63
|
+
with pytest.raises(KeyError):
|
|
64
|
+
# A1Item is missing
|
|
65
|
+
MyRecord(AItem(1), BItem(2))
|
|
66
|
+
|
|
67
|
+
with pytest.raises(KeyError):
|
|
68
|
+
MyRecord(A1Item(1, 2))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_record_update():
|
|
72
|
+
a = A1Item(1, 2)
|
|
73
|
+
b = BItem(4)
|
|
74
|
+
r = MyRecord(a, b)
|
|
75
|
+
|
|
76
|
+
r2 = r.update(BItem(3))
|
|
77
|
+
assert r is not r2
|
|
78
|
+
assert r2[BItem] is not b
|
|
79
|
+
|
|
80
|
+
r3 = MyRecord2.from_record(r, CItem(2), BItem(5))
|
|
81
|
+
assert r[BItem].b == 4
|
|
82
|
+
assert r3[BItem].b == 5
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_record_decorator():
|
|
86
|
+
MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_record_type_update():
|
|
90
|
+
itemtypes = MyRecord2.from_types("Test", B1Item).itemtypes
|
|
91
|
+
assert itemtypes == frozenset((A1Item, B1Item, CItem))
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_record_onthefly():
|
|
95
|
+
cache = RecordTypesCache("OnTheFly", CItem)
|
|
96
|
+
|
|
97
|
+
MyRecord2 = cache(MyRecord)
|
|
98
|
+
MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
|
|
99
|
+
|
|
100
|
+
assert cache(MyRecord) is MyRecord2
|
|
101
|
+
|
|
102
|
+
r = MyRecord(A1Item(1, 2), BItem(2))
|
|
103
|
+
assert cache(r.__class__) is MyRecord2
|
|
104
|
+
|
|
105
|
+
r = cache.update(r, CItem(3))
|
|
106
|
+
|
|
107
|
+
# Same record type
|
|
108
|
+
cache2 = RecordTypesCache("OnTheFly", CItem)
|
|
109
|
+
|
|
110
|
+
cache2.update(r, CItem(4))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_record_pickled():
|
|
114
|
+
# First,
|
|
115
|
+
MyRecord2 = BaseRecord.from_types("MyRecordBis", BItem)
|
|
116
|
+
r = MyRecord2(A1Item(1, 2), BItem(2))
|
|
117
|
+
r = pickle.loads(pickle.dumps(r))
|
|
118
|
+
|
|
119
|
+
assert isinstance(r, BaseRecord) and not isinstance(r, MyRecord2)
|
|
120
|
+
cache = RecordTypesCache("OnTheFly", CItem)
|
|
121
|
+
|
|
122
|
+
assert r.is_pickled()
|
|
123
|
+
|
|
124
|
+
r2 = cache.update(r, CItem(4))
|
|
125
|
+
assert not r2.is_pickled()
|
|
126
|
+
|
|
127
|
+
# Test with cls update
|
|
128
|
+
with pytest.raises(KeyError):
|
|
129
|
+
cache.update(r, CItem(4), cls=BaseRecord)
|
|
130
|
+
|
|
131
|
+
# This is OK
|
|
132
|
+
cache.update(r, CItem(4), cls=MyRecord)
|
|
133
|
+
|
|
134
|
+
# --- Test when we update a pickled record with an item of a sub-class
|
|
135
|
+
cache = RecordTypesCache("OnTheFly", B1Item)
|
|
136
|
+
r2 = cache.update(r, B1Item(1, 2))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_record_pickled_single():
|
|
140
|
+
MyRecord2 = BaseRecord.from_types("MyRecordBis", BItem)
|
|
141
|
+
r = MyRecord2(A1Item(1, 2), BItem(2))
|
|
142
|
+
r = pickle.loads(pickle.dumps(r))
|
|
143
|
+
|
|
144
|
+
cache = SingleRecordTypeCache("OnTheFly", CItem)
|
|
145
|
+
|
|
146
|
+
updated = cache.update(r, CItem(4))
|
|
147
|
+
|
|
148
|
+
assert updated.itemtypes == frozenset((A1Item, BItem, CItem))
|
|
149
|
+
|
|
150
|
+
# Even with the wrong record, no change now
|
|
151
|
+
assert cache(BaseRecord).itemtypes == frozenset((A1Item, BItem, CItem))
|
|
@@ -1,196 +0,0 @@
|
|
|
1
|
-
from typing import ClassVar, Type, TypeVar, Dict, List, Union, Optional
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Item:
|
|
5
|
-
"""Base class for all item types"""
|
|
6
|
-
|
|
7
|
-
@classmethod
|
|
8
|
-
def __get_base__(cls: Type) -> Type:
|
|
9
|
-
if base := getattr(cls, "__base__cache__", None):
|
|
10
|
-
return base
|
|
11
|
-
|
|
12
|
-
base = cls
|
|
13
|
-
for supercls in cls.__mro__:
|
|
14
|
-
if issubclass(supercls, Item) and supercls is not Item:
|
|
15
|
-
base = supercls
|
|
16
|
-
setattr(cls, "__base__cache__", base)
|
|
17
|
-
return base
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
T = TypeVar("T", bound=Item)
|
|
21
|
-
Items = Dict[Type[T], T]
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class Record:
|
|
25
|
-
"""Associate types with entries"""
|
|
26
|
-
|
|
27
|
-
items: Items
|
|
28
|
-
|
|
29
|
-
def __init__(self, *items: Union[Items, T], no_check=False):
|
|
30
|
-
self.items = {}
|
|
31
|
-
|
|
32
|
-
if len(items) == 1 and isinstance(items[0], dict):
|
|
33
|
-
self.items = items[0]
|
|
34
|
-
else:
|
|
35
|
-
for item in items:
|
|
36
|
-
self.add(item, update_only=True)
|
|
37
|
-
|
|
38
|
-
# Check if the record is constructured
|
|
39
|
-
if not no_check:
|
|
40
|
-
self.validate()
|
|
41
|
-
|
|
42
|
-
def __new__(cls, *items: Union[Items, T], no_check=False):
|
|
43
|
-
# Without this, impossible to pickle objects
|
|
44
|
-
if cls.__trueclass__ is not None:
|
|
45
|
-
record = object.__new__(cls.__trueclass__)
|
|
46
|
-
record.__init__(*items, no_check=True)
|
|
47
|
-
if not no_check:
|
|
48
|
-
record.validate(cls=cls)
|
|
49
|
-
return record
|
|
50
|
-
|
|
51
|
-
return object.__new__(cls)
|
|
52
|
-
|
|
53
|
-
def __str__(self):
|
|
54
|
-
return (
|
|
55
|
-
"{"
|
|
56
|
-
+ ", ".join(f"{key}: {value}" for key, value in self.items.items())
|
|
57
|
-
+ "}"
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
def validate(self, cls: Type["Record"] = None):
|
|
61
|
-
"""Validate the record"""
|
|
62
|
-
cls = cls if cls is not None else self.__class__
|
|
63
|
-
|
|
64
|
-
if cls.itemtypes:
|
|
65
|
-
for itemtype in cls.itemtypes:
|
|
66
|
-
try:
|
|
67
|
-
self.__getitem__(itemtype)
|
|
68
|
-
except KeyError:
|
|
69
|
-
raise KeyError(f"Item of type {itemtype} is missing")
|
|
70
|
-
|
|
71
|
-
if len(self.items) != len(cls.itemtypes):
|
|
72
|
-
unregistered = [
|
|
73
|
-
item
|
|
74
|
-
for item in self.items.values()
|
|
75
|
-
if all(
|
|
76
|
-
not issubclass(item.__get_base__(), itemtype)
|
|
77
|
-
for itemtype in cls.itemtypes
|
|
78
|
-
)
|
|
79
|
-
]
|
|
80
|
-
raise RuntimeError(
|
|
81
|
-
f"The record {cls} contains unregistered items: {unregistered}"
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
def get(self, key: Type[T]) -> Optional[T]:
|
|
85
|
-
try:
|
|
86
|
-
return self[key]
|
|
87
|
-
except KeyError:
|
|
88
|
-
return None
|
|
89
|
-
|
|
90
|
-
def has(self, key: Type[T]) -> bool:
|
|
91
|
-
return key.__get_base__() in self.items
|
|
92
|
-
|
|
93
|
-
def __getitem__(self, key: Type[T]) -> T:
|
|
94
|
-
"""Get an item given its type"""
|
|
95
|
-
base = key.__get_base__()
|
|
96
|
-
entry = self.items[base]
|
|
97
|
-
|
|
98
|
-
# Check if this matches the expected class
|
|
99
|
-
if not isinstance(entry, key):
|
|
100
|
-
raise KeyError(f"No entry with type {key}")
|
|
101
|
-
return entry
|
|
102
|
-
|
|
103
|
-
def add(self, *entries: T, update_only=False, no_check=False) -> "Record":
|
|
104
|
-
"""Update the record with this new entry, returns a new record if
|
|
105
|
-
it exists"""
|
|
106
|
-
|
|
107
|
-
for entry in entries:
|
|
108
|
-
# Returns a new record if the item exists
|
|
109
|
-
base = entry.__get_base__()
|
|
110
|
-
if base in self.items:
|
|
111
|
-
if update_only:
|
|
112
|
-
raise RuntimeError(
|
|
113
|
-
f"The item type {base} ({entry.__class__})"
|
|
114
|
-
" is already in the record"
|
|
115
|
-
)
|
|
116
|
-
return self.__class__({**self.items, base: entry}, no_check=no_check)
|
|
117
|
-
|
|
118
|
-
# No, just update
|
|
119
|
-
self.items[base] = entry
|
|
120
|
-
return self
|
|
121
|
-
|
|
122
|
-
# --- Class methods and variables
|
|
123
|
-
|
|
124
|
-
itemtypes: ClassVar[List[Type[T]]] = []
|
|
125
|
-
"""For specific records, this is the list of types"""
|
|
126
|
-
|
|
127
|
-
__trueclass__: ClassVar[Optional[Type["Record"]]] = None
|
|
128
|
-
"""True when the class is defined in a module"""
|
|
129
|
-
|
|
130
|
-
@classmethod
|
|
131
|
-
def has_type(cls, itemtype: Type[T]):
|
|
132
|
-
return any(issubclass(cls_itemtype, itemtype) for cls_itemtype in cls.itemtypes)
|
|
133
|
-
|
|
134
|
-
@classmethod
|
|
135
|
-
def _subclass(cls, *itemtypes: Type[T]):
|
|
136
|
-
cls_itemtypes = [x for x in getattr(cls, "itemtypes", [])]
|
|
137
|
-
mapping = {
|
|
138
|
-
ix: itemtype.__get_base__() for ix, itemtype in enumerate(cls_itemtypes)
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
for itemtype in itemtypes:
|
|
142
|
-
if ix := mapping.get(itemtype.__get_base__(), None):
|
|
143
|
-
cls_itemtypes[ix] = itemtype
|
|
144
|
-
else:
|
|
145
|
-
cls_itemtypes.append(itemtype)
|
|
146
|
-
return cls_itemtypes
|
|
147
|
-
|
|
148
|
-
@classmethod
|
|
149
|
-
def from_types(cls, name: str, *itemtypes: Type[T], module: str = None):
|
|
150
|
-
extra_dict = {}
|
|
151
|
-
if module:
|
|
152
|
-
extra_dict["__module__"] = module
|
|
153
|
-
return type(
|
|
154
|
-
name,
|
|
155
|
-
(cls,),
|
|
156
|
-
{
|
|
157
|
-
**extra_dict,
|
|
158
|
-
"__trueclass__": cls.__trueclass__ or cls,
|
|
159
|
-
"itemtypes": cls._subclass(*itemtypes),
|
|
160
|
-
},
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def recordtypes(*types: List[Type[T]]):
|
|
165
|
-
"""Adds types for a new record class"""
|
|
166
|
-
|
|
167
|
-
def decorate(cls: Type[Record]):
|
|
168
|
-
(base_cls,) = [base for base in cls.__bases__ if issubclass(base, Record)]
|
|
169
|
-
|
|
170
|
-
setattr(cls, "itemtypes", base_cls._subclass(*types))
|
|
171
|
-
return cls
|
|
172
|
-
|
|
173
|
-
return decorate
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
class RecordTypesCache:
|
|
177
|
-
"""Class to use when new record types need to be created on the fly by
|
|
178
|
-
adding new items"""
|
|
179
|
-
|
|
180
|
-
def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
|
|
181
|
-
self._module = module
|
|
182
|
-
self._name = name
|
|
183
|
-
self._itemtypes = itemtypes
|
|
184
|
-
self._cache: Dict[Type[Record], Type[Record]] = {}
|
|
185
|
-
|
|
186
|
-
def __getitem__(self, record_type: Type[Record]):
|
|
187
|
-
if updated_type := self._cache.get(record_type, None):
|
|
188
|
-
return updated_type
|
|
189
|
-
|
|
190
|
-
updated_type = record_type.from_types(
|
|
191
|
-
f"{self._name}_{record_type.__name__}",
|
|
192
|
-
*self._itemtypes,
|
|
193
|
-
module=self._module,
|
|
194
|
-
)
|
|
195
|
-
self._cache[record_type] = updated_type
|
|
196
|
-
return updated_type
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
from datamaestro.record import Record, Item, RecordTypesCache, recordtypes
|
|
2
|
-
from attrs import define
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
@define
|
|
7
|
-
class AItem(Item):
|
|
8
|
-
a: int
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@define
|
|
12
|
-
class A1Item(AItem):
|
|
13
|
-
a1: int
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@define
|
|
17
|
-
class BItem(Item):
|
|
18
|
-
b: int
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@define
|
|
22
|
-
class CItem(Item):
|
|
23
|
-
c: int
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class MyRecord(Record):
|
|
27
|
-
itemtypes = [A1Item, BItem]
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
@recordtypes(CItem)
|
|
31
|
-
class MyRecord2(MyRecord):
|
|
32
|
-
pass
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def test_record_simple():
|
|
36
|
-
a = A1Item(1, 2)
|
|
37
|
-
b = BItem(4)
|
|
38
|
-
r = MyRecord(a, b)
|
|
39
|
-
assert r[AItem] is a
|
|
40
|
-
assert r[A1Item] is a
|
|
41
|
-
assert r[BItem] is b
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def test_record_missing_init():
|
|
45
|
-
with pytest.raises(KeyError):
|
|
46
|
-
MyRecord(AItem(1), BItem(2))
|
|
47
|
-
|
|
48
|
-
with pytest.raises(KeyError):
|
|
49
|
-
MyRecord(A1Item(1, 2))
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def test_record_update():
|
|
53
|
-
a = A1Item(1, 2)
|
|
54
|
-
b = BItem(4)
|
|
55
|
-
r = MyRecord(a, b)
|
|
56
|
-
|
|
57
|
-
r2 = r.add(BItem(3))
|
|
58
|
-
assert r is not r2
|
|
59
|
-
assert r2[BItem] is not b
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def test_record_decorator():
|
|
63
|
-
MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def test_record_newtype():
|
|
67
|
-
MyRecord2 = MyRecord.from_types("MyRecord2", CItem)
|
|
68
|
-
r = MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
|
|
69
|
-
|
|
70
|
-
# For a dynamic class, we should have the same MyRecord type
|
|
71
|
-
assert r.__class__ is MyRecord
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def test_record_onthefly():
|
|
75
|
-
cache = RecordTypesCache("OnTheFly", CItem)
|
|
76
|
-
|
|
77
|
-
MyRecord2 = cache[MyRecord]
|
|
78
|
-
r = MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
|
|
79
|
-
assert r.__class__ is MyRecord
|
|
80
|
-
|
|
81
|
-
assert cache[MyRecord] is MyRecord2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|