datamaestro 1.0.5-py3-none-any.whl → 1.0.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamaestro/record.py CHANGED
@@ -1,4 +1,5 @@
-from typing import ClassVar, Type, TypeVar, Dict, List, Union, Optional
+import logging
+from typing import ClassVar, Type, TypeVar, Dict, List, Union, Optional, FrozenSet


 class Item:
@@ -6,6 +7,7 @@ class Item:

     @classmethod
     def __get_base__(cls: Type) -> Type:
+        """Get the most generic superclass for this type of item"""
         if base := getattr(cls, "__base__cache__", None):
             return base

@@ -22,33 +24,36 @@ Items = Dict[Type[T], T]


 class Record:
-    """Associate types with entries"""
+    """Associate types with entries
+
+    A record is a composition of items; each item base class is unique.
+    """

     items: Items

-    def __init__(self, *items: Union[Items, T], no_check=False):
+    def __init__(self, *items: Union[Items, T], override=False):
         self.items = {}

         if len(items) == 1 and isinstance(items[0], dict):
+            # Just copy the dictionary
             self.items = items[0]
         else:
-            for item in items:
-                self._add(item, update_only=True)
-
-            # Check if the record is constructured
-            if not no_check:
-                self.validate()
+            for entry in items:
+                # Returns a new record if the item exists
+                base = entry.__get_base__()
+                if not override and base in self.items:
+                    raise RuntimeError(
+                        f"The item type {base} ({entry.__class__})"
+                        " is already in the record"
+                    )
+                self.items[base] = entry

-    def __new__(cls, *items: Union[Items, T], no_check=False):
-        # Without this, impossible to pickle objects
-        if cls.__trueclass__ is not None:
-            record = object.__new__(cls.__trueclass__)
-            record.__init__(*items, no_check=True)
-            if not no_check:
-                record.validate(cls=cls)
-            return record
+        self.validate()

-        return object.__new__(cls)
+    @classmethod
+    def from_record(cls, record: "Record", *items: T, override=True):
+        """Build from another record"""
+        return cls({**record.items, **{item.__get_base__(): item for item in items}})

     def __str__(self):
         return (
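For orientation, here is a minimal usage sketch of the reworked constructor (override instead of no_check) and the new from_record classmethod; the AItem/BItem/MyRecord names are hypothetical and simply mirror the pattern of the test suite further down:

    from attrs import define
    from datamaestro.record import Item, Record, recordtypes

    @define
    class AItem(Item):
        a: int

    @define
    class BItem(Item):
        b: int

    @recordtypes(AItem, BItem)
    class MyRecord(Record):
        ...

    r = MyRecord(AItem(1), BItem(2))        # one item per base type, validated on creation
    r2 = MyRecord.from_record(r, BItem(3))  # copy of r with the BItem entry replaced
    assert r[BItem].b == 2 and r2[BItem].b == 3

Passing two items with the same base type now raises a RuntimeError unless override=True is given.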
@@ -57,6 +62,21 @@ class Record:
             + "}"
         )

+    def __reduce__(self):
+        cls = self.__class__
+        if cls.__trueclass__ is None:
+            return (cls.__new__, (cls.__trueclass__ or cls,), {"items": self.items})
+
+        return (
+            cls.__new__,
+            (cls.__trueclass__ or cls,),
+            {"items": self.items, "itemtypes": self.itemtypes},
+        )
+
+    def __setstate__(self, state):
+        self.items = state["items"]
+        self.itemtypes = None
+
     def validate(self, cls: Type["Record"] = None):
         """Validate the record"""
         cls = cls if cls is not None else self.__class__
@@ -77,17 +97,19 @@ class Record:
                     for itemtype in cls.itemtypes
                 )
             ]
-            raise RuntimeError(
+            raise KeyError(
                 f"The record {cls} contains unregistered items: {unregistered}"
             )

     def get(self, key: Type[T]) -> Optional[T]:
+        """Get a given item or None if it does not exist"""
         try:
             return self[key]
         except KeyError:
             return None

     def has(self, key: Type[T]) -> bool:
+        """Returns True if the record has the given item type"""
         return key.__get_base__() in self.items

     def __getitem__(self, key: Type[T]) -> T:
@@ -100,35 +122,27 @@ class Record:
             raise KeyError(f"No entry with type {key}")
         return entry

-    def add(self, *entries: T, update_only=False, no_check=False) -> "Record":
-        """Update the record with these new items, and returns a new record if
-        any item already exists"""
-        return self._add(*entries, update_only=update_only)
-
-    def _add(self, *entries: T, update_only=False, no_check=False) -> "Record":
-        """Internal method for updating records"""
-        for entry in entries:
-            # Returns a new record if the item exists
-            base = entry.__get_base__()
-            if base in self.items:
-                if update_only:
-                    raise RuntimeError(
-                        f"The item type {base} ({entry.__class__})"
-                        " is already in the record"
-                    )
-                return self.__class__({**self.items, base: entry}, no_check=no_check)
+    def is_pickled(self):
+        return self.itemtypes is None

-            # No, just update
-            self.items[base] = entry
-        return self
+    def update(self, *items: T) -> "Record":
+        """Update some items"""
+        # Create our new dictionary
+        item_dict = {**self.items}
+        for item in items:
+            item_dict[item.__get_base__()] = item
+
+        return self.__class__(item_dict)

     # --- Class methods and variables

-    itemtypes: ClassVar[List[Type[T]]] = []
-    """For specific records, this is the list of types"""
+    itemtypes: ClassVar[Optional[FrozenSet[Type[T]]]] = []
+    """For specific records, this is the list of types. The value is null when
+    no validation is used (e.g. pickled records created on the fly)"""

     __trueclass__: ClassVar[Optional[Type["Record"]]] = None
-    """True when the class is defined in a module"""
+    """The last class in the type hierarchy corresponding to an actual type,
+    i.e. not created on the fly (only defined when the record is pickled)"""

     @classmethod
     def has_type(cls, itemtype: Type[T]):
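The __reduce__/__setstate__ pair added above, together with the new is_pickled and update helpers, lets records round-trip through pickle: the restored instance keeps its items but drops item-type validation (its itemtypes becomes None). A rough sketch, again with hypothetical item classes:

    import pickle
    from attrs import define
    from datamaestro.record import Item, Record, recordtypes

    @define
    class AItem(Item):
        a: int

    @recordtypes(AItem)
    class MyRecord(Record):
        ...

    r = MyRecord(AItem(1))
    assert not r.is_pickled()

    restored = pickle.loads(pickle.dumps(r))  # goes through __reduce__ / __setstate__
    assert restored.is_pickled()              # itemtypes is None on the restored instance

    r2 = restored.update(AItem(2))            # update() returns a new record, restored is untouched
    assert restored[AItem].a == 1 and r2[AItem].a == 2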
@@ -138,31 +152,52 @@ class Record:
     def _subclass(cls, *itemtypes: Type[T]):
         cls_itemtypes = [x for x in getattr(cls, "itemtypes", [])]
         mapping = {
-            ix: itemtype.__get_base__() for ix, itemtype in enumerate(cls_itemtypes)
+            itemtype.__get_base__(): ix for ix, itemtype in enumerate(cls_itemtypes)
         }

         for itemtype in itemtypes:
-            if ix := mapping.get(itemtype.__get_base__(), None):
+            if (ix := mapping.get(itemtype.__get_base__(), -1)) >= 0:
                 cls_itemtypes[ix] = itemtype
             else:
                 cls_itemtypes.append(itemtype)
-        return cls_itemtypes
+
+        return frozenset(cls_itemtypes)

     @classmethod
     def from_types(cls, name: str, *itemtypes: Type[T], module: str = None):
+        """Construct a new sub-record type
+
+        :param name: The name of the subrecord
+        :param module: The module name, defaults to None
+        :return: A new Record type
+        """
         extra_dict = {}
         if module:
             extra_dict["__module__"] = module
+
         return type(
             name,
             (cls,),
             {
                 **extra_dict,
+                "itemtypes": frozenset(cls._subclass(*itemtypes)),
                 "__trueclass__": cls.__trueclass__ or cls,
-                "itemtypes": cls._subclass(*itemtypes),
             },
         )

+    __RECORD_TYPES_CACHE__: Dict[frozenset, Type["Record"]] = {}
+
+    @staticmethod
+    def fromitemtypes(itemtypes: FrozenSet[T]):
+        if recordtype := Record.__RECORD_TYPES_CACHE__.get(itemtypes, None):
+            return recordtype
+
+        recordtype = Record.from_types(
+            "_".join(itemtype.__name__ for itemtype in itemtypes), *itemtypes
+        )
+        Record.__RECORD_TYPES_CACHE__[itemtypes] = recordtype
+        return recordtype
+

 def recordtypes(*types: List[Type[T]]):
     """Adds types for a new record class"""
@@ -176,27 +211,102 @@ def recordtypes(*types: List[Type[T]]):
     return decorate


-class RecordTypesCache:
-    """Class to use when new record types need to be created on the fly by
-    adding new items"""
-
+class RecordTypesCacheBase:
     def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
         self._module = module
         self._name = name
         self._itemtypes = itemtypes
-        self._cache: Dict[Type[Record], Type[Record]] = {}
-
-    def __getitem__(self, record_type: Type[Record]):
-        if updated_type := self._cache.get(record_type, None):
-            return updated_type

+    def _compute(self, record_type: Type[Record]):
         updated_type = record_type.from_types(
             f"{self._name}_{record_type.__name__}",
             *self._itemtypes,
             module=self._module,
         )
-        self._cache[record_type] = updated_type
         return updated_type

-    def update(self, record: Record, *items: Item):
-        return self[record.__class__](*record.items.values(), *items)
+
+class RecordTypesCache(RecordTypesCacheBase):
+    """Class to use when new record types need to be created on the fly by
+    adding new items"""
+
+    def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
+        """Creates a new cache
+
+        :param name: Base name for new record types
+        :param module: The module name for new types, defaults to None
+        """
+        super().__init__(name, *itemtypes, module=module)
+        self._cache: Dict[Type[Record], Type[Record]] = {}
+        self._warning = False
+
+    def __call__(self, record_type: Type[Record]):
+        if (updated_type := self._cache.get(record_type, None)) is None:
+            self._cache[record_type] = updated_type = self._compute(record_type)
+        return updated_type
+
+    def update(self, record: Record, *items: Item, cls=None):
+        """Update the record with the given items
+
+        :param record: The record to which we add items
+        :param cls: The class of the record, useful if the record has been
+            pickled, defaults to None
+        :return: A new record with the extra items
+        """
+        if cls is None:
+            cls = record.__class__
+            if record.is_pickled() and not self._warning:
+                logging.warning(
+                    "Updating unpickled records is not recommended"
+                    " (speed issues): use the pickle record class as the cls input"
+                )
+                itemtypes = frozenset(type(item) for item in record.items.values())
+                cls = Record.fromitemtypes(itemtypes)
+        else:
+            assert (
+                record.is_pickled()
+            ), "cls can be used only when the record as been pickled"
+
+        return self(cls)(*record.items.values(), *items, override=True)
+
+
+class SingleRecordTypeCache(RecordTypesCacheBase):
+    """Class to use when new record types need to be created on the fly by
+    adding new items
+
+    This class supposes that the input record type is always the same (no check
+    is done to ensure this)"""
+
+    def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
+        """Creates a new cache
+
+        :param name: Base name for new record types
+        :param module: The module name for new types, defaults to None
+        """
+        super().__init__(name, *itemtypes, module=module)
+        self._cache: Optional[Type[Record]] = None
+
+    def __call__(self, record_type: Type[Record]):
+        if self._cache is None:
+            self._cache = self._compute(record_type)
+        return self._cache
+
+    def update(self, record: Record, *items: Item, cls=None):
+        """Update the record with the given items
+
+        :param record: The record to which we add items
+        :param cls: The class of the record, useful if the record has been
+            pickled, defaults to None
+        :return: A new record with the extra items
+        """
+        if self._cache is None:
+            if cls is None:
+                cls = record.__class__
+                itemtypes = frozenset(type(item) for item in record.items.values())
+                cls = Record.fromitemtypes(itemtypes)
+            else:
+                assert (
+                    record.is_pickled()
+                ), "cls can be used only when the record as been pickled"
+
+        return self(cls)(*record.items.values(), *items, override=True)
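RecordTypesCache is now called (cache(record_type)) rather than indexed, and its update method can rebuild a record type from the item instances when the input record has been unpickled; SingleRecordTypeCache is the cheaper variant for a single, known input record type. A usage sketch along the lines of the tests below (item and record names are again hypothetical):

    from attrs import define
    from datamaestro.record import Item, Record, RecordTypesCache, recordtypes

    @define
    class AItem(Item):
        a: int

    @define
    class CItem(Item):
        c: int

    @recordtypes(AItem)
    class MyRecord(Record):
        ...

    cache = RecordTypesCache("OnTheFly", CItem)

    # The generated subtype is created once per input record type
    Extended = cache(MyRecord)
    assert cache(MyRecord) is Extended

    # update() returns a record of the generated type with the extra item added
    r = cache.update(MyRecord(AItem(1)), CItem(3))
    assert r[AItem].a == 1 and r[CItem].c == 3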
datamaestro/test/test_record.py CHANGED
@@ -1,4 +1,11 @@
-from datamaestro.record import Record, Item, RecordTypesCache, recordtypes
+import pickle
+from datamaestro.record import (
+    Record,
+    Item,
+    RecordTypesCache,
+    recordtypes,
+    SingleRecordTypeCache,
+)
 from attrs import define
 import pytest

@@ -18,13 +25,24 @@ class BItem(Item):
     b: int


+@define
+class B1Item(BItem):
+    b1: int
+
+
 @define
 class CItem(Item):
     c: int


-class MyRecord(Record):
-    itemtypes = [A1Item, BItem]
+@recordtypes(A1Item)
+class BaseRecord(Record):
+    ...
+
+
+@recordtypes(BItem)
+class MyRecord(BaseRecord):
+    ...


 @recordtypes(CItem)
@@ -43,6 +61,7 @@ def test_record_simple():

 def test_record_missing_init():
     with pytest.raises(KeyError):
+        # A1Item is missing
         MyRecord(AItem(1), BItem(2))

     with pytest.raises(KeyError):
@@ -54,33 +73,79 @@ def test_record_update():
     b = BItem(4)
     r = MyRecord(a, b)

-    r2 = r.add(BItem(3))
+    r2 = r.update(BItem(3))
     assert r is not r2
     assert r2[BItem] is not b

+    r3 = MyRecord2.from_record(r, CItem(2), BItem(5))
+    assert r[BItem].b == 4
+    assert r3[BItem].b == 5
+

 def test_record_decorator():
     MyRecord2(A1Item(1, 2), BItem(2), CItem(3))


-def test_record_newtype():
-    MyRecord2 = MyRecord.from_types("MyRecord2", CItem)
-    r = MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
-
-    # For a dynamic class, we should have the same MyRecord type
-    assert r.__class__ is MyRecord
+def test_record_type_update():
+    itemtypes = MyRecord2.from_types("Test", B1Item).itemtypes
+    assert itemtypes == frozenset((A1Item, B1Item, CItem))


 def test_record_onthefly():
     cache = RecordTypesCache("OnTheFly", CItem)

-    MyRecord2 = cache[MyRecord]
-    r2 = MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
-    assert r2.__class__ is MyRecord
+    MyRecord2 = cache(MyRecord)
+    MyRecord2(A1Item(1, 2), BItem(2), CItem(3))

-    assert cache[MyRecord] is MyRecord2
+    assert cache(MyRecord) is MyRecord2

     r = MyRecord(A1Item(1, 2), BItem(2))
-    assert cache[r.__class__] is MyRecord2
+    assert cache(r.__class__) is MyRecord2
+
+    r = cache.update(r, CItem(3))
+
+    # Same record type
+    cache2 = RecordTypesCache("OnTheFly", CItem)
+
+    cache2.update(r, CItem(4))
+
+
+def test_record_pickled():
+    # First,
+    MyRecord2 = BaseRecord.from_types("MyRecordBis", BItem)
+    r = MyRecord2(A1Item(1, 2), BItem(2))
+    r = pickle.loads(pickle.dumps(r))
+
+    assert isinstance(r, BaseRecord) and not isinstance(r, MyRecord2)
+    cache = RecordTypesCache("OnTheFly", CItem)
+
+    assert r.is_pickled()
+
+    r2 = cache.update(r, CItem(4))
+    assert not r2.is_pickled()
+
+    # Test with cls update
+    with pytest.raises(KeyError):
+        cache.update(r, CItem(4), cls=BaseRecord)
+
+    # This is OK
+    cache.update(r, CItem(4), cls=MyRecord)
+
+    # --- Test when we update a pickled record with an of a sub-class
+    cache = RecordTypesCache("OnTheFly", B1Item)
+    r2 = cache.update(r, B1Item(1, 2))
+
+
+def test_record_pickled_single():
+    MyRecord2 = BaseRecord.from_types("MyRecordBis", BItem)
+    r = MyRecord2(A1Item(1, 2), BItem(2))
+    r = pickle.loads(pickle.dumps(r))
+
+    cache = SingleRecordTypeCache("OnTheFly", CItem)
+
+    updated = cache.update(r, CItem(4))
+
+    assert updated.itemtypes == frozenset((A1Item, BItem, CItem))

-    cache.update(r, CItem(3))
+    # Even with the wrong record, no change now
+    assert cache(BaseRecord).itemtypes == frozenset((A1Item, BItem, CItem))
datamaestro/version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '1.0.5'
-__version_tuple__ = version_tuple = (1, 0, 5)
+__version__ = version = '1.0.6'
+__version_tuple__ = version_tuple = (1, 0, 6)
datamaestro-1.0.5.dist-info/METADATA → datamaestro-1.0.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamaestro
-Version: 1.0.5
+Version: 1.0.6
 Summary: "Dataset management command line and API"
 Home-page: https://github.com/experimaestro/datamaestro
 Author: Benjamin Piwowarski
datamaestro-1.0.5.dist-info/RECORD → datamaestro-1.0.6.dist-info/RECORD CHANGED
@@ -2,13 +2,13 @@ datamaestro/__init__.py,sha256=9M5hA6FVngduJBcjInvJWQM8n0cqapXAFPzfRLHR74c,237
 datamaestro/__main__.py,sha256=tJTf1sTWKRIatvBcHlWDIZRZodAZ2B2zkD01pD89MYk,9024
 datamaestro/context.py,sha256=imDAs9v5yR_O871cdM4e5Q0KBSHmT-j5qc83sz6npWI,13210
 datamaestro/definitions.py,sha256=ORlD3kxLvmw1EScCepzGQOHjsTiC3yvY-z_7LxKDp08,15415
-datamaestro/record.py,sha256=xFFyXRbV_cm52o3KUDFN0G7TseRgqO7IpFfIAJlmCh0,6386
+datamaestro/record.py,sha256=gGOJAKHKc3SN3zctVZ6Bq21cuLnWymc2cE9ukeyrZzs,10475
 datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
 datamaestro/search.py,sha256=PMceNp5hcp0dlzs4cLb6LJT7XHrdXo58oO7oTucawbE,2887
 datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
 datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
 datamaestro/utils.py,sha256=Y3_aqeOHW8vuifwggGWJfgONyDG1FLX7ONAnX85jENI,6511
-datamaestro/version.py,sha256=ClFIlbf5O23dVWC6oD6t3m2n6nB22IOt3V9t-YX_mG0,411
+datamaestro/version.py,sha256=T17ZEPR5Omt5RLA0TuZWTufRi58PzGIPek75nFcuMQY,411
 datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
 datamaestro/annotations/agreement.py,sha256=IPHjXX8ld5blvSDNXGt8RfuHDgVAITN52gJZh93AY4g,723
 datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -37,10 +37,10 @@ datamaestro/test/checks.py,sha256=1eTkz4YJhAPOcnQSsz4vPnvzwwfrEnpn6H_s1ADISpo,17
 datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,767
 datamaestro/test/test_annotations.py,sha256=kRPUmS_UAN6JSSVPUwV4OM_LEuEUHF1OcLSiYXjsKjw,246
 datamaestro/test/test_download_handlers.py,sha256=Qqm-fML1KVp6dPwAUcH6xzi_dpQIshvROzviSYCUzc0,603
-datamaestro/test/test_record.py,sha256=78wgQoVyXiej0MmaDwHPTlqXh5bqlWU6AHTTY89qLPU,1571
-datamaestro-1.0.5.dist-info/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
-datamaestro-1.0.5.dist-info/METADATA,sha256=7dr7-Q8ld_KY5IUkv3dwKamBmG0vqns1u6wbW9wXEPA,8999
-datamaestro-1.0.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-datamaestro-1.0.5.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
-datamaestro-1.0.5.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
-datamaestro-1.0.5.dist-info/RECORD,,
+datamaestro/test/test_record.py,sha256=zo8faY8xqRRE916mXIGaq-WXhhvQrNFmDJTzB3CCFEk,3050
+datamaestro-1.0.6.dist-info/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
+datamaestro-1.0.6.dist-info/METADATA,sha256=U_fiwzaUSjEV5s4DOp3dRn9MzXesAkQ2o_ZpXCZIW8Y,8999
+datamaestro-1.0.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamaestro-1.0.6.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
+datamaestro-1.0.6.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
+datamaestro-1.0.6.dist-info/RECORD,,