datamaestro 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamaestro/record.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import ClassVar, Type, TypeVar, Dict, List, Union, Optional
1
+ import logging
2
+ from typing import ClassVar, Type, TypeVar, Dict, List, Union, Optional, FrozenSet
2
3
 
3
4
 
4
5
  class Item:
@@ -6,6 +7,7 @@ class Item:
6
7
 
7
8
  @classmethod
8
9
  def __get_base__(cls: Type) -> Type:
10
+ """Get the most generic superclass for this type of item"""
9
11
  if base := getattr(cls, "__base__cache__", None):
10
12
  return base
11
13
 
@@ -22,33 +24,36 @@ Items = Dict[Type[T], T]
22
24
 
23
25
 
24
26
  class Record:
25
- """Associate types with entries"""
27
+ """Associate types with entries
28
+
29
+ A record is a composition of items; each item base class is unique.
30
+ """
26
31
 
27
32
  items: Items
28
33
 
29
- def __init__(self, *items: Union[Items, T], no_check=False):
34
+ def __init__(self, *items: Union[Items, T], override=False):
30
35
  self.items = {}
31
36
 
32
37
  if len(items) == 1 and isinstance(items[0], dict):
38
+ # Just copy the dictionary
33
39
  self.items = items[0]
34
40
  else:
35
- for item in items:
36
- self.add(item, update_only=True)
37
-
38
- # Check if the record is constructured
39
- if not no_check:
40
- self.validate()
41
+ for entry in items:
42
+ # Refuse to replace an existing item of the same base type unless override is set
43
+ base = entry.__get_base__()
44
+ if not override and base in self.items:
45
+ raise RuntimeError(
46
+ f"The item type {base} ({entry.__class__})"
47
+ " is already in the record"
48
+ )
49
+ self.items[base] = entry
41
50
 
42
- def __new__(cls, *items: Union[Items, T], no_check=False):
43
- # Without this, impossible to pickle objects
44
- if cls.__trueclass__ is not None:
45
- record = object.__new__(cls.__trueclass__)
46
- record.__init__(*items, no_check=True)
47
- if not no_check:
48
- record.validate(cls=cls)
49
- return record
51
+ self.validate()
50
52
 
51
- return object.__new__(cls)
53
+ @classmethod
54
+ def from_record(cls, record: "Record", *items: T, override=True):
55
+ """Build from another record"""
56
+ return cls({**record.items, **{item.__get_base__(): item for item in items}})
52
57
 
53
58
  def __str__(self):
54
59
  return (
@@ -57,6 +62,21 @@ class Record:
57
62
  + "}"
58
63
  )
59
64
 
65
+ def __reduce__(self):
66
+ cls = self.__class__
67
+ if cls.__trueclass__ is None:
68
+ return (cls.__new__, (cls.__trueclass__ or cls,), {"items": self.items})
69
+
70
+ return (
71
+ cls.__new__,
72
+ (cls.__trueclass__ or cls,),
73
+ {"items": self.items, "itemtypes": self.itemtypes},
74
+ )
75
+
76
+ def __setstate__(self, state):
77
+ self.items = state["items"]
78
+ self.itemtypes = None
79
+
60
80
  def validate(self, cls: Type["Record"] = None):
61
81
  """Validate the record"""
62
82
  cls = cls if cls is not None else self.__class__
@@ -77,17 +97,19 @@ class Record:
77
97
  for itemtype in cls.itemtypes
78
98
  )
79
99
  ]
80
- raise RuntimeError(
100
+ raise KeyError(
81
101
  f"The record {cls} contains unregistered items: {unregistered}"
82
102
  )
83
103
 
84
104
  def get(self, key: Type[T]) -> Optional[T]:
105
+ """Get a given item or None if it does not exist"""
85
106
  try:
86
107
  return self[key]
87
108
  except KeyError:
88
109
  return None
89
110
 
90
111
  def has(self, key: Type[T]) -> bool:
112
+ """Returns True if the record has the given item type"""
91
113
  return key.__get_base__() in self.items
92
114
 
93
115
  def __getitem__(self, key: Type[T]) -> T:
@@ -100,32 +122,27 @@ class Record:
100
122
  raise KeyError(f"No entry with type {key}")
101
123
  return entry
102
124
 
103
- def add(self, *entries: T, update_only=False, no_check=False) -> "Record":
104
- """Update the record with this new entry, returns a new record if
105
- it exists"""
125
+ def is_pickled(self):
126
+ return self.itemtypes is None
106
127
 
107
- for entry in entries:
108
- # Returns a new record if the item exists
109
- base = entry.__get_base__()
110
- if base in self.items:
111
- if update_only:
112
- raise RuntimeError(
113
- f"The item type {base} ({entry.__class__})"
114
- " is already in the record"
115
- )
116
- return self.__class__({**self.items, base: entry}, no_check=no_check)
128
+ def update(self, *items: T) -> "Record":
129
+ """Update some items"""
130
+ # Create our new dictionary
131
+ item_dict = {**self.items}
132
+ for item in items:
133
+ item_dict[item.__get_base__()] = item
117
134
 
118
- # No, just update
119
- self.items[base] = entry
120
- return self
135
+ return self.__class__(item_dict)
121
136
 
122
137
  # --- Class methods and variables
123
138
 
124
- itemtypes: ClassVar[List[Type[T]]] = []
125
- """For specific records, this is the list of types"""
139
+ itemtypes: ClassVar[Optional[FrozenSet[Type[T]]]] = []
140
+ """For specific records, this is the list of types. The value is null when
141
+ no validation is used (e.g. pickled records created on the fly)"""
126
142
 
127
143
  __trueclass__: ClassVar[Optional[Type["Record"]]] = None
128
- """True when the class is defined in a module"""
144
+ """The last class in the type hierarchy corresponding to an actual type,
145
+ i.e. not created on the fly (only defined when the record is pickled)"""
129
146
 
130
147
  @classmethod
131
148
  def has_type(cls, itemtype: Type[T]):
@@ -135,31 +152,52 @@ class Record:
135
152
  def _subclass(cls, *itemtypes: Type[T]):
136
153
  cls_itemtypes = [x for x in getattr(cls, "itemtypes", [])]
137
154
  mapping = {
138
- ix: itemtype.__get_base__() for ix, itemtype in enumerate(cls_itemtypes)
155
+ itemtype.__get_base__(): ix for ix, itemtype in enumerate(cls_itemtypes)
139
156
  }
140
157
 
141
158
  for itemtype in itemtypes:
142
- if ix := mapping.get(itemtype.__get_base__(), None):
159
+ if (ix := mapping.get(itemtype.__get_base__(), -1)) >= 0:
143
160
  cls_itemtypes[ix] = itemtype
144
161
  else:
145
162
  cls_itemtypes.append(itemtype)
146
- return cls_itemtypes
163
+
164
+ return frozenset(cls_itemtypes)
147
165
 
148
166
  @classmethod
149
167
  def from_types(cls, name: str, *itemtypes: Type[T], module: str = None):
168
+ """Construct a new sub-record type
169
+
170
+ :param name: The name of the subrecord
171
+ :param module: The module name, defaults to None
172
+ :return: A new Record type
173
+ """
150
174
  extra_dict = {}
151
175
  if module:
152
176
  extra_dict["__module__"] = module
177
+
153
178
  return type(
154
179
  name,
155
180
  (cls,),
156
181
  {
157
182
  **extra_dict,
183
+ "itemtypes": frozenset(cls._subclass(*itemtypes)),
158
184
  "__trueclass__": cls.__trueclass__ or cls,
159
- "itemtypes": cls._subclass(*itemtypes),
160
185
  },
161
186
  )
162
187
 
188
+ __RECORD_TYPES_CACHE__: Dict[frozenset, Type["Record"]] = {}
189
+
190
+ @staticmethod
191
+ def fromitemtypes(itemtypes: FrozenSet[T]):
192
+ if recordtype := Record.__RECORD_TYPES_CACHE__.get(itemtypes, None):
193
+ return recordtype
194
+
195
+ recordtype = Record.from_types(
196
+ "_".join(itemtype.__name__ for itemtype in itemtypes), *itemtypes
197
+ )
198
+ Record.__RECORD_TYPES_CACHE__[itemtypes] = recordtype
199
+ return recordtype
200
+
163
201
 
164
202
  def recordtypes(*types: List[Type[T]]):
165
203
  """Adds types for a new record class"""
@@ -173,24 +211,102 @@ def recordtypes(*types: List[Type[T]]):
173
211
  return decorate
174
212
 
175
213
 
176
- class RecordTypesCache:
177
- """Class to use when new record types need to be created on the fly by
178
- adding new items"""
179
-
214
+ class RecordTypesCacheBase:
180
215
  def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
181
216
  self._module = module
182
217
  self._name = name
183
218
  self._itemtypes = itemtypes
184
- self._cache: Dict[Type[Record], Type[Record]] = {}
185
-
186
- def __getitem__(self, record_type: Type[Record]):
187
- if updated_type := self._cache.get(record_type, None):
188
- return updated_type
189
219
 
220
+ def _compute(self, record_type: Type[Record]):
190
221
  updated_type = record_type.from_types(
191
222
  f"{self._name}_{record_type.__name__}",
192
223
  *self._itemtypes,
193
224
  module=self._module,
194
225
  )
195
- self._cache[record_type] = updated_type
196
226
  return updated_type
227
+
228
+
229
+ class RecordTypesCache(RecordTypesCacheBase):
230
+ """Class to use when new record types need to be created on the fly by
231
+ adding new items"""
232
+
233
+ def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
234
+ """Creates a new cache
235
+
236
+ :param name: Base name for new record types
237
+ :param module: The module name for new types, defaults to None
238
+ """
239
+ super().__init__(name, *itemtypes, module=module)
240
+ self._cache: Dict[Type[Record], Type[Record]] = {}
241
+ self._warning = False
242
+
243
+ def __call__(self, record_type: Type[Record]):
244
+ if (updated_type := self._cache.get(record_type, None)) is None:
245
+ self._cache[record_type] = updated_type = self._compute(record_type)
246
+ return updated_type
247
+
248
+ def update(self, record: Record, *items: Item, cls=None):
249
+ """Update the record with the given items
250
+
251
+ :param record: The record to which we add items
252
+ :param cls: The class of the record, useful if the record has been
253
+ pickled, defaults to None
254
+ :return: A new record with the extra items
255
+ """
256
+ if cls is None:
257
+ cls = record.__class__
258
+ if record.is_pickled() and not self._warning:
259
+ logging.warning(
260
+ "Updating unpickled records is not recommended"
261
+ " (speed issues): use the pickle record class as the cls input"
262
+ )
263
+ itemtypes = frozenset(type(item) for item in record.items.values())
264
+ cls = Record.fromitemtypes(itemtypes)
265
+ else:
266
+ assert (
267
+ record.is_pickled()
268
+ ), "cls can be used only when the record as been pickled"
269
+
270
+ return self(cls)(*record.items.values(), *items, override=True)
271
+
272
+
273
+ class SingleRecordTypeCache(RecordTypesCacheBase):
274
+ """Class to use when new record types need to be created on the fly by
275
+ adding new items
276
+
277
+ This class supposes that the input record type is always the same (no check
278
+ is done to ensure this)"""
279
+
280
+ def __init__(self, name: str, *itemtypes: Type[T], module: str = None):
281
+ """Creates a new cache
282
+
283
+ :param name: Base name for new record types
284
+ :param module: The module name for new types, defaults to None
285
+ """
286
+ super().__init__(name, *itemtypes, module=module)
287
+ self._cache: Optional[Type[Record]] = None
288
+
289
+ def __call__(self, record_type: Type[Record]):
290
+ if self._cache is None:
291
+ self._cache = self._compute(record_type)
292
+ return self._cache
293
+
294
+ def update(self, record: Record, *items: Item, cls=None):
295
+ """Update the record with the given items
296
+
297
+ :param record: The record to which we add items
298
+ :param cls: The class of the record, useful if the record has been
299
+ pickled, defaults to None
300
+ :return: A new record with the extra items
301
+ """
302
+ if self._cache is None:
303
+ if cls is None:
304
+ cls = record.__class__
305
+ itemtypes = frozenset(type(item) for item in record.items.values())
306
+ cls = Record.fromitemtypes(itemtypes)
307
+ else:
308
+ assert (
309
+ record.is_pickled()
310
+ ), "cls can be used only when the record as been pickled"
311
+
312
+ return self(cls)(*record.items.values(), *items, override=True)
@@ -1,4 +1,11 @@
1
- from datamaestro.record import Record, Item, RecordTypesCache, recordtypes
1
+ import pickle
2
+ from datamaestro.record import (
3
+ Record,
4
+ Item,
5
+ RecordTypesCache,
6
+ recordtypes,
7
+ SingleRecordTypeCache,
8
+ )
2
9
  from attrs import define
3
10
  import pytest
4
11
 
@@ -18,13 +25,24 @@ class BItem(Item):
18
25
  b: int
19
26
 
20
27
 
28
+ @define
29
+ class B1Item(BItem):
30
+ b1: int
31
+
32
+
21
33
  @define
22
34
  class CItem(Item):
23
35
  c: int
24
36
 
25
37
 
26
- class MyRecord(Record):
27
- itemtypes = [A1Item, BItem]
38
+ @recordtypes(A1Item)
39
+ class BaseRecord(Record):
40
+ ...
41
+
42
+
43
+ @recordtypes(BItem)
44
+ class MyRecord(BaseRecord):
45
+ ...
28
46
 
29
47
 
30
48
  @recordtypes(CItem)
@@ -43,6 +61,7 @@ def test_record_simple():
43
61
 
44
62
  def test_record_missing_init():
45
63
  with pytest.raises(KeyError):
64
+ # A1Item is missing
46
65
  MyRecord(AItem(1), BItem(2))
47
66
 
48
67
  with pytest.raises(KeyError):
@@ -54,28 +73,79 @@ def test_record_update():
54
73
  b = BItem(4)
55
74
  r = MyRecord(a, b)
56
75
 
57
- r2 = r.add(BItem(3))
76
+ r2 = r.update(BItem(3))
58
77
  assert r is not r2
59
78
  assert r2[BItem] is not b
60
79
 
80
+ r3 = MyRecord2.from_record(r, CItem(2), BItem(5))
81
+ assert r[BItem].b == 4
82
+ assert r3[BItem].b == 5
83
+
61
84
 
62
85
  def test_record_decorator():
63
86
  MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
64
87
 
65
88
 
66
- def test_record_newtype():
67
- MyRecord2 = MyRecord.from_types("MyRecord2", CItem)
68
- r = MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
69
-
70
- # For a dynamic class, we should have the same MyRecord type
71
- assert r.__class__ is MyRecord
89
+ def test_record_type_update():
90
+ itemtypes = MyRecord2.from_types("Test", B1Item).itemtypes
91
+ assert itemtypes == frozenset((A1Item, B1Item, CItem))
72
92
 
73
93
 
74
94
  def test_record_onthefly():
75
95
  cache = RecordTypesCache("OnTheFly", CItem)
76
96
 
77
- MyRecord2 = cache[MyRecord]
78
- r = MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
79
- assert r.__class__ is MyRecord
97
+ MyRecord2 = cache(MyRecord)
98
+ MyRecord2(A1Item(1, 2), BItem(2), CItem(3))
99
+
100
+ assert cache(MyRecord) is MyRecord2
101
+
102
+ r = MyRecord(A1Item(1, 2), BItem(2))
103
+ assert cache(r.__class__) is MyRecord2
104
+
105
+ r = cache.update(r, CItem(3))
106
+
107
+ # Same record type
108
+ cache2 = RecordTypesCache("OnTheFly", CItem)
109
+
110
+ cache2.update(r, CItem(4))
111
+
112
+
113
+ def test_record_pickled():
114
+ # First,
115
+ MyRecord2 = BaseRecord.from_types("MyRecordBis", BItem)
116
+ r = MyRecord2(A1Item(1, 2), BItem(2))
117
+ r = pickle.loads(pickle.dumps(r))
118
+
119
+ assert isinstance(r, BaseRecord) and not isinstance(r, MyRecord2)
120
+ cache = RecordTypesCache("OnTheFly", CItem)
121
+
122
+ assert r.is_pickled()
123
+
124
+ r2 = cache.update(r, CItem(4))
125
+ assert not r2.is_pickled()
126
+
127
+ # Test with cls update
128
+ with pytest.raises(KeyError):
129
+ cache.update(r, CItem(4), cls=BaseRecord)
130
+
131
+ # This is OK
132
+ cache.update(r, CItem(4), cls=MyRecord)
133
+
134
+ # --- Test when we update a pickled record with an item of a sub-class
135
+ cache = RecordTypesCache("OnTheFly", B1Item)
136
+ r2 = cache.update(r, B1Item(1, 2))
137
+
138
+
139
+ def test_record_pickled_single():
140
+ MyRecord2 = BaseRecord.from_types("MyRecordBis", BItem)
141
+ r = MyRecord2(A1Item(1, 2), BItem(2))
142
+ r = pickle.loads(pickle.dumps(r))
143
+
144
+ cache = SingleRecordTypeCache("OnTheFly", CItem)
145
+
146
+ updated = cache.update(r, CItem(4))
147
+
148
+ assert updated.itemtypes == frozenset((A1Item, BItem, CItem))
80
149
 
81
- assert cache[MyRecord] is MyRecord2
150
+ # Even with the wrong record, no change now
151
+ assert cache(BaseRecord).itemtypes == frozenset((A1Item, BItem, CItem))
datamaestro/version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.0.4'
16
- __version_tuple__ = version_tuple = (1, 0, 4)
15
+ __version__ = version = '1.0.6'
16
+ __version_tuple__ = version_tuple = (1, 0, 6)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamaestro
3
- Version: 1.0.4
3
+ Version: 1.0.6
4
4
  Summary: "Dataset management command line and API"
5
5
  Home-page: https://github.com/experimaestro/datamaestro
6
6
  Author: Benjamin Piwowarski
@@ -2,13 +2,13 @@ datamaestro/__init__.py,sha256=9M5hA6FVngduJBcjInvJWQM8n0cqapXAFPzfRLHR74c,237
2
2
  datamaestro/__main__.py,sha256=tJTf1sTWKRIatvBcHlWDIZRZodAZ2B2zkD01pD89MYk,9024
3
3
  datamaestro/context.py,sha256=imDAs9v5yR_O871cdM4e5Q0KBSHmT-j5qc83sz6npWI,13210
4
4
  datamaestro/definitions.py,sha256=ORlD3kxLvmw1EScCepzGQOHjsTiC3yvY-z_7LxKDp08,15415
5
- datamaestro/record.py,sha256=hIOHD4ere9C2gDCi_FQM4lKZRVZ5lQU43JhqRXLKT_s,6052
5
+ datamaestro/record.py,sha256=gGOJAKHKc3SN3zctVZ6Bq21cuLnWymc2cE9ukeyrZzs,10475
6
6
  datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
7
7
  datamaestro/search.py,sha256=PMceNp5hcp0dlzs4cLb6LJT7XHrdXo58oO7oTucawbE,2887
8
8
  datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
9
9
  datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
10
10
  datamaestro/utils.py,sha256=Y3_aqeOHW8vuifwggGWJfgONyDG1FLX7ONAnX85jENI,6511
11
- datamaestro/version.py,sha256=9acUHRb1fq-uv5dr49LlEh189daKw4-SeypW5NxDolA,411
11
+ datamaestro/version.py,sha256=T17ZEPR5Omt5RLA0TuZWTufRi58PzGIPek75nFcuMQY,411
12
12
  datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
13
13
  datamaestro/annotations/agreement.py,sha256=IPHjXX8ld5blvSDNXGt8RfuHDgVAITN52gJZh93AY4g,723
14
14
  datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -37,10 +37,10 @@ datamaestro/test/checks.py,sha256=1eTkz4YJhAPOcnQSsz4vPnvzwwfrEnpn6H_s1ADISpo,17
37
37
  datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,767
38
38
  datamaestro/test/test_annotations.py,sha256=kRPUmS_UAN6JSSVPUwV4OM_LEuEUHF1OcLSiYXjsKjw,246
39
39
  datamaestro/test/test_download_handlers.py,sha256=Qqm-fML1KVp6dPwAUcH6xzi_dpQIshvROzviSYCUzc0,603
40
- datamaestro/test/test_record.py,sha256=QngOHfx6QDOPepgj-nUcCmFqmweVAacllKw-EENb3Qk,1453
41
- datamaestro-1.0.4.dist-info/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
42
- datamaestro-1.0.4.dist-info/METADATA,sha256=X94mSPdOtOUQmQWYmlIToSStQzp81Q37JMSdC8VFXhw,8999
43
- datamaestro-1.0.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
44
- datamaestro-1.0.4.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
45
- datamaestro-1.0.4.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
46
- datamaestro-1.0.4.dist-info/RECORD,,
40
+ datamaestro/test/test_record.py,sha256=zo8faY8xqRRE916mXIGaq-WXhhvQrNFmDJTzB3CCFEk,3050
41
+ datamaestro-1.0.6.dist-info/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
42
+ datamaestro-1.0.6.dist-info/METADATA,sha256=U_fiwzaUSjEV5s4DOp3dRn9MzXesAkQ2o_ZpXCZIW8Y,8999
43
+ datamaestro-1.0.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
44
+ datamaestro-1.0.6.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
45
+ datamaestro-1.0.6.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
46
+ datamaestro-1.0.6.dist-info/RECORD,,