labfreed 0.0.8__py2.py3-none-any.whl → 0.0.10__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labfreed/DisplayNameExtension/DisplayNameExtension.py +6 -3
- labfreed/PAC_CAT/data_model copy.py +232 -0
- labfreed/PAC_CAT/data_model.py +319 -59
- labfreed/PAC_ID/data_model.py +89 -127
- labfreed/PAC_ID/extensions.py +55 -0
- labfreed/TREX/UneceUnits.json +33730 -0
- labfreed/TREX/data_model.py +789 -0
- labfreed/{TREXExtension → TREX}/parse.py +23 -16
- labfreed/TREX/unece_units.py +106 -0
- labfreed/__init__.py +1 -1
- labfreed/parse_pac.py +189 -0
- labfreed/{DisplayNameExtension → utilities}/base36.py +29 -13
- labfreed/utilities/extension_intertpreters.py +4 -0
- labfreed/utilities/utility_types.py +103 -0
- labfreed/{PAC_ID/well_known_segment_keys.py → utilities/well_known_keys.py} +1 -1
- labfreed/validation.py +117 -39
- {labfreed-0.0.8.dist-info → labfreed-0.0.10.dist-info}/METADATA +1 -1
- labfreed-0.0.10.dist-info/RECORD +22 -0
- labfreed/PAC_ID/parse.py +0 -142
- labfreed/PAC_ID/serialize.py +0 -60
- labfreed/TREXExtension/data_model.py +0 -239
- labfreed/TREXExtension/uncertainty.py +0 -32
- labfreed/TREXExtension/unit_utilities.py +0 -143
- labfreed-0.0.8.dist-info/RECORD +0 -19
- {labfreed-0.0.8.dist-info → labfreed-0.0.10.dist-info}/WHEEL +0 -0
- {labfreed-0.0.8.dist-info → labfreed-0.0.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,789 @@
|
|
|
1
|
+
from datetime import date, datetime, time
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
from collections import Counter
|
|
5
|
+
from typing import Annotated, Literal
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
from pydantic import PrivateAttr, RootModel, ValidationError, field_validator, model_validator, Field
|
|
9
|
+
from labfreed.TREX.unece_units import unece_unit, unece_unit_codes, unece_units, unit_name, unit_symbol
|
|
10
|
+
from labfreed.utilities.utility_types import DataTable, Quantity, Unit, unece_unit_code_from_quantity
|
|
11
|
+
from labfreed.validation import BaseModelWithValidationMessages
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
|
|
14
|
+
from labfreed.PAC_ID.extensions import Extension
|
|
15
|
+
from labfreed.utilities.base36 import base36, to_base36, from_base36
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TREX_Segment(BaseModelWithValidationMessages, ABC):
    """Abstract base class for one keyed segment of a T-REX extension.

    Concrete subclasses must implement ``serialize_for_trex()``.
    """
    key: str

    @model_validator(mode='after')
    def validate_key(self):
        """Record an error for every key character outside A-Z, 0-9, '.' and '-'.

        Problems are reported through ``add_validation_message`` rather than
        raised, so construction of the model still succeeds.
        """
        not_allowed_chars = set(re.sub(r'[A-Z0-9\.-]', '', self.key))
        if not_allowed_chars:
            self.add_validation_message(
                source=f"TREX segment key {self.key}",
                type="Error",
                msg=f"Segment key contains invalid characters: {','.join(not_allowed_chars)}",
                highlight_pattern=f'{self.key}$',
                highlight_sub=not_allowed_chars
            )
        return self

    @abstractmethod
    def serialize_for_trex(self):
        """Return the T-REX string representation of this segment."""
        # The message previously named a non-existent 'serialize_as_trex()'.
        raise NotImplementedError("Subclasses must implement 'serialize_for_trex()' method")
|
|
37
|
+
|
|
38
|
+
# @abstractmethod
|
|
39
|
+
# def to_python_type(self):
|
|
40
|
+
# raise NotImplementedError("Subclasses must implement 'to_python_type()' method")
|
|
41
|
+
|
|
42
|
+
# @abstractmethod
|
|
43
|
+
# def from_python_type(self):
|
|
44
|
+
# raise NotImplementedError("Subclasses must implement 'from_python_type()' method")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ValueMixin(BaseModelWithValidationMessages, ABC):
    """Abstract holder of a single T-REX value kept in its string form."""
    value: str

    # @abstractclassmethod
    # def from_python_type(cls, v):
    #     ...

    @abstractmethod
    def value_to_python_type(self):
        """Convert ``value`` to a Python object; implemented by subclasses."""

    def serialize_for_trex(self):
        """Return the stored string exactly as it is."""
        return self.value
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class NumericValue(ValueMixin):
    """Numeric T-REX value stored as text, e.g. ``12``, ``-1.5`` or ``1E-3``."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str | int | float):
        """Accept a string unchanged; turn any other input into ``str(v)``."""
        return v if isinstance(v, str) else str(v)

    @model_validator(mode='after')
    def validate(self):
        """Record errors for illegal characters and for non-numeric syntax."""
        value = self.value
        not_allowed_chars = set(re.sub(r'[0-9\.\-E]', '', value))
        if not_allowed_chars:
            self.add_validation_message(
                source=f"TREX numeric value {value}",
                type="Error",
                # The old text talked about Base36, which does not apply to numbers.
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in quantity segment. Only 0-9, '.', '-' and 'E' are allowed",
                highlight_pattern=f'{value}',
                highlight_sub=not_allowed_chars
            )
        if not re.fullmatch(r'-?\d+(\.\d+)?(E-?\d+)?', value):
            self.add_validation_message(
                source=f"TREX numeric value {value}",
                type="Error",
                msg=f"{value} cannot be converted to number",
                highlight_pattern=f'{value}'
            )
        return self

    def value_to_python_type(self) -> int | float:
        """Return an ``int`` when the text has neither '.' nor 'E', else a ``float``.

        Raises:
            ValueError: if the text cannot be parsed as a number.
        """
        if '.' not in self.value and 'E' not in self.value:
            # Parse directly as int; going through float() first would lose
            # precision for integers beyond 2**53.
            return int(self.value)
        return float(self.value)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class DateValue(ValueMixin):
    """Date, time or datetime value in T-REX text form.

    Accepted text: ``YYYYMMDD``, ``THHMM``, ``THHMMSS``, ``THHMMSS.SSS`` or a
    date immediately followed by one of the time forms.
    """
    # Integer components parsed by validate(); stays None if the format is invalid.
    _date_time_dict: dict | None = PrivateAttr(default=None)

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str | date | time | datetime):
        """Serialize ``date``/``time``/``datetime`` input; pass strings through."""
        if isinstance(v, str):
            return v

        sd = ""
        st = ""
        if isinstance(v, date):  # datetime is a subclass of date
            sd = v.strftime('%Y%m%d')
        if isinstance(v, (time, datetime)):
            if v.microsecond:
                # Only millisecond resolution is written out.
                st = v.strftime("T%H%M%S.") + f"{v.microsecond // 1000:03d}"
            elif v.second:
                st = v.strftime("T%H%M%S")
            else:
                st = v.strftime("T%H%M")

        return sd + st

    @model_validator(mode='after')
    def validate(self):
        """Check format and calendar validity, then cache the parsed components."""
        pattern: str = r'((?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))?(T(?P<hour>\d{2})(?P<minute>\d{2})(?P<second>\d{2})?(\.(?P<millisecond>\d{3}))?)?'
        value = self.value
        # Both halves of the pattern are optional, so an empty string would
        # match; reject it explicitly.
        matches = re.fullmatch(pattern, value) if value else None
        if not matches:
            self.add_validation_message(
                source=f"TREX date value {value}",
                type="Error",
                msg=f'{value} is not in a valid format. Valid format for date: YYYYMMDD; Valid for time: THHMM, THHMMSS, THHMMSS.SSS; Datetime any combination of valid date and time',
                highlight_pattern=f'{value}'
            )
            return self

        d = {k: int(v) for k, v in matches.groupdict().items() if v}
        if 'millisecond' in d:
            d['microsecond'] = d.pop('millisecond') * 1000
        try:
            # Test key presence, not truthiness: a year of 0 must still reach
            # datetime() so that it is reported as an invalid date.
            if 'year' in d:
                datetime(**d)
            else:
                time(**d)
        except ValueError:
            self.add_validation_message(
                source=f"TREX date value {value}",
                type="Error",
                msg=f'{value} is no valid date or time.',
                highlight_pattern=f'{value}'
            )

        self._date_time_dict = d
        return self

    def value_to_python_type(self) -> datetime | date | time:
        """Return a ``datetime``, ``date`` or ``time`` depending on the parts present."""
        d = self._date_time_dict
        # Key-presence tests: hour 0 (midnight) is a valid, falsy component.
        has_date = 'year' in d
        has_time = 'hour' in d
        if has_date and has_time:
            return datetime(**d)
        if has_date:
            return date(**d)
        return time(**d)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class BoolValue(ValueMixin):
    """Boolean T-REX value stored as the text ``'T'`` or ``'F'``."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str | bool):
        """Pass strings through; map any other input to 'T' or 'F' by truthiness."""
        if isinstance(v, str):
            return v
        return 'T' if v else 'F'

    @model_validator(mode='after')
    def validate(self):
        """Record an error unless the value is exactly 'T' or 'F'."""
        if self.value not in ('T', 'F'):
            self.add_validation_message(
                source=f"TREX boolean value {self.value}",
                type="Error",
                msg=f'{self.value} is no valid boolean. Must be T or F',
                highlight_pattern=f'{self.value}',
                highlight_sub=list(self.value)
            )
        return self

    def value_to_python_type(self) -> bool:
        """Return ``True`` for 'T' and ``False`` for 'F'.

        Raises:
            Exception: for any other stored text.
        """
        if self.value == 'T':
            return True
        if self.value == 'F':
            return False
        # The exception used to be created without `raise`, so the method
        # silently returned None for invalid values.
        raise Exception(f'{self} is not valid boolean. That really should not have been possible -- Contact the maintainers of the library')
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class AlphanumericValue(ValueMixin):
    """Alphanumeric T-REX value; allowed characters are A-Z, 0-9, '.' and '-'."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str):
        """Return the input unchanged."""
        return v

    @model_validator(mode='after')
    def validate(self):
        """Record errors for lower-case letters and other disallowed characters."""
        # re.search scans the whole value; re.match only looked at the first
        # character, so 'Ab' slipped past the lower-case check.
        if re.search(r'[a-z]', self.value):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg="Lower case characters are not allowed.",
                highlight_pattern=self.value
            )

        not_allowed_chars = set(re.sub(r'[A-Z0-9\.-]', '', self.value))
        if not_allowed_chars:
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in alphanumeric segment",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def value_to_python_type(self) -> str:
        """Return the stored string."""
        return self.value
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class TextValue(ValueMixin):
    """Text T-REX value, stored base36-encoded."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: base36 | str):
        """Encode a plain ``str`` with ``to_base36``; unwrap any other input via ``.root``."""
        if not isinstance(v, str):
            return v.root
        logging.info('Got str for text value > converting to base36')
        return to_base36(v).root

    @model_validator(mode='after')
    def validate(self):
        """Record an error if the stored text contains anything but A-Z and 0-9."""
        illegal = set(re.sub(r'[A-Z0-9]', '', self.value))
        if illegal:
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(illegal)} are not allowed in text segment. Base36 encoding only allows A-Z0-9",
                highlight_pattern=self.value,
                highlight_sub=illegal
            )
        return self

    def value_to_python_type(self) -> str:
        """Return the result of ``from_base36`` applied to the stored text."""
        return from_base36(self.value)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class BinaryValue(ValueMixin):
    """Binary T-REX value, stored as base36 text."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: base36 | str):
        """Pass strings through; unwrap any other input via ``.root``."""
        if isinstance(v, str):
            return v
        return v.root

    @model_validator(mode='after')
    def validate(self):
        """Record an error if the stored text contains anything but A-Z and 0-9."""
        not_allowed_chars = set(re.sub(r'[A-Z0-9]', '', self.value))
        if not_allowed_chars:
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                # The copied message used to say "text segment".
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in binary segment. Base36 encoding only allows A-Z0-9",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def value_to_python_type(self) -> bytes:
        """Decode the stored base36 text and return it as ``bytes``."""
        # Decode the stored string, as TextValue does; the whole model instance
        # used to be passed to from_base36().
        # NOTE(review): bytes() needs from_base36 to return a bytes-like object
        # or an iterable of ints; if it returns str an encoding is required -- confirm.
        decoded = bytes(from_base36(self.value))
        return decoded
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
class ErrorValue(ValueMixin):
    """Error T-REX value; allowed characters are A-Z, 0-9, '.' and '-'."""

    @model_validator(mode='after')
    def validate(self):
        """Record an error listing every disallowed character in the value."""
        illegal = set(re.sub(r'[A-Z0-9\.-]', '', self.value))
        if not illegal:
            return self
        self.add_validation_message(
            source=f"TREX value {self.value}",
            type="Error",
            msg=f"Characters {','.join(illegal)} are not allowed in error segment",
            highlight_pattern=self.value,
            highlight_sub=illegal
        )
        return self

    def value_to_python_type(self) -> str:
        """Return the stored string."""
        return self.value
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
class ValueSegment(TREX_Segment, ValueMixin, ABC):
    """Scalar T-REX segment, serialized as ``key$type:value``.

    Selection of the concrete subclass for a type code is done in
    ``_deserialize_value_segment_from_trex_segment_str``.
    """
    type: str

    @model_validator(mode='after')
    def validate_type(self):
        """Record an error unless ``type`` is a UNECE unit code or a T-REX type code."""
        allowed = unece_unit_codes() + ['T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E']
        if self.type in allowed:
            return self
        self.add_validation_message(
            source=f"TREX value segment {self.key}",
            type="Error",
            msg=f"Type {self.type} is invalid. Must be 'T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E' or a UNECE unit",
            highlight_pattern=self.type
        )
        return self

    def serialize_for_trex(self) -> str:
        """Return the segment as ``key$type:value``."""
        return f'{self.key}${self.type}:{self.value}'

    def to_python_type(self):
        """Return the Python representation produced by ``value_to_python_type()``."""
        return self.value_to_python_type()
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class NumericSegment(ValueSegment, NumericValue):
    """Numeric scalar segment; ``type`` is passed to ``unece_unit`` to find the unit."""
    type: str

    def to_python_type(self):
        """Return the value as a ``Quantity`` carrying the unit's name and symbol."""
        unit_record = unece_unit(self.type)
        return Quantity(
            value=self.value_to_python_type(),
            unit=Unit(name=unit_name(unit_record), symbol=unit_symbol(unit_record)),
        )
|
|
358
|
+
|
|
359
|
+
class DateSegment(ValueSegment, DateValue):
    """Scalar segment whose type code is fixed to 'T.D'."""
    type: Literal['T.D'] = Field(default='T.D', frozen=True)
|
|
361
|
+
|
|
362
|
+
class BoolSegment(ValueSegment, BoolValue):
    """Scalar segment whose type code is fixed to 'T.B'."""
    type: Literal['T.B'] = Field(default='T.B', frozen=True)
|
|
364
|
+
|
|
365
|
+
class AlphanumericSegment(ValueSegment, AlphanumericValue):
    """Scalar segment whose type code is fixed to 'T.A'."""
    type: Literal['T.A'] = Field(default='T.A', frozen=True)
|
|
367
|
+
|
|
368
|
+
class TextSegment(ValueSegment, TextValue):
    """Scalar segment whose type code is fixed to 'T.T'."""
    type: Literal['T.T'] = Field(default='T.T', frozen=True)
|
|
370
|
+
|
|
371
|
+
class BinarySegment(ValueSegment, BinaryValue):
    """Scalar segment whose type code is fixed to 'T.X'."""
    type: Literal['T.X'] = Field(default='T.X', frozen=True)
|
|
373
|
+
|
|
374
|
+
class ErrorSegment(ValueSegment, ErrorValue):
    """Scalar segment whose type code is fixed to 'E'."""
    type: Literal['E'] = Field(default='E', frozen=True)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
class ColumnHeader(BaseModelWithValidationMessages):
    """Header of one table column: a key and a type code."""
    key: str
    type: str

    @model_validator(mode='after')
    def validate_key(self):
        """Record an error for key characters outside A-Z, 0-9, '.' and '-'."""
        illegal = set(re.sub(r'[A-Z0-9\.-]', '', self.key))
        if not illegal:
            return self
        self.add_validation_message(
            source=f"TREX table column {self.key}",
            type="Error",
            msg=f"Column header key contains invalid characters: {','.join(illegal)}",
            highlight_pattern=f'{self.key}$',
            highlight_sub=illegal
        )
        return self

    @model_validator(mode='after')
    def validate_type(self):
        """Record an error unless ``type`` is a UNECE unit code or a T-REX type code."""
        allowed = unece_unit_codes() + ['T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E']
        if self.type in allowed:
            return self
        self.add_validation_message(
            source=f"TREX table column {self.key}",
            type="Error",
            msg=f"Type '{self.type}' is invalid. Must be 'T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E' or a UNECE unit",
            highlight_pattern=self.type
        )
        return self
|
|
406
|
+
|
|
407
|
+
class TableRow(RootModel[list[ValueMixin]]):
    """One table row: a list of values that supports len(), iteration and indexing."""

    def serialize_for_trex(self):
        """Return the serialized cells joined with ':'."""
        return ':'.join([e.serialize_for_trex() for e in self.root])

    def __len__(self):
        return len(self.root)

    def __iter__(self):
        return iter(self.root)

    def __getitem__(self, index):
        # RootModel does not provide item access by itself; without this,
        # `row[i]` as used by the table accessors raises TypeError.
        return self.root[index]
|
|
416
|
+
|
|
417
|
+
class TREX_Table(TREX_Segment):
    """Table segment: a list of column headers plus rows of values.

    Serialized as ``key$$col$type:col$type::cell:cell::cell:cell``.
    """
    column_headers: list[ColumnHeader]
    data: list[TableRow]

    @property
    def column_names(self):
        """Keys of all column headers, in column order."""
        return [h.key for h in self.column_headers]

    @property
    def column_types(self):
        """Type codes of all column headers, in column order."""
        return [h.type for h in self.column_headers]

    @model_validator(mode='after')
    def validate_sizes(self):
        """Record errors when the header length and the row lengths disagree.

        The most frequent length among the header and all rows is taken as
        the expected row length.
        """
        n_header = len(self.column_headers)
        sizes = [n_header] + [len(row) for row in self.data]
        most_common_len, _ = Counter(sizes).most_common(1)[0]

        if n_header != most_common_len:
            self.add_validation_message(
                source=f"Table {self.key}",
                type="Error",
                # Used to read self.col_names, which does not exist on this class.
                msg=f"Size mismatch: Table header contains {n_header} keys, while most rows have {most_common_len}",
                highlight_pattern=self.key
            )
        expected_row_len = most_common_len

        for i, row in enumerate(self.data):
            if len(row) != expected_row_len:
                self.add_validation_message(
                    source=f"Table {self.key}",
                    type="Error",
                    msg=f"Size mismatch: Table row {i} contains {len(row)} elements. Expected size is {expected_row_len}",
                    highlight_pattern=row.serialize_for_trex()
                )
        return self

    @model_validator(mode='after')
    def validate_data_types(self):
        """Check every cell against its column type and collect the cells' own errors."""
        # Any type code not listed here is treated as a unit code, i.e. numeric.
        value_classes = {
            'T.D': DateValue,
            'T.B': BoolValue,
            'T.A': AlphanumericValue,
            'T.T': TextValue,
            'T.X': BinaryValue,
            'E': ErrorValue,
        }
        expected_types = self.column_types
        names = self.column_names
        for i, row in enumerate(self.data):
            for e, t_expected, nm in zip(row, expected_types, names):
                # Explicit isinstance check instead of assert, which is
                # stripped when Python runs with -O.
                if not isinstance(e, value_classes.get(t_expected, NumericValue)):
                    self.add_validation_message(
                        source=f"Table {self.key}",
                        type="Error",
                        msg=f"Type mismatch: Table row {i}, column {nm} is of wrong type. According to the header it should be {t_expected}",
                        highlight_pattern=row.serialize_for_trex(),
                        highlight_sub=list(e.value)
                    )

                for m in e.get_errors() or []:
                    self.add_validation_message(
                        source=f"Table {self.key}",
                        type="Error",
                        msg=m.problem_msg,
                        highlight_pattern=row.serialize_for_trex(),
                        highlight_sub=list(e.value)
                    )
        # An 'after' model validator has to return the instance.
        return self

    def _get_col_index(self, col: str | int):
        """Translate a column name or index into an index.

        Raises:
            ValueError: if ``col`` is a name that is not among the column names.
            TypeError: if ``col`` is neither ``str`` nor ``int``.
        """
        if isinstance(col, str):
            return self.column_names.index(col)
        if isinstance(col, int):
            return col
        # Instances have no __name__; report the name of their type.
        raise TypeError(f"Column must be specified as string or int: {type(col).__name__}")

    def serialize_for_trex(self):
        """Return the table in T-REX string form."""
        header = ':'.join([f'{h.key}${h.type}' for h in self.column_headers])
        data = '::'.join([r.serialize_for_trex() for r in self.data])
        return f'{self.key}$${header}::{data}'

    def to_python_type(self):
        """Return the table as a ``DataTable`` of Python values.

        Numeric cells become ``Quantity`` objects carrying the column's unit.
        """
        table = DataTable([ch.key for ch in self.column_headers])
        for row in self.data:
            r = []
            for e, h in zip(row, self.column_headers):
                if isinstance(e, NumericValue):
                    u = unece_unit(h.type)
                    unit = Unit(name=u.get('name'), symbol=u.get('symbol'))
                    # Pass the converted number, as NumericSegment.to_python_type
                    # does, rather than the raw string.
                    r.append(Quantity(value=e.value_to_python_type(), unit=unit))
                else:
                    r.append(e.value_to_python_type())
            table.append(r)
        return table

    def n_rows(self) -> int:
        """Number of data rows."""
        return len(self.data)

    def n_cols(self) -> int:
        """Number of column headers."""
        return len(self.column_headers)

    def row_data(self, row: int) -> TableRow:
        """Return the row at index ``row``."""
        return self.data[row]

    def column_data(self, col: str | int) -> list:
        """Return the cells of one column, addressed by name or index."""
        col_index = self._get_col_index(col)
        self.column_headers[col_index]  # raises IndexError for an unknown column
        # Index through .root: TableRow itself may not be subscriptable.
        return [row.root[col_index] for row in self.data]

    def cell_data(self, row: int, col: str | int):
        """Return one cell, or ``None`` (with a warning) if row or column is missing."""
        try:
            col_index = self._get_col_index(col)
            value = self.data[row].root[col_index]
        except (ValueError, IndexError):
            # ValueError: unknown column name; IndexError: row/column out of range.
            logging.warning(f"row {row}, column {col} not found")
            return None
        return value
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
class TREX(Extension, BaseModelWithValidationMessages):
    """T-REX extension: a named collection of value and table segments."""
    name_: str
    segments: list[TREX_Segment] = Field(default_factory=list)

    @property
    def name(self) -> str:
        """Name of the extension."""
        return self.name_

    @property
    def type(self) -> str:
        """Extension type identifier; always 'TREX'."""
        return 'TREX'

    @property
    def data(self) -> str:
        """All segments in T-REX form, joined with '+'."""
        return '+'.join(s.serialize_for_trex() for s in self.segments)

    def get_segment(self, segment_key: str) -> TREX_Segment:
        """Return the first segment whose key equals ``segment_key``, else ``None``."""
        return next((s for s in self.segments if s.key == segment_key), None)

    @staticmethod
    def _is_trex_alphanumeric(text: str) -> bool:
        """Tell whether ``text`` consists only of A-Z, 0-9, '-' and '.'."""
        return re.fullmatch(r'[A-Z0-9\-\.]*', text) is not None

    @staticmethod
    def _column_type_for(template) -> str:
        """Return the column type code matching one row-template entry."""
        if isinstance(template, bool):  # must come first: bool is a subclass of int
            return 'T.B'
        if isinstance(template, Quantity):
            return unece_unit_code_from_quantity(template)
        if isinstance(template, (int, float)):
            return 'C63'  # unitless, same code update() uses for scalar numbers
        if isinstance(template, (datetime, time, date)):
            return 'T.D'
        if isinstance(template, str):
            # Other strings are stored base36-encoded as TextValue, hence 'T.T'.
            return 'T.A' if TREX._is_trex_alphanumeric(template) else 'T.T'
        if isinstance(template, base36):
            return 'T.T'
        raise TypeError(f"Unsupported table column type: {type(template).__name__}")

    @staticmethod
    def _cell_value_for(e):
        """Wrap one table cell in its value class; return ``None`` if unsupported."""
        if isinstance(e, bool):  # must come first: bool is a subclass of int
            return BoolValue(value=e)
        if isinstance(e, Quantity):
            return NumericValue(value=e.value)
        if isinstance(e, (int, float)):
            return NumericValue(value=e)
        if isinstance(e, (datetime, time, date)):
            return DateValue(value=e)
        if isinstance(e, str):
            if TREX._is_trex_alphanumeric(e):
                return AlphanumericValue(value=e)
            return TextValue(value=to_base36(e))
        if isinstance(e, base36):
            return TextValue(value=e)
        return None

    def update(self, segments: dict[str, Quantity | datetime | time | date | bool | str | base36 | DataTable]):
        """Append one segment per entry of ``segments``.

        Args:
            segments: maps segment key to a Python value. ``bool``, ``Quantity``,
                ``int``/``float``, date/time objects, ``str``, ``base36`` and
                ``DataTable`` are handled; entries of any other type are
                skipped with a warning.

        Raises:
            TypeError: if a ``DataTable`` row template holds an unsupported type.
        """
        for k, v in segments.items():
            if isinstance(v, bool):  # must come first: bool is a subclass of int
                self.segments.append(BoolSegment(key=k, value=v))
            elif isinstance(v, Quantity):
                unece_code = unece_unit_code_from_quantity(v)
                self.segments.append(NumericSegment(key=k, value=v.value, type=unece_code))
            elif isinstance(v, (int, float)):
                self.segments.append(NumericSegment(key=k, value=v, type='C63'))  # unitless
            elif isinstance(v, (datetime, time, date)):
                self.segments.append(DateSegment(key=k, value=v))
            elif isinstance(v, str):
                if self._is_trex_alphanumeric(v):
                    self.segments.append(AlphanumericSegment(key=k, value=v))
                else:
                    self.segments.append(TextSegment(key=k, value=to_base36(v)))
            elif isinstance(v, base36):
                self.segments.append(TextSegment(key=k, value=v))
            elif isinstance(v, DataTable):
                # The header loop used to rebind `v` to a base36 value, which
                # destroyed the table before its rows were read.
                headers = [
                    ColumnHeader(key=nm, type=self._column_type_for(rt))
                    for nm, rt in zip(v.col_names, v.row_template)
                ]
                data = []
                for row in v:
                    r = []
                    for e in row:
                        cell = self._cell_value_for(e)
                        if cell is None:
                            logging.warning(f"Skipping table cell of unsupported type {type(e).__name__}")
                            continue
                        r.append(cell)
                    data.append(r)
                self.segments.append(TREX_Table(key=k, column_headers=headers, data=data))
            else:
                logging.warning(f"Skipping segment {k}: unsupported type {type(v).__name__}")

    def dict(self):
        """Return ``{segment key: Python value}`` for all segments."""
        return {s.key: s.to_python_type() for s in self.segments}

    @field_validator('segments')
    @classmethod
    def validate_segments(cls, segments):
        """Reject segment lists in which a key occurs more than once.

        Raises:
            ValueError: listing the duplicated keys.
        """
        segment_keys = [s.key for s in segments]
        duplicates = [item for item, count in Counter(segment_keys).items() if count > 1]
        if duplicates:
            raise ValueError(f"Duplicate segment keys: {','.join(duplicates)}")
        return segments

    @staticmethod
    def from_spec_fields(*, name, data, type='TREX'):
        """Parse a T-REX data string into a ``TREX`` instance.

        Args:
            name: name of the extension.
            data: segments in T-REX form, separated by '+'.
            type: accepted but not used by this method.

        Raises:
            ValueError: if a segment parses neither as a table nor as a value.
        """
        out_segments = []
        for s in data.split('+'):
            # A segment is either a table or a scalar value; try the table
            # form first, then the scalar form.
            segment = _deserialize_table_segment_from_trex_segment_str(s)
            if not segment:
                segment = _deserialize_value_segment_from_trex_segment_str(s)
            if not segment:
                raise ValueError('TREX contains neither valid value segment nor table')
            out_segments.append(segment)

        return TREX(name_=name, segments=out_segments)
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def _deserialize_value_segment_from_trex_segment_str(trex_segment_str) -> ValueSegment:
|
|
696
|
+
#re_scalar_pattern = re.compile(f"(?P<name>[\w\.-]*?)\$(?P<unit>[\w\.]*?):(?P<value>.*)")
|
|
697
|
+
re_scalar_pattern = re.compile(f"(?P<name>.+?)\$(?P<unit>.+?):(?P<value>.+)")
|
|
698
|
+
matches = re_scalar_pattern.match(trex_segment_str)
|
|
699
|
+
if not matches:
|
|
700
|
+
return None
|
|
701
|
+
|
|
702
|
+
key, type_, value = matches.groups()
|
|
703
|
+
|
|
704
|
+
match type_:
|
|
705
|
+
case 'T.D':
|
|
706
|
+
out = DateSegment(key=key, value=value, type=type_)
|
|
707
|
+
case 'T.B':
|
|
708
|
+
out = BoolSegment(key=key, value=value, type=type_)
|
|
709
|
+
case 'T.A':
|
|
710
|
+
out = AlphanumericSegment(key=key, value=value, type=type_)
|
|
711
|
+
case 'T.T':
|
|
712
|
+
out = TextSegment(key=key, value=base36(value), type=type_) # prevent repeated conversion from str to base36 and make explict that when parsing we assume the string tpo be base36 already
|
|
713
|
+
case 'T.X':
|
|
714
|
+
out = BinarySegment(key=key, value=base36(value), type=type_) # prevent repeated conversion from str to base36 and make explict that when parsing we assume the string tpo be base36 already
|
|
715
|
+
case 'E':
|
|
716
|
+
out = ErrorSegment(key=key, value=value, type=type_)
|
|
717
|
+
case _:
|
|
718
|
+
out = NumericSegment(value=value, key=key, type=type_)
|
|
719
|
+
|
|
720
|
+
return out
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def _deserialize_table_segment_from_trex_segment_str(trex_segment_str) -> TREX_Table | None:
    """Parse a ``name$$header::row::row`` string into a ``TREX_Table``.

    Returns:
        The table, or ``None`` if the string does not have the table shape.
    """
    matches = re.match(r"(?P<tablename>.+?)\$\$(?P<header>.+?)::(?P<body>.+)", trex_segment_str)
    if not matches:
        return None
    name, header, body = matches.groups()

    headers = []
    for column_header in header.split(':'):
        ch = column_header.split('$')
        col_key = ch[0]
        # A header without '$' gets an empty type.
        col_type = ch[1] if len(ch) > 1 else ''
        headers.append(ColumnHeader(key=col_key, type=col_type))

    col_types = [h.type for h in headers]
    # Convert each cell to the value class of its column.
    # NOTE: zip() stops at the shorter sequence, so cells beyond the number of
    # header columns are dropped here.
    rows = [
        TableRow([str_to_value_type(c, t) for c, t in zip(cells.split(':'), col_types)])
        for cells in body.split('::')
    ]

    # Hand over the TableRow objects; they used to be built and then discarded
    # in favour of the plain lists.
    return TREX_Table(column_headers=headers, data=rows, key=name)
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def str_to_value_type(s: str, t: str):
    """Build the value object for cell text ``s`` according to type code ``t``.

    Any code other than 'T.D', 'T.B', 'T.A', 'T.T', 'T.X' and 'E' yields a
    ``NumericValue``.
    """
    if t == 'T.T':
        # Text cells are wrapped in base36 before construction.
        return TextValue(value=base36(s))

    constructors = {
        'T.D': DateValue,
        'T.B': BoolValue,
        'T.A': AlphanumericValue,
        'T.X': BinaryValue,
        'E': ErrorValue,
    }
    value_class = constructors.get(t, NumericValue)
    return value_class(value=s)
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
class TREX_Struct(TREX_Segment):
    """Struct is a special interpretation of a T-REX Table with one row"""
    wrapped_table: TREX_Table

    @property
    def segment_name_(self):
        """Key of the wrapped table."""
        return self.wrapped_table.key

    @field_validator('wrapped_table')
    @classmethod
    def validate_table(cls, table):
        """Reject tables that do not have exactly one row.

        Raises:
            ValueError: pydantic reports it as a validation error. Pydantic's
                own ValidationError cannot be raised with a plain message.
        """
        if len(table.data) != 1:
            raise ValueError(f"Struct must have exactly one row, got {len(table.data)}")
        return table

    def serialize_for_trex(self):
        """Serialize as the wrapped table.

        Implements the abstract method of TREX_Segment; without it the class
        cannot be instantiated.
        NOTE(review): assumes a struct is written exactly like its table -- confirm.
        """
        return self.wrapped_table.serialize_for_trex()

    def get(self, key):
        """Return the cell of the single row in column ``key``."""
        return self.wrapped_table.cell_data(0, key)

    def keys(self):
        """Return the column names of the wrapped table."""
        # TREX_Table exposes `column_names`; `col_names` does not exist on it.
        return self.wrapped_table.column_names
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
|